From c1608628d42fbc0d70d94abb7c5cff27c5469a24 Mon Sep 17 00:00:00 2001 From: Ivan Tikhonov Date: Mon, 7 Jun 2021 15:13:41 +0300 Subject: [PATCH 01/41] LowLatency v2 ngraph transformation (#5160) * LowLatency 2.0: transformation and unit tests * low latency 2.0: unit tests * documentation and ngraph codestyle * update CNN Interface of LowLatency transformation * fix build on Windows * fix build on Windows * investigation of a failed build on Win OS * ngraph codestyle * fix build (werrors) * New unit tests, refactoring * update functional tests for Memory * update LowLatency functional tests * extend Memory tests to cover LowLatency v2 transformation * clean up, code style * fix unit tests * update and fix unit tests, add feature to apply LLTv2 after LLTv1 * update docs, refactoring * add several gna tests to skip config * fix python api tests * update python api, rename LowLatency_v2 to LowLatency2 * deprecate LowLatency v1 * Deprecate LowLatency v1 in IE * fix wrong merge, codestyle * resolve review comments * fix python test * update skip config * apply online review notes, fix unit tests * clean up, code style * fix docs * Use debug_messages instead of exceptions in llt v2 * fix unit tests * Resolve review remarks --- .../offline_transformations_api.pyx | 4 +- .../offline_transformations_api_impl.cpp | 11 +- .../offline_transformations_api_impl.hpp | 2 +- .../offline_transformations_api_impl_defs.pxd | 5 +- .../python/tests/test_offline_api.py | 2 +- .../include/ie_transformations.hpp | 36 + .../inference_engine/ie_transformations.cpp | 10 + .../transformations/low_latency_test.cpp | 10 + .../transformations/low_latency_v2_test.cpp | 829 ++++++++++++++++++ .../single_layer_tests/memory.cpp | 8 + .../subgraph_tests/memory_LSTMCell.cpp | 45 + .../subgraph_tests/multiple_LSTMCell.cpp | 10 + .../single_layer_tests/memory.cpp | 12 +- .../skip_tests_config.cpp | 8 + .../subgraph_tests/memory_LSTMCell.cpp | 9 + .../subgraph_tests/multiple_LSTMCell.cpp | 10 + .../include/subgraph_tests/basic_lstm.hpp | 2 +- .../subgraph_tests/memory_LSTMCell.hpp | 8 - .../subgraph_tests/multiple_LSTMCell.hpp | 8 - .../single_layer/memory.hpp | 9 + .../subgraph/memory_LSTMCell.hpp | 8 +- .../subgraph/multiple_LSTMCell.hpp | 8 +- .../src/single_layer/memory.cpp | 116 ++- .../src/subgraph/memory_LSTMCell.cpp | 281 +++--- .../src/subgraph/multiple_LSTMCell.cpp | 442 +++++----- .../ngraph_functions/utils/ngraph_helpers.hpp | 11 + .../src/utils/ngraph_helpers.cpp | 27 + .../mo/back/offline_transformations.py | 2 +- model-optimizer/mo/utils/cli_parser.py | 17 +- .../unit_tests/mo/utils/cli_parser_test.py | 45 +- .../core/include/ngraph/pass/low_latency.hpp | 44 +- ngraph/core/src/op/tensor_iterator.cpp | 1 - ngraph/core/src/pass/low_latency.cpp | 203 ++++- 33 files changed, 1811 insertions(+), 432 deletions(-) create mode 100644 inference-engine/tests/functional/inference_engine/transformations/low_latency_v2_test.cpp create mode 100644 inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/memory_LSTMCell.cpp diff --git a/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api.pyx b/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api.pyx index bd101280fcb..266c1dc94d9 100644 --- a/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api.pyx +++ b/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api.pyx @@ -17,8 
+17,8 @@ def ApplyPOTTransformations(IENetwork network, string device): C.ApplyPOTTransformations(network.impl, device) -def ApplyLowLatencyTransformation(IENetwork network, int64_t num_iterations=1): - C.ApplyLowLatencyTransformation(network.impl, num_iterations) +def ApplyLowLatencyTransformation(IENetwork network, bool use_const_initializer = True): + C.ApplyLowLatencyTransformation(network.impl, use_const_initializer) def ApplyPruningTransformation(IENetwork network): diff --git a/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl.cpp b/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl.cpp index b9ff879da8c..183deaccfb3 100644 --- a/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl.cpp +++ b/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl.cpp @@ -26,16 +26,9 @@ void InferenceEnginePython::ApplyPOTTransformations(InferenceEnginePython::IENet manager.run_passes(network.actual->getFunction()); } -void InferenceEnginePython::ApplyLowLatencyTransformation(InferenceEnginePython::IENetwork network, int64_t num_iterations) { +void InferenceEnginePython::ApplyLowLatencyTransformation(InferenceEnginePython::IENetwork network, bool use_const_initializer) { ngraph::pass::Manager manager; - // TODO: pass num_iterations to LowLatency - manager.register_pass(); - manager.register_pass(); - - auto pass_config = manager.get_pass_config(); - pass_config->set_callback([](const std::shared_ptr& node) -> bool { - return node->get_rt_info().count("UNROLL_TI") == 0; - }); + manager.register_pass(use_const_initializer); manager.run_passes(network.actual->getFunction()); } diff --git a/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl.hpp b/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl.hpp index 504388e4afc..3941c48a50c 100644 --- a/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl.hpp +++ b/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl.hpp @@ -15,7 +15,7 @@ void ApplyMOCTransformations(InferenceEnginePython::IENetwork network, bool cf); void ApplyPOTTransformations(InferenceEnginePython::IENetwork network, std::string device); -void ApplyLowLatencyTransformation(InferenceEnginePython::IENetwork network, int64_t num_iterations); +void ApplyLowLatencyTransformation(InferenceEnginePython::IENetwork network, bool use_const_initializer = true); void ApplyPruningTransformation(InferenceEnginePython::IENetwork network); diff --git a/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl_defs.pxd b/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl_defs.pxd index 726880e9353..551e56c27a8 100644 --- a/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl_defs.pxd +++ b/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl_defs.pxd @@ -3,7 +3,6 @@ from libcpp cimport bool from libcpp.string cimport string -from libc.stdint cimport int64_t from ..inference_engine.ie_api_impl_defs cimport IENetwork @@ -12,10 +11,10 @@ cdef extern from "offline_transformations_api_impl.hpp" 
namespace "InferenceEngi cdef void ApplyPOTTransformations(IENetwork network, string device) - cdef void ApplyLowLatencyTransformation(IENetwork network, int64_t num_iterations) + cdef void ApplyLowLatencyTransformation(IENetwork network, bool use_const_initializer) cdef void ApplyPruningTransformation(IENetwork network) cdef void GenerateMappingFile(IENetwork network, string path, bool extract_names) - cdef void CheckAPI() \ No newline at end of file + cdef void CheckAPI() diff --git a/inference-engine/ie_bridges/python/tests/test_offline_api.py b/inference-engine/ie_bridges/python/tests/test_offline_api.py index b5565c04bb4..0bba0951c27 100644 --- a/inference-engine/ie_bridges/python/tests/test_offline_api.py +++ b/inference-engine/ie_bridges/python/tests/test_offline_api.py @@ -49,4 +49,4 @@ def test_pruning_transformations(): f = ng.function_from_cnn(net) assert f != None - assert len(f.get_ops()) == 3 \ No newline at end of file + assert len(f.get_ops()) == 3 diff --git a/inference-engine/include/ie_transformations.hpp b/inference-engine/include/ie_transformations.hpp index 6691fa74dae..0867b02972c 100644 --- a/inference-engine/include/ie_transformations.hpp +++ b/inference-engine/include/ie_transformations.hpp @@ -52,5 +52,41 @@ namespace InferenceEngine { * @param network A network to apply LowLatency transformation * * */ + +INFERENCE_ENGINE_DEPRECATED("This transformation will be removed in 2023.1. " + "Use InferenceEngine::lowLatency2 instead.") INFERENCE_ENGINE_API_CPP(void) LowLatency(InferenceEngine::CNNNetwork& network); + + +/** + * @brief The transformation finds all TensorIterator/Loop layers in the network, + * processes all back edges that describe a connection between Result and Parameter + * of the TensorIterator/Loop bodies,and inserts ReadValue and Assign layers at the + * input and output corresponding to this back edge. + * Supported platforms: CPU, GNA. + * + * The example below describes the changes made by the transformation + * [] - TensorIterator body + * () - new layer + * BE - back-edge + * + * before applying the transformation: + * -> input1[BE_1 -> Parameter -> Layers ... -> Result -> BE_1 ]output1-> + * + * after applying the transformation: + * ->(ReadValue)-> input1[BE_1 ->Parameter->Layers ...->Result->BE_1]output1 ->(Assign) + * \ + * ->... + * After applying the transformation, the resulting network can be inferred + * step by step, the states will store between inferences. + * @param network A network to apply LowLatency transformation + * @param use_const_initializer Changes the type of the initializing subgraph for ReadValue operations. + If "true", then the transformation inserts Constant before ReadValue operation. + If "false, then the transformation leaves existed initializing subgraph for ReadValue operation. + * Loop operation by a given number. Does not affect TensorIterators. 
+ * * + */ +INFERENCE_ENGINE_API_CPP(void) lowLatency2(InferenceEngine::CNNNetwork& network, + bool use_const_initializer = true); + } // namespace InferenceEngine diff --git a/inference-engine/src/inference_engine/ie_transformations.cpp b/inference-engine/src/inference_engine/ie_transformations.cpp index 15360ae97ea..2a87671ce25 100644 --- a/inference-engine/src/inference_engine/ie_transformations.cpp +++ b/inference-engine/src/inference_engine/ie_transformations.cpp @@ -11,6 +11,16 @@ using namespace InferenceEngine; void InferenceEngine::LowLatency(InferenceEngine::CNNNetwork &network) { auto function = network.getFunction(); ngraph::pass::Manager manager; + NGRAPH_SUPPRESS_DEPRECATED_START manager.register_pass(); + NGRAPH_SUPPRESS_DEPRECATED_END + manager.run_passes(function); +} + +void InferenceEngine::lowLatency2(InferenceEngine::CNNNetwork &network, + bool use_const_initializer) { + auto function = network.getFunction(); + ngraph::pass::Manager manager; + manager.register_pass(use_const_initializer); manager.run_passes(function); } diff --git a/inference-engine/tests/functional/inference_engine/transformations/low_latency_test.cpp b/inference-engine/tests/functional/inference_engine/transformations/low_latency_test.cpp index 69cb89991a3..2d1594d3841 100644 --- a/inference-engine/tests/functional/inference_engine/transformations/low_latency_test.cpp +++ b/inference-engine/tests/functional/inference_engine/transformations/low_latency_test.cpp @@ -68,7 +68,9 @@ TEST(TransformationTests, LowLatencyLSTM) { ngraph::pass::Manager manager; manager.register_pass(); + NGRAPH_SUPPRESS_DEPRECATED_START manager.register_pass(); + NGRAPH_SUPPRESS_DEPRECATED_END manager.register_pass(); manager.run_passes(f); } @@ -149,7 +151,9 @@ TEST(TransformationTests, LowLatencyGRU) { ngraph::pass::Manager manager; manager.register_pass(); + NGRAPH_SUPPRESS_DEPRECATED_START manager.register_pass(); + NGRAPH_SUPPRESS_DEPRECATED_END manager.register_pass(); manager.run_passes(f); @@ -227,7 +231,9 @@ TEST(TransformationTests, LowLatencyRNN) { ngraph::pass::Manager manager; manager.register_pass(); + NGRAPH_SUPPRESS_DEPRECATED_START manager.register_pass(); + NGRAPH_SUPPRESS_DEPRECATED_END manager.register_pass(); manager.run_passes(f); @@ -317,7 +323,9 @@ TEST(TransformationTests, LowLatencyLSTMReshape) { ngraph::pass::Manager manager; manager.register_pass(); + NGRAPH_SUPPRESS_DEPRECATED_START manager.register_pass(); + NGRAPH_SUPPRESS_DEPRECATED_END manager.register_pass(); manager.run_passes(f); } @@ -413,7 +421,9 @@ TEST(TransformationTests, LowLatencyLSTM_Loop) { ngraph::pass::Manager manager; manager.register_pass(); + NGRAPH_SUPPRESS_DEPRECATED_START manager.register_pass(); + NGRAPH_SUPPRESS_DEPRECATED_END manager.register_pass(); manager.run_passes(f); } diff --git a/inference-engine/tests/functional/inference_engine/transformations/low_latency_v2_test.cpp b/inference-engine/tests/functional/inference_engine/transformations/low_latency_v2_test.cpp new file mode 100644 index 00000000000..5c66e8109ef --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/transformations/low_latency_v2_test.cpp @@ -0,0 +1,829 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +#include "common_test_utils/ngraph_test_utils.hpp" + +using namespace testing; +using namespace ngraph; +using namespace opset7; +using namespace std; + +Output 
create_init_subgraph(const Output& in_node) { + auto const_zero = make_shared(in_node.get_element_type(), Shape{1}, 0); + auto shape_of = make_shared(in_node); + auto broadcast = make_shared(const_zero, shape_of); + return broadcast->output(0); +} + +Output insert_identity(const Output& in_node) { + auto axis_1 = Constant::create(element::i64, Shape{1}, {1}); + auto identity_1 = std::make_shared(in_node, axis_1); + return std::make_shared(identity_1, axis_1); +} + +std::shared_ptr createLSTMBody(const std::shared_ptr& Xi, + const std::shared_ptr& H_t, + const std::shared_ptr& C_t, + bool is_loop = false) { + // Body + auto axis = Constant::create(element::i64, Shape{}, {0}); + auto squeeze = std::make_shared(Xi, axis); + + auto w_val = std::vector(512 * 16, 0); + auto r_val = std::vector(512 * 128, 0); + auto b_val = std::vector(512, 0); + auto W = Constant::create(element::f32, Shape{512, 16}, w_val); + auto R = Constant::create(element::f32, Shape{512, 128}, r_val); + auto B = Constant::create(element::f32, Shape{512}, b_val); + + auto lstm_cell = std::make_shared(squeeze, H_t, C_t, W, R, B, 128); + auto res_1 = std::make_shared(lstm_cell->output(0)); + auto unsqueeze = std::make_shared(lstm_cell->output(0), axis); + auto res_2 = std::make_shared(unsqueeze); + auto res_3 = std::make_shared(lstm_cell->output(1)); + + auto func = std::make_shared(OutputVector{res_1, res_2, res_3}, + ParameterVector{Xi, H_t, C_t}); + if (is_loop) { + auto body_condition = std::make_shared( + element::boolean, Shape{1}, true); + auto cond_res = std::make_shared(body_condition); + func->add_results({cond_res}); + } + return func; +} + +TEST(TransformationTests, LowLatency2_LSTM) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto X = std::make_shared(element::f32, Shape{1, 1, 16}); + auto H_init = std::make_shared(element::f32, Shape{1, 128}); + auto C_init = std::make_shared(element::f32, Shape{1, 128}); + + auto Xi = std::make_shared(element::f32, Shape{1, 1, 16}); + auto H_t = std::make_shared(element::f32, Shape{1, 128}); + auto C_t = std::make_shared(element::f32, Shape{1, 128}); + + // Body + auto body = createLSTMBody(Xi, H_t, C_t); + auto results = body->get_results(); + + auto tensor_iterator = std::make_shared(); + tensor_iterator->set_body(body); + tensor_iterator->set_friendly_name("LSTMTensorIterator"); + + tensor_iterator->set_merged_input(C_t, C_init, results[2]); + tensor_iterator->set_sliced_input(Xi, X, 0, 1, 1, -1, 0); + tensor_iterator->set_merged_input(H_t, H_init, results[0]); + + tensor_iterator->get_iter_value(results[0], -1); + tensor_iterator->get_concatenated_slices(results[1], 0, 1, 1, -1, 0); + + auto res_ti_1 = std::make_shared(tensor_iterator->output(1)); + auto res_ti_2 = std::make_shared(tensor_iterator->output(0)); + f = std::make_shared(NodeVector{res_ti_1, res_ti_2}, + ParameterVector{X, H_init, C_init}); + + pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + manager.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + { + auto Xi = std::make_shared(element::f32, Shape{1, 1, 16}); + auto H_t = std::make_shared(element::f32, Shape{1, 128}); + auto C_t = std::make_shared(element::f32, Shape{1, 128}); + + const std::string variable_name_H("LSTMTensorIterator/variable0"); + const std::string variable_name_C("LSTMTensorIterator/variable1"); + auto variable_H = std::make_shared(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_H}); + auto variable_C = std::make_shared(VariableInfo{PartialShape::dynamic(), 
element::dynamic, variable_name_C}); + auto read_value_H = std::make_shared(create_init_subgraph(H_t), variable_H); + auto read_value_C = std::make_shared(create_init_subgraph(C_t), variable_C); + // Body + auto axis = Constant::create(element::i64, Shape{}, {0}); + auto squeeze = std::make_shared(Xi, axis); + + auto w_val = std::vector(512 * 16, 0); + auto r_val = std::vector(512 * 128, 0); + auto b_val = std::vector(512, 0); + auto W = Constant::create(element::f32, Shape{512, 16}, w_val); + auto R = Constant::create(element::f32, Shape{512, 128}, r_val); + auto B = Constant::create(element::f32, Shape{512}, b_val); + + auto lstm_cell = std::make_shared(squeeze, read_value_H, read_value_C, W, R, B, 128); + auto assign_H = std::make_shared(lstm_cell->output(0), variable_H); + auto assign_C = std::make_shared(lstm_cell->output(1), variable_C); + auto unsqueeze = std::make_shared(lstm_cell->output(0), axis); + auto res_2 = std::make_shared(insert_identity(unsqueeze)); + auto res_1 = std::make_shared(insert_identity(lstm_cell->output(0))); + f_ref = std::make_shared(OutputVector{res_1, res_2}, ParameterVector{Xi, H_t, C_t}); + f_ref->add_sinks({assign_C, assign_H}); + assign_H->add_control_dependency(read_value_H); + assign_C->add_control_dependency(read_value_C); + } + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(TransformationTests, LowLatency2_GRU) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto X = std::make_shared(element::f32, Shape{1, 1, 16}); + auto Y = std::make_shared(element::f32, Shape{1, 128}); + + auto Xi = std::make_shared(element::f32, Shape{1, 1, 16}); + auto Yi = std::make_shared(element::f32, Shape{1, 128}); + + // Body + auto axis = Constant::create(element::i64, Shape{}, {0}); + auto squeeze = std::make_shared(Xi, axis); + + auto w_val = std::vector(384 * 16, 0); + auto r_val = std::vector(384 * 128, 0); + auto b_val = std::vector(384, 0); + auto W = Constant::create(element::f32, Shape{384, 16}, w_val); + auto R = Constant::create(element::f32, Shape{384, 128}, r_val); + auto B = Constant::create(element::f32, Shape{384}, b_val); + + auto gru_cell = std::make_shared(squeeze, Yi, W, R, B, 128); + auto res_1 = std::make_shared(gru_cell); + auto unsqueeze = std::make_shared(gru_cell, axis); + auto res_2 = std::make_shared(unsqueeze); + auto body = std::make_shared(OutputVector{res_1, res_2}, ParameterVector{Xi, Yi}); + + auto tensor_iterator = std::make_shared(); + tensor_iterator->set_body(body); + + tensor_iterator->set_sliced_input(Xi, X, 0, 1, 1, -1, 0); + tensor_iterator->set_merged_input(Yi, Y, res_1); + + auto out0 = tensor_iterator->get_iter_value(res_1, -1); + auto out1 = tensor_iterator->get_concatenated_slices(res_2, 0, 1, 1, -1, 0); + + auto res_ti_1 = std::make_shared(tensor_iterator->output(1)); + f = std::make_shared(NodeVector{res_ti_1}, ParameterVector{X, Y}); + + pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + + manager.run_passes(f); + + ASSERT_NO_THROW(check_rt_info(f)); + } + { + auto Xi = std::make_shared(element::f32, Shape{1, 1, 16}); + auto H_t = std::make_shared(element::f32, Shape{1, 128}); + + const std::string variable_name_H("GRUTensorIterator/variable0"); + auto variable_H = std::make_shared(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_H}); + auto read_value_H = std::make_shared(create_init_subgraph(H_t), variable_H); + // Body + auto axis = Constant::create(element::i64, Shape{}, {0}); + auto squeeze = std::make_shared(Xi, axis); 
+ + auto w_val = std::vector(384 * 16, 0); + auto r_val = std::vector(384 * 128, 0); + auto b_val = std::vector(384, 0); + auto W = Constant::create(element::f32, Shape{384, 16}, w_val); + auto R = Constant::create(element::f32, Shape{384, 128}, r_val); + auto B = Constant::create(element::f32, Shape{384}, b_val); + + auto rnn_cell = std::make_shared(squeeze, read_value_H, W, R, B, 128); + auto assign_H = std::make_shared(rnn_cell->output(0), variable_H); + auto res_1 = std::make_shared(assign_H); + auto unsqueeze = std::make_shared(rnn_cell->output(0), axis); + auto res_2 = std::make_shared(insert_identity(unsqueeze)); + f_ref = std::make_shared(ResultVector {res_2}, ParameterVector{Xi, H_t}); + f_ref->add_sinks({assign_H}); + assign_H->add_control_dependency(read_value_H); + } + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(TransformationTests, LowLatency2_RNN) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto X = std::make_shared(element::f32, Shape{1, 1, 16}); + auto Y = std::make_shared(element::f32, Shape{1, 128}); + + auto Xi = std::make_shared(element::f32, Shape{1, 1, 16}); + auto Yi = std::make_shared(element::f32, Shape{1, 128}); + + // Body + auto axis = Constant::create(element::i64, Shape{}, {0}); + auto squeeze = std::make_shared(Xi, axis); + + auto w_val = std::vector(128 * 16, 0); + auto r_val = std::vector(128 * 128, 0); + auto b_val = std::vector(128, 0); + auto W = Constant::create(element::f32, Shape{128, 16}, w_val); + auto R = Constant::create(element::f32, Shape{128, 128}, r_val); + auto B = Constant::create(element::f32, Shape{128}, b_val); + + auto rnn_cell = std::make_shared(squeeze, Yi, W, R, B, 128); + auto res_1 = std::make_shared(rnn_cell); + auto unsqueeze = std::make_shared(rnn_cell, axis); + auto res_2 = std::make_shared(unsqueeze); + auto body = std::make_shared(OutputVector{res_1, res_2}, ParameterVector{Xi, + Yi}); + + auto tensor_iterator = std::make_shared(); + tensor_iterator->set_body(body); + + tensor_iterator->set_sliced_input(Xi, X, 0, 1, 1, -1, 0); + tensor_iterator->set_merged_input(Yi, Y, res_1); + + auto out0 = tensor_iterator->get_iter_value(res_1, -1); + auto out1 = tensor_iterator->get_concatenated_slices(res_2, 0, 1, 1, -1, 0); + + auto res_ti_1 = std::make_shared(tensor_iterator->output(1)); + f = std::make_shared(NodeVector{res_ti_1}, ParameterVector{X, Y}); + + pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + + manager.run_passes(f); + + ASSERT_NO_THROW(check_rt_info(f)); + } + { + auto Xi = std::make_shared(element::f32, Shape{1, 1, 16}); + auto H_t = std::make_shared(element::f32, Shape{1, 128}); + + const std::string variable_name_H("RNNTensorIterator/variable0"); + auto variable_H = std::make_shared(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_H}); + auto read_value_H = std::make_shared(create_init_subgraph(H_t), variable_H); + // Body + auto axis = Constant::create(element::i64, Shape{}, {0}); + auto squeeze = std::make_shared(Xi, axis); + + auto w_val = std::vector(128 * 16, 0); + auto r_val = std::vector(128 * 128, 0); + auto b_val = std::vector(128, 0); + auto W = Constant::create(element::f32, Shape{128, 16}, w_val); + auto R = Constant::create(element::f32, Shape{128, 128}, r_val); + auto B = Constant::create(element::f32, Shape{128}, b_val); + + auto rnn_cell = std::make_shared(squeeze, read_value_H, W, R, B, 128); + auto assign_H = std::make_shared(rnn_cell->output(0), variable_H); + auto res_1 = 
std::make_shared(assign_H); + auto unsqueeze = std::make_shared(rnn_cell->output(0), axis); + auto res_2 = std::make_shared(insert_identity(unsqueeze)); + f_ref = std::make_shared(ResultVector{res_2}, ParameterVector{Xi, H_t}); + f_ref->add_sinks({assign_H}); + assign_H->add_control_dependency(read_value_H); + } + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(TransformationTests, LowLatency2_LSTMReshape) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto X = std::make_shared(element::f32, Shape{2, 1, 16}); + auto H = std::make_shared(element::f32, Shape{1, 128}); + auto C = std::make_shared(element::f32, Shape{1, 128}); + + auto Xi = std::make_shared(element::f32, Shape{1, 1, 16}); + auto H_t = std::make_shared(element::f32, Shape{1, 128}); + auto C_t = std::make_shared(element::f32, Shape{1, 128}); + + // Body + auto body = createLSTMBody(Xi, H_t, C_t); + auto results = body->get_results(); + + auto tensor_iterator = std::make_shared(); + tensor_iterator->set_body(body); + + tensor_iterator->set_merged_input(C_t, C, results[2]); + tensor_iterator->set_sliced_input(Xi, X, 0, 1, 1, -1, 0); + tensor_iterator->set_merged_input(H_t, H, results[0]); + + auto out0 = tensor_iterator->get_iter_value(results[0], -1); + auto out1 = tensor_iterator->get_concatenated_slices(results[1], 0, 1, 1, -1, 0); + + auto res_ti_1 = std::make_shared(tensor_iterator->output(1)); + auto res_ti_2 = std::make_shared(tensor_iterator->output(0)); + f = std::make_shared(NodeVector{res_ti_1, res_ti_2}, ParameterVector{X, H, + C}); + + // Reshape + // change the number of iteration of TI. 2 -> 1 + auto new_X = std::make_shared(element::f32, Shape{1, 1, 16}); + f->replace_parameter(0, new_X); + f->validate_nodes_and_infer_types(); + + pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + + manager.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + { + auto Xi = std::make_shared(element::f32, Shape{1, 1, 16}); + auto H_t = std::make_shared(element::f32, Shape{1, 128}); + auto C_t = std::make_shared(element::f32, Shape{1, 128}); + + const std::string variable_name_H("LSTMTensorIterator/variable0"); + const std::string variable_name_C("LSTMTensorIterator/variable1"); + auto variable_H = std::make_shared(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_H}); + auto variable_C = std::make_shared(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_C}); + auto read_value_H = std::make_shared(create_init_subgraph(H_t), variable_H); + auto read_value_C = std::make_shared(create_init_subgraph(C_t), variable_C); + // Body + auto axis = Constant::create(element::i64, Shape{}, {0}); + auto squeeze = std::make_shared(Xi, axis); + + auto w_val = std::vector(512 * 16, 0); + auto r_val = std::vector(512 * 128, 0); + auto b_val = std::vector(512, 0); + auto W = Constant::create(element::f32, Shape{512, 16}, w_val); + auto R = Constant::create(element::f32, Shape{512, 128}, r_val); + auto B = Constant::create(element::f32, Shape{512}, b_val); + + auto lstm_cell = std::make_shared(squeeze, read_value_H, read_value_C, W, R, B, 128); + auto assign_H = std::make_shared(lstm_cell->output(0), variable_H); + auto assign_C = std::make_shared(lstm_cell->output(1), variable_C); + auto unsqueeze = std::make_shared(lstm_cell->output(0), axis); + auto res_2 = std::make_shared(insert_identity(unsqueeze)); + auto res_1 = std::make_shared(insert_identity(lstm_cell->output(0))); + f_ref = std::make_shared(OutputVector{res_1, res_2}, 
ParameterVector{Xi, H_t, C_t}); + f_ref->add_sinks({assign_C, assign_H}); + assign_H->add_control_dependency(read_value_H); + assign_C->add_control_dependency(read_value_C); + } + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(TransformationTests, LowLatency2_LSTM_Loop) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto X = std::make_shared(element::f32, Shape{1, 1, 16}); + auto H_init = std::make_shared(element::f32, Shape{1, 128}); + auto C_init = std::make_shared(element::f32, Shape{1, 128}); + + auto Xi = std::make_shared(element::f32, Shape{1, 1, 16}); + auto H_t = std::make_shared(element::f32, Shape{1, 128}); + auto C_t = std::make_shared(element::f32, Shape{1, 128}); + + // Body + auto axis = Constant::create(element::i64, Shape{}, {0}); + auto squeeze = std::make_shared(Xi, axis); + + // Body + auto body = createLSTMBody(Xi, H_t, C_t, true); + auto results = body->get_results(); + + auto trip_count = + std::make_shared(element::i64, Shape{}, 1); + auto exec_condition = + std::make_shared(element::boolean, Shape{}, true); + auto loop = std::make_shared(trip_count, exec_condition); + loop->set_special_body_ports({-1, 3}); + loop->set_function(body); + loop->set_friendly_name("LSTMLoop"); + + loop->set_merged_input(C_t, C_init, results[2]); + loop->set_sliced_input(Xi, X, 0, 1, 1, -1, 0); + loop->set_merged_input(H_t, H_init, results[0]); + + auto out0 = loop->get_iter_value(results[0], -1); + auto out1 = loop->get_concatenated_slices(results[1], 0, 1, 1, -1, 0); + + auto res_ti_1 = std::make_shared(loop->output(1)); + auto res_ti_2 = std::make_shared(loop->output(0)); + f = std::make_shared(NodeVector{res_ti_1, res_ti_2}, + ParameterVector{X, H_init, C_init}); + + pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + + manager.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + { + auto Xi = std::make_shared(element::f32, Shape{1, 1, 16}); + auto H_t = std::make_shared(element::f32, Shape{1, 128}); + auto C_t = std::make_shared(element::f32, Shape{1, 128}); + + const std::string variable_name_H("LSTMTensorIterator/variable0"); + const std::string variable_name_C("LSTMTensorIterator/variable1"); + auto variable_H = std::make_shared(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_H}); + auto variable_C = std::make_shared(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_C}); + auto read_value_H = std::make_shared(create_init_subgraph(H_t), variable_H); + auto read_value_C = std::make_shared(create_init_subgraph(C_t), variable_C); + // Body + auto axis = Constant::create(element::i64, Shape{}, {0}); + auto squeeze = std::make_shared(Xi, axis); + + auto w_val = std::vector(512 * 16, 0); + auto r_val = std::vector(512 * 128, 0); + auto b_val = std::vector(512, 0); + auto W = Constant::create(element::f32, Shape{512, 16}, w_val); + auto R = Constant::create(element::f32, Shape{512, 128}, r_val); + auto B = Constant::create(element::f32, Shape{512}, b_val); + + auto lstm_cell = std::make_shared(squeeze, read_value_H, read_value_C, W, R, B, 128); + auto assign_H = std::make_shared(lstm_cell->output(0), variable_H); + auto assign_C = std::make_shared(lstm_cell->output(1), variable_C); + auto unsqueeze = std::make_shared(lstm_cell->output(0), axis); + auto res_2 = std::make_shared(insert_identity(unsqueeze)); + auto res_1 = std::make_shared(insert_identity(lstm_cell->output(0))); + f_ref = std::make_shared(OutputVector{res_1, res_2}, ParameterVector{Xi, H_t, C_t}); + 
f_ref->add_sinks({assign_C, assign_H}); + assign_H->add_control_dependency(read_value_H); + assign_C->add_control_dependency(read_value_C); + } + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(TransformationTests, LowLatency2_LSTM_several_iterations) { + constexpr int ITER_CNT = 5; + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto X = std::make_shared(element::f32, Shape{ITER_CNT, 1, 16}); + auto H = std::make_shared(element::f32, Shape{1, 128}); + auto C = std::make_shared(element::f32, Shape{1, 128}); + + auto Xi = std::make_shared(element::f32, Shape{1, 1, 16}); + auto H_t = std::make_shared(element::f32, Shape{1, 128}); + auto C_t = std::make_shared(element::f32, Shape{1, 128}); + + // Body + auto body = createLSTMBody(Xi, H_t, C_t); + auto results = body->get_results(); + + auto tensor_iterator = std::make_shared(); + tensor_iterator->set_body(body); + + tensor_iterator->set_merged_input(C_t, C, results[2]); + tensor_iterator->set_sliced_input(Xi, X, 0, 1, 1, -1, 0); + tensor_iterator->set_merged_input(H_t, H, results[0]); + + auto out0 = tensor_iterator->get_iter_value(results[0], -1); + auto out1 = tensor_iterator->get_concatenated_slices(results[1], 0, 1, 1, -1, 0); + + auto res_ti_1 = std::make_shared(tensor_iterator->output(1)); + auto res_ti_2 = std::make_shared(tensor_iterator->output(0)); + f = std::make_shared(NodeVector{res_ti_1, res_ti_2}, ParameterVector{X, H, + C}); + + pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + + manager.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + + // TensorIterator not unrolled. + { + auto X = std::make_shared(element::f32, Shape{ITER_CNT, 1, 16}); + auto H = std::make_shared(element::f32, Shape{1, 128}); + auto C = std::make_shared(element::f32, Shape{1, 128}); + + const std::string variable_name_H("LSTMTensorIterator/variable0"); + const std::string variable_name_C("LSTMTensorIterator/variable1"); + auto variable_H = std::make_shared(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_H}); + auto variable_C = std::make_shared(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_C}); + auto read_value_H = std::make_shared(create_init_subgraph(H), variable_H); + auto read_value_C = std::make_shared(create_init_subgraph(C), variable_C); + + // Body + + auto Xi = std::make_shared(element::f32, Shape{1, 1, 16}); + auto H_t = std::make_shared(element::f32, Shape{1, 128}); + auto C_t = std::make_shared(element::f32, Shape{1, 128}); + + // Body + auto axis = Constant::create(element::i64, Shape{}, {0}); + auto squeeze = std::make_shared(Xi, axis); + + auto w_val = std::vector(512 * 16, 0); + auto r_val = std::vector(512 * 128, 0); + auto b_val = std::vector(512, 0); + auto W = Constant::create(element::f32, Shape{512, 16}, w_val); + auto R = Constant::create(element::f32, Shape{512, 128}, r_val); + auto B = Constant::create(element::f32, Shape{512}, b_val); + + auto lstm_cell = std::make_shared(squeeze, H_t, C_t, W, R, B, 128); + auto res_1 = std::make_shared(lstm_cell->output(0)); + auto unsqueeze = std::make_shared(lstm_cell, axis); + auto res_2 = std::make_shared(unsqueeze); + auto res_3 = std::make_shared(lstm_cell->output(1)); + auto body = std::make_shared(OutputVector{res_1, res_2, res_3}, + ParameterVector{Xi, H_t, C_t}); + + auto tensor_iterator = std::make_shared(); + tensor_iterator->set_body(body); + + tensor_iterator->set_merged_input(C_t, read_value_C, res_3); + tensor_iterator->set_sliced_input(Xi, X, 0, 1, 1, 
-1, 0); + tensor_iterator->set_merged_input(H_t, read_value_H, res_1); + + auto out0 = tensor_iterator->get_iter_value(res_1, -1); + auto out1 = tensor_iterator->get_concatenated_slices(res_2, 0, 1, 1, -1, 0); + auto out2 = tensor_iterator->get_iter_value(res_3, -1); + + auto assign_H = std::make_shared(out0, variable_H); + auto assign_C = std::make_shared(out2, variable_C); + auto outer_res_2 = std::make_shared(out1); + auto outer_res_1 = std::make_shared(out0); + f_ref = std::make_shared(OutputVector{outer_res_1, outer_res_2}, ParameterVector{X, H, C}); + f_ref->add_sinks({assign_C, assign_H}); + assign_H->add_control_dependency(read_value_H); + assign_C->add_control_dependency(read_value_C); + } + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(TransformationTests, LowLatency2_LSTM_Loop_Reshape) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto X = std::make_shared(element::f32, Shape{10, 1, 16}); + auto H_init = std::make_shared(element::f32, Shape{1, 128}); + auto C_init = std::make_shared(element::f32, Shape{1, 128}); + + auto Xi = std::make_shared(element::f32, Shape{1, 1, 16}); + auto H_t = std::make_shared(element::f32, Shape{1, 128}); + auto C_t = std::make_shared(element::f32, Shape{1, 128}); + + // Body + auto body = createLSTMBody(Xi, H_t, C_t, true); + auto results = body->get_results(); + + auto shape_of = std::make_shared(X); + const auto trip_count = std::make_shared(shape_of, Constant::create(ngraph::element::i64, {1}, {0}), + Constant::create(ngraph::element::i64, {1}, {0})); + auto exec_condition = + std::make_shared(element::boolean, Shape{}, true); + auto loop = std::make_shared(trip_count, exec_condition); + loop->set_special_body_ports({-1, 3}); + loop->set_function(body); + loop->set_friendly_name("LSTMLoop"); + + loop->set_merged_input(C_t, C_init, results[2]); + loop->set_sliced_input(Xi, X, 0, 1, 1, -1, 0); + loop->set_merged_input(H_t, H_init, results[0]); + + auto out0 = loop->get_iter_value(results[0], -1); + auto out1 = loop->get_concatenated_slices(results[1], 0, 1, 1, -1, 0); + + auto res_ti_1 = std::make_shared(loop->output(1)); + auto res_ti_2 = std::make_shared(loop->output(0)); + f = std::make_shared(NodeVector{res_ti_1, res_ti_2}, + ParameterVector{X, H_init, C_init}); + + // Reshape + // change the number of iteration of Loop. 
10 -> 1 + auto new_X = std::make_shared(element::f32, Shape{1, 1, 16}); + f->replace_parameter(0, new_X); + f->validate_nodes_and_infer_types(); + + pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + + manager.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + { + auto Xi = std::make_shared(element::f32, Shape{1, 1, 16}); + auto H_t = std::make_shared(element::f32, Shape{1, 128}); + auto C_t = std::make_shared(element::f32, Shape{1, 128}); + + const std::string variable_name_H("LSTMTensorIterator/variable0"); + const std::string variable_name_C("LSTMTensorIterator/variable1"); + auto variable_H = std::make_shared(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_H}); + auto variable_C = std::make_shared(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_C}); + auto read_value_H = std::make_shared(create_init_subgraph(H_t), variable_H); + auto read_value_C = std::make_shared(create_init_subgraph(C_t), variable_C); + // Body + auto axis = Constant::create(element::i64, Shape{}, {0}); + auto squeeze = std::make_shared(Xi, axis); + + auto w_val = std::vector(512 * 16, 0); + auto r_val = std::vector(512 * 128, 0); + auto b_val = std::vector(512, 0); + auto W = Constant::create(element::f32, Shape{512, 16}, w_val); + auto R = Constant::create(element::f32, Shape{512, 128}, r_val); + auto B = Constant::create(element::f32, Shape{512}, b_val); + + auto lstm_cell = std::make_shared(squeeze, read_value_H, read_value_C, W, R, B, 128); + auto assign_H = std::make_shared(lstm_cell->output(0), variable_H); + auto assign_C = std::make_shared(lstm_cell->output(1), variable_C); + auto unsqueeze = std::make_shared(lstm_cell->output(0), axis); + auto res_2 = std::make_shared(insert_identity(unsqueeze)); + auto res_1 = std::make_shared(insert_identity(lstm_cell->output(0))); + f_ref = std::make_shared(OutputVector{res_1, res_2}, ParameterVector{Xi, H_t, C_t}); + f_ref->add_sinks({assign_C, assign_H}); + assign_H->add_control_dependency(read_value_H); + assign_C->add_control_dependency(read_value_C); + } + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} + + +TEST(TransformationTests, LowLatency2_LSTM_Loop_several_iterations) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto X = std::make_shared(element::f32, Shape{10, 1, 16}); + auto H_init = std::make_shared(element::f32, Shape{1, 128}); + auto C_init = std::make_shared(element::f32, Shape{1, 128}); + + auto Xi = std::make_shared(element::f32, Shape{1, 1, 16}); + auto H_t = std::make_shared(element::f32, Shape{1, 128}); + auto C_t = std::make_shared(element::f32, Shape{1, 128}); + + // Body + auto body = createLSTMBody(Xi, H_t, C_t, true); + auto results = body->get_results(); + + auto trip_count = + std::make_shared(element::i64, Shape{}, 10); + auto exec_condition = + std::make_shared(element::boolean, Shape{}, true); + auto loop = std::make_shared(trip_count, exec_condition); + loop->set_special_body_ports({-1, 3}); + loop->set_function(body); + loop->set_friendly_name("LSTMLoop"); + + loop->set_merged_input(C_t, C_init, results[2]); + loop->set_sliced_input(Xi, X, 0, 1, 1, -1, 0); + loop->set_merged_input(H_t, H_init, results[0]); + + auto out0 = loop->get_iter_value(results[0], -1); + auto out1 = loop->get_concatenated_slices(results[1], 0, 1, 1, -1, 0); + + auto res_ti_1 = std::make_shared(loop->output(1)); + auto res_ti_2 = std::make_shared(loop->output(0)); + f = std::make_shared(NodeVector{res_ti_1, res_ti_2}, + ParameterVector{X, 
H_init, C_init}); + + pass::Manager manager; + manager.register_pass(); + manager.register_pass(true); + + manager.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + { + auto X = std::make_shared(element::f32, Shape{10, 1, 16}); + auto H = std::make_shared(element::f32, Shape{1, 128}); + auto C = std::make_shared(element::f32, Shape{1, 128}); + + const std::string variable_name_H("LSTMTensorIterator/variable0"); + const std::string variable_name_C("LSTMTensorIterator/variable1"); + auto variable_H = std::make_shared(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_H}); + auto variable_C = std::make_shared(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_C}); + auto read_value_H = std::make_shared(create_init_subgraph(H), variable_H); + auto read_value_C = std::make_shared(create_init_subgraph(C), variable_C); + + // Body + auto Xi = std::make_shared(element::f32, Shape{1, 1, 16}); + auto H_t = std::make_shared(element::f32, Shape{1, 128}); + auto C_t = std::make_shared(element::f32, Shape{1, 128}); + + // Body + auto axis = Constant::create(element::i64, Shape{}, {0}); + auto squeeze = std::make_shared(Xi, axis); + + auto w_val = std::vector(512 * 16, 0); + auto r_val = std::vector(512 * 128, 0); + auto b_val = std::vector(512, 0); + auto W = Constant::create(element::f32, Shape{512, 16}, w_val); + auto R = Constant::create(element::f32, Shape{512, 128}, r_val); + auto B = Constant::create(element::f32, Shape{512}, b_val); + + auto lstm_cell = std::make_shared(squeeze, H_t, C_t, W, R, B, 128); + auto res_1 = std::make_shared(lstm_cell->output(0)); + auto unsqueeze = std::make_shared(lstm_cell->output(0), axis); + auto res_2 = std::make_shared(unsqueeze); + auto res_3 = std::make_shared(lstm_cell->output(1)); + auto body_condition = std::make_shared( + element::boolean, Shape{1}, true); + auto body = std::make_shared(OutputVector{res_1, res_2, res_3, body_condition}, + ParameterVector{Xi, H_t, C_t}); + + auto trip_count = + std::make_shared(element::i64, Shape{}, 10); + auto exec_condition = + std::make_shared(element::boolean, Shape{}, true); + auto loop = std::make_shared(trip_count, exec_condition); + loop->set_special_body_ports({-1, 3}); + loop->set_function(body); + loop->set_friendly_name("LSTMLoop"); + + loop->set_merged_input(C_t, read_value_C, res_3); + loop->set_sliced_input(Xi, X, 0, 1, 1, -1, 0); + loop->set_merged_input(H_t, read_value_H, res_1); + + auto out0 = loop->get_iter_value(res_1, -1); + auto out1 = loop->get_concatenated_slices(res_2, 0, 1, 1, -1, 0); + auto out3 = loop->get_iter_value(res_3, -1); + + auto assign_H = std::make_shared(out0, variable_H); + auto assign_C = std::make_shared(out3, variable_C); + auto outer_res_2 = std::make_shared(out1); + auto outer_res_1 = std::make_shared(out0); + f_ref = std::make_shared(OutputVector{outer_res_1, outer_res_2}, ParameterVector{X, H, C}); + f_ref->add_sinks({assign_C, assign_H}); + assign_H->add_control_dependency(read_value_H); + assign_C->add_control_dependency(read_value_C); + } + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(TransformationTests, LowLatencyLSTM_LLTv1_LLTv2) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto X = std::make_shared(element::f32, Shape{1, 1, 16}); + auto H_init = std::make_shared(element::f32, Shape{1, 128}); + auto C_init = std::make_shared(element::f32, Shape{1, 128}); + + auto Xi = std::make_shared(element::f32, Shape{1, 1, 16}); + auto H_t = std::make_shared(element::f32, Shape{1, 
128}); + auto C_t = std::make_shared(element::f32, Shape{1, 128}); + + // Body + auto axis = Constant::create(element::i64, Shape{}, {0}); + auto squeeze = std::make_shared(Xi, axis); + + auto w_val = std::vector(512 * 16, 0); + auto r_val = std::vector(512 * 128, 0); + auto b_val = std::vector(512, 0); + auto W = Constant::create(element::f32, Shape{512, 16}, w_val); + auto R = Constant::create(element::f32, Shape{512, 128}, r_val); + auto B = Constant::create(element::f32, Shape{512}, b_val); + + auto lstm_cell = std::make_shared(squeeze, H_t, C_t, W, R, B, 128); + auto res_1 = std::make_shared(lstm_cell->output(0)); + auto unsqueeze = std::make_shared(lstm_cell->output(0), axis); + auto res_2 = std::make_shared(unsqueeze); + auto res_3 = std::make_shared(lstm_cell->output(1)); + auto body = std::make_shared(OutputVector{res_1, res_2, res_3}, ParameterVector{Xi, H_t, C_t}); + + auto tensor_iterator = std::make_shared(); + tensor_iterator->set_body(body); + tensor_iterator->set_friendly_name("LSTMTensorIterator"); + + tensor_iterator->set_merged_input(C_t, C_init, res_3); + tensor_iterator->set_sliced_input(Xi, X, 0, 1, 1, -1, 0); + tensor_iterator->set_merged_input(H_t, H_init, res_1); + + auto out0 = tensor_iterator->get_iter_value(res_1, -1); + auto out1 = tensor_iterator->get_concatenated_slices(res_2, 0, 1, 1, -1, 0); + + auto res_ti_1 = std::make_shared(tensor_iterator->output(1)); + auto res_ti_2 = std::make_shared(tensor_iterator->output(0)); + f = std::make_shared(NodeVector{res_ti_1, res_ti_2}, + ParameterVector{X, H_init, C_init}); + + auto f_2 = ngraph::clone_function(*f); + pass::Manager manager_2; + manager_2.register_pass(); + NGRAPH_SUPPRESS_DEPRECATED_START + manager_2.register_pass(); + NGRAPH_SUPPRESS_DEPRECATED_END + EXPECT_NO_THROW(manager_2.run_passes(f_2)); + + pass::Manager manager; + manager.register_pass(); + NGRAPH_SUPPRESS_DEPRECATED_START + manager.register_pass(); + NGRAPH_SUPPRESS_DEPRECATED_END + // LLT v2 doesn't insert Assign/ReadValue ops, they are already inserted + // but unrolls TI/Loop + manager.register_pass(); + + EXPECT_NO_THROW(manager.run_passes(f)); + } +} diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/memory.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/memory.cpp index 750a0e4af5e..9ab20c3eda4 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/memory.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/memory.cpp @@ -10,6 +10,13 @@ using namespace LayerTestsDefinitions; namespace { +std::vector transformation { + ngraph::helpers::MemoryTransformation::NONE, + ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2, + ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API, + ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_ORIGINAL_INIT, +}; + const std::vector inShapes = { {3}, {100, 100}, @@ -27,6 +34,7 @@ const std::vector iterationCount { INSTANTIATE_TEST_CASE_P(smoke_MemoryTest, MemoryTest, ::testing::Combine( + ::testing::ValuesIn(transformation), ::testing::ValuesIn(iterationCount), ::testing::ValuesIn(inShapes), ::testing::ValuesIn(inputPrecisions), diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/memory_LSTMCell.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/memory_LSTMCell.cpp new file mode 100644 index 00000000000..8e975434347 --- /dev/null 
+++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/memory_LSTMCell.cpp @@ -0,0 +1,45 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include "common_test_utils/test_constants.hpp" + +namespace SubgraphTestsDefinitions { + std::vector transformation { + ngraph::helpers::MemoryTransformation::NONE, + ngraph::helpers::MemoryTransformation::LOW_LATENCY, + ngraph::helpers::MemoryTransformation::LOW_LATENCY_REGULAR_API, + ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2, + ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API + }; + + std::vector input_sizes = { + 80, + 32, + 64, + 100, + 25 + }; + + std::vector hidden_sizes = { + 128, + 200, + 300, + 24, + 32, + }; + + std::map additional_config = { + }; + + INSTANTIATE_TEST_CASE_P(smoke_MemoryLSTMCellTest, MemoryLSTMCellTest, + ::testing::Combine( + ::testing::ValuesIn(transformation), + ::testing::Values(CommonTestUtils::DEVICE_CPU), + ::testing::Values(InferenceEngine::Precision::FP32), + ::testing::ValuesIn(input_sizes), + ::testing::ValuesIn(hidden_sizes), + ::testing::Values(additional_config)), + MemoryLSTMCellTest::getTestCaseName); +} // namespace SubgraphTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/multiple_LSTMCell.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/multiple_LSTMCell.cpp index 3468d2b417f..4c18ee6ea05 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/multiple_LSTMCell.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/multiple_LSTMCell.cpp @@ -7,6 +7,15 @@ namespace SubgraphTestsDefinitions { namespace { + +std::vector transformation { + ngraph::helpers::MemoryTransformation::NONE, + ngraph::helpers::MemoryTransformation::LOW_LATENCY, + ngraph::helpers::MemoryTransformation::LOW_LATENCY_REGULAR_API, + ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2, + ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API +}; + std::vector input_sizes = { 80, 32, @@ -28,6 +37,7 @@ std::map additional_config = { INSTANTIATE_TEST_CASE_P(MultipleLSTMCellTest, MultipleLSTMCellTest, ::testing::Combine( + ::testing::ValuesIn(transformation), ::testing::Values(CommonTestUtils::DEVICE_CPU), ::testing::Values(InferenceEngine::Precision::FP32), ::testing::ValuesIn(input_sizes), diff --git a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/single_layer_tests/memory.cpp b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/single_layer_tests/memory.cpp index 2ab1357f674..c04b76705cc 100644 --- a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/single_layer_tests/memory.cpp +++ b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/single_layer_tests/memory.cpp @@ -10,9 +10,17 @@ using namespace LayerTestsDefinitions; namespace { +std::vector transformation { + ngraph::helpers::MemoryTransformation::NONE, + ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2, + ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API, + ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_ORIGINAL_INIT +}; + const std::vector inShapes = { {1, 1}, - {1, 2} + {1, 2}, + {1, 10} }; const std::vector inputPrecisions = { @@ -22,11 +30,13 @@ const std::vector inputPrecisions = { const std::vector iterationCount { 1, 3, + 4, 10 }; INSTANTIATE_TEST_CASE_P(smoke_MemoryTest, 
MemoryTest, ::testing::Combine( + ::testing::ValuesIn(transformation), ::testing::ValuesIn(iterationCount), ::testing::ValuesIn(inShapes), ::testing::ValuesIn(inputPrecisions), diff --git a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_config.cpp b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_config.cpp index 9f2c05ab6d1..cb4cc459a95 100644 --- a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_config.cpp +++ b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_config.cpp @@ -64,5 +64,13 @@ std::vector disabledTestPatterns() { R"(.*CachingSupport.*_batch2_.*)", // TODO: Issue 51525 R"(.*CachingSupport.*KSOFunction.*)", + // TODO: Issue 57363 (Param -> Result subgraphs) + R"(.*smoke_MemoryTest.*LOW_LATENCY.*iteration_count=1_.*)", + // TODO: Issue 57368 (accuracy) + R"(.*smoke_MemoryTest.*LOW_LATENCY.*IS=\(1.10\).*)", + R"(.*smoke_MemoryTest.*iteration_count=3.*IS=\(1.10\).*)", + R"(.*smoke_MemoryTest.*iteration_count=4.*IS=\(1.10\).*)", + R"(.*smoke_MemoryTest.*iteration_count=10.*IS=\(1.10\).*)", + R"(.*smoke_MemoryTest.*LOW_LATENCY.*iteration_count=10.*IS=\(1.2\).*)", }; } diff --git a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/memory_LSTMCell.cpp b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/memory_LSTMCell.cpp index a8d651f6c80..5818a40d4ec 100644 --- a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/memory_LSTMCell.cpp +++ b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/memory_LSTMCell.cpp @@ -6,6 +6,14 @@ #include "common_test_utils/test_constants.hpp" namespace SubgraphTestsDefinitions { + std::vector transformation { + ngraph::helpers::MemoryTransformation::NONE, + ngraph::helpers::MemoryTransformation::LOW_LATENCY, + ngraph::helpers::MemoryTransformation::LOW_LATENCY_REGULAR_API, + ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2, + ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API + }; + std::vector input_sizes = { 80, 32, @@ -30,6 +38,7 @@ namespace SubgraphTestsDefinitions { INSTANTIATE_TEST_CASE_P(smoke_MemoryLSTMCellTest, MemoryLSTMCellTest, ::testing::Combine( + ::testing::ValuesIn(transformation), ::testing::Values(CommonTestUtils::DEVICE_GNA), ::testing::Values(InferenceEngine::Precision::FP32), ::testing::ValuesIn(input_sizes), diff --git a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/multiple_LSTMCell.cpp b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/multiple_LSTMCell.cpp index 2d94617991d..d59c023773b 100644 --- a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/multiple_LSTMCell.cpp +++ b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/multiple_LSTMCell.cpp @@ -7,6 +7,15 @@ namespace SubgraphTestsDefinitions { namespace { + +std::vector transformation { + ngraph::helpers::MemoryTransformation::NONE, + ngraph::helpers::MemoryTransformation::LOW_LATENCY, + ngraph::helpers::MemoryTransformation::LOW_LATENCY_REGULAR_API, + ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2, + ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API +}; + std::vector input_sizes = { 80, 32, @@ -31,6 +40,7 @@ std::map additional_config = { INSTANTIATE_TEST_CASE_P(MultipleLSTMCellTest, MultipleLSTMCellTest, ::testing::Combine( + 
::testing::ValuesIn(transformation), ::testing::Values(CommonTestUtils::DEVICE_GNA), ::testing::Values(InferenceEngine::Precision::FP32), ::testing::ValuesIn(input_sizes), diff --git a/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/basic_lstm.hpp b/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/basic_lstm.hpp index 17bf8d31cf0..7ae1d21a886 100644 --- a/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/basic_lstm.hpp +++ b/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/basic_lstm.hpp @@ -39,7 +39,7 @@ TEST_P(Basic_LSTM_S, CompareWithRefImpl_LowLatencyTransformation) { // Apply LowLatency and UnrollTensorIterator transformations ngraph::pass::Manager manager; - manager.register_pass(); // LowLatency enables UnrollTI + manager.register_pass(); // LowLatency enables UnrollTI manager.run_passes(function); LoadNetwork(); IE_SUPPRESS_DEPRECATED_START diff --git a/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/memory_LSTMCell.hpp b/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/memory_LSTMCell.hpp index 947f0fa1e1a..a6f250301b6 100644 --- a/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/memory_LSTMCell.hpp +++ b/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/memory_LSTMCell.hpp @@ -12,12 +12,4 @@ TEST_P(MemoryLSTMCellTest, CompareWithRefs) { Run(); }; -TEST_P(MemoryLSTMCellTest, CompareWithRefs_LowLatencyTransformation) { - RunLowLatency(); -}; - -TEST_P(MemoryLSTMCellTest, CompareWithRefs_LowLatencyRegularAPITransformation) { - RunLowLatency(true); -}; - } // namespace SubgraphTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/multiple_LSTMCell.hpp b/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/multiple_LSTMCell.hpp index 5dd18ff4463..0135c09e170 100644 --- a/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/multiple_LSTMCell.hpp +++ b/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/multiple_LSTMCell.hpp @@ -12,12 +12,4 @@ TEST_P(MultipleLSTMCellTest, CompareWithRefs) { Run(); }; -TEST_P(MultipleLSTMCellTest, CompareWithRefs_LowLatencyTransformation) { - RunLowLatency(); -}; - -TEST_P(MultipleLSTMCellTest, CompareWithRefs_LowLatencyRegularAPITransformation) { - RunLowLatency(true); -}; - } // namespace SubgraphTestsDefinitions diff --git a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/memory.hpp b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/memory.hpp index ca16e30148e..59ad6c54e5a 100644 --- a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/memory.hpp +++ b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/memory.hpp @@ -14,6 +14,7 @@ namespace LayerTestsDefinitions { using MemoryTestParams = std::tuple< + ngraph::helpers::MemoryTransformation, // Apply Memory transformation int64_t, // iterationCount InferenceEngine::SizeVector, // inputShape InferenceEngine::Precision, // netPrecision @@ -28,9 +29,17 @@ protected: std::vector>> CalculateRefs() override; void SetUp() override; private: + void CreateTIFunc(); + void CreateCommonFunc(); + void ApplyLowLatency(); + InferenceEngine::Precision netPrecision; ngraph::EvaluationContext eval_context; + ngraph::helpers::MemoryTransformation transformation; + 
int64_t iteration_count; + ngraph::element::Type ngPrc; + InferenceEngine::SizeVector inputShape; }; } // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/memory_LSTMCell.hpp b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/memory_LSTMCell.hpp index 489431e2a9b..cc0aeb26e2f 100644 --- a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/memory_LSTMCell.hpp +++ b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/memory_LSTMCell.hpp @@ -10,6 +10,7 @@ namespace SubgraphTestsDefinitions { typedef std::tuple< + ngraph::helpers::MemoryTransformation, // Apply Memory transformation std::string, // Target device name InferenceEngine::Precision, // Network precision size_t, // Input size @@ -21,9 +22,13 @@ class MemoryLSTMCellTest : public LayerTestsUtils::LayerTestsCommon, public testing::WithParamInterface { private: // you have to Unroll TI manually and remove memory untill ngraph supports it + // since we switching models we need to generate and save weights biases and inputs in SetUp void switchToNgraphFriendlyModel(); void CreatePureTensorIteratorModel(); - // since we switching models we need to generate and save weights biases and inputs in SetUp + void InitMemory(); + void ApplyLowLatency(); + + ngraph::helpers::MemoryTransformation transformation; std::vector input_bias; std::vector input_weights; std::vector hidden_memory_init; @@ -34,7 +39,6 @@ private: protected: void SetUp() override; void Run() override; - void RunLowLatency(bool regular_api = false); public: static std::string getTestCaseName(const testing::TestParamInfo &obj); }; diff --git a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/multiple_LSTMCell.hpp b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/multiple_LSTMCell.hpp index 7c1e72a7bb7..7932ba39a07 100644 --- a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/multiple_LSTMCell.hpp +++ b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/multiple_LSTMCell.hpp @@ -10,6 +10,7 @@ namespace SubgraphTestsDefinitions { typedef std::tuple< + ngraph::helpers::MemoryTransformation, // Apply Memory transformation std::string, // Target device name InferenceEngine::Precision, // Network precision size_t, // Input size @@ -21,9 +22,12 @@ class MultipleLSTMCellTest : public LayerTestsUtils::LayerTestsCommon, public testing::WithParamInterface { private: // you have to Unroll TI manually and remove memory untill ngraph supports it + // since we switching models we need to generate and save weights biases and inputs in SetUp void switchToNgraphFriendlyModel(); void CreatePureTensorIteratorModel(); - // since we switching models we need to generate and save weights biases and inputs in SetUp + void InitMemory(); + void ApplyLowLatency(); + size_t hiddenSize; std::vector input_bias; std::vector input_weights; @@ -33,10 +37,10 @@ private: std::vector weights_2_vals; std::vector reccurrenceWeights_vals; std::vector bias_vals; + ngraph::helpers::MemoryTransformation transformation; protected: void SetUp() override; void Run() override; - void RunLowLatency(bool regular_api = false); public: static std::string getTestCaseName(const testing::TestParamInfo &obj); }; diff --git 
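
For reference, a minimal sketch of the two ways the ApplyLowLatency() hooks added above can drive the v2 transformation: directly on an ngraph::Function through pass::Manager, or through the CNNNetwork-level helper used by the *_REGULAR_API branches. The helper names apply_on_function/apply_on_network are placeholders, and the meaning given to the bool argument is my reading of the pass (it appears consistent with the LOW_LATENCY_V2_ORIGINAL_INIT branch registering the pass with false), not something this patch states.

    #include <memory>
    #include <ngraph/function.hpp>
    #include <ngraph/pass/manager.hpp>
    #include <ngraph/pass/low_latency.hpp>
    #include <cpp/ie_cnn_network.h>
    #include <ie_transformations.hpp>

    // Variant 1: run the ngraph pass directly on a Function.
    void apply_on_function(const std::shared_ptr<ngraph::Function>& model) {
        ngraph::pass::Manager manager;
        // The bool argument is assumed to control whether ReadValue initial values
        // are replaced with constants; pass false to keep the original init subgraph.
        manager.register_pass<ngraph::pass::LowLatency2>(true);
        manager.run_passes(model);
    }

    // Variant 2: the CNNNetwork-level helper used by the *_REGULAR_API test branches.
    void apply_on_network(InferenceEngine::CNNNetwork& network) {
        InferenceEngine::lowLatency2(network);
    }

The deprecated v1 path registers ngraph::pass::LowLatency (or calls InferenceEngine::LowLatency(network)) and still relies on UnrollTensorIterator afterwards, which is why the updated tests expect a TensorIterator to remain after v1 but not after v2.
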
a/inference-engine/tests/functional/shared_test_classes/src/single_layer/memory.cpp b/inference-engine/tests/functional/shared_test_classes/src/single_layer/memory.cpp index 0984a4dbeab..c059768c4a2 100644 --- a/inference-engine/tests/functional/shared_test_classes/src/single_layer/memory.cpp +++ b/inference-engine/tests/functional/shared_test_classes/src/single_layer/memory.cpp @@ -3,10 +3,18 @@ // #include +#include +#include +#include +#include #include "ngraph/opsets/opset7.hpp" #include "ngraph_functions/builders.hpp" +#include "ngraph/pass/low_latency.hpp" #include "shared_test_classes/single_layer/memory.hpp" +using namespace ngraph; +using namespace opset7; + namespace LayerTestsDefinitions { std::string MemoryTest::getTestCaseName(const testing::TestParamInfo &obj) { @@ -14,9 +22,11 @@ namespace LayerTestsDefinitions { InferenceEngine::Precision netPrecision; InferenceEngine::SizeVector inputShape; std::string targetDevice; - std::tie(iteration_count, inputShape, netPrecision, targetDevice) = obj.param; + ngraph::helpers::MemoryTransformation transformation; + std::tie(transformation, iteration_count, inputShape, netPrecision, targetDevice) = obj.param; std::ostringstream result; + result << "transformation=" << transformation << "_"; result << "iteration_count=" << iteration_count << "_"; result << "IS=" << CommonTestUtils::vec2str(inputShape) << "_"; result << "netPRC=" << netPrecision.name() << "_"; @@ -26,20 +36,17 @@ namespace LayerTestsDefinitions { } void MemoryTest::SetUp() { - using namespace ngraph; - InferenceEngine::SizeVector inputShape; - std::tie(iteration_count, inputShape, netPrecision, targetDevice) = this->GetParam(); - auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); + std::tie(transformation, iteration_count, inputShape, netPrecision, targetDevice) = this->GetParam(); + ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); - auto param = ngraph::builder::makeParams(ngPrc, {inputShape}); - auto variable = std::make_shared(VariableInfo{PartialShape::dynamic(), element::dynamic, "v0"}); - auto read_value = std::make_shared(param.at(0), variable); - auto add = std::make_shared(read_value, param.at(0)); - auto assign = std::make_shared(add, variable); - auto res = std::make_shared(add); - function = std::make_shared(ResultVector{res}, SinkVector{assign}, param, "TestMemory"); + if (transformation == ngraph::helpers::MemoryTransformation::NONE) { + CreateCommonFunc(); + } else { + CreateTIFunc(); + ApplyLowLatency(); + } - auto hostTensor = std::make_shared(ngPrc, inputShape); + auto hostTensor = std::make_shared(ngPrc, inputShape); auto variable_context = std::make_shared>(VariableContext()); auto variable_value = std::make_shared(hostTensor); variable_context->get().set_variable_value(function->get_variable_by_id("v0"), variable_value); @@ -48,6 +55,7 @@ namespace LayerTestsDefinitions { void MemoryTest::Run() { + SKIP_IF_CURRENT_TEST_IS_DISABLED() using namespace LayerTestsUtils; auto crashHandler = [](int errCode) { auto &s = Summary::getInstance(); @@ -68,7 +76,13 @@ namespace LayerTestsDefinitions { } try { - LoadNetwork(); + if (transformation != ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API) { + LoadNetwork(); + } else { + CoreConfiguration(this); + ConfigureNetwork(); + executableNetwork = core->LoadNetwork(cnnNetwork, targetDevice, configuration); + } GenerateInputs(); for (int64_t i = 0; i < iteration_count; ++i) { Infer(); @@ -88,12 +102,12 @@ namespace LayerTestsDefinitions { } } - 
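
To make the reference path below concrete: MemoryTest::SetUp() above seeds the variable "v0" with a host tensor so that Function::evaluate() can carry state across the iteration loop in Run(). A short sketch of how such an evaluation context can be assembled; the helper name make_eval_context is a placeholder and the "VariableContext" map key is my assumption about how the context is consumed, not confirmed by this patch.

    #include <map>
    #include <memory>
    #include <ngraph/function.hpp>
    #include <ngraph/variant.hpp>
    #include <ngraph/op/util/variable_context.hpp>
    #include <ngraph/op/util/variable_value.hpp>
    #include <ngraph/runtime/host_tensor.hpp>

    ngraph::EvaluationContext make_eval_context(const std::shared_ptr<ngraph::Function>& f,
                                                const ngraph::element::Type& prc,
                                                const ngraph::Shape& shape) {
        using namespace ngraph;
        // Host tensor holding the initial state of variable "v0".
        auto state = std::make_shared<runtime::HostTensor>(prc, shape);
        auto value = std::make_shared<VariableValue>(state);
        auto ctx_wrapper = std::make_shared<VariantWrapper<VariableContext>>(VariableContext());
        ctx_wrapper->get().set_variable_value(f->get_variable_by_id("v0"), value);
        EvaluationContext eval_context;
        eval_context["VariableContext"] = ctx_wrapper;
        return eval_context;
    }

In CalculateRefs() below, the TensorIterator is additionally unrolled before evaluate() is called, since evaluate is not implemented for the TI op.
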
std::vector>> MemoryTest::CalculateRefs() { + std::vector>> MemoryTest::CalculateRefs() { using namespace ngraph; function->validate_nodes_and_infer_types(); auto referenceInputs = std::vector>(inputs.size()); - auto refInputsTypes = std::vector(inputs.size()); + auto refInputsTypes = std::vector(inputs.size()); HostTensorVector inputTensors; for (auto & input : inputs) { const auto &dataSize = input->byteSize(); @@ -104,17 +118,25 @@ namespace LayerTestsDefinitions { const auto lockedMemory = memory->wmap(); const auto buffer = lockedMemory.as(); - auto hostTensor = std::make_shared(FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(tensorDesc.getPrecision()), + auto hostTensor = std::make_shared(FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(tensorDesc.getPrecision()), tensorDesc.getDims()); hostTensor->write(buffer, dataSize); inputTensors.push_back(hostTensor); } + // evaluate method is not implemented for TI op. + ngraph::pass::Manager manager; + manager.register_pass(); + manager.run_passes(function); + const auto &outInfo = executableNetwork.GetOutputsInfo(); - HostTensorVector outputTensors(outInfo.size(), std::make_shared()); + HostTensorVector outputTensors(outInfo.size()); + for (auto& outTensor : outputTensors) { + outTensor = std::make_shared(); + } function->evaluate(outputTensors, inputTensors, eval_context); - std::vector>> outputs(outInfo.size()); + std::vector>> outputs(outInfo.size()); for (size_t idx = 0; idx < outInfo.size(); ++idx) { outputs[idx].first = outputTensors[idx]->get_element_type(); outputs[idx].second.resize(outputTensors[idx]->get_size_in_bytes()); @@ -123,5 +145,61 @@ namespace LayerTestsDefinitions { return outputs; } + void MemoryTest::CreateTIFunc() { + auto param = builder::makeParams(ngPrc, {inputShape}).at(0); + std::vector> shape = {{static_cast(iteration_count), 1}}; + auto iter_count = builder::makeParams(ngPrc, shape).at(0); + + // Body + auto X = builder::makeParams(ngPrc, {inputShape}).at(0); + auto Y = builder::makeParams(ngPrc, {inputShape}).at(0); + auto Iter = builder::makeParams(ngPrc, {Shape{1, 1}}).at(0); + auto add = std::make_shared(X, Y); + auto res = std::make_shared(add); + auto Iter_res = std::make_shared(Iter); + auto body = std::make_shared(OutputVector{res, Iter_res}, ParameterVector {X, Y, Iter}); + + // TI construction + auto tensor_iterator = std::make_shared(); + tensor_iterator->set_body(body); + + tensor_iterator->set_merged_input(X, param, res); + tensor_iterator->set_invariant_input(Y, param); + tensor_iterator->set_sliced_input(Iter, iter_count, 0, 1, 1, -1, 0); + + auto output = tensor_iterator->get_iter_value(res, -1); + auto output_iter = tensor_iterator->get_concatenated_slices(Iter_res, 0, 1, 1, -1, 0); + function = std::make_shared(OutputVector{output, output_iter}, + ParameterVector{param, iter_count}, + "PureTI"); + } + + void MemoryTest::CreateCommonFunc() { + auto param = builder::makeParams(ngPrc, {inputShape}); + auto variable = std::make_shared(VariableInfo{PartialShape::dynamic(), element::dynamic, "v0"}); + auto read_value = std::make_shared(param.at(0), variable); + auto add = std::make_shared(read_value, param.at(0)); + auto assign = std::make_shared(add, variable); + auto res = std::make_shared(add); + function = std::make_shared(ResultVector{res}, SinkVector{assign}, param, "TestMemory"); + } + + void MemoryTest::ApplyLowLatency() { + if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2) { + function->validate_nodes_and_infer_types(); + pass::Manager manager; + 
manager.register_pass(); + manager.run_passes(function); + } else if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_ORIGINAL_INIT) { + function->validate_nodes_and_infer_types(); + pass::Manager manager; + manager.register_pass(false); + manager.run_passes(function); + } else if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API) { + cnnNetwork = InferenceEngine::CNNNetwork{function}; + InferenceEngine::lowLatency2(cnnNetwork, iteration_count); + } + } + } // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/shared_test_classes/src/subgraph/memory_LSTMCell.cpp b/inference-engine/tests/functional/shared_test_classes/src/subgraph/memory_LSTMCell.cpp index a8d5c067334..bac0c293add 100644 --- a/inference-engine/tests/functional/shared_test_classes/src/subgraph/memory_LSTMCell.cpp +++ b/inference-engine/tests/functional/shared_test_classes/src/subgraph/memory_LSTMCell.cpp @@ -9,6 +9,9 @@ #include "ngraph_functions/builders.hpp" #include "shared_test_classes/subgraph/memory_LSTMCell.hpp" +using namespace ngraph; +using namespace opset7; + namespace SubgraphTestsDefinitions { std::string MemoryLSTMCellTest::getTestCaseName(const testing::TestParamInfo &obj) { @@ -17,9 +20,11 @@ namespace SubgraphTestsDefinitions { size_t inputSize; size_t hiddenSize; std::map config; - std::tie(targetDevice, netPrecision, inputSize, hiddenSize, config) = obj.param; + ngraph::helpers::MemoryTransformation transformation; + std::tie(transformation, targetDevice, netPrecision, inputSize, hiddenSize, config) = obj.param; std::ostringstream result; + result << "transformation=" << transformation << "_"; result << "netPrecision=" << netPrecision.name() << "_"; result << "IS=" << inputSize << "_"; result << "HS=" << hiddenSize << "_"; @@ -34,7 +39,7 @@ namespace SubgraphTestsDefinitions { InferenceEngine::Precision netPrecision; std::map config; size_t inputSize; - std::tie(targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam(); + std::tie(transformation, targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam(); configuration.insert(config.begin(), config.end()); auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); @@ -51,49 +56,53 @@ namespace SubgraphTestsDefinitions { reccurrenceWeights_vals = CommonTestUtils::generate_float_numbers(4 * hiddenSize * hiddenSize, -0.1f, 0.1f); bias_vals = CommonTestUtils::generate_float_numbers(4 * hiddenSize, -0.2f, 0.1f); - auto input_parameter = ngraph::builder::makeParams(ngPrc, {input_dims}); + auto input_parameter = builder::makeParams(ngPrc, {input_dims}); - auto input_add_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_bias); - auto add = ngraph::builder::makeEltwise(input_parameter[0], input_add_const, ngraph::helpers::EltwiseTypes::ADD); + auto input_add_const = builder::makeConstant(ngPrc, input_dims, input_bias); + auto add = builder::makeEltwise(input_parameter[0], input_add_const, helpers::EltwiseTypes::ADD); - auto input_mul_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_weights); - auto mul = ngraph::builder::makeEltwise(add, input_mul_const, ngraph::helpers::EltwiseTypes::MULTIPLY); + auto input_mul_const = builder::makeConstant(ngPrc, input_dims, input_weights); + auto mul = builder::makeEltwise(add, input_mul_const, helpers::EltwiseTypes::MULTIPLY); - auto unsqueeze_input_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto unsqueeze_input = 
std::make_shared(mul, unsqueeze_input_const); + auto unsqueeze_input_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto unsqueeze_input = std::make_shared(mul, unsqueeze_input_const); - auto permute_in_params = std::make_shared(ngraph::element::i64, ngraph::Shape{3}, ngraph::Shape{{1, 0, 2}}); - auto permute_in = std::make_shared(unsqueeze_input, permute_in_params); + auto permute_in_params = std::make_shared(element::i64, Shape{3}, Shape{{1, 0, 2}}); + auto permute_in = std::make_shared(unsqueeze_input, permute_in_params); - auto cell_memory_constant = ngraph::builder::makeConstant(ngPrc, cell_memory_dims, cell_memory_init); - auto cell_memory_read = std::make_shared(cell_memory_constant, "cell_memory"); + auto cell_memory_constant = builder::makeConstant(ngPrc, cell_memory_dims, cell_memory_init); + auto var_cell = + std::make_shared(VariableInfo{PartialShape::dynamic(), element::dynamic, "cell_state_1"}); + auto var_hidden = + std::make_shared(VariableInfo{PartialShape::dynamic(), element::dynamic, "hidden_state_1"}); + auto cell_memory_read = std::make_shared(cell_memory_constant, var_cell); - auto hidden_memory_constant = ngraph::builder::makeConstant(ngPrc, hidden_memory_dims, hidden_memory_init); - auto hidden_memory_read = std::make_shared(hidden_memory_constant, "hidden_memory"); + auto hidden_memory_constant = builder::makeConstant(ngPrc, hidden_memory_dims, hidden_memory_init); + auto hidden_memory_read = std::make_shared(hidden_memory_constant, var_hidden); // Body - inputs - auto X = std::make_shared(ngPrc, ngraph::Shape{1, 1, inputSize}); - auto H_t = std::make_shared(ngPrc, ngraph::Shape{1, hiddenSize}); - auto C_t = std::make_shared(ngPrc, ngraph::Shape{1, hiddenSize}); + auto X = std::make_shared(ngPrc, Shape{1, 1, inputSize}); + auto H_t = std::make_shared(ngPrc, Shape{1, hiddenSize}); + auto C_t = std::make_shared(ngPrc, Shape{1, hiddenSize}); // Body - layers - auto squeeze_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto squeeze = std::make_shared(X, squeeze_const); + auto squeeze_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto squeeze = std::make_shared(X, squeeze_const); - auto weightsNode = ngraph::builder::makeConstant(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals); - auto reccurrenceWeightsNode = ngraph::builder::makeConstant(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals); - auto biasNode = ngraph::builder::makeConstant(ngPrc, {4 * hiddenSize}, bias_vals); - auto lstm = std::make_shared(squeeze, H_t, C_t, weightsNode, reccurrenceWeightsNode, biasNode, hiddenSize); + auto weightsNode = builder::makeConstant(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals); + auto reccurrenceWeightsNode = builder::makeConstant(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals); + auto biasNode = builder::makeConstant(ngPrc, {4 * hiddenSize}, bias_vals); + auto lstm = std::make_shared(squeeze, H_t, C_t, weightsNode, reccurrenceWeightsNode, biasNode, hiddenSize); - auto unsqueeze_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto unsqueeze = std::make_shared(lstm->output(0), unsqueeze_const); + auto unsqueeze_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto unsqueeze = std::make_shared(lstm->output(0), unsqueeze_const); // body - outputs auto H_o = lstm->output(0); auto C_o = lstm->output(1); auto unsqueeze_o = unsqueeze->output(0); - auto body = std::make_shared(ngraph::OutputVector{unsqueeze_o, H_o, C_o}, 
ngraph::ParameterVector {X, H_t, C_t}); + auto body = std::make_shared(OutputVector{unsqueeze_o, H_o, C_o}, ParameterVector {X, H_t, C_t}); // TI construction - auto tensor_iterator = std::make_shared(); + auto tensor_iterator = std::make_shared(); tensor_iterator->set_body(body); tensor_iterator->set_invariant_input(X, permute_in); tensor_iterator->set_merged_input(H_t, hidden_memory_read, H_o); @@ -107,27 +116,27 @@ namespace SubgraphTestsDefinitions { out_hidden.get_tensor().set_element_type(ngPrc); out_cell.get_tensor().set_element_type(ngPrc); - auto cell_memory_write = std::make_shared(out_cell, "cell_memory"); - auto hidden_memory_write = std::make_shared(out_hidden, "hidden_memory"); + auto cell_memory_write = std::make_shared(out_cell, var_cell); + auto hidden_memory_write = std::make_shared(out_hidden, var_hidden); - auto final_reshape_pattern = std::make_shared(ngraph::element::i64, ngraph::Shape{4}, + auto final_reshape_pattern = std::make_shared(element::i64, Shape{4}, std::vector({1, 1, 1, hiddenSize})); - auto final_reshape = std::make_shared(out_unsqueeze, final_reshape_pattern, false); + auto final_reshape = std::make_shared(out_unsqueeze, final_reshape_pattern, false); cell_memory_write->add_control_dependency(cell_memory_read); - final_reshape->add_control_dependency(cell_memory_write); - hidden_memory_write->add_control_dependency(hidden_memory_read); - final_reshape->add_control_dependency(hidden_memory_write); - function = std::make_shared(final_reshape, input_parameter, "TI_with_memory"); + function = std::make_shared(OutputVector{final_reshape}, + SinkVector{cell_memory_write, hidden_memory_write}, + input_parameter, + "TI_with_memory"); } void MemoryLSTMCellTest::switchToNgraphFriendlyModel() { InferenceEngine::Precision netPrecision; std::map config; size_t inputSize; - std::tie(targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam(); + std::tie(transformation, targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam(); auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); std::vector input_dims { 1, inputSize }; @@ -135,46 +144,46 @@ namespace SubgraphTestsDefinitions { std::vector hidden_memory_dims {1, hiddenSize}; std::vector cell_memory_dims {1, hiddenSize}; - auto input_parameter = ngraph::builder::makeParams(ngPrc, {input_dims}); + auto input_parameter = builder::makeParams(ngPrc, {input_dims}); - auto input_add_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_bias); - auto add = ngraph::builder::makeEltwise(input_parameter[0], input_add_const, ngraph::helpers::EltwiseTypes::ADD); + auto input_add_const = builder::makeConstant(ngPrc, input_dims, input_bias); + auto add = builder::makeEltwise(input_parameter[0], input_add_const, helpers::EltwiseTypes::ADD); - auto input_mul_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_weights); - auto mul = ngraph::builder::makeEltwise(add, input_mul_const, ngraph::helpers::EltwiseTypes::MULTIPLY); + auto input_mul_const = builder::makeConstant(ngPrc, input_dims, input_weights); + auto mul = builder::makeEltwise(add, input_mul_const, helpers::EltwiseTypes::MULTIPLY); - auto unsqueeze_input_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto unsqueeze_input = std::make_shared(mul, unsqueeze_input_const); + auto unsqueeze_input_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto unsqueeze_input = std::make_shared(mul, unsqueeze_input_const); - auto cell_memory_constant = 
ngraph::builder::makeConstant(ngPrc, cell_memory_dims, cell_memory_init); + auto cell_memory_constant = builder::makeConstant(ngPrc, cell_memory_dims, cell_memory_init); - auto hidden_memory_constant = ngraph::builder::makeConstant(ngPrc, hidden_memory_dims, hidden_memory_init); + auto hidden_memory_constant = builder::makeConstant(ngPrc, hidden_memory_dims, hidden_memory_init); // Body - layers - auto squeeze_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto squeeze = std::make_shared(unsqueeze_input, squeeze_const); + auto squeeze_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto squeeze = std::make_shared(unsqueeze_input, squeeze_const); - auto weightsNode = ngraph::builder::makeConstant(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals); - auto reccurrenceWeightsNode = ngraph::builder::makeConstant(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals); - auto biasNode = ngraph::builder::makeConstant(ngPrc, {4 * hiddenSize}, bias_vals); - auto lstm = std::make_shared(squeeze, hidden_memory_constant, cell_memory_constant, weightsNode, + auto weightsNode = builder::makeConstant(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals); + auto reccurrenceWeightsNode = builder::makeConstant(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals); + auto biasNode = builder::makeConstant(ngPrc, {4 * hiddenSize}, bias_vals); + auto lstm = std::make_shared(squeeze, hidden_memory_constant, cell_memory_constant, weightsNode, reccurrenceWeightsNode, biasNode, hiddenSize); - auto unsqueeze_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto unsqueeze = std::make_shared(lstm->output(0), unsqueeze_const); + auto unsqueeze_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto unsqueeze = std::make_shared(lstm->output(0), unsqueeze_const); - auto final_reshape_pattern = std::make_shared(ngraph::element::i64, - ngraph::Shape{4}, std::vector({1, 1, 1, hiddenSize})); - auto final_reshape = std::make_shared(unsqueeze, final_reshape_pattern, false); + auto final_reshape_pattern = std::make_shared(element::i64, + Shape{4}, std::vector({1, 1, 1, hiddenSize})); + auto final_reshape = std::make_shared(unsqueeze, final_reshape_pattern, false); - function = std::make_shared(final_reshape, input_parameter, "TI_unrolled_without_memory"); + function = std::make_shared(final_reshape, input_parameter, "TI_unrolled_without_memory"); } void MemoryLSTMCellTest::CreatePureTensorIteratorModel() { InferenceEngine::Precision netPrecision; std::map config; size_t inputSize; - std::tie(targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam(); + std::tie(transformation, targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam(); auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); std::vector input_dims { 1, inputSize }; @@ -182,49 +191,49 @@ namespace SubgraphTestsDefinitions { std::vector hidden_memory_dims {1, hiddenSize}; std::vector cell_memory_dims {1, hiddenSize}; - auto input_parameter = ngraph::builder::makeParams(ngPrc, {input_dims}); + auto input_parameter = builder::makeParams(ngPrc, {input_dims}); - auto input_add_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_bias); - auto add = ngraph::builder::makeEltwise(input_parameter[0], input_add_const, ngraph::helpers::EltwiseTypes::ADD); + auto input_add_const = builder::makeConstant(ngPrc, input_dims, input_bias); + auto add = builder::makeEltwise(input_parameter[0], 
input_add_const, helpers::EltwiseTypes::ADD); - auto input_mul_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_weights); - auto mul = ngraph::builder::makeEltwise(add, input_mul_const, ngraph::helpers::EltwiseTypes::MULTIPLY); + auto input_mul_const = builder::makeConstant(ngPrc, input_dims, input_weights); + auto mul = builder::makeEltwise(add, input_mul_const, helpers::EltwiseTypes::MULTIPLY); - auto unsqueeze_input_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto unsqueeze_input = std::make_shared(mul, unsqueeze_input_const); + auto unsqueeze_input_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto unsqueeze_input = std::make_shared(mul, unsqueeze_input_const); - auto permute_in_params = std::make_shared(ngraph::element::i64, ngraph::Shape{3}, ngraph::Shape{{1, 0, 2}}); - auto permute_in = std::make_shared(unsqueeze_input, permute_in_params); + auto permute_in_params = std::make_shared(element::i64, Shape{3}, Shape{{1, 0, 2}}); + auto permute_in = std::make_shared(unsqueeze_input, permute_in_params); - auto cell_memory_constant = ngraph::builder::makeConstant(ngPrc, cell_memory_dims, cell_memory_init); + auto cell_memory_constant = builder::makeConstant(ngPrc, cell_memory_dims, cell_memory_init); - auto hidden_memory_constant = ngraph::builder::makeConstant(ngPrc, hidden_memory_dims, hidden_memory_init); + auto hidden_memory_constant = builder::makeConstant(ngPrc, hidden_memory_dims, hidden_memory_init); // Body - inputs - auto X = std::make_shared(ngPrc, ngraph::Shape{1, 1, inputSize}); - auto H_t = std::make_shared(ngPrc, ngraph::Shape{1, hiddenSize}); - auto C_t = std::make_shared(ngPrc, ngraph::Shape{1, hiddenSize}); + auto X = std::make_shared(ngPrc, Shape{1, 1, inputSize}); + auto H_t = std::make_shared(ngPrc, Shape{1, hiddenSize}); + auto C_t = std::make_shared(ngPrc, Shape{1, hiddenSize}); H_t->set_friendly_name("hidden_state_1"); C_t->set_friendly_name("cell_state_1"); // Body - layers - auto squeeze_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto squeeze = std::make_shared(X, squeeze_const); + auto squeeze_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto squeeze = std::make_shared(X, squeeze_const); - auto weightsNode = ngraph::builder::makeConstant(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals); - auto reccurrenceWeightsNode = ngraph::builder::makeConstant(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals); - auto biasNode = ngraph::builder::makeConstant(ngPrc, {4 * hiddenSize}, bias_vals); - auto lstm = std::make_shared(squeeze, H_t, C_t, weightsNode, reccurrenceWeightsNode, biasNode, hiddenSize); + auto weightsNode = builder::makeConstant(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals); + auto reccurrenceWeightsNode = builder::makeConstant(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals); + auto biasNode = builder::makeConstant(ngPrc, {4 * hiddenSize}, bias_vals); + auto lstm = std::make_shared(squeeze, H_t, C_t, weightsNode, reccurrenceWeightsNode, biasNode, hiddenSize); - auto unsqueeze_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto unsqueeze = std::make_shared(lstm->output(0), unsqueeze_const); + auto unsqueeze_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto unsqueeze = std::make_shared(lstm->output(0), unsqueeze_const); // body - outputs auto H_o = lstm->output(0); auto C_o = lstm->output(1); auto unsqueeze_o = unsqueeze->output(0); - auto 
body = std::make_shared(ngraph::OutputVector{unsqueeze_o, H_o, C_o}, ngraph::ParameterVector {X, H_t, C_t}); + auto body = std::make_shared(OutputVector{unsqueeze_o, H_o, C_o}, ParameterVector {X, H_t, C_t}); // TI construction - auto tensor_iterator = std::make_shared(); + auto tensor_iterator = std::make_shared(); tensor_iterator->set_body(body); tensor_iterator->set_sliced_input(X, permute_in, 0, 1, 1, -1, 0); tensor_iterator->set_merged_input(H_t, hidden_memory_constant, H_o); @@ -237,56 +246,35 @@ namespace SubgraphTestsDefinitions { out_hidden.get_tensor().set_element_type(ngPrc); out_cell.get_tensor().set_element_type(ngPrc); - auto final_reshape_pattern = std::make_shared(ngraph::element::i64, ngraph::Shape{4}, + auto final_reshape_pattern = std::make_shared(element::i64, Shape{4}, std::vector({1, 1, 1, hiddenSize})); - auto final_reshape = std::make_shared(out_unsqueeze, final_reshape_pattern, false); + auto final_reshape = std::make_shared(out_unsqueeze, final_reshape_pattern, false); - function = std::make_shared(final_reshape, input_parameter, "PureTI"); + function = std::make_shared(final_reshape, input_parameter, "PureTI"); } void MemoryLSTMCellTest::Run() { SKIP_IF_CURRENT_TEST_IS_DISABLED() - - IE_SUPPRESS_DEPRECATED_START - LoadNetwork(); - auto states = executableNetwork.QueryState(); - for (auto& state : states) { - auto name = state.GetName(); - if (name == "cell_memory") { - auto blob = FuncTestUtils::createAndFillBlobWithFloatArray(state.GetLastState()->getTensorDesc(), - cell_memory_init.data(), cell_memory_init.size()); - state.SetState(blob); - } else if (name == "hidden_memory") { - auto blob = FuncTestUtils::createAndFillBlobWithFloatArray(state.GetLastState()->getTensorDesc(), - hidden_memory_init.data(), hidden_memory_init.size()); - state.SetState(blob); - } else { - GTEST_FAIL() << "unknown memory state"; - } + if (transformation != ngraph::helpers::MemoryTransformation::NONE) { + ApplyLowLatency(); + } else { + LoadNetwork(); } - IE_SUPPRESS_DEPRECATED_END + + InitMemory(); GenerateInputs(); Infer(); - switchToNgraphFriendlyModel(); + + // Calculate ref values + if (transformation == ngraph::helpers::MemoryTransformation::NONE) { + switchToNgraphFriendlyModel(); + } else { + CreatePureTensorIteratorModel(); + } Validate(); } - void MemoryLSTMCellTest::RunLowLatency(bool regular_api) { - SKIP_IF_CURRENT_TEST_IS_DISABLED() - - CreatePureTensorIteratorModel(); - if (regular_api) { - cnnNetwork = InferenceEngine::CNNNetwork{function}; - InferenceEngine::LowLatency(cnnNetwork); - ConfigureNetwork(); - executableNetwork = core->LoadNetwork(static_cast(cnnNetwork), targetDevice, configuration); - } else { - // Apply LowLatency (insert Assigns/ReadValues) and UnrollTensorIterator - ngraph::pass::Manager manager; - manager.register_pass(); // LowLatency enables UnrollTI - manager.run_passes(function); - LoadNetwork(); - } + void MemoryLSTMCellTest::InitMemory() { IE_SUPPRESS_DEPRECATED_START auto states = executableNetwork.QueryState(); for (auto& state : states) { @@ -304,13 +292,52 @@ namespace SubgraphTestsDefinitions { } } IE_SUPPRESS_DEPRECATED_END - GenerateInputs(); - Infer(); + } + void MemoryLSTMCellTest::ApplyLowLatency() { + // Calculate values after LowLatency transformation CreatePureTensorIteratorModel(); - ngraph::pass::Manager manager_2; - manager_2.register_pass(); - manager_2.run_passes(function); - Validate(); + if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY) { + function->validate_nodes_and_infer_types(); + // Apply 
LowLatency (insert Assigns/ReadValues) and UnrollTensorIterator + pass::Manager manager; + NGRAPH_SUPPRESS_DEPRECATED_START + manager.register_pass(); + NGRAPH_SUPPRESS_DEPRECATED_END // LowLatency enables UnrollTI + manager.run_passes(function); + bool ti_found = helpers::is_tensor_iterator_exist(function); + EXPECT_EQ(ti_found, true); + LoadNetwork(); + } else if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2) { + function->validate_nodes_and_infer_types(); + // Apply LowLatency (insert Assigns/ReadValues) and UnrollTensorIterator + + pass::Manager manager; + manager.register_pass(); + manager.run_passes(function); + bool ti_found = helpers::is_tensor_iterator_exist(function); + EXPECT_EQ(ti_found, false); + LoadNetwork(); + } else if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY_REGULAR_API) { + cnnNetwork = InferenceEngine::CNNNetwork{function}; + IE_SUPPRESS_DEPRECATED_START + InferenceEngine::LowLatency(cnnNetwork); + IE_SUPPRESS_DEPRECATED_END + + bool ti_found = helpers::is_tensor_iterator_exist(cnnNetwork.getFunction()); + EXPECT_EQ(ti_found, true); + + ConfigureNetwork(); + executableNetwork = core->LoadNetwork(cnnNetwork, targetDevice, configuration); + } else if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API) { + cnnNetwork = InferenceEngine::CNNNetwork{function}; + InferenceEngine::lowLatency2(cnnNetwork); + + bool ti_found = helpers::is_tensor_iterator_exist(cnnNetwork.getFunction()); + EXPECT_EQ(ti_found, false); + + ConfigureNetwork(); + executableNetwork = core->LoadNetwork(cnnNetwork, targetDevice, configuration); + } } } // namespace SubgraphTestsDefinitions diff --git a/inference-engine/tests/functional/shared_test_classes/src/subgraph/multiple_LSTMCell.cpp b/inference-engine/tests/functional/shared_test_classes/src/subgraph/multiple_LSTMCell.cpp index d854f704930..09f8020df41 100644 --- a/inference-engine/tests/functional/shared_test_classes/src/subgraph/multiple_LSTMCell.cpp +++ b/inference-engine/tests/functional/shared_test_classes/src/subgraph/multiple_LSTMCell.cpp @@ -2,16 +2,19 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/opsets/opset5.hpp" +#include "ie_transformations.hpp" +#include "ngraph/opsets/opset7.hpp" +#include "ngraph/op/util/variable_context.hpp" #include "ngraph/pass/low_latency.hpp" -#include "ie_transformations.hpp" -#include "transformations/control_flow/unroll_tensor_iterator.hpp" - #include "ngraph_functions/builders.hpp" +#include "ngraph_functions/utils/ngraph_helpers.hpp" #include "shared_test_classes/subgraph/multiple_LSTMCell.hpp" +using namespace ngraph; +using namespace opset7; + namespace SubgraphTestsDefinitions { std::string MultipleLSTMCellTest::getTestCaseName(const testing::TestParamInfo &obj) { std::string targetDevice; @@ -19,9 +22,11 @@ std::string MultipleLSTMCellTest::getTestCaseName(const testing::TestParamInfo config; - std::tie(targetDevice, netPrecision, inputSize, hiddenSize, config) = obj.param; + ngraph::helpers::MemoryTransformation transformation; + std::tie(transformation, targetDevice, netPrecision, inputSize, hiddenSize, config) = obj.param; std::ostringstream result; + result << "transformation=" << transformation << "_"; result << "netPrecision=" << netPrecision.name() << "_"; result << "IS=" << inputSize << "_"; result << "HS=" << hiddenSize << "_"; @@ -33,7 +38,7 @@ void MultipleLSTMCellTest::SetUp() { InferenceEngine::Precision netPrecision; std::map config; size_t inputSize; - std::tie(targetDevice, 
netPrecision, inputSize, hiddenSize, config) = this->GetParam(); + std::tie(transformation, targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam(); configuration.insert(config.begin(), config.end()); auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); @@ -51,51 +56,55 @@ void MultipleLSTMCellTest::SetUp() { reccurrenceWeights_vals = CommonTestUtils::generate_float_numbers(4 * hiddenSize * hiddenSize, -0.1f, 0.1f); bias_vals = CommonTestUtils::generate_float_numbers(4 * hiddenSize, -0.25f, 0.15f); - auto input_parameter = ngraph::builder::makeParams(ngPrc, {input_dims}); + auto input_parameter = builder::makeParams(ngPrc, {input_dims}); - auto input_add_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_bias); - auto add = ngraph::builder::makeEltwise(input_parameter[0], input_add_const, ngraph::helpers::EltwiseTypes::ADD); + auto input_add_const = builder::makeConstant(ngPrc, input_dims, input_bias); + auto add = builder::makeEltwise(input_parameter[0], input_add_const, helpers::EltwiseTypes::ADD); - auto input_mul_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_weights); - auto mul = ngraph::builder::makeEltwise(add, input_mul_const, ngraph::helpers::EltwiseTypes::MULTIPLY); + auto input_mul_const = builder::makeConstant(ngPrc, input_dims, input_weights); + auto mul = builder::makeEltwise(add, input_mul_const, helpers::EltwiseTypes::MULTIPLY); - auto unsqueeze_input_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto unsqueeze_input = std::make_shared(mul, unsqueeze_input_const); + auto unsqueeze_input_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto unsqueeze_input = std::make_shared(mul, unsqueeze_input_const); - auto permute_in_params = std::make_shared(ngraph::element::i64, ngraph::Shape{3}, ngraph::Shape{{1, 0, 2}}); - auto permute_in = std::make_shared(unsqueeze_input, permute_in_params); + auto permute_in_params = std::make_shared(element::i64, Shape{3}, Shape{{1, 0, 2}}); + auto permute_in = std::make_shared(unsqueeze_input, permute_in_params); - auto cell_memory_constant = ngraph::builder::makeConstant(ngPrc, cell_memory_dims, cell_memory_init); - auto cell_memory_read = std::make_shared(cell_memory_constant, "cell_memory"); + auto cell_memory_constant = builder::makeConstant(ngPrc, cell_memory_dims, cell_memory_init); + auto var_cell = + std::make_shared(VariableInfo{PartialShape::dynamic(), element::dynamic, "cell_state_1"}); + auto var_hidden = + std::make_shared(VariableInfo{PartialShape::dynamic(), element::dynamic, "hidden_state_1"}); + auto cell_memory_read = std::make_shared(cell_memory_constant, var_cell); cell_memory_read->set_friendly_name("cell_memory"); - auto hidden_memory_constant = ngraph::builder::makeConstant(ngPrc, hidden_memory_dims, hidden_memory_init); - auto hidden_memory_read = std::make_shared(hidden_memory_constant, "hidden_memory"); + auto hidden_memory_constant = builder::makeConstant(ngPrc, hidden_memory_dims, hidden_memory_init); + auto hidden_memory_read = std::make_shared(hidden_memory_constant, var_hidden); hidden_memory_read->set_friendly_name("hidden_memory"); // Body - inputs - auto X = std::make_shared(ngPrc, ngraph::Shape{1, 1, inputSize}); - auto H_t = std::make_shared(ngPrc, ngraph::Shape{1, hiddenSize}); - auto C_t = std::make_shared(ngPrc, ngraph::Shape{1, hiddenSize}); + auto X = std::make_shared(ngPrc, Shape{1, 1, inputSize}); + auto H_t = std::make_shared(ngPrc, Shape{1, hiddenSize}); + auto C_t = 
std::make_shared(ngPrc, Shape{1, hiddenSize}); // Body - layers - auto squeeze_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto squeeze = std::make_shared(X, squeeze_const); + auto squeeze_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto squeeze = std::make_shared(X, squeeze_const); - auto weightsNode = ngraph::builder::makeConstant(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals); - auto reccurrenceWeightsNode = ngraph::builder::makeConstant(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals); - auto biasNode = ngraph::builder::makeConstant(ngPrc, {4 * hiddenSize}, bias_vals); - auto lstm = std::make_shared(squeeze, H_t, C_t, weightsNode, reccurrenceWeightsNode, biasNode, hiddenSize); + auto weightsNode = builder::makeConstant(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals); + auto reccurrenceWeightsNode = builder::makeConstant(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals); + auto biasNode = builder::makeConstant(ngPrc, {4 * hiddenSize}, bias_vals); + auto lstm = std::make_shared(squeeze, H_t, C_t, weightsNode, reccurrenceWeightsNode, biasNode, hiddenSize); - auto unsqueeze_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto unsqueeze = std::make_shared(lstm->output(0), unsqueeze_const); + auto unsqueeze_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto unsqueeze = std::make_shared(lstm->output(0), unsqueeze_const); // body - outputs auto H_o = lstm->output(0); auto C_o = lstm->output(1); auto unsqueeze_o = unsqueeze->output(0); - auto body = std::make_shared(ngraph::OutputVector{unsqueeze_o, H_o, C_o}, ngraph::ParameterVector {X, H_t, C_t}); + auto body = std::make_shared(OutputVector{unsqueeze_o, H_o, C_o}, ParameterVector {X, H_t, C_t}); // TI construction - auto tensor_iterator = std::make_shared(); + auto tensor_iterator = std::make_shared(); tensor_iterator->set_body(body); tensor_iterator->set_invariant_input(X, permute_in); tensor_iterator->set_merged_input(H_t, hidden_memory_read, H_o); @@ -108,49 +117,53 @@ void MultipleLSTMCellTest::SetUp() { out_hidden.get_tensor().set_element_type(ngPrc); out_cell.get_tensor().set_element_type(ngPrc); - auto cell_memory_write = std::make_shared(out_cell, "cell_memory"); - auto hidden_memory_write = std::make_shared(out_hidden, "hidden_memory"); + auto cell_memory_write = std::make_shared(out_cell, var_cell); + auto hidden_memory_write = std::make_shared(out_hidden, var_hidden); - auto first_reshape_pattern = std::make_shared(ngraph::element::i64, - ngraph::Shape{4}, std::vector({1, 1, 1, hiddenSize})); - auto first_reshape = std::make_shared(out_unsqueeze, first_reshape_pattern, false); + auto first_reshape_pattern = std::make_shared(element::i64, + Shape{4}, std::vector({1, 1, 1, hiddenSize})); + auto first_reshape = std::make_shared(out_unsqueeze, first_reshape_pattern, false); // End of TI 1 - auto inbetween_squeeze_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto inbetween_squeeze = std::make_shared(first_reshape, inbetween_squeeze_const); + auto inbetween_squeeze_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto inbetween_squeeze = std::make_shared(first_reshape, inbetween_squeeze_const); // Second TI - auto cell_memory_2_constant = ngraph::builder::makeConstant(ngPrc, cell_memory_dims, cell_memory_init); - auto cell_memory_2_read = std::make_shared(cell_memory_2_constant, "cell_memory_2"); + auto var_cell_2 = + 
std::make_shared(VariableInfo{PartialShape::dynamic(), element::dynamic, "cell_state_2"}); + auto var_hidden_2 = + std::make_shared(VariableInfo{PartialShape::dynamic(), element::dynamic, "hidden_state_2"}); + auto cell_memory_2_constant = builder::makeConstant(ngPrc, cell_memory_dims, cell_memory_init); + auto cell_memory_2_read = std::make_shared(cell_memory_2_constant, var_cell_2); cell_memory_2_read->set_friendly_name("cell_memory_2"); - auto hidden_memory_2_constant = ngraph::builder::makeConstant(ngPrc, hidden_memory_dims, hidden_memory_init); - auto hidden_memory_2_read = std::make_shared(hidden_memory_2_constant, "hidden_memory_2"); + auto hidden_memory_2_constant = builder::makeConstant(ngPrc, hidden_memory_dims, hidden_memory_init); + auto hidden_memory_2_read = std::make_shared(hidden_memory_2_constant, var_hidden_2); hidden_memory_2_read->set_friendly_name("hidden_memory_2"); // Body - inputs - auto X_2 = std::make_shared(ngPrc, ngraph::Shape{1, 1, hiddenSize}); - auto H_t_2 = std::make_shared(ngPrc, ngraph::Shape{1, hiddenSize}); - auto C_t_2 = std::make_shared(ngPrc, ngraph::Shape{1, hiddenSize}); + auto X_2 = std::make_shared(ngPrc, Shape{1, 1, hiddenSize}); + auto H_t_2 = std::make_shared(ngPrc, Shape{1, hiddenSize}); + auto C_t_2 = std::make_shared(ngPrc, Shape{1, hiddenSize}); // Body - layers - auto squeeze_2_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto squeeze_2 = std::make_shared(X_2, squeeze_2_const); + auto squeeze_2_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto squeeze_2 = std::make_shared(X_2, squeeze_2_const); - auto weightsNode_2 = ngraph::builder::makeConstant(ngPrc, { 4 * hiddenSize, hiddenSize }, weights_2_vals); - auto reccurrenceWeightsNode_2 = ngraph::builder::makeConstant(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals); - auto biasNode_2 = ngraph::builder::makeConstant(ngPrc, {4 * hiddenSize}, bias_vals); - auto lstm_2 = std::make_shared(squeeze_2, H_t_2, C_t_2, weightsNode_2, reccurrenceWeightsNode_2, biasNode_2, hiddenSize); + auto weightsNode_2 = builder::makeConstant(ngPrc, { 4 * hiddenSize, hiddenSize }, weights_2_vals); + auto reccurrenceWeightsNode_2 = builder::makeConstant(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals); + auto biasNode_2 = builder::makeConstant(ngPrc, {4 * hiddenSize}, bias_vals); + auto lstm_2 = std::make_shared(squeeze_2, H_t_2, C_t_2, weightsNode_2, reccurrenceWeightsNode_2, biasNode_2, hiddenSize); - auto unsqueeze_2_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto unsqueeze_2 = std::make_shared(lstm_2->output(0), unsqueeze_2_const); + auto unsqueeze_2_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto unsqueeze_2 = std::make_shared(lstm_2->output(0), unsqueeze_2_const); // body - outputs auto H_o_2 = lstm_2->output(0); auto C_o_2 = lstm_2->output(1); auto unsqueeze_o_2 = unsqueeze_2->output(0); - auto body_2 = std::make_shared(ngraph::OutputVector{unsqueeze_o_2, H_o_2, C_o_2}, ngraph::ParameterVector {X_2, H_t_2, C_t_2}); + auto body_2 = std::make_shared(OutputVector{unsqueeze_o_2, H_o_2, C_o_2}, ParameterVector {X_2, H_t_2, C_t_2}); // TI construction - auto tensor_iterator_2 = std::make_shared(); + auto tensor_iterator_2 = std::make_shared(); tensor_iterator_2->set_body(body_2); tensor_iterator_2->set_invariant_input(X_2, inbetween_squeeze); tensor_iterator_2->set_merged_input(H_t_2, hidden_memory_2_read, H_o_2); @@ -163,33 +176,28 @@ void MultipleLSTMCellTest::SetUp() 
{ out_hidden_2.get_tensor().set_element_type(ngPrc); out_cell_2.get_tensor().set_element_type(ngPrc); - auto cell_memory_2_write = std::make_shared(out_cell_2, "cell_memory_2"); - auto hidden_memory_2_write = std::make_shared(out_hidden_2, "hidden_memory_2"); + auto cell_memory_2_write = std::make_shared(out_cell_2, var_cell_2); + auto hidden_memory_2_write = std::make_shared(out_hidden_2, var_hidden_2); - auto final_reshape_pattern = std::make_shared(ngraph::element::i64, - ngraph::Shape{4}, std::vector({1, 1, 1, hiddenSize})); - auto final_reshape = std::make_shared(out_unsqueeze_2, final_reshape_pattern, false); + auto final_reshape_pattern = std::make_shared(element::i64, Shape{4}, std::vector({1, 1, 1, hiddenSize})); + auto final_reshape = std::make_shared(out_unsqueeze_2, final_reshape_pattern, false); cell_memory_write->add_control_dependency(cell_memory_read); - final_reshape->add_control_dependency(cell_memory_write); - hidden_memory_write->add_control_dependency(hidden_memory_read); - final_reshape->add_control_dependency(hidden_memory_write); - cell_memory_2_write->add_control_dependency(cell_memory_2_read); - final_reshape->add_control_dependency(cell_memory_2_write); - hidden_memory_2_write->add_control_dependency(hidden_memory_2_read); - final_reshape->add_control_dependency(hidden_memory_2_write); - function = std::make_shared(final_reshape, input_parameter, "TI_with_memory"); + function = std::make_shared(OutputVector {final_reshape}, + SinkVector{cell_memory_write, hidden_memory_write, cell_memory_2_write, hidden_memory_2_write}, + input_parameter, + "TI_with_memory"); } void MultipleLSTMCellTest::switchToNgraphFriendlyModel() { InferenceEngine::Precision netPrecision; std::map config; size_t inputSize; - std::tie(targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam(); + std::tie(transformation, targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam(); auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); std::vector input_dims { 1, inputSize }; @@ -197,72 +205,72 @@ void MultipleLSTMCellTest::switchToNgraphFriendlyModel() { std::vector hidden_memory_dims {1, hiddenSize}; std::vector cell_memory_dims {1, hiddenSize}; - auto input_parameter = ngraph::builder::makeParams(ngPrc, {input_dims}); + auto input_parameter = builder::makeParams(ngPrc, {input_dims}); - auto input_add_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_bias); - auto add = ngraph::builder::makeEltwise(input_parameter[0], input_add_const, ngraph::helpers::EltwiseTypes::ADD); + auto input_add_const = builder::makeConstant(ngPrc, input_dims, input_bias); + auto add = builder::makeEltwise(input_parameter[0], input_add_const, helpers::EltwiseTypes::ADD); - auto input_mul_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_weights); - auto mul = ngraph::builder::makeEltwise(add, input_mul_const, ngraph::helpers::EltwiseTypes::MULTIPLY); + auto input_mul_const = builder::makeConstant(ngPrc, input_dims, input_weights); + auto mul = builder::makeEltwise(add, input_mul_const, helpers::EltwiseTypes::MULTIPLY); - auto unsqueeze_input_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto unsqueeze_input = std::make_shared(mul, unsqueeze_input_const); + auto unsqueeze_input_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto unsqueeze_input = std::make_shared(mul, unsqueeze_input_const); // Body 1 - layers - auto cell_memory_constant = ngraph::builder::makeConstant(ngPrc, 
cell_memory_dims, cell_memory_init); + auto cell_memory_constant = builder::makeConstant(ngPrc, cell_memory_dims, cell_memory_init); - auto hidden_memory_constant = ngraph::builder::makeConstant(ngPrc, hidden_memory_dims, hidden_memory_init); + auto hidden_memory_constant = builder::makeConstant(ngPrc, hidden_memory_dims, hidden_memory_init); - auto squeeze_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto squeeze = std::make_shared(unsqueeze_input, squeeze_const); + auto squeeze_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto squeeze = std::make_shared(unsqueeze_input, squeeze_const); - auto weightsNode = ngraph::builder::makeConstant(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals); - auto reccurrenceWeightsNode = ngraph::builder::makeConstant(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals); - auto biasNode = ngraph::builder::makeConstant(ngPrc, {4 * hiddenSize}, bias_vals); - auto lstm = std::make_shared(squeeze, hidden_memory_constant, cell_memory_constant, weightsNode, + auto weightsNode = builder::makeConstant(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals); + auto reccurrenceWeightsNode = builder::makeConstant(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals); + auto biasNode = builder::makeConstant(ngPrc, {4 * hiddenSize}, bias_vals); + auto lstm = std::make_shared(squeeze, hidden_memory_constant, cell_memory_constant, weightsNode, reccurrenceWeightsNode, biasNode, hiddenSize); - auto unsqueeze_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto unsqueeze = std::make_shared(lstm->output(0), unsqueeze_const); + auto unsqueeze_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto unsqueeze = std::make_shared(lstm->output(0), unsqueeze_const); - auto first_reshape_pattern = std::make_shared(ngraph::element::i64, - ngraph::Shape{4}, std::vector({1, 1, 1, hiddenSize})); - auto first_reshape = std::make_shared(unsqueeze, first_reshape_pattern, false); + auto first_reshape_pattern = std::make_shared(element::i64, + Shape{4}, std::vector({1, 1, 1, hiddenSize})); + auto first_reshape = std::make_shared(unsqueeze, first_reshape_pattern, false); // Body 1 - end - auto inbetween_squeeze_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto inbetween_squeeze = std::make_shared(first_reshape, inbetween_squeeze_const); + auto inbetween_squeeze_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto inbetween_squeeze = std::make_shared(first_reshape, inbetween_squeeze_const); // Body 2 - layers - auto cell_memory_2_constant = ngraph::builder::makeConstant(ngPrc, cell_memory_dims, cell_memory_init); + auto cell_memory_2_constant = builder::makeConstant(ngPrc, cell_memory_dims, cell_memory_init); - auto hidden_memory_2_constant = ngraph::builder::makeConstant(ngPrc, hidden_memory_dims, hidden_memory_init); + auto hidden_memory_2_constant = builder::makeConstant(ngPrc, hidden_memory_dims, hidden_memory_init); - auto squeeze_2_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto squeeze_2 = std::make_shared(inbetween_squeeze, squeeze_2_const); + auto squeeze_2_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto squeeze_2 = std::make_shared(inbetween_squeeze, squeeze_2_const); - auto weightsNode_2 = ngraph::builder::makeConstant(ngPrc, { 4 * hiddenSize, hiddenSize }, weights_2_vals); - auto reccurrenceWeightsNode_2 = ngraph::builder::makeConstant(ngPrc, { 4 
* hiddenSize, hiddenSize }, reccurrenceWeights_vals); - auto biasNode_2 = ngraph::builder::makeConstant(ngPrc, {4 * hiddenSize}, bias_vals); - auto lstm_2 = std::make_shared(squeeze_2, hidden_memory_2_constant, cell_memory_2_constant, weightsNode_2, + auto weightsNode_2 = builder::makeConstant(ngPrc, { 4 * hiddenSize, hiddenSize }, weights_2_vals); + auto reccurrenceWeightsNode_2 = builder::makeConstant(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals); + auto biasNode_2 = builder::makeConstant(ngPrc, {4 * hiddenSize}, bias_vals); + auto lstm_2 = std::make_shared(squeeze_2, hidden_memory_2_constant, cell_memory_2_constant, weightsNode_2, reccurrenceWeightsNode_2, biasNode_2, hiddenSize); - auto unsqueeze_2_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto unsqueeze_2 = std::make_shared(lstm_2->output(0), unsqueeze_2_const); + auto unsqueeze_2_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto unsqueeze_2 = std::make_shared(lstm_2->output(0), unsqueeze_2_const); - auto final_reshape_pattern = std::make_shared(ngraph::element::i64, - ngraph::Shape{4}, std::vector({1, 1, 1, hiddenSize})); - auto final_reshape = std::make_shared(unsqueeze_2, final_reshape_pattern, false); + auto final_reshape_pattern = std::make_shared(element::i64, + Shape{4}, std::vector({1, 1, 1, hiddenSize})); + auto final_reshape = std::make_shared(unsqueeze_2, final_reshape_pattern, false); // Body 2 - end - function = std::make_shared(final_reshape, input_parameter, "TI_unrolled_without_memory"); + function = std::make_shared(final_reshape, input_parameter, "TI_unrolled_without_memory"); } void MultipleLSTMCellTest::CreatePureTensorIteratorModel() { InferenceEngine::Precision netPrecision; std::map config; size_t inputSize; - std::tie(targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam(); + std::tie(transformation, targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam(); auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); std::vector input_dims { 1, inputSize }; @@ -270,49 +278,49 @@ void MultipleLSTMCellTest::CreatePureTensorIteratorModel() { std::vector hidden_memory_dims {1, hiddenSize}; std::vector cell_memory_dims {1, hiddenSize}; - auto input_parameter = ngraph::builder::makeParams(ngPrc, {input_dims}); + auto input_parameter = builder::makeParams(ngPrc, {input_dims}); - auto input_add_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_bias); - auto add = ngraph::builder::makeEltwise(input_parameter[0], input_add_const, ngraph::helpers::EltwiseTypes::ADD); + auto input_add_const = builder::makeConstant(ngPrc, input_dims, input_bias); + auto add = builder::makeEltwise(input_parameter[0], input_add_const, helpers::EltwiseTypes::ADD); - auto input_mul_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_weights); - auto mul = ngraph::builder::makeEltwise(add, input_mul_const, ngraph::helpers::EltwiseTypes::MULTIPLY); + auto input_mul_const = builder::makeConstant(ngPrc, input_dims, input_weights); + auto mul = builder::makeEltwise(add, input_mul_const, helpers::EltwiseTypes::MULTIPLY); - auto unsqueeze_input_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto unsqueeze_input = std::make_shared(mul, unsqueeze_input_const); + auto unsqueeze_input_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto unsqueeze_input = std::make_shared(mul, unsqueeze_input_const); - auto permute_in_params = 
std::make_shared(ngraph::element::i64, ngraph::Shape{3}, ngraph::Shape{{1, 0, 2}}); - auto permute_in = std::make_shared(unsqueeze_input, permute_in_params); + auto permute_in_params = std::make_shared(element::i64, Shape{3}, Shape{{1, 0, 2}}); + auto permute_in = std::make_shared(unsqueeze_input, permute_in_params); - auto cell_memory_constant = ngraph::builder::makeConstant(ngPrc, cell_memory_dims, cell_memory_init); + auto cell_memory_constant = builder::makeConstant(ngPrc, cell_memory_dims, cell_memory_init); - auto hidden_memory_constant = ngraph::builder::makeConstant(ngPrc, hidden_memory_dims, hidden_memory_init); + auto hidden_memory_constant = builder::makeConstant(ngPrc, hidden_memory_dims, hidden_memory_init); // Body - inputs - auto X = std::make_shared(ngPrc, ngraph::Shape{1, 1, inputSize}); - auto H_t = std::make_shared(ngPrc, ngraph::Shape{1, hiddenSize}); - auto C_t = std::make_shared(ngPrc, ngraph::Shape{1, hiddenSize}); + auto X = std::make_shared(ngPrc, Shape{1, 1, inputSize}); + auto H_t = std::make_shared(ngPrc, Shape{1, hiddenSize}); + auto C_t = std::make_shared(ngPrc, Shape{1, hiddenSize}); H_t->set_friendly_name("hidden_state_1"); C_t->set_friendly_name("cell_state_1"); // Body - layers - auto squeeze_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto squeeze = std::make_shared(X, squeeze_const); + auto squeeze_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto squeeze = std::make_shared(X, squeeze_const); - auto weightsNode = ngraph::builder::makeConstant(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals); - auto reccurrenceWeightsNode = ngraph::builder::makeConstant(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals); - auto biasNode = ngraph::builder::makeConstant(ngPrc, {4 * hiddenSize}, bias_vals); - auto lstm = std::make_shared(squeeze, H_t, C_t, weightsNode, reccurrenceWeightsNode, biasNode, hiddenSize); + auto weightsNode = builder::makeConstant(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals); + auto reccurrenceWeightsNode = builder::makeConstant(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals); + auto biasNode = builder::makeConstant(ngPrc, {4 * hiddenSize}, bias_vals); + auto lstm = std::make_shared(squeeze, H_t, C_t, weightsNode, reccurrenceWeightsNode, biasNode, hiddenSize); - auto unsqueeze_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto unsqueeze = std::make_shared(lstm->output(0), unsqueeze_const); + auto unsqueeze_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto unsqueeze = std::make_shared(lstm->output(0), unsqueeze_const); // body - outputs auto H_o = lstm->output(0); auto C_o = lstm->output(1); auto unsqueeze_o = unsqueeze->output(0); - auto body = std::make_shared(ngraph::OutputVector{unsqueeze_o, H_o, C_o}, ngraph::ParameterVector {X, H_t, C_t}); + auto body = std::make_shared(OutputVector{unsqueeze_o, H_o, C_o}, ParameterVector {X, H_t, C_t}); // TI construction - auto tensor_iterator = std::make_shared(); + auto tensor_iterator = std::make_shared(); tensor_iterator->set_body(body); tensor_iterator->set_sliced_input(X, permute_in, 0, 1, 1, -1, 0); tensor_iterator->set_merged_input(H_t, hidden_memory_constant, H_o); @@ -326,44 +334,44 @@ void MultipleLSTMCellTest::CreatePureTensorIteratorModel() { out_cell.get_tensor().set_element_type(ngPrc); tensor_iterator->validate_and_infer_types(); - auto first_reshape_pattern = std::make_shared(ngraph::element::i64, - ngraph::Shape{4}, std::vector({1, 1, 1, 
hiddenSize})); - auto first_reshape = std::make_shared(out_unsqueeze, first_reshape_pattern, false); + auto first_reshape_pattern = std::make_shared(element::i64, + Shape{4}, std::vector({1, 1, 1, hiddenSize})); + auto first_reshape = std::make_shared(out_unsqueeze, first_reshape_pattern, false); // End of TI 1 - auto inbetween_squeeze_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto inbetween_squeeze = std::make_shared(first_reshape, inbetween_squeeze_const); + auto inbetween_squeeze_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto inbetween_squeeze = std::make_shared(first_reshape, inbetween_squeeze_const); // Second TI - auto cell_memory_2_constant = ngraph::builder::makeConstant(ngPrc, cell_memory_dims, cell_memory_init); + auto cell_memory_2_constant = builder::makeConstant(ngPrc, cell_memory_dims, cell_memory_init); - auto hidden_memory_2_constant = ngraph::builder::makeConstant(ngPrc, hidden_memory_dims, hidden_memory_init); + auto hidden_memory_2_constant = builder::makeConstant(ngPrc, hidden_memory_dims, hidden_memory_init); // Body - inputs - auto X_2 = std::make_shared(ngPrc, ngraph::Shape{1, 1, hiddenSize}); - auto H_t_2 = std::make_shared(ngPrc, ngraph::Shape{1, hiddenSize}); - auto C_t_2 = std::make_shared(ngPrc, ngraph::Shape{1, hiddenSize}); + auto X_2 = std::make_shared(ngPrc, Shape{1, 1, hiddenSize}); + auto H_t_2 = std::make_shared(ngPrc, Shape{1, hiddenSize}); + auto C_t_2 = std::make_shared(ngPrc, Shape{1, hiddenSize}); H_t_2->set_friendly_name("hidden_state_2"); C_t_2->set_friendly_name("cell_state_2"); // Body - layers - auto squeeze_2_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto squeeze_2 = std::make_shared(X_2, squeeze_2_const); + auto squeeze_2_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto squeeze_2 = std::make_shared(X_2, squeeze_2_const); - auto weightsNode_2 = ngraph::builder::makeConstant(ngPrc, { 4 * hiddenSize, hiddenSize }, weights_2_vals); - auto reccurrenceWeightsNode_2 = ngraph::builder::makeConstant(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals); - auto biasNode_2 = ngraph::builder::makeConstant(ngPrc, {4 * hiddenSize}, bias_vals); - auto lstm_2 = std::make_shared(squeeze_2, H_t_2, C_t_2, weightsNode_2, reccurrenceWeightsNode_2, biasNode_2, hiddenSize); + auto weightsNode_2 = builder::makeConstant(ngPrc, { 4 * hiddenSize, hiddenSize }, weights_2_vals); + auto reccurrenceWeightsNode_2 = builder::makeConstant(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals); + auto biasNode_2 = builder::makeConstant(ngPrc, {4 * hiddenSize}, bias_vals); + auto lstm_2 = std::make_shared(squeeze_2, H_t_2, C_t_2, weightsNode_2, reccurrenceWeightsNode_2, biasNode_2, hiddenSize); - auto unsqueeze_2_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto unsqueeze_2 = std::make_shared(lstm_2->output(0), unsqueeze_2_const); + auto unsqueeze_2_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto unsqueeze_2 = std::make_shared(lstm_2->output(0), unsqueeze_2_const); // body - outputs auto H_o_2 = lstm_2->output(0); auto C_o_2 = lstm_2->output(1); auto unsqueeze_o_2 = unsqueeze_2->output(0); - auto body_2 = std::make_shared(ngraph::OutputVector{unsqueeze_o_2, H_o_2, C_o_2}, ngraph::ParameterVector {X_2, H_t_2, C_t_2}); + auto body_2 = std::make_shared(OutputVector{unsqueeze_o_2, H_o_2, C_o_2}, ParameterVector {X_2, H_t_2, C_t_2}); // TI construction - auto tensor_iterator_2 = 
std::make_shared(); + auto tensor_iterator_2 = std::make_shared(); tensor_iterator_2->set_body(body_2); tensor_iterator_2->set_sliced_input(X_2, inbetween_squeeze, 0, 1, 1, -1, 0); tensor_iterator_2->set_merged_input(H_t_2, hidden_memory_2_constant, H_o_2); @@ -376,70 +384,17 @@ void MultipleLSTMCellTest::CreatePureTensorIteratorModel() { out_hidden_2.get_tensor().set_element_type(ngPrc); out_cell_2.get_tensor().set_element_type(ngPrc); tensor_iterator_2->validate_and_infer_types(); - auto final_reshape_pattern = std::make_shared(ngraph::element::i64, - ngraph::Shape{4}, std::vector({1, 1, 1, hiddenSize})); - auto final_reshape = std::make_shared(out_unsqueeze_2, final_reshape_pattern, false); + auto final_reshape_pattern = std::make_shared(element::i64, + Shape{4}, std::vector({1, 1, 1, hiddenSize})); + auto final_reshape = std::make_shared(out_unsqueeze_2, final_reshape_pattern, false); - function = std::make_shared(final_reshape, input_parameter, "PureTI"); + function = std::make_shared(final_reshape, input_parameter, "PureTI"); } -void MultipleLSTMCellTest::Run() { - SKIP_IF_CURRENT_TEST_IS_DISABLED() +void MultipleLSTMCellTest::InitMemory() { InferenceEngine::TensorDesc state_description(InferenceEngine::Precision::FP32, InferenceEngine::SizeVector({1, hiddenSize}), InferenceEngine::Layout::NC); - LoadNetwork(); - IE_SUPPRESS_DEPRECATED_START - auto states = executableNetwork.QueryState(); - for (auto& state : states) { - auto name = state.GetName(); - if (name == "cell_memory") { - auto blob = FuncTestUtils::createAndFillBlobWithFloatArray(state_description, - cell_memory_init.data(), cell_memory_init.size()); - state.SetState(blob); - } else if (name == "hidden_memory") { - auto blob = FuncTestUtils::createAndFillBlobWithFloatArray(state_description, - hidden_memory_init.data(), hidden_memory_init.size()); - state.SetState(blob); - } else if (name == "cell_memory_2") { - auto blob = FuncTestUtils::createAndFillBlobWithFloatArray(state_description, - cell_memory_init.data(), cell_memory_init.size()); - state.SetState(blob); - } else if (name == "hidden_memory_2") { - auto blob = FuncTestUtils::createAndFillBlobWithFloatArray(state_description, - hidden_memory_init.data(), hidden_memory_init.size()); - state.SetState(blob); - } else { - GTEST_FAIL() << "unknown memory state"; - } - } - IE_SUPPRESS_DEPRECATED_END - GenerateInputs(); - Infer(); - switchToNgraphFriendlyModel(); - Validate(); -} - -void MultipleLSTMCellTest::RunLowLatency(bool regular_api) { - SKIP_IF_CURRENT_TEST_IS_DISABLED() - InferenceEngine::TensorDesc state_description(InferenceEngine::Precision::FP32, - InferenceEngine::SizeVector({1, hiddenSize}), - InferenceEngine::Layout::NC); - // Calculate values after LowLatency transformation - CreatePureTensorIteratorModel(); - if (regular_api) { - cnnNetwork = InferenceEngine::CNNNetwork{function}; - InferenceEngine::LowLatency(cnnNetwork); - ConfigureNetwork(); - executableNetwork = core->LoadNetwork(cnnNetwork, targetDevice, configuration); - } else { - function->validate_nodes_and_infer_types(); - // Apply LowLatency (insert Assigns/ReadValues) and UnrollTensorIterator - ngraph::pass::Manager manager; - manager.register_pass(); // LowLatency enables UnrollTI - manager.run_passes(function); - LoadNetwork(); - } IE_SUPPRESS_DEPRECATED_START auto states = executableNetwork.QueryState(); for (auto& state : states) { @@ -465,14 +420,73 @@ void MultipleLSTMCellTest::RunLowLatency(bool regular_api) { } } IE_SUPPRESS_DEPRECATED_END +} + +void 
MultipleLSTMCellTest::ApplyLowLatency() { + // Calculate values after LowLatency transformation + CreatePureTensorIteratorModel(); + if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY) { + function->validate_nodes_and_infer_types(); + // Apply LowLatency (insert Assigns/ReadValues) and UnrollTensorIterator + pass::Manager manager; + NGRAPH_SUPPRESS_DEPRECATED_START + manager.register_pass(); + NGRAPH_SUPPRESS_DEPRECATED_END // LowLatency enables UnrollTI + manager.run_passes(function); + bool ti_found = helpers::is_tensor_iterator_exist(function); + EXPECT_EQ(ti_found, true); + LoadNetwork(); + } else if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2) { + function->validate_nodes_and_infer_types(); + // Apply LowLatency (insert Assigns/ReadValues) and UnrollTensorIterator + + pass::Manager manager; + manager.register_pass(); + manager.run_passes(function); + bool ti_found = helpers::is_tensor_iterator_exist(function); + EXPECT_EQ(ti_found, false); + LoadNetwork(); + } else if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY_REGULAR_API) { + cnnNetwork = InferenceEngine::CNNNetwork{function}; + IE_SUPPRESS_DEPRECATED_START + InferenceEngine::LowLatency(cnnNetwork); + IE_SUPPRESS_DEPRECATED_END + + bool ti_found = helpers::is_tensor_iterator_exist(cnnNetwork.getFunction()); + EXPECT_EQ(ti_found, true); + + ConfigureNetwork(); + executableNetwork = core->LoadNetwork(cnnNetwork, targetDevice, configuration); + } else if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API) { + cnnNetwork = InferenceEngine::CNNNetwork{function}; + InferenceEngine::lowLatency2(cnnNetwork); + + bool ti_found = helpers::is_tensor_iterator_exist(cnnNetwork.getFunction()); + EXPECT_EQ(ti_found, false); + + ConfigureNetwork(); + executableNetwork = core->LoadNetwork(cnnNetwork, targetDevice, configuration); + } +} + +void MultipleLSTMCellTest::Run() { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + if (transformation != ngraph::helpers::MemoryTransformation::NONE) { + ApplyLowLatency(); + } else { + LoadNetwork(); + } + + InitMemory(); GenerateInputs(); Infer(); - // Calculate ref values for Unrolled TI - CreatePureTensorIteratorModel(); - ngraph::pass::Manager manager_2; - manager_2.register_pass(); - manager_2.run_passes(function); + // Calculate ref values + if (transformation == ngraph::helpers::MemoryTransformation::NONE) { + switchToNgraphFriendlyModel(); + } else { + CreatePureTensorIteratorModel(); + } Validate(); } } // namespace SubgraphTestsDefinitions diff --git a/inference-engine/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/utils/ngraph_helpers.hpp b/inference-engine/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/utils/ngraph_helpers.hpp index de015677b4a..de2dbab0612 100644 --- a/inference-engine/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/utils/ngraph_helpers.hpp +++ b/inference-engine/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/utils/ngraph_helpers.hpp @@ -214,6 +214,15 @@ enum class SequenceTestsMode { CONVERT_TO_TI_RAND_SEQ_LEN_PARAM, }; +enum class MemoryTransformation { + NONE, + LOW_LATENCY, + LOW_LATENCY_REGULAR_API, + LOW_LATENCY_V2, + LOW_LATENCY_V2_REGULAR_API, + LOW_LATENCY_V2_ORIGINAL_INIT +}; + std::ostream &operator<<(std::ostream &os, const ReductionType &m); std::ostream &operator<<(std::ostream &os, const PadMode &m); @@ -297,5 +306,7 @@ std::ostream& operator<<(std::ostream & os, TensorIteratorBody type); std::ostream& 
operator<<(std::ostream & os, SequenceTestsMode type); +std::ostream& operator<<(std::ostream & os, MemoryTransformation type); + } // namespace helpers } // namespace ngraph diff --git a/inference-engine/tests/ngraph_helpers/ngraph_functions/src/utils/ngraph_helpers.cpp b/inference-engine/tests/ngraph_helpers/ngraph_functions/src/utils/ngraph_helpers.cpp index 5de50203ba2..2c5a07540b0 100644 --- a/inference-engine/tests/ngraph_helpers/ngraph_functions/src/utils/ngraph_helpers.cpp +++ b/inference-engine/tests/ngraph_helpers/ngraph_functions/src/utils/ngraph_helpers.cpp @@ -817,5 +817,32 @@ std::ostream& operator<<(std::ostream & os, SequenceTestsMode type) { } return os; } + +std::ostream& operator<<(std::ostream & os, MemoryTransformation type) { + switch (type) { + case MemoryTransformation::NONE: + os << "NONE"; + break; + case MemoryTransformation::LOW_LATENCY_V2: + os << "LOW_LATENCY_V2"; + break; + case MemoryTransformation::LOW_LATENCY: + os << "LOW_LATENCY"; + break; + case MemoryTransformation::LOW_LATENCY_V2_REGULAR_API: + os << "LOW_LATENCY_V2_REGULAR_API"; + break; + case MemoryTransformation::LOW_LATENCY_REGULAR_API: + os << "LOW_LATENCY_REGULAR_API"; + break; + case MemoryTransformation::LOW_LATENCY_V2_ORIGINAL_INIT: + os << "LOW_LATENCY_V2_ORIGINAL_INIT"; + break; + default: + throw std::runtime_error("NOT_SUPPORTED_TYPE"); + } + return os; +} + } // namespace helpers } // namespace ngraph diff --git a/model-optimizer/mo/back/offline_transformations.py b/model-optimizer/mo/back/offline_transformations.py index a363a1ca250..1df5f6cb7a0 100644 --- a/model-optimizer/mo/back/offline_transformations.py +++ b/model-optimizer/mo/back/offline_transformations.py @@ -11,7 +11,7 @@ def get_available_transformations(): try: from openvino.offline_transformations import ApplyLowLatencyTransformation # pylint: disable=import-error,no-name-in-module return { - 'LowLatency': ApplyLowLatencyTransformation, + 'LowLatency2': ApplyLowLatencyTransformation, } except Exception as e: return {} diff --git a/model-optimizer/mo/utils/cli_parser.py b/model-optimizer/mo/utils/cli_parser.py index 21d2873b54e..e6b2f2d2517 100644 --- a/model-optimizer/mo/utils/cli_parser.py +++ b/model-optimizer/mo/utils/cli_parser.py @@ -8,6 +8,7 @@ import os import re from collections import OrderedDict from itertools import zip_longest +from distutils.util import strtobool import numpy as np @@ -257,9 +258,9 @@ def get_common_cli_parser(parser: argparse.ArgumentParser = None): help='Apply additional transformations. ' + 'Usage: "--transform transformation_name1[args],transformation_name2..." ' + 'where [args] is key=value pairs separated by semicolon. 
' + - 'Examples: "--transform LowLatency" or ' + - ' "--transform LowLatency[num_iterations=2]" ' + - 'Available transformations: "LowLatency"', + 'Examples: "--transform LowLatency2" or ' + + ' "--transform LowLatency2[use_const_initializer=False]" ' + + 'Available transformations: "LowLatency2"', default="") common_group.add_argument('--disable_fusing', help='Turn off fusing of linear operations to Convolution', @@ -1151,6 +1152,14 @@ def isfloat(value): return False +def isbool(value): + try: + strtobool(value) + return True + except ValueError: + return False + + def convert_string_to_real_type(value: str): values = value.split(',') for i in range(len(values)): @@ -1159,6 +1168,8 @@ def convert_string_to_real_type(value: str): values[i] = int(value) elif isfloat(value): values[i] = float(value) + elif isbool(value): + values[i] = strtobool(value) return values[0] if len(values) == 1 else values diff --git a/model-optimizer/unit_tests/mo/utils/cli_parser_test.py b/model-optimizer/unit_tests/mo/utils/cli_parser_test.py index 68a9994d206..c6c1c96e46a 100644 --- a/model-optimizer/unit_tests/mo/utils/cli_parser_test.py +++ b/model-optimizer/unit_tests/mo/utils/cli_parser_test.py @@ -905,64 +905,65 @@ class TransformChecker(unittest.TestCase): self.assertEqual(parse_transform(""), []) def test_single_pass(self): - self.assertEqual(parse_transform("LowLatency"), [("LowLatency", {})]) + self.assertEqual(parse_transform("LowLatency2"), [("LowLatency2", {})]) def test_single_pass_with_args(self): - self.assertEqual(parse_transform("LowLatency[num_iterations=2]"), - [("LowLatency", {"num_iterations": 2})]) + self.assertEqual(parse_transform("LowLatency2[use_const_initializer=True]"), + [("LowLatency2", {"use_const_initializer": True})]) def test_single_pass_with_multiple_args(self): - self.assertEqual(parse_transform("LowLatency[num_iterations=2;dummy_attr=3.14]"), - [("LowLatency", {"num_iterations": 2, "dummy_attr": 3.14})]) + self.assertEqual(parse_transform("LowLatency2[use_const_initializer=True;dummy_attr=3.14]"), + [("LowLatency2", {"use_const_initializer": True, "dummy_attr": 3.14})]) def test_multiple_passes_with_args(self): - self.assertEqual(parse_transform("LowLatency[num_iterations=2],DummyPass[type=ReLU]"), - [("LowLatency", {"num_iterations": 2}), + self.assertEqual(parse_transform("LowLatency2[use_const_initializer=True],DummyPass[type=ReLU]"), + [("LowLatency2", {"use_const_initializer": True}), ("DummyPass", {"type": "ReLU"})]) def test_multiple_passes_with_args2(self): - self.assertEqual(parse_transform("LowLatency[num_iterations=2,3,4.15],DummyPass1,DummyPass2[types=ReLU,PReLU;values=1,2,3]"), - [("LowLatency", {"num_iterations": [2,3,4.15]}), + self.assertEqual(parse_transform("LowLatency2[use_const_initializer=True,False],DummyPass1," + "DummyPass2[types=ReLU,PReLU;values=1,2,3]"), + [("LowLatency2", {"use_const_initializer": [True, False]}), ("DummyPass1", {}), ("DummyPass2", {"types": ["ReLU", "PReLU"], "values": [1,2,3]})]) def test_multiple_passes_no_args(self): - self.assertEqual(parse_transform("DummyPass,LowLatency2"), - [("DummyPass", {}), ("LowLatency2", {})]) + self.assertEqual(parse_transform("DummyPass,LowLatency22"), + [("DummyPass", {}), ("LowLatency22", {})]) def test_single_pass_neg(self): - self.assertRaises(Error, parse_transform, "LowLatency!") + self.assertRaises(Error, parse_transform, "LowLatency2!") def test_multiple_passes_neg(self): - self.assertRaises(Error, parse_transform, "LowLatency;DummyPass") + self.assertRaises(Error, parse_transform, 
"LowLatency2;DummyPass") def test_single_pass_with_args_neg1(self): - self.assertRaises(Error, parse_transform, "LowLatency[=2]") + self.assertRaises(Error, parse_transform, "LowLatency2[=2]") def test_single_pass_with_args_neg2(self): - self.assertRaises(Error, parse_transform, "LowLatency[key=]") + self.assertRaises(Error, parse_transform, "LowLatency2[key=]") def test_single_pass_with_args_neg3(self): - self.assertRaises(Error, parse_transform, "LowLatency[]") + self.assertRaises(Error, parse_transform, "LowLatency2[]") def test_single_pass_with_args_neg4(self): - self.assertRaises(Error, parse_transform, "LowLatency[key=value;]") + self.assertRaises(Error, parse_transform, "LowLatency2[key=value;]") def test_single_pass_with_args_neg5(self): - self.assertRaises(Error, parse_transform, "LowLatency[value]") + self.assertRaises(Error, parse_transform, "LowLatency2[value]") def test_single_pass_with_args_neg6(self): - self.assertRaises(Error, parse_transform, "LowLatency[key=value") + self.assertRaises(Error, parse_transform, "LowLatency2[key=value") @patch("mo.back.offline_transformations.get_available_transformations") def test_check_low_latency_is_available(self, available_transformations): - available_transformations.return_value = {"LowLatency": None} + available_transformations.return_value = {"LowLatency2": None} try: - check_available_transforms([("LowLatency" ,"")], True) + check_available_transforms([("LowLatency2", "")], True) except Error as e: self.assertTrue(False, "Exception \"{}\" is unexpected".format(e)) @patch("mo.back.offline_transformations.get_available_transformations") def test_check_dummy_pass_is_available(self, available_transformations): - available_transformations.return_value = {"LowLatency": None} + available_transformations.return_value = {"LowLatency2": None} self.assertRaises(Error, check_available_transforms, [("DummyPass", "")], True) diff --git a/ngraph/core/include/ngraph/pass/low_latency.hpp b/ngraph/core/include/ngraph/pass/low_latency.hpp index 86757edb800..507ffe3a21b 100644 --- a/ngraph/core/include/ngraph/pass/low_latency.hpp +++ b/ngraph/core/include/ngraph/pass/low_latency.hpp @@ -8,13 +8,14 @@ #include #include +#include namespace ngraph { namespace pass { /** - * @brief The transformation finds all TensorIterator layers in the network, + * @brief The transformation finds all TensorIterator/Loop layers in the network, * processes all back edges that describe a connection between Result and Parameter * of the TensorIterator body,and inserts ReadValue layer between Parameter * and the next layers after this Parameter, and Assign layer after the layers @@ -42,11 +43,50 @@ namespace ngraph * by step, the states will store between inferences. */ - class NGRAPH_API LowLatency : public ngraph::pass::MatcherPass + class NGRAPH_DEPRECATED("Use LowLatency2 instead.") NGRAPH_API LowLatency + : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; LowLatency(); }; + + /** + * @brief The transformation finds all TensorIterator/Loop layers in the network, + * processes all back edges that describe a connection between Result and Parameter + * of the TensorIterator/Loop bodies,and inserts ReadValue and Assign layers at the + * input and output corresponding to this back edge. + * Supported platforms: CPU, GNA. + * + * The example below describes the changes made by the transformation + * [] - TensorIterator body + * () - new layer + * BE - back-edge + * + * before applying the transformation: + * -> input1[BE_1 -> Parameter -> Layers ... 
-> Result -> BE_1 ]output1-> + * + * after applying the transformation: + * ->(ReadValue)-> input1[BE_1 ->Parameter->Layers ...->Result->BE_1]output1 ->(Assign) + * \ + * ->... + * After applying the transformation, the resulting network can be inferred + * step by step, the states will store between inferences. + */ + class NGRAPH_API LowLatency2 : public ngraph::pass::FunctionPass + { + public: + NGRAPH_RTTI_DECLARATION; + + explicit LowLatency2(bool use_const_initializer = true) + : m_use_const_initializer(use_const_initializer) + { + } + + bool run_on_function(std::shared_ptr f) override; + + private: + bool m_use_const_initializer; + }; } // namespace pass } // namespace ngraph diff --git a/ngraph/core/src/op/tensor_iterator.cpp b/ngraph/core/src/op/tensor_iterator.cpp index 5252d2124f2..35162ef3ceb 100644 --- a/ngraph/core/src/op/tensor_iterator.cpp +++ b/ngraph/core/src/op/tensor_iterator.cpp @@ -129,7 +129,6 @@ void op::v0::TensorIterator::validate_and_infer_types() m_body->get_results().at(merged_input_description->m_body_value_index)->input(0); ends.push_back(body_value.get_node()->shared_from_this()); - auto body_value_partial_shape = body_value.get_partial_shape(); auto body_parameter = m_body->get_parameters().at(merged_input_description->m_body_parameter_index); diff --git a/ngraph/core/src/pass/low_latency.cpp b/ngraph/core/src/pass/low_latency.cpp index ea5effce4ab..d290eb14b7e 100644 --- a/ngraph/core/src/pass/low_latency.cpp +++ b/ngraph/core/src/pass/low_latency.cpp @@ -6,12 +6,29 @@ #include +#include #include +#include #include +#include #include +NGRAPH_RTTI_DEFINITION(ngraph::pass::LowLatency2, "LowLatency2", 0); + +NGRAPH_SUPPRESS_DEPRECATED_START NGRAPH_RTTI_DEFINITION(ngraph::pass::LowLatency, "LowLatency", 0); +using namespace std; +using namespace ngraph; + +namespace +{ + string generate_variable_name(const string& op_name, const string& param_name, int variable_idx) + { + return op_name + "/" + param_name + "/" + "variable_" + to_string(variable_idx); + } + +} // namespace ngraph::pass::LowLatency::LowLatency() { auto tensor_iterator = ngraph::pattern::wrap_type(); @@ -58,11 +75,12 @@ ngraph::pass::LowLatency::LowLatency() const auto& inputs_to = func->get_parameters() .at(merged_in->m_body_parameter_index) ->get_output_target_inputs(0); - const std::string variable_name(sub_graph_op->get_friendly_name() + "/" + - func->get_parameters() - .at(merged_in->m_body_parameter_index) - ->get_friendly_name() + - "/variable_" + std::to_string(variable_id)); + const std::string variable_name( + generate_variable_name(sub_graph_op->get_friendly_name(), + func->get_parameters() + .at(merged_in->m_body_parameter_index) + ->get_friendly_name(), + variable_id)); auto variable = std::make_shared( VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name}); auto read_value = std::make_shared( @@ -90,3 +108,178 @@ ngraph::pass::LowLatency::LowLatency() auto m = std::make_shared(tensor_iterator, "LowLatency"); register_matcher(m, callback); } +NGRAPH_SUPPRESS_DEPRECATED_END + +void UnrollSingleIteration(const shared_ptr& sub_graph_op, + const shared_ptr& outer_f) +{ + using namespace opset7; + + const auto& params = sub_graph_op->get_function()->get_parameters(); + const auto& results = sub_graph_op->get_function()->get_results(); + + // before: Layer1 -> TI [input -> bodyParameter -> Layer2 -> ...] + // after: Layer1 -> Layer2 ->... 
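// Usage sketch (illustration only, not taken from the patch): how the LowLatency2 pass declared
// above might be applied from C++, assuming `f` is a std::shared_ptr<ngraph::Function> built from
// opset7 -- this mirrors the ApplyLowLatency test code earlier in this series.
//
//     #include <ngraph/pass/low_latency.hpp>
//     #include <ngraph/pass/manager.hpp>
//
//     ngraph::pass::Manager manager;
//     manager.register_pass<ngraph::pass::LowLatency2>(/*use_const_initializer=*/false);
//     manager.run_passes(f);
//
// The pass exposes each TensorIterator/Loop back edge through a ReadValue/Assign pair and, when a
// sub-graph runs for a single iteration, unrolls it in place.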
+ for (const auto& in : sub_graph_op->get_input_descriptions()) + { + const auto& connect_to = sub_graph_op->get_input_source_output(in->m_input_index); + for (auto& output : params.at(in->m_body_parameter_index)->outputs()) + { + output.replace(connect_to); + } + } + + // before: TI [...-> Layer1 -> Result -> output] -> Layer2 -> ... + // after: ...-> Layer1 -> Layer2 -> ... + NodeVector new_ops; + for (const auto& out : sub_graph_op->get_output_descriptions()) + { + const auto& connect_to = results.at(out->m_body_value_index)->get_input_source_output(0); + for (auto& input_to : sub_graph_op->output(out->m_output_index).get_target_inputs()) + { + // create IE output name + std::string out_name = sub_graph_op->get_friendly_name(); + if (sub_graph_op->get_output_size() != 1) + out_name += "." + std::to_string(out->m_output_index); + + // IECompatibility: insert identity (Unsqueeze + Squeeze) to store the TensorIterator + // output names + auto axis_1 = Constant::create(ngraph::element::i64, ngraph::Shape{1}, {1}); + auto identity_1 = std::make_shared(connect_to, axis_1); + auto identity_2 = std::make_shared(identity_1, axis_1); + identity_2->set_friendly_name(out_name); + new_ops.push_back(identity_1); + new_ops.push_back(identity_2); + + input_to.replace_source_output(identity_2); + } + } + outer_f->add_sinks(sub_graph_op->get_function()->get_sinks()); + ngraph::copy_runtime_info(sub_graph_op, sub_graph_op->get_function()->get_ops()); + ngraph::copy_runtime_info(sub_graph_op, new_ops); +} + +Output create_init_subgraph(const shared_ptr& sub_graph_op, + const Output& in_node) +{ + using namespace opset7; + + auto const_zero = make_shared(in_node.get_element_type(), Shape{1}, 0); + auto shape_of = make_shared(in_node); + auto broadcast = make_shared(const_zero, shape_of); + copy_runtime_info(sub_graph_op, {const_zero, shape_of, broadcast}); + return broadcast->output(0); +} + +bool pass::LowLatency2::run_on_function(shared_ptr f) +{ + using namespace opset7; + + SinkVector assigns; + for (const auto& op : f->get_ordered_ops()) + { + if (const auto& sub_graph_op = dynamic_pointer_cast(op)) + { + int64_t variable_id = 0; + const auto& func = sub_graph_op->get_function(); + const auto& params = func->get_parameters(); + for (const auto& in : sub_graph_op->get_input_descriptions()) + { + // Process all back edges + if (const auto& merged_in = + dynamic_pointer_cast(in)) + { + // create new Variable + const string& param_name = + params.at(merged_in->m_body_parameter_index)->get_friendly_name(); + const string& var_name = generate_variable_name( + sub_graph_op->get_friendly_name(), param_name, variable_id); + + const auto& input = sub_graph_op->input(merged_in->m_input_index); + if (std::dynamic_pointer_cast( + input.get_source_output().get_node_shared_ptr()) != nullptr) + { + NGRAPH_DEBUG + << "LowLatency2 transformation cannot be applied because the " + << "ReadValue node is already an input to the TensorIterator." + << "LowLatency2 transformation may have already been applied, please " + << "do not call it more then once."; + return false; + } + + const auto& param = sub_graph_op->get_function()->get_parameters().at( + merged_in->m_body_parameter_index); + for (const auto& in_to : param->output(0).get_target_inputs()) + { + if (dynamic_cast(in_to.get_node()) != nullptr) + { + NGRAPH_DEBUG + << "LowLatency2 transformation cannot be applied because the " + << "ReadValue node is already inside the TensorIterator. 
" + << "LowLatency transformation may have been applied, please do " + << "not call LowLatency2 after LowLatency."; + return false; + } + } + + VariableInfo var_info{PartialShape::dynamic(), element::dynamic, var_name}; + auto variable = make_shared(var_info); + + // insert ReadValue + // Layers -> [new op: ReadValue] -> Subgraph operation + Output read_value_in = input.get_source_output(); + if (m_use_const_initializer) + { + read_value_in = create_init_subgraph(sub_graph_op, read_value_in); + } + auto read_value = make_shared(read_value_in, variable); + input.replace_source_output(read_value->output(0)); + read_value->set_friendly_name(var_name); + ngraph::copy_runtime_info(sub_graph_op, read_value); + + /* insert Assign + // Subgraph operation -> [new op: Assign] + // \ + // ---> Layers -> ... + */ + const auto& out_desc = sub_graph_op->get_output_descriptions(); + bool is_output_exist = std::any_of( + out_desc.begin(), + out_desc.end(), + [&merged_in]( + const std::shared_ptr& out) { + return out->m_body_value_index == merged_in->m_body_value_index; + }); + // Create new output if it doesn't exist. + if (!is_output_exist) + { + sub_graph_op->get_iter_value( + func->get_results().at(merged_in->m_body_value_index)); + } + for (const auto& out : sub_graph_op->get_output_descriptions()) + { + if (out->m_body_value_index == merged_in->m_body_value_index) + { + auto assign = make_shared( + sub_graph_op->output(out->m_output_index), variable); + ngraph::copy_runtime_info(sub_graph_op, assign); + // control dependency so that ReadValue is processed before Assign + assign->add_control_dependency(read_value); + assigns.emplace_back(assign); + break; + } + } + } + + variable_id++; + } + + if (sub_graph_op->get_num_iterations() == 1) + { + UnrollSingleIteration(sub_graph_op, f); + } + } + } + f->add_sinks(assigns); + return true; +} From 6a18b45337bd64542155416415d53e46627d31b9 Mon Sep 17 00:00:00 2001 From: Yegor Kruglov Date: Mon, 7 Jun 2021 16:44:16 +0300 Subject: [PATCH 02/41] [MO] Implementation of names uniqueness check (#5651) * added new transformation to check the uniqueness of nodes names * added unittest * remove redundant line * conversation resolving * updated unittest * added new unittest, added check for uniqueness of new node name * added a description * added renaming of several results with the same name and unittest for this case * another implementation, updated unittests * added a comment * updated comments * added comment to the nodes_with_equal_names func * added a condition * added a result name check in unittests --- model-optimizer/automation/package_BOM.txt | 1 + .../extensions/back/names_uniqueness_check.py | 67 +++++++++++++++++ .../back/names_uniqueness_check_test.py | 71 +++++++++++++++++++ 3 files changed, 139 insertions(+) create mode 100644 model-optimizer/extensions/back/names_uniqueness_check.py create mode 100644 model-optimizer/unit_tests/extensions/back/names_uniqueness_check_test.py diff --git a/model-optimizer/automation/package_BOM.txt b/model-optimizer/automation/package_BOM.txt index 128c9c98117..be0dc1c37e0 100644 --- a/model-optimizer/automation/package_BOM.txt +++ b/model-optimizer/automation/package_BOM.txt @@ -37,6 +37,7 @@ extensions/back/LRNToNorm.py extensions/back/MarkNodesWithShapeValues.py extensions/back/MatMulNormalizer.py extensions/back/MaxPool.py +extensions/back/names_uniqueness_check.py extensions/back/NormalizeToNormalizeL2.py extensions/back/op_versioning.py extensions/back/OptimizeTransposeReshapeSequence.py diff --git 
a/model-optimizer/extensions/back/names_uniqueness_check.py b/model-optimizer/extensions/back/names_uniqueness_check.py new file mode 100644 index 00000000000..188df9f4427 --- /dev/null +++ b/model-optimizer/extensions/back/names_uniqueness_check.py @@ -0,0 +1,67 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +from collections import defaultdict +from extensions.back.pass_separator import BackFinish +from mo.back.replacement import BackReplacementPattern +from mo.graph.graph import Graph, rename_node + + +def nodes_with_equal_names(graph: Graph): + """ + :param graph: Graph to operate on + :return: Dictionary with node names as keys and a list of their corresponding nodes as values + """ + names_dict = defaultdict(list) + for node in graph.get_op_nodes(): + node_name = node.soft_get('name', node.id) + names_dict[node_name].append(node) + return names_dict + + +def make_node_names_unique(nodes: list, node_names: set): + """ + :param nodes: List with nodes matching a specific name + :param node_names: Set with all node names contained in the graph + :return: None + + Result nodes will be renamed only when it is absolutely necessary(if there are several Result nodes with the same name). + Function finds a position of Result nodes in the "nodes" list, take the first and rename all other nodes. + If the "nodes" list does not contain Result nodes, then all nodes starting from the second one will be renamed. + All new names are added to the "node_names" set. + """ + results_pos = [idx for idx, node in enumerate(nodes) if node.op == 'Result'] + node_position_to_keep = 0 + if len(results_pos) != 0: + node_position_to_keep = results_pos[0] + for idx, node in enumerate(nodes): + if idx != node_position_to_keep: + new_node_name = node.soft_get('name', node.id) + '_' + str(idx) + # preparing a new unique name for the node + while new_node_name in node_names: + new_node_name += '_' + str(idx) + node_names.add(new_node_name) + rename_node(node, new_node_name) + + +class NamesUniquenessCheck(BackReplacementPattern): + """ + If there are several layers with the same name in the original model and they are saved in the IR, IE will fail with + the invalid IR error. IE checks the uniqueness of the names and, if it is not true, throws an exception. The way how + to fix it on the MO side is to rename this nodes (one node will remain with the original name). Since we prefer to + save framework names for the output nodes, nodes with op=Result will not be renamed, except the case when there are + several Result nodes with the same name. 
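    For illustration (mirroring the unit tests added below): if a Parameter, a Cast and a Result are all
    named 'node', the two non-Result nodes are renamed with an index suffix (e.g. 'node_0' and 'node_1',
    the suffix being appended again until the name is unique in the graph), while the Result keeps the
    original framework name 'node'.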
+ """ + enabled = True + + def run_after(self): + return [BackFinish] + + def run_before(self): + return [] + + def find_and_replace_pattern(self, graph: Graph): + names_to_nodes = nodes_with_equal_names(graph) + node_names = set(names_to_nodes.keys()) + for nodes in names_to_nodes.values(): + if len(nodes) > 1: + make_node_names_unique(nodes, node_names) diff --git a/model-optimizer/unit_tests/extensions/back/names_uniqueness_check_test.py b/model-optimizer/unit_tests/extensions/back/names_uniqueness_check_test.py new file mode 100644 index 00000000000..94f83af1a1b --- /dev/null +++ b/model-optimizer/unit_tests/extensions/back/names_uniqueness_check_test.py @@ -0,0 +1,71 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import unittest + +from extensions.back.names_uniqueness_check import NamesUniquenessCheck +from mo.graph.graph import Node +from unit_tests.utils.graph import build_graph + + +class TestNamesUniquenessCheck(unittest.TestCase): + + def test_1(self): + graph = build_graph( + nodes_attrs={ + 'input': {'kind': 'op', 'op': 'Parameter', 'name': 'node'}, + 'cast': {'kind': 'op', 'op': 'Cast', 'name': 'node'}, + 'result': {'kind': 'op', 'op': 'Result', 'name': 'node'} + }, + edges=[ + ('input', 'cast'), + ('cast', 'result') + ] + ) + + NamesUniquenessCheck().find_and_replace_pattern(graph) + names = [node.name for node in graph.get_op_nodes()] + result_name = Node(graph, 'result').name + + self.assertTrue(len(set(names)) == 3) + self.assertTrue(result_name == 'node') + + def test_2(self): + graph = build_graph( + nodes_attrs={ + 'input': {'kind': 'op', 'op': 'Parameter', 'name': 'node'}, + 'cast': {'kind': 'op', 'op': 'Cast', 'name': 'node_0'}, + 'result': {'kind': 'op', 'op': 'Result', 'name': 'node'} + }, + edges=[ + ('input', 'cast'), + ('cast', 'result') + ] + ) + + NamesUniquenessCheck().find_and_replace_pattern(graph) + names = [node.name for node in graph.get_op_nodes()] + result_name = Node(graph, 'result').name + + self.assertTrue(len(set(names)) == 3) + self.assertTrue(result_name == 'node') + + def test_3(self): + graph = build_graph( + nodes_attrs={ + 'input': {'kind': 'op', 'op': 'Parameter', 'name': 'node_0'}, + 'cast': {'kind': 'op', 'op': 'Cast', 'name': 'node_1'}, + 'result_1': {'kind': 'op', 'op': 'Result', 'name': 'node'}, + 'result_2': {'kind': 'op', 'op': 'Result', 'name': 'node'} + }, + edges=[ + ('input', 'cast'), + ('cast', 'result_1'), + ('cast', 'result_2'), + ] + ) + NamesUniquenessCheck().find_and_replace_pattern(graph) + names = [node.name for node in graph.get_op_nodes()] + + self.assertTrue('node' in names) + self.assertTrue(len(set(names)) == 4) From c80e939474e965437d68c082e2d13e8cb3f36dca Mon Sep 17 00:00:00 2001 From: Alexander Zhogov Date: Mon, 7 Jun 2021 17:53:06 +0300 Subject: [PATCH 03/41] GitHub CI: non PR author commits are allowed (#6064) --- .github/org_control/check_pr.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/org_control/check_pr.py b/.github/org_control/check_pr.py index 882854724ca..e0b48832ead 100644 --- a/.github/org_control/check_pr.py +++ b/.github/org_control/check_pr.py @@ -139,14 +139,15 @@ def update_labels(gh_api, pull, non_org_intel_pr_users, non_org_pr_users): def get_wrong_commits(pull): """Returns commits with incorrect user and email""" - print("GitHub PR user email:", pull.user.email) + pr_author_email = pull.user.email.lower() + print("GitHub PR author email:", pr_author_email) print("Check commits:") wrong_commits = set() for commit 
in pull.get_commits(): # import pprint; pprint.pprint(commit.raw_data) print("Commit SHA:", commit.sha) # Use raw data because commit author can be non GitHub user - commit_email = commit.raw_data["commit"]["author"]["email"] + commit_email = commit.raw_data["commit"]["author"]["email"].lower() print(" Commit email:", commit_email) if not github_api.is_valid_user(commit.author): print( @@ -159,9 +160,8 @@ def get_wrong_commits(pull): " WARNING: The commit is not verified. Reason:", commit.raw_data["commit"]["verification"]["reason"], ) - if pull.user.email != commit_email: - print(" ERROR: Commit email and GitHub user public email are differnt") - wrong_commits.add(commit.sha) + if pr_author_email != commit_email: + print(" WARNING: Commit email and GitHub PR author public email are differnt") return wrong_commits From 56ada41bd05f47ceeddd86f1e99f61858e26a2ba Mon Sep 17 00:00:00 2001 From: Mikhail Treskin Date: Mon, 7 Jun 2021 18:19:52 +0300 Subject: [PATCH 04/41] Add get_constant_from_source capability to pyngraph (#6018) --- ngraph/python/src/pyngraph/util.cpp | 19 +++++++++++++ ngraph/python/tests/test_ngraph/test_utils.py | 28 +++++++++++++++++++ 2 files changed, 47 insertions(+) create mode 100644 ngraph/python/tests/test_ngraph/test_utils.py diff --git a/ngraph/python/src/pyngraph/util.cpp b/ngraph/python/src/pyngraph/util.cpp index a2eac625bab..5178e84fe90 100644 --- a/ngraph/python/src/pyngraph/util.cpp +++ b/ngraph/python/src/pyngraph/util.cpp @@ -5,6 +5,7 @@ #include #include "pyngraph/util.hpp" +#include "ngraph/validation_util.hpp" namespace py = pybind11; @@ -18,4 +19,22 @@ void regmodule_pyngraph_util(py::module m) { py::module mod = m.def_submodule("util", "ngraph.impl.util"); mod.def("numpy_to_c", &numpy_to_c); + mod.def("get_constant_from_source", + &ngraph::get_constant_from_source, + py::arg("output"), + R"( + Runs an estimation of source tensor. + + Parameters + ---------- + output : Output + output node + + Returns + ---------- + get_constant_from_source : Constant or None + If it succeeded to calculate both bounds and + they are the same returns Constant operation + from the resulting bound, otherwise Null. 
+ )"); } diff --git a/ngraph/python/tests/test_ngraph/test_utils.py b/ngraph/python/tests/test_ngraph/test_utils.py new file mode 100644 index 00000000000..49b90017305 --- /dev/null +++ b/ngraph/python/tests/test_ngraph/test_utils.py @@ -0,0 +1,28 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import ngraph as ng +from ngraph.impl import Shape + + +def test_get_constant_from_source_success(): + dtype = np.int + input1 = ng.parameter(Shape([5, 5]), dtype=dtype, name="input_1") + input2 = ng.parameter(Shape([25]), dtype=dtype, name="input_2") + shape_of = ng.shape_of(input2, name="shape_of") + reshape = ng.reshape(input1, shape_of, special_zero=True) + folded_const = ng.impl.util.get_constant_from_source(reshape.input(1).get_source_output()) + + assert folded_const is not None + assert folded_const.get_vector() == [25] + + +def test_get_constant_from_source_failed(): + dtype = np.int + input1 = ng.parameter(Shape([5, 5]), dtype=dtype, name="input_1") + input2 = ng.parameter(Shape([1]), dtype=dtype, name="input_2") + reshape = ng.reshape(input1, input2, special_zero=True) + folded_const = ng.impl.util.get_constant_from_source(reshape.input(1).get_source_output()) + + assert folded_const is None From 64d7a40ae4d783eda53a34d8201b928ebf4e3479 Mon Sep 17 00:00:00 2001 From: Andrei Gorbachev Date: Mon, 7 Jun 2021 18:36:38 +0300 Subject: [PATCH 05/41] [IE CLDNN] 54304 fix reduce ops (#5986) --- .../src/cldnn_engine/ops/reduce.cpp | 25 ++++++- .../subgraph_tests/reduce_eltwise.cpp | 47 +++++++++++++ .../include/subgraph_tests/reduce_eltwise.hpp | 15 +++++ .../subgraph/reduce_eltwise.hpp | 36 ++++++++++ .../src/subgraph/reduce_eltwise.cpp | 67 +++++++++++++++++++ 5 files changed, 189 insertions(+), 1 deletion(-) create mode 100644 inference-engine/tests/functional/plugin/gpu/shared_tests_instances/subgraph_tests/reduce_eltwise.cpp create mode 100644 inference-engine/tests/functional/plugin/shared/include/subgraph_tests/reduce_eltwise.hpp create mode 100644 inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/reduce_eltwise.hpp create mode 100644 inference-engine/tests/functional/shared_test_classes/src/subgraph/reduce_eltwise.cpp diff --git a/inference-engine/src/cldnn_engine/ops/reduce.cpp b/inference-engine/src/cldnn_engine/ops/reduce.cpp index 3331d581b86..26343ffb813 100644 --- a/inference-engine/src/cldnn_engine/ops/reduce.cpp +++ b/inference-engine/src/cldnn_engine/ops/reduce.cpp @@ -18,6 +18,7 @@ #include "api/reduce.hpp" #include "api/reorder.hpp" +#include "api/reshape.hpp" namespace CLDNNPlugin { @@ -78,6 +79,28 @@ void CreateReduceOp(Program& p, const std::shared_ptr& op, cldnn:: p.AddPrimitive(reducePrim); + auto resultLayerName = layerName; + auto out_dims = op->get_output_shape(0).size(); + if (out_dims == 3 && !keep_dims && rank >= 4) { + resultLayerName = layerName + "_reshape"; + auto out_shape = op->get_output_shape(0); + cldnn::tensor outTensor; + switch (rank) { + case 6: + outTensor = cldnn::tensor(TensorValue(out_shape[0]), TensorValue(out_shape[1]), + 1, TensorValue(out_shape[2]), 1, 1); + case 5: + outTensor = cldnn::tensor(TensorValue(out_shape[0]), TensorValue(out_shape[1]), + 1, TensorValue(out_shape[2]), 1); + case 4: + outTensor = cldnn::tensor(TensorValue(out_shape[0]), TensorValue(out_shape[1]), + 1, TensorValue(out_shape[2])); + } + auto reshape_prim = cldnn::reshape(resultLayerName, layerName, outTensor); + p.AddPrimitive(reshape_prim); + p.AddPrimitiveToProfiler(op, 
resultLayerName); + } + auto reorderLayerName = layerName + "_reorder"; cldnn::format out_format = cldnn::format::any; auto out_dt = DataTypeFromPrecision(op->get_output_element_type(0)); @@ -89,7 +112,7 @@ void CreateReduceOp(Program& p, const std::shared_ptr& op, cldnn:: else if (rank - rawAxes.size() <= 4) out_format = cldnn::format::bfyx; - auto reorder_prim = cldnn::reorder(reorderLayerName, layerName, out_format, out_dt); + auto reorder_prim = cldnn::reorder(reorderLayerName, resultLayerName, out_format, out_dt); p.AddPrimitive(reorder_prim); p.AddPrimitiveToProfiler(op, reorderLayerName); } else { diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/subgraph_tests/reduce_eltwise.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/subgraph_tests/reduce_eltwise.cpp new file mode 100644 index 00000000000..04dbc6e8005 --- /dev/null +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/subgraph_tests/reduce_eltwise.cpp @@ -0,0 +1,47 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "subgraph_tests/reduce_eltwise.hpp" + +using namespace SubgraphTestsDefinitions; + +namespace { + +const std::vector netPrecisions = { + InferenceEngine::Precision::FP32, +}; + +INSTANTIATE_TEST_CASE_P(smoke_ReduceEltwise6D, ReduceEltwiseTest, + testing::Combine( + testing::Values(std::vector{2, 3, 4, 5, 6, 7}), + testing::Values(std::vector{2, 3, 4}), + testing::Values(CommonTestUtils::OpType::VECTOR), + testing::Values(false), + testing::ValuesIn(netPrecisions), + testing::Values(CommonTestUtils::DEVICE_GPU)), + ReduceEltwiseTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(smoke_ReduceEltwise5D, ReduceEltwiseTest, + testing::Combine( + testing::Values(std::vector{2, 3, 4, 5, 6}), + testing::Values(std::vector{2, 3}), + testing::Values(CommonTestUtils::OpType::VECTOR), + testing::Values(false), + testing::ValuesIn(netPrecisions), + testing::Values(CommonTestUtils::DEVICE_GPU)), + ReduceEltwiseTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(smoke_ReduceEltwise4D, ReduceEltwiseTest, + testing::Combine( + testing::Values(std::vector{2, 3, 4, 5}), + testing::Values(std::vector{2}), + testing::Values(CommonTestUtils::OpType::VECTOR), + testing::Values(false), + testing::ValuesIn(netPrecisions), + testing::Values(CommonTestUtils::DEVICE_GPU)), + ReduceEltwiseTest::getTestCaseName); + +} // namespace diff --git a/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/reduce_eltwise.hpp b/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/reduce_eltwise.hpp new file mode 100644 index 00000000000..0b65a847820 --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/reduce_eltwise.hpp @@ -0,0 +1,15 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/subgraph/reduce_eltwise.hpp" + +namespace SubgraphTestsDefinitions { + +TEST_P(ReduceEltwiseTest, CompareWithRefs) { + Run(); +}; + +} // namespace SubgraphTestsDefinitions diff --git a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/reduce_eltwise.hpp b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/reduce_eltwise.hpp new file mode 100644 index 00000000000..df04891bf90 --- /dev/null +++ 
b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/reduce_eltwise.hpp @@ -0,0 +1,36 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include +#include "shared_test_classes/base/layer_test_utils.hpp" +#include "ngraph_functions/builders.hpp" +#include "ngraph_functions/utils/ngraph_helpers.hpp" +#include "common_test_utils/test_constants.hpp" + +namespace SubgraphTestsDefinitions { + +using ReduceEltwiseParamsTuple = typename std::tuple< + std::vector, // Input shapes + std::vector, // Axis to reduce order + CommonTestUtils::OpType, // Scalar or vector type axis + bool, // Keep dims + InferenceEngine::Precision, // Network precision + std::string>; // Device name + +class ReduceEltwiseTest: + public testing::WithParamInterface, + public LayerTestsUtils::LayerTestsCommon{ +public: + std::shared_ptr fn; + static std::string getTestCaseName(const testing::TestParamInfo &obj); +protected: + void SetUp() override; +}; + +} // namespace SubgraphTestsDefinitions diff --git a/inference-engine/tests/functional/shared_test_classes/src/subgraph/reduce_eltwise.cpp b/inference-engine/tests/functional/shared_test_classes/src/subgraph/reduce_eltwise.cpp new file mode 100644 index 00000000000..b51ca490090 --- /dev/null +++ b/inference-engine/tests/functional/shared_test_classes/src/subgraph/reduce_eltwise.cpp @@ -0,0 +1,67 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ngraph_functions/builders.hpp" +#include "shared_test_classes/subgraph/reduce_eltwise.hpp" + +namespace SubgraphTestsDefinitions { +std::string ReduceEltwiseTest::getTestCaseName(const testing::TestParamInfo &obj) { + std::vector inputShapes; + std::vector axes; + CommonTestUtils::OpType opType; + bool keepDims; + InferenceEngine::Precision netPrecision; + std::string targetName; + std::tie(inputShapes, axes, opType, keepDims, netPrecision, targetName) = obj.param; + + std::ostringstream result; + result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_"; + result << "axes=" << CommonTestUtils::vec2str(axes) << "_"; + result << "opType=" << opType << "_"; + if (keepDims) result << "KeepDims_"; + result << "netPRC=" << netPrecision.name() << "_"; + result << "targetDevice=" << targetName; + return result.str(); +} + +void ReduceEltwiseTest::SetUp() { + std::vector inputShape; + std::vector axes; + CommonTestUtils::OpType opType; + bool keepDims; + InferenceEngine::Precision netPrecision; + std::string targetName; + std::tie(inputShape, axes, opType, keepDims, netPrecision, targetName) = this->GetParam(); + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); + auto params = ngraph::builder::makeParams(ngPrc, {inputShape}); + auto paramOuts = ngraph::helpers::convert2OutputVector( + ngraph::helpers::castOps2Nodes(params)); + + std::vector shapeAxes; + switch (opType) { + case CommonTestUtils::OpType::SCALAR: { + if (axes.size() > 1) + FAIL() << "In reduce op if op type is scalar, 'axis' input's must contain 1 element"; + break; + } + case CommonTestUtils::OpType::VECTOR: { + shapeAxes.push_back(axes.size()); + break; + } + default: + FAIL() << "Reduce op doesn't support operation type: " << opType; + } + auto reductionAxesNode = std::dynamic_pointer_cast( + std::make_shared(ngraph::element::Type_t::i64, ngraph::Shape(shapeAxes), axes)); + + auto reduce = std::make_shared(paramOuts[0], reductionAxesNode, keepDims); + + 
std::vector constShape(reduce.get()->get_output_size(), 1); + constShape[2] = inputShape.back(); + auto constant = ngraph::builder::makeConstant(ngPrc, constShape, {}, true); + auto eltw = ngraph::builder::makeEltwise(reduce, constant, ngraph::helpers::EltwiseTypes::MULTIPLY); + ngraph::ResultVector results{std::make_shared(eltw)}; + function = std::make_shared(results, params, "ReduceEltwise"); +} +} // namespace SubgraphTestsDefinitions From 6a42b47c2f4bba88456fe39b8b203e6a33211293 Mon Sep 17 00:00:00 2001 From: Jozef Daniecki Date: Mon, 7 Jun 2021 19:12:57 +0200 Subject: [PATCH 06/41] RegionYolo operation specification refactoring. (#5926) * RegionYolo spec refactored against explicit type indication. * Improve readability. --- docs/ops/detection/RegionYolo_1.md | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/docs/ops/detection/RegionYolo_1.md b/docs/ops/detection/RegionYolo_1.md index c4eece6ff8b..ebcc79c2377 100644 --- a/docs/ops/detection/RegionYolo_1.md +++ b/docs/ops/detection/RegionYolo_1.md @@ -6,7 +6,7 @@ **Short description**: *RegionYolo* computes the coordinates of regions with probability for each class. -**Detailed description**: This operation is directly mapped to the original YOLO layer. [Reference](https://arxiv.org/pdf/1612.08242.pdf) +**Detailed description**: This operation is directly mapped to the [YOLO9000: Better, Faster, Stronger](https://arxiv.org/pdf/1612.08242.pdf) paper. **Attributes**: @@ -78,14 +78,17 @@ **Inputs**: -* **1**: `data` - 4D input tensor with floating point elements and shape `[N, C, H, W]`. Required. +* **1**: `data` - 4D tensor of type `T` and shape `[N, C, H, W]`. **Required.** **Outputs**: -* **1**: output tensor of rank 4 or less that codes detected regions. Refer to the original YOLO paper to decode the output as boxes. `anchors` should be used to decode real box coordinates. If `do_softmax` is set to 0, then the output shape is `[N, (classes + coords + 1)*len(mask), H, W]`. If `do_softmax` is set to 1, then output shape is partially flattened and defined in the following way: +* **1**: tensor of type `T` and rank 4 or less that codes detected regions. Refer to the [YOLO9000: Better, Faster, Stronger](https://arxiv.org/pdf/1612.08242.pdf) paper to decode the output as boxes. `anchors` should be used to decode real box coordinates. If `do_softmax` is set to `0`, then the output shape is `[N, (classes + coords + 1) * len(mask), H, W]`. If `do_softmax` is set to `1`, then output shape is partially flattened and defined in the following way: - flat_dim = data.shape[axis] * data.shape[axis+1] * ... * data.shape[end_axis] - output.shape = [data.shape[0], ..., data.shape[axis-1], flat_dim, data.shape[end_axis + 1], ...] + `flat_dim = data.shape[axis] * data.shape[axis+1] * ... * data.shape[end_axis]` + `output.shape = [data.shape[0], ..., data.shape[axis-1], flat_dim, data.shape[end_axis + 1], ...]` + +**Types** +* *T*: any supported floating point type. 
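To illustrate the flattened case above (values chosen only as an example): `data` of shape `[1, 125, 13, 13]` with `axis` = 1, `end_axis` = 3 and `do_softmax` set to `1` gives `flat_dim` = 125 * 13 * 13 = 21125, so the output shape is `[1, 21125]`.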
**Example** From 4d9fe14ec62641e7ab480d08cd70be0151aecd62 Mon Sep 17 00:00:00 2001 From: Yegor Kruglov Date: Mon, 7 Jun 2021 20:22:26 +0300 Subject: [PATCH 07/41] [MO] ConvolutionWithGroupResolver update to enable TF DepthwiseConv2dNative with in_channels=1 (#5528) * changed permutation attribute in conv extractor * changed conv get_group parameter * implemented a transformation * updated BOM * specified transformation for in_channels 1 * added unittest and comment string * updated convolution normalizer to convert depthwise convolution with group=1 to group convolution * renamed function * updated IR reader * conversations resolving * condition change --- .../extensions/back/ConvolutionNormalizer.py | 106 ++++++++---------- .../mo/utils/ir_reader/layer_to_class.py | 7 ++ .../back/ConvolutionNormalizer_test.py | 72 +++++++++++- 3 files changed, 123 insertions(+), 62 deletions(-) diff --git a/model-optimizer/extensions/back/ConvolutionNormalizer.py b/model-optimizer/extensions/back/ConvolutionNormalizer.py index 04da65631c5..0abfbec0e20 100644 --- a/model-optimizer/extensions/back/ConvolutionNormalizer.py +++ b/model-optimizer/extensions/back/ConvolutionNormalizer.py @@ -8,10 +8,43 @@ from extensions.back.ReverseInputChannels import ApplyReverseChannels from mo.back.replacement import BackReplacementPattern from mo.front.common.partial_infer.utils import int64_array from mo.front.tf.graph_utils import create_op_node_with_second_input, create_op_with_const_inputs -from mo.graph.graph import Graph +from mo.graph.graph import Graph, Node from mo.ops.const import Const from mo.ops.reshape import Reshape from mo.ops.strided_slice import StridedSlice +from mo.utils.error import Error + + +def resolve_convolution_with_group(node: Node, group: int, ir_version: str): + input_shape = node.in_port(0).data.get_shape() + assert len(input_shape) in [3, 4, 5] + + weights_shape = node.in_port(1).data.get_shape() + assert weights_shape is not None + assert len(weights_shape) in [3, 4, 5] + assert weights_shape[0] % group == 0 + + assert int64_array(node.output).ndim == 0 + if ir_version == 'V7': + if weights_shape[0] == node.output: + # weights are already is in [G*O I X Y] format + return + new_shape = int64_array([node.output, -1, *weights_shape[2:]]) + + elif ir_version == 'V10': + I = input_shape[1] + new_shape = int64_array([group, node.output / group, I / group, *weights_shape[2:]]) + assert np.prod(weights_shape) == np.prod(new_shape), \ + 'Initial weights shape {}, grouped weights shape {}'.format(weights_shape, new_shape) + del node['group'] + node['type'] = 'GroupConvolution' + else: + raise Error("Unknown IR version: {}".format(ir_version)) + + reshape = create_op_node_with_second_input(node.graph, Reshape, int64_array(new_shape), + {'override_output_shape': True}) + + node.in_port(1).get_connection().insert_node(reshape) class ConvolutionNormalizer(BackReplacementPattern): @@ -37,33 +70,12 @@ class V7ConvolutionWithGroupsResolver(BackReplacementPattern): """ enabled = False - @staticmethod - def pattern(): - return dict( - nodes=[ - ('node', dict(type='Convolution', group=lambda g: g is not None and g != 1)) - ], - edges=[] - ) - - def replace_pattern(self, graph: Graph, match: dict): - node = match['node'] - - group = node.group - assert group > 1 - - weights_shape = node.in_port(1).data.get_shape() - assert weights_shape is not None - assert weights_shape[0] % group == 0 - - if weights_shape[0] == node.output: - # weights are already is in [G*O I X Y] format - return - - new_shape = 
int64_array([node.output, -1, *weights_shape[2:]]) - reshape = create_op_node_with_second_input(graph, Reshape, int64_array(new_shape), - {'override_output_shape': True}) - node.in_port(1).get_connection().insert_node(reshape) + def find_and_replace_pattern(self, graph: Graph): + for node in graph.get_op_nodes(type='Convolution'): + group = node.soft_get('group', None) + if group is not None: + if group != 1 or node.soft_get('op') == 'DepthwiseConv2dNative': + resolve_convolution_with_group(node, group, ir_version='V7') class V10ConvolutionWithGroupsResolver(BackReplacementPattern): @@ -73,38 +85,12 @@ class V10ConvolutionWithGroupsResolver(BackReplacementPattern): """ enabled = False - @staticmethod - def pattern(): - return dict( - nodes=[ - ('node', dict(type='Convolution', group=lambda g: g is not None and g != 1)) - ], - edges=[] - ) - - def replace_pattern(self, graph: Graph, match: dict): - node = match['node'] - - group = node.group - assert group > 1 - - weights_shape = node.in_port(1).data.get_shape() - assert weights_shape is not None - assert weights_shape[0] % group == 0 - I = node.in_port(0).data.get_shape()[1] - - new_shape = int64_array([group, node.output / group, I / group, *weights_shape[2:]]) - - assert np.prod(weights_shape) == np.prod(new_shape), \ - 'Initial weights shape {}, grouped weights shape {}'.format(weights_shape, new_shape) - - del node['group'] - node['type'] = 'GroupConvolution' - - reshape = create_op_node_with_second_input(graph, Reshape, int64_array(new_shape), - {'override_output_shape': True}) - - node.in_port(1).get_connection().insert_node(reshape) + def find_and_replace_pattern(self, graph: Graph): + for node in graph.get_op_nodes(type='Convolution'): + group = node.soft_get('group', None) + if group is not None: + if group != 1 or node.soft_get('op') == 'DepthwiseConv2dNative': + resolve_convolution_with_group(node, group, ir_version='V10') class ConvolutionWithGroupsResolver(BackReplacementPattern): diff --git a/model-optimizer/mo/utils/ir_reader/layer_to_class.py b/model-optimizer/mo/utils/ir_reader/layer_to_class.py index d1afed6918d..f9f5fd9fb1d 100644 --- a/model-optimizer/mo/utils/ir_reader/layer_to_class.py +++ b/model-optimizer/mo/utils/ir_reader/layer_to_class.py @@ -195,6 +195,13 @@ def groupconv_to_conv(op: Node): 'Weight shape and calculated shape mismatch in GroupConv node {}.'.format(op.name) # we need to set this attrs for correct shape infer as convolution op['group'] = group + # The only way GroupConvolution with 'group' = 1 appears in IR is by converting from TF DepthwiseConv2dNative. + # In this case we need to specify 'op' parameter for the + # extensions.back.ConvolutionNormalizer.ConvolutionWithGroupsResolver to work properly. 
+ # Otherwise there will be 'Convolution' instead 'GroupConvolution' in restored IR, since 'GroupConvolution' is + # extended as node with 'type' = 'Convolution' by IR reader + if group == 1: + op['op'] = 'DepthwiseConv2dNative' op.type = 'Convolution' diff --git a/model-optimizer/unit_tests/extensions/back/ConvolutionNormalizer_test.py b/model-optimizer/unit_tests/extensions/back/ConvolutionNormalizer_test.py index d7a2f4809fc..47a7fc9ac28 100644 --- a/model-optimizer/unit_tests/extensions/back/ConvolutionNormalizer_test.py +++ b/model-optimizer/unit_tests/extensions/back/ConvolutionNormalizer_test.py @@ -102,7 +102,7 @@ class TestPullReshapeThroughFQ(unittest.TestCase): class TestV7ConvolutionWithGroupsResolver(unittest.TestCase): def test_v7_group_convolution_resolver(self): nodes = { - **regular_op_with_shaped_data('input', None, {'type': 'Parameter'}), + **regular_op_with_shaped_data('input', [1, 3, 224, 224], {'type': 'Parameter'}), **valued_const_with_data('weights', np.ones([3, 8, 7, 7])), @@ -133,7 +133,7 @@ class TestV7ConvolutionWithGroupsResolver(unittest.TestCase): def test_v7_group_convolution_resolver_weight_are_in_the_right_layout(self): nodes = { - **regular_op_with_shaped_data('input', None, {'type': 'Parameter'}), + **regular_op_with_shaped_data('input', [1, 3, 224, 224], {'type': 'Parameter'}), **valued_const_with_data('weights', np.ones([24, 1, 7, 7])), **regular_op_with_shaped_data('convolution', None, {'type': 'Convolution', 'group': 3, 'output': 24}), **result(), @@ -149,6 +149,38 @@ class TestV7ConvolutionWithGroupsResolver(unittest.TestCase): (flag, resp) = compare_graphs(graph, graph_ref, last_node='output', check_op_attrs=True) self.assertTrue(flag, resp) + def test_v7_group_convolution_resolver_depthwise_conv2d(self): + nodes = { + **regular_op_with_shaped_data('input', [1, 1, 224, 224], {'type': 'Parameter'}), + + **valued_const_with_data('weights', np.ones([1, 8, 7, 7])), + + **valued_const_with_data('dim', int64_array([8, -1, 7, 7])), + **regular_op_with_empty_data('reshape', {'type': 'Reshape'}), + + **regular_op_with_shaped_data('convolution', None, {'type': 'Convolution', 'group': 1, 'output': 8, + 'op': 'DepthwiseConv2dNative'}), + + **result(), + } + graph = build_graph(nodes, [ + *connect('input', '0:convolution'), + *connect('weights', '1:convolution'), + *connect('convolution', 'output'), + ], nodes_with_edges_only=True) + + V7ConvolutionWithGroupsResolver().find_and_replace_pattern(graph) + graph_ref = build_graph(nodes, [ + *connect('input', '0:convolution'), + *connect('weights', '0:reshape'), + *connect('dim', '1:reshape'), + *connect('reshape', '1:convolution'), + *connect('convolution', 'output'), + ], nodes_with_edges_only=True) + + (flag, resp) = compare_graphs(graph, graph_ref, last_node='output', check_op_attrs=True) + self.assertTrue(flag, resp) + class TestV10ConvolutionWithGroupsResolver(unittest.TestCase): def test_v10_group_convolution_resolver(self): @@ -185,3 +217,39 @@ class TestV10ConvolutionWithGroupsResolver(unittest.TestCase): (flag, resp) = compare_graphs(graph, graph_ref, last_node='output', check_op_attrs=True) self.assertTrue(flag, resp) + + def test_v10_group_convolution_resolver_depthwise_conv2d(self): + nodes = { + **regular_op_with_shaped_data('input', [1, 1, 224, 224], {'type': 'Parameter'}), + + **valued_const_with_data('weights', np.ones([1, 8, 7, 7])), + + **valued_const_with_data('dim', int64_array([1, 8, 1, 7, 7])), + **regular_op_with_empty_data('reshape', {'type': 'Reshape'}), + + 
**regular_op_with_shaped_data('convolution', None, {'type': 'Convolution', 'group': 1, 'output': 8, + 'op': 'DepthwiseConv2dNative'}), + + **result(), + } + graph = build_graph(nodes, [ + *connect('input', '0:convolution'), + *connect('weights', '1:convolution'), + *connect('convolution', 'output'), + ], nodes_with_edges_only=True) + + V10ConvolutionWithGroupsResolver().find_and_replace_pattern(graph) + + nodes['convolution']['type'] = 'GroupConvolution' + del nodes['convolution']['group'] + + graph_ref = build_graph(nodes, [ + *connect('input', '0:convolution'), + *connect('weights', '0:reshape'), + *connect('dim', '1:reshape'), + *connect('reshape', '1:convolution'), + *connect('convolution', 'output'), + ], nodes_with_edges_only=True) + + (flag, resp) = compare_graphs(graph, graph_ref, last_node='output', check_op_attrs=True) + self.assertTrue(flag, resp) From bc7f61be241544c14d82d2b66c83174bf5a13281 Mon Sep 17 00:00:00 2001 From: Katarzyna Mitrus Date: Tue, 8 Jun 2021 05:51:41 +0200 Subject: [PATCH 08/41] PRelu reference implementation and ReshapePRelu transformation alignment (#5915) * Apply ReshapePRelu transformation only to const slope input * Remove xfail from onnx backend prelu_broadcast test * Fix and add Prelu SLT * Update PRelu mkldnn transformation to extend broadcast support * Fix and update PRelu reference implementation * ONNX Prelu tests * Add prelu backend tests * Update ie tests manifest * Comments clean up * Fix STL Fill leakyslope blob * Code refactor * Unify layer tests slope input values generation --- .../ngraph_transformations/reshape_prelu.cpp | 12 +- .../single_layer_tests/activation.cpp | 28 +- .../skip_tests_config.cpp | 2 - .../single_layer/activation.hpp | 4 +- .../src/single_layer/activation.cpp | 92 +- .../ngraph/runtime/reference/prelu.hpp | 17 +- ngraph/python/tests/__init__.py | 2 - ngraph/python/tests/test_onnx/test_backend.py | 5 +- ngraph/test/CMakeLists.txt | 1 + ngraph/test/backend/fused_op.in.cpp | 49 - ngraph/test/backend/prelu.in.cpp | 942 ++++++++++++++++++ ngraph/test/models/onnx/prelu_1d.prototxt | 72 ++ .../test/models/onnx/prelu_batch_nd.prototxt | 81 ++ ngraph/test/models/onnx/prelu_c_1_1.prototxt | 78 ++ ngraph/test/onnx/onnx_import.in.cpp | 125 ++- ngraph/test/runtime/ie/unit_test.manifest | 3 - 16 files changed, 1362 insertions(+), 151 deletions(-) create mode 100644 ngraph/test/backend/prelu.in.cpp create mode 100644 ngraph/test/models/onnx/prelu_1d.prototxt create mode 100644 ngraph/test/models/onnx/prelu_batch_nd.prototxt create mode 100644 ngraph/test/models/onnx/prelu_c_1_1.prototxt diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_prelu.cpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_prelu.cpp index 0cc1a33cbc3..69fd75ea57a 100644 --- a/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_prelu.cpp +++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_prelu.cpp @@ -20,8 +20,16 @@ MKLDNNPlugin::ReshapePRelu::ReshapePRelu() { if (!prelu || ngraph::shape_size(prelu->get_input_shape(1)) == 1 || prelu->get_input_shape(1).size() != 1) { return false; } - ngraph::Shape new_shape(prelu->input_value(0).get_shape().size(), 1); - new_shape[new_shape.size() > 1 ? 
1 : 0] = prelu->input_value(1).get_shape()[0]; + const auto prelu_shape = prelu->input_value(0).get_shape(); + const auto slope_shape = prelu->input_value(1).get_shape(); + ngraph::Shape new_shape(prelu_shape.size(), 1); + const auto slope_dim = slope_shape[0]; + const auto channel_dim_idx = prelu_shape.size() > 1 ? 1 : 0; + if (slope_dim != prelu_shape[channel_dim_idx]) { + return false; + } + new_shape[channel_dim_idx] = slope_dim; + auto slope = ngraph::op::util::reshapeTo(prelu->input_value(1), new_shape); auto new_prelu = std::make_shared(prelu->input(0).get_source_output(), slope); new_prelu->set_friendly_name(prelu->get_friendly_name()); diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/activation.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/activation.cpp index 76607e55f76..6e762ed562c 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/activation.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/activation.cpp @@ -69,8 +69,8 @@ const std::map>> intActivationTy }; const std::map>> activationParamTypes = { - {PReLu, {{-0.01f}}}, - {LeakyRelu, {{0.01f}}} + {PReLu, {{}}}, // Slope will be filled with increasing values from -10 to match slope input shape + {LeakyRelu, {{0.01f}}} }; std::map, std::vector>> basic = { @@ -81,16 +81,12 @@ std::map, std::vector>> basic = { std::map, std::vector>> preluBasic = { {{1, 50}, {{1}, {50}}}, {{1, 128}, {{1}, {128}}}, - {{20, 128}, {{128}}}, - {{1, 20, 128}, {{1}, {20}}}, - {{1, 20, 128, 128}, {{1}, {20}}}, - {{1, 20, 20, 128, 128}, {{1}, {20}}} - // according to spec second input for PRelu must be 1D and must be broadcastabe per channel - // at this moment these cases unsupported - // {{20, 128}, {{20}, {20, 128}}}, - // {{1, 20, 128}, {{128}, {20, 128}}}, - // {{1, 20, 128, 128}, {{128}, {128, 128}, {20, 128, 128}}}, - // {{1, 20, 20, 128, 128}, {{128}, {128, 128}, {20, 128, 128}, {20, 20, 128, 128}}}, + + // Broadcast check + {{3, 2}, {{1}, {2}, {3, 2}}}, + {{3, 2, 5}, {{1}, {2}, {5}, {2, 5}, {3, 1, 5}, {1, 2, 1}, {1, 1, 5}, {3, 1, 1}, {3, 2, 5}}}, + {{2, 1, 2}, {{2}, {2, 1, 1}}}, + {{3, 2, 5, 7}, {{1}, {7}, {2}, {5, 7}, {2, 5, 7}, {2, 1, 1}, {1, 2, 1, 1}, {3, 2, 1, 1}, {3, 2, 5, 7}}}, }; const auto basicCases = ::testing::Combine( @@ -127,11 +123,9 @@ const auto basicIntegerOperations = ::testing::Combine( ); INSTANTIATE_TEST_CASE_P(smoke_Activation_Basic, ActivationLayerTest, basicCases, ActivationLayerTest::getTestCaseName); -INSTANTIATE_TEST_CASE_P(smoke_Integer_Activation_Basic, ActivationLayerTest, basicIntegerOperations, ActivationLayerTest::getTestCaseName); -INSTANTIATE_TEST_CASE_P(smoke_Activation_Basic_Prelu, ActivationLayerTest, basicPreluCases, ActivationLayerTest::getTestCaseName); - -INSTANTIATE_TEST_CASE_P(smoke_Activation_Basic, ActivationParamLayerTest, basicPreluCases, ActivationLayerTest::getTestCaseName); - INSTANTIATE_TEST_CASE_P(smoke_Activation_Basic, ActivationDynamicLayerTest, basicCases, ActivationLayerTest::getTestCaseName); +INSTANTIATE_TEST_CASE_P(smoke_Integer_Activation_Basic, ActivationLayerTest, basicIntegerOperations, ActivationLayerTest::getTestCaseName); +INSTANTIATE_TEST_CASE_P(smoke_Activation_Basic_Prelu_Const, ActivationLayerTest, basicPreluCases, ActivationLayerTest::getTestCaseName); +INSTANTIATE_TEST_CASE_P(smoke_Activation_Basic_Prelu_Param, ActivationParamLayerTest, basicPreluCases, 
ActivationLayerTest::getTestCaseName); } // namespace diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp index f59f8d954e0..b3134e9953e 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp @@ -35,8 +35,6 @@ std::vector disabledTestPatterns() { R"(.*ReluShapeOfSubgraphTest.*)", // TODO: Issue: 34805 R"(.*ActivationLayerTest.*Ceiling.*)", - // TODO: Issue: 32032 - R"(.*ActivationParamLayerTest.*)", // TODO: Issue: 43314 R"(.*Broadcast.*mode=BIDIRECTIONAL.*inNPrec=BOOL.*)", // TODO: Issue 43417 sporadic issue, looks like an issue in test, reproducible only on Windows platform diff --git a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/activation.hpp b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/activation.hpp index bdf996cd141..684942ee184 100644 --- a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/activation.hpp +++ b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/activation.hpp @@ -96,13 +96,11 @@ protected: }; class ActivationParamLayerTest : public ActivationLayerTest { -public: - void Infer() override; - protected: void SetUp() override; private: + InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo &info) const override; void generateActivationBlob(std::vector constantsValue); ngraph::ParameterVector createActivationParams( ngraph::element::Type ngPrc, std::vector inShape = {}); diff --git a/inference-engine/tests/functional/shared_test_classes/src/single_layer/activation.cpp b/inference-engine/tests/functional/shared_test_classes/src/single_layer/activation.cpp index 3136c604e7d..5b90cfc2079 100644 --- a/inference-engine/tests/functional/shared_test_classes/src/single_layer/activation.cpp +++ b/inference-engine/tests/functional/shared_test_classes/src/single_layer/activation.cpp @@ -41,6 +41,13 @@ void ActivationLayerTest::SetUp() { auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); auto params = ngraph::builder::makeParams(ngPrc, {shapes.first}); params[0]->set_friendly_name("Input"); + + if (activationType == ngraph::helpers::ActivationTypes::PReLu && constantsValue.empty()) { + const auto elemnts_count = ngraph::shape_size(shapes.second); + constantsValue.resize(elemnts_count); + std::iota(constantsValue.begin(), constantsValue.end(), -10); + } + auto activation = ngraph::builder::makeActivation(params[0], ngPrc, activationType, shapes.second, constantsValue); function = std::make_shared(ngraph::NodeVector{activation}, params); @@ -163,70 +170,26 @@ ngraph::ParameterVector ActivationParamLayerTest::createActivationParams(ngraph: } } -void ActivationParamLayerTest::generateActivationBlob(std::vector constantsValue) { - switch (activationType) { - case ngraph::helpers::ActivationTypes::PReLu: { - auto blobNegativeSlope = inferRequest.GetBlob("negativeSlope"); - float negativeSlope = constantsValue[0]; - blobNegativeSlope = FuncTestUtils::createAndFillBlobWithFloatArray(blobNegativeSlope->getTensorDesc(), &negativeSlope, 1); - inferRequest.SetBlob("negativeSlope", blobNegativeSlope); - inputs.push_back(blobNegativeSlope); - break; - } - case 
ngraph::helpers::ActivationTypes::LeakyRelu: { - auto blobLeakySlope = inferRequest.GetBlob("leakySlope"); - float leakySlope = constantsValue[0]; - blobLeakySlope = FuncTestUtils::createAndFillBlobWithFloatArray(blobLeakySlope->getTensorDesc(), &leakySlope, 1); - inferRequest.SetBlob("leakySlope", blobLeakySlope); - inputs.push_back(blobLeakySlope); - break; - } - case ngraph::helpers::ActivationTypes::HardSigmoid: { - auto blobHardSigmoidAlpha = inferRequest.GetBlob("alpha"); - auto blobHardSigmoidBeta = inferRequest.GetBlob("beta"); - float alpha = constantsValue[0], beta = constantsValue[1]; - blobHardSigmoidAlpha = FuncTestUtils::createAndFillBlobWithFloatArray(blobHardSigmoidAlpha->getTensorDesc(), &alpha, 1); - blobHardSigmoidBeta = FuncTestUtils::createAndFillBlobWithFloatArray(blobHardSigmoidBeta->getTensorDesc(), &beta, 1); - inferRequest.SetBlob("alpha", blobHardSigmoidAlpha); - inferRequest.SetBlob("beta", blobHardSigmoidBeta); - inputs.push_back(blobHardSigmoidAlpha); - inputs.push_back(blobHardSigmoidBeta); - break; - } - case ngraph::helpers::ActivationTypes::Selu: { - auto blobHardSigmoidAlpha = inferRequest.GetBlob("alpha"); - auto blobHardSigmoidLambda = inferRequest.GetBlob("lambda"); - float alpha = constantsValue[0], lambda = constantsValue[1]; - blobHardSigmoidAlpha = FuncTestUtils::createAndFillBlobWithFloatArray(blobHardSigmoidAlpha->getTensorDesc(), &alpha, 1); - blobHardSigmoidLambda = FuncTestUtils::createAndFillBlobWithFloatArray(blobHardSigmoidLambda->getTensorDesc(), &lambda, 1); - inferRequest.SetBlob("alpha", blobHardSigmoidAlpha); - inferRequest.SetBlob("lambda", blobHardSigmoidLambda); - inputs.push_back(blobHardSigmoidAlpha); - inputs.push_back(blobHardSigmoidLambda); - break; - } - default: - IE_THROW() << "Unsupported activation type for Params test type"; +InferenceEngine::Blob::Ptr ActivationParamLayerTest::GenerateInput(const InferenceEngine::InputInfo &info) const { + InferenceEngine::Blob::Ptr blobPtr; + const std::string& name = info.name(); + if (name == "negativeSlope") { + const auto elemnts_count = ngraph::shape_size(function->get_parameters()[1]->get_shape()); + std::vector param_data(elemnts_count); + std::iota(param_data.begin(), param_data.end(), -10); + blobPtr = FuncTestUtils::createAndFillBlobWithFloatArray(info.getTensorDesc(), ¶m_data[0], elemnts_count); + } else if (name == "leakySlope") { + const auto elemnts_count = ngraph::shape_size(function->get_parameters()[1]->get_shape()); + std::vector param_data(elemnts_count, constantsValue[0]); + blobPtr = FuncTestUtils::createAndFillBlobWithFloatArray(info.getTensorDesc(), ¶m_data[0], elemnts_count); + } else if (name == "alpha") { + blobPtr = FuncTestUtils::createAndFillBlobWithFloatArray(info.getTensorDesc(), &constantsValue[0], 1); + } else if (name == "beta" || name == "lambda") { + blobPtr = FuncTestUtils::createAndFillBlobWithFloatArray(info.getTensorDesc(), &constantsValue[1], 1); + } else { + blobPtr = FuncTestUtils::createAndFillBlob(info.getTensorDesc(), 20, -10, 1); } -} - -void ActivationParamLayerTest::Infer() { - inferRequest = executableNetwork.CreateInferRequest(); - - auto blobInput = inferRequest.GetBlob("Input"); - blobInput = FuncTestUtils::createAndFillBlobFloat(blobInput->getTensorDesc()); - inferRequest.SetBlob("Input", blobInput); - inputs.push_back(blobInput); - - generateActivationBlob(constantsValue); - - if (configuration.count(InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_ENABLED) && - configuration.count(InferenceEngine::PluginConfigParams::YES)) { - 
auto batchSize = executableNetwork.GetInputsInfo().begin()->second->getTensorDesc().getDims()[0] / 2; - inferRequest.SetBatch(batchSize); - } - - inferRequest.Infer(); + return blobPtr; } void ActivationParamLayerTest::SetUp() { @@ -245,7 +208,8 @@ void ActivationParamLayerTest::SetUp() { params.insert(params.end(), activationParams.begin(), activationParams.end()); auto activation = ngraph::builder::makeActivation(params, ngPrc, activationType); - function = std::make_shared(ngraph::NodeVector{activation}, params); + ngraph::ResultVector results{std::make_shared(activation)}; + function = std::make_shared(results, params); } void ActivationDynamicLayerTest::Run() { diff --git a/ngraph/core/reference/include/ngraph/runtime/reference/prelu.hpp b/ngraph/core/reference/include/ngraph/runtime/reference/prelu.hpp index 34a420399ac..d546bbd79af 100644 --- a/ngraph/core/reference/include/ngraph/runtime/reference/prelu.hpp +++ b/ngraph/core/reference/include/ngraph/runtime/reference/prelu.hpp @@ -24,12 +24,21 @@ namespace ngraph const Shape& arg_shape, const Shape& slope_shape) { - int cnt = 0; - for (size_t i = 0; i < shape_size(arg_shape); ++i) + Shape slope_shape_tmp = slope_shape; + const auto channel_dim_idx = arg_shape.size() > 1 ? 1 : 0; + if (slope_shape.size() == 1 && arg_shape[channel_dim_idx] == slope_shape[0]) { - out[i] = - arg[i] < T(0) ? T(arg[i] * slope[cnt++ % shape_size(slope_shape)]) : arg[i]; + Shape channel_slope_shape(arg_shape.size(), 1); + channel_slope_shape[channel_dim_idx] = slope_shape[0]; + std::swap(slope_shape_tmp, channel_slope_shape); } + autobroadcast_binop(arg, + slope, + out, + arg_shape, + slope_shape_tmp, + ngraph::op::AutoBroadcastType::NUMPY, + [](T x, T y) -> T { return x < T(0) ? T(x * y) : x; }); } } // namespace reference } // namespace runtime diff --git a/ngraph/python/tests/__init__.py b/ngraph/python/tests/__init__.py index 930be465a63..c179f312f1e 100644 --- a/ngraph/python/tests/__init__.py +++ b/ngraph/python/tests/__init__.py @@ -110,8 +110,6 @@ xfail_issue_46762 = xfail_test(reason="Incorrect result of Minimum op if uint da xfail_issue_47323 = xfail_test(reason="RuntimeError: The plugin does not support FP64") xfail_issue_47337 = xfail_test(reason="RuntimeError: Unsupported dynamic ops: v1::OneHot") xfail_issue_33593 = xfail_test(reason="Current implementation of MaxPool doesn't support indices output") -xfail_issue_51993 = xfail_test(reason="PRelu supports only 1D tensor for 'slope' input broadcasted" - "by channel") xfail_issue_55760 = xfail_test(reason="RuntimeError: Reversed axis have axes above the source space shape") # Model MSFT issues: diff --git a/ngraph/python/tests/test_onnx/test_backend.py b/ngraph/python/tests/test_onnx/test_backend.py index d411919e977..a8ee1cbb065 100644 --- a/ngraph/python/tests/test_onnx/test_backend.py +++ b/ngraph/python/tests/test_onnx/test_backend.py @@ -59,7 +59,6 @@ from tests import (BACKEND_NAME, xfail_issue_49753, xfail_issue_49754, xfail_issue_52463, - xfail_issue_51993, xfail_issue_55760) @@ -369,9 +368,7 @@ tests_expected_to_fail = [ "OnnxBackendNodeModelTest.test_quantizelinear_cpu"), (xfail_issue_33593, "OnnxBackendNodeModelTest.test_maxpool_with_argmax_2d_precomputed_strides_cpu", - "OnnxBackendNodeModelTest.test_maxpool_with_argmax_2d_precomputed_pads_cpu",), - (xfail_issue_51993, - "OnnxBackendNodeModelTest.test_prelu_broadcast_cpu",) + "OnnxBackendNodeModelTest.test_maxpool_with_argmax_2d_precomputed_pads_cpu",) ] for test_group in tests_expected_to_fail: diff --git 
a/ngraph/test/CMakeLists.txt b/ngraph/test/CMakeLists.txt index 52148150e81..ba3c6a9d052 100644 --- a/ngraph/test/CMakeLists.txt +++ b/ngraph/test/CMakeLists.txt @@ -399,6 +399,7 @@ set(MULTI_TEST_SRC backend/pad.in.cpp backend/parameter_as_output.in.cpp backend/power.in.cpp + backend/prelu.in.cpp backend/proposal.in.cpp backend/psroi_pooling.in.cpp backend/range.in.cpp diff --git a/ngraph/test/backend/fused_op.in.cpp b/ngraph/test/backend/fused_op.in.cpp index 74117a7fb9f..4b6bb15cb51 100644 --- a/ngraph/test/backend/fused_op.in.cpp +++ b/ngraph/test/backend/fused_op.in.cpp @@ -41,22 +41,6 @@ static string s_manifest = "${MANIFEST}"; using TestEngine = test::ENGINE_CLASS_NAME(${BACKEND_NAME}); -NGRAPH_TEST(${BACKEND_NAME}, prelu) -{ - Shape shape{3, 2}; - Shape rshape{3}; - auto A = make_shared(element::f32, shape); - auto B = make_shared(element::f32, rshape); - auto prelu = make_shared(A, B); - auto f = make_shared(NodeVector{prelu}, ParameterVector{A, B}); - std::vector a{-2, 3, -2, 1, -1, 0}; - std::vector b{0, 0.5, 1}; - - auto test_case = test::TestCase(f); - test_case.add_multiple_inputs({a, b}); - test_case.add_expected_output(vector{0, 3, -1, 1, -1, 0}); - test_case.run(); -} NGRAPH_TEST(${BACKEND_NAME}, hardsigmoid) { @@ -94,39 +78,6 @@ NGRAPH_TEST(${BACKEND_NAME}, hardsigmoid) test_case.run(); } -NGRAPH_TEST(${BACKEND_NAME}, prelu_shared_slope) -{ - Shape shape{3, 2}; - Shape rshape{}; - auto A = make_shared(element::f32, shape); - auto B = make_shared(element::f32, rshape); - auto prelu = make_shared(A, B); - auto f = make_shared(NodeVector{prelu}, ParameterVector{A, B}); - std::vector a{-2, 3, -2, 1, -1, 0}; - std::vector b{0.5}; - - auto test_case = test::TestCase(f); - test_case.add_multiple_inputs({a, b}); - test_case.add_expected_output(vector{-1, 3, -1, 1, -0.5, 0}); - test_case.run(); -} - -NGRAPH_TEST(${BACKEND_NAME}, prelu_negative_slope) -{ - Shape shape{3, 2}; - Shape rshape{}; - auto A = make_shared(element::f32, shape); - auto B = make_shared(element::f32, rshape); - auto prelu = make_shared(A, B); - auto f = make_shared(NodeVector{prelu}, ParameterVector{A, B}); - std::vector a{-2, 3, -2, 1, -1, 0}; - std::vector b{-0.5}; - - auto test_case = test::TestCase(f); - test_case.add_multiple_inputs({a, b}); - test_case.add_expected_output(vector{1, 3, 1, 1, 0.5, 0}); - test_case.run(); -} NGRAPH_TEST(${BACKEND_NAME}, space_to_depth_block_first) { diff --git a/ngraph/test/backend/prelu.in.cpp b/ngraph/test/backend/prelu.in.cpp new file mode 100644 index 00000000000..8523aeb39c3 --- /dev/null +++ b/ngraph/test/backend/prelu.in.cpp @@ -0,0 +1,942 @@ +// Co pyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// +#include "gtest/gtest.h" +#include "ngraph/ngraph.hpp" +#include "util/engine/test_engines.hpp" +#include "util/test_case.hpp" +#include "util/test_control.hpp" + +using namespace std; +using namespace ngraph; + +static string s_manifest = "${MANIFEST}"; +using TestEngine = test::ENGINE_CLASS_NAME(${BACKEND_NAME}); + +NGRAPH_TEST(${BACKEND_NAME}, prelu_1d_1_param) +{ + Shape shape_a{6}; + Shape shape_slope{1}; + + std::vector a{1, 2, -3, -4, 5, 6}; + std::vector slope{2}; + std::vector out{1, 2, -6, -8, 5, 6}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A, SLOPE}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a, slope}); + test_case.add_expected_output(shape_a, out); + 
test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_1d_1_const) +{ + Shape shape_a{6}; + Shape shape_slope{1}; + + std::vector a{1, 2, -3, -4, 5, 6}; + std::vector slope{2}; + std::vector out{1, 2, -6, -8, 5, 6}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope, slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_1d_param) +{ + Shape shape_a{6}; + Shape shape_slope{6}; + + std::vector a{1, 2, -3, -4, 5, 6}; + std::vector slope{2, 3, 4, 5, 6, 7}; + std::vector out{1, 2, -12, -20, 5, 6}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A, SLOPE}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a, slope}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_1d_6_const) +{ + Shape shape_a{6}; + Shape shape_slope{6}; + + std::vector a{1, 2, -3, -4, 5, 6}; + std::vector slope{2, 3, 4, 5, 6, 7}; + std::vector out{1, 2, -12, -20, 5, 6}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope, slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_2d_2_W_param) +{ + Shape shape_a{3, 2}; + Shape shape_slope{2}; + + std::vector a{-2, 3, -2, 1, -1, 0}; + std::vector slope{0, 1}; + std::vector out{0, 3, 0, 1, 0, 0}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A, SLOPE}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a, slope}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_2d_2_W_const) +{ + Shape shape_a{3, 2}; + Shape shape_slope{2}; + + std::vector a{-2, 3, -2, 1, -1, 0}; + std::vector slope{0, 1}; + std::vector out{0, 3, 0, 1, 0, 0}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope, slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_2d_6_W_param) +{ + Shape shape_a{2, 6}; + Shape shape_slope{6}; + + std::vector a{1, 2, -3, -4, 5, 6, + 7, 8, -9, -10, 11, 12}; + std::vector slope{1, 2, 3, 4, 5, 6}; + std::vector out{1, 2, -9, -16, 5, 6, + 7, 8, -27, -40, 11, 12}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A, SLOPE}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a, slope}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_2d_6_W_const) +{ + Shape shape_a{2, 6}; + Shape shape_slope{6}; + + std::vector a{1, 2, -3, -4, 5, 6, + 
7, 8, -9, -10, 11, 12}; + std::vector slope{1, 2, 3, 4, 5, 6}; + std::vector out{1, 2, -9, -16, 5, 6, + 7, 8, -27, -40, 11, 12}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope, slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_2d_C_2_param) +{ + Shape shape_a{3, 2}; + Shape shape_slope{2}; + + std::vector a{-1, -1, -1, -1, -1, -1}; + std::vector slope{2, 0.5}; + std::vector out{-2, -0.5, -2, -0.5, -2, -0.5}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A, SLOPE}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a, slope}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_2d_C_2_const) +{ + Shape shape_a{3, 2}; + Shape shape_slope{2}; + + std::vector a{-1, -1, -1, -1, -1, -1}; + std::vector slope{2, 0.5}; + std::vector out{-2, -0.5, -2, -0.5, -2, -0.5}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope, slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_negative_equal_dims_slope_param) +{ + Shape shape_a{2, 2, 2}; + Shape shape_slope{2}; + + std::vector a{-0.5, -2, -3, -4, -5, -6, -7, -8}; + std::vector slope{-0.5, -2}; + // std::vector out{0.25, 4, 1.5, 8, 2.5, 12, 3.5, 16}; // broadcast (1, 1, 2) + std::vector out{0.25, 1, 6, 8, 2.5, 3, 14, 16}; // broadcast (1, 2, 1) + // std::vector out{0.25, 1, 1.5, 2, 10, 12, 14, 16}; // broadcast (2, 1, 1) + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A, SLOPE}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a, slope}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_negative_equal_dims_slope_const) +{ + Shape shape_a{2, 2, 2}; + Shape shape_slope{2}; + + std::vector a{-0.5, -2, -3, -4, -5, -6, -7, -8}; + std::vector slope{-0.5, -2}; + // std::vector out{0.25, 4, 1.5, 8, 2.5, 12, 3.5, 16}; // broadcast (1, 1, 2) + std::vector out{0.25, 1, 6, 8, 2.5, 3, 14, 16}; // broadcast (1, 2, 1) + // std::vector out{0.25, 1, 1.5, 2, 10, 12, 14, 16}; // broadcast (2, 1, 1) + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope, slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_negative_slope_param) +{ + Shape shape_a{3, 2}; + Shape shape_slope{2}; + + std::vector a{-2, 3, -2, 1, -1, 0}; + std::vector slope{-0.5, -1}; + std::vector out{1, 3, 1, 1, 0.5, 0}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope); + const auto f = 
make_shared(make_shared(A, SLOPE), ParameterVector{A, SLOPE}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a, slope}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_negative_slope_const) +{ + Shape shape_a{3, 2}; + Shape shape_slope{2}; + + std::vector a{-2, 3, -2, 1, -1, 0}; + std::vector slope{-0.5, -1}; + std::vector out{1, 3, 1, 1, 0.5, 0}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope, slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_2d_same_shape_param) +{ + Shape shape_a{2, 6}; + Shape shape_slope{2, 6}; + + std::vector a{1, 2, -3, -4, 5, 6, + 1, 2, -3, -4, 5, 6}; + std::vector slope{2, 2, 2, 2, 2, 2, + 1, 1, 4, 2, 1, 1}; + std::vector out{1, 2, -6, -8, 5, 6, 1, 2, -12, -8, 5, 6}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A, SLOPE}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a, slope}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_2d_same_shape_const) +{ + Shape shape_a{2, 6}; + Shape shape_slope{2, 6}; + + std::vector a{1, 2, -3, -4, 5, 6, + 1, 2, -3, -4, 5, 6}; + std::vector slope{2, 2, 2, 2, 2, 2, + 1, 1, 4, 2, 1, 1}; + std::vector out{1, 2, -6, -8, 5, 6, 1, 2, -12, -8, 5, 6}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope, slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_C_1_W_slope_param) +{ + Shape shape_a{2, 2, 2, 2}; + Shape shape_slope{2, 1, 2}; + + std::vector a{1, 2, -3, -4, 1, 2, -3, -4, 1, 2, -3, -4, 1, 2, -3, -4}; + std::vector slope{1, 2, 3, 4}; + std::vector out{1, 2, -3, -8, 1, 2, -9, -16, 1, 2, -3, -8, 1, 2, -9, -16}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A, SLOPE}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a, slope}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_C_1_W_slope_const) +{ + Shape shape_a{2, 2, 2, 2}; + Shape shape_slope{2, 1, 2}; + + std::vector a{1, 2, -3, -4, 1, 2, -3, -4, 1, 2, -3, -4, 1, 2, -3, -4}; + std::vector slope{1, 2, 3, 4}; + std::vector out{1, 2, -3, -8, 1, 2, -9, -16, 1, 2, -3, -8, 1, 2, -9, -16}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope, slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_4d_slope_1_C_1_W_param) +{ + Shape shape_a{2, 2, 2, 2}; + Shape shape_slope{1, 2, 1, 2}; + + std::vector a{1, 2, -3, -4, 1, 2, -3, -4, 1, 2, -3, -4, 1, 2, -3, 
-4}; + std::vector slope{1, 2, 3, 4}; + std::vector out{1, 2, -3, -8, 1, 2, -9, -16, 1, 2, -3, -8, 1, 2, -9, -16}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A, SLOPE}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a, slope}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_4d_slope_1_C_1_W_const) +{ + Shape shape_a{2, 2, 2, 2}; + Shape shape_slope{1, 2, 1, 2}; + + std::vector a{1, 2, -3, -4, 1, 2, -3, -4, 1, 2, -3, -4, 1, 2, -3, -4}; + std::vector slope{1, 2, 3, 4}; + std::vector out{1, 2, -3, -8, 1, 2, -9, -16, 1, 2, -3, -8, 1, 2, -9, -16}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope, slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_3d_W_param) +{ + Shape shape_a{2, 2, 6}; + Shape shape_slope{2, 1, 6}; + + + std::vector a{1, 2, -3, -4, -5, 6, + -1, -2, -3, -4, -5, -6, + 1, 2, -3, -4, 5, 6, + -2, 4, -6, -8, 10, 12}; + std::vector slope{2, 1, 3, 4, 1, 7, + 1, 2, 3, 4, 5, 6}; + std::vector out{1, 2, -9, -16, -5, 6, + -2, -2, -9, -16, -5, -42, + 1, 2, -9, -16, 5, 6, + -2, 4, -18, -32, 10, 12}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A, SLOPE}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a, slope}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_3d_W_const) +{ + Shape shape_a{2, 2, 6}; + Shape shape_slope{2, 1, 6}; + + std::vector a{1, 2, -3, -4, -5, 6, + -1, -2, -3, -4, -5, -6, + 1, 2, -3, -4, 5, 6, + -2, 4, -6, -8, 10, 12}; + std::vector slope{2, 1, 3, 4, 1, 7, + 1, 2, 3, 4, 5, 6}; + std::vector out{1, 2, -9, -16, -5, 6, + -2, -2, -9, -16, -5, -42, + 1, 2, -9, -16, 5, 6, + -2, 4, -18, -32, 10, 12}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope, slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_3d_same_shape_param) +{ + Shape shape_a{2, 3, 2}; + Shape shape_slope{2, 3, 2}; + + std::vector a{1, 2, -3, -4, -5, 6, + -1, -2, -3, -4,-5, -6,}; + std::vector slope{2, 1, 3, 4, 1, 7, + 1, 2, 3, 4, 5, 6}; + std::vector out{1, 2, -9, -16, -5, 6, + -1, -4, -9, -16, -25, -36}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A, SLOPE}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a, slope}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_3d_same_shape_const) +{ + Shape shape_a{2, 3, 2}; + Shape shape_slope{2, 3, 2}; + + std::vector a{1, 2, -3, -4, -5, 6, + -1, -2, -3, -4,-5, -6,}; + std::vector slope{2, 1, 3, 4, 1, 7, + 1, 2, 3, 4, 5, 6}; + std::vector out{1, 2, -9, -16, 
-5, 6, + -1, -4, -9, -16, -25, -36}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope, slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_3d_broadcast_C_W_slope_param) +{ + Shape shape_a{2, 1, 2}; + Shape shape_slope{2}; + + std::vector a{-10, -10, -10, -10}; + std::vector slope{0.1, 10}; + std::vector out{-1, -100, -1, -100}; // broadcast (1, 2, 1) + // std::vector out{-1, -1, -100, -100}; // broadcast (2, 1, 1) + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A, SLOPE}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a, slope}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + + +NGRAPH_TEST(${BACKEND_NAME}, prelu_3d_broadcast_C_W_slope_const) +{ + Shape shape_a{2, 1, 2}; + Shape shape_slope{2}; + + std::vector a{-10, -10, -10, -10}; + std::vector slope{0.1, 10}; + std::vector out{-1, -100, -1, -100}; // broadcast (1, 2, 1) + // std::vector out{-1, -1, -100, -100}; // broadcast (2, 1, 1) + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope, slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_4d_broadcast_C_W_slope_param) +{ + Shape shape_a{1, 2, 1, 2}; + Shape shape_slope{2}; + + std::vector a{-10, -10, -10, -10}; + std::vector slope{0.1, 10}; + // std::vector out{-1, -100, -1, -100}; // broadcast (1, 1, 1, 2) + std::vector out{-1, -1, -100, -100}; // broadcast (1, 2, 1, 1) + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A, SLOPE}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a, slope}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_4d_broadcast_C_W_slope_const) +{ + Shape shape_a{1, 2, 1, 2}; + Shape shape_slope{2}; + + std::vector a{-10, -10, -10, -10}; + std::vector slope{0.1, 10}; + // std::vector out{-1, -100, -1, -100}; // broadcast (1, 1, 1, 2) + std::vector out{-1, -1, -100, -100}; // broadcast (1, 2, 1, 1) + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope, slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_4d_broadcast_C_slope_param) +{ + Shape shape_a{1, 5, 1, 1}; + Shape shape_slope{5}; + + std::vector a{-1, 0, -1, -1, -1}; + std::vector slope{1, 2, 3, 4, 5}; + std::vector out{-1, 0, -3, -4, -5}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A, SLOPE}); + + auto test_case = test::TestCase(f); + 
test_case.add_multiple_inputs({a, slope}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_4d_broadcast_C_slope_const) +{ + Shape shape_a{1, 5, 1, 1}; + Shape shape_slope{5}; + + std::vector a{-1, 0, -1, -1, -1}; + std::vector slope{1, 2, 3, 4, 5}; + std::vector out{-1, 0, -3, -4, -5}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope, slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_batch_nd_elementwise_param) +{ + Shape shape_a{2, 3, 4, 5}; + Shape shape_slope{2, 3, 4, 5}; + + std::vector a{-1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1.}; + std::vector slope(shape_size(shape_slope)); + std::iota(std::begin(slope), std::end(slope), 0); + + std::vector out{-0., -1., -2., -3., -4., -5., -6., -7., -8., + -9., -10., -11., -12., -13., -14., -15., -16., -17., + -18., -19., -20., -21., -22., -23., -24., -25., -26., + -27., -28., -29., -30., -31., -32., -33., -34., -35., + -36., -37., -38., -39., -40., -41., -42., -43., -44., + -45., -46., -47., -48., -49., -50., -51., -52., -53., + -54., -55., -56., -57., -58., -59., -60., -61., -62., + -63., -64., -65., -66., -67., -68., -69., -70., -71., + -72., -73., -74., -75., -76., -77., -78., -79., -80., + -81., -82., -83., -84., -85., -86., -87., -88., -89., + -90., -91., -92., -93., -94., -95., -96., -97., -98., + -99., -100., -101., -102., -103., -104., -105., -106., -107., + -108., -109., -110., -111., -112., -113., -114., -115., -116., + -117., -118., -119.}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A, SLOPE}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a, slope}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_batch_nd_elementwise_const) +{ + Shape shape_a{2, 3, 4, 5}; + Shape shape_slope{2, 3, 4, 5}; + + std::vector a{-1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1.}; + std::vector slope(shape_size(shape_slope)); + std::iota(std::begin(slope), std::end(slope), 0); + + 
std::vector out{-0., -1., -2., -3., -4., -5., -6., -7., -8., + -9., -10., -11., -12., -13., -14., -15., -16., -17., + -18., -19., -20., -21., -22., -23., -24., -25., -26., + -27., -28., -29., -30., -31., -32., -33., -34., -35., + -36., -37., -38., -39., -40., -41., -42., -43., -44., + -45., -46., -47., -48., -49., -50., -51., -52., -53., + -54., -55., -56., -57., -58., -59., -60., -61., -62., + -63., -64., -65., -66., -67., -68., -69., -70., -71., + -72., -73., -74., -75., -76., -77., -78., -79., -80., + -81., -82., -83., -84., -85., -86., -87., -88., -89., + -90., -91., -92., -93., -94., -95., -96., -97., -98., + -99., -100., -101., -102., -103., -104., -105., -106., -107., + -108., -109., -110., -111., -112., -113., -114., -115., -116., + -117., -118., -119.}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope, slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_1d_W_slope_param) +{ + Shape shape_a{2, 3, 4, 5}; + Shape shape_slope{5}; + + std::vector a{-1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1.}; + + std::vector slope{0, 1, 2, 3, 4}; + + std::vector out{-0., -1., -2., -3., -4., -0., -1., -2., -3., -4., -0., -1., -2., + -3., -4., -0., -1., -2., -3., -4., -0., -1., -2., -3., -4., -0., + -1., -2., -3., -4., -0., -1., -2., -3., -4., -0., -1., -2., -3., + -4., -0., -1., -2., -3., -4., -0., -1., -2., -3., -4., -0., -1., + -2., -3., -4., -0., -1., -2., -3., -4., -0., -1., -2., -3., -4., + -0., -1., -2., -3., -4., -0., -1., -2., -3., -4., -0., -1., -2., + -3., -4., -0., -1., -2., -3., -4., -0., -1., -2., -3., -4., -0., + -1., -2., -3., -4., -0., -1., -2., -3., -4., -0., -1., -2., -3., + -4., -0., -1., -2., -3., -4., -0., -1., -2., -3., -4., -0., -1., + -2., -3., -4.}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A, SLOPE}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a, slope}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_1d_W_slope_const) +{ + Shape shape_a{2, 3, 4, 5}; + Shape shape_slope{5}; + + std::vector a{-1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., 
-1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1.}; + + std::vector slope{0, 1, 2, 3, 4}; + + std::vector out{-0., -1., -2., -3., -4., -0., -1., -2., -3., -4., -0., -1., -2., + -3., -4., -0., -1., -2., -3., -4., -0., -1., -2., -3., -4., -0., + -1., -2., -3., -4., -0., -1., -2., -3., -4., -0., -1., -2., -3., + -4., -0., -1., -2., -3., -4., -0., -1., -2., -3., -4., -0., -1., + -2., -3., -4., -0., -1., -2., -3., -4., -0., -1., -2., -3., -4., + -0., -1., -2., -3., -4., -0., -1., -2., -3., -4., -0., -1., -2., + -3., -4., -0., -1., -2., -3., -4., -0., -1., -2., -3., -4., -0., + -1., -2., -3., -4., -0., -1., -2., -3., -4., -0., -1., -2., -3., + -4., -0., -1., -2., -3., -4., -0., -1., -2., -3., -4., -0., -1., + -2., -3., -4.}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope, slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_1d_C_slope_param) +{ + Shape shape_a{2, 3, 4, 5}; + Shape shape_slope{3}; + + std::vector a{-1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1.}; + + std::vector slope{0, 1, 2}; + + std::vector out{-0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., + -0., -0., -0., -0., -0., -0., -0., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., + -2., -2., -2., -2., -2., -2., -2., -2., -0., -0., -0., -0., -0., + -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., + -0., -0., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -2., -2., -2., -2., + -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., + -2., -2., -2.}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A, SLOPE}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a, slope}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_1d_C_slope_const) +{ + Shape shape_a{2, 3, 4, 5}; + Shape shape_slope{3}; + + std::vector a{-1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., 
-1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1.}; + + std::vector slope{0, 1, 2}; + + std::vector out{-0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., + -0., -0., -0., -0., -0., -0., -0., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., + -2., -2., -2., -2., -2., -2., -2., -2., -0., -0., -0., -0., -0., + -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., + -0., -0., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -2., -2., -2., -2., + -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., + -2., -2., -2.}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope, slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_C_1_1_slope_param) +{ + Shape shape_a{2, 3, 4, 5}; + Shape shape_slope{3, 1, 1}; + + std::vector a{-1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1.}; + + std::vector slope{0, 1, 2}; + + std::vector out{-0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., + -0., -0., -0., -0., -0., -0., -0., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., + -2., -2., -2., -2., -2., -2., -2., -2., -0., -0., -0., -0., -0., + -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., + -0., -0., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -2., -2., -2., -2., + -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., + -2., -2., -2.}; + + const auto A = make_shared(element::f32, shape_a); + const auto SLOPE = make_shared(element::f32, shape_slope); + const auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A, SLOPE}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a, slope}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, prelu_C_1_1_slope_const) +{ + Shape shape_a{2, 3, 4, 5}; + Shape shape_slope{3, 1, 1}; + + std::vector a{-1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., 
-1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1.}; + + std::vector slope{0, 1, 2}; + + std::vector out{-0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., + -0., -0., -0., -0., -0., -0., -0., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., + -2., -2., -2., -2., -2., -2., -2., -2., -0., -0., -0., -0., -0., + -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., + -0., -0., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -2., -2., -2., -2., + -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., + -2., -2., -2.}; + + auto A = make_shared(element::f32, shape_a); + auto SLOPE = make_shared(element::f32, shape_slope, slope); + auto f = make_shared(make_shared(A, SLOPE), ParameterVector{A}); + + auto test_case = test::TestCase(f); + test_case.add_multiple_inputs({a}); + test_case.add_expected_output(shape_a, out); + test_case.run(); +} diff --git a/ngraph/test/models/onnx/prelu_1d.prototxt b/ngraph/test/models/onnx/prelu_1d.prototxt new file mode 100644 index 00000000000..9034d1d42f6 --- /dev/null +++ b/ngraph/test/models/onnx/prelu_1d.prototxt @@ -0,0 +1,72 @@ +ir_version: 7 +producer_name: "onnx-importer-test" +graph { + node { + input: "X" + input: "SLOPE" + output: "Y" + op_type: "PRelu" + } + name: "test-model-prelu" + input { + name: "X" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 3 + } + dim { + dim_value: 4 + } + dim { + dim_value: 5 + } + } + } + } + } + input { + name: "SLOPE" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 5 + } + } + } + } + } + output { + name: "Y" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 3 + } + dim { + dim_value: 4 + } + dim { + dim_value: 5 + } + } + } + } + } +} +opset_import { + domain: "" + version: 12 +} diff --git a/ngraph/test/models/onnx/prelu_batch_nd.prototxt b/ngraph/test/models/onnx/prelu_batch_nd.prototxt new file mode 100644 index 00000000000..cf693db83c3 --- /dev/null +++ b/ngraph/test/models/onnx/prelu_batch_nd.prototxt @@ -0,0 +1,81 @@ +ir_version: 7 +producer_name: "onnx-importer-test" +graph { + node { + input: "X" + input: "SLOPE" + output: "Y" + op_type: "PRelu" + } + name: "test-model-prelu" + input { + name: "X" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 3 + } + dim { + dim_value: 4 + } + dim { + dim_value: 5 + } + } + } + } + } + input { + name: "SLOPE" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 3 + } + dim { + dim_value: 4 + } + dim { + dim_value: 5 + } + } + } + } + } + output { + name: "Y" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 3 + } + dim { + dim_value: 4 + } + dim { + dim_value: 5 + } + } + } + } + } +} +opset_import { + domain: "" + version: 12 +} diff --git a/ngraph/test/models/onnx/prelu_c_1_1.prototxt b/ngraph/test/models/onnx/prelu_c_1_1.prototxt new file mode 100644 index 00000000000..83f2a748860 --- /dev/null +++ b/ngraph/test/models/onnx/prelu_c_1_1.prototxt @@ -0,0 +1,78 @@ +ir_version: 7 +producer_name: "onnx-importer-test" +graph { + node { + input: "X" + input: "SLOPE" + output: "Y" + op_type: "PRelu" + } + name: "test-model-prelu" + input { + name: "X" + type { + tensor_type { + elem_type: 1 + shape { + 
dim { + dim_value: 2 + } + dim { + dim_value: 3 + } + dim { + dim_value: 4 + } + dim { + dim_value: 5 + } + } + } + } + } + input { + name: "SLOPE" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 3 + } + dim { + dim_value: 1 + } + dim { + dim_value: 1 + } + } + } + } + } + output { + name: "Y" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 3 + } + dim { + dim_value: 4 + } + dim { + dim_value: 5 + } + } + } + } + } +} +opset_import { + domain: "" + version: 12 +} diff --git a/ngraph/test/onnx/onnx_import.in.cpp b/ngraph/test/onnx/onnx_import.in.cpp index 6a5ecfdcdf8..a8522c08fa1 100644 --- a/ngraph/test/onnx/onnx_import.in.cpp +++ b/ngraph/test/onnx/onnx_import.in.cpp @@ -2101,7 +2101,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_leaky_relu) test_case.run(); } -NGRAPH_TEST(${BACKEND_NAME}, onnx_model_prelu) +NGRAPH_TEST(${BACKEND_NAME}, onnx_model_prelu_nd) { auto function = onnx_import::import_onnx_model(file_util::path_join(SERIALIZED_ZOO, "onnx/prelu.prototxt")); @@ -2133,6 +2133,129 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_prelu) test_case.run(); } +NGRAPH_TEST(${BACKEND_NAME}, onnx_model_prelu_batch_nd_elementwise) +{ + auto function = + onnx_import::import_onnx_model(file_util::path_join(SERIALIZED_ZOO, "onnx/prelu_batch_nd.prototxt")); + + Inputs inputs; + // Shape{2, 3, 4, 5} + inputs.emplace_back(std::vector{-1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1.}); + + // Shape{2, 3, 4, 5} + std::vector slope(shape_size(Shape{2, 3, 4, 5})); + std::iota(std::begin(slope), std::end(slope), 0); + inputs.emplace_back(slope); + + // Shape{2, 3, 4, 5} + auto expected_output = std::vector{-0., -1., -2., -3., -4., -5., -6., -7., -8., + -9., -10., -11., -12., -13., -14., -15., -16., -17., + -18., -19., -20., -21., -22., -23., -24., -25., -26., + -27., -28., -29., -30., -31., -32., -33., -34., -35., + -36., -37., -38., -39., -40., -41., -42., -43., -44., + -45., -46., -47., -48., -49., -50., -51., -52., -53., + -54., -55., -56., -57., -58., -59., -60., -61., -62., + -63., -64., -65., -66., -67., -68., -69., -70., -71., + -72., -73., -74., -75., -76., -77., -78., -79., -80., + -81., -82., -83., -84., -85., -86., -87., -88., -89., + -90., -91., -92., -93., -94., -95., -96., -97., -98., + -99., -100., -101., -102., -103., -104., -105., -106., -107., + -108., -109., -110., -111., -112., -113., -114., -115., -116., + -117., -118., -119.}; + + auto test_case = test::TestCase(function); + test_case.add_multiple_inputs(inputs); + test_case.add_expected_output(expected_output); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, onnx_model_prelu_1d) +{ + auto function = + onnx_import::import_onnx_model(file_util::path_join(SERIALIZED_ZOO, "onnx/prelu_1d.prototxt")); + + Inputs inputs; + // Shape{2, 3, 4, 5} + inputs.emplace_back(std::vector{-1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + 
-1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1.}); + + // Shape{5} + inputs.emplace_back(std::vector{0, 1, 2, 3, 4}); + + // Shape{2, 3, 4, 5} + auto expected_output = std::vector{-0., -1., -2., -3., -4., -0., -1., -2., -3., -4., -0., -1., -2., + -3., -4., -0., -1., -2., -3., -4., -0., -1., -2., -3., -4., -0., + -1., -2., -3., -4., -0., -1., -2., -3., -4., -0., -1., -2., -3., + -4., -0., -1., -2., -3., -4., -0., -1., -2., -3., -4., -0., -1., + -2., -3., -4., -0., -1., -2., -3., -4., -0., -1., -2., -3., -4., + -0., -1., -2., -3., -4., -0., -1., -2., -3., -4., -0., -1., -2., + -3., -4., -0., -1., -2., -3., -4., -0., -1., -2., -3., -4., -0., + -1., -2., -3., -4., -0., -1., -2., -3., -4., -0., -1., -2., -3., + -4., -0., -1., -2., -3., -4., -0., -1., -2., -3., -4., -0., -1., + -2., -3., -4.}; + + auto test_case = test::TestCase(function); + test_case.add_multiple_inputs(inputs); + test_case.add_expected_output(expected_output); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, onnx_model_prelu_C_1_1) +{ + auto function = + onnx_import::import_onnx_model(file_util::path_join(SERIALIZED_ZOO, "onnx/prelu_c_1_1.prototxt")); + + Inputs inputs; + // Shape{2, 3, 4, 5} + inputs.emplace_back(std::vector{-1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1.}); + + // Shape{3, 1, 1} + inputs.emplace_back(std::vector{0, 1, 2}); + + // Shape{2, 3, 4, 5} + auto expected_output = std::vector{-0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., + -0., -0., -0., -0., -0., -0., -0., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., + -2., -2., -2., -2., -2., -2., -2., -2., -0., -0., -0., -0., -0., + -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., + -0., -0., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., + -1., -1., -1., -1., -1., -1., -1., -1., -1., -2., -2., -2., -2., + -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., + -2., -2., -2.}; + + auto test_case = test::TestCase(function); + test_case.add_multiple_inputs(inputs); + test_case.add_expected_output(expected_output); + test_case.run(); +} + NGRAPH_TEST(${BACKEND_NAME}, onnx_model_selu) { auto function = diff --git a/ngraph/test/runtime/ie/unit_test.manifest b/ngraph/test/runtime/ie/unit_test.manifest index 9d6ea205f82..c476f2dfac3 100644 --- a/ngraph/test/runtime/ie/unit_test.manifest +++ b/ngraph/test/runtime/ie/unit_test.manifest @@ -758,9 +758,6 @@ lrn_2d_across_outermost_axis 
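The three PRelu tests above differ only in how the slope tensor broadcasts against the {2, 3, 4, 5} input. As a sanity check of the expected outputs, here is a minimal reference sketch (not part of the patch; the function and variable names are illustrative) for the {3, 1, 1} slope case, where broadcasting reduces to indexing the slope by channel:

#include <cstddef>
#include <vector>

// out[i] = x[i] if x[i] >= 0, otherwise slope[c] * x[i], where c is the channel index.
// With input shape {N, C, H, W} = {2, 3, 4, 5} and slope shape {3, 1, 1}, the slope
// broadcasts over N, H and W, so only the channel index selects the slope value.
std::vector<float> prelu_c_1_1_reference(const std::vector<float>& x,
                                         const std::vector<float>& slope,
                                         std::size_t N, std::size_t C,
                                         std::size_t H, std::size_t W) {
    std::vector<float> out(x.size());
    for (std::size_t n = 0; n < N; ++n)
        for (std::size_t c = 0; c < C; ++c)
            for (std::size_t h = 0; h < H; ++h)
                for (std::size_t w = 0; w < W; ++w) {
                    const std::size_t i = ((n * C + c) * H + h) * W + w;
                    out[i] = x[i] >= 0.f ? x[i] : slope[c] * x[i];
                }
    return out;
}
// For x filled with -1 and slope {0, 1, 2}, channel 0 yields -0, channel 1 yields -1 and
// channel 2 yields -2, matching the expected output of onnx_model_prelu_C_1_1.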
dyn_group_convolution_backprop_filters # Layer input port 1 is not connected to any data -prelu -prelu_shared_slope -prelu_negative_slope convolution_simple_padding # Cannot cast ngraph node Stack to CNNLayer! From 6022df66877219abed3e6f0705055d2edb460d1a Mon Sep 17 00:00:00 2001 From: Maria Kaglinskaya Date: Tue, 8 Jun 2021 09:49:53 +0300 Subject: [PATCH 09/41] Pruning with FQ support (#5925) * Add FQ, Concat, exted Eltwise support * Fix tests after rebase + small refactoring * Added Reshape on GroupConv weights mask propagating * Added printing of reduced weights to test transformation * Turn off pruning for test * Fixed comments + revert transformation comment * Fixed last comments --- .../include/mask_attribute.hpp | 80 ++++ .../include/pruning.hpp | 11 + .../src/pruning/init_const_mask.cpp | 2 +- .../src/pruning/init_masks.cpp | 64 +++ .../src/pruning/propagate_masks.cpp | 415 +++++++++++++--- .../src/pruning/pruning.cpp | 5 + .../src/pruning/shrink_weights.cpp | 2 + .../transformations/pruning_test.cpp | 444 +++++++++++++++++- 8 files changed, 946 insertions(+), 77 deletions(-) create mode 100644 inference-engine/src/offline_transformations/src/pruning/init_masks.cpp diff --git a/inference-engine/src/offline_transformations/include/mask_attribute.hpp b/inference-engine/src/offline_transformations/include/mask_attribute.hpp index 70cce141567..48c5b4ee9f0 100644 --- a/inference-engine/src/offline_transformations/include/mask_attribute.hpp +++ b/inference-engine/src/offline_transformations/include/mask_attribute.hpp @@ -54,10 +54,90 @@ public: }); } + std::vector get_not_empty_dims() { + std::vector not_empty_dims; + for (size_t i = 0; i < this->size(); i++) { + if (!this->at(i).empty()) + not_empty_dims.push_back(i); + } + return not_empty_dims; + } + bool is_shape_like() const { return m_is_shape_like; } void set_shape_like(bool flag) { m_is_shape_like = flag; } + void copy_value_from_mask(Mask *const mask) { + auto cur_mask_iter = begin(); + auto mask_iter = mask->begin(); + while (cur_mask_iter != end() && mask_iter != mask->end()) { + *cur_mask_iter = *mask_iter; + + cur_mask_iter++; + mask_iter++; + } + } + + void copy_value_from_mask_reversed(Mask *const mask) { + auto cur_mask_iter = rbegin(); + auto mask_iter = mask->rbegin(); + while (cur_mask_iter != rend() && mask_iter != mask->rend()) { + *cur_mask_iter = *mask_iter; + + cur_mask_iter++; + mask_iter++; + } + } + + Mask::Ptr intersect_masks_reversed(Mask *const mask) { + auto result_mask = std::make_shared(std::max(size(), mask->size())); + auto result_iter = result_mask->rbegin(); + auto mask_1_iter = rbegin(); + auto mask_2_iter = mask->rbegin(); + + while (mask_1_iter != rend() && + mask_2_iter != mask->rend()) { + // Merge mask dimension values for both masks + // Example: (MaskValue[1,2,3,4], MaskValue[2,3]) -> MaskValue[2,3] + for (const auto & value : *mask_1_iter) { + if (mask_2_iter->count(value)) { + result_iter->insert(value); + } + } + + result_iter++; + mask_1_iter++; + mask_2_iter++; + } + return result_mask; + } + + Mask::Ptr union_masks_reversed(Mask *const mask) { + auto result_mask = std::make_shared(std::max(size(), mask->size())); + auto result_iter = result_mask->rbegin(); + auto mask_1_iter = rbegin(); + auto mask_2_iter = mask->rbegin(); + + while (mask_1_iter != rend() && + mask_2_iter != mask->rend()) { + // Union mask dimension values for both masks + // Example: (MaskValue[1,2,3,4], MaskValue[2, 5]) -> MaskValue[1, 2, 3, 4, 5] + for (const auto & value : *mask_1_iter) { + 
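+                // Values from this mask are taken as-is; the second loop below adds any
+                // values from the other mask that are not already present.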
result_iter->insert(value); + } + for (const auto & value : *mask_2_iter) { + if (!result_iter->count(value)) { + result_iter->insert(value); + } + } + + result_iter++; + mask_1_iter++; + mask_2_iter++; + } + return result_mask; + } + void add_callback(const std::function & receive_callback, Mask::Ptr mask) { m_callbacks[mask.get()] = receive_callback; m_dependencies.push_back(mask.get()); diff --git a/inference-engine/src/offline_transformations/include/pruning.hpp b/inference-engine/src/offline_transformations/include/pruning.hpp index f398ab1713f..7831ee14246 100644 --- a/inference-engine/src/offline_transformations/include/pruning.hpp +++ b/inference-engine/src/offline_transformations/include/pruning.hpp @@ -14,6 +14,7 @@ namespace ngraph { namespace pass { class InitConstMask; +class InitMasks; class PropagateMasks; class ShrinkWeights; @@ -22,6 +23,16 @@ class Pruning; } // namespace pass } // namespace ngraph +/** + * @ingroup ie_transformation_common_api + * @brief Initialising masks for pruned operations + */ +class ngraph::pass::InitMasks : public ngraph::pass::GraphRewrite { +public: + NGRAPH_RTTI_DECLARATION; + InitMasks(); +}; + /** * @ingroup ie_transformation_common_api * @brief Check Constant operation values by given dimensions and set diff --git a/inference-engine/src/offline_transformations/src/pruning/init_const_mask.cpp b/inference-engine/src/offline_transformations/src/pruning/init_const_mask.cpp index 73929487b80..01e9520082b 100644 --- a/inference-engine/src/offline_transformations/src/pruning/init_const_mask.cpp +++ b/inference-engine/src/offline_transformations/src/pruning/init_const_mask.cpp @@ -17,7 +17,7 @@ NGRAPH_RTTI_DEFINITION(ngraph::pass::InitConstMask, "InitConstMask", 0); ngraph::pass::InitConstMask::InitConstMask(const ngraph::AxisSet & dims, const std::function & condition) { auto constant = pattern::wrap_type( - pattern::type_matches_any({element::f16, element::f32, element::f64})); + pattern::type_matches_any({element::i8, element::u8, element::f16, element::f32, element::f64})); matcher_pass_callback callback = [=](pattern::Matcher& m) { auto const_node = std::dynamic_pointer_cast(m.get_match_root()); diff --git a/inference-engine/src/offline_transformations/src/pruning/init_masks.cpp b/inference-engine/src/offline_transformations/src/pruning/init_masks.cpp new file mode 100644 index 00000000000..2b4394b6c32 --- /dev/null +++ b/inference-engine/src/offline_transformations/src/pruning/init_masks.cpp @@ -0,0 +1,64 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "pruning.hpp" +#include "mask_attribute.hpp" + +#include +#include +#include + +NGRAPH_RTTI_DEFINITION(ngraph::pass::InitMasks, "InitMasks", 0); + +namespace ngraph { +namespace pass { +namespace init_masks { + +class InitConvMask; + +} // namespace init_masks +} // namespace pass +} // namespace ngraph + +class ngraph::pass::init_masks::InitConvMask : public MatcherPass { +public: + InitConvMask() { + auto input = pattern::any_input(); + auto weights = pattern::any_input(); + auto conv = pattern::wrap_type({input, weights}); + + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { + const auto & pattern_map = m.get_pattern_value_map(); + const auto & m_output = pattern_map.at(conv); + + // Initializing weights mask: + // 1. 
Looking for Const node with weights + NodeVector weights_calculation_nodes; + auto cur_node = m_output.get_node()->get_input_node_shared_ptr(1); + + while (!ngraph::is_type(cur_node) && cur_node->inputs().size()) { + weights_calculation_nodes.push_back(cur_node); + cur_node = cur_node->get_input_node_shared_ptr(0); + } + if (!ngraph::is_type(cur_node)) { + NGRAPH_DEBUG << "Can't find Constant weights for Convolution: " << + m_output.get_node()->get_friendly_name() << std::endl; + return false; + } + + // 2. Init mask for Const node + InitConstMask({0}/* check only output channels dim */).apply(cur_node); + return true; + }; + + auto m = std::make_shared(conv, "ConvolutionInitMask"); + register_matcher(m, callback); + } +}; + + +ngraph::pass::InitMasks::InitMasks() { + add_matcher(); +} + diff --git a/inference-engine/src/offline_transformations/src/pruning/propagate_masks.cpp b/inference-engine/src/offline_transformations/src/pruning/propagate_masks.cpp index ac7a8e8b685..424b6ae9583 100644 --- a/inference-engine/src/offline_transformations/src/pruning/propagate_masks.cpp +++ b/inference-engine/src/offline_transformations/src/pruning/propagate_masks.cpp @@ -7,7 +7,9 @@ #include #include +#include #include +#include NGRAPH_RTTI_DEFINITION(ngraph::pass::PropagateMasks, "PropagateMasks", 0); @@ -20,11 +22,23 @@ class GroupConvolution; class Elementwise; class PassThrough; class StopPropagation; +class FakeQuantize; +class Concat; +class Reshape; } // namespace mask_propagation } // namespace pass } // namespace ngraph +ngraph::Shape broadcast_shape_to_rank(ngraph::Shape shape_to_broadcast, int64_t dst_rank) { + auto initial_rank = static_cast(shape_to_broadcast.size()); + auto num_of_broadcased_dims = dst_rank - initial_rank; + std::vector dims(num_of_broadcased_dims, 1); + dims.insert(dims.end(), shape_to_broadcast.begin(), shape_to_broadcast.end()); + auto new_shape = ngraph::Shape(dims); + return new_shape; +} + class ngraph::pass::mask_propagation::Convolution : public MatcherPass { public: Convolution() { @@ -38,12 +52,15 @@ public: const auto & m_output = pattern_map.at(conv); const auto & m_input = pattern_map.at(input); - // In case if weights are Constant we initialize Mask - InitConstMask({0}/* check only output channel */).apply(m_weights.get_node_shared_ptr()); - auto weights_mask = getMask(m_weights); - // If weights are not a Constant and we didn't set Mask value before we will get nullptr - if (!weights_mask) return false; + + // Nullptr in weights-mask means that mask for this node wasn't initialized earlier. + // Weights mask for convolution should be initialized in the InitMasks pass (and propagate after it). + // If mask isn't initialized - this weights (and hence all convolution) can't be pruned for some reason. 
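+            // (A Mask stores, per tensor dimension, the set of indices that may be removed;
+            //  an empty set for a dimension means that dimension is left untouched.)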
+ if (!weights_mask) { + NGRAPH_DEBUG << "No weights mask for " << m_output.get_node()->get_friendly_name() << "\n"; + return false; + } auto weights_mask_row = weights_mask.get(); if (auto input_mask = getMask(m_input)) { @@ -119,9 +136,15 @@ public: auto weights_mask = getMask(m_weights); if (!weights_mask) { - // TODO: only if weights are constant - weights_mask = std::make_shared(weights_shape.size()); - setMask(m_weights, weights_mask); + // Setting mask only if weights are constant + if (ngraph::is_type(m_output.get_node_shared_ptr())) { + weights_mask = std::make_shared(weights_shape.size()); + setMask(m_weights, weights_mask); + } else { + NGRAPH_DEBUG << "GroupConvolution: No weights mask and weights aren't constant for " << + *m_output.get_node() << "\n"; + return false; + } } auto weights_mask_row = weights_mask.get(); @@ -169,13 +192,85 @@ public: } }; +class ngraph::pass::mask_propagation::Reshape : public MatcherPass { +public: + Reshape() { + auto input = pattern::any_input(pattern::has_static_shape()); + auto shape = pattern::any_input(); + // Working only for Reshapes on Group Convolution weights + auto reshape = pattern::wrap_type({input, shape}, pattern::consumers_count(1)); + auto gconv = pattern::wrap_type({pattern::any_input(), reshape}, + pattern::has_static_shape()); + + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { + const auto & pattern_map = m.get_pattern_value_map(); + const auto & m_shape = pattern_map.at(shape); + const auto & m_output = pattern_map.at(reshape); + const auto & m_input = pattern_map.at(input); + + auto shape_val = m_shape.get_node_shared_ptr(); + + // In Depthwise Convolutions Reshape on weights just add additional dimension for output channels count + // (1 in case of the depthwise) of kernel. + // Example: Reshape from [G, 1 (I), X, Y, Z] -> [G, 1 (O), 1 (I), X, Y, Z], where G - group numbers, + // X, Y, Z - spartial dimensions (can be only X or X, Y), I, O - number of input/output channels of kernel. + + // Checking that matched Reshape meets this conditions (add 1-d dim on 1 position of shape constant) + auto inp_shape = m_input.get_shape(); + auto out_shape = m_output.get_shape(); + inp_shape.insert(inp_shape.begin() + 1, 1); + if (inp_shape != out_shape) { + return false; + } + + auto input_mask = getMask(m_input); + if (!input_mask) { + return false; + } + auto input_mask_row = input_mask.get(); + auto output_mask = std::make_shared(m_output.get_partial_shape().rank().get_length()); + auto output_mask_row = output_mask.get(); + + // Depthwise Convolution pruned only by input channels (== groups) -> + // Propagating mask from Group (0) dim in Reshape input to Group (0) dim in Reshape output and back + input_mask->add_callback([output_mask_row](Mask::Ptr cur_mask) -> bool { + cur_mask->at(0) = output_mask_row->at(0); + return true; + }, output_mask); + output_mask->add_callback([input_mask_row](Mask::Ptr cur_mask) -> bool { + cur_mask->at(0) = input_mask_row->at(0); + return true; + }, input_mask); + input_mask->apply_callback(output_mask); + + // To allow pruning on weights (allow reshape input Group (0) dim changing) replace Reshape Shape constant + // [G, 1, 1, X, Y, Z] by [-1, 1, 1, X, Y, Z]. 
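+            // With -1 in the group position Reshape re-infers that dimension from the actual
+            // weights shape, so shrinking the number of groups later does not conflict with a
+            // hard-coded shape constant.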
+ auto old_shape_const = std::dynamic_pointer_cast(m_shape.get_node_shared_ptr()); + auto shape_value = old_shape_const.get()->cast_vector(); + shape_value[0] = -1; + auto new_const = opset6::Constant::create(old_shape_const->get_element_type(), + old_shape_const->get_shape(), shape_value); + new_const->set_friendly_name(old_shape_const->get_friendly_name()); + ngraph::copy_runtime_info(old_shape_const, new_const); + ngraph::replace_node(old_shape_const, new_const); + + setMask(m_output, output_mask); + return true; + }; + + auto m = std::make_shared(reshape, "ReshapeMaskPropagation"); + register_matcher(m, callback); + } +}; + class ngraph::pass::mask_propagation::Elementwise : public MatcherPass { public: Elementwise() { auto input = pattern::any_input(); auto weights = pattern::any_input(); - auto eltwise = pattern::wrap_type({input, weights}, - pattern::has_static_rank()); + auto eltwise = pattern::wrap_type({input, weights}, pattern::has_static_rank()); + // TODO: add Div, Power support ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { const auto & pattern_map = m.get_pattern_value_map(); @@ -183,82 +278,275 @@ public: const auto & m_output = pattern_map.at(eltwise); const auto & m_input = pattern_map.at(input); - // TODO: implement check that compares input shape ranks + // Case when input masks should be united instead of intersection + bool union_eltwise_type = ngraph::is_type(m_output.get_node_shared_ptr()); + const auto & input_rank = m_input.get_partial_shape().rank().get_length(); const auto & weights_rank = m_weights.get_partial_shape().rank().get_length(); + // Here assuming that masks can be propagated only through 3/4 dimensional tensors + // (since channel dim is necessary) if (weights_rank < 3 || input_rank < 3) return false; - // In case if one of the inputs is constant - // TODO: need to find channel dimension instead of hardcoded zero - const size_t & channel_dim = (input_rank == weights_rank ? 
1 : 0); - InitConstMask({channel_dim}).apply(m_input.get_node_shared_ptr()); - InitConstMask({channel_dim}).apply(m_weights.get_node_shared_ptr()); + // In case if first of the inputs is constant + InitConstMask({0, 1/* potential output channel dim */}).apply(m_input.get_node_shared_ptr()); + auto input_mask = getMask(m_input); + if (!input_mask) { + NGRAPH_DEBUG << "No input mask for: " << m_output.get_node()->get_friendly_name() << std::endl; + return false; + } + + InitConstMask({0, 1}).apply(m_weights.get_node_shared_ptr()); auto weights_mask = getMask(m_weights); - auto input_mask = getMask(m_input); - - if (!weights_mask || !input_mask) { - NGRAPH_DEBUG << "No mask for: " << m_output.get_node()->get_friendly_name() << std::endl; + if (!weights_mask) { + NGRAPH_DEBUG << "No weights mask for: " << m_output.get_node()->get_friendly_name() << std::endl; return false; } auto input_mask_row = input_mask.get(); auto weights_mask_row = weights_mask.get(); - // Merge masks from two inputs + // Merging masks from two inputs auto output_mask = std::make_shared(m_output.get_partial_shape().rank().get_length()); auto output_mask_row = output_mask.get(); - auto out_mask_callback = [input_mask_row, weights_mask_row](Mask::Ptr cur_mask) -> bool { - auto omask_iter = cur_mask->rbegin(); - auto imask_iter = input_mask_row->rbegin(); - auto wmask_iter = weights_mask_row->rbegin(); - - for (auto & item : *cur_mask) { - item.clear(); - } - - while (imask_iter != input_mask_row->rend() && - wmask_iter != weights_mask_row->rend()) { - // Merge mask dimension values for both masks - // Example: (MaskValue[1,2,3,4], MaskValue[2,3]) -> MaskValue[2,3] - for (const auto & value : *imask_iter) { - if (wmask_iter->count(value)) { - omask_iter->insert(value); - } - } - - omask_iter++; - imask_iter++; - wmask_iter++; + auto out_mask_callback = [input_mask_row, weights_mask_row, union_eltwise_type](Mask::Ptr cur_mask) -> bool { + Mask::Ptr result_mask; + if (union_eltwise_type) { + result_mask = input_mask_row->union_masks_reversed(weights_mask_row); + } else { + result_mask = input_mask_row->intersect_masks_reversed(weights_mask_row); } + cur_mask->copy_value_from_mask_reversed(result_mask.get()); return true; }; output_mask->add_callback(out_mask_callback, input_mask); - output_mask->add_callback(out_mask_callback, weights_mask); - auto callback = [output_mask_row](Mask::Ptr cur_mask) -> bool { - auto omask_iter = output_mask_row->rbegin(); - auto cmask_iter = cur_mask->rbegin(); - while (omask_iter != output_mask_row->rend() && - cmask_iter != cur_mask->rend()) { - // TODO: check - *cmask_iter = *omask_iter; - - omask_iter++; - cmask_iter++; - } + input_mask->add_callback([weights_mask_row](Mask::Ptr cur_mask) -> bool { + cur_mask->copy_value_from_mask_reversed(weights_mask_row); return true; - }; - input_mask->add_callback(callback, output_mask); - weights_mask->add_callback(callback, output_mask); + }, weights_mask); + input_mask->add_callback([output_mask_row](Mask::Ptr cur_mask) -> bool { + cur_mask->copy_value_from_mask_reversed(output_mask_row); + return true; + }, output_mask); + weights_mask->add_callback([input_mask_row](Mask::Ptr cur_mask) -> bool { + cur_mask->copy_value_from_mask_reversed(input_mask_row); + return true; + }, input_mask); - // Init output mask output_mask->apply_callback(input_mask); + weights_mask->apply_callback(input_mask); + setMask(m_output, output_mask); return true; }; - auto m = std::make_shared(eltwise, "EltwiseMaskPropagation"); + auto m = std::make_shared(eltwise, 
"ElementwiseMaskPropagation"); + register_matcher(m, callback); + } +}; + +class ngraph::pass::mask_propagation::FakeQuantize : public MatcherPass{ +public: + FakeQuantize(){ + auto input = pattern::any_input(pattern::has_static_shape()); + auto input_low = pattern::any_input(pattern::has_static_shape()); + auto input_high = pattern::any_input(pattern::has_static_shape()); + auto output_low = pattern::any_input(pattern::has_static_shape()); + auto output_high = pattern::any_input(pattern::has_static_shape()); + auto fake_quantize = pattern::wrap_type({input, input_low, input_high, output_low, + output_high}); + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { + const auto & pattern_map = m.get_pattern_value_map(); + const auto & m_input = pattern_map.at(input); + const auto & m_input_low = pattern_map.at(input_low); + const auto & m_input_high = pattern_map.at(input_high); + const auto & m_output_low = pattern_map.at(output_low); + const auto & m_output_high = pattern_map.at(output_high); + const auto & m_output = pattern_map.at(fake_quantize); + + auto input_mask = getMask(m_input); + + // Input mask is the only source of pruning in FQ + if (!input_mask) { + NGRAPH_DEBUG << "FakeQuantize: No input mask for " << *m_output.get_node() << "\n"; + return false; + } + + auto input_mask_row = input_mask.get(); + + // Propagate input mask to output mask and in the opposite direction + auto output_mask = std::make_shared(m_output.get_partial_shape().rank().get_length()); + auto output_mask_row = output_mask.get(); + + // Output mask is equal to input mask + auto output_mask_callback = [input_mask_row](Mask::Ptr cur_mask) -> bool { + cur_mask->copy_value_from_mask(input_mask_row); + return true; + }; + + auto input_mask_callback = [output_mask_row](Mask::Ptr cur_mask) -> bool { + cur_mask->copy_value_from_mask(output_mask_row); + return true; + }; + + output_mask->add_callback(output_mask_callback, input_mask); + input_mask->add_callback(input_mask_callback, output_mask); + + // Calculate output mask + output_mask->apply_callback(input_mask); + setMask(m_output, output_mask); + + auto input_low_size = shape_size(m_input_low.get_shape()); + auto input_high_size = shape_size(m_input_high.get_shape()); + auto output_low_size = shape_size(m_output_low.get_shape()); + auto output_high_size = shape_size(m_output_high.get_shape()); + + // In the per-tensor case FQ params shouldn't be pruned + if (input_low_size == 1 && output_low_size == 1 && input_high_size == 1 && output_high_size == 1) { + return true; + } + + // If input/output ranges in FQ should be broadcasted to input shape -> broadcast this consant values + // for the convenience of working with the masks + NodeVector fq_params_nodes{m_input_low.get_node_shared_ptr(), + m_input_high.get_node_shared_ptr(), + m_output_low.get_node_shared_ptr(), + m_output_high.get_node_shared_ptr()}; + auto fq_node = std::dynamic_pointer_cast(m_output.get_node_shared_ptr()); + size_t idx = 0; + if (fq_node->get_auto_broadcast() != ngraph::op::AutoBroadcastType::NONE) { + for (auto const_node : fq_params_nodes) { + auto new_shape = broadcast_shape_to_rank(const_node->get_shape(), + m_input.get_partial_shape().rank().get_length()); + auto const_copy = const_node->clone_with_new_inputs(const_node->input_values()); + auto new_const = std::dynamic_pointer_cast(const_copy); + new_const->set_data_shape(new_shape); + new_const->validate_and_infer_types(); + new_const->set_friendly_name(const_node->get_friendly_name()); + 
ngraph::copy_runtime_info(const_node, new_const); + ngraph::replace_node(const_node, new_const); + fq_params_nodes[idx++] = new_const; + } + } + + auto fq_params_mask_callback = [input_mask_row](Mask::Ptr cur_mask) -> bool { + cur_mask->at(1/* fq params have same shapes as input */) = input_mask_row->at(1 /* channel dim in data */); + return true; + }; + + for (auto fq_param : fq_params_nodes) { + auto mask = std::make_shared(fq_param->get_shape().size()); + mask->add_callback(fq_params_mask_callback, input_mask); + input_mask->add_callback([mask](Mask::Ptr cur_mask) -> bool { + return true; + }, mask); + mask->apply_callback(input_mask); + setMask(fq_param->output(0), mask); + } + + return true; + }; + + auto m = std::make_shared(fake_quantize, "FakeQuantizeMaskPropagation"); + register_matcher(m, callback); + } +}; + +class ngraph::pass::mask_propagation::Concat : public MatcherPass{ +public: + Concat() { + auto concat = pattern::wrap_type(pattern::has_static_shape()); + + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { + const auto & pattern_map = m.get_pattern_value_map(); + const auto & m_output = pattern_map.at(concat); + auto concat_ptr = std::dynamic_pointer_cast(m_output.get_node_shared_ptr()); + auto axis = concat_ptr->get_concatenation_axis(); + + auto inputs = concat_ptr->inputs(); + std::map input_masks; + std::map input_masks_row; + std::vector input_sizes; + + size_t first_input_idx = 0; + Mask::Ptr first_input_mask; + bool first_initialized = false; + for (size_t i=0; i < inputs.size(); i++) { + auto input = inputs[i]; + auto input_mask = getMask(input.get_source_output()); + if (input_mask) { + input_masks[i] = input_mask; + input_masks_row[i] = input_mask.get(); + + if (!first_initialized) { + first_input_idx = i; + first_input_mask = input_mask; + first_initialized = true; + } + } + input_sizes.push_back(input.get_shape().at(axis)); + } + + if (!first_initialized) { + return false; + } + + auto output_mask = std::make_shared(m_output.get_partial_shape().rank().get_length()); + auto output_mask_row = output_mask.get(); + + auto out_mask_callback = [input_masks_row, input_sizes, axis](Mask::Ptr cur_mask) -> bool { + int64_t cur_size = 0; + cur_mask->at(axis).clear(); + + for (size_t i=0; i < input_sizes.size(); ++i) { + if (input_masks_row.count(i)) { + for (auto idx : input_masks_row.at(i)->at(axis)) { + cur_mask->at(axis).insert(idx + cur_size); + } + } + cur_size += input_sizes[i]; + } + return true; + }; + + auto create_input_mask_callback_for_idx = [output_mask_row, input_sizes, axis](size_t input_idx){ + auto input_mask_callback = [output_mask_row, input_sizes, axis, input_idx](Mask::Ptr cur_mask) -> bool { + cur_mask->clean_dim_values(); + uint64_t min_val = 0; + for (size_t i = 0; i < input_idx; i++) { + min_val += input_sizes[i]; + } + uint64_t max_val = min_val + input_sizes[input_idx]; + for (auto idx : output_mask_row->at(axis)) { + if (idx < max_val && idx >= min_val) { + cur_mask->at(axis).insert(idx - min_val); + } + } + return true; + }; + return input_mask_callback; + }; + output_mask->add_callback(out_mask_callback, first_input_mask); + + for (size_t i=0; i < inputs.size(); ++i) { + if (input_masks.count(i) && i != first_input_idx) { + auto input_mask = input_masks.at(i); + input_mask->add_callback(create_input_mask_callback_for_idx(i), + first_input_mask); + first_input_mask->add_callback([](Mask::Ptr cur_mask) -> bool { + return true; + }, input_mask); + } + } + 
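+            // The first input's mask is likewise re-sliced from the output mask, so index changes
+            // propagate both from the Concat inputs to its output and back again.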
first_input_mask->add_callback(create_input_mask_callback_for_idx(first_input_idx), + output_mask); + output_mask->apply_callback(first_input_mask); + setMask(m_output, output_mask); + + return true; + }; + auto m = std::make_shared(concat, "ConcatMaskPropagation"); register_matcher(m, callback); } }; @@ -266,7 +554,9 @@ public: class ngraph::pass::mask_propagation::PassThrough : public MatcherPass { public: PassThrough() { - auto unary_op = pattern::wrap_type(); + auto unary_op = pattern::wrap_type(); ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { const auto & pattern_map = m.get_pattern_value_map(); @@ -312,5 +602,8 @@ ngraph::pass::PropagateMasks::PropagateMasks() { add_matcher(); add_matcher(); add_matcher(); + add_matcher(); + add_matcher(); + add_matcher(); add_matcher(); } diff --git a/inference-engine/src/offline_transformations/src/pruning/pruning.cpp b/inference-engine/src/offline_transformations/src/pruning/pruning.cpp index 3159e3db7db..ad7f410b8f3 100644 --- a/inference-engine/src/offline_transformations/src/pruning/pruning.cpp +++ b/inference-engine/src/offline_transformations/src/pruning/pruning.cpp @@ -15,8 +15,13 @@ NGRAPH_RTTI_DEFINITION(ngraph::pass::Pruning, "Pruning", 0); bool ngraph::pass::Pruning::run_on_function(std::shared_ptr f) { Manager manager(get_pass_config()); + + // Initialize masks only for Convolutions/GroupConvolutions weights (needed to init mask in source Constant of + // weights-calculating subgraph). For other node types masks initialized in PropagateMasks pass. + manager.register_pass(); manager.register_pass(); + #ifdef NGRAPH_DEBUG_ENABLE // VisualizeTree modifier helps to print Masks and mark nodes with masks /* diff --git a/inference-engine/src/offline_transformations/src/pruning/shrink_weights.cpp b/inference-engine/src/offline_transformations/src/pruning/shrink_weights.cpp index 80c2abbb709..08e7ef152c0 100644 --- a/inference-engine/src/offline_transformations/src/pruning/shrink_weights.cpp +++ b/inference-engine/src/offline_transformations/src/pruning/shrink_weights.cpp @@ -54,6 +54,8 @@ bool ngraph::pass::ShrinkWeights::run_on_function(std::shared_ptrsize(); ++dim) { const auto &dim_size = mask->at(dim).size(); if (dim_size == 0) continue; + // Broadcastable 1-size dimension shouldn't be shrank with mask + if (const_node->get_shape().at(dim) == 1 && dim_size > 1) continue; // Convert dims that we want remove to dims that we need to keep std::vector dims_to_keep; diff --git a/inference-engine/tests/functional/inference_engine/transformations/pruning_test.cpp b/inference-engine/tests/functional/inference_engine/transformations/pruning_test.cpp index 0f46a853cef..82c1fa6c9f2 100644 --- a/inference-engine/tests/functional/inference_engine/transformations/pruning_test.cpp +++ b/inference-engine/tests/functional/inference_engine/transformations/pruning_test.cpp @@ -15,6 +15,7 @@ #include #include #include +#include using namespace testing; using namespace ngraph; @@ -67,6 +68,23 @@ TEST(TransformationTests, InitMasksOutputChannel) { compare_masks(*getMask(weights->output(0)), {{}, {1}, {}, {}}); } +// TODO: add test init masks with subgraph +TEST(TransformationTests, TestInitMasks) { + Shape weights_shape{6, 3, 3, 3}; + Shape input_shape{1, 3, 64, 64}; + auto input = std::make_shared(element::f32, input_shape); + auto weights = create_constant_with_zeros(weights_shape, {{1, 2, 3}, {}, {}, {}}); + auto conv = std::make_shared(input, weights, Strides(2, 1), + CoordinateDiff(2, 0), CoordinateDiff(2, 0), Strides(2, 1)); + 
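+    // Zeros were planted in output channels 1, 2 and 3 of the weights, so InitMasks is expected
+    // to mark exactly those output channels as prunable (checked below).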
+ auto f = std::make_shared(NodeVector{conv}, ParameterVector{input}); + pass::Manager m; + m.register_pass(); + m.run_passes(f); + + compare_masks(*getMask(weights.get_node_shared_ptr()->output(0)), {{1, 2, 3}, {}, {}, {}}); +} + TEST(TransformationTests, InitMasksNegative) { Shape weights_shape{6, 3, 3, 3}; auto weights = opset5::Constant::create(element::f32, weights_shape, {0.5}); @@ -85,6 +103,7 @@ TEST(TransformationTests, PropagateMasksNegative) { auto f = std::make_shared(NodeVector{conv}, ParameterVector{input}); pass::Manager m; + m.register_pass(); m.register_pass(); m.run_passes(f); @@ -102,27 +121,35 @@ TEST(TransformationTests, PropagateMasksBasic) { CoordinateDiff(2, 0), CoordinateDiff(2, 0), Strides(2, 1)); auto relu = std::make_shared(conv); + auto add_const = create_constant_with_zeros(Shape{1, 6, 1, 1}, {{}, {1, 2, 3, 4, 5}, {}, {}}); + auto add = std::make_shared(relu, add_const); + auto sub_const = create_constant_with_zeros(Shape{6, 1, 1}, {{1, 2, 3}, {}, {}}); - auto sub = std::make_shared(relu, sub_const); + auto sub = std::make_shared(add, sub_const); - auto mul_const = create_constant_with_zeros(Shape{6, 1, 1}, {{2}, {}, {}}); - auto mul = std::make_shared(sub, mul_const); + auto mul_const = create_constant_with_zeros(Shape{1, 6, 1, 1}, {{}, {4}, {}, {}}); + auto mul = std::make_shared(sub, mul_const); - auto weights2 = opset5::Constant::create(element::f32, weights_shape2, {0}); + auto weights2 = create_constant_with_zeros(weights_shape2, {{1, 2}, {1, 2, 3}, {}, {}}); auto conv2 = std::make_shared(mul, weights2, Strides(2, 1), CoordinateDiff(2, 0), CoordinateDiff(2, 0), Strides(2, 1)); auto f = std::make_shared(NodeVector{conv2}, ParameterVector{input}); pass::Manager m; + m.register_pass(); m.register_pass(); m.run_passes(f); - compare_masks(*getMask(weights->output(0)), Mask({{2}, {}, {}, {}})); - compare_masks(*getMask(conv->output(0)), Mask({{}, {2}, {}, {}})); - compare_masks(*getMask(relu->output(0)), Mask({{}, {2}, {}, {}})); - compare_masks(*getMask(sub_const), Mask({{2}, {}, {}})); - compare_masks(*getMask(mul_const), Mask({{2}, {}, {}})); - compare_masks(*getMask(weights2->output(0)), Mask({{}, {2}, {}, {}})); + compare_masks(*getMask(weights->output(0)), Mask({{1, 2, 3, 4}, {}, {}, {}})); + compare_masks(*getMask(conv->output(0)), Mask({{}, {1, 2, 3, 4}, {}, {}})); + compare_masks(*getMask(relu->output(0)), Mask({{}, {1, 2, 3, 4}, {}, {}})); + compare_masks(*getMask(add_const), Mask({{}, {1, 2, 3, 4}, {}, {}})); + compare_masks(*getMask(sub_const), Mask({{1, 2, 3, 4}, {}, {}})); + compare_masks(*getMask(mul_const), Mask({{}, {1, 2, 3, 4}, {}, {}})); + compare_masks(*getMask(add->output(0)), Mask({{}, {1, 2, 3, 4}, {}, {}})); + compare_masks(*getMask(sub->output(0)), Mask({{}, {1, 2, 3, 4}, {}, {}})); + compare_masks(*getMask(mul->output(0)), Mask({{}, {1, 2, 3, 4}, {}, {}})); + compare_masks(*getMask(weights2.get_node_shared_ptr()->output(0)), Mask({{}, {1, 2, 3, 4}, {}, {}})); compare_masks(*getMask(conv2->output(0)), Mask({{}, {}, {}, {}})); } @@ -148,6 +175,7 @@ TEST(TransformationTests, PropagateMasksDynamicConvolution) { auto f = std::make_shared(NodeVector{conv2}, ParameterVector{input}); pass::Manager m; + m.register_pass(); m.register_pass(); m.run_passes(f); @@ -182,6 +210,7 @@ TEST(TransformationTests, PropagateMasksDynamicGroupConvolution) { auto f = std::make_shared(NodeVector{conv2}, ParameterVector{input}); pass::Manager m; + m.register_pass(); m.register_pass(); m.run_passes(f); } @@ -199,15 +228,16 @@ TEST(TransformationTests, 
PropagateMasksEmpty) { auto sub_const = create_constant_with_zeros(Shape{6, 1, 1}, {{1, 2, 3}, {}, {}}); auto sub = std::make_shared(relu, sub_const); - auto mul_const = create_constant_with_zeros(Shape{6, 1, 1}, {{1, 2}, {}, {}}); - auto mul = std::make_shared(sub, mul_const); + auto add_const = create_constant_with_zeros(Shape{6, 1, 1}, {{1, 2}, {}, {}}); + auto add = std::make_shared(sub, add_const); auto weights2 = opset5::Constant::create(element::f32, weights_shape2, {0}); - auto conv2 = std::make_shared(mul, weights2, Strides(2, 1), + auto conv2 = std::make_shared(add, weights2, Strides(2, 1), CoordinateDiff(2, 0), CoordinateDiff(2, 0), Strides(2, 1)); auto f = std::make_shared(NodeVector{conv2}, ParameterVector{input}); pass::Manager m; + m.register_pass(); m.register_pass(); m.run_passes(f); @@ -215,11 +245,55 @@ TEST(TransformationTests, PropagateMasksEmpty) { compare_masks(*getMask(conv->output(0)), Mask({{}, {}, {}, {}})); compare_masks(*getMask(relu->output(0)), Mask({{}, {}, {}, {}})); compare_masks(*getMask(sub_const), Mask({{}, {}, {}})); - compare_masks(*getMask(mul_const), Mask({{}, {}, {}})); + compare_masks(*getMask(add_const), Mask({{}, {}, {}})); compare_masks(*getMask(weights2->output(0)), Mask({{}, {}, {}, {}})); compare_masks(*getMask(conv2->output(0)), Mask({{}, {}, {}, {}})); } +TEST(TransformationTests, PropagateMaskPassThrough) { + Shape input_shape{1, 3, 64, 64}; + Shape weights_shape{8, 3, 3, 3}; + Shape weight_shape2{3, 8, 3, 3}; + auto input = std::make_shared(element::f32, input_shape); + input->set_friendly_name("input"); + auto weights_const_1 = create_constant_with_zeros(weights_shape, {{1, 2, 3}, {}, {}, {}}); + weights_const_1.get_node_shared_ptr()->set_friendly_name("weights_1"); + + auto conv_1 = std::make_shared(input, weights_const_1, Strides(2, 1), + CoordinateDiff(2, 0), CoordinateDiff(2, 0), Strides(2, 1)); + conv_1->set_friendly_name("conv_1"); + + // Adding a couple of PassThrough operations + auto relu = std::make_shared(conv_1); + relu->set_friendly_name("relu"); + + auto clamp = std::make_shared(relu, 0, 6); + clamp->set_friendly_name("clamp"); + + auto pads_begin = opset5::Constant::create(element::i32, Shape{4}, {0, 0, 1, 1}); + auto pads_end = opset5::Constant::create(element::i32, Shape{4}, {0, 0, 2, 2}); + auto pad = std::make_shared(clamp, pads_begin, pads_end, op::PadMode::CONSTANT); + auto max_pool = std::make_shared(pad, Strides{1, 1}, + Shape{0, 0}, Shape{1, 1}, Shape{4, 4}); + max_pool->set_friendly_name("max_pool"); + + auto weights2 = opset5::Constant::create(element::f32, weight_shape2, {0}); + auto conv2 = std::make_shared(max_pool, weights2, Strides(2, 1), + CoordinateDiff(2, 0), CoordinateDiff(2, 0), Strides(2, 1)); + auto f = std::make_shared(NodeVector{conv2}, ParameterVector{input}); + + pass::Manager m; + m.register_pass(); + m.register_pass(); + m.run_passes(f); + + compare_masks(*getMask(weights_const_1.get_node_shared_ptr()->output(0)), Mask({{1, 2, 3}, {}, {}, {}})); + compare_masks(*getMask(conv_1->output(0)), Mask({{}, {1, 2, 3}, {}, {}})); + compare_masks(*getMask(relu->output(0)), Mask({{}, {1, 2, 3}, {}, {}})); + compare_masks(*getMask(clamp->output(0)), Mask({{}, {1, 2, 3}, {}, {}})); + compare_masks(*getMask(max_pool->output(0)), Mask({{}, {1, 2, 3}, {}, {}})); +} + TEST(TransformationTests, PropagateMasksHardDependencies) { Shape input_shape{1, 3, 3, 3}; @@ -280,4 +354,344 @@ TEST(TransformationTests, PropagateMasksHardDependencies) { // compare_masks(*getMask(relu), Mask({{}, {0, 1, 2, 3, 4, 5}, {}, {}})); 
// compare_masks(*getMask(weights2), Mask({{}, {0, 1, 2, 3, 4, 5}, {}, {}})); // compare_masks(*getMask(conv2), Mask({{}, {}, {}, {}})); -} \ No newline at end of file +} + +TEST(TransformationTests, PropagateMasksQuantizedGroupConvolution) { + Shape input_shape{1, 3, 64, 64}; + Shape weights_shape{8, 3, 3, 3}; + Shape weights_group_shape{8, 1, 3, 3}; + Shape weight_shape2{3, 8, 3, 3}; + auto input = std::make_shared(element::f32, input_shape); + input->set_friendly_name("input"); + + auto weights1 = create_constant_with_zeros(weights_shape, {{0, 1, 2, 3}, {}, {}, {}}); + auto conv1 = std::make_shared(input, weights1, Strides(2, 1), + CoordinateDiff(2, 0), CoordinateDiff(2, 0), Strides(2, 1)); + auto weights_group = opset5::Constant::create(element::i8, weights_group_shape, {0}); + weights_group->set_friendly_name("weights_group"); + + auto convert = std::make_shared(weights_group, element::f32); + convert->set_friendly_name("convert"); + + auto sub_const = create_constant_with_zeros(Shape{8, 1, 1, 1}, {{0, 1, 2, 3}, {}, {}, {}}); + + auto sub = std::make_shared(convert, sub_const); + sub->set_friendly_name("sub"); + + auto mul_const = create_constant_with_zeros(Shape{8, 1, 1, 1}, {{0, 1, 2, 3, 4}, {}, {}, {}}); + auto mul = std::make_shared(sub, mul_const); + mul->set_friendly_name("mul"); + + auto reshape = std::make_shared(mul, opset5::Constant::create(element::i64, Shape{5}, {8, 1, 1, 3, 3}), false); + + auto conv_group = std::make_shared(conv1, reshape, Strides(2, 1), + CoordinateDiff(2, 0), CoordinateDiff(2, 0), Strides(2, 1)); + + auto add_const = create_constant_with_zeros(Shape{1, 8, 1, 1}, {{}, {0, 1, 2, 3, 4}, {}, {}});; + auto add = std::make_shared(conv_group, add_const); + add->set_friendly_name("add"); + + auto weights_2 = opset5::Constant::create(element::f32, weight_shape2, {0}); + auto conv2 = std::make_shared(add, weights_2, Strides(2, 1), + CoordinateDiff(2, 0), CoordinateDiff(2, 0), Strides(2, 1)); + auto f = std::make_shared(NodeVector{conv2}, ParameterVector{input}); + + pass::Manager m; + m.register_pass(); + m.run_passes(f); + + compare_masks(*getMask(weights1.get_node_shared_ptr()->output(0)), Mask({{0 , 1, 2, 3}, {}, {}, {}})); + compare_masks(*getMask(conv1->output(0)), Mask({{}, {0 , 1, 2, 3}, {}, {}})); + + compare_masks(*getMask(weights_group->output(0)), Mask({{0 , 1, 2, 3}, {}, {}, {}})); + compare_masks(*getMask(sub->output(0)), Mask({{0 , 1, 2, 3}, {}, {}, {}})); + compare_masks(*getMask(sub_const.get_node_shared_ptr()->output(0)), Mask({{0 , 1, 2, 3}, {}, {}, {}})); + compare_masks(*getMask(mul->output(0)), Mask({{0 , 1, 2, 3}, {}, {}, {}})); + compare_masks(*getMask(mul_const.get_node_shared_ptr()->output(0)), Mask({{0 , 1, 2, 3}, {}, {}, {}})); + + compare_masks(*getMask(reshape->output(0)), Mask({{0 , 1, 2, 3}, {}, {}, {}, {}})); + + compare_masks(*getMask(conv_group->output(0)), Mask({{}, {0 , 1, 2, 3}, {}, {}})); + + compare_masks(*getMask(conv2->output(0)), Mask({{}, {}, {}, {}})); + compare_masks(*getMask(weights_2->output(0)), Mask({{}, {0, 1, 2, 3}, {}, {}})); +} + +TEST(TransformationTests, PropagateMasksFakeQuantizePerTensor) { + Shape input_shape{1, 3, 64, 64}; + Shape weights_shape{8, 3, 3, 3}; + Shape weight_shape2{3, 8, 3, 3}; + auto input = std::make_shared(element::f32, input_shape); + input->set_friendly_name("input"); + auto weights_1 = opset5::Constant::create(element::i8, weights_shape, {0}); + weights_1->set_friendly_name("weights_int8_const"); + + auto convert = std::make_shared(weights_1, element::f32); + 
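+    // weights_int8_const -> Convert -> Subtract -> Multiply is the usual weights dequantization
+    // subgraph; the output-channel mask has to reach the int8 constant through it.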
convert->set_friendly_name("convert"); + + auto sub_const = create_constant_with_zeros(Shape{8, 1, 1, 1}, {{0, 1, 2, 3}, {}, {}, {}}); + + auto sub = std::make_shared(convert, sub_const); + sub->set_friendly_name("sub"); + + auto mul_const = create_constant_with_zeros(Shape{8, 1, 1, 1}, {{0, 1, 2, 3, 4}, {}, {}, {}}); + auto mul = std::make_shared(sub, mul_const); + mul->set_friendly_name("mul"); + + auto conv1 = std::make_shared(input, mul, Strides(2, 1), + CoordinateDiff(2, 0), CoordinateDiff(2, 0), Strides(2, 1)); + conv1->set_friendly_name("conv1"); + + auto add_const = create_constant_with_zeros(Shape{1, 8, 1, 1}, {{}, {0, 1, 2, 3, 4}, {}, {}});; + auto add = std::make_shared(conv1, add_const); + add->set_friendly_name("add"); + + auto input_low = opset5::Constant::create(element::f32, Shape{1}, {0}); + auto input_high = opset5::Constant::create(element::f32, Shape{1, 1, 1, 1}, {20}); + auto output_low = opset5::Constant::create(element::f32, Shape{}, {1}); + auto output_high = opset5::Constant::create(element::f32, Shape{}, {10}); + auto fq = std::make_shared(add, input_low, input_high, output_low, output_high, 8); + + auto weights_2 = opset5::Constant::create(element::f32, weight_shape2, {0}); + auto conv2 = std::make_shared(fq, weights_2, Strides(2, 1), + CoordinateDiff(2, 0), CoordinateDiff(2, 0), Strides(2, 1)); + auto f = std::make_shared(NodeVector{conv2}, ParameterVector{input}); + + pass::Manager m; + m.register_pass(); + m.run_passes(f); + + compare_masks(*getMask(weights_1->output(0)), Mask({{0 , 1, 2, 3, 4}, {}, {}, {}})); + compare_masks(*getMask(sub_const.get_node_shared_ptr()->output(0)), Mask({{0 , 1, 2, 3, 4}, {}, {}, {}})); + compare_masks(*getMask(sub->output(0)), Mask({{0 , 1, 2, 3, 4}, {}, {}, {}})); + + compare_masks(*getMask(mul_const.get_node_shared_ptr()->output(0)), Mask({{0 , 1, 2, 3, 4}, {}, {}, {}})); + compare_masks(*getMask(mul->output(0)), Mask({{0 , 1, 2, 3, 4}, {}, {}, {}})); + + compare_masks(*getMask(conv1->output(0)), Mask({{}, {0 , 1, 2, 3, 4}, {}, {}})); + + compare_masks(*getMask(add_const.get_node_shared_ptr()->output(0)), Mask({{}, {0 , 1, 2, 3, 4}, {}, {}})); + compare_masks(*getMask(add->output(0)), Mask({{}, {0 , 1, 2, 3, 4}, {}, {}})); + + compare_masks(*getMask(fq->output(0)), Mask({{}, {0 , 1, 2, 3, 4}, {}, {}})); + + compare_masks(*getMask(weights_2->output(0)), Mask({{}, {0 , 1, 2, 3, 4}, {}, {}})); + compare_masks(*getMask(conv2->output(0)), Mask({{}, {}, {}, {}})); +} + +TEST(TransformationTests, PropagateMasksFakeQuantizePerChannel) { + Shape input_shape{1, 3, 64, 64}; + Shape weights_shape{8, 3, 3, 3}; + Shape weight_shape2{3, 8, 3, 3}; + auto input = std::make_shared(element::f32, input_shape); + input->set_friendly_name("input"); + auto weights_1 = opset5::Constant::create(element::i8, weights_shape, {0}); + weights_1->set_friendly_name("weights_int8_const"); + + auto convert = std::make_shared(weights_1, element::f32); + convert->set_friendly_name("convert"); + + auto sub_const = create_constant_with_zeros(Shape{8, 1, 1, 1}, {{0, 1, 2, 3}, {}, {}, {}}); + + auto sub = std::make_shared(convert, sub_const); + sub->set_friendly_name("sub"); + + auto mul_const = create_constant_with_zeros(Shape{8, 1, 1, 1}, {{0, 1, 2, 3, 4}, {}, {}, {}}); + auto mul = std::make_shared(sub, mul_const); + mul->set_friendly_name("mul"); + + auto conv1 = std::make_shared(input, mul, Strides(2, 1), + CoordinateDiff(2, 0), CoordinateDiff(2, 0), Strides(2, 1)); + conv1->set_friendly_name("conv1"); + + auto add_const = create_constant_with_zeros(Shape{1, 8, 
1, 1}, {{}, {0, 1, 2, 3, 4}, {}, {}});; + auto add = std::make_shared(conv1, add_const); + add->set_friendly_name("add"); + + auto input_low = opset5::Constant::create(element::f32, Shape{1, 8, 1, 1}, {0}); + auto input_high = opset5::Constant::create(element::f32, Shape{1, 8, 1, 1}, {20}); + auto output_low = opset5::Constant::create(element::f32, Shape{8, 1, 1}, {1}); + auto output_high = opset5::Constant::create(element::f32, Shape{8, 1, 1}, {10}); + auto fq = std::make_shared(add, input_low, input_high, output_low, output_high, 8); + + auto weights_2 = opset5::Constant::create(element::f32, weight_shape2, {0}); + auto conv2 = std::make_shared(fq, weights_2, Strides(2, 1), + CoordinateDiff(2, 0), CoordinateDiff(2, 0), Strides(2, 1)); + auto f = std::make_shared(NodeVector{conv2}, ParameterVector{input}); + + pass::Manager m; + m.register_pass(); + m.register_pass(); + m.run_passes(f); + + compare_masks(*getMask(weights_1->output(0)), Mask({{0 , 1, 2, 3, 4}, {}, {}, {}})); + compare_masks(*getMask(sub_const.get_node_shared_ptr()->output(0)), Mask({{0 , 1, 2, 3, 4}, {}, {}, {}})); + compare_masks(*getMask(sub->output(0)), Mask({{0 , 1, 2, 3, 4}, {}, {}, {}})); + + compare_masks(*getMask(mul_const.get_node_shared_ptr()->output(0)), Mask({{0 , 1, 2, 3, 4}, {}, {}, {}})); + compare_masks(*getMask(mul->output(0)), Mask({{0 , 1, 2, 3, 4}, {}, {}, {}})); + + compare_masks(*getMask(conv1->output(0)), Mask({{}, {0 , 1, 2, 3, 4}, {}, {}})); + + compare_masks(*getMask(add_const.get_node_shared_ptr()->output(0)), Mask({{}, {0 , 1, 2, 3, 4}, {}, {}})); + compare_masks(*getMask(add->output(0)), Mask({{}, {0 , 1, 2, 3, 4}, {}, {}})); + + compare_masks(*getMask(fq->output(0)), Mask({{}, {0 , 1, 2, 3, 4}, {}, {}})); + + compare_masks(*getMask(weights_2->output(0)), Mask({{}, {0 , 1, 2, 3, 4}, {}, {}})); + compare_masks(*getMask(conv2->output(0)), Mask({{}, {}, {}, {}})); + + compare_masks(*getMask(fq->input(1).get_source_output()), Mask({{}, {0 , 1, 2, 3, 4}, {}, {}})); + compare_masks(*getMask(fq->input(2).get_source_output()), Mask({{}, {0 , 1, 2, 3, 4}, {}, {}})); + compare_masks(*getMask(fq->input(3).get_source_output()), Mask({{}, {0 , 1, 2, 3, 4}, {}, {}})); + compare_masks(*getMask(fq->input(4).get_source_output()), Mask({{}, {0 , 1, 2, 3, 4}, {}, {}})); +} + +TEST(TransformationTests, TestConcatMaskPropagation) { + Shape input_shape{1, 3, 64, 64}; + Shape weights_shape1{8, 3, 3, 3}; + Shape weights_shape2{16, 3, 3, 3}; + Shape weights_shape3{8, 3, 3, 3}; + + Shape weight_shape_out_conv{3, 32, 3, 3}; + auto input = std::make_shared(element::f32, input_shape); + auto weights_1 = create_constant_with_zeros(weights_shape1, {{0, 1, 2, 3}, {}, {}, {}}); + auto conv1 = std::make_shared(input, weights_1, Strides(2, 1), + CoordinateDiff(2, 0), CoordinateDiff(2, 0), Strides(2, 1)); + + auto weights_2 = create_constant_with_zeros(weights_shape2, {{7, 8, 9, 10}, {}, {}, {}}); + auto conv2 = std::make_shared(input, weights_2, Strides(2, 1), + CoordinateDiff(2, 0), CoordinateDiff(2, 0), Strides(2, 1)); + + auto weights_3 = create_constant_with_zeros(weights_shape3, {{4, 5, 6, 7}, {}, {}, {}}); + auto conv3 = std::make_shared(input, weights_3, Strides(2, 1), + CoordinateDiff(2, 0), CoordinateDiff(2, 0), Strides(2, 1)); + + auto concat = std::make_shared(OutputVector{conv1->output(0), conv2->output(0), conv3->output(0)}, 1); + + auto weights_out_conv = create_constant_with_zeros(weight_shape_out_conv, {{}, {}, {}, {}}); + auto conv_out = std::make_shared(concat, weights_out_conv, Strides(2, 1), + 
CoordinateDiff(2, 0), CoordinateDiff(2, 0), Strides(2, 1)); + + auto f = std::make_shared(NodeVector{conv_out}, ParameterVector{input}); + + pass::Manager m; + m.register_pass(); + m.register_pass(); + m.run_passes(f); + + compare_masks(*getMask(weights_1.get_node_shared_ptr()->output(0)), Mask({{0, 1, 2, 3}, {}, {}, {}})); + compare_masks(*getMask(conv1->output(0)), Mask({{}, {0, 1, 2, 3}, {}, {}})); + + compare_masks(*getMask(weights_2.get_node_shared_ptr()->output(0)), Mask({{7, 8, 9, 10}, {}, {}, {}})); + compare_masks(*getMask(conv2->output(0)), Mask({{}, {7, 8, 9, 10}, {}, {}})); + + compare_masks(*getMask(weights_3.get_node_shared_ptr()->output(0)), Mask({{4, 5, 6, 7}, {}, {}, {}})); + compare_masks(*getMask(conv3->output(0)), Mask({{}, {4, 5, 6, 7}, {}, {}})); + + compare_masks(*getMask(concat->output(0)), Mask({{}, {0, 1, 2, 3, 15, 16, 17, 18, 28, 29, 30, 31}, {}, {}})); + compare_masks(*getMask(weights_out_conv.get_node_shared_ptr()->output(0)), Mask({{}, {0, 1, 2, 3, 15, 16, 17, 18, 28, 29, 30, 31}, {}, {}})); +} + + +TEST(TransformationTests, TestConcatMaskPropagationUp) { + Shape input_shape{1, 3, 64, 64}; + Shape weights_shape1{8, 3, 3, 3}; + Shape weights_shape2{16, 3, 3, 3}; + Shape weights_shape3{8, 3, 3, 3}; + + Shape weight_shape_out_conv{3, 32, 3, 3}; + auto input = std::make_shared(element::f32, input_shape); + auto weights_1 = create_constant_with_zeros(weights_shape1, {{0, 1, 2, 3, 4, 5}, {}, {}, {}}); + auto conv1 = std::make_shared(input, weights_1, Strides(2, 1), + CoordinateDiff(2, 0), CoordinateDiff(2, 0), Strides(2, 1)); + + auto weights_2 = create_constant_with_zeros(weights_shape2, {{7, 8, 9, 10}, {}, {}, {}}); + auto conv2 = std::make_shared(input, weights_2, Strides(2, 1), + CoordinateDiff(2, 0), CoordinateDiff(2, 0), Strides(2, 1)); + + auto weights_3 = create_constant_with_zeros(weights_shape3, {{2, 3, 4, 5, 6, 7}, {}, {}, {}}); + auto conv3 = std::make_shared(input, weights_3, Strides(2, 1), + CoordinateDiff(2, 0), CoordinateDiff(2, 0), Strides(2, 1)); + + auto concat = std::make_shared(OutputVector{conv1->output(0), conv2->output(0), conv3->output(0)}, 1); + + auto add_const = create_constant_with_zeros(Shape{1, 32, 1, 1}, {{}, {0, 1, 2, 3, 15, 16, 17, 18, 28, 29, 30, 31}, {}, {}}); + auto add = std::make_shared(concat, add_const); + + auto weights_out_conv = create_constant_with_zeros(weight_shape_out_conv, {{}, {}, {}, {}}); + auto conv_out = std::make_shared(add, weights_out_conv, Strides(2, 1), + CoordinateDiff(2, 0), CoordinateDiff(2, 0), Strides(2, 1)); + + auto f = std::make_shared(NodeVector{conv_out}, ParameterVector{input}); + + pass::Manager m; + m.register_pass(); + m.register_pass(); + m.run_passes(f); + + compare_masks(*getMask(weights_1.get_node_shared_ptr()->output(0)), Mask({{0, 1, 2, 3}, {}, {}, {}})); + compare_masks(*getMask(conv1->output(0)), Mask({{}, {0, 1, 2, 3}, {}, {}})); + + compare_masks(*getMask(weights_2.get_node_shared_ptr()->output(0)), Mask({{7, 8, 9, 10}, {}, {}, {}})); + compare_masks(*getMask(conv2->output(0)), Mask({{}, {7, 8, 9, 10}, {}, {}})); + + compare_masks(*getMask(weights_3.get_node_shared_ptr()->output(0)), Mask({{4, 5, 6, 7}, {}, {}, {}})); + compare_masks(*getMask(conv3->output(0)), Mask({{}, {4, 5, 6, 7}, {}, {}})); + + compare_masks(*getMask(add_const.get_node_shared_ptr()->output(0)), Mask({{}, {0, 1, 2, 3, 15, 16, 17, 18, 28, 29, 30, 31}, {}, {}})); + compare_masks(*getMask(add->output(0)), Mask({{}, {0, 1, 2, 3, 15, 16, 17, 18, 28, 29, 30, 31}, {}, {}})); + + + 
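+    // In the Concat mask checked next, indices 15..18 are conv2's {7, 8, 9, 10} shifted by
+    // conv1's 8 output channels, and 28..31 are conv3's {4, 5, 6, 7} shifted by the 8 + 16
+    // channels that precede it.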
compare_masks(*getMask(concat->output(0)), Mask({{}, {0, 1, 2, 3, 15, 16, 17, 18, 28, 29, 30, 31}, {}, {}})); + compare_masks(*getMask(weights_out_conv.get_node_shared_ptr()->output(0)), Mask({{}, {0, 1, 2, 3, 15, 16, 17, 18, 28, 29, 30, 31}, {}, {}})); +} + + +TEST(TransformationTests, TestConcatMaskPropagationUpEmpty) { + Shape input_shape{1, 3, 64, 64}; + Shape weights_shape1{8, 3, 3, 3}; + Shape weights_shape2{16, 3, 3, 3}; + Shape weights_shape3{8, 3, 3, 3}; + + Shape weight_shape_out_conv{3, 32, 3, 3}; + auto input = std::make_shared(element::f32, input_shape); + auto weights_1 = create_constant_with_zeros(weights_shape1, {{0, 1, 2, 3, 4, 5}, {}, {}, {}}); + auto conv1 = std::make_shared(input, weights_1, Strides(2, 1), + CoordinateDiff(2, 0), CoordinateDiff(2, 0), Strides(2, 1)); + + auto weights_2 = create_constant_with_zeros(weights_shape2, {{7, 8, 9, 10}, {}, {}, {}}); + auto conv2 = std::make_shared(input, weights_2, Strides(2, 1), + CoordinateDiff(2, 0), CoordinateDiff(2, 0), Strides(2, 1)); + + auto weights_3 = create_constant_with_zeros(weights_shape3, {{2, 3, 4, 5, 6, 7}, {}, {}, {}}); + auto conv3 = std::make_shared(input, weights_3, Strides(2, 1), + CoordinateDiff(2, 0), CoordinateDiff(2, 0), Strides(2, 1)); + + auto concat = std::make_shared(OutputVector{conv1->output(0), conv2->output(0), conv3->output(0)}, 1); + + auto add_const = create_constant_with_zeros(Shape{1, 32, 1, 1}, {{}, {0, 1, 2, 3, 15, 16, 17, 18, 28, 29, 30, 31}, {}, {}}); + auto add = std::make_shared(concat, add_const); + + auto f = std::make_shared(NodeVector{add}, ParameterVector{input}); + + pass::Manager m; + m.register_pass(); + m.register_pass(); + m.run_passes(f); + + compare_masks(*getMask(weights_1.get_node_shared_ptr()->output(0)), Mask({{}, {}, {}, {}})); + compare_masks(*getMask(conv1->output(0)), Mask({{}, {}, {}, {}})); + + compare_masks(*getMask(weights_2.get_node_shared_ptr()->output(0)), Mask({{}, {}, {}, {}})); + compare_masks(*getMask(conv2->output(0)), Mask({{}, {}, {}, {}})); + + compare_masks(*getMask(weights_3.get_node_shared_ptr()->output(0)), Mask({{}, {}, {}, {}})); + compare_masks(*getMask(conv3->output(0)), Mask({{}, {}, {}, {}})); + + compare_masks(*getMask(add_const.get_node_shared_ptr()->output(0)), Mask({{}, {}, {}, {}})); + compare_masks(*getMask(add->output(0)), Mask({{}, {}, {}, {}})); + + + compare_masks(*getMask(concat->output(0)), Mask({{}, {}, {}, {}})); +} From 327c3149f62e3eb186d0550e9d558d694e4c7a25 Mon Sep 17 00:00:00 2001 From: Anastasia Popova Date: Tue, 8 Jun 2021 09:53:39 +0300 Subject: [PATCH 10/41] Fixed feature dimension calculation in grouped_convolutions_fusing(). (#6054) * Fixed feature dim calculation in concat_convolutions(). * Fixed feature dim calculation in concat_convolutions(). * Added comments, added unit test. 
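To make the permutation logic concrete, here is a small standalone sketch (not Model Optimizer code; the HWIO-to-OIHW permutation below is only an example matching the new unit test) of how looking up the zero entry of the inverse permutation recovers the original feature axis, mirroring the `np.where(gconv.get_weights_permute.inv == 0)[0][0]` expression this patch introduces:

```cpp
// Illustrative only: recover the original feature axis of the weights from the
// permutation that moves that axis to position 0. Values are example data.
#include <cstddef>
#include <iostream>
#include <vector>

int main() {
    // perm places the feature axis first, e.g. HWIO weights permuted to OIHW.
    const std::vector<std::size_t> perm = {3, 2, 0, 1};

    // Build the inverse permutation: inv[perm[k]] = k.
    std::vector<std::size_t> inv(perm.size());
    for (std::size_t k = 0; k < perm.size(); ++k)
        inv[perm[k]] = k;

    // The axis whose inverse value is 0 is the original feature dimension.
    std::size_t feature_dim = 0;
    for (std::size_t axis = 0; axis < inv.size(); ++axis)
        if (inv[axis] == 0)
            feature_dim = axis;

    std::cout << "original feature dim: " << feature_dim << "\n";  // prints 3 for HWIO weights
    return 0;
}
```

With perm = [3, 2, 0, 1] the inverse is [2, 3, 1, 0], so the lookup yields axis 3 — the 16-channel output dimension of the [3, 3, 4, 16] weights used in the test, regardless of graph layout.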
--- .../middle/passes/fusing/fuse_grouped_conv.py | 5 +- .../passes/fusing/fuse_grouped_conv_test.py | 106 ++++++++++++++++++ 2 files changed, 110 insertions(+), 1 deletion(-) create mode 100644 model-optimizer/unit_tests/mo/middle/passes/fusing/fuse_grouped_conv_test.py diff --git a/model-optimizer/mo/middle/passes/fusing/fuse_grouped_conv.py b/model-optimizer/mo/middle/passes/fusing/fuse_grouped_conv.py index afb3de7648f..bc256ca4990 100644 --- a/model-optimizer/mo/middle/passes/fusing/fuse_grouped_conv.py +++ b/model-optimizer/mo/middle/passes/fusing/fuse_grouped_conv.py @@ -87,7 +87,10 @@ def concat_convolutions(graph: Graph, start_node: Node, last_node: Node): weights_value = np.array(weights_node.value) bias_value = np.array(bias_node.value) if has_biases else None - feature_dim = 3 if graph.graph['layout'] == 'NHWC' else 0 + # gconv.get_weights_permute.perm contains permutation indices + # where feature dimension is set to zero position, so 0 value + # in gconv.get_weights_permute.inv indicates original feature dimension index + feature_dim = np.where(gconv.get_weights_permute.inv == 0)[0][0] for conv in conv_nodes[1:]: weights_value = np.concatenate((weights_value, conv.in_node(1).value), axis=feature_dim) diff --git a/model-optimizer/unit_tests/mo/middle/passes/fusing/fuse_grouped_conv_test.py b/model-optimizer/unit_tests/mo/middle/passes/fusing/fuse_grouped_conv_test.py new file mode 100644 index 00000000000..8e8bc61077a --- /dev/null +++ b/model-optimizer/unit_tests/mo/middle/passes/fusing/fuse_grouped_conv_test.py @@ -0,0 +1,106 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import unittest + +import numpy as np + +from mo.front.common.partial_infer.utils import int64_array +from mo.graph.graph import Node +from mo.middle.passes.fusing.fuse_grouped_conv import grouped_convolutions_fusing +from mo.ops.op import PermuteAttrs +from mo.utils.ir_engine.compare_graphs import compare_graphs +from unit_tests.utils.graph import build_graph, result, connect, regular_op_with_shaped_data, regular_op, shaped_data, \ + valued_const_with_data, shaped_const_with_data, valued_data + +nodes = { + **regular_op_with_shaped_data('placeholder1', [1, 16, 10, 10], {'type': 'Parameter'}), + + **valued_const_with_data('split_1_axis', int64_array(1), {'type': 'Const'}), + **regular_op('split_1', {'type': 'Split', 'can_be_fused': True}), + **shaped_data('split_1_data1', [1, 4, 10, 10]), + **shaped_data('split_1_data2', [1, 4, 10, 10]), + **shaped_data('split_1_data3', [1, 4, 10, 10]), + **shaped_data('split_1_data4', [1, 4, 10, 10]), + + **shaped_const_with_data('split_2_in_const_weights', int64_array([3, 3, 4, 16]), {'type': 'Const'}), + **regular_op('split_2', {'type': 'Split'}), + **valued_data('split_2_data1', np.zeros([3, 3, 4, 4])), + **valued_data('split_2_data2', np.zeros([3, 3, 4, 4])), + **valued_data('split_2_data3', np.zeros([3, 3, 4, 4])), + **valued_data('split_2_data4', np.zeros([3, 3, 4, 4])), + + **regular_op_with_shaped_data('conv2d_1', [1, 4, 8, 8], + {'type': 'Convolution', 'channel_dims': np.array([1]), 'pad': np.array([2, 2]), + 'stride': np.array([2, 2]), + 'get_weights_permute': PermuteAttrs.Permutation(perm=int64_array([3, 2, 0, 1]), + inv=int64_array([2, 3, 1, 0])), + 'group': 1, 'output': 4, 'output_shape': [1, 4, 8, 8], 'can_be_fused': True}), + **regular_op_with_shaped_data('conv2d_2', [1, 4, 8, 8], + {'type': 'Convolution', 'pad': np.array([2, 2]), 'stride': np.array([2, 2]), + 'can_be_fused': True}), + 
**regular_op_with_shaped_data('conv2d_3', [1, 4, 8, 8], + {'type': 'Convolution', 'pad': np.array([2, 2]), 'stride': np.array([2, 2]), + 'can_be_fused': True}), + **regular_op_with_shaped_data('conv2d_4', [1, 4, 8, 8], + {'type': 'Convolution', 'pad': np.array([2, 2]), 'stride': np.array([2, 2]), + 'can_be_fused': True}), + + **regular_op_with_shaped_data('concat', [1, 16, 8, 8], {'type': 'Concat', 'axis': np.array(1)}), + + **regular_op_with_shaped_data('fused_group_conv', [1, 16, 8, 8], + {'type': 'Convolution', 'channel_dims': np.array([1]), 'pad': np.array([2, 2]), + 'stride': np.array([2, 2]), + 'get_weights_permute': PermuteAttrs.Permutation(perm=int64_array([3, 2, 0, 1]), + inv=int64_array([2, 3, 1, 0])), + 'group': 1, 'output': 4, 'output_shape': [1, 4, 8, 8], 'can_be_fused': True}), + **shaped_const_with_data('new_weights_const', int64_array([3, 3, 4, 16]), {'type': 'Const'}), + + **result('result') +} + + +class FuseGroupedConvTest(unittest.TestCase): + def test_fuse_grouped_conv(self): + graph = build_graph(nodes, [*connect('placeholder1', '0:split_1'), *connect('split_1_axis', '1:split_1'), + ('split_1', 'split_1_data1', {'out': 0}), + ('split_1', 'split_1_data2', {'out': 1}), + ('split_1', 'split_1_data3', {'out': 2}), + ('split_1', 'split_1_data4', {'out': 3}), + + *connect('split_2_in_const_weights', 'split_2'), + ('split_2', 'split_2_data1', {'out': 0}), + ('split_2', 'split_2_data2', {'out': 1}), + ('split_2', 'split_2_data3', {'out': 2}), + ('split_2', 'split_2_data4', {'out': 3}), + + ('split_1_data1', 'conv2d_1', {'in': 0}), + ('split_1_data2', 'conv2d_2', {'in': 0}), + ('split_1_data3', 'conv2d_3', {'in': 0}), + ('split_1_data4', 'conv2d_4', {'in': 0}), + + ('split_2_data1', 'conv2d_1', {'in': 1}), + ('split_2_data2', 'conv2d_2', {'in': 1}), + ('split_2_data3', 'conv2d_3', {'in': 1}), + ('split_2_data4', 'conv2d_4', {'in': 1}), + + *connect('conv2d_1', '0:concat'), + *connect('conv2d_2', '1:concat'), + *connect('conv2d_3', '2:concat'), + *connect('conv2d_4', '3:concat'), + + *connect('concat', 'result')]) + + graph_ref = build_graph(nodes, [*connect('placeholder1', '0:fused_group_conv'), + *connect('new_weights_const', '1:fused_group_conv'), + *connect('fused_group_conv', 'result')]) + + graph.graph['layout'] = 'NCHW' + grouped_convolutions_fusing(graph) + + (flag, resp) = compare_graphs(graph, graph_ref, 'result') + self.assertTrue(flag, resp) + + group_conv_node = Node(graph, 'conv2d_1') + group_conv_weights_shape = group_conv_node.in_node(1).shape + self.assertTrue((group_conv_weights_shape == int64_array([3, 3, 4, 16])).all()) From d3beab79b22b84268d489d727295641c0817f289 Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Tue, 8 Jun 2021 10:12:35 +0300 Subject: [PATCH 11/41] [nG] fix strided_slice_to_crop (#6032) --- .../convert_strided_slice_to_crop.cpp | 11 ++-- .../convert_strided_slice_to_crop_test.cpp | 52 ++++++++++++++++++- 2 files changed, 58 insertions(+), 5 deletions(-) diff --git a/inference-engine/src/legacy_api/src/transformations/convert_opset1_to_legacy/convert_strided_slice_to_crop.cpp b/inference-engine/src/legacy_api/src/transformations/convert_opset1_to_legacy/convert_strided_slice_to_crop.cpp index a3ec122b9c7..cab07f54a76 100644 --- a/inference-engine/src/legacy_api/src/transformations/convert_opset1_to_legacy/convert_strided_slice_to_crop.cpp +++ b/inference-engine/src/legacy_api/src/transformations/convert_opset1_to_legacy/convert_strided_slice_to_crop.cpp @@ -9,7 +9,6 @@ #include #include - #include #include @@ -137,7 +136,6 @@ 
ngraph::pass::ConvertStridedSliceToCropMatcher::ConvertStridedSliceToCropMatcher lb = std::min(static_cast(input_shape[input_shape_idx]), lb); ub = std::min(static_cast(input_shape[input_shape_idx]), ub); - offset.emplace_back(lb); // set default value for stride or use given value int64_t stride = 1; @@ -153,6 +151,7 @@ ngraph::pass::ConvertStridedSliceToCropMatcher::ConvertStridedSliceToCropMatcher ub = -1; lb = std::min(lb, static_cast(input_shape[input_shape_idx]) - 1); + offset.emplace_back(lb); lb -= 1; // we always get 1st element, so we need decrease range if (ub <= lb) dimension = (ub - lb) / stride + 1; @@ -160,12 +159,16 @@ ngraph::pass::ConvertStridedSliceToCropMatcher::ConvertStridedSliceToCropMatcher // apply masks if (begin_mask.count(axis)) lb = 0; - if (end_mask.count(axis)) + offset.emplace_back(lb); + + if (end_mask.count(axis)) { ub = static_cast(input_shape[input_shape_idx]); + } lb += 1; // we always get 1st element, so we need decrease range - if (ub >= lb) + if (ub >= lb) { dimension = (ub - lb) / stride + 1; + } } dim.emplace_back(dimension); diff --git a/inference-engine/tests/functional/inference_engine/transformations/convert_strided_slice_to_crop_test.cpp b/inference-engine/tests/functional/inference_engine/transformations/convert_strided_slice_to_crop_test.cpp index be5560f367e..731d96bbd3a 100644 --- a/inference-engine/tests/functional/inference_engine/transformations/convert_strided_slice_to_crop_test.cpp +++ b/inference-engine/tests/functional/inference_engine/transformations/convert_strided_slice_to_crop_test.cpp @@ -53,6 +53,7 @@ TEST(TransformationTests, ConvertStridedSliceToCropTests1) { manager.register_pass(); manager.register_pass(); manager.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); } @@ -238,4 +239,53 @@ TEST(TransformationTests, ConvertStridedSliceToCropNegative2) { auto res = compare_functions(f, f_ref); ASSERT_TRUE(res.first) << res.second; -} \ No newline at end of file +} + + +TEST(TransformationTests, ConvertStridedSliceToCropNoneZeroBeginValuesWithMask) { + // when begin_mask/end_mask are present begin/end values should not affect output shape + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto input = std::make_shared(ngraph::element::f32, ngraph::Shape{1, 2, 4}); + auto slice_begin = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {0, 3, 2, 1}); + auto slice_end = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {0, 0, 0, 2}); + auto slice_stride = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {1, 1, 1, 1}); + + std::vector begin_mask = {1, 0, 1, 1}; + std::vector end_mask = {1, 0, 1, 0}; + std::vector new_axis_mask = {0, 1, 0, 0}; + std::vector shrink_axis_mask = {0, 0, 0, 0}; + std::vector ellipsis_mask = {0, 0, 0, 0}; + + auto sslice = std::make_shared(input, slice_begin, slice_end, slice_stride, + begin_mask, end_mask, + new_axis_mask, shrink_axis_mask, ellipsis_mask); + sslice->set_friendly_name("strided_slice"); + + f = std::make_shared(ngraph::NodeVector{sslice}, ngraph::ParameterVector{input}); + ngraph::pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + manager.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + + { + auto input = std::make_shared(ngraph::element::f32, ngraph::Shape{1, 2, 4}); + + std::vector axes = {0, 1, 2, 3}; + std::vector dim = {1, 1, 2, 2}; + std::vector offset = {0, 0, 0, 0}; + + auto reshape = ngraph::op::util::reshapeTo(input, {1, 1, 2, 4}); + 
reshape->set_friendly_name("strided_slice/Reshape_for_Crop"); + + auto crop = std::make_shared(reshape, axes, dim, offset); + crop->set_friendly_name("strided_slice"); + + f_ref = std::make_shared(ngraph::NodeVector{crop}, ngraph::ParameterVector{input}); + } + + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} From 4409a74dcfd82af8b962d4e6b31f9b92934fff81 Mon Sep 17 00:00:00 2001 From: Anton Romanov Date: Tue, 8 Jun 2021 10:16:37 +0300 Subject: [PATCH 12/41] samples: Fixed klocwork issues in speech (#6066) --- .../samples/speech_sample/fileutils.cpp | 39 +++++++++++-------- thirdparty/cnpy/cnpy.cpp | 12 +++--- 2 files changed, 29 insertions(+), 22 deletions(-) diff --git a/inference-engine/samples/speech_sample/fileutils.cpp b/inference-engine/samples/speech_sample/fileutils.cpp index f3211a21a4b..102cca25297 100644 --- a/inference-engine/samples/speech_sample/fileutils.cpp +++ b/inference-engine/samples/speech_sample/fileutils.cpp @@ -108,15 +108,18 @@ void NumpyFile::GetFileInfo(const char* fileName, uint32_t numArrayToFindSize, u cnpy::npz_t my_npz1 = cnpy::npz_load(fileName); auto it = my_npz1.begin(); std::advance(it, numArrayToFindSize); + if (it != my_npz1.end()) { + numArrays = my_npz1.size(); + cnpy::NpyArray my_npy = it->second; + numMemoryBytes = my_npy.data_holder->size(); - numArrays = my_npz1.size(); - cnpy::NpyArray my_npy = it->second; - numMemoryBytes = my_npy.data_holder->size(); - - if (ptrNumArrays != NULL) - *ptrNumArrays = numArrays; - if (ptrNumMemoryBytes != NULL) - *ptrNumMemoryBytes = numMemoryBytes; + if (ptrNumArrays != NULL) + *ptrNumArrays = numArrays; + if (ptrNumMemoryBytes != NULL) + *ptrNumMemoryBytes = numMemoryBytes; + } else { + throw std::runtime_error(std::string("Failed to get info %s GetFileInfo()!\n") + fileName); + } } void NumpyFile::LoadFile(const char* fileName, uint32_t arrayIndex, std::string& ptrName, std::vector& memory, uint32_t* ptrNumRows, @@ -124,16 +127,20 @@ void NumpyFile::LoadFile(const char* fileName, uint32_t arrayIndex, std::string& cnpy::npz_t my_npz1 = cnpy::npz_load(fileName); auto it = my_npz1.begin(); std::advance(it, arrayIndex); - ptrName = it->first; - cnpy::NpyArray my_npy = it->second; - *ptrNumRows = my_npy.shape[0]; - *ptrNumColumns = my_npy.shape[1]; + if (it != my_npz1.end()) { + ptrName = it->first; + cnpy::NpyArray my_npy = it->second; + *ptrNumRows = my_npy.shape[0]; + *ptrNumColumns = my_npy.shape[1]; - for (size_t i = 0; i < my_npy.data_holder->size(); i++) { - memory.at(i) = my_npy.data_holder->at(i); + for (size_t i = 0; i < my_npy.data_holder->size(); i++) { + memory.at(i) = my_npy.data_holder->at(i); + } + + *ptrNumBytesPerElement = sizeof(float); + } else { + throw std::runtime_error(std::string("Failed to open %s for reading in LoadFile()!\n") + fileName); } - - *ptrNumBytesPerElement = sizeof(float); } void NumpyFile::SaveFile(const char* fileName, bool shouldAppend, std::string name, void* ptrMemory, uint32_t numRows, uint32_t numColumns) { diff --git a/thirdparty/cnpy/cnpy.cpp b/thirdparty/cnpy/cnpy.cpp index a3a3e0ef406..26d0614bca1 100644 --- a/thirdparty/cnpy/cnpy.cpp +++ b/thirdparty/cnpy/cnpy.cpp @@ -183,9 +183,9 @@ void cnpy::parse_zip_footer(FILE* fp, uint16_t& nrecs, size_t& global_header_siz } cnpy::NpyArray load_the_npy_file(FILE* fp) { - std::vector shape; - size_t word_size; - bool fortran_order; + std::vector shape(0); + size_t word_size = 0; + bool fortran_order = false; cnpy::parse_npy_header(fp,word_size,shape,fortran_order); if (word_size >= 0 && 
word_size < ULLONG_MAX) { cnpy::NpyArray arr(shape, word_size, fortran_order); @@ -225,9 +225,9 @@ cnpy::NpyArray load_the_npz_array(FILE* fp, uint32_t compr_bytes, uint32_t uncom err = inflate(&d_stream, Z_FINISH); err = inflateEnd(&d_stream); - std::vector shape; - size_t word_size; - bool fortran_order; + std::vector shape(0); + size_t word_size = 0; + bool fortran_order = false; cnpy::parse_npy_header(&buffer_uncompr[0],word_size,shape,fortran_order); if (word_size >= 0 && word_size < ULLONG_MAX) { cnpy::NpyArray array(shape, word_size, fortran_order); From 98f45ffbddc94ea480fa7e182345ba6dcaa7f680 Mon Sep 17 00:00:00 2001 From: Maxim Andronov Date: Tue, 8 Jun 2021 10:19:25 +0300 Subject: [PATCH 13/41] ConvertInterpolate1ToInterpolate4 fixes (#6019) * half_pixel -> asymmetric and round_prefer_floor -> simple in ConvertInterpolate1ToInterpolate4 * test fix --- .../convert_interpolate1_to_interpolate4.cpp | 10 ++++++++-- .../convert_interpolate1_to_interpolate4_test.cpp | 12 ++++++------ 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/inference-engine/src/transformations/src/transformations/op_conversions/convert_interpolate1_to_interpolate4.cpp b/inference-engine/src/transformations/src/transformations/op_conversions/convert_interpolate1_to_interpolate4.cpp index 94173079c62..36a58551a68 100644 --- a/inference-engine/src/transformations/src/transformations/op_conversions/convert_interpolate1_to_interpolate4.cpp +++ b/inference-engine/src/transformations/src/transformations/op_conversions/convert_interpolate1_to_interpolate4.cpp @@ -68,14 +68,20 @@ ngraph::pass::ConvertInterpolate1ToInterpolate4::ConvertInterpolate1ToInterpolat return false; } attrsV4.shape_calculation_mode = ngraph::opset4::Interpolate::ShapeCalcMode::sizes; - attrsV4.nearest_mode = ngraph::opset4::Interpolate::NearestMode::round_prefer_floor; + attrsV4.nearest_mode = ngraph::opset4::Interpolate::NearestMode::simple; attrsV4.pads_begin = attrsV0.pads_begin; attrsV4.pads_end = attrsV0.pads_end; attrsV4.antialias = attrsV0.antialias; - attrsV4.coordinate_transformation_mode = ngraph::opset4::Interpolate::CoordinateTransformMode::half_pixel; + attrsV4.coordinate_transformation_mode = ngraph::opset4::Interpolate::CoordinateTransformMode::asymmetric; attrsV4.cube_coeff = -0.75f; if (attrsV0.align_corners) { attrsV4.coordinate_transformation_mode = ngraph::opset4::Interpolate::CoordinateTransformMode::align_corners; + } else if ((attrsV4.mode == ngraph::op::v4::Interpolate::InterpolateMode::linear_onnx || + attrsV4.mode == ngraph::op::v4::Interpolate::InterpolateMode::linear) && + std::all_of(attrsV4.pads_begin.begin(), attrsV4.pads_begin.end(), [](size_t i){return i == 0;}) && + std::all_of(attrsV4.pads_end.begin(), attrsV4.pads_end.end(), [](size_t i){return i == 0;}) && + !(input_shape_rank - 2 == 2 && attrsV0.axes == AxisSet{2, 3})) { + attrsV4.coordinate_transformation_mode = ngraph::opset4::Interpolate::CoordinateTransformMode::half_pixel; } auto interpolateV4 = std::make_shared(interpolationV0->input_value(0), interpolationV0->input_value(1), diff --git a/inference-engine/tests/functional/inference_engine/transformations/convert_interpolate1_to_interpolate4_test.cpp b/inference-engine/tests/functional/inference_engine/transformations/convert_interpolate1_to_interpolate4_test.cpp index 9468db9287d..12177f78cbc 100644 --- a/inference-engine/tests/functional/inference_engine/transformations/convert_interpolate1_to_interpolate4_test.cpp +++ 
b/inference-engine/tests/functional/inference_engine/transformations/convert_interpolate1_to_interpolate4_test.cpp @@ -54,7 +54,7 @@ TEST(TransformationTests, ConvertInterpolate1ToInterpolate4) { auto interpolate4_attr = opset4::Interpolate::InterpolateAttrs(opset4::Interpolate::InterpolateMode::nearest, opset4::Interpolate::ShapeCalcMode::sizes, std::vector{0, 0, 0, 0}, std::vector{0, 0, 0, 0}, - opset4::Interpolate::CoordinateTransformMode::asymmetric, opset4::Interpolate::NearestMode::floor, + opset4::Interpolate::CoordinateTransformMode::asymmetric, opset4::Interpolate::NearestMode::simple, false, -0.75); auto interpolate4 = std::make_shared(data_node, out_shape_node, default_scales_node, axes_node, interpolate4_attr); @@ -62,7 +62,7 @@ TEST(TransformationTests, ConvertInterpolate1ToInterpolate4) { f_ref = std::make_shared(NodeVector{interpolate4}, ParameterVector{data_node}); } - auto res = compare_functions(f, f_ref); + auto res = compare_functions(f, f_ref, true, false, false, true, true); ASSERT_TRUE(res.first) << res.second; } @@ -97,16 +97,16 @@ TEST(TransformationTests, ConvertInterpolate1ToInterpolate4_1) { auto default_scales_node = opset1::Constant::create(ngraph::element::f32, Shape{2}, {4.0f / 3.0f, 4.0f / 3.0f}); auto axes_node = opset1::Constant::create(ngraph::element::i64, Shape{2}, {2, 3}); - auto interpolate4_attr = opset4::Interpolate::InterpolateAttrs(opset4::Interpolate::InterpolateMode::linear, + auto interpolate4_attr = opset4::Interpolate::InterpolateAttrs(opset4::Interpolate::InterpolateMode::linear_onnx, opset4::Interpolate::ShapeCalcMode::sizes, std::vector{0, 0, 0, 0}, std::vector{0, 0, 0, 0}, - opset4::Interpolate::CoordinateTransformMode::align_corners, opset4::Interpolate::NearestMode::floor, - false, -0.75); + opset4::Interpolate::CoordinateTransformMode::asymmetric, opset4::Interpolate::NearestMode::simple, + true, -0.75); auto interpolate4 = std::make_shared(data_node, out_shape_node, default_scales_node, axes_node, interpolate4_attr); f_ref = std::make_shared(NodeVector{interpolate4}, ParameterVector{data_node}); } - auto res = compare_functions(f, f_ref); + auto res = compare_functions(f, f_ref, true, false, false, true, true); ASSERT_TRUE(res.first) << res.second; } From a99343f4ebd432324eb1e7262db1c77ad4fe7806 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Tue, 8 Jun 2021 11:00:02 +0300 Subject: [PATCH 14/41] Python tools (#6067) * Removed useless lines * Enable find PythonInterp only if we ENABLE_PYTHON * Enable docs only if python interp is found --- cmake/features.cmake | 5 ++-- .../conditional_compilation/CMakeLists.txt | 5 +--- tools/CMakeLists.txt | 23 ++++++++++--------- 3 files changed, 16 insertions(+), 17 deletions(-) diff --git a/cmake/features.cmake b/cmake/features.cmake index fe1b8919b51..adb1fad2523 100644 --- a/cmake/features.cmake +++ b/cmake/features.cmake @@ -18,8 +18,6 @@ Supported values:\ ie_option (ENABLE_PROFILING_FIRST_INFERENCE "Build with ITT tracing of first inference time." ON) -ie_option (ENABLE_DOCS "Build docs using Doxygen" OFF) - ie_option(ENABLE_TEMPLATE_PLUGIN "Register template plugin into plugins.xml" OFF) ie_option_enum(SELECTIVE_BUILD "Enable OpenVINO conditional compilation or statistics collection. 
\ @@ -33,6 +31,9 @@ ie_option(ENABLE_ERROR_HIGHLIGHT "Highlight errors and warnings during compile t find_package(PythonLibs 3 QUIET) ie_dependent_option (ENABLE_PYTHON "enables ie python bridge build" OFF "PYTHONLIBS_FOUND" OFF) +find_package(PythonInterp 3 QUIET) +ie_dependent_option (ENABLE_DOCS "Build docs using Doxygen" OFF "PYTHONINTERP_FOUND" OFF) + # # enable or disable output from NGRAPH_DEBUG statements # diff --git a/openvino/conditional_compilation/CMakeLists.txt b/openvino/conditional_compilation/CMakeLists.txt index 4a27ac50b7c..1db12d4eefb 100644 --- a/openvino/conditional_compilation/CMakeLists.txt +++ b/openvino/conditional_compilation/CMakeLists.txt @@ -19,10 +19,7 @@ elseif(SELECTIVE_BUILD STREQUAL "ON") message(FATAL_ERROR "In case SELECTIVE_BUILD is enabled, the SELECTIVE_BUILD_STAT variable should contain the path to the collected InelSEAPI statistics.\ Usage: -DSELECTIVE_BUILD=ON -DSELECTIVE_BUILD_STAT=/path/*.csv") endif() - find_package (PythonInterp REQUIRED) - if(NOT PYTHON_VERSION_MAJOR EQUAL 3) - message(FATAL_ERROR " Python3 wasn't found!") - endif() + find_package (PythonInterp 3 REQUIRED) file(GLOB STAT_FILES ${SELECTIVE_BUILD_STAT}) diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index 0a09df95faa..fdc6d9ef11a 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -1,24 +1,25 @@ # Copyright (C) 2018-2021 Intel Corporation # SPDX-License-Identifier: Apache-2.0 cmake_minimum_required(VERSION 3.13) + project(python_tools) if(NOT DEFINED OpenVINO_MAIN_SOURCE_DIR) find_package(InferenceEngineDeveloperPackage QUIET) endif() -find_package(PythonInterp 3 REQUIRED) -set(PYTHON_VERSION python${PYTHON_VERSION_MAJOR}.${PYTHON_VERSION_MINOR}) - -set(TARGET_NAME "python_tools") - -if(WIN32) - set(PYTHON_BRIDGE_OUTPUT_DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/$/python_api/${PYTHON_VERSION}/openvino) -else() - set(PYTHON_BRIDGE_OUTPUT_DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/python_api/${PYTHON_VERSION}/openvino) -endif() - if(ENABLE_PYTHON) + find_package(PythonInterp 3 REQUIRED) + set(PYTHON_VERSION python${PYTHON_VERSION_MAJOR}.${PYTHON_VERSION_MINOR}) + + set(TARGET_NAME "python_tools") + + if(WIN32) + set(PYTHON_BRIDGE_OUTPUT_DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/$/python_api/${PYTHON_VERSION}/openvino) + else() + set(PYTHON_BRIDGE_OUTPUT_DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/python_api/${PYTHON_VERSION}/openvino) + endif() + # creates a copy inside bin directory for developers to have ability running python benchmark_app add_custom_target(${TARGET_NAME} ALL COMMAND ${CMAKE_COMMAND} -E make_directory ${PYTHON_BRIDGE_OUTPUT_DIRECTORY}/tools From 77d8973e91a91bbd3c92316b348599b444545839 Mon Sep 17 00:00:00 2001 From: Maxim Shevtsov Date: Tue, 8 Jun 2021 11:10:05 +0300 Subject: [PATCH 15/41] Perf Intro page brushing (#5861) * brushing * Update Intro_to_Performance.md --- docs/IE_DG/Intro_to_Performance.md | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/docs/IE_DG/Intro_to_Performance.md b/docs/IE_DG/Intro_to_Performance.md index 66fcf48c34f..78d5c59c417 100644 --- a/docs/IE_DG/Intro_to_Performance.md +++ b/docs/IE_DG/Intro_to_Performance.md @@ -1,24 +1,28 @@ # Introduction to the Performance Topics {#openvino_docs_IE_DG_Intro_to_Performance} This section is a shorter version of the -[Optimization Guide](supported_plugins/MULTI.md) for the Intel Deep Learning Deployment Toolkit. 
+[Optimization Guide](../optimization_guide/dldt_optimization_guide.md) for the Intel® Distribution of OpenVINO™ Toolkit. ## Precision Inference precision directly affects the performance. -Model Optimizer can produce an IR with different precision. For example, float16 IR initially targets VPU and GPU devices, while, for example, the CPU can also execute regular float32. -Also, further device-specific inference precision settings are available, for example, [8-bit integer](Int8Inference.md) or [bfloat16](Bfloat16Inference.md) inference on the CPU. -Note that for [MULTI device](supported_plugins/MULTI.md) that supports automatic inference on multiple devices in parallel, you can use the FP16 IR. +Model Optimizer can produce an IR with different precision. For example, an FP16 IR initially targets VPU and GPU devices, while, for example, for the CPU, an FP16 IR is typically up-scaled to the regular FP32 automatically upon loading. But notice that further device-specific inference precision settings are available, +for example, [8-bit integer](Int8Inference.md) or [bfloat16](Bfloat16Inference.md), which is specific to the CPU inference, below. +Note that for the [MULTI device](supported_plugins/MULTI.md) plugin that supports automatic inference on multiple devices in parallel, you can use an FP16 IR (no need for FP32). You can find more information, including preferred data types for specific devices, in the -[Supported Devices](supported_plugins/Supported_Devices.md) section. +[Supported Devices](supported_plugins/Supported_Devices.md) document. -## Lowering Inference Precision -Default optimization is used for CPU and implies that inference is made with lower precision if it is possible on a given platform to reach better performance with acceptable range of accuracy. -This approach can be used for CPU devices where the platform supports the AVX512_BF16 instruction. In this case, a regular float32 model is converted to [bfloat16](Bfloat16Inference.md) internal representation and inference is provided with bfloat16 layers usage. -Below is the example command line to disable this feature on the CPU device with the AVX512_BF16 instruction and execute regular float32. +## Automatic Lowering of the Inference Precision +By default, plugins enable the optimizations that allow lower precision if the acceptable range of accuracy is preserved. +For example, for the CPU that supports the AVX512_BF16 instructions, an FP16/FP32 model is converted to a [bfloat16](Bfloat16Inference.md) IR to accelerate inference. +To compare the associated speedup, run the example command below to disable this feature on the CPU device with the AVX512_BF16 support and get regular FP32 execution: ``` $ benchmark_app -m -enforcebf16=false ``` +Notice that for quantized (e.g. INT8) models the bfloat16 calculations (of the layers that remain in FP32) is disabled by default. +Refer to the [CPU Plugin documentation](supported_plugins/CPU.md) for more details. + +Similarly, the GPU device has a dedicated config key to enable FP16 execution of the layers that remain in FP32 in the quantized models (as the quantization is typically performed on the FP32 models), refer to the ENABLE_FP16_FOR_QUANTIZED_MODELS key in the [GPU Plugin documentation](supported_plugins/CL_DNN.md) ## Latency vs. Throughput One way to increase computational efficiency is batching, which combines many (potentially tens) of @@ -44,17 +48,17 @@ Below is the example command line that limits the execution to the single socket limited to the single socket). 
$ numactl -m 0 --physcpubind 0-27 benchmark_app -m -api sync -nthreads 28 ``` -Note that if you have more than one input, running as many inference requests as you have NUMA nodes (or sockets) +Note that if you have more than one input, running as many inference streams as you have NUMA nodes (or sockets) usually gives the same best latency as a single request on the single socket, but much higher throughput. Assuming two NUMA nodes machine: ``` $ benchmark_app -m -nstreams 2 ``` Number of NUMA nodes on the machine can be queried via 'lscpu'. -Please see more on the NUMA support in the [Optimization Guide](supported_plugins/MULTI.md). +Please see more on the NUMA support in the [Optimization Guide](../optimization_guide/dldt_optimization_guide.md). ## Throughput Mode for CPU Unlike most accelerators, CPU is perceived as an inherently latency-oriented device. -Since 2018 R5 release, the Inference Engine introduced the "throughput" mode, which allows the Inference Engine to efficiently run multiple inference requests on the CPU simultaneously, greatly improving the throughput. +OpenVINO™ toolkit provides a "throughput" mode that allows running multiple inference requests on the CPU simultaneously, which greatly improves the throughput. Internally, the execution resources are split/pinned into execution "streams". Using this feature gains much better performance for the networks that originally are not scaled well with a number of threads (for example, lightweight topologies). This is especially pronounced for the many-core server machines. @@ -62,8 +66,6 @@ Using this feature gains much better performance for the networks that originall Run the [Benchmark App](../../inference-engine/samples/benchmark_app/README.md) and play with number of infer requests running in parallel, next section. Try different values of the `-nstreams` argument from `1` to a number of CPU cores and find one that provides the best performance. -In addition to the number of streams, it is also possible to play with the batch size to find the throughput sweet-spot. - The throughput mode relaxes the requirement to saturate the CPU by using a large batch: running multiple independent inference requests in parallel often gives much better performance, than using a batch only. This allows you to simplify the app-logic, as you don't need to combine multiple inputs into a batch to achieve good CPU performance. Instead, it is possible to keep a separate infer request per camera or another source of input and process the requests in parallel using Async API. 
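To make the throughput-mode guidance above concrete, here is a minimal standalone sketch; the model path, device, and stream count are placeholder example values, input blobs are left at their default-allocated contents for brevity, and it is not part of this patch — it only illustrates the streams-plus-async-requests pattern that benchmark_app's `-nstreams` flag exercises:

```cpp
// Minimal sketch: request several CPU execution streams and keep one
// asynchronous infer request per stream in flight instead of batching.
#include <inference_engine.hpp>
#include <vector>

int main() {
    InferenceEngine::Core core;
    auto network = core.ReadNetwork("model.xml");  // placeholder model path

    // Ask the CPU plugin for 4 execution streams (the "throughput" mode knob).
    auto exec = core.LoadNetwork(network, "CPU",
        {{CONFIG_KEY(CPU_THROUGHPUT_STREAMS), "4"}});

    // One request per stream; inputs are left unset here for brevity.
    std::vector<InferenceEngine::InferRequest> requests;
    for (int i = 0; i < 4; ++i)
        requests.push_back(exec.CreateInferRequest());

    for (auto& r : requests)
        r.StartAsync();
    for (auto& r : requests)
        r.Wait(InferenceEngine::InferRequest::WaitMode::RESULT_READY);
    return 0;
}
```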
From 9049dee86243d7677eb00fd173a3860562516ad7 Mon Sep 17 00:00:00 2001 From: Maxim Andronov Date: Tue, 8 Jun 2021 11:10:56 +0300 Subject: [PATCH 16/41] klocwork 2021.4 (#6042) --- .../InferenceEngine_network_with_state_infer.cpp | 6 ++++++ .../src/convert_function_to_cnn_network.cpp | 3 +++ .../ngraph_transformations/reshape_fc_fusion.cpp | 6 ++++++ .../reshape_fully_connected.cpp | 2 ++ .../src/mkldnn_plugin/nodes/mkldnn_dft_node.cpp | 2 +- .../nodes/mkldnn_gather_elements_node.h | 2 +- .../nodes/mkldnn_strided_slice_node.cpp | 8 ++++++-- .../nodes/mkldnn_tensoriterator_node.cpp | 12 ++++++++++++ 8 files changed, 37 insertions(+), 4 deletions(-) diff --git a/docs/snippets/InferenceEngine_network_with_state_infer.cpp b/docs/snippets/InferenceEngine_network_with_state_infer.cpp index 81a3070ba3b..7af9c076931 100644 --- a/docs/snippets/InferenceEngine_network_with_state_infer.cpp +++ b/docs/snippets/InferenceEngine_network_with_state_infer.cpp @@ -64,7 +64,13 @@ int main(int argc, char *argv[]) { inferRequest.Infer(); // check states auto states = inferRequest.QueryState(); + if (states.empty()) { + throw std::runtime_error("Queried states are empty"); + } auto mstate = as(states[0].GetState()); + if (mstate == nullptr) { + throw std::runtime_error("Can't cast state to MemoryBlob"); + } auto state_buf = mstate->rmap(); float * state =state_buf.as(); std::cout << state[0] << "\n"; diff --git a/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp b/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp index 08b54640ee6..7d92c77219d 100644 --- a/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp +++ b/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp @@ -244,6 +244,9 @@ CNNLayer::Ptr createSubGraphLayer(const std::shared_ptr& layer) { LayerParams params = {layer->get_friendly_name(), "TensorIterator", details::convertPrecision(layer->get_output_element_type(0))}; auto res = std::make_shared(params); + if (res == nullptr) { + IE_THROW() << "Can't create TensorIterator"; + } res->body = body; // Port map: outputs diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_fc_fusion.cpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_fc_fusion.cpp index 09d3e7e0554..b850bd98ae2 100644 --- a/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_fc_fusion.cpp +++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_fc_fusion.cpp @@ -22,7 +22,11 @@ MKLDNNPlugin::ReshapeFullyConnectedFusion::ReshapeFullyConnectedFusion() { ngraph::matcher_pass_callback callback = [this](ngraph::pattern::Matcher &m) { auto fc = std::dynamic_pointer_cast(m.get_match_root()); + if (!fc) + return false; auto reshape = std::dynamic_pointer_cast(fc->get_input_node_shared_ptr(0)); + if (!reshape) + return false; // Check that Reshape reshapes 4D tensor to 2D or input shape = output shape auto shape_in = reshape->input_value(0).get_shape(); @@ -67,6 +71,8 @@ MKLDNNPlugin::ReshapeFullyConnectedFusion::ReshapeFullyConnectedFusion() { fc->input_value(2), outShape, fc->output(0).get_element_type()); + } else { + return false; } new_ops.push_back(new_fc); new_fc->set_friendly_name(fc->get_friendly_name()); diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_fully_connected.cpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_fully_connected.cpp index 999d1b958d8..f140f44e74e 100644 --- 
a/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_fully_connected.cpp +++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_fully_connected.cpp @@ -60,6 +60,8 @@ MKLDNNPlugin::ReshapeFullyConnected::ReshapeFullyConnected() { fc->input_value(2), output_shape_new, fc->get_output_type()); + } else { + return false; } new_ops.push_back(fc_new); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_dft_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_dft_node.cpp index 21fb93728c1..b9ef511d010 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_dft_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_dft_node.cpp @@ -49,7 +49,7 @@ MKLDNNDFTNode::MKLDNNDFTNode(const std::shared_ptr& op, const mkld /* Data */ inputShape = inDims[DATA_INDEX].ToSizeVector(); - if (inputShape.size() < 1) { + if (inputShape.size() < 2) { IE_THROW() << layerErrorPrefix << " has invalid 'data' input tensor with rank: " << inputShape.size(); } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_elements_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_elements_node.h index 30d1fda9e95..bc19866768d 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_elements_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_elements_node.h @@ -32,7 +32,7 @@ private: size_t dataTypeSize_; int strideAxDst_; int dstAxDim_; - int strideAx1Diff_; + int strideAx1Diff_ = 0; std::string errorPrefix_; template diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_strided_slice_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_strided_slice_node.cpp index 07cc72247a5..1b70de9f0f8 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_strided_slice_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_strided_slice_node.cpp @@ -86,7 +86,7 @@ MKLDNNStridedSliceNode::MKLDNNStridedSliceNode(const std::shared_ptrisConstant() && node->getType() == Input; + return node->getType() == Input && node->isConstant(); }; params.parametersAreConstant = isConstantNode(getParentEdgesAtPort(BEGIN_ID)[0]->getParent()) && @@ -138,7 +138,11 @@ void MKLDNNStridedSliceNode::getSupportedDescriptors() { if (params.parametersAreConstant) { auto fillingInParameters = [&](std::vector ¶meter, const size_t type, const size_t size, const int value) { - auto blob = std::dynamic_pointer_cast(getParentEdgesAtPort(type)[0]->getParent())->getMemoryPtr(); + const auto constNode = std::dynamic_pointer_cast(getParentEdgesAtPort(type)[0]->getParent()); + if (!constNode) { + THROW_ERROR << "can't cast node on " << type << " port to MKLDNNInputNode"; + } + auto blob = constNode->getMemoryPtr(); if (blob->GetDataType() != mkldnn::memory::data_type::s32) THROW_ERROR << "supports only parameters input with precision I32"; const int *ptr = static_cast(blob->GetPtr()); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.cpp index c9a53c79e07..d1d80e1b7cb 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.cpp @@ -136,6 +136,9 @@ public: void execute(mkldnn::stream strm, int n_iter) override { auto mem = mem_holder_dst; auto data_ptr = static_cast(mem.get_data_handle()); + if (data_ptr == nullptr) { + IE_THROW() << "TensorIterator node has not allocated memory for IterCountPortHelper"; + } *data_ptr = n_iter; } }; @@ -150,6 
+153,9 @@ public: int getStatus() override { auto data_ptr = static_cast(mem_holder.get_data_handle()); + if (data_ptr == nullptr) { + IE_THROW() << "TensorIterator node has not allocated memory for asBoolCheck"; + } return *data_ptr == static_cast(0) ? 0 : 1; } }; @@ -164,6 +170,9 @@ public: int getStatus() override { auto data_ptr = static_cast(mem_holder.get_data_handle()); + if (data_ptr == nullptr) { + IE_THROW() << "TensorIterator node has not allocated memory for asIntCheck"; + } return *data_ptr; } }; @@ -283,6 +292,9 @@ MKLDNNTensorIteratorNode::MKLDNNTensorIteratorNode(const std::shared_ptr(ngraphOp); + if (tiOp == nullptr) { + IE_THROW() << "Can't cast TensorIterator node with name: " << getName() << " to ngraph::op::util::SubGraphOp"; + } const std::shared_ptr body = tiOp->get_function(); sub_graph.CreateGraph(body, ext_mng, weightCache); From 713e4e1ebe231821e5da6c3e5461d724cb7490c9 Mon Sep 17 00:00:00 2001 From: Mikhail Kozlov Date: Tue, 8 Jun 2021 11:17:04 +0300 Subject: [PATCH 17/41] Fix filling input blob for benchmark_app (#6055) * Fix filling input blob for benchmark_app * Fix clang format --- .../samples/benchmark_app/inputs_filling.cpp | 21 +++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/inference-engine/samples/benchmark_app/inputs_filling.cpp b/inference-engine/samples/benchmark_app/inputs_filling.cpp index e12f7656f17..ef8a045279a 100644 --- a/inference-engine/samples/benchmark_app/inputs_filling.cpp +++ b/inference-engine/samples/benchmark_app/inputs_filling.cpp @@ -39,6 +39,7 @@ std::vector filterFilesByExtensions(const std::vector& return filtered; } +template void fillBlobImage(Blob::Ptr& inputBlob, const std::vector& filePaths, const size_t& batchSize, const benchmark_app::InputInfo& app_info, const size_t& requestId, const size_t& inputId, const size_t& inputSize) { MemoryBlob::Ptr minput = as(inputBlob); @@ -50,7 +51,7 @@ void fillBlobImage(Blob::Ptr& inputBlob, const std::vector& filePat // locked memory holder should be alive all time while access to its buffer // happens auto minputHolder = minput->wmap(); - auto inputBlobData = minputHolder.as(); + auto inputBlobData = minputHolder.as(); /** Collect images data ptrs **/ std::vector> vreader; @@ -90,7 +91,7 @@ void fillBlobImage(Blob::Ptr& inputBlob, const std::vector& filePat size_t offset = imageId * numChannels * width * height + (((app_info.layout == "NCHW") || (app_info.layout == "CHW")) ? 
(ch * width * height + h * width + w) : (h * width * numChannels + w * numChannels + ch)); - inputBlobData[offset] = vreader.at(imageId).get()[h * width * numChannels + w * numChannels + ch]; + inputBlobData[offset] = static_cast(vreader.at(imageId).get()[h * width * numChannels + w * numChannels + ch]); } } } @@ -142,7 +143,7 @@ using uniformDistribution = typename std::conditional::value, std::uniform_int_distribution, void>::type>::type; template -void fillBlobRandom(Blob::Ptr& inputBlob, T rand_min = std::numeric_limits::min(), T rand_max = std::numeric_limits::max()) { +void fillBlobRandom(Blob::Ptr& inputBlob, T rand_min = std::numeric_limits::min(), T rand_max = std::numeric_limits::max()) { MemoryBlob::Ptr minput = as(inputBlob); if (!minput) { IE_THROW() << "We expect inputBlob to be inherited from MemoryBlob in " @@ -270,7 +271,19 @@ void fillBlobs(const std::vector& inputFiles, const size_t& batchSi if (app_info.isImage()) { if (!imageFiles.empty()) { // Fill with Images - fillBlobImage(inputBlob, imageFiles, batchSize, app_info, requestId, imageInputId++, imageInputCount); + if (precision == InferenceEngine::Precision::FP32) { + fillBlobImage(inputBlob, imageFiles, batchSize, app_info, requestId, imageInputId++, imageInputCount); + } else if (precision == InferenceEngine::Precision::FP16) { + fillBlobImage(inputBlob, imageFiles, batchSize, app_info, requestId, imageInputId++, imageInputCount); + } else if (precision == InferenceEngine::Precision::I32) { + fillBlobImage(inputBlob, imageFiles, batchSize, app_info, requestId, imageInputId++, imageInputCount); + } else if (precision == InferenceEngine::Precision::I64) { + fillBlobImage(inputBlob, imageFiles, batchSize, app_info, requestId, imageInputId++, imageInputCount); + } else if (precision == InferenceEngine::Precision::U8) { + fillBlobImage(inputBlob, imageFiles, batchSize, app_info, requestId, imageInputId++, imageInputCount); + } else { + IE_THROW() << "Input precision is not supported for " << item.first; + } continue; } } else { From eadf2c4ce09d4bde216fee4ad48eac1c5162b35b Mon Sep 17 00:00:00 2001 From: Kate Generalova Date: Tue, 8 Jun 2021 12:03:25 +0300 Subject: [PATCH 18/41] samples: update ngraph README (#6070) --- .../ngraph_function_creation_sample/README.md | 16 ++++++++++------ .../ngraph_function_creation_sample/README.md | 2 +- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/inference-engine/ie_bridges/python/sample/ngraph_function_creation_sample/README.md b/inference-engine/ie_bridges/python/sample/ngraph_function_creation_sample/README.md index c33d67103c6..f0701f963ae 100644 --- a/inference-engine/ie_bridges/python/sample/ngraph_function_creation_sample/README.md +++ b/inference-engine/ie_bridges/python/sample/ngraph_function_creation_sample/README.md @@ -1,7 +1,8 @@ # nGraph Function Creation Python* Sample {#openvino_inference_engine_ie_bridges_python_sample_ngraph_function_creation_sample_README} -This sample demonstrates how to execute an inference using [nGraph function feature](../../../../../docs/nGraph_DG/build_function.md) to create a network that uses weights from LeNet classification network. So you don't need an XML file, the model will be created from the source code on the fly. -In addition to regular images, the sample also supports single-channel ubyte images as an input. 
+This sample demonstrates how to execute an inference using [nGraph function feature](../../../../../docs/nGraph_DG/build_function.md) to create a network that uses weights from LeNet classification network, which is known to work well on digit classification tasks. So you don't need an XML file, the model will be created from the source code on the fly. + +In addition to regular grayscale images with a digit, the sample also supports single-channel `ubyte` images as an input. The following Inference Engine Python API is used in the application: @@ -14,6 +15,9 @@ Basic Inference Engine API is covered by [Hello Classification Python* Sample](. | Options | Values | | :------------------------- | :---------------------------------------------------------------------- | +| Validated Models | LeNet (image classification network) | +| Model Format | Network weights file (\*.bin) | +| Validated images | The sample uses OpenCV\* to [read input grayscale image](https://docs.opencv.org/master/d4/da8/group__imgcodecs.html#ga288b8b3da0892bd651fce07b3bbd3a56) (\*.bmp, \*.png) or single-channel `ubyte` image | | Supported devices | [All](../../../../../docs/IE_DG/supported_plugins/Supported_Devices.md) | | Other language realization | [C++](../../../../samples/ngraph_function_creation_sample) | @@ -72,7 +76,7 @@ To run the sample, you need specify a model weights and image: You can do inference of an image using a pre-trained model on a GPU using the following command: ```sh -python ngraph_function_creation_sample.py -m /lenet.bin -i /3.bmp -d GPU +python ngraph_function_creation_sample.py -m /lenet.bin -i /3.png -d GPU ``` ## Sample Output @@ -84,10 +88,10 @@ The sample application logs each step in a standard output stream and outputs to [ INFO ] Loading the network using ngraph function with weights from /lenet.bin [ INFO ] Configuring input and output blobs [ INFO ] Loading the model to the plugin -[ WARNING ] /3.bmp is inverted to white over black -[ WARNING ] /3.bmp is resized from (100, 100) to (28, 28) +[ WARNING ] /3.png is inverted to white over black +[ WARNING ] /3.png is is resized from (351, 353) to (28, 28) [ INFO ] Starting inference in synchronous mode -[ INFO ] Image path: /3.bmp +[ INFO ] Image path: /3.png [ INFO ] Top 10 results: [ INFO ] classid probability [ INFO ] ------------------- diff --git a/inference-engine/samples/ngraph_function_creation_sample/README.md b/inference-engine/samples/ngraph_function_creation_sample/README.md index 1410241c3a5..9f7b4f8d433 100644 --- a/inference-engine/samples/ngraph_function_creation_sample/README.md +++ b/inference-engine/samples/ngraph_function_creation_sample/README.md @@ -1,6 +1,6 @@ # nGraph Function Creation C++ Sample {#openvino_inference_engine_samples_ngraph_function_creation_sample_README} -This sample demonstrates how to execute an synchronous inference using [nGraph function feature](../../../docs/nGraph_DG/build_function.md) to create a network, which uses weights from LeNet classification network. +This sample demonstrates how to execute an synchronous inference using [nGraph function feature](../../../docs/nGraph_DG/build_function.md) to create a network, which uses weights from LeNet classification network, which is known to work well on digit classification tasks. The sample supports only single-channel `ubyte` images as an input. 
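As an illustration of the on-the-fly network creation these READMEs describe, here is a minimal sketch; the one-layer topology, shapes, and device are placeholder examples rather than the sample's LeNet, and no weights file is read:

```cpp
// Minimal sketch: build a network in code via ngraph::Function, so no XML IR
// is needed, then load it with the Inference Engine. Topology is a toy example.
#include <inference_engine.hpp>
#include <ngraph/ngraph.hpp>
#include <ngraph/opsets/opset5.hpp>

int main() {
    using namespace ngraph;

    auto input  = std::make_shared<opset5::Parameter>(element::f32, Shape{1, 1, 28, 28});
    auto relu   = std::make_shared<opset5::Relu>(input);
    auto result = std::make_shared<opset5::Result>(relu);

    auto function = std::make_shared<Function>(ResultVector{result},
                                               ParameterVector{input},
                                               "toy_net");

    // CNNNetwork can be constructed directly from the ngraph::Function.
    InferenceEngine::CNNNetwork network(function);
    InferenceEngine::Core core;
    auto exec = core.LoadNetwork(network, "CPU");
    return 0;
}
```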
From dcf36565b0476a5ef45f678238f7359f41df081c Mon Sep 17 00:00:00 2001 From: Shoujiang Ma Date: Tue, 8 Jun 2021 17:11:58 +0800 Subject: [PATCH 19/41] [AUTO plugin] AUTO plugin will ignore other plugins' configuration (#5979) * AUTO plugin will ignore other plugins' configuration Signed-off-by: Shoujiang Ma * Update tests Signed-off-by: Shoujiang Ma * Support PER_COUNT config which is needed in benchmark_app Signed-off-by: Shoujiang Ma * Address reviewer comments: check config and throw exception for unsupported, but that begin with "AUTO_" will be ignored Signed-off-by: Shoujiang Ma * Fix CI tests issue Signed-off-by: Shoujiang Ma --- .../src/auto_plugin/auto_exec_network.cpp | 8 +- .../src/auto_plugin/auto_exec_network.hpp | 8 +- .../src/auto_plugin/auto_infer_request.cpp | 16 +++- .../src/auto_plugin/auto_infer_request.hpp | 4 +- .../src/auto_plugin/auto_plugin.cpp | 84 ++++++++++--------- .../src/auto_plugin/auto_plugin.hpp | 23 +++-- .../behavior/config.cpp | 44 ++-------- .../behavior/infer_request_config.cpp | 15 +--- .../behavior/infer_request_input.cpp | 4 +- .../behavior/infer_request_output.cpp | 4 +- .../behavior/perf_counters.cpp | 11 --- .../behavior/test_plugin.cpp | 4 +- .../skip_tests_config.cpp | 2 + .../behavior/config.cpp | 11 ++- .../behavior/infer_request_input.cpp | 4 +- .../behavior/infer_request_output.cpp | 3 +- .../behavior/perf_counters.cpp | 22 ----- .../behavior/test_plugin.cpp | 22 ++--- .../plugin/shared/include/behavior/config.hpp | 29 ++++--- .../include/behavior/infer_request_config.hpp | 12 ++- 20 files changed, 140 insertions(+), 190 deletions(-) diff --git a/inference-engine/src/auto_plugin/auto_exec_network.cpp b/inference-engine/src/auto_plugin/auto_exec_network.cpp index 353196a88d4..49b0963c04d 100644 --- a/inference-engine/src/auto_plugin/auto_exec_network.cpp +++ b/inference-engine/src/auto_plugin/auto_exec_network.cpp @@ -3,10 +3,8 @@ // #include -#include #include #include -#include #include "ie_metric_helpers.hpp" #include "auto_exec_network.hpp" @@ -15,8 +13,8 @@ namespace AutoPlugin { using namespace InferenceEngine; -AutoExecutableNetwork::AutoExecutableNetwork(const SoExecutableNetworkInternal& network) : - _network(network) { +AutoExecutableNetwork::AutoExecutableNetwork(const SoExecutableNetworkInternal& network, bool enablePerfCount) : + _network(network), _enablePerfCount(enablePerfCount) { } AutoExecutableNetwork::~AutoExecutableNetwork() = default; @@ -24,7 +22,7 @@ AutoExecutableNetwork::~AutoExecutableNetwork() = default; InferenceEngine::IInferRequestInternal::Ptr AutoExecutableNetwork::CreateInferRequestImpl(InputsDataMap networkInputs, OutputsDataMap networkOutputs) { SoIInferRequestInternal inferRequest = {_network, _network->CreateInferRequest()}; - return std::make_shared(_networkInputs, _networkOutputs, inferRequest); + return std::make_shared(_networkInputs, _networkOutputs, inferRequest, _enablePerfCount); } void AutoExecutableNetwork::Export(std::ostream& networkModel) { diff --git a/inference-engine/src/auto_plugin/auto_exec_network.hpp b/inference-engine/src/auto_plugin/auto_exec_network.hpp index a39478b19a7..e29970711eb 100644 --- a/inference-engine/src/auto_plugin/auto_exec_network.hpp +++ b/inference-engine/src/auto_plugin/auto_exec_network.hpp @@ -19,16 +19,11 @@ namespace AutoPlugin { using DeviceName = std::string; -struct DeviceInformation { - DeviceName deviceName; - std::map config; -}; - class AutoExecutableNetwork : public InferenceEngine::IExecutableNetworkInternal { public: using Ptr = std::shared_ptr; - 
explicit AutoExecutableNetwork(const InferenceEngine::SoExecutableNetworkInternal& network); + explicit AutoExecutableNetwork(const InferenceEngine::SoExecutableNetworkInternal& network, bool enablePerfCount); void Export(std::ostream& networkModel) override; InferenceEngine::RemoteContext::Ptr GetContext() const override; @@ -43,6 +38,7 @@ public: private: InferenceEngine::SoExecutableNetworkInternal _network; + bool _enablePerfCount; }; } // namespace AutoPlugin diff --git a/inference-engine/src/auto_plugin/auto_infer_request.cpp b/inference-engine/src/auto_plugin/auto_infer_request.cpp index f0777409830..46d60318715 100644 --- a/inference-engine/src/auto_plugin/auto_infer_request.cpp +++ b/inference-engine/src/auto_plugin/auto_infer_request.cpp @@ -11,13 +11,23 @@ namespace AutoPlugin { AutoInferRequest::AutoInferRequest(const InputsDataMap& networkInputs, const OutputsDataMap& networkOutputs, - const SoIInferRequestInternal& inferRequest) + const SoIInferRequestInternal& inferRequest, + bool enablePerfCount) : IInferRequestInternal(networkInputs, networkOutputs) - , _inferRequest(inferRequest) { + , _inferRequest(inferRequest) + , _enablePerfCount(enablePerfCount) { } std::map AutoInferRequest::GetPerformanceCounts() const { - return _inferRequest->GetPerformanceCounts(); + if (_enablePerfCount) { + try { + return _inferRequest->GetPerformanceCounts(); + } catch (...) { + return {}; + } + } else { + return {}; + } } void AutoInferRequest::InferImpl() { diff --git a/inference-engine/src/auto_plugin/auto_infer_request.hpp b/inference-engine/src/auto_plugin/auto_infer_request.hpp index 1ccaf0093b2..c97b2fa5aed 100644 --- a/inference-engine/src/auto_plugin/auto_infer_request.hpp +++ b/inference-engine/src/auto_plugin/auto_infer_request.hpp @@ -24,7 +24,8 @@ public: using Ptr = std::shared_ptr; explicit AutoInferRequest(const InferenceEngine::InputsDataMap& networkInputs, const InferenceEngine::OutputsDataMap& networkOutputs, - const InferenceEngine::SoIInferRequestInternal& inferRequest); + const InferenceEngine::SoIInferRequestInternal& inferRequest, + bool enablePerfCount); std::map GetPerformanceCounts() const override; void InferImpl() override; void SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr& data) override; @@ -37,6 +38,7 @@ public: private: InferenceEngine::SoIInferRequestInternal _inferRequest; + bool _enablePerfCount; }; } // namespace AutoPlugin diff --git a/inference-engine/src/auto_plugin/auto_plugin.cpp b/inference-engine/src/auto_plugin/auto_plugin.cpp index 1fc20063575..274fa9d224f 100644 --- a/inference-engine/src/auto_plugin/auto_plugin.cpp +++ b/inference-engine/src/auto_plugin/auto_plugin.cpp @@ -75,11 +75,11 @@ IE::QueryNetworkResult AutoInferencePlugin::QueryNetwork(const IE::CNNNetwork& n } auto fullConfig = mergeConfigs(_config, config); - auto metaDevices = GetDeviceChoice(fullConfig); + auto metaDevices = GetDeviceList(fullConfig); std::unordered_set supportedLayers; for (auto&& value : metaDevices) { try { - auto deviceQr = GetCore()->QueryNetwork(network, value.deviceName, value.config); + auto deviceQr = GetCore()->QueryNetwork(network, value, {}); std::unordered_set deviceSupportedLayers; for (auto &&layerQr : deviceQr.supportedLayersMap) { deviceSupportedLayers.emplace(layerQr.first); @@ -111,7 +111,19 @@ IE::Parameter AutoInferencePlugin::GetConfig(const std::string& name, void AutoInferencePlugin::SetConfig(const ConfigType& config) { for (auto && kvp : config) { - _config[kvp.first] = kvp.second; + if (kvp.first.find("AUTO_") == 0) { 
+ _config[kvp.first] = kvp.second; + } else if (kvp.first == IE::PluginConfigParams::KEY_PERF_COUNT) { + if (kvp.second == IE::PluginConfigParams::YES || + kvp.second == IE::PluginConfigParams::NO) { + _config[kvp.first] = kvp.second; + } else { + IE_THROW() << "Unsupported config value: " << kvp.second + << " for key: " << kvp.first; + } + } else { + IE_THROW() << "Unsupported config key: " << kvp.first; + } } } @@ -128,7 +140,10 @@ IE::Parameter AutoInferencePlugin::GetMetric(const std::string& name, std::string device_name = {"Inference Engine AUTO device"}; IE_SET_METRIC_RETURN(FULL_DEVICE_NAME, device_name); } else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) { - std::vector configKeys; + std::vector configKeys = { + IE::KEY_AUTO_DEVICE_LIST, + IE::PluginConfigParams::KEY_PERF_COUNT + }; IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS, configKeys); } else if (name == METRIC_KEY(OPTIMIZATION_CAPABILITIES)) { std::vector capabilities = GetOptimizationCapabilities(options); @@ -139,42 +154,21 @@ IE::Parameter AutoInferencePlugin::GetMetric(const std::string& name, } //////////////////////////////////// private & protected functions /////////////////// -std::vector AutoInferencePlugin::GetDeviceChoice(const ConfigType& config) const { - std::vector metaDevices; - std::vector availableDevices; +std::vector AutoInferencePlugin::GetDeviceList(const ConfigType& config) const { + std::vector deviceList; auto deviceListConfig = config.find(IE::KEY_AUTO_DEVICE_LIST); if (deviceListConfig == config.end()) { - availableDevices = GetCore()->GetAvailableDevices(); + deviceList = GetCore()->GetAvailableDevices(); } else { - availableDevices = IE::DeviceIDParser::getHeteroDevices(deviceListConfig->second); + deviceList = IE::DeviceIDParser::getHeteroDevices(deviceListConfig->second); } - auto getDeviceConfig = [&] (const DeviceName & deviceWithID) { - IE::DeviceIDParser deviceParser(deviceWithID); - std::string deviceName = deviceParser.getDeviceName(); - ConfigType tconfig = config; - - // set device ID if any - std::string deviceIDLocal = deviceParser.getDeviceID(); - if (!deviceIDLocal.empty()) { - tconfig[IE::PluginConfigParams::KEY_DEVICE_ID] = deviceIDLocal; - } - - return GetSupportedConfig(tconfig, deviceName); - }; - - for (auto && d : availableDevices) { - if (d != _pluginName) { - metaDevices.push_back({ d, getDeviceConfig(d)}); - } - } - - if (metaDevices.empty()) { + if (deviceList.empty()) { IE_THROW() << "Please, check environment due to no supported devices can be used"; } - return metaDevices; + return deviceList; } std::vector AutoInferencePlugin::GetOptimizationCapabilities(const std::map & options) const { @@ -215,7 +209,21 @@ ConfigType AutoInferencePlugin::GetSupportedConfig(const ConfigType& config, return supportedConfig; } -DeviceInformation AutoInferencePlugin::SelectDevice(const std::vector& metaDevices, const std::string& networkPrecision) { +void AutoInferencePlugin::CheckConfig(const ConfigType& config) { + std::vector supportedConfigKeys = GetMetric(METRIC_KEY(SUPPORTED_CONFIG_KEYS), {}); + for (auto&& c : config) { + auto itKey = std::find(supportedConfigKeys.begin(), supportedConfigKeys.end(), c.first); + if (supportedConfigKeys.end() == itKey) { + // CVS-57233 + if (c.first.find("AUTO_") == 0) { + continue; + } + IE_THROW() << "AUTO plugin doesn't support config key " << c.first; + } + } +} + +DeviceName AutoInferencePlugin::SelectDevice(const std::vector& metaDevices, const std::string& networkPrecision) { if (metaDevices.empty()) { IE_THROW(NotFound) << "No available 
device to select in AUTO plugin"; } @@ -223,15 +231,15 @@ DeviceInformation AutoInferencePlugin::SelectDevice(const std::vector CPU; - std::vector GPU; + std::vector CPU; + std::vector GPU; for (auto& item : metaDevices) { - if (item.deviceName.find("CPU") == 0) { + if (item.find("CPU") == 0) { CPU.push_back(item); continue; } - if (item.deviceName.find("GPU") == 0) { + if (item.find("GPU") == 0) { GPU.push_back(item); continue; } @@ -242,10 +250,10 @@ DeviceInformation AutoInferencePlugin::SelectDevice(const std::vector GPU.1 > GPU.0 > GPU, so we always choose the GPU[0] as best device - std::sort(GPU.begin(), GPU.end(), [](const DeviceInformation& a, const DeviceInformation& b)->bool{return b.deviceName < a.deviceName;}); + std::sort(GPU.begin(), GPU.end(), [](const DeviceName& a, const DeviceName& b)->bool{return b < a;}); for (auto&& item : GPU) { - std::vector capability = GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES)); + std::vector capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES)); auto res = std::find(capability.begin(), capability.end(), networkPrecision); if (res != capability.end()) { return item; diff --git a/inference-engine/src/auto_plugin/auto_plugin.hpp b/inference-engine/src/auto_plugin/auto_plugin.hpp index af42e9f0ef7..858ee2143fd 100644 --- a/inference-engine/src/auto_plugin/auto_plugin.hpp +++ b/inference-engine/src/auto_plugin/auto_plugin.hpp @@ -30,10 +30,11 @@ public: void SetConfig(const ConfigType& config) override; private: - std::vector GetDeviceChoice(const ConfigType& config) const; + std::vector GetDeviceList(const ConfigType& config) const; std::vector GetOptimizationCapabilities(const std::map& options) const; - DeviceInformation SelectDevice(const std::vector& metaDevices, const std::string& networkPrecision = METRIC_VALUE(FP32)); - ConfigType GetSupportedConfig(const ConfigType& config, const AutoPlugin::DeviceName & deviceName) const; + DeviceName SelectDevice(const std::vector& metaDevices, const std::string& networkPrecision = METRIC_VALUE(FP32)); + ConfigType GetSupportedConfig(const ConfigType& config, const DeviceName & deviceName) const; + void CheckConfig(const ConfigType& config); static ConfigType mergeConfigs(ConfigType config, const ConfigType& local); template @@ -41,18 +42,21 @@ private: if (GetCore() == nullptr) { IE_THROW() << "Please, work with AUTO device via InferencEngine::Core object"; } + + CheckConfig(config); + auto fullConfig = mergeConfigs(_config, config); - auto metaDevices = GetDeviceChoice(fullConfig); - DeviceInformation selectedDevice; + auto metaDevices = GetDeviceList(fullConfig); + DeviceName selectedDevice; IE::SoExecutableNetworkInternal executableNetwork; while (!metaDevices.empty()) { selectedDevice = SelectDevice(metaDevices, networkPrecision); try { - executableNetwork = GetCore()->LoadNetwork(param, selectedDevice.deviceName, selectedDevice.config); + executableNetwork = GetCore()->LoadNetwork(param, selectedDevice, {}); break; } catch (...) 
{ auto eraseDevice = std::find_if(metaDevices.begin(), metaDevices.end(), - [=](const DeviceInformation& d)->bool{return d.deviceName == selectedDevice.deviceName;}); + [=](const DeviceName& d)->bool{return d == selectedDevice;}); if (eraseDevice == metaDevices.end()) { IE_THROW() << "Didn't find the selected device name"; } @@ -63,7 +67,10 @@ private: if (!executableNetwork) { IE_THROW() << "Failed to load network by AUTO plugin"; } - auto impl = std::make_shared(executableNetwork); + + bool enablePerfCount = fullConfig.find(IE::PluginConfigParams::KEY_PERF_COUNT) != fullConfig.end(); + + auto impl = std::make_shared(executableNetwork, enablePerfCount); if (std::is_same::value) { SetExeNetworkInfo(impl, executableNetwork->GetInputsInfo(), diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/config.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/config.cpp index 8dab56fdafa..e3f0adb1cb6 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/config.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/config.cpp @@ -42,18 +42,7 @@ namespace { }; const std::vector> AutoConfigs = { - {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_CPU}, - {InferenceEngine::PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS, InferenceEngine::PluginConfigParams::CPU_THROUGHPUT_AUTO}}, - {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_CPU}, - {InferenceEngine::PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS, InferenceEngine::PluginConfigParams::CPU_THROUGHPUT_NUMA}}, - {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_CPU}, - {InferenceEngine::PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS, "8"}}, - {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_CPU}, - {InferenceEngine::PluginConfigParams::KEY_CPU_BIND_THREAD, InferenceEngine::PluginConfigParams::NO}}, - {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_CPU}, - {InferenceEngine::PluginConfigParams::KEY_CPU_BIND_THREAD, InferenceEngine::PluginConfigParams::YES}}, - {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_CPU}, - {InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_LIMIT, "10"}} + {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_CPU}} }; INSTANTIATE_TEST_CASE_P(smoke_BehaviorTests, CorrectConfigTests, @@ -93,22 +82,14 @@ namespace { }; const std::vector> autoinconfigs = { - {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_CPU}, - {InferenceEngine::PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS, "OFF"}}, - {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_CPU}, - {InferenceEngine::PluginConfigParams::KEY_CPU_BIND_THREAD, "OFF"}}, - {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_CPU}, - {InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_LIMIT, "NAN"}} + {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_CPU}, + {InferenceEngine::PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS, "OFF"}} }; const std::vector> multiconf = { {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU}} }; - const std::vector> autoconf = { - {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_CPU}} - }; - INSTANTIATE_TEST_CASE_P(smoke_BehaviorTests, CorrectConfigAPITests, ::testing::Combine( ::testing::ValuesIn(netPrecisions), @@ -127,7 +108,7 @@ namespace { ::testing::Combine( 
::testing::ValuesIn(netPrecisions), ::testing::Values(CommonTestUtils::DEVICE_AUTO), - ::testing::ValuesIn(autoconf)), + ::testing::ValuesIn(AutoConfigs)), CorrectConfigAPITests::getTestCaseName); INSTANTIATE_TEST_CASE_P(smoke_BehaviorTests, IncorrectConfigTests, @@ -144,13 +125,6 @@ namespace { ::testing::ValuesIn(multiinconfigs)), IncorrectConfigTests::getTestCaseName); - INSTANTIATE_TEST_CASE_P(smoke_Auto_BehaviorTests, IncorrectConfigTests, - ::testing::Combine( - ::testing::ValuesIn(netPrecisions), - ::testing::Values(CommonTestUtils::DEVICE_AUTO), - ::testing::ValuesIn(autoinconfigs)), - IncorrectConfigTests::getTestCaseName); - INSTANTIATE_TEST_CASE_P(smoke_BehaviorTests, IncorrectConfigAPITests, ::testing::Combine( ::testing::ValuesIn(netPrecisions), @@ -166,10 +140,10 @@ namespace { IncorrectConfigAPITests::getTestCaseName); INSTANTIATE_TEST_CASE_P(smoke_Auto_BehaviorTests, IncorrectConfigAPITests, - ::testing::Combine( - ::testing::ValuesIn(netPrecisions), - ::testing::Values(CommonTestUtils::DEVICE_AUTO), - ::testing::ValuesIn(autoinconfigs)), - IncorrectConfigAPITests::getTestCaseName); + ::testing::Combine( + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_AUTO), + ::testing::ValuesIn(autoinconfigs)), + IncorrectConfigAPITests::getTestCaseName); } // namespace \ No newline at end of file diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/infer_request_config.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/infer_request_config.cpp index 80914183c39..3db325234cf 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/infer_request_config.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/infer_request_config.cpp @@ -51,20 +51,7 @@ namespace { }; const std::vector> AutoConfigs = { - {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_CPU}, - {InferenceEngine::PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS, - InferenceEngine::PluginConfigParams::CPU_THROUGHPUT_AUTO}}, - {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_CPU}, - {InferenceEngine::PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS, - InferenceEngine::PluginConfigParams::CPU_THROUGHPUT_NUMA}}, - {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_CPU}, - {InferenceEngine::PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS, "8"}}, - {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_CPU}, - {InferenceEngine::PluginConfigParams::KEY_CPU_BIND_THREAD, InferenceEngine::PluginConfigParams::NO}}, - {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_CPU}, - {InferenceEngine::PluginConfigParams::KEY_CPU_BIND_THREAD, InferenceEngine::PluginConfigParams::YES}}, - {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_CPU}, - {InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_LIMIT, "10"}} + {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_CPU}} }; INSTANTIATE_TEST_CASE_P(smoke_BehaviorTests, InferConfigTests, diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/infer_request_input.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/infer_request_input.cpp index 771794774a6..eb988c0043a 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/infer_request_input.cpp +++ 
b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/infer_request_input.cpp @@ -26,9 +26,7 @@ namespace { }; const std::vector> autoConfigs = { - {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_CPU}}, - {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_CPU}, - {InferenceEngine::PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS, InferenceEngine::PluginConfigParams::CPU_THROUGHPUT_AUTO}} + {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_CPU}} }; INSTANTIATE_TEST_CASE_P(smoke_BehaviorTests, InferRequestInputTests, diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/infer_request_output.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/infer_request_output.cpp index 78be1513314..a77253f8900 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/infer_request_output.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/infer_request_output.cpp @@ -22,9 +22,7 @@ namespace { }; const std::vector> autoConfigs = { - {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_CPU}}, - {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_CPU}, - {InferenceEngine::PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS, InferenceEngine::PluginConfigParams::CPU_THROUGHPUT_AUTO}} + {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_CPU}} }; INSTANTIATE_TEST_CASE_P(smoke_BehaviorTests, InferRequestOutputTests, diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/perf_counters.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/perf_counters.cpp index 2e2aab976c6..8bf1a8f95ba 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/perf_counters.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/perf_counters.cpp @@ -14,10 +14,6 @@ namespace { {{ MULTI_CONFIG_KEY(DEVICE_PRIORITIES) , CommonTestUtils::DEVICE_CPU}} }; - const std::vector> Autoconfigs = { - {{ AUTO_CONFIG_KEY(DEVICE_LIST) , CommonTestUtils::DEVICE_CPU}} - }; - INSTANTIATE_TEST_CASE_P(smoke_BehaviorTests, PerfCountersTest, ::testing::Combine( ::testing::Values(InferenceEngine::Precision::FP32), @@ -32,11 +28,4 @@ namespace { ::testing::ValuesIn(Multiconfigs)), PerfCountersTest::getTestCaseName); - INSTANTIATE_TEST_CASE_P(smoke_Auto_BehaviorTests, PerfCountersTest, - ::testing::Combine( - ::testing::Values(InferenceEngine::Precision::FP32), - ::testing::Values(CommonTestUtils::DEVICE_AUTO), - ::testing::ValuesIn(Autoconfigs)), - PerfCountersTest::getTestCaseName); - } // namespace diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/test_plugin.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/test_plugin.cpp index 95208f0a092..da76bc26498 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/test_plugin.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/test_plugin.cpp @@ -37,9 +37,7 @@ namespace { }; const std::vector> AutoConfigsInputOutput = { - {{InferenceEngine::KEY_AUTO_DEVICE_LIST, CommonTestUtils::DEVICE_CPU}}, - {{InferenceEngine::KEY_AUTO_DEVICE_LIST, CommonTestUtils::DEVICE_CPU}, - {InferenceEngine::PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS, InferenceEngine::PluginConfigParams::CPU_THROUGHPUT_AUTO}} + 
{{InferenceEngine::KEY_AUTO_DEVICE_LIST, CommonTestUtils::DEVICE_CPU}} }; const std::vector> configsOutput = { diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp index b3134e9953e..f013e544074 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp @@ -67,6 +67,8 @@ std::vector disabledTestPatterns() { // TODO: 55656 AUTO plugin and QueryNetwork R"(.*CoreThreading.*smoke_QueryNetwork.*targetDevice=AUTO_config.*)", + // Unsupported config KEY_ENFORCE_BF16 for AUTO plugin + R"(.*smoke_SetBlobOfKindAUTO.*SetBlobOfKindTest.CompareWithRefs.*)", // reference doesn't cover I8, U8 cases. Issue: 55842 R"(.*Gather7LayerTest.*netPRC=I8.*)", }; diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/config.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/config.cpp index 0b550c568b6..e21d610db56 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/config.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/config.cpp @@ -106,6 +106,13 @@ namespace { ::testing::ValuesIn(autoconf)), CorrectConfigAPITests::getTestCaseName); + INSTANTIATE_TEST_CASE_P(smoke_AutoCG_BehaviorTests, CorrectConfigAPITests, + ::testing::Combine( + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_AUTO), + ::testing::ValuesIn(auto_cpu_gpu_conf)), + CorrectConfigAPITests::getTestCaseName); + INSTANTIATE_TEST_CASE_P(smoke_BehaviorTests, IncorrectConfigAPITests, ::testing::Combine( ::testing::ValuesIn(netPrecisions), @@ -124,14 +131,14 @@ namespace { ::testing::Combine( ::testing::ValuesIn(netPrecisions), ::testing::Values(CommonTestUtils::DEVICE_AUTO), - ::testing::ValuesIn(autoconf)), + ::testing::ValuesIn(autoinconfigs)), IncorrectConfigAPITests::getTestCaseName); INSTANTIATE_TEST_CASE_P(smoke_AutoCG_BehaviorTests, IncorrectConfigAPITests, ::testing::Combine( ::testing::ValuesIn(netPrecisions), ::testing::Values(CommonTestUtils::DEVICE_AUTO), - ::testing::ValuesIn(auto_cpu_gpu_conf)), + ::testing::ValuesIn(autoinconfigs)), IncorrectConfigAPITests::getTestCaseName); diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/infer_request_input.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/infer_request_input.cpp index a9a07450f70..e15ea827caa 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/infer_request_input.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/infer_request_input.cpp @@ -26,9 +26,7 @@ namespace { }; const std::vector> autoConfigs = { - {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_GPU}, - {InferenceEngine::PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS, - InferenceEngine::PluginConfigParams::GPU_THROUGHPUT_AUTO}} + {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_GPU}} }; const std::vector> auto_cpu_gpu_conf = { diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/infer_request_output.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/infer_request_output.cpp index 550572077c3..6c38f5c841c 100644 --- 
a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/infer_request_output.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/infer_request_output.cpp @@ -22,8 +22,7 @@ namespace { }; const std::vector> autoConfigs = { - {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_GPU}, - {InferenceEngine::PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS, InferenceEngine::PluginConfigParams::GPU_THROUGHPUT_AUTO}} + {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_GPU}} }; const std::vector> auto_cpu_gpu_conf = { diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/perf_counters.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/perf_counters.cpp index c5a5f695359..d8a89ef317d 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/perf_counters.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/perf_counters.cpp @@ -14,14 +14,6 @@ namespace { {{ MULTI_CONFIG_KEY(DEVICE_PRIORITIES) , CommonTestUtils::DEVICE_GPU}} }; - const std::vector> Autoconfigs = { - {{ AUTO_CONFIG_KEY(DEVICE_LIST) , CommonTestUtils::DEVICE_GPU}} - }; - - const std::vector> auto_cpu_gpu_conf = { - {{InferenceEngine::KEY_AUTO_DEVICE_LIST , std::string(CommonTestUtils::DEVICE_CPU) + "," + CommonTestUtils::DEVICE_GPU}} - }; - INSTANTIATE_TEST_CASE_P(smoke_BehaviorTests, PerfCountersTest, ::testing::Combine( ::testing::Values(InferenceEngine::Precision::FP32), @@ -36,18 +28,4 @@ namespace { ::testing::ValuesIn(Multiconfigs)), PerfCountersTest::getTestCaseName); - INSTANTIATE_TEST_CASE_P(smoke_Auto_BehaviorTests, PerfCountersTest, - ::testing::Combine( - ::testing::Values(InferenceEngine::Precision::FP32), - ::testing::Values(CommonTestUtils::DEVICE_AUTO), - ::testing::ValuesIn(Autoconfigs)), - PerfCountersTest::getTestCaseName); - - INSTANTIATE_TEST_CASE_P(smoke_AutoCG_BehaviorTests, PerfCountersTest, - ::testing::Combine( - ::testing::Values(InferenceEngine::Precision::FP32), - ::testing::Values(CommonTestUtils::DEVICE_AUTO), - ::testing::ValuesIn(auto_cpu_gpu_conf)), - PerfCountersTest::getTestCaseName); - } // namespace diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/test_plugin.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/test_plugin.cpp index bc6507d7905..51979116646 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/test_plugin.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/test_plugin.cpp @@ -28,7 +28,7 @@ namespace { }; const std::vector> auto_cpu_gpu_conf = { - {{InferenceEngine::KEY_AUTO_DEVICE_LIST , std::string(CommonTestUtils::DEVICE_CPU) + "," + CommonTestUtils::DEVICE_GPU}} + {{InferenceEngine::KEY_AUTO_DEVICE_LIST , std::string(CommonTestUtils::DEVICE_CPU) + "," + CommonTestUtils::DEVICE_GPU}} }; const std::vector> configsInput = { @@ -42,18 +42,6 @@ namespace { {InferenceEngine::PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS, InferenceEngine::PluginConfigParams::GPU_THROUGHPUT_AUTO}} }; - const std::vector> AutoConfigsInputOutput = { - {{InferenceEngine::KEY_AUTO_DEVICE_LIST, CommonTestUtils::DEVICE_GPU}}, - {{InferenceEngine::KEY_AUTO_DEVICE_LIST, CommonTestUtils::DEVICE_GPU}, - {InferenceEngine::PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS, InferenceEngine::PluginConfigParams::GPU_THROUGHPUT_AUTO}} - }; - - const std::vector> 
AutoCGConfigsInputOutput = { - {{InferenceEngine::KEY_AUTO_DEVICE_LIST, std::string(CommonTestUtils::DEVICE_CPU) + "," + CommonTestUtils::DEVICE_GPU}}, - {{InferenceEngine::KEY_AUTO_DEVICE_LIST, std::string(CommonTestUtils::DEVICE_CPU) + "," + CommonTestUtils::DEVICE_GPU}, - {InferenceEngine::PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS, InferenceEngine::PluginConfigParams::GPU_THROUGHPUT_AUTO}} - }; - const std::vector> configsOutput = { {}, {{InferenceEngine::PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS, InferenceEngine::PluginConfigParams::GPU_THROUGHPUT_AUTO}} @@ -77,14 +65,14 @@ namespace { ::testing::Combine( ::testing::ValuesIn(netPrecisions), ::testing::Values(CommonTestUtils::DEVICE_AUTO), - ::testing::ValuesIn(AutoConfigsInputOutput)), + ::testing::ValuesIn(AutoConfigs)), BehaviorTestOutput::getTestCaseName); INSTANTIATE_TEST_CASE_P(smoke_AutoCG_BehaviorTests, BehaviorTestOutput, ::testing::Combine( ::testing::ValuesIn(netPrecisions), ::testing::Values(CommonTestUtils::DEVICE_AUTO), - ::testing::ValuesIn(AutoCGConfigsInputOutput)), + ::testing::ValuesIn(auto_cpu_gpu_conf)), BehaviorTestOutput::getTestCaseName); INSTANTIATE_TEST_CASE_P(smoke_BehaviorTests, BehaviorTests, @@ -133,14 +121,14 @@ namespace { ::testing::Combine( ::testing::ValuesIn(netPrecisions), ::testing::Values(CommonTestUtils::DEVICE_AUTO), - ::testing::ValuesIn(AutoConfigsInputOutput)), + ::testing::ValuesIn(AutoConfigs)), BehaviorTestInput::getTestCaseName); INSTANTIATE_TEST_CASE_P(smoke_AutoCG_BehaviorTests, BehaviorTestInput, ::testing::Combine( ::testing::ValuesIn(netPrecisions), ::testing::Values(CommonTestUtils::DEVICE_AUTO), - ::testing::ValuesIn(AutoCGConfigsInputOutput)), + ::testing::ValuesIn(auto_cpu_gpu_conf)), BehaviorTestInput::getTestCaseName); } // namespace diff --git a/inference-engine/tests/functional/plugin/shared/include/behavior/config.hpp b/inference-engine/tests/functional/plugin/shared/include/behavior/config.hpp index b9caf6edacf..e13fe679b2a 100644 --- a/inference-engine/tests/functional/plugin/shared/include/behavior/config.hpp +++ b/inference-engine/tests/functional/plugin/shared/include/behavior/config.hpp @@ -57,8 +57,7 @@ namespace BehaviorTestsDefinitions { // Create CNNNetwork from ngrpah::Function InferenceEngine::CNNNetwork cnnNet(function); if (targetDevice.find(CommonTestUtils::DEVICE_MULTI) == std::string::npos && - targetDevice.find(CommonTestUtils::DEVICE_HETERO) == std::string::npos && - targetDevice.find(CommonTestUtils::DEVICE_AUTO) == std::string::npos) { + targetDevice.find(CommonTestUtils::DEVICE_HETERO) == std::string::npos) { ASSERT_NO_THROW(ie->GetMetric(targetDevice, METRIC_KEY(SUPPORTED_CONFIG_KEYS))); ASSERT_THROW(ie->SetConfig(configuration, targetDevice), InferenceEngine::Exception); @@ -73,8 +72,12 @@ namespace BehaviorTestsDefinitions { SKIP_IF_CURRENT_TEST_IS_DISABLED() // Create CNNNetwork from ngrpah::Function InferenceEngine::CNNNetwork cnnNet(function); - ASSERT_THROW(auto execNet = ie->LoadNetwork(cnnNet, targetDevice, configuration), - InferenceEngine::Exception); + if (targetDevice.find(CommonTestUtils::DEVICE_AUTO) != std::string::npos) { + GTEST_SKIP(); + } else { + ASSERT_THROW(auto execNet = ie->LoadNetwork(cnnNet, targetDevice, configuration), + InferenceEngine::Exception); + } } using IncorrectConfigAPITests = BehaviorTestsUtils::BehaviorTestsBasic; @@ -110,8 +113,10 @@ namespace BehaviorTestsDefinitions { ASSERT_NO_THROW(ie->SetConfig(config, targetDevice)); } // Load CNNNetwork to target plugins - auto execNet = ie->LoadNetwork(cnnNet, 
targetDevice, config); - execNet.CreateInferRequest(); + if (targetDevice.find(CommonTestUtils::DEVICE_AUTO) == std::string::npos) { + auto execNet = ie->LoadNetwork(cnnNet, targetDevice, config); + execNet.CreateInferRequest(); + } if ((targetDevice == CommonTestUtils::DEVICE_HDDL) || (targetDevice == CommonTestUtils::DEVICE_GNA)) { ASSERT_EQ(0u, InferenceEngine::ExecutorManager::getInstance()->getExecutorsNumber()); @@ -139,8 +144,10 @@ namespace BehaviorTestsDefinitions { ASSERT_NO_THROW(ie->SetConfig(config, targetDevice)); } // Load CNNNetwork to target plugins - auto execNet = ie->LoadNetwork(cnnNet, targetDevice, config); - execNet.CreateInferRequest(); + if (targetDevice.find(CommonTestUtils::DEVICE_AUTO) == std::string::npos) { + auto execNet = ie->LoadNetwork(cnnNet, targetDevice, config); + execNet.CreateInferRequest(); + } if ((targetDevice == CommonTestUtils::DEVICE_MYRIAD) || (targetDevice == CommonTestUtils::DEVICE_KEEMBAY)) { @@ -170,8 +177,10 @@ namespace BehaviorTestsDefinitions { ASSERT_NO_THROW(ie->SetConfig(config, targetDevice)); } // Load CNNNetwork to target plugins - auto execNet = ie->LoadNetwork(cnnNet, targetDevice, config); - execNet.CreateInferRequest(); + if (targetDevice.find(CommonTestUtils::DEVICE_AUTO) == std::string::npos) { + auto execNet = ie->LoadNetwork(cnnNet, targetDevice, config); + execNet.CreateInferRequest(); + } if ((targetDevice == CommonTestUtils::DEVICE_MYRIAD) || (targetDevice == CommonTestUtils::DEVICE_KEEMBAY)) { diff --git a/inference-engine/tests/functional/plugin/shared/include/behavior/infer_request_config.hpp b/inference-engine/tests/functional/plugin/shared/include/behavior/infer_request_config.hpp index cb364c80f8c..c9469401123 100644 --- a/inference-engine/tests/functional/plugin/shared/include/behavior/infer_request_config.hpp +++ b/inference-engine/tests/functional/plugin/shared/include/behavior/infer_request_config.hpp @@ -42,8 +42,10 @@ TEST_P(InferConfigTests, canSetExclusiveAsyncRequests) { ASSERT_NO_THROW(ie->SetConfig(config, targetDevice)); } // Load CNNNetwork to target plugins - auto execNet = ie->LoadNetwork(cnnNet, targetDevice, config); - execNet.CreateInferRequest(); + if (targetDevice.find(CommonTestUtils::DEVICE_AUTO) == std::string::npos) { + auto execNet = ie->LoadNetwork(cnnNet, targetDevice, config); + execNet.CreateInferRequest(); + } if ((targetDevice == CommonTestUtils::DEVICE_HDDL) || (targetDevice == CommonTestUtils::DEVICE_GNA)) { ASSERT_EQ(0u, InferenceEngine::ExecutorManager::getInstance()->getExecutorsNumber()); @@ -71,8 +73,10 @@ TEST_P(InferConfigTests, withoutExclusiveAsyncRequests) { ASSERT_NO_THROW(ie->SetConfig(config, targetDevice)); } // Load CNNNetwork to target plugins - auto execNet = ie->LoadNetwork(cnnNet, targetDevice, config); - execNet.CreateInferRequest(); + if (targetDevice.find(CommonTestUtils::DEVICE_AUTO) == std::string::npos) { + auto execNet = ie->LoadNetwork(cnnNet, targetDevice, config); + execNet.CreateInferRequest(); + } if ((targetDevice == CommonTestUtils::DEVICE_GNA) || (targetDevice == CommonTestUtils::DEVICE_HDDL)) { ASSERT_EQ(0u, InferenceEngine::ExecutorManager::getInstance()->getExecutorsNumber()); From 79e44292c5758b9e736c8440e7c0e9f2364991dc Mon Sep 17 00:00:00 2001 From: Michal Papaj Date: Tue, 8 Jun 2021 11:35:26 +0200 Subject: [PATCH 20/41] Update to speech recognition demo version. (#6060) The speech recognition demo packages were updated to align OV python packages requirements. This patch updates packages version and SHA. 
--- inference-engine/cmake/dependencies.cmake | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/inference-engine/cmake/dependencies.cmake b/inference-engine/cmake/dependencies.cmake index 4ce1ef31365..b270c46f2da 100644 --- a/inference-engine/cmake/dependencies.cmake +++ b/inference-engine/cmake/dependencies.cmake @@ -295,25 +295,25 @@ if (ENABLE_SPEECH_DEMO) if(DEFINED IE_PATH_TO_DEPS) if (WIN32 AND X86_64) RESOLVE_DEPENDENCY(SPEECH_LIBS_AND_DEMOS - ARCHIVE_WIN "speech_demo_1.0.0.755_windows.zip" + ARCHIVE_WIN "speech_demo_1.0.0.774_windows.zip" VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+.[0-9]+).*" - TARGET_PATH "${TEMP}/speech_demo_1.0.0.755" - SHA256 "58adef14b8a749f70fa83888614cee34b941956e6e958e445e3f48885b3c20a0") + TARGET_PATH "${TEMP}/speech_demo_1.0.0.774" + SHA256 "67b25170be5e89a4f0e90e8b39623b60c9a15b965c30329385e295fcd2edc856") debug_message(STATUS "speech_libs_and_demos=" ${SPEECH_LIBS_AND_DEMOS}) elseif (LINUX AND X86_64) if (LINUX_OS_NAME STREQUAL "CentOS 7" OR CMAKE_CXX_COMPILER_VERSION VERSION_LESS "4.9") RESOLVE_DEPENDENCY(SPEECH_LIBS_AND_DEMOS - ARCHIVE_LIN "speech_demo_1.0.0.755_centos.tgz" + ARCHIVE_LIN "speech_demo_1.0.0.774_centos.tgz" VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+.[0-9]+).*" - TARGET_PATH "${TEMP}/speech_demo_1.0.0.755" - SHA256 "716201e377714ac50f3909c445d36d47a089de50a557d8ef65232de040671188") + TARGET_PATH "${TEMP}/speech_demo_1.0.0.774" + SHA256 "5ec3b7be9ae05376aefae5bd5fd4a39b12c274e82817fd3218120b8e8fc8ff5a") debug_message(STATUS "speech_libs_and_demos=" ${SPEECH_LIBS_AND_DEMOS}) else() RESOLVE_DEPENDENCY(SPEECH_LIBS_AND_DEMOS - ARCHIVE_LIN "speech_demo_1.0.0.755_linux.tgz" + ARCHIVE_LIN "speech_demo_1.0.0.774_linux.tgz" VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+.[0-9]+).*" - TARGET_PATH "${TEMP}/speech_demo_1.0.0.755" - SHA256 "7714b8776ec0183ed73eed6d3d965ee6d5c15d2dc49ee5ae118cc368c89c7a9d") + TARGET_PATH "${TEMP}/speech_demo_1.0.0.774" + SHA256 "f0bbd0a6218b0365e7cfb1f860b34e4ace7e0d47dd60b369cdea8a480329810f") debug_message(STATUS "speech_libs_and_demos=" ${SPEECH_LIBS_AND_DEMOS}) endif() else() From 9aa338a0afcab0f1754e9cc1ec2e08d92adfdf5a Mon Sep 17 00:00:00 2001 From: Rafal Blaczkowski Date: Tue, 8 Jun 2021 11:37:20 +0200 Subject: [PATCH 21/41] OpenVINO ONNX CI - set more stable proxy (#5945) * Set more stable proxy * Update env * Add missing chars * Update proxy * Additonal try to overwrite proxy * update proxy * fix style --- .ci/openvino-onnx/Jenkinsfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.ci/openvino-onnx/Jenkinsfile b/.ci/openvino-onnx/Jenkinsfile index 48529879ef1..5fe24928798 100644 --- a/.ci/openvino-onnx/Jenkinsfile +++ b/.ci/openvino-onnx/Jenkinsfile @@ -113,8 +113,8 @@ def buildDockerImage(Map configuration, String workdir) { --build-arg BUILD_TYPE=${configuration.build_type} \ --build-arg PROTOBUF_LITE=${configuration.protobuf_lite} \ --file=.ci/openvino-onnx/Dockerfile \ - --build-arg http_proxy=http://proxy-chain.intel.com:911/ \ - --build-arg https_proxy=http://proxy-chain.intel.com:912/ . + --build-arg http_proxy=http://proxy-ir.intel.com:911/ \ + --build-arg https_proxy=http://proxy-ir.intel.com:911/ . 
""" } From 503d18c80f9f1291b5aa5a7456b074b30bac7fec Mon Sep 17 00:00:00 2001 From: Alexander Shchepetov Date: Tue, 8 Jun 2021 12:47:20 +0300 Subject: [PATCH 22/41] Enable MYRIAD in stress tests (#6026) --- .../myriad_references_config.xml | 20 +++ .../nightly_configs/myriad_test_config.xml | 115 ++++++++++++++++++ .../nightly_configs/myriad_test_config.xml | 19 +++ .../nightly_configs/myriad_test_config.xml | 20 +++ 4 files changed, 174 insertions(+) create mode 100644 tests/stress_tests/.automation/memcheck_tests/nightly_configs/myriad_references_config.xml create mode 100644 tests/stress_tests/.automation/memcheck_tests/nightly_configs/myriad_test_config.xml create mode 100644 tests/stress_tests/.automation/memleaks_tests/nightly_configs/myriad_test_config.xml create mode 100644 tests/stress_tests/.automation/unittests/nightly_configs/myriad_test_config.xml diff --git a/tests/stress_tests/.automation/memcheck_tests/nightly_configs/myriad_references_config.xml b/tests/stress_tests/.automation/memcheck_tests/nightly_configs/myriad_references_config.xml new file mode 100644 index 00000000000..b4a8be108c3 --- /dev/null +++ b/tests/stress_tests/.automation/memcheck_tests/nightly_configs/myriad_references_config.xml @@ -0,0 +1,20 @@ + + + + # References were collected from DB with next query: {"commit_id": "aa2ae13c1ee6d700dd287ab809403e7de8c7c5e3", "commit_date": "2021-06-05 14:30:47+00:00"} + # and modified on FACTOR = 1.3 + + + + + + + + + + + + + + + diff --git a/tests/stress_tests/.automation/memcheck_tests/nightly_configs/myriad_test_config.xml b/tests/stress_tests/.automation/memcheck_tests/nightly_configs/myriad_test_config.xml new file mode 100644 index 00000000000..7409c6287da --- /dev/null +++ b/tests/stress_tests/.automation/memcheck_tests/nightly_configs/myriad_test_config.xml @@ -0,0 +1,115 @@ + + + MYRIAD + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tests/stress_tests/.automation/memleaks_tests/nightly_configs/myriad_test_config.xml b/tests/stress_tests/.automation/memleaks_tests/nightly_configs/myriad_test_config.xml new file mode 100644 index 00000000000..3718751b253 --- /dev/null +++ b/tests/stress_tests/.automation/memleaks_tests/nightly_configs/myriad_test_config.xml @@ -0,0 +1,19 @@ + + + 1 + + + 1 + + + 1000 + + + MYRIAD + + + + + + + \ No newline at end of file diff --git a/tests/stress_tests/.automation/unittests/nightly_configs/myriad_test_config.xml b/tests/stress_tests/.automation/unittests/nightly_configs/myriad_test_config.xml new file mode 100644 index 00000000000..979194a991a --- /dev/null +++ b/tests/stress_tests/.automation/unittests/nightly_configs/myriad_test_config.xml @@ -0,0 +1,20 @@ + + + 1 + + + 1 + 2 + + + 100 + + + MYRIAD + + + + + + + From ae5608534ed76572ed4fcaf412291656959091e2 Mon Sep 17 00:00:00 2001 From: iliya mironov Date: Tue, 8 Jun 2021 13:15:54 +0300 Subject: [PATCH 23/41] Add support resize with 2 inputs (#5927) * Add support resize with 2 inputs * Add unit tests * Hot fix * Change resize check from port count to connected num port conditions * Fix conditions * Refactoring code according to review * Fix according to review * Change onnresize11 input condition --- .../middle/ONNXResize11ToInterpolate.py | 13 ++-- .../extensions/ops/ONNXResize11.py | 13 ++-- .../extensions/ops/ONNXResize11_test.py | 68 ++++++++++++++++++- 3 
files changed, 80 insertions(+), 14 deletions(-) diff --git a/model-optimizer/extensions/middle/ONNXResize11ToInterpolate.py b/model-optimizer/extensions/middle/ONNXResize11ToInterpolate.py index ffd3c27eebd..c462d45285f 100644 --- a/model-optimizer/extensions/middle/ONNXResize11ToInterpolate.py +++ b/model-optimizer/extensions/middle/ONNXResize11ToInterpolate.py @@ -34,12 +34,13 @@ def replace_resize(graph: Graph, resize: Node): log.warning('The input shape is not 4D or 5D for op with name {}'.format(resize_name)) return - num_of_inputs = len([port for port in resize.in_ports().values() if not port.disconnected()]) - assert num_of_inputs in {3, 4}, \ - "Number of inputs of ONNXResize (with name {}) should be equal to 3 or 4".format(resize_name) + assert (resize.is_in_port_connected(0) and (resize.is_in_port_connected(2) or resize.is_in_port_connected(3))), \ + "Scales or sizes inputs must be connected to Node {} with op {}.".format(resize.soft_get("name", resize.id), + resize.op) assert resize.soft_get('coordinate_transformation_mode') != 'tf_crop_and_resize', \ - 'Mode tf_crop_and_resize is not supported for op {} with name {}'.format(resize.op, resize_name) + 'Mode tf_crop_and_resize is not supported for op {} with name {}'.format(resize.op, + resize.soft_get("name", resize.id)) layout = graph.graph['layout'] @@ -74,7 +75,7 @@ def replace_resize(graph: Graph, resize: Node): {'name': resize_name + '/axis', 'value': int64_array(np.arange(begin_dim, end_dim))}).create_node() - shape_calculation_mode = 'scales' if num_of_inputs == 3 else 'sizes' + shape_calculation_mode = 'sizes' if resize.is_in_port_connected(3) else 'scales' interpolate_node = Interpolate(graph, {'version': 'opset4', 'mode': convert_mode(resize.mode), @@ -96,7 +97,7 @@ def replace_resize(graph: Graph, resize: Node): dst_dtype = np.float32 # even if data_type=FP16 use float32 for shape values - if num_of_inputs == 3: + if not resize.is_in_port_connected(3): cast_shape_to_float = Cast(graph, {'dst_type': dst_dtype}).create_node() mul_node = Mul(graph, {'name': resize_name + '/Mul'}).create_node() shape_of.out_port(0).connect(cast_shape_to_float.in_port(0)) diff --git a/model-optimizer/extensions/ops/ONNXResize11.py b/model-optimizer/extensions/ops/ONNXResize11.py index 5ef8d7f6ac8..5476087a3d4 100644 --- a/model-optimizer/extensions/ops/ONNXResize11.py +++ b/model-optimizer/extensions/ops/ONNXResize11.py @@ -35,14 +35,15 @@ class ONNXResize11Op(Op): if input_shape is None: return - num_of_in_nodes = len(node.in_nodes()) - assert num_of_in_nodes in {3, 4}, \ - "Node {} with op {} number of inputs must be equal to 3 or 4.".format(node.name, node.op) + assert (node.is_in_port_connected(0) and (node.is_in_port_connected(2) or node.is_in_port_connected(3))), \ + "One of the scales or sizes inputs must be connected to Node {} with op {}.".format(node.soft_get("name", node.id), + node.op) assert node.coordinate_transformation_mode != 'tf_crop_and_resize', \ - 'Mode tf_crop_and_resize is not supported for op {} with name {}'.format(node.op, node.name) + 'Mode tf_crop_and_resize is not supported for op {} with name {}'.format(node.op, + node.soft_get("name", node.id)) - if num_of_in_nodes == 3: + if not node.is_in_port_connected(3): # i.e. input 'sizes' is not given input2_value = node.in_port(2).data.get_value() assert input2_value is not None, \ @@ -53,7 +54,7 @@ class ONNXResize11Op(Op): # i.e. 
input 'sizes' is given sizes = node.in_port(3).data.get_value() assert sizes is not None, \ - "Node {} with op {} has no value in input port 3".format(node.name, node.op) + "Node {} with op {} has no value in input port 3".format(node.soft_get("name", node.id), node.op) output_shape = input_shape.copy() spatial_dimension_indices = range(2, len(input_shape)) output_shape[spatial_dimension_indices] = int64_array(sizes)[2:] diff --git a/model-optimizer/unit_tests/extensions/ops/ONNXResize11_test.py b/model-optimizer/unit_tests/extensions/ops/ONNXResize11_test.py index b38773b8279..aac234c56ba 100644 --- a/model-optimizer/unit_tests/extensions/ops/ONNXResize11_test.py +++ b/model-optimizer/unit_tests/extensions/ops/ONNXResize11_test.py @@ -33,10 +33,8 @@ graph_edges_sizes = [ ('input', 'input_data'), ('roi', 'roi_data'), ('sizes', 'sizes_data'), - ('scales', 'scales_data'), ('input_data', 'onnx_resize11', {'in': 0}), ('roi_data', 'onnx_resize11', {'in': 1}), - ('scales_data', 'onnx_resize11', {'in': 2}), ('sizes_data', 'onnx_resize11', {'in': 3}), ('onnx_resize11', 'onnx_resize11_data'), ('onnx_resize11_data', 'op_output'), @@ -125,3 +123,69 @@ class TestONNXResize11Op(unittest.TestCase): self.assertTrue(np.array_equal(graph.node['onnx_resize11_data']['shape'], int64_array(output_shape)), msg.format(scales, output_shape, graph.node['onnx_resize11_data']['shape'])) + + @generate(*[([1, 260, 100, 150], [1, 260, 200, 350], [1, 260, 200, 350], [1.0, 1.0, 1.0, 1.0]), + ([1, 260, 100, 150], [1, 260, 200, 350], [1, 1, 200, 350], [1.0, 1.0, 1.0, 1.0]), + ([5, 14, 300, 40], [5, 14, 140, 280], [1, 1, 140, 280], [1.0, 1.0, 1.0, 1.0]), + ([5, 14, 300, 40], [5, 14, 140, 280], [5, 14, 140, 280], [1.0, 1.0, 1.0, 1.0]), + ([1, 3, 260, 100, 150], [1, 3, 780, 200, 350], [1, 3, 780, 200, 350], [1.0, 1.0, 1.0, 1.0, 1.0]), + ([1, 3, 450, 100, 150], [1, 3, 260, 200, 350], [1, 3, 260, 200, 350], [1.0, 1.0, 1.0, 1.0, 1.0]), + ([5, 14, 1000, 300, 40], [5, 14, 500, 140, 280], [1, 1, 500, 140, 280], [1.0, 1.0, 1.0, 1.0, 1.0]), + ([5, 14, 1000, 300, 40], [5, 14, 500, 140, 280], [5, 14, 500, 140, 280], [1.0, 1.0, 1.0, 1.0, 1.0])]) + def test_onnx_resize11_using_sizes_without_roi_input(self, input_shape, output_shape, sizes, scales): + np_scales = np.array(scales) + np_sizes = int64_array(sizes) + graph = build_graph(nodes_attrs=graph_node_attrs_sizes, + edges=[('input', 'input_data'), + ('sizes', 'sizes_data'), + ('input_data', 'onnx_resize11', {'in': 0}), + ('sizes_data', 'onnx_resize11', {'in': 3}), + ('onnx_resize11', 'onnx_resize11_data'), + ('onnx_resize11_data', 'op_output'), + ], + update_attributes={ + 'input_data': {'shape': int64_array(input_shape)}, + 'scales': {'shape': int64_array(np_scales.shape), 'value': np_scales}, + 'scales_data': {'shape': int64_array(np_scales.shape), 'value': np_scales}, + 'sizes': {'shape': int64_array(np_sizes.shape), 'value': np_sizes}, + 'sizes_data': {'shape': int64_array(np_sizes.shape), 'value': np_sizes}, + }) + node = Node(graph, 'onnx_resize11') + ONNXResize11Op.onnx_resize_infer(node) + + msg = "ONNXResize11 infer failed for case: sizes={}, scales={}, expected_shape={}, actual_shape={}" + + self.assertTrue(np.array_equal(graph.node['onnx_resize11_data']['shape'], int64_array(output_shape)), + msg.format(sizes, scales, output_shape, graph.node['onnx_resize11_data']['shape'])) + + @generate(*[([1, 260, 100, 150], [1, 260, 200, 350], [1.0, 1.0, 2.0, 350 / 150]), + ([1, 3, 100, 200], [1, 3, 350, 150], [1.0, 1.0, 3.5, 150 / 200]), + ([5, 14, 300, 40], [5, 14, 140, 280], [1.0, 
1.0, 140 / 300, 7.0]), + ([5, 14, 300, 40], [5, 14, 140, 560], [1.0, 1.0, 140 / 300, 14.0]), + ([1, 3, 260, 100, 150], [1, 3, 780, 200, 350], [1.0, 1.0, 3.0, 2.0, 350 / 150]), + ([1, 3, 450, 100, 150], [1, 3, 260, 200, 350], [1.0, 1.0, 260 / 450, 2.0, 350 / 150]), + ([5, 14, 1000, 300, 40], [5, 14, 500, 140, 280], [1.0, 1.0, 0.5, 140 / 300, 7.0]), + ([4, 3, 180, 1340], [4, 3, 60, 804], [1.0, 1.0, 0.33333334, 0.6]), + ([4, 3, 500, 180, 1340], [4, 3, 750, 60, 804], [1.0, 1.0, 1.5, 0.33333334, 0.6])]) + def test_onnx_resize_using_scales_without_roi(self, input_shape, output_shape, scales): + np_scales = np.array(scales) + graph = build_graph(nodes_attrs=graph_node_attrs_scales, + edges=[('input', 'input_data'), + ('scales', 'scales_data'), + ('input_data', 'onnx_resize11', {'in': 0}), + ('scales_data', 'onnx_resize11', {'in': 2}), + ('onnx_resize11', 'onnx_resize11_data'), + ('onnx_resize11_data', 'op_output'), + ], + update_attributes={ + 'input_data': {'shape': int64_array(input_shape)}, + 'scales': {'shape': int64_array(np_scales.shape), 'value': np_scales}, + 'scales_data': {'shape': int64_array(np_scales.shape), 'value': np_scales}, + }) + node = Node(graph, 'onnx_resize11') + ONNXResize11Op.onnx_resize_infer(node) + + msg = "ONNXResize11 infer failed for case: scales={}, expected_shape={}, actual_shape={}" + + self.assertTrue(np.array_equal(graph.node['onnx_resize11_data']['shape'], int64_array(output_shape)), + msg.format(scales, output_shape, graph.node['onnx_resize11_data']['shape'])) From 76313a0ffa2896e76d01180f0843e7c860d9a719 Mon Sep 17 00:00:00 2001 From: Andrey Zaytsev Date: Tue, 8 Jun 2021 13:39:35 +0300 Subject: [PATCH 24/41] Feature/azaytsev/update license aggrements link (#6068) * Added info on DockerHub CI Framework * Feature/azaytsev/change layout (#3295) * Changes according to feedback comments * Replaced @ref's with html links * Fixed links, added a title page for installing from repos and images, fixed formatting issues * Added links * minor fix * Added DL Streamer to the list of components installed by default * Link fixes * Link fixes * ovms doc fix (#2988) * added OpenVINO Model Server * ovms doc fixes Co-authored-by: Trawinski, Dariusz * Updated openvino_docs.xml * Updated the link to software license agreements * Revert "Updated the link to software license agreements" This reverts commit 706dac500e764bd7534f7005ac6197f827d68cb5. * Updated the link to software license agreements Co-authored-by: Trawinski, Dariusz --- docs/install_guides/installing-openvino-apt.md | 2 +- docs/install_guides/installing-openvino-yum.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/install_guides/installing-openvino-apt.md b/docs/install_guides/installing-openvino-apt.md index 66518696991..1bd734bd856 100644 --- a/docs/install_guides/installing-openvino-apt.md +++ b/docs/install_guides/installing-openvino-apt.md @@ -2,7 +2,7 @@ This guide provides installation steps for Intel® Distribution of OpenVINO™ toolkit for Linux* distributed through the APT repository. -> **IMPORTANT**: By downloading and using this container and the included software, you agree to the terms and conditions of the [software license agreements](https://software.intel.com/en-us/license/eula-for-intel-software-development-products). Please, review the content inside the `/licensing` folder for more details. 
+> **IMPORTANT**: By downloading and using this container and the included software, you agree to the terms and conditions of the [software license agreements](https://software.intel.com/content/dam/develop/external/us/en/documents/intel-openvino-license-agreements.pdf). Please, review the content inside the `/licensing` folder for more details. > **NOTE**: Intel® Graphics Compute Runtime for OpenCL™ is not a part of OpenVINO™ APT distribution. You can install it from the [Intel® Graphics Compute Runtime for OpenCL™ GitHub repo](https://github.com/intel/compute-runtime). diff --git a/docs/install_guides/installing-openvino-yum.md b/docs/install_guides/installing-openvino-yum.md index 27e464d1b84..c326cb93a0f 100644 --- a/docs/install_guides/installing-openvino-yum.md +++ b/docs/install_guides/installing-openvino-yum.md @@ -2,7 +2,7 @@ This guide provides installation steps for the Intel® Distribution of OpenVINO™ toolkit for Linux* distributed through the YUM repository. -> **IMPORTANT**: By downloading and using this container and the included software, you agree to the terms and conditions of the [software license agreements](https://software.intel.com/en-us/license/eula-for-intel-software-development-products). Please, review the content inside the `/licensing` folder for more details. +> **IMPORTANT**: By downloading and using this container and the included software, you agree to the terms and conditions of the [software license agreements](https://software.intel.com/content/dam/develop/external/us/en/documents/intel-openvino-license-agreements.pdf). Please, review the content inside the `/licensing` folder for more details. > **NOTE**: Intel® Graphics Compute Runtime for OpenCL™ is not a part of OpenVINO™ YUM distribution. You can install it from the [Intel® Graphics Compute Runtime for OpenCL™ GitHub repo](https://github.com/intel/compute-runtime). From 440d3ee0ba59700c9ced758877f4033e91c93b8c Mon Sep 17 00:00:00 2001 From: Andrey Zaytsev Date: Tue, 8 Jun 2021 13:49:01 +0300 Subject: [PATCH 25/41] Feature/azaytsev/update gpu driver link (#6062) * Added info on DockerHub CI Framework * Feature/azaytsev/change layout (#3295) * Changes according to feedback comments * Replaced @ref's with html links * Fixed links, added a title page for installing from repos and images, fixed formatting issues * Added links * minor fix * Added DL Streamer to the list of components installed by default * Link fixes * Link fixes * ovms doc fix (#2988) * added OpenVINO Model Server * ovms doc fixes Co-authored-by: Trawinski, Dariusz * Updated openvino_docs.xml * Updated link * Update installing-openvino-windows.md Co-authored-by: Trawinski, Dariusz --- docs/install_guides/installing-openvino-windows.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/docs/install_guides/installing-openvino-windows.md b/docs/install_guides/installing-openvino-windows.md index 1a1a31a07c6..054950292b6 100644 --- a/docs/install_guides/installing-openvino-windows.md +++ b/docs/install_guides/installing-openvino-windows.md @@ -248,8 +248,8 @@ Or proceed to the Get Started to get started with run > **NOTE**: These steps are required only if you want to use an Intel® integrated GPU. -If your applications offload computation to **Intel® Integrated Graphics**, you must have the latest version of Intel Graphics Driver for Windows installed for your hardware. -[Download and install a higher version](http://downloadcenter.intel.com/product/80939/Graphics-Drivers). 
+If your applications offload computation to **Intel® Integrated Graphics**, you must have the Intel Graphics Driver for Windows installed for your hardware. +[Download and install the recommended version](https://downloadcenter.intel.com/download/30079/Intel-Graphics-Windows-10-DCH-Drivers). To check if you have this driver installed: @@ -265,8 +265,6 @@ To check if you have this driver installed: ![](../img/DeviceDriverVersion.PNG) -> **NOTE**: To use the **Intel® Iris® Xe MAX Graphics**, see the [Drivers & Software](https://downloadcenter.intel.com/download/29993/Intel-Iris-Xe-MAX-Dedicated-Graphics-Drivers?product=80939) page for driver downloads and installation instructions. - You are done updating your device driver and are ready to use your GPU. Proceed to the Get Started to get started with running code samples and demo applications. ### Optional: Additional Installation Steps for the Intel® Vision Accelerator Design with Intel® Movidius™ VPUs From 9214fa72e2809cf7a353082a7107092f25770508 Mon Sep 17 00:00:00 2001 From: Alexandra Sidorova Date: Tue, 8 Jun 2021 14:04:31 +0300 Subject: [PATCH 26/41] [CPU] Fixed AvgPooling and FQ fusing (#5994) --- .../plugin/cpu/single_layer_tests/pooling.cpp | 82 ++++++++++++++++--- .../cpu/test_utils/fusing_test_utils.hpp | 7 ++ inference-engine/thirdparty/mkl-dnn | 2 +- 3 files changed, 80 insertions(+), 11 deletions(-) diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/pooling.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/pooling.cpp index 72dbe0d5e12..0df9c464c4f 100644 --- a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/pooling.cpp +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/pooling.cpp @@ -5,6 +5,7 @@ #include "ngraph_functions/builders.hpp" #include "test_utils/cpu_test_utils.hpp" #include "shared_test_classes/single_layer/pooling.hpp" +#include "test_utils/fusing_test_utils.hpp" using namespace InferenceEngine; using namespace CPUTestUtils; @@ -13,21 +14,24 @@ using namespace LayerTestsDefinitions; namespace CPULayerTestsDefinitions { typedef std::tuple< poolLayerTestParamsSet, - CPUSpecificParams + CPUSpecificParams, + fusingSpecificParams > poolLayerCpuTestParamsSet; class PoolingLayerCPUTest : public testing::WithParamInterface, - virtual public LayerTestsUtils::LayerTestsCommon, public CPUTestsBase { + virtual public LayerTestsUtils::LayerTestsCommon, public CpuTestWithFusing { public: static std::string getTestCaseName(const testing::TestParamInfo& obj) { poolLayerTestParamsSet basicParamsSet; CPUSpecificParams cpuParams; - std::tie(basicParamsSet, cpuParams) = obj.param; + fusingSpecificParams fusingParams; + std::tie(basicParamsSet, cpuParams, fusingParams) = obj.param; std::ostringstream result; result << PoolingLayerTest::getTestCaseName(testing::TestParamInfo( basicParamsSet, 0)); result << CPUTestsBase::getTestCaseName(cpuParams); + result << CpuTestWithFusing::getTestCaseName(fusingParams); return result.str(); } @@ -36,7 +40,8 @@ protected: void SetUp() { poolLayerTestParamsSet basicParamsSet; CPUSpecificParams cpuParams; - std::tie(basicParamsSet, cpuParams) = this->GetParam(); + fusingSpecificParams fusingParams; + std::tie(basicParamsSet, cpuParams, fusingParams) = this->GetParam(); poolSpecificParams poolParams; std::vector inputShape; @@ -48,6 +53,7 @@ protected: } std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; + std::tie(postOpMgrPtr, fusedOps) = fusingParams; if (selectedType.empty()) { selectedType = 
getPrimitiveType(); @@ -133,7 +139,8 @@ INSTANTIATE_TEST_CASE_P(smoke_MaxPool_CPU_4D, PoolingLayerCPUTest, ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(std::vector({1, 3, 64, 64})), ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfoForDevice(vecCpuConfigs))), + ::testing::ValuesIn(filterCPUInfoForDevice(vecCpuConfigs)), + ::testing::Values(emptyFusingSpec)), PoolingLayerCPUTest::getTestCaseName); INSTANTIATE_TEST_CASE_P(smoke_AvgPool_CPU_4D, PoolingLayerCPUTest, @@ -147,7 +154,8 @@ INSTANTIATE_TEST_CASE_P(smoke_AvgPool_CPU_4D, PoolingLayerCPUTest, ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(std::vector({1, 4, 64, 64})), ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfoForDevice(vecCpuConfigs))), + ::testing::ValuesIn(filterCPUInfoForDevice(vecCpuConfigs)), + ::testing::Values(emptyFusingSpec)), PoolingLayerCPUTest::getTestCaseName); INSTANTIATE_TEST_CASE_P(smoke_AvgPool_CPU_4D_NotOptimized, PoolingLayerCPUTest, @@ -161,7 +169,8 @@ INSTANTIATE_TEST_CASE_P(smoke_AvgPool_CPU_4D_NotOptimized, PoolingLayerCPUTest, ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(std::vector({1, 4, 64, 64})), ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::Values(ref)), + ::testing::Values(ref), + ::testing::Values(emptyFusingSpec)), PoolingLayerCPUTest::getTestCaseName); const std::vector paramsMax5D = { @@ -200,7 +209,8 @@ INSTANTIATE_TEST_CASE_P(smoke_MaxPool_CPU_5D, PoolingLayerCPUTest, ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(std::vector({1, 3, 16, 32, 32})), ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfoForDevice(vecCpuConfigs))), + ::testing::ValuesIn(filterCPUInfoForDevice(vecCpuConfigs)), + ::testing::Values(emptyFusingSpec)), PoolingLayerCPUTest::getTestCaseName); INSTANTIATE_TEST_CASE_P(smoke_AvgPool_CPU_5D, PoolingLayerCPUTest, @@ -214,7 +224,8 @@ INSTANTIATE_TEST_CASE_P(smoke_AvgPool_CPU_5D, PoolingLayerCPUTest, ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(std::vector({1, 4, 32, 32, 32})), ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfoForDevice(vecCpuConfigs))), + ::testing::ValuesIn(filterCPUInfoForDevice(vecCpuConfigs)), + ::testing::Values(emptyFusingSpec)), PoolingLayerCPUTest::getTestCaseName); INSTANTIATE_TEST_CASE_P(smoke_AvgPool_CPU_5D_NotOptimized, PoolingLayerCPUTest, @@ -228,7 +239,58 @@ INSTANTIATE_TEST_CASE_P(smoke_AvgPool_CPU_5D_NotOptimized, PoolingLayerCPUTest, ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(std::vector({1, 4, 16, 16, 16})), ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::Values(ref)), + ::testing::Values(ref), + ::testing::Values(emptyFusingSpec)), + PoolingLayerCPUTest::getTestCaseName); + +/* === Fusing === */ + +const auto avx512_nhwc = CPUSpecificParams{{nhwc}, {nhwc}, {"jit_avx512"}, "jit_avx512"}; +const auto avx512_ndhwc = CPUSpecificParams{{ndhwc}, {ndhwc}, {"jit_avx512"}, "jit_avx512"}; + +const auto avx2_nhwc = CPUSpecificParams{{nhwc}, {nhwc}, {"jit_avx2"}, "jit_avx2"}; +const auto avx2_ndhwc = CPUSpecificParams{{ndhwc}, {ndhwc}, {"jit_avx2"}, "jit_avx2"}; + +const auto sse42_nhwc = CPUSpecificParams{{nhwc}, {nhwc}, {"jit_sse42"}, "jit_sse42"}; +const auto sse42_ndhwc = CPUSpecificParams{{ndhwc}, {ndhwc}, {"jit_sse42"}, "jit_sse42"}; + +const std::vector vecCpuConfigsFusing_4D = {sse42_nhwc, avx2_nhwc, avx512_nhwc}; +const std::vector vecCpuConfigsFusing_5D 
= {sse42_ndhwc, avx2_ndhwc, avx512_ndhwc}; + +std::vector fusingParamsSet { + emptyFusingSpec, + fusingFakeQuantizePerTensor, + fusingFakeQuantizePerChannel, +}; + +INSTANTIATE_TEST_CASE_P(smoke_AvgPool_CPU_4D_I8, PoolingLayerCPUTest, + ::testing::Combine( + ::testing::Combine( + ::testing::ValuesIn(paramsAvg4D), + ::testing::Values(Precision::FP32), + ::testing::Values(Precision::I8), + ::testing::Values(Precision::FP32), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(std::vector({1, 4, 64, 64})), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfoForDevice(vecCpuConfigsFusing_4D)), + ::testing::ValuesIn(fusingParamsSet)), + PoolingLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(smoke_AvgPool_CPU_5D_I8, PoolingLayerCPUTest, + ::testing::Combine( + ::testing::Combine( + ::testing::ValuesIn(paramsAvg5D), + ::testing::Values(Precision::FP32), + ::testing::Values(Precision::I8), + ::testing::Values(Precision::FP32), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(std::vector({1, 4, 16, 16, 16})), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfoForDevice(vecCpuConfigsFusing_5D)), + ::testing::ValuesIn(fusingParamsSet)), PoolingLayerCPUTest::getTestCaseName); } // namespace diff --git a/inference-engine/tests/functional/plugin/cpu/test_utils/fusing_test_utils.hpp b/inference-engine/tests/functional/plugin/cpu/test_utils/fusing_test_utils.hpp index 0e5fc43d366..d55f2a98859 100644 --- a/inference-engine/tests/functional/plugin/cpu/test_utils/fusing_test_utils.hpp +++ b/inference-engine/tests/functional/plugin/cpu/test_utils/fusing_test_utils.hpp @@ -216,6 +216,13 @@ const auto fusingScaleShift = fusingSpecificParams{ std::make_shared(inpNode, constNode); }, "Add(PerChannel)"}}), {"Add"} }; +const auto fusingFakeQuantizePerTensor = fusingSpecificParams{ std::make_shared(std::vector{ + {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ + auto localPrc = inpNode->get_element_type(); + ngraph::Shape newShape(inpNode->get_shape().size(), 1); + return ngraph::builder::makeFakeQuantize(inpNode, localPrc, 256, newShape); + }, "FakeQuantize(PerTensor)"}}), {"FakeQuantize"} }; + const auto fusingFakeQuantizePerChannel = fusingSpecificParams{std::make_shared(std::vector{ {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ auto localPrc = inpNode->get_element_type(); diff --git a/inference-engine/thirdparty/mkl-dnn b/inference-engine/thirdparty/mkl-dnn index aa47fcd2a03..1cb9d0615aa 160000 --- a/inference-engine/thirdparty/mkl-dnn +++ b/inference-engine/thirdparty/mkl-dnn @@ -1 +1 @@ -Subproject commit aa47fcd2a03ee5caac119b6417bc66abe3154aab +Subproject commit 1cb9d0615aaf511b51b8f8fc3c3ff8805ad9be6c From a7a9364b417b35464d352622254517f4ce602848 Mon Sep 17 00:00:00 2001 From: Irina Efode Date: Tue, 8 Jun 2021 14:23:45 +0300 Subject: [PATCH 27/41] [IE TESTS] Add local_cache arg to the subgraphDumper (#6063) --- .../conformance/subgraphs_dumper/README.md | 1 + .../subgraphs_dumper/include/gflag_config.hpp | 5 ++- .../conformance/subgraphs_dumper/src/main.cpp | 38 +++++++++++++------ 3 files changed, 32 insertions(+), 12 deletions(-) diff --git a/inference-engine/tests/functional/plugin/conformance/subgraphs_dumper/README.md 
b/inference-engine/tests/functional/plugin/conformance/subgraphs_dumper/README.md index 1f21dd1c07f..07b50de8409 100644 --- a/inference-engine/tests/functional/plugin/conformance/subgraphs_dumper/README.md +++ b/inference-engine/tests/functional/plugin/conformance/subgraphs_dumper/README.md @@ -17,6 +17,7 @@ Outcome of a build is a `subgrpahsDumper` binary located in building artifacts f ## Running The tool takes two command line parameters: * `--input_folders` - Required. Comma separated paths to the input folders with IRs +* `--local_cache` - Optional. Comma separated paths to the local cache folders with IRs. * `--output_folder` - Required. Path to the output folders where to serialize IRs * `--path_regex` - Optional. regular expression to be applied in input folders recursive discovery * `--constants_size_threshold` - Optional. Maximum size of constant in megabytes to be serialized. diff --git a/inference-engine/tests/functional/plugin/conformance/subgraphs_dumper/include/gflag_config.hpp b/inference-engine/tests/functional/plugin/conformance/subgraphs_dumper/include/gflag_config.hpp index c37e45c445b..13ad0007de7 100644 --- a/inference-engine/tests/functional/plugin/conformance/subgraphs_dumper/include/gflag_config.hpp +++ b/inference-engine/tests/functional/plugin/conformance/subgraphs_dumper/include/gflag_config.hpp @@ -9,6 +9,7 @@ static const char help_message[] = "Print a usage message."; static const char input_folders_message[] = "Required. Comma separated paths to the input folders with IRs"; +static const char local_cache_message[] = "Optional. Comma separated paths to the local cache folders with IRs"; static const char output_folder_message[] = "Required. Path to the output folders where to serialize IRs"; static const char path_regex_message[] = "Optional. regular expression to be applied in input " "folders recursive discovery"; @@ -21,7 +22,8 @@ static const char eliminate_dynamism_message[] = "Optional. 
If specified dynamic "and replaced by propagated upper bound values (if possible)"; DEFINE_bool(h, false, help_message); -DEFINE_string(input_folders, ".", input_folders_message); +DEFINE_string(input_folders, "", local_cache_message); +DEFINE_string(local_cache, ".", input_folders_message); DEFINE_string(output_folder, "output", output_folder_message); DEFINE_string(path_regex, ".*", output_folder_message); DEFINE_double(constants_size_threshold, 1., constants_size_threshold_message); @@ -37,6 +39,7 @@ static void showUsage() { std::cout << "\n"; std::cout << " -h " << help_message << "\n"; std::cout << " --input_folders \"\" " << input_folders_message << "\n"; + std::cout << " --local_cache \"\" " << input_folders_message << "\n"; std::cout << " --output_folder \"\" " << output_folder_message << "\n"; std::cout << " --path_regex \"\" " << path_regex_message << "\n"; std::cout << " --constants_size_threshold \"\" " << constants_size_threshold_message << "\n"; diff --git a/inference-engine/tests/functional/plugin/conformance/subgraphs_dumper/src/main.cpp b/inference-engine/tests/functional/plugin/conformance/subgraphs_dumper/src/main.cpp index 0bd42bf169d..16f8f55a98b 100644 --- a/inference-engine/tests/functional/plugin/conformance/subgraphs_dumper/src/main.cpp +++ b/inference-engine/tests/functional/plugin/conformance/subgraphs_dumper/src/main.cpp @@ -19,17 +19,8 @@ #include #include -int main(int argc, char *argv[]) { - uint8_t ret_code = 0; - - gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true); - if (FLAGS_h) { - showUsage(); - return 0; - } - SubgraphsDumper::ClonersMap::constant_size_threshold_mb = FLAGS_constants_size_threshold; +std::vector findModelsInDirs(const std::vector &dirs) { std::vector input_folder_content; - std::vector dirs = CommonTestUtils::splitStringByDelimiter(FLAGS_input_folders); for (const auto &dir : dirs) { if (!CommonTestUtils::directoryExists(dir)) { std::string msg = "Input directory (" + dir + ") doesn't not exist!"; @@ -51,9 +42,13 @@ int main(int argc, char *argv[]) { std::string msg = "Output directory (" + FLAGS_output_folder + ") doesn't not exist!"; throw std::runtime_error(msg); } + return models; +} +void cacheModels(std::unique_ptr &cache, + uint8_t& ret_code, + const std::vector& models) { auto ie = InferenceEngine::Core(); - auto cache = SubgraphsDumper::OPCache::make_cache(); time_t rawtime; struct tm *timeinfo; char buffer[20]; @@ -92,6 +87,27 @@ int main(int argc, char *argv[]) { } } } +} + + +int main(int argc, char *argv[]) { + uint8_t ret_code = 0; + + gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true); + if (FLAGS_h) { + showUsage(); + return 0; + } + SubgraphsDumper::ClonersMap::constant_size_threshold_mb = FLAGS_constants_size_threshold; + + std::vector local_cache_dirs = CommonTestUtils::splitStringByDelimiter(FLAGS_local_cache); + std::vector dirs = CommonTestUtils::splitStringByDelimiter(FLAGS_input_folders); + auto cachedOps = findModelsInDirs(local_cache_dirs); + auto models = findModelsInDirs(dirs); + + auto cache = SubgraphsDumper::OPCache::make_cache(); + cacheModels(cache, ret_code, cachedOps); + cacheModels(cache, ret_code, models); cache->serialize_cached_ops(FLAGS_output_folder); return ret_code; From 9e34622ac1f899b3c6ddd151aadb3696ffc961eb Mon Sep 17 00:00:00 2001 From: Edward Shogulin Date: Tue, 8 Jun 2021 15:29:30 +0300 Subject: [PATCH 28/41] [LPT] Concat precision selection fix (#6069) --- .../src/concat.cpp | 38 ++- .../src/concat_multi_channels.cpp | 15 +- ...ate_precision_selection_transformation.cpp | 
317 ++++++++++++++++++ .../simple_low_precision_transformer.cpp | 38 ++- .../simple_low_precision_transformer.hpp | 16 +- .../lpt_ngraph_functions/concat_function.hpp | 19 ++ .../src/concat_function.cpp | 123 +++++++ 7 files changed, 542 insertions(+), 24 deletions(-) create mode 100644 inference-engine/tests/functional/inference_engine/lp_transformations/concat_with_intermediate_precision_selection_transformation.cpp diff --git a/inference-engine/src/low_precision_transformations/src/concat.cpp b/inference-engine/src/low_precision_transformations/src/concat.cpp index 4988e29b1e2..f6d860ed172 100644 --- a/inference-engine/src/low_precision_transformations/src/concat.cpp +++ b/inference-engine/src/low_precision_transformations/src/concat.cpp @@ -43,19 +43,21 @@ bool ConcatTransformation::transform(TransformationContext& context, ngraph::pat return false; } - // precisions can be different + // Concat operations precision is defined: + // 1. consumers after Concat + // 2. FakeQuantize precisions without zero point ngraph::Node& quantizationLayer = *subgraph.quantizationLayers[0]; std::shared_ptr fq = ngraph::as_type_ptr(quantizationLayer.shared_from_this()); if (!NetworkHelper::isQuantizeSupported(fq)) { return false; } - - std::vector concatParentsChildrensPrecisions = precisionsOnActivations; - fillAvailablePrecisions(subgraph.quantizationLayers[0], concatParentsChildrensPrecisions); - if (concatParentsChildrensPrecisions.empty()) { + DataPrecision dataPrecision = getDataPrecision(fq, QuantizationDetails::getDetails(fq), false); + if (dataPrecision.precision == ngraph::element::undefined) { return false; } + std::vector concatChildrenPrecisions = precisionsOnActivations; + for (size_t i = 0; i < subgraph.quantizationLayers.size(); ++i) { fq = ngraph::as_type_ptr(subgraph.quantizationLayers[i]); if (fq == nullptr) { @@ -72,20 +74,28 @@ bool ConcatTransformation::transform(TransformationContext& context, ngraph::pat if (quantizationDetails.inputHighValues.size() != 1ul) { return false; } - std::vector fqChildrensPrecisions = precisionsOnActivations; - fillAvailablePrecisions(subgraph.quantizationLayers[i], fqChildrensPrecisions); - concatParentsChildrensPrecisions = NetworkHelper::precisionIntersection(concatParentsChildrensPrecisions, fqChildrensPrecisions); - if (concatParentsChildrensPrecisions.empty()) { + // define concatenation operation consumers precisions + std::vector fqChildrenPrecisions = precisionsOnActivations; + fillAvailablePrecisions(subgraph.quantizationLayers[i], fqChildrenPrecisions); + concatChildrenPrecisions = NetworkHelper::precisionIntersection(concatChildrenPrecisions, fqChildrenPrecisions); + if (concatChildrenPrecisions.empty()) { return false; } + + // define FakeQuantize precisions without zero point + const DataPrecision dataPrecision2 = getDataPrecision(subgraph.quantizationLayers[i]->shared_from_this(), quantizationDetails, false); + if (dataPrecision2.precision == ngraph::element::undefined) { + return false; + } + + if (dataPrecision.precision != dataPrecision2.precision) { + dataPrecision = dataPrecision.precision.is_signed() ? 
dataPrecision : dataPrecision2; + } } - DataPrecision dataPrecision; - if (std::find(concatParentsChildrensPrecisions.begin(), concatParentsChildrensPrecisions.end(), element::i8) != concatParentsChildrensPrecisions.end()) { - dataPrecision = DataPrecision(element::i8); - } else { - dataPrecision = DataPrecision(concatParentsChildrensPrecisions[0]); + if (std::find(concatChildrenPrecisions.begin(), concatChildrenPrecisions.end(), dataPrecision.precision) == concatChildrenPrecisions.end()) { + dataPrecision = DataPrecision(concatChildrenPrecisions[0]); } std::vector quantizationLayersDetails; diff --git a/inference-engine/src/low_precision_transformations/src/concat_multi_channels.cpp b/inference-engine/src/low_precision_transformations/src/concat_multi_channels.cpp index dc81d51cd71..e36c2b5aa74 100644 --- a/inference-engine/src/low_precision_transformations/src/concat_multi_channels.cpp +++ b/inference-engine/src/low_precision_transformations/src/concat_multi_channels.cpp @@ -64,14 +64,23 @@ bool ConcatMultiChannelsTransformation::transform(TransformationContext& context DataPrecision dataPrecision; { + std::vector concatChildrenPrecisions = precisionsOnActivations; for (auto quantizationLayer : subgraph.quantizationLayers) { std::shared_ptr fq = ngraph::as_type_ptr(quantizationLayer->shared_from_this()); if (!NetworkHelper::isQuantizeSupported(fq)) { return false; } - const DataPrecision tmp = getDataPrecision(fq, QuantizationDetails::getDetails(fq), false); + // define concatenation operation consumers precisions + std::vector fqChildrenPrecisions = precisionsOnActivations; + fillAvailablePrecisions(quantizationLayer, fqChildrenPrecisions); + concatChildrenPrecisions = NetworkHelper::precisionIntersection(concatChildrenPrecisions, fqChildrenPrecisions); + if (concatChildrenPrecisions.empty()) { + return false; + } + // define FakeQuantize precisions without zero point + const DataPrecision tmp = getDataPrecision(fq, QuantizationDetails::getDetails(fq), false); if (dataPrecision.precision == ngraph::element::undefined) { dataPrecision = tmp; continue; @@ -81,6 +90,10 @@ bool ConcatMultiChannelsTransformation::transform(TransformationContext& context dataPrecision = tmp; } } + + if (std::find(concatChildrenPrecisions.begin(), concatChildrenPrecisions.end(), dataPrecision.precision) == concatChildrenPrecisions.end()) { + dataPrecision = DataPrecision(concatChildrenPrecisions[0]); + } } for (size_t i = 0; i < subgraph.quantizationLayers.size(); ++i) { diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/concat_with_intermediate_precision_selection_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/concat_with_intermediate_precision_selection_transformation.cpp new file mode 100644 index 00000000000..0d6b29d5fe5 --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/concat_with_intermediate_precision_selection_transformation.cpp @@ -0,0 +1,317 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "layer_transformation.hpp" + +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "common_test_utils/ngraph_test_utils.hpp" +#include "lpt_ngraph_functions/concat_function.hpp" +#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp" +#include "simple_low_precision_transformer.hpp" + +using namespace testing; +using namespace ngraph; +using namespace ngraph::pass; + 
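The precision selection exercised by this test can be summarized as: intersect the precisions that every consumer after the Concat supports, track the precision each FakeQuantize prefers (favoring a signed one on disagreement), and fall back to the first consumer-supported precision if the preferred one is not in the intersection. A minimal standalone sketch of that idea, with made-up precision sets and helper names (this is not the LPT implementation itself):

```cpp
#include <algorithm>
#include <iostream>
#include <vector>

// Illustrative stand-in; the real transformation works with ngraph element
// types and DataPrecision objects.
enum class Precision { U8, I8 };

bool isSigned(Precision p) { return p == Precision::I8; }

// Keep only the precisions of `a` that are also present in `b`.
std::vector<Precision> intersection(const std::vector<Precision>& a,
                                    const std::vector<Precision>& b) {
    std::vector<Precision> result;
    for (const auto p : a)
        if (std::find(b.begin(), b.end(), p) != b.end())
            result.push_back(p);
    return result;
}

int main() {
    // Per-branch inputs: the precision each FakeQuantize prefers (no zero point)
    // and the precisions its consumers can accept. Values are made up for the demo.
    struct Branch { Precision preferred; std::vector<Precision> childrenPrecisions; };
    const std::vector<Branch> branches = {
        {Precision::I8, {Precision::U8, Precision::I8}},
        {Precision::U8, {Precision::U8, Precision::I8}},
    };

    // 1. Intersect consumer-supported precisions across all branches,
    // 2. merging the per-branch preferences so that a signed precision wins a tie.
    std::vector<Precision> concatChildrenPrecisions = {Precision::U8, Precision::I8};
    Precision dataPrecision = branches.front().preferred;
    for (const auto& branch : branches) {
        concatChildrenPrecisions = intersection(concatChildrenPrecisions, branch.childrenPrecisions);
        if (branch.preferred != dataPrecision && !isSigned(dataPrecision))
            dataPrecision = branch.preferred;
    }
    if (concatChildrenPrecisions.empty())
        return 1;  // the transformation gives up in this case

    // 3. Fall back to the first consumer-supported precision if the preferred
    //    one is not in the intersection.
    if (std::find(concatChildrenPrecisions.begin(), concatChildrenPrecisions.end(),
                  dataPrecision) == concatChildrenPrecisions.end())
        dataPrecision = concatChildrenPrecisions.front();

    std::cout << (dataPrecision == Precision::I8 ? "I8" : "U8") << "\n";
    return 0;
}
```

The fallback in step 3 corresponds to the `dataPrecision = DataPrecision(concatChildrenPrecisions[0])` assignment in the concat.cpp change above.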
+namespace { + +class ConcatTransformationActualValues { +public: + ngraph::builder::subgraph::FakeQuantizeOnData fakeQuantize1; + ngraph::builder::subgraph::FakeQuantizeOnData fakeQuantize2; +}; + +inline std::ostream& operator<<(std::ostream& out, const ConcatTransformationActualValues& values) { + return out << "_" << values.fakeQuantize1 << "_" << values.fakeQuantize2; +} + +class ConcatTransformationResultValues { +public: + ngraph::builder::subgraph::FakeQuantizeOnData fakeQuantize1; + ngraph::builder::subgraph::FakeQuantizeOnData fakeQuantize2; + ngraph::element::Type precisionBeforeOp; + ngraph::builder::subgraph::DequantizationOperations dequantizationBefore1; + ngraph::builder::subgraph::DequantizationOperations dequantizationBefore2; + ngraph::element::Type precisionAfterOperation; + ngraph::builder::subgraph::DequantizationOperations dequantizationAfter1; + ngraph::builder::subgraph::DequantizationOperations dequantizationAfter2; +}; + +inline std::ostream& operator<<(std::ostream& out, const ConcatTransformationResultValues& values) { + return out << "_" << + values.fakeQuantize1 << "_" << + values.fakeQuantize2 << "_" << + values.dequantizationAfter1 << "_" << + values.dequantizationAfter2; +} + +class ConcatTransformationTestValues { +public: + ngraph::pass::low_precision::LayerTransformation::Params params; + bool multiChannels; + ConcatTransformationActualValues actual; + ConcatTransformationResultValues result; +}; + +inline std::ostream& operator<<(std::ostream& out, const ConcatTransformationTestValues& values) { + return out << "_" << values.multiChannels << "_" << values.actual << "_" << values.result; +} + +typedef std::tuple < + ngraph::element::Type, + ngraph::Shape, + ConcatTransformationTestValues +> ConcatTransformationParams; + +class ConcatWithIntermediatePrecisionSelectionTransformation : public LayerTransformation, public testing::WithParamInterface { +public: + void SetUp() override { + const ngraph::element::Type precision = std::get<0>(GetParam()); + const ngraph::Shape shape = std::get<1>(GetParam()); + ConcatTransformationTestValues testValues = std::get<2>(GetParam()); + + actualFunction = ngraph::builder::subgraph::ConcatFunction::getOriginalWithIntermediateAvgPool( + precision, + shape, + testValues.actual.fakeQuantize1, + testValues.actual.fakeQuantize2); + + SimpleLowPrecisionTransformer transform; + if (testValues.multiChannels) { + transform.addBranchSpecific(testValues.params); + } else { + transform.addBranchSpecific(testValues.params); + } + transform.add(testValues.params); + transform.add(testValues.params); + transform.transform(actualFunction); + + referenceFunction = ngraph::builder::subgraph::ConcatFunction::getReferenceWithIntermediateAvgPool( + precision, + shape, + testValues.result.fakeQuantize1, + testValues.result.fakeQuantize2, + testValues.result.precisionBeforeOp, + testValues.result.dequantizationBefore1, + testValues.result.dequantizationBefore2, + testValues.result.precisionAfterOperation, + testValues.result.dequantizationAfter1, + testValues.result.dequantizationAfter2); + } + + static std::string getTestCaseName(testing::TestParamInfo obj) { + const ngraph::element::Type precision = std::get<0>(obj.param); + const ngraph::Shape shape = std::get<1>(obj.param); + const ConcatTransformationTestValues testValues = std::get<2>(obj.param); + + std::ostringstream result; + result << + LayerTransformation::getTestCaseNameByParams(precision, shape, testValues.params) << "_" << + (testValues.multiChannels ? 
"multiChannels_" : "notMultiChannels_") << + testValues.actual << "_" << + testValues.result << "_"; + return result.str(); + } +}; + +TEST_P(ConcatWithIntermediatePrecisionSelectionTransformation, CompareFunctions) { + actualFunction->validate_nodes_and_infer_types(); + auto res = compare_functions(referenceFunction, actualFunction, true, false, true); + ASSERT_TRUE(res.first) << res.second; +} + +const std::vector precisions = { + ngraph::element::f32, + // ngraph::element::f16 +}; + +const std::vector testValues = { + // Concat: FakeQuantize operations with signed intervals but consumer requires U8 + { + LayerTransformation::createParamsU8I8(), + false, + { + { 256ul, ngraph::Shape({}), {-1.28f}, {1.27f}, {-1.28f}, {1.27f} }, + { 256ul, ngraph::Shape({}), {-1.28f / 2.f}, {1.27f / 2.f}, {-1.28f / 2.f}, {1.27f / 2.f} } + }, + { + { 256ul, ngraph::Shape({}), {-1.28f}, {1.27f}, {0.f}, {255.f} }, + { 256ul, ngraph::Shape({}), {-1.28f / 2.f}, {1.27f / 2.f}, {64.f}, {192.f} }, + ngraph::element::u8, + {{}, {}, {}}, + {{}, {}, {}}, + ngraph::element::u8, + { ngraph::element::f32, { 128.f }, { 0.01f } }, + { {}, { 128.f }, { 0.01f } } + } + }, + + // Concat: FakeQuantize operations with unsigned intervals but consumer requires I8 + { + LayerTransformation::createParamsI8I8(), + false, + { + { 256ul, ngraph::Shape({}), {0.f}, {2.55f}, {0.f}, {2.55f} }, + { 256ul, ngraph::Shape({}), {0.f}, {2.55f / 2.f}, {0.f}, {2.55f / 2.f} } + }, + { + { 256ul, ngraph::Shape({}), {0.f}, {2.55f}, {-128.f}, {127.f} }, + { 256ul, ngraph::Shape({}), {0.f}, {2.55f / 2.f}, {-128.f}, { -0.f} }, + ngraph::element::i8, + {{}, {}, {}}, + {{}, {}, {}}, + ngraph::element::i8, + { ngraph::element::f32, { -128.f }, { 0.01f } }, + { {}, { -128.f }, { 0.01f } } + } + }, + + // ConcatMultichannel: FakeQuantize operations with signed intervals but consumer requires U8 + { + LayerTransformation::createParamsU8I8(), + true, + { + { 256ul, ngraph::Shape({}), {-1.28f}, {1.27f}, {-1.28f}, {1.27f} }, + { 256ul, ngraph::Shape({}), {-1.28f / 2.f}, {1.27f / 2.f}, {-1.28f / 2.f}, {1.27f / 2.f} } + }, + { + { 256ul, ngraph::Shape({}), {-1.28f}, {1.27f}, {0.f}, {255.f} }, + { 256ul, ngraph::Shape({}), {-1.28f / 2.f}, {1.27f / 2.f}, {0.f}, { 255.f} }, + ngraph::element::u8, + {}, + {}, + ngraph::element::u8, + { ngraph::element::f32, { 128.f }, {{ 0.01f, 0.01f, 0.01f, 0.005f, 0.005f, 0.005f }} }, + { {}, { 128.f }, { 0.005f } } + } + }, + + // ConcatMultichannel: FakeQuantize operations with unsigned intervals but consumer requires I8 + { + LayerTransformation::createParamsI8I8(), + true, + { + { 256ul, ngraph::Shape({}), {0.f}, {2.55f}, {0.f}, {2.55f} }, + { 256ul, ngraph::Shape({}), {0.f}, {2.55f / 2.f}, {0.f}, {2.55f / 2.f} } + }, + { + { 256ul, ngraph::Shape({}), {0.f}, {2.55f}, {-128.f}, {127.f} }, + { 256ul, ngraph::Shape({}), {0.f}, {2.55f / 2.f}, {-128.f}, { 127.f} }, + ngraph::element::i8, + {{}, {}, {}}, + {{}, {}, {}}, + ngraph::element::i8, + { ngraph::element::f32, { -128.f }, {{ 0.01f, 0.01f, 0.01f, 0.005f, 0.005f, 0.005f }} }, + { {}, { -128.f }, { 0.005f } } + } + }, + + // Concat: FakeQuantize operations with unsigned intervals, no consumer limitations: FQ were decomposed to U8 precision + { + LayerTransformation::createParamsU8I8AndI8(), + false, + { + { 256ul, ngraph::Shape({}), {0.f}, {2.55f}, {0.f}, {2.55f} }, + { 256ul, ngraph::Shape({}), {0.f}, {2.55f / 2.f}, {0.f}, {2.55f / 2.f} } + }, + { + { 256ul, ngraph::Shape({}), {0.f}, {2.55f}, {0.f}, {255.f} }, + { 256ul, ngraph::Shape({}), {0.f}, {2.55f / 2.f}, {0.f}, { 128.f} 
}, + ngraph::element::u8, + {{}, {}, {}}, + {{}, {}, {}}, + ngraph::element::u8, + { ngraph::element::f32, {}, { 0.01f } }, + { {}, {}, { 0.01f } } + } + }, + + // Concat: FakeQuantize operations with signed intervals, no consumer limitations: FQ were decomposed to I8 precision + { + LayerTransformation::createParamsU8I8AndI8(), + false, + { + { 256ul, ngraph::Shape({}), {-1.28f}, {1.27f}, {-1.28f}, {1.27f} }, + { 256ul, ngraph::Shape({}), {-1.28f / 2.f}, {1.27f / 2.f}, {-1.28f / 2.f}, {1.27f / 2.f} } + }, + { + { 256ul, ngraph::Shape({}), {-1.28f}, {1.27f}, {-128.f}, {127.f} }, + { 256ul, ngraph::Shape({}), {-1.28f / 2.f}, {1.27f / 2.f}, {-64.f}, {64.f} }, + ngraph::element::i8, + {{}, {}, {}}, + {{}, {}, {}}, + ngraph::element::i8, + { ngraph::element::f32, {}, { 0.01f } }, + { {}, {}, { 0.01f } } + } + }, + + // ConcatMultichannel: FakeQuantize operations with unsigned intervals, no consumer limitations: FQ were decomposed to U8 precision + { + LayerTransformation::createParamsU8I8AndI8(), + true, + { + { 256ul, ngraph::Shape({}), {0.f}, {2.55f}, {0.f}, {2.55f} }, + { 256ul, ngraph::Shape({}), {0.f}, {2.55f / 2.f}, {0.f}, {2.55f / 2.f} } + }, + { + { 256ul, ngraph::Shape({}), {0.f}, {2.55f}, {0.f}, {255.f} }, + { 256ul, ngraph::Shape({}), {0.f}, {2.55f / 2.f}, {0.f}, {255.f} }, + ngraph::element::u8, + {{}, {}, {}}, + {{}, {}, {}}, + ngraph::element::u8, + { ngraph::element::f32, {}, {{ 0.01f, 0.01f, 0.01f, 0.005f, 0.005f, 0.005f }} }, + { {}, {}, { 0.005f } } + } + }, + + // ConcatMultichannel: FakeQuantize operations with signed intervals, no consumer limitations: FQ were decomposed to I8 precision + { + LayerTransformation::createParamsU8I8AndI8(), + true, + { + { 256ul, ngraph::Shape({}), {-1.28f}, {1.27f}, {-1.28f}, {1.27f} }, + { 256ul, ngraph::Shape({}), {-1.28f / 2.f}, {1.27f / 2.f}, {-1.28f / 2.f}, {1.27f / 2.f} } + }, + { + { 256ul, ngraph::Shape({}), {-1.28f}, {1.27f}, {-128.f}, {127.f} }, + { 256ul, ngraph::Shape({}), {-1.28f / 2.f}, {1.27f / 2.f}, {-128.f}, {127.f} }, + ngraph::element::i8, + {{}, {}, {}}, + {{}, {}, {}}, + ngraph::element::i8, + { ngraph::element::f32, {}, {{ 0.01f, 0.01f, 0.01f, 0.005f, 0.005f, 0.005f }} }, + { {}, {}, { 0.005f } } + } + } +}; + +const std::vector shapes = { + { 1, 3, 9, 9 }, + { 4, 3, 9, 9 } +}; + +INSTANTIATE_TEST_CASE_P( + smoke_LPT, + ConcatWithIntermediatePrecisionSelectionTransformation, + ::testing::Combine( + ::testing::ValuesIn(precisions), + ::testing::ValuesIn(shapes), + ::testing::ValuesIn(testValues)), + ConcatWithIntermediatePrecisionSelectionTransformation::getTestCaseName); +} // namespace diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/simple_low_precision_transformer.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/simple_low_precision_transformer.cpp index 8ee17c8e39b..3c48d56be5b 100644 --- a/inference-engine/tests/functional/inference_engine/lp_transformations/simple_low_precision_transformer.cpp +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/simple_low_precision_transformer.cpp @@ -49,19 +49,41 @@ bool SimpleLowPrecisionTransformer::isPrecisionPreserved(const std::shared_ptr& function) { + // initialization + for (auto it : branchSpecificTransformations) { + ngraph::pass::low_precision::LayerTransformationPtr transformation = it.second; + transformation->setParamsManager(this); + transformation->setLayerTransformationsManager(this); + } + + for (auto it : transformations) { + ngraph::pass::low_precision::LayerTransformationPtr 
transformation = it.second; + transformation->setParamsManager(this); + transformation->setLayerTransformationsManager(this); + } + + // transformation { ngraph::pass::low_precision::TypeRelaxedReplacer pass; pass.run_on_function(function); } ngraph::pass::low_precision::TransformationContext context(function); - GraphRewrite pass; - for (auto it : transformations) { - ngraph::pass::low_precision::LayerTransformationPtr transformation = it.second; - - transformation->setParamsManager(this); - transformation->setLayerTransformationsManager(this); - transformation->registerMatcherIn(pass, context); + { + GraphRewrite pass; + for (auto it : branchSpecificTransformations) { + ngraph::pass::low_precision::LayerTransformationPtr transformation = it.second; + transformation->registerMatcherIn(pass, context); + } + pass.run_on_function(function); + } + + { + GraphRewrite pass; + for (auto it : transformations) { + ngraph::pass::low_precision::LayerTransformationPtr transformation = it.second; + transformation->registerMatcherIn(pass, context); + } + pass.run_on_function(function); } - pass.run_on_function(function); } diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/simple_low_precision_transformer.hpp b/inference-engine/tests/functional/inference_engine/lp_transformations/simple_low_precision_transformer.hpp index b4bf3a9c978..c9582adf0f0 100644 --- a/inference-engine/tests/functional/inference_engine/lp_transformations/simple_low_precision_transformer.hpp +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/simple_low_precision_transformer.hpp @@ -28,9 +28,22 @@ public: bool isQuantized(const std::shared_ptr& layer) const noexcept override; bool isPrecisionPreserved(const std::shared_ptr& layer) const noexcept override; + template + ngraph::pass::low_precision::LayerTransformationPtr addBranchSpecific(const ngraph::pass::low_precision::LayerTransformation::Params& params) { + const std::string typeName = ngraph::pass::low_precision::LowPrecisionTransformations::getType(); + + const auto it = branchSpecificTransformations.find(typeName); + if (it != branchSpecificTransformations.end()) { + branchSpecificTransformations.erase(it); + } + + auto transformation = std::make_shared(params); + branchSpecificTransformations.emplace(typeName, transformation); + return transformation; + } + template ngraph::pass::low_precision::LayerTransformationPtr add(const ngraph::pass::low_precision::LayerTransformation::Params& params) { - // const std::string typeName = typeid(ngraph::op::TypeRelaxed).name(); const std::string typeName = ngraph::pass::low_precision::LowPrecisionTransformations::getType(); const auto it = transformations.find(typeName); @@ -46,5 +59,6 @@ public: void transform(std::shared_ptr& function); private: + std::map branchSpecificTransformations; std::map transformations; }; diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/concat_function.hpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/concat_function.hpp index c0c1686ca55..f70f653efe2 100644 --- a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/concat_function.hpp +++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/concat_function.hpp @@ -51,6 +51,12 @@ public: const FakeQuantizeOnData& fqOnData1, const FakeQuantizeOnData& fqOnData2); + static std::shared_ptr getOriginalWithIntermediateAvgPool( + const 
ngraph::element::Type precision, + const ngraph::Shape& inputShape, + const FakeQuantizeOnData& fqOnData1, + const FakeQuantizeOnData& fqOnData2); + static std::shared_ptr getOriginalWithSplitedIntermediate( const ngraph::element::Type precision, const ngraph::Shape& inputShape, @@ -134,6 +140,7 @@ public: const std::string& neighborType, const std::string& additionalLayer); + // TODO: refactor: dequantizationBefore2 <=> dequantizationOperations2 static std::shared_ptr getReferenceWithIntermediate( const ngraph::element::Type precision, const ngraph::Shape& inputShape, @@ -142,6 +149,18 @@ public: const FakeQuantizeOnData& fqOnData2, const ngraph::element::Type precisionBeforeOp, const DequantizationOperations& dequantizationBefore1, + const DequantizationOperations& dequantizationOperations2, + const ngraph::element::Type precisionAfterOperation, + const DequantizationOperations& dequantizationOperations1, + const DequantizationOperations& dequantizationBefore2); + + static std::shared_ptr getReferenceWithIntermediateAvgPool( + const ngraph::element::Type precision, + const ngraph::Shape& inputShape, + const FakeQuantizeOnData& fqOnData1, + const FakeQuantizeOnData& fqOnData2, + const ngraph::element::Type precisionBeforeOp, + const DequantizationOperations& dequantizationBefore1, const DequantizationOperations& dequantizationBefore2, const ngraph::element::Type precisionAfterOperation, const DequantizationOperations& dequantizationOperations1, diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/concat_function.cpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/concat_function.cpp index 15108abb73e..37387977eb7 100644 --- a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/concat_function.cpp +++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/concat_function.cpp @@ -272,6 +272,58 @@ std::shared_ptr ConcatFunction::getOriginalWithIntermediate( return function; } +std::shared_ptr ConcatFunction::getOriginalWithIntermediateAvgPool( + const ngraph::element::Type precision, + const ngraph::Shape& inputShape, + const FakeQuantizeOnData& fqOnData1, + const FakeQuantizeOnData& fqOnData2) { + const std::vector inputShape1 = { inputShape[0], inputShape[1], inputShape[2] - 2, inputShape[3] - 2 }; + + const auto input1 = std::make_shared(precision, ngraph::Shape(inputShape1)); + input1->set_friendly_name("input1"); + const auto fakeQuantize1 = makeFakeQuantize(input1, precision, fqOnData1); + fakeQuantize1->set_friendly_name("fakeQuantize1"); + + const std::vector inputShape2 = { inputShape[0], inputShape[1], inputShape[2], inputShape[3] }; + const auto input2 = std::make_shared(precision, ngraph::Shape(inputShape2)); + input2->set_friendly_name("input2"); + + const auto fakeQuantize2 = makeFakeQuantize(input2, precision, fqOnData2); + fakeQuantize2->set_friendly_name("fakeQuantize2"); + + std::shared_ptr intermediateOp = makeMaxPool(fakeQuantize2->output(0), { 3, 3 }); + intermediateOp->set_friendly_name("intermediate"); + + const std::shared_ptr concat = std::make_shared( + ngraph::OutputVector{ fakeQuantize1->output(0), intermediateOp->output(0) }, 1); + concat->set_friendly_name("concat"); + + auto& rtInfo = concat->get_rt_info(); + rtInfo["Variant::std::string"] = std::make_shared>("concat"); + + std::shared_ptr parent2 = std::make_shared( + intermediateOp, + Strides{ 1, 1 }, + Shape{ 1, 1 }, + Shape{ 0, 0 }, + Shape{ 2, 2 }, + true, + op::RoundingType::FLOOR); + parent2->set_friendly_name("avgPool"); + + 
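    // Topology built by this helper:
    //   input1 -> FakeQuantize1 -------------------------> Concat (axis 1) -> Result
    //   input2 -> FakeQuantize2 -> MaxPool "intermediate" ---^
    //                                  `-----> AvgPool "avgPool" -> Result
    // The extra AvgPool consumer on the intermediate MaxPool is what the
    // "intermediate precision selection" test cases exercise.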
ngraph::ResultVector results { + std::make_shared(concat), + std::make_shared(parent2) + }; + + std::shared_ptr function = std::make_shared( + results, + ngraph::ParameterVector{ input1, input2 }, + "ConcatWithIntermediateTransformation"); + + return function; +} + std::shared_ptr ConcatFunction::getOriginalWithSplitedIntermediate( const ngraph::element::Type precision, const ngraph::Shape& inputShape, @@ -1056,6 +1108,77 @@ std::shared_ptr ConcatFunction::getReferenceWithIntermediate( return function; } +std::shared_ptr ConcatFunction::getReferenceWithIntermediateAvgPool( + const ngraph::element::Type precision, + const ngraph::Shape& inputShape, + const FakeQuantizeOnData& fqOnData1, + const FakeQuantizeOnData& fqOnData2, + const ngraph::element::Type precisionBeforeOp, + const DequantizationOperations& dequantizationBefore1, + const DequantizationOperations& dequantizationBefore2, + const ngraph::element::Type precisionAfterOperation, + const DequantizationOperations& dequantizationAfter1, + const DequantizationOperations& dequantizationAfter2) { + const std::vector inputShape1 = { inputShape[0], inputShape[1], inputShape[2] - 2, inputShape[3] - 2}; + const auto input1 = std::make_shared(precision, ngraph::Shape(inputShape1)); + input1->set_friendly_name("input1"); + + const auto fakeQuantize1 = makeFakeQuantizeTypeRelaxed(input1, precision, fqOnData1); + low_precision::NetworkHelper::setOutDataPrecisionForTypeRelaxed(fakeQuantize1, precisionBeforeOp); + fakeQuantize1->set_friendly_name("fakeQuantize1"); + const auto deqBefore1 = makeDequantization(fakeQuantize1, dequantizationBefore1); + + const std::vector inputShape2 = { inputShape[0], inputShape[1], inputShape[2], inputShape[3] }; + const auto input2 = std::make_shared(precision, ngraph::Shape(inputShape2)); + input2->set_friendly_name("input2"); + + const auto fakeQuantize2 = makeFakeQuantizeTypeRelaxed(input2, precision, fqOnData2); + low_precision::NetworkHelper::setOutDataPrecisionForTypeRelaxed(fakeQuantize2, precisionBeforeOp); + fakeQuantize2->set_friendly_name("fakeQuantize2"); + const auto deqBefore2 = makeDequantization(fakeQuantize2, dequantizationBefore2); + + std::shared_ptr intermediateOp = makeMaxPool(deqBefore2, { 3, 3 }); + intermediateOp->set_friendly_name("intermediate"); + + const std::shared_ptr concat = std::make_shared( + ngraph::OutputVector { deqBefore1, intermediateOp }, + 1); + concat->set_friendly_name("concat"); + low_precision::NetworkHelper::setOutDataPrecision(concat, precisionAfterOperation); + + auto& rtInfo = concat->get_rt_info(); + rtInfo["Variant::std::string"] = std::make_shared>("concat"); + + const std::shared_ptr parent1 = makeDequantization(concat, dequantizationAfter1); + parent1->set_friendly_name("concat"); + + std::shared_ptr parent2 = std::make_shared>( + std::vector{ element::f32, element::f32 }, + std::vector{ element::f32 }, + ngraph::op::TemporaryReplaceOutputType(intermediateOp, element::f32).get(), + Strides{ 1, 1 }, + Shape{ 1, 1 }, + Shape{ 0, 0 }, + Shape{ 2, 2 }, + true, + op::RoundingType::FLOOR); + parent2->set_friendly_name("avgPool"); + + parent2 = makeDequantization(parent2, dequantizationAfter2); + + ngraph::ResultVector results { + std::make_shared(parent1), + std::make_shared(parent2) + }; + + std::shared_ptr function = std::make_shared( + results, + ngraph::ParameterVector{ input1, input2 }, + "ConcatWithIntermediateTransformation"); + + return function; +} + std::shared_ptr ConcatFunction::getReferenceWithSplitedIntermediate( const ngraph::element::Type precision, 
const ngraph::Shape& inputShape, From de2a163363eb885dbfe7367d4c6349204d1641de Mon Sep 17 00:00:00 2001 From: Svetlana Dolinina Date: Tue, 8 Jun 2021 16:09:04 +0300 Subject: [PATCH 29/41] Error during Caffe model conversion with Python3.8 (#6056) * added one more possible reason for Caffe error during caffemodel parser in code + add error description to FAQ Also added MxNet error to FAQ based on Kate Generalova wording * review fixes * wording polishing * wording polishing * review fixes * review fixes * Update docs/MO_DG/prepare_model/Model_Optimizer_FAQ.md Co-authored-by: Tatiana Savina * Update docs/MO_DG/prepare_model/Model_Optimizer_FAQ.md Co-authored-by: Tatiana Savina * Update docs/MO_DG/prepare_model/Model_Optimizer_FAQ.md Co-authored-by: Tatiana Savina * Update docs/MO_DG/prepare_model/Model_Optimizer_FAQ.md Co-authored-by: Tatiana Savina * review fixes Co-authored-by: Tatiana Savina --- docs/MO_DG/prepare_model/Model_Optimizer_FAQ.md | 14 +++++++++++++- model-optimizer/mo/front/caffe/loader.py | 8 +++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/docs/MO_DG/prepare_model/Model_Optimizer_FAQ.md b/docs/MO_DG/prepare_model/Model_Optimizer_FAQ.md index f9aef04a0a9..bb599cf93b5 100644 --- a/docs/MO_DG/prepare_model/Model_Optimizer_FAQ.md +++ b/docs/MO_DG/prepare_model/Model_Optimizer_FAQ.md @@ -627,4 +627,16 @@ It means that you trying to convert the topology which contains '_contrib_box_nm }); -\endhtmlonly \ No newline at end of file +\endhtmlonly + +#### 103. What does the message "ModelOptimizer is not able to parse *.caffemodel" mean? + +If a '*.caffemodel' file exists and it is correct, the error possibly occured due to the use of Python protobuf implementation. In some cases, it shows error message during model parsing, for example: "'utf-8' codec can't decode byte 0xe0 in position 4: invalid continuation byte in field: mo_caffe.SpatialTransformerParameter.transform_type". You can either use Python 3.6/3.7 or build 'cpp' implementation of protobuf yourself for your version of Python. For the complete instructions about building `protobuf` from sources, see the appropriate section in [Converting a Model to Intermediate Representation](Config_Model_Optimizer.md). + +#### 104. What does the message "SyntaxError: 'yield' inside list comprehension" during MxNet\* model conversion mean? + +The issue "SyntaxError: 'yield' inside list comprehension" might occur during converting MXNet\* models (mobilefacedet-v1-mxnet, brain-tumor-segmentation-0001) on Windows* platform with Python* 3.8 environment. This issue is caused by API changes for `yield expression` in Python 3.8. +The following workarounds are suggested to resolve this issue: +1. Use Python 3.6/3.7 to convert MXNet\* models on Windows +2. Update MXNet: pip install mxnet=1.7.0.post2 +Note that you might have conflicts between previously installed PyPI dependencies. \ No newline at end of file diff --git a/model-optimizer/mo/front/caffe/loader.py b/model-optimizer/mo/front/caffe/loader.py index 2ffca364fb6..14497c6108d 100644 --- a/model-optimizer/mo/front/caffe/loader.py +++ b/model-optimizer/mo/front/caffe/loader.py @@ -130,10 +130,16 @@ def load_caffe_proto_model(caffe_pb2, proto_path: str, model_path: [str, None] = map = mmap.mmap(infile.fileno(), 0, access=mmap.ACCESS_READ) model.MergeFromString(map) except Exception as e: + third_point = '' + if api_implementation._implementation_type == 'python': + third_point = ' 3. Python protobuf implementation was used. 
Some models can\'t be converted ' + \ + ' in this configuration. Please, use Python version with existing cpp implementation of ' + \ + 'protobuf library or build it by yourself\n' + refer_to_faq_msg(103) log.error('Exception message: {}\n\n'.format(e) + ' Possible reasons:\n' + ' 1. {} does not exist\n'.format(model_path) + - ' 2. {} does not have a valid structure\n'.format(model_path), extra={'framework_error': True}) + ' 2. {} does not have a valid structure\n'.format(model_path) + third_point, + extra={'framework_error': True}) raise FrameworkError('Model Optimizer is not able to parse {}'.format(model_path)) from e return proto, model From a36d6a0f0678fd0c0d637df9ea3f1b34682643cc Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Tue, 8 Jun 2021 19:33:51 +0300 Subject: [PATCH 30/41] Used setMeanImageForChannel (#6076) --- .../src/readers/ir_reader/ie_ir_parser.cpp | 11 +++++------ .../plugin/shared/include/behavior/set_preprocess.hpp | 11 ++++++----- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/inference-engine/src/readers/ir_reader/ie_ir_parser.cpp b/inference-engine/src/readers/ir_reader/ie_ir_parser.cpp index d26132abf2e..892c2e6bae4 100644 --- a/inference-engine/src/readers/ir_reader/ie_ir_parser.cpp +++ b/inference-engine/src/readers/ir_reader/ie_ir_parser.cpp @@ -1011,8 +1011,6 @@ void V10Parser::parsePreProcess( if (!meanSegmentPrecision || meanSegmentPrecision == Precision::MIXED) IE_THROW() << "mean blob defined without specifying precision."; - InferenceEngine::PreProcessChannel::Ptr preProcessChannel; - int lastChanNo = -1; std::unordered_set idsForMeanImage; @@ -1022,7 +1020,6 @@ void V10Parser::parsePreProcess( IE_THROW() << "Pre-process channel id invalid: " << chanNo; } lastChanNo = chanNo; - preProcessChannel = pp[chanNo]; auto meanNode = chan.child("mean"); if (!meanNode.empty()) { @@ -1038,13 +1035,15 @@ void V10Parser::parsePreProcess( << " extpecting " << width << " x " << height << " x " << meanSegmentPrecision.size(); } - preProcessChannel->meanData = make_blob_with_precision( + auto meanData = make_blob_with_precision( TensorDesc(meanSegmentPrecision, {height, width}, Layout::HW)); - preProcessChannel->meanData->allocate(); - auto lockedMem = preProcessChannel->meanData->buffer(); + meanData->allocate(); + auto lockedMem = meanData->buffer(); char* data = lockedMem.as(); uint8_t* src_data = weights->cbuffer().as() + offset; memcpy(data, src_data, size); + + pp.setMeanImageForChannel(meanData, chanNo); } } } diff --git a/inference-engine/tests/functional/plugin/shared/include/behavior/set_preprocess.hpp b/inference-engine/tests/functional/plugin/shared/include/behavior/set_preprocess.hpp index de442f12c21..ff294866858 100644 --- a/inference-engine/tests/functional/plugin/shared/include/behavior/set_preprocess.hpp +++ b/inference-engine/tests/functional/plugin/shared/include/behavior/set_preprocess.hpp @@ -157,16 +157,17 @@ TEST_P(PreprocessTest, SetMeanImagePreProcessSetBlob) { auto &preProcess = cnnNet.getInputsInfo().begin()->second->getPreProcess(); preProcess.init(3); for (size_t i = 0; i < 3; i++) { - preProcess[i]->meanData = make_blob_with_precision(InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, - {10, 10}, - InferenceEngine::Layout::HW)); - preProcess[i]->meanData->allocate(); - auto lockedMem = preProcess[i]->meanData->buffer(); + auto meanData = make_blob_with_precision( + InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, {10, 10}, + InferenceEngine::Layout::HW)); + meanData->allocate(); + auto lockedMem = 
meanData->buffer(); auto* data = lockedMem.as(); for (size_t j = 0; j < 100; j++) { data[j] = 0; data[j] -= i * 100 + j; } + ASSERT_NO_THROW(preProcess.setMeanImageForChannel(meanData, i)); } preProcess.setVariant(InferenceEngine::MEAN_IMAGE); // Load CNNNetwork to target plugins From d171b5c4b76151b98d7935193e61549b75f4796b Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Tue, 8 Jun 2021 19:55:36 +0300 Subject: [PATCH 31/41] Removed version.in.hpp from nGraph package (#6081) --- ngraph/core/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/ngraph/core/CMakeLists.txt b/ngraph/core/CMakeLists.txt index fa3a91c34f8..72272046b01 100644 --- a/ngraph/core/CMakeLists.txt +++ b/ngraph/core/CMakeLists.txt @@ -105,6 +105,7 @@ install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/ FILES_MATCHING PATTERN "*.hpp" PATTERN "*.h" + PATTERN "*version.in.hpp" EXCLUDE ) install(FILES ${CMAKE_CURRENT_BINARY_DIR}/include/ngraph/version.hpp DESTINATION ${NGRAPH_INSTALL_INCLUDE}/ngraph From 246932a5d358364104b934b8c07f7939407f5319 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Tue, 8 Jun 2021 20:24:11 +0300 Subject: [PATCH 32/41] Py code style (#6086) * GitHub CI: Enable Python for Code style * Update formatting * Fix * Add cython * Add upgrade pip * Update to src/requirements-dev.txt * Fixed clang code style Co-authored-by: azhogov --- .github/org_control/check_pr.py | 2 +- .github/workflows/code_style.yml | 5 ++++- .../python/src/openvino/inference_engine/CMakeLists.txt | 3 ++- .../src/openvino/offline_transformations/CMakeLists.txt | 3 ++- .../ie_bridges/python/src/openvino/test_utils/CMakeLists.txt | 3 ++- ngraph/python/src/pyngraph/util.cpp | 2 +- 6 files changed, 12 insertions(+), 6 deletions(-) diff --git a/.github/org_control/check_pr.py b/.github/org_control/check_pr.py index e0b48832ead..85e5b4e34c0 100644 --- a/.github/org_control/check_pr.py +++ b/.github/org_control/check_pr.py @@ -229,7 +229,7 @@ def main(): if wrong_pulls: for pull_number, wrong_commits in wrong_pulls.items(): print( - f"\nERROR: Remove or replace wrong commits in the PR {pull_number}:\n ", + f"\nERROR: Remove or replace wrong commits in the PR {pull_number}:\n ", "\n ".join(wrong_commits), ) print( diff --git a/.github/workflows/code_style.yml b/.github/workflows/code_style.yml index b538a179339..607fe2cb64a 100644 --- a/.github/workflows/code_style.yml +++ b/.github/workflows/code_style.yml @@ -15,14 +15,17 @@ jobs: - name: Install dependencies run: | sudo apt --assume-yes install libusb-1.0-0-dev + python3 -m pip install --upgrade pip python3 -m pip install -r ./inference-engine/ie_bridges/python/requirements.txt + # Add for -DENABLE_PYTHON=ON, no cython + python3 -m pip install -r ./inference-engine/ie_bridges/python/src/requirements-dev.txt # Run cmake with -DENABLE_PROFILING_ITT=ON -DSELECTIVE_BUILD=COLLECT in order to enable codestyle check for ITT collector - name: CMake run: | mkdir build cd build - cmake -DENABLE_PROFILING_ITT=ON -DSELECTIVE_BUILD=COLLECT .. + cmake -DENABLE_PYTHON=ON -DENABLE_PROFILING_ITT=ON -DSELECTIVE_BUILD=COLLECT .. 
- name: Check code style run: cmake --build build --target clang_format_check_all diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt b/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt index 1f623fb4833..17b8bf5b9b5 100644 --- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt +++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt @@ -77,4 +77,5 @@ install(PROGRAMS __init__.py DESTINATION ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION}/openvino/inference_engine COMPONENT ${PYTHON_VERSION}) -add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME}) \ No newline at end of file +add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME} + EXCLUDE_PATTERNS ".*\\.cxx;.*\\.pxd;.*\\.pyx") diff --git a/inference-engine/ie_bridges/python/src/openvino/offline_transformations/CMakeLists.txt b/inference-engine/ie_bridges/python/src/openvino/offline_transformations/CMakeLists.txt index c6315336ba2..27c9e7bf898 100644 --- a/inference-engine/ie_bridges/python/src/openvino/offline_transformations/CMakeLists.txt +++ b/inference-engine/ie_bridges/python/src/openvino/offline_transformations/CMakeLists.txt @@ -42,7 +42,8 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") target_compile_options(${TARGET_NAME} PRIVATE "-Wno-error=register") endif() -add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME}) +add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME} + EXCLUDE_PATTERNS ".*\\.cxx;.*\\.pxd;.*\\.pyx") # perform copy add_custom_command(TARGET ${TARGET_NAME} diff --git a/inference-engine/ie_bridges/python/src/openvino/test_utils/CMakeLists.txt b/inference-engine/ie_bridges/python/src/openvino/test_utils/CMakeLists.txt index 504125d9823..8367f941d9f 100644 --- a/inference-engine/ie_bridges/python/src/openvino/test_utils/CMakeLists.txt +++ b/inference-engine/ie_bridges/python/src/openvino/test_utils/CMakeLists.txt @@ -48,4 +48,5 @@ add_custom_command(TARGET ${TARGET_NAME} COMMAND ${CMAKE_COMMAND} -E copy ${PYTHON_BRIDGE_SRC_ROOT}/src/openvino/test_utils/__init__.py ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/__init__.py ) -add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME}) \ No newline at end of file +add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME} + EXCLUDE_PATTERNS ".*\\.cxx;.*\\.pxd;.*\\.pyx") \ No newline at end of file diff --git a/ngraph/python/src/pyngraph/util.cpp b/ngraph/python/src/pyngraph/util.cpp index 5178e84fe90..69c0df89dcd 100644 --- a/ngraph/python/src/pyngraph/util.cpp +++ b/ngraph/python/src/pyngraph/util.cpp @@ -4,8 +4,8 @@ #include -#include "pyngraph/util.hpp" #include "ngraph/validation_util.hpp" +#include "pyngraph/util.hpp" namespace py = pybind11; From 57850f0a876a8dfdab0a1fec575c948ca4e3bf09 Mon Sep 17 00:00:00 2001 From: Egor Duplensky Date: Tue, 8 Jun 2021 22:39:08 +0300 Subject: [PATCH 33/41] [CPU] Allow exec graph serialization to cout or xml via env variable (#5967) --- inference-engine/src/mkldnn_plugin/config.h | 8 +++- .../src/mkldnn_plugin/mkldnn_edge.cpp | 14 ++++-- .../src/mkldnn_plugin/mkldnn_edge.h | 10 ++-- .../src/mkldnn_plugin/mkldnn_graph.cpp | 31 ++++-------- .../src/mkldnn_plugin/mkldnn_graph.h | 7 ++- .../src/mkldnn_plugin/mkldnn_graph_dumper.cpp | 47 +++++++++++++++++++ .../src/mkldnn_plugin/mkldnn_graph_dumper.h | 5 +- .../src/mkldnn_plugin/utils/README.md | 19 ++++++++ .../mkldnn_plugin/utils/debug_capabilities.h | 45 ++++++++++++++++-- 
.../src/mkldnn_plugin/utils/node_dumper.cpp | 36 +++++++------- .../src/mkldnn_plugin/utils/node_dumper.h | 7 ++- 11 files changed, 169 insertions(+), 60 deletions(-) diff --git a/inference-engine/src/mkldnn_plugin/config.h b/inference-engine/src/mkldnn_plugin/config.h index 01eb0e23c5e..54336d58495 100644 --- a/inference-engine/src/mkldnn_plugin/config.h +++ b/inference-engine/src/mkldnn_plugin/config.h @@ -4,9 +4,11 @@ #pragma once +#include +#include "utils/debug_capabilities.h" + #include #include -#include namespace MKLDNNPlugin { @@ -35,6 +37,10 @@ struct Config { bool manualEnforceBF16 = false; #endif +#ifdef CPU_DEBUG_CAPS + DebugCaps::Config debugCaps; +#endif + void readProperties(const std::map &config); void updateProperties(); std::map _config; diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp index b5ff60efed0..1415dc1ae95 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp @@ -32,7 +32,7 @@ bool MKLDNNEdge::isUseExternalMemory() const { return externalMemoryPtr; } -bool MKLDNNEdge::isDropped() { +bool MKLDNNEdge::isDropped() const { bool not_in_parent = true; bool not_in_child = true; @@ -124,6 +124,10 @@ void MKLDNNEdge::reuse(MKLDNNMemoryPtr ptr) { status = Status::Allocated; } +const InferenceEngine::TensorDesc& MKLDNNEdge::getInputDescRO() const { + return inputDesc; +} + InferenceEngine::TensorDesc MKLDNNEdge::getInputDesc() { if (inputDesc.getLayout() == InferenceEngine::Layout::ANY) { inputDesc = getSpecifiedInputDesc({}); @@ -131,6 +135,10 @@ InferenceEngine::TensorDesc MKLDNNEdge::getInputDesc() { return inputDesc; } +const InferenceEngine::TensorDesc& MKLDNNEdge::getOutputDescRO() const { + return outputDesc; +} + InferenceEngine::TensorDesc MKLDNNEdge::getOutputDesc() { if (outputDesc.getLayout() == InferenceEngine::Layout::ANY) { outputDesc = getSpecifiedOutputDesc({}); @@ -145,11 +153,11 @@ InferenceEngine::TensorDesc MKLDNNEdge::getDesc() { return getInputDesc(); } -int MKLDNNEdge::getInputNum() { +int MKLDNNEdge::getInputNum() const { return parent_port; } -int MKLDNNEdge::getOutputNum() { +int MKLDNNEdge::getOutputNum() const { return child_port; } diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_edge.h b/inference-engine/src/mkldnn_plugin/mkldnn_edge.h index c9884caf56e..63e2a16414d 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_edge.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_edge.h @@ -61,11 +61,11 @@ public: MKLDNNMemoryPtr& getMemoryPtr(); bool needReorder(); - bool isDropped(); + bool isDropped() const; bool isUseExternalMemory() const; - int getInputNum(); - int getOutputNum(); + int getInputNum() const; + int getOutputNum() const; void setChildPort(const size_t port) { child_port = port; } @@ -73,10 +73,12 @@ public: MKLDNNEdgePtr getSharedEdge() const; MKLDNNEdgePtr getSharedEdge(std::nothrow_t) const; + const InferenceEngine::TensorDesc& getInputDescRO() const; + const InferenceEngine::TensorDesc& getOutputDescRO() const; + private: std::string name(); -private: std::weak_ptr parent; std::weak_ptr child; int parent_port; diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp index b92afb8a9f0..efc99bddb84 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp @@ -78,7 +78,10 @@ void MKLDNNGraph::CreateGraph(NET &net, const MKLDNNExtensionManager::Ptr& extMg 
Replicate(net, extMgr); InitGraph(); + status = Ready; + + ENABLE_CPU_DEBUG_CAP(serialize(*this)); } template void MKLDNNGraph::CreateGraph(const std::shared_ptr&, @@ -344,10 +347,6 @@ void MKLDNNGraph::InitGraph() { graphNode->cleanup(); } #endif - -#if !defined(NDEBUG) && defined(PRINT_GRAPH_INFO) - printGraphInfo(); -#endif ExecuteConstantNodesOnly(); } @@ -809,7 +808,7 @@ void MKLDNNGraph::Infer(MKLDNNInferRequest* request, int batch) { mkldnn::stream stream(eng); - ENABLE_CPU_DEBUG_CAP(NodeDumper nd(infer_count)); + ENABLE_CPU_DEBUG_CAP(NodeDumper nd(config.debugCaps, infer_count)); for (int i = 0; i < graphNodes.size(); i++) { if (request != nullptr) { @@ -954,6 +953,10 @@ void MKLDNNGraph::setConfig(const Config &cfg) { config = cfg; } +const Config& MKLDNNGraph::getConfig() const { + return config; +} + void MKLDNNGraph::setProperty(const std::map& properties) { config.readProperties(properties); } @@ -1217,21 +1220,3 @@ void MKLDNNGraph::EnforceBF16() { InferenceEngine::CNNNetwork MKLDNNGraph::dump() const { return dump_graph_as_ie_ngraph_net(*this); } - -void MKLDNNGraph::printGraphInfo() const { - for (auto &graphNode : graphNodes) { - std::cout << "name: " << graphNode->getName() << " [ "; - if (graphNode->parentEdges.size() > 0) { - auto prnt_out_desc = graphNode->parentEdges[0].lock()->getOutputDesc(); - std::cout << "in: " << prnt_out_desc.getPrecision().name() - << "/l=" << prnt_out_desc.getLayout() - << "; "; - } - if (graphNode->childEdges.size() > 0) { - auto chld_in_desc = graphNode->childEdges[0].lock()->getInputDesc(); - std::cout << "out: " << chld_in_desc.getPrecision().name() - << "/l=" << chld_in_desc.getLayout(); - } - std::cout << " ]" << std::endl; - } -} diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.h b/inference-engine/src/mkldnn_plugin/mkldnn_graph.h index c3fcb0d5c9c..1b54f71e88c 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.h @@ -39,6 +39,8 @@ public: } void setConfig(const Config &cfg); + const Config& getConfig() const; + void setProperty(const std::map &properties); Config getProperty() const; @@ -59,6 +61,10 @@ public: void Infer(MKLDNNInferRequest* request = nullptr, int batch = -1); + const std::vector& GetNodes() const { + return graphNodes; + } + std::vector& GetNodes() { return graphNodes; } @@ -219,7 +225,6 @@ protected: private: void EnforceBF16(); - void printGraphInfo() const; }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.cpp index 14d2f6a28ae..ac4bfff6b6d 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.cpp @@ -5,9 +5,11 @@ #include "mkldnn_graph_dumper.h" #include #include "exec_graph_info.hpp" +#include "ie_common.h" #include "mkldnn_debug.h" #include #include "ngraph/ngraph.hpp" +#include "utils/debug_capabilities.h" #include #include @@ -18,6 +20,9 @@ using namespace InferenceEngine; namespace MKLDNNPlugin { +void serializeToCout(const MKLDNNGraph &graph); +void serializeToXML(const MKLDNNGraph &graph, const std::string& path); + namespace { std::map extract_node_metadata(const MKLDNNNodePtr &node) { @@ -207,4 +212,46 @@ InferenceEngine::CNNNetwork dump_graph_as_ie_ngraph_net(const MKLDNNGraph &graph return net; } +#ifdef CPU_DEBUG_CAPS +void serialize(const MKLDNNGraph &graph) { + const std::string& path = graph.getConfig().debugCaps.execGraphPath; + + if 
(path.empty()) + return; + + if (path == "cout") + serializeToCout(graph); + else if (!path.compare(path.size() - 4, 4, ".xml")) + serializeToXML(graph, path); + else + IE_THROW() << "Unknown serialize format. Should be either 'cout' or '*.xml'. Got " << path; +} + +void serializeToXML(const MKLDNNGraph &graph, const std::string& path) { + if (path.empty()) + return; + + graph.dump().serialize(path); +} + +void serializeToCout(const MKLDNNGraph &graph) { + for (const auto& node : graph.GetNodes()) { + std::cout << "name: " << node->getName() << " [ "; + if (!node->getParentEdges().empty()) { + const auto& parentEdge = *(node->getParentEdges()[0].lock()); + const auto& prnt_out_desc = parentEdge.getOutputDescRO(); + std::cout << "in: " << prnt_out_desc.getPrecision().name() + << "/l=" << prnt_out_desc.getLayout() + << "; "; + } + if (!node->getChildEdges().empty()) { + const auto& childEdge = *(node->getChildEdges()[0].lock()); + const auto& chld_in_desc = childEdge.getInputDescRO(); + std::cout << "out: " << chld_in_desc.getPrecision().name() + << "/l=" << chld_in_desc.getLayout(); + } + std::cout << " ]" << std::endl; + } +} +#endif } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.h b/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.h index d954695baaa..597568224f3 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.h @@ -6,11 +6,14 @@ #include "cpp/ie_cnn_network.h" #include "mkldnn_graph.h" +#include "utils/debug_capabilities.h" #include namespace MKLDNNPlugin { InferenceEngine::CNNNetwork dump_graph_as_ie_ngraph_net(const MKLDNNGraph &graph); - +#ifdef CPU_DEBUG_CAPS +void serialize(const MKLDNNGraph &graph); +#endif // CPU_DEBUG_CAPS } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/utils/README.md b/inference-engine/src/mkldnn_plugin/utils/README.md index af50c8d5015..d3b98f1cb48 100644 --- a/inference-engine/src/mkldnn_plugin/utils/README.md +++ b/inference-engine/src/mkldnn_plugin/utils/README.md @@ -71,3 +71,22 @@ Example: ```sh OV_CPU_BLOB_DUMP_NODE_NAME=".+" binary ... ``` + +## Graph serialization +The functionality allows to serialize execution graph using environment variable: +```sh + OV_CPU_EXEC_GRAPH_PATH= binary ... +``` + +Possible serialization options: +* cout + + Serialize to console output +* \.xml + + Serialize graph into .xml and .bin files. Can be opened using, for example, *netron* app +* \.dot + + TBD. Serialize graph into .dot file. Can be inspected using, for example, *graphviz* tools. 
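The `OV_CPU_EXEC_GRAPH_PATH` handling documented above reduces to a suffix check on the configured path. A self-contained sketch of the same dispatch, kept separate from the plugin sources and covering only the `cout` and `*.xml` cases listed here:

```cpp
#include <cstdlib>
#include <iostream>
#include <stdexcept>
#include <string>

// Mirrors the dispatch described above: empty -> disabled, "cout" -> console,
// "*.xml" -> IR-style dump, anything else -> error.
std::string chooseTarget(const std::string& path) {
    if (path.empty())
        return "disabled";
    if (path == "cout")
        return "console";
    if (path.size() >= 4 && path.compare(path.size() - 4, 4, ".xml") == 0)
        return "xml (plus a .bin next to it)";
    throw std::runtime_error("Unknown serialize format. Should be either 'cout' or '*.xml'. Got " + path);
}

int main() {
    const char* env = std::getenv("OV_CPU_EXEC_GRAPH_PATH");
    const std::string path = env ? env : "";
    std::cout << "exec graph serialization: " << chooseTarget(path) << "\n";
    return 0;
}
```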
+ + diff --git a/inference-engine/src/mkldnn_plugin/utils/debug_capabilities.h b/inference-engine/src/mkldnn_plugin/utils/debug_capabilities.h index 64af835064d..be6e7a830c2 100644 --- a/inference-engine/src/mkldnn_plugin/utils/debug_capabilities.h +++ b/inference-engine/src/mkldnn_plugin/utils/debug_capabilities.h @@ -4,7 +4,44 @@ #pragma once #ifdef CPU_DEBUG_CAPS -# define ENABLE_CPU_DEBUG_CAP(_x) _x; -#else -# define ENABLE_CPU_DEBUG_CAP(_x) -#endif + +#include +#include +#include + +#define ENABLE_CPU_DEBUG_CAP(_x) _x; + +namespace MKLDNNPlugin { +namespace DebugCaps { + +class Config { +public: + Config() { + readParam(blobDumpDir, "OV_CPU_BLOB_DUMP_DIR"); + readParam(blobDumpFormat, "OV_CPU_BLOB_DUMP_FORMAT"); + readParam(blobDumpNodeExecId, "OV_CPU_BLOB_DUMP_NODE_EXEC_ID"); + readParam(blobDumpNodeType, "OV_CPU_BLOB_DUMP_NODE_TYPE"); + readParam(blobDumpNodeName, "OV_CPU_BLOB_DUMP_NODE_NAME"); + readParam(execGraphPath, "OV_CPU_EXEC_GRAPH_PATH"); + } + + std::string blobDumpDir; + std::string blobDumpFormat; + std::string blobDumpNodeExecId; + std::string blobDumpNodeType; + std::string blobDumpNodeName; + std::string execGraphPath; + +private: + void readParam(std::string& param, const char* envVar) { + if (const char* envValue = std::getenv(envVar)) + param = envValue; + } +}; + +} // namespace DebugCaps +} // namespace MKLDNNPlugin + +#else // !CPU_DEBUG_CAPS +#define ENABLE_CPU_DEBUG_CAP(_x) +#endif // CPU_DEBUG_CAPS diff --git a/inference-engine/src/mkldnn_plugin/utils/node_dumper.cpp b/inference-engine/src/mkldnn_plugin/utils/node_dumper.cpp index f69551159dc..9f3af44a66a 100644 --- a/inference-engine/src/mkldnn_plugin/utils/node_dumper.cpp +++ b/inference-engine/src/mkldnn_plugin/utils/node_dumper.cpp @@ -6,9 +6,10 @@ #include "node_dumper.h" #include "mkldnn_node.h" -#include "utils/blob_dump.h" - #include "ie_common.h" +#include "utils/blob_dump.h" +#include "utils/debug_capabilities.h" + #include #include #include @@ -18,27 +19,24 @@ using namespace InferenceEngine; namespace MKLDNNPlugin { -NodeDumper::NodeDumper(int _count): - count(_count), dumpFormat(DUMP_FORMAT::BIN) { - const char* dumpDirEnv = std::getenv("OV_CPU_BLOB_DUMP_DIR"); - if (dumpDirEnv) - dumpDirName = dumpDirEnv; +NodeDumper::NodeDumper(const DebugCaps::Config& config, const int _count) + : dumpFormat(DUMP_FORMAT::BIN) + , dumpDirName("mkldnn_dump") + , count(_count) { + if (!config.blobDumpDir.empty()) + dumpDirName = config.blobDumpDir; - const char* dumpFormatEnv = std::getenv("OV_CPU_BLOB_DUMP_FORMAT"); - if (dumpFormatEnv) - dumpFormat = parseDumpFormat(dumpFormatEnv); + if (!config.blobDumpFormat.empty()) + dumpFormat = parseDumpFormat(config.blobDumpFormat); - const char* filter = std::getenv("OV_CPU_BLOB_DUMP_NODE_EXEC_ID"); - if (filter) - dumpFilters[FILTER::BY_EXEC_ID] = filter; + if (!config.blobDumpNodeExecId.empty()) + dumpFilters[FILTER::BY_EXEC_ID] = config.blobDumpNodeExecId; - filter = std::getenv("OV_CPU_BLOB_DUMP_NODE_TYPE"); - if (filter) - dumpFilters[FILTER::BY_TYPE] = filter; + if (!config.blobDumpNodeType.empty()) + dumpFilters[FILTER::BY_TYPE] = config.blobDumpNodeType; - filter = std::getenv("OV_CPU_BLOB_DUMP_NODE_NAME"); - if (filter) - dumpFilters[FILTER::BY_NAME] = filter; + if (!config.blobDumpNodeName.empty()) + dumpFilters[FILTER::BY_NAME] = config.blobDumpNodeName; } void NodeDumper::dumpInputBlobs(const MKLDNNNodePtr& node) const { diff --git a/inference-engine/src/mkldnn_plugin/utils/node_dumper.h b/inference-engine/src/mkldnn_plugin/utils/node_dumper.h index 
7dd1ac1f0c6..0580bee4731 100644 --- a/inference-engine/src/mkldnn_plugin/utils/node_dumper.h +++ b/inference-engine/src/mkldnn_plugin/utils/node_dumper.h @@ -6,6 +6,7 @@ #include "mkldnn_node.h" #include "utils/blob_dump.h" +#include "utils/debug_capabilities.h" #include #include @@ -22,7 +23,7 @@ namespace MKLDNNPlugin { */ class NodeDumper { public: - NodeDumper(int _count); + NodeDumper(const DebugCaps::Config& config, const int _count); void dumpInputBlobs(const MKLDNNNodePtr &node) const; void dumpOutputBlobs(const MKLDNNNodePtr &node) const; @@ -41,11 +42,9 @@ private: void formatNodeName(std::string& name) const; DUMP_FORMAT dumpFormat; - + std::string dumpDirName; int count; - std::string dumpDirName = "mkldnn_dump"; - enum FILTER { BY_EXEC_ID, BY_TYPE, From f0cf1dffa76dd7c93c2b7e8d8b4b5cf2c39c0f6a Mon Sep 17 00:00:00 2001 From: Andrey Somsikov Date: Wed, 9 Jun 2021 08:21:03 +0300 Subject: [PATCH 34/41] Add ENABLE_STRICT_DEPENDENCIES for faster parallel builds (#5473) --- cmake/features.cmake | 2 ++ ngraph/test/CMakeLists.txt | 12 ++++++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/cmake/features.cmake b/cmake/features.cmake index adb1fad2523..7518c99c868 100644 --- a/cmake/features.cmake +++ b/cmake/features.cmake @@ -6,6 +6,8 @@ ie_dependent_option (ENABLE_MKL_DNN "MKL-DNN plugin for inference engine" ON "X8 ie_option (ENABLE_TESTS "unit, behavior and functional tests" OFF) +ie_option (ENABLE_STRICT_DEPENDENCIES "Skip configuring \"convinient\" dependencies for efficient parallel builds" OFF) + ie_dependent_option (ENABLE_CLDNN "clDnn based plugin for inference engine" ON "X86_64;NOT APPLE;NOT MINGW;NOT WINDOWS_STORE;NOT WINDOWS_PHONE" OFF) ie_option (ENABLE_PROFILING_ITT "Build with ITT tracing. Optionally configure pre-built ittnotify library though INTEL_VTUNE_DIR variable." OFF) diff --git a/ngraph/test/CMakeLists.txt b/ngraph/test/CMakeLists.txt index ba3c6a9d052..97f85a3128f 100644 --- a/ngraph/test/CMakeLists.txt +++ b/ngraph/test/CMakeLists.txt @@ -286,13 +286,21 @@ set_source_files_properties(includes.cpp PROPERTIES COMPILE_DEFINITIONS if (ENABLE_MKL_DNN) message(STATUS "NGRAPH_TESTS: IE:CPU enabled") set(ACTIVE_BACKEND_LIST ${ACTIVE_BACKEND_LIST} "IE:CPU") - list(APPEND UNIT_TESTS_DEPENDENCIES MKLDNNPlugin) + if (NOT ENABLE_STRICT_DEPENDENCIES) + # For convinience add a runtime dependency to build along with this target. + # Warning: Parallel build with -GNinja may not be efficient. + list(APPEND UNIT_TESTS_DEPENDENCIES MKLDNNPlugin) + endif() endif() if (ENABLE_CLDNN) message(STATUS "NGRAPH_TESTS: IE:GPU enabled") set(ACTIVE_BACKEND_LIST ${ACTIVE_BACKEND_LIST} "IE:GPU") - list(APPEND UNIT_TESTS_DEPENDENCIES clDNNPlugin) + if (NOT ENABLE_STRICT_DEPENDENCIES) + # For convinience add a runtime dependency to build along with this target. + # Warning: Parallel build with -GNinja may not be efficient. 
+ list(APPEND UNIT_TESTS_DEPENDENCIES clDNNPlugin) + endif() endif() if (NGRAPH_INTERPRETER_ENABLE) From aa4a18dda1ad0dfc55412dbe776be99dc0c0c964 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Wed, 9 Jun 2021 09:02:25 +0300 Subject: [PATCH 35/41] [IE CLDNN] Updated GPU device config (#6040) --- docs/IE_DG/API_Changes.md | 15 ++- docs/IE_DG/Extensibility_DG/GPU_Kernel.md | 16 --- docs/IE_DG/GPU_Kernels_Tuning.md | 39 ------ docs/IE_DG/Intro_to_Performance.md | 20 +-- .../supported_plugins/{CL_DNN.md => GPU.md} | 45 +++---- .../supported_plugins/Supported_Devices.md | 10 +- docs/doxygen/ie_docs.xml | 3 +- docs/model_server/README.md | 46 +++---- .../dldt_optimization_guide.md | 62 ++++----- docs/snippets/GPU_Kernel.cpp | 5 - docs/snippets/GPU_Kernels_Tuning.cpp | 14 -- docs/snippets/GPU_RemoteBlob_API2.cpp | 4 +- .../include/cldnn/cldnn_config.hpp | 80 +++--------- inference-engine/include/gpu/gpu_config.hpp | 120 ++++++++++++++++++ .../samples/benchmark_app/main.cpp | 4 +- .../samples/hello_query_device/README.md | 6 +- .../src/cldnn_engine/cldnn_config.cpp | 29 +++-- .../src/cldnn_engine/cldnn_engine.cpp | 2 +- .../cldnn_engine/cldnn_executable_network.cpp | 1 - .../src/cldnn_engine/cldnn_graph.cpp | 1 - .../cldnn_remote_blob_tests.cpp | 4 +- .../behavior/config.cpp | 35 ++++- .../behavior/core_integration.cpp | 2 +- .../behavior/infer_request_input.cpp | 2 +- .../behavior/infer_request_output.cpp | 2 +- .../behavior/test_plugin.cpp | 2 +- .../multi/gpu_remote_blob_tests.cpp | 2 +- .../single_layer_tests/tensor_iterator.cpp | 6 +- tools/benchmark/main.py | 2 +- 29 files changed, 301 insertions(+), 278 deletions(-) delete mode 100644 docs/IE_DG/GPU_Kernels_Tuning.md rename docs/IE_DG/supported_plugins/{CL_DNN.md => GPU.md} (62%) delete mode 100644 docs/snippets/GPU_Kernels_Tuning.cpp create mode 100644 inference-engine/include/gpu/gpu_config.hpp diff --git a/docs/IE_DG/API_Changes.md b/docs/IE_DG/API_Changes.md index a234471c13e..2534a4a6c38 100644 --- a/docs/IE_DG/API_Changes.md +++ b/docs/IE_DG/API_Changes.md @@ -14,6 +14,15 @@ The sections below contain detailed list of changes made to the Inference Engine * InferenceEngine::Parameter(std::shared_ptr& var) * std::shared_ptr InferenceEngine::Parameter::asVariant() const * InferenceEngine::Parameter::operator std::shared_ptr() const + * KEY_CLDNN_NV12_TWO_INPUTS GPU plugin option. Use KEY_GPU_NV12_TWO_INPUTS instead + * KEY_CLDNN_PLUGIN_PRIORITY GPU plugin option. Use KEY_GPU_PLUGIN_PRIORITY instead + * KEY_CLDNN_PLUGIN_THROTTLE GPU plugin option. 
Use KEY_GPU_PLUGIN_THROTTLE instead + * KEY_CLDNN_MEM_POOL GPU plugin option + * KEY_CLDNN_GRAPH_DUMPS_DIR GPU plugin option + * KEY_CLDNN_SOURCES_DUMPS_DIR GPU plugin option + * KEY_DUMP_KERNELS GPU plugin option + * KEY_TUNING_MODE GPU plugin option + * KEY_TUNING_FILE GPU plugin option ## 2021.3 @@ -528,7 +537,7 @@ The sections below contain detailed list of changes made to the Inference Engine * DLIA_CONFIG_KEY(ENABLE_STREAMING) config key ### Removed API - + * InferenceEngine::EltwiseLayer::Select from InferenceEngine::EltwiseLayer::eOperation enumeration ## 2019 R2 @@ -577,7 +586,7 @@ The sections below contain detailed list of changes made to the Inference Engine * DLIA_CONFIG_KEY(IO_TRANSFORMATIONS_NATIVE) config key * DLIA_CONFIG_KEY(DUMP_SUPPORTED_LAYERS_INFORMATION) config key * GNA_CONFIG_VALUE(SW_FP32) config value for GNA_CONFIG_KEY(DEVICE_MODE) key - * MULTI_CONFIG_KEY(DEVICE_PRIORITIES) config key for `MULTI` device + * MULTI_CONFIG_KEY(DEVICE_PRIORITIES) config key for `MULTI` device * InferenceEngine::CNNNetReader::ReadNetwork(const std::wstring &filepath) new method * InferenceEngine::CNNNetReader::ReadWeights(const std::wstring &filepath) new method * InferenceEngine::ExecutableNetwork::ExecutableNetwork(IExecutableNetwork::Ptr actual, InferenceEnginePluginPtr plg) constructor with additional `plg` parameter @@ -593,7 +602,7 @@ The sections below contain detailed list of changes made to the Inference Engine * InferenceEngine::EltwiseLayer::Logical_NOT, InferenceEngine::EltwiseLayer::Mean, InferenceEngine::EltwiseLayer::Select extensions to InferenceEngine::EltwiseLayer::eOperation enumeration * InferenceEngine::OneHotLayer new class * InferenceEngine::SelectLayer new class - * InferenceEngine::BroadcastLayer new class + * InferenceEngine::BroadcastLayer new class * InferenceEngine::MathLayer new class * InferenceEngine::ReduceLayer new class * InferenceEngine::TopKLayer new class diff --git a/docs/IE_DG/Extensibility_DG/GPU_Kernel.md b/docs/IE_DG/Extensibility_DG/GPU_Kernel.md index 09ace6f0a29..d9fd809f8e4 100644 --- a/docs/IE_DG/Extensibility_DG/GPU_Kernel.md +++ b/docs/IE_DG/Extensibility_DG/GPU_Kernel.md @@ -219,22 +219,6 @@ __kernel void example_relu_kernel( ## Debugging Tips -* **Dumping the Resulting Kernels**. -It is recommended to get a dump of the kernel with all of -the values set by the Inference Engine, such as tensor sizes, -floating-point, and integer kernel parameters. To get the dump, add the -following line to your code that configures the GPU plugin to output the -custom kernels: - -@snippet snippets/GPU_Kernel.cpp part1 - -When the Inference Engine compiles the kernels for the specific network, -it also outputs the resulting code for the custom kernels. In the -directory of your executable, find files like -`clDNN_program0.cl`, `clDNN_program1.cl`. There are as many files as -distinct sets of parameters for your custom kernel: different input -tensor sizes and kernel parameters. - * **Using `printf` in the OpenCL™ Kernels**. To debug the specific values, you can use `printf` in your kernels. 
However, be careful: for instance, do not output excessively diff --git a/docs/IE_DG/GPU_Kernels_Tuning.md b/docs/IE_DG/GPU_Kernels_Tuning.md deleted file mode 100644 index 5bb6a8334b2..00000000000 --- a/docs/IE_DG/GPU_Kernels_Tuning.md +++ /dev/null @@ -1,39 +0,0 @@ -Using GPU Kernels Tuning {#openvino_docs_IE_DG_GPU_Kernels_Tuning} -====================== - -GPU Kernels Tuning allows you to tune models, so the heavy computational layers are configured to fit better into -hardware, which the tuning was done on. It is required to achieve best performance on GPU. -> **NOTE** Currently only convolution and fully connected layers undergo tuning process. It means that the performance boost depends on the amount of that layers in the model. - -OpenVINO™ releases include the `/inference_engine/bin/intel64/Release/cache.json` file with pretuned data for current state of the art models. It is highly recommended to do the -tuning for new kind of models, hardwares or drivers. - -## Tuned data - -GPU tuning data is saved in JSON format. The file is composed of 2 types of attributes and 1 type of value: -* Execution units number (attribute): splits the content into different EU sections -* Hash (attribute): hashed tuned kernel data -* Key (value): Array with kernel name and kernel's mode index - -## Usage - ---- - -You can activate Kernels Tuning process by setting `KEY_TUNING_MODE` flag to `TUNING_CREATE` and `KEY_TUNING_FILE` to `<"filename">` in a configuration map that is -passed to the plugin while loading a network. -This configuration modifies the behavior of the `ExecutableNetwork` object. Instead of standard network compilation, it will run the tuning process. -Please keep in mind that the tuning can be very time consuming. The bigger the network, the longer it will take. -File with tuned data is the result of this step. - -> **NOTE** If a filename passed to `KEY_TUNING_FILE` points to existing tuned data and you are tuning a new model, then this file will be extended by new data. This allows you to extend existing `cache.json` provided in the OpenVINO™ release package. - -The example below shows how to set and use the key files: - -@snippet snippets/GPU_Kernels_Tuning.cpp part0 - ---- - -You can activate the inference with tuned data by setting `KEY_TUNING_MODE` flag to `TUNING_USE_EXISTING` and -`KEY_TUNING_FILE` flag to `<"filename">`. - -GPU backend will process the content of the file during network compilation to configure the OpenCL kernels for the best performance. diff --git a/docs/IE_DG/Intro_to_Performance.md b/docs/IE_DG/Intro_to_Performance.md index 78d5c59c417..94d0173dbbe 100644 --- a/docs/IE_DG/Intro_to_Performance.md +++ b/docs/IE_DG/Intro_to_Performance.md @@ -22,7 +22,7 @@ $ benchmark_app -m -enforcebf16=false Notice that for quantized (e.g. INT8) models the bfloat16 calculations (of the layers that remain in FP32) is disabled by default. Refer to the [CPU Plugin documentation](supported_plugins/CPU.md) for more details. 
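The same switch is also available from the C++ API. A minimal sketch (not part of the patch), assuming the CPU plugin's `KEY_ENFORCE_BF16` config key (the setting behind the `-enforcebf16` flag) and a placeholder model path:
```cpp
// Hedged sketch only: disable bfloat16 execution for the CPU plugin via its configuration.
// "model.xml" is a placeholder; KEY_ENFORCE_BF16 is assumed to be supported by the CPU plugin.
#include <inference_engine.hpp>

int main() {
    using namespace InferenceEngine;
    Core ie;
    CNNNetwork network = ie.ReadNetwork("model.xml");
    // Keep native FP32 math even on AVX512_BF16-capable CPUs.
    auto executable = ie.LoadNetwork(network, "CPU",
        {{ PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::NO }});
    return 0;
}
```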
-Similarly, the GPU device has a dedicated config key to enable FP16 execution of the layers that remain in FP32 in the quantized models (as the quantization is typically performed on the FP32 models), refer to the ENABLE_FP16_FOR_QUANTIZED_MODELS key in the [GPU Plugin documentation](supported_plugins/CL_DNN.md) +Similarly, the GPU device has a dedicated config key to enable FP16 execution of the layers that remain in FP32 in the quantized models (as the quantization is typically performed on the FP32 models), refer to the ENABLE_FP16_FOR_QUANTIZED_MODELS key in the [GPU Plugin documentation](supported_plugins/GPU.md) ## Latency vs. Throughput One way to increase computational efficiency is batching, which combines many (potentially tens) of @@ -72,30 +72,20 @@ Instead, it is possible to keep a separate infer request per camera or another s ## Benchmark App [Benchmark App](../../inference-engine/samples/benchmark_app/README.md) sample is the best performance reference. -It has a lot of device-specific knobs, but the primary usage is as simple as: +It has a lot of device-specific knobs, but the primary usage is as simple as: ```bash $ ./benchmark_app –d GPU –m -i ``` -to measure the performance of the model on the GPU. +to measure the performance of the model on the GPU. Or ```bash $ ./benchmark_app –d CPU –m -i ``` to execute on the CPU instead. -For example, for the CPU throughput mode from the previous section, you can play with number of streams (`-nstreams` command-line param). -Try different values of the `-nstreams` argument from `1` to a number of CPU cores and find one that provides the best performance. For example, on a 8-core CPU, compare the `-nstreams 1` (which is a latency-oriented scenario) to the `2`, `4` and `8` streams. Notice that `benchmark_app` automatically queries/creates/runs number of requests required to saturate the given number of streams. +For example, for the CPU throughput mode from the previous section, you can play with number of streams (`-nstreams` command-line param). +Try different values of the `-nstreams` argument from `1` to a number of CPU cores and find one that provides the best performance. For example, on a 8-core CPU, compare the `-nstreams 1` (which is a latency-oriented scenario) to the `2`, `4` and `8` streams. Notice that `benchmark_app` automatically queries/creates/runs number of requests required to saturate the given number of streams. Finally, notice that when you don't specify number of streams with `-nstreams`, "AUTO" value for the streams is used, e.g. for the CPU this is [CPU_THROUGHPUT_AUTO](supported_plugins/CPU.md). You can spot the actual value behind "AUTO" for your machine in the application output. Notice that the "AUTO" number is not necessarily most optimal, so it is generally recommended to play either with the benchmark_app's "-nstreams" as described above, or via [new Workbench tool](@ref workbench_docs_Workbench_DG_Introduction).This allows you to simplify the app-logic, as you don't need to combine multiple inputs into a batch to achieve good CPU performance. Instead, it is possible to keep a separate infer request per camera or another source of input and process the requests in parallel using Async API. - -## Kernels Tuning for GPU - -GPU backend comes with a feature, that allows models tuning, so the workload is configured to fit better into hardware. - -Tuning is time consuming process, which internally execute every layer several (or even hundreds) times to find most performant configuration. 
- -This configuration is saved into json-formatted file, whose name can be passed as plugin param to network. GPU backend will process this data to configure kernels for the best performance. - -For more details about Kernels Tuning and How-To please refer to [GPU Kernels Tuning](GPU_Kernels_Tuning.md). diff --git a/docs/IE_DG/supported_plugins/CL_DNN.md b/docs/IE_DG/supported_plugins/GPU.md similarity index 62% rename from docs/IE_DG/supported_plugins/CL_DNN.md rename to docs/IE_DG/supported_plugins/GPU.md index 0216ae71d0d..cc12be98a12 100644 --- a/docs/IE_DG/supported_plugins/CL_DNN.md +++ b/docs/IE_DG/supported_plugins/GPU.md @@ -1,4 +1,4 @@ -GPU Plugin {#openvino_docs_IE_DG_supported_plugins_CL_DNN} +GPU Plugin {#openvino_docs_IE_DG_supported_plugins_GPU} ======= The GPU plugin uses the Intel® Compute Library for Deep Neural Networks (clDNN) to infer deep neural networks. @@ -89,13 +89,10 @@ Some layers are executed during the load time, not during the inference. One of The following layers are not accelerated on the GPU and executed on the host CPU instead: * Proposal -* SimplerNMS +* NonMaxSuppression * PriorBox * DetectionOutput -## Known Layers Limitations -* ROIPooling is supported for 'max' value of 'method' attribute. - ## Supported Configuration Parameters The plugin supports the configuration parameters listed below. @@ -107,31 +104,21 @@ When specifying key values as raw strings (that is, when using Python API), omit | `KEY_CACHE_DIR` | `""` | `""` | Specifies a directory where compiled OCL binaries can be cached. First model loading generates the cache, and all subsequent LoadNetwork calls use precompiled kernels which significantly improves load time. If empty - caching is disabled | | `KEY_PERF_COUNT` | `YES` / `NO` | `NO` | Collect performance counters during inference | | `KEY_CONFIG_FILE` | `" [ ...]"` | `""` | Load custom layer configuration files | -| `KEY_DUMP_KERNELS` | `YES` / `NO` | `NO` | Dump the final kernels used for custom layers | -| `KEY_TUNING_MODE` | `TUNING_DISABLED`
`TUNING_CREATE`
`TUNING_USE_EXISTING` | `TUNING_DISABLED` | Disable inference kernel tuning
Create tuning file (expect much longer runtime)
Use an existing tuning file | -| `KEY_TUNING_FILE` | `""` | `""` | Tuning file to create / use | -| `KEY_CLDNN_PLUGIN_PRIORITY` | `<0-3>` | `0` | OpenCL queue priority (before usage, make sure your OpenCL driver supports appropriate extension)
Higher value means higher priority for clDNN OpenCL queue. 0 disables the setting. | -| `KEY_CLDNN_PLUGIN_THROTTLE` | `<0-3>` | `0` | OpenCL queue throttling (before usage, make sure your OpenCL driver supports appropriate extension)
Lower value means lower driver thread priority and longer sleep time for it. 0 disables the setting. | -| `KEY_CLDNN_GRAPH_DUMPS_DIR` | `""` | `""` | clDNN graph optimizer stages dump output directory (in GraphViz format) | -| `KEY_CLDNN_SOURCES_DUMPS_DIR` | `""` | `""` | Final optimized clDNN OpenCL sources dump output directory | -| `KEY_GPU_THROUGHPUT_STREAMS` | `KEY_GPU_THROUGHPUT_AUTO`, or positive integer| 1 | Specifies a number of GPU "execution" streams for the throughput mode (upper bound for a number of inference requests that can be executed simultaneously).
This option is can be used to decrease GPU stall time by providing more effective load from several streams. Increasing the number of streams usually is more effective for smaller topologies or smaller input sizes. Note that your application should provide enough parallel slack (e.g. running many inference requests) to leverage full GPU bandwidth. Additional streams consume several times more GPU memory, so make sure the system has enough memory available to suit parallel stream execution. Multiple streams might also put additional load on CPU. If CPU load increases, it can be regulated by setting an appropriate `KEY_CLDNN_PLUGIN_THROTTLE` option value (see above). If your target system has relatively weak CPU, keep throttling low.
The default value is 1, which implies latency-oriented behavior.
`KEY_GPU_THROUGHPUT_AUTO` creates bare minimum of streams to improve the performance; this is the most portable option if you are not sure how many resources your target machine has (and what would be the optimal number of streams).
A positive integer value creates the requested number of streams. | +| `KEY_GPU_PLUGIN_PRIORITY` | `<0-3>` | `0` | OpenCL queue priority (before usage, make sure your OpenCL driver supports appropriate extension)
Higher value means higher priority for OpenCL queue. 0 disables the setting. | +| `KEY_GPU_PLUGIN_THROTTLE` | `<0-3>` | `0` | OpenCL queue throttling (before usage, make sure your OpenCL driver supports appropriate extension)
Lower value means lower driver thread priority and longer sleep time for it. 0 disables the setting. | +| `KEY_CLDNN_ENABLE_FP16_FOR_QUANTIZED_MODELS` | `YES` / `NO` | `YES` | Allows using FP16+INT8 mixed precision mode, so non-quantized parts of a model will be executed in FP16 precision for FP16 IR. Does not affect quantized FP32 IRs | +| `KEY_GPU_NV12_TWO_INPUTS` | `YES` / `NO` | `NO` | Controls preprocessing logic for NV12 input. If set to YES, the device graph expects the user to set a biplanar NV12 blob as input, which is passed directly to the device execution graph. Otherwise, preprocessing via GAPI is used to convert NV12->BGR, so the GPU graph has to expect a single input | +| `KEY_GPU_THROUGHPUT_STREAMS` | `KEY_GPU_THROUGHPUT_AUTO`, or positive integer| 1 | Specifies a number of GPU "execution" streams for the throughput mode (upper bound for a number of inference requests that can be executed simultaneously).
This option can be used to decrease GPU stall time by providing a more effective load from several streams. Increasing the number of streams is usually more effective for smaller topologies or smaller input sizes. Note that your application should provide enough parallel slack (e.g. running many inference requests) to leverage full GPU bandwidth. Additional streams consume several times more GPU memory, so make sure the system has enough memory available to suit parallel stream execution. Multiple streams might also put additional load on the CPU. If the CPU load increases, it can be regulated by setting an appropriate `KEY_GPU_PLUGIN_THROTTLE` option value (see above). If your target system has a relatively weak CPU, keep throttling low.
The default value is 1, which implies latency-oriented behavior.
`KEY_GPU_THROUGHPUT_AUTO` creates bare minimum of streams to improve the performance; this is the most portable option if you are not sure how many resources your target machine has (and what would be the optimal number of streams).
A positive integer value creates the requested number of streams. | | `KEY_EXCLUSIVE_ASYNC_REQUESTS` | `YES` / `NO` | `NO` | Forces async requests (also from different executable networks) to execute serially.| -| `KEY_CLDNN_MAX_NUM_THREADS` | `integer value` | `maximum # of HW threads available in host environment` | Specifies the number of CPU threads that can be used for clDNN engine, e.g, JIT compilation of clDNN kernels or clDNN cpu kernel processing. The default value is set as the number of maximum available threads in host environment to minimize the time for LoadNetwork, where the clDNN kernel build time occupies a large portion. Note that if the specified value is larger than the maximum available # of threads or less than zero, it is set as maximum available # of threads. It can be specified with a smaller number than the available HW threads according to the usage scenario, e.g., when the user wants to assign more CPU threads while clDNN plugin is running. Note that setting this value with lower number will affect not only the network loading time but also the cpu layers of clDNN networks that are optimized with multi-threading. | -| `KEY_CLDNN_ENABLE_LOOP_UNROLLING` | `YES` / `NO` | `YES` | Enables recurrent layers such as TensorIterator or Loop with fixed iteration count to be unrolled. It is turned on by default. Turning this key on will achieve better inference performance for loops with not too many iteration counts (less than 16, as a rule of thumb). Turning this key off will achieve better performance for both graph loading time and inference time with many iteration counts (greater than 16). Note that turning this key on will increase the graph loading time in proportion to the iteration counts. Thus, this key should be turned off if graph loading time is considered to be most important target to optimize. | - -## Note on Debug Capabilities of the GPU Plugin - -Inference Engine GPU plugin provides possibility to dump the user custom OpenCL™ kernels to a file to allow you to properly debug compilation issues in your custom kernels. - -The application can use the SetConfig() function with the key PluginConfigParams::KEY_DUMP_KERNELS and value: PluginConfigParams::YES. Then during network loading, all custom layers will print their OpenCL kernels with the JIT instrumentation added by the plugin. -The kernels will be stored in the working directory under files named the following way: clDNN_program0.cl, clDNN_program1.cl. - -This option is disabled by default. Additionally, the application can call the SetConfig() function with the key PluginConfigParams::KEY_DUMP_KERNELS and value: PluginConfigParams::NO before network loading. - -How to verify that this option is disabled: -1. Delete all clDNN_program*.cl files from the current directory -2. Run your application to load a network -3. Examine the working directory for the presence of any kernel file (for example, clDNN_program0.cl) +| `KEY_GPU_MAX_NUM_THREADS` | `integer value` | `maximum # of HW threads available in host environment` | Specifies the number of CPU threads that can be used for GPU engine, e.g, JIT compilation of GPU kernels or cpu kernel processing within GPU plugin. The default value is set as the number of maximum available threads in host environment to minimize the time for LoadNetwork, where the GPU kernel build time occupies a large portion. Note that if the specified value is larger than the maximum available # of threads or less than zero, it is set as maximum available # of threads. 
It can be specified with a smaller number than the available HW threads according to the usage scenario, e.g., when the user wants to assign more CPU threads while the GPU plugin is running. Note that setting this value to a lower number will affect not only the network loading time but also the CPU layers of GPU networks that are optimized with multi-threading. | +| `KEY_GPU_ENABLE_LOOP_UNROLLING` | `YES` / `NO` | `YES` | Enables recurrent layers such as TensorIterator or Loop with fixed iteration count to be unrolled. It is turned on by default. Turning this key on will achieve better inference performance for loops with not too many iteration counts (less than 16, as a rule of thumb). Turning this key off will achieve better performance for both graph loading time and inference time with many iteration counts (greater than 16). Note that turning this key on will increase the graph loading time in proportion to the iteration counts. Thus, this key should be turned off if graph loading time is considered the most important target to optimize. | +| `KEY_CLDNN_PLUGIN_PRIORITY` | `<0-3>` | `0` | OpenCL queue priority (before usage, make sure your OpenCL driver supports appropriate extension)
Higher value means higher priority for OpenCL queue. 0 disables the setting. **Deprecated**. Please use KEY_GPU_PLUGIN_PRIORITY | +| `KEY_CLDNN_PLUGIN_THROTTLE` | `<0-3>` | `0` | OpenCL queue throttling (before usage, make sure your OpenCL driver supports appropriate extension)
Lower value means lower driver thread priority and longer sleep time for it. 0 disables the setting. **Deprecated**. Please use KEY_GPU_PLUGIN_THROTTLE | +| `KEY_CLDNN_GRAPH_DUMPS_DIR` | `""` | `""` | clDNN graph optimizer stages dump output directory (in GraphViz format) **Deprecated**. Will be removed in the next release | +| `KEY_CLDNN_SOURCES_DUMPS_DIR` | `""` | `""` | Final optimized clDNN OpenCL sources dump output directory. **Deprecated**. Will be removed in the next release | +| `KEY_DUMP_KERNELS` | `YES` / `NO` | `NO` | Dump the final kernels used for custom layers. **Deprecated**. Will be removed in the next release | +| `KEY_TUNING_MODE` | `TUNING_DISABLED`
`TUNING_CREATE`
`TUNING_USE_EXISTING` | `TUNING_DISABLED` | Disable inference kernel tuning
Create tuning file (expect much longer runtime)
Use an existing tuning file. **Deprecated**. Will be removed in the next release | +| `KEY_TUNING_FILE` | `""` | `""` | Tuning file to create / use. **Deprecated**. Will be removed in the next release | ## GPU Context and Video Memory Sharing RemoteBlob API diff --git a/docs/IE_DG/supported_plugins/Supported_Devices.md b/docs/IE_DG/supported_plugins/Supported_Devices.md index ed8cabec076..e1140ae4b74 100644 --- a/docs/IE_DG/supported_plugins/Supported_Devices.md +++ b/docs/IE_DG/supported_plugins/Supported_Devices.md @@ -9,11 +9,11 @@ The Inference Engine provides unique capabilities to infer deep learning models | Plugin | Device types | |------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------| -|[GPU plugin](CL_DNN.md) |Intel® Processor Graphics, including Intel® HD Graphics and Intel® Iris® Graphics | +|[GPU plugin](GPU.md) |Intel® Processor Graphics, including Intel® HD Graphics and Intel® Iris® Graphics | |[CPU plugin](CPU.md) |Intel® Xeon® with Intel® Advanced Vector Extensions 2 (Intel® AVX2), Intel® Advanced Vector Extensions 512 (Intel® AVX-512), and AVX512_BF16, Intel® Core™ Processors with Intel® AVX2, Intel® Atom® Processors with Intel® Streaming SIMD Extensions (Intel® SSE) | |[VPU plugins](VPU.md) (available in the Intel® Distribution of OpenVINO™ toolkit) |Intel® Neural Compute Stick 2 powered by the Intel® Movidius™ Myriad™ X, Intel® Vision Accelerator Design with Intel® Movidius™ VPUs | |[GNA plugin](GNA.md) (available in the Intel® Distribution of OpenVINO™ toolkit) |Intel® Speech Enabling Developer Kit, Amazon Alexa* Premium Far-Field Developer Kit, Intel® Pentium® Silver J5005 Processor, Intel® Pentium® Silver N5000 Processor, Intel® Celeron® J4005 Processor, Intel® Celeron® J4105 Processor, Intel® Celeron® Processor N4100, Intel® Celeron® Processor N4000, Intel® Core™ i3-8121U Processor, Intel® Core™ i7-1065G7 Processor, Intel® Core™ i7-1060G7 Processor, Intel® Core™ i5-1035G4 Processor, Intel® Core™ i5-1035G7 Processor, Intel® Core™ i5-1035G1 Processor, Intel® Core™ i5-1030G7 Processor, Intel® Core™ i5-1030G4 Processor, Intel® Core™ i3-1005G1 Processor, Intel® Core™ i3-1000G1 Processor, Intel® Core™ i3-1000G4 Processor| -|[Multi-Device plugin](MULTI.md) |Multi-Device plugin enables simultaneous inference of the same network on several Intel® devices in parallel | +|[Multi-Device plugin](MULTI.md) |Multi-Device plugin enables simultaneous inference of the same network on several Intel® devices in parallel | |[Heterogeneous plugin](HETERO.md) |Heterogeneous plugin enables automatic inference splitting between several Intel® devices (for example if a device doesn't [support certain layers](#supported-layers)). | Devices similar to the ones we have used for benchmarking can be accessed using [Intel® DevCloud for the Edge](https://devcloud.intel.com/edge/), a remote development environment with access to Intel® hardware and the latest versions of the Intel® Distribution of the OpenVINO™ Toolkit. [Learn more](https://devcloud.intel.com/edge/get_started/devcloud/) or [Register here](https://inteliot.force.com/DevcloudForEdge/s/). @@ -60,7 +60,7 @@ For example, the CHW value at index (c,h,w) is physically located at index (c\*H |GNA plugin |Supported |Supported |Not supported |
\* - currently, only limited set of topologies might benefit from enabling I8 model on GPU
For [Multi-Device](MULTI.md) and [Heterogeneous](HETERO.md) execution -the supported models formats depends on the actual underlying devices. _Generally, FP16 is preferable as it is most ubiquitous and performant_. +the supported models formats depends on the actual underlying devices. _Generally, FP16 is preferable as it is most ubiquitous and performant_. ### Supported Input Precision @@ -73,7 +73,7 @@ the supported models formats depends on the actual underlying devices. _Generall
\* - Supported via `SetBlob` only, `GetBlob` returns FP32
For [Multi-Device](MULTI.md) and [Heterogeneous](HETERO.md) execution -the supported input precision depends on the actual underlying devices. _Generally, U8 is preferable as it is most ubiquitous_. +the supported input precision depends on the actual underlying devices. _Generally, U8 is preferable as it is most ubiquitous_. ### Supported Output Precision @@ -84,7 +84,7 @@ the supported input precision depends on the actual underlying devices. _Genera |VPU plugins |Supported |Supported | |GNA plugin |Supported |Not supported | For [Multi-Device](MULTI.md) and [Heterogeneous](HETERO.md) execution -the supported output precision depends on the actual underlying devices. _Generally, FP32 is preferable as it is most ubiquitous_. +the supported output precision depends on the actual underlying devices. _Generally, FP32 is preferable as it is most ubiquitous_. ### Supported Input Layout diff --git a/docs/doxygen/ie_docs.xml b/docs/doxygen/ie_docs.xml index f287487913d..bb006c9f01c 100644 --- a/docs/doxygen/ie_docs.xml +++ b/docs/doxygen/ie_docs.xml @@ -293,7 +293,6 @@ limitations under the License. - @@ -303,7 +302,7 @@ limitations under the License. - + diff --git a/docs/model_server/README.md b/docs/model_server/README.md index ae5d03914ab..e6c7144f3cb 100644 --- a/docs/model_server/README.md +++ b/docs/model_server/README.md @@ -1,29 +1,29 @@ # OpenVINO™ Model Server {#openvino_docs_ovms} -OpenVINO™ Model Server (OVMS) is a scalable, high-performance solution for serving machine learning models optimized for Intel® architectures. -The server provides an inference service via gRPC or REST API - making it easy to deploy new algorithms and AI experiments using the same -architecture as [TensorFlow* Serving](https://github.com/tensorflow/serving) for any models trained in a framework that is supported -by [OpenVINO](https://software.intel.com/en-us/openvino-toolkit). +OpenVINO™ Model Server (OVMS) is a scalable, high-performance solution for serving machine learning models optimized for Intel® architectures. +The server provides an inference service via gRPC or REST API - making it easy to deploy new algorithms and AI experiments using the same +architecture as [TensorFlow* Serving](https://github.com/tensorflow/serving) for any models trained in a framework that is supported +by [OpenVINO](https://software.intel.com/en-us/openvino-toolkit). The server implements gRPC and REST API framework with data serialization and deserialization using TensorFlow Serving API, and OpenVINO™ as the inference execution provider. Model repositories may reside on a locally accessible file system (for example, NFS), Google Cloud Storage\* (GCS), Amazon S3\*, MinIO\*, or Azure Blob Storage\*. - + OVMS is now implemented in C++ and provides much higher scalability compared to its predecessor in the Python version. You can take advantage of all the power of Xeon® CPU capabilities or AI accelerators and expose it over the network interface. Read the [release notes](https://github.com/openvinotoolkit/model_server/releases) to find out what's new in the C++ version. Review the [Architecture Concept](https://github.com/openvinotoolkit/model_server/blob/main/docs/architecture.md) document for more details. -A few key features: +A few key features: - Support for multiple frameworks. Serve models trained in popular formats such as Caffe\*, TensorFlow\*, MXNet\*, and ONNX*. 
- Deploy new [model versions](https://github.com/openvinotoolkit/model_server/blob/main/docs/docker_container.md#model-version-policy) without changing client code. -- Support for AI accelerators including [Intel Movidius Myriad VPUs](../IE_DG/supported_plugins/VPU), -[GPU](../IE_DG/supported_plugins/CL_DNN), and [HDDL](../IE_DG/supported_plugins/HDDL). +- Support for AI accelerators including [Intel Movidius Myriad VPUs](../IE_DG/supported_plugins/VPU.md), +[GPU](../IE_DG/supported_plugins/GPU.md), and [HDDL](../IE_DG/supported_plugins/HDDL.md). - The server can be enabled both on [Bare Metal Hosts](https://github.com/openvinotoolkit/model_server/blob/main/docs/host.md) or in [Docker* containers](https://github.com/openvinotoolkit/model_server/blob/main/docs/docker_container.md). -- [Kubernetes deployments](https://github.com/openvinotoolkit/model_server/blob/main/deploy). The server can be deployed in a Kubernetes cluster allowing the inference service to scale horizontally and ensure high availability. -- [Model reshaping](https://github.com/openvinotoolkit/model_server/blob/main/docs/docker_container.md#model-reshaping). The server supports reshaping models in runtime. +- [Kubernetes deployments](https://github.com/openvinotoolkit/model_server/blob/main/deploy). The server can be deployed in a Kubernetes cluster allowing the inference service to scale horizontally and ensure high availability. +- [Model reshaping](https://github.com/openvinotoolkit/model_server/blob/main/docs/docker_container.md#model-reshaping). The server supports reshaping models in runtime. - [Model ensemble](https://github.com/openvinotoolkit/model_server/blob/main/docs/ensemble_scheduler.md) (preview). Connect multiple models to deploy complex processing solutions and reduce overhead of sending data back and forth. > **NOTE**: OVMS has been tested on CentOS\* and Ubuntu\*. Publicly released [Docker images](https://hub.docker.com/r/openvino/model_server) are based on CentOS. @@ -68,30 +68,30 @@ For more detailed guides on using the Model Server in various scenarios, visit t ## API Documentation -### GRPC +### GRPC -OpenVINO™ Model Server gRPC API is documented in the proto buffer files in [tensorflow_serving_api](https://github.com/tensorflow/serving/tree/r2.2/tensorflow_serving/apis). +OpenVINO™ Model Server gRPC API is documented in the proto buffer files in [tensorflow_serving_api](https://github.com/tensorflow/serving/tree/r2.2/tensorflow_serving/apis). -> **NOTE:** The implementations for `Predict`, `GetModelMetadata`, and `GetModelStatus` function calls are currently available. +> **NOTE:** The implementations for `Predict`, `GetModelMetadata`, and `GetModelStatus` function calls are currently available. > These are the most generic function calls and should address most of the usage scenarios. -[Predict proto](https://github.com/tensorflow/serving/blob/r2.2/tensorflow_serving/apis/predict.proto) defines two message specifications: `PredictRequest` and `PredictResponse` used while calling Prediction endpoint. -* `PredictRequest` specifies information about the model spec, that is name and version, and a map of input data serialized via +[Predict proto](https://github.com/tensorflow/serving/blob/r2.2/tensorflow_serving/apis/predict.proto) defines two message specifications: `PredictRequest` and `PredictResponse` used while calling Prediction endpoint. 
+* `PredictRequest` specifies information about the model spec, that is name and version, and a map of input data serialized via [TensorProto](https://github.com/tensorflow/tensorflow/blob/r2.2/tensorflow/core/framework/tensor.proto) to a string format. -* `PredictResponse` includes a map of outputs serialized by +* `PredictResponse` includes a map of outputs serialized by [TensorProto](https://github.com/tensorflow/tensorflow/blob/r2.2/tensorflow/core/framework/tensor.proto) and information about the used model spec. - + [Get Model Metadata proto](https://github.com/tensorflow/serving/blob/r2.2/tensorflow_serving/apis/get_model_metadata.proto) defines three message definitions used while calling Metadata endpoint: `SignatureDefMap`, `GetModelMetadataRequest`, `GetModelMetadataResponse`. A function call `GetModelMetadata` accepts model spec information as input and returns Signature Definition content in the format similar to TensorFlow Serving. [Get Model Status proto](https://github.com/tensorflow/serving/blob/r2.2/tensorflow_serving/apis/get_model_status.proto) defines three message definitions used while calling Status endpoint: - `GetModelStatusRequest`, `ModelVersionStatus`, `GetModelStatusResponse` that report all exposed versions including their state in their lifecycle. + `GetModelStatusRequest`, `ModelVersionStatus`, `GetModelStatusResponse` that report all exposed versions including their state in their lifecycle. Refer to the [example client code](https://github.com/openvinotoolkit/model_server/blob/main/example_client) to learn how to use this API and submit the requests using the gRPC interface. -Using the gRPC interface is recommended for optimal performance due to its faster implementation of input data deserialization. It enables you to achieve lower latency, especially with larger input messages like images. +Using the gRPC interface is recommended for optimal performance due to its faster implementation of input data deserialization. It enables you to achieve lower latency, especially with larger input messages like images. ### REST @@ -99,9 +99,9 @@ OpenVINO™ Model Server RESTful API follows the documentation from the [Ten Both row and column format of the requests are implemented. -> **NOTE**: Just like with gRPC, only the implementations for `Predict`, `GetModelMetadata`, and `GetModelStatus` function calls are currently available. +> **NOTE**: Just like with gRPC, only the implementations for `Predict`, `GetModelMetadata`, and `GetModelStatus` function calls are currently available. -Only the numerical data types are supported. +Only the numerical data types are supported. Review the exemplary clients below to find out more how to connect and run inference requests. @@ -110,9 +110,9 @@ REST API is recommended when the primary goal is in reducing the number of clien ## Known Limitations -* Currently, `Predict`, `GetModelMetadata`, and `GetModelStatus` calls are implemented using the TensorFlow Serving API. +* Currently, `Predict`, `GetModelMetadata`, and `GetModelStatus` calls are implemented using the TensorFlow Serving API. * `Classify`, `Regress`, and `MultiInference` are not included. -* `Output_filter` is not effective in the `Predict` call. All outputs defined in the model are returned to the clients. +* `Output_filter` is not effective in the `Predict` call. All outputs defined in the model are returned to the clients. 
## OpenVINO Model Server Contribution Policy diff --git a/docs/optimization_guide/dldt_optimization_guide.md b/docs/optimization_guide/dldt_optimization_guide.md index e70c0365a41..9ece7fec93a 100644 --- a/docs/optimization_guide/dldt_optimization_guide.md +++ b/docs/optimization_guide/dldt_optimization_guide.md @@ -2,13 +2,13 @@ ## Introduction -The purpose of this document is to give you performance-related insights to every step of the network deployment process. +The purpose of this document is to give you performance-related insights to every step of the network deployment process. For information on the general workflow, refer to the documentation in See Also. For an example Inference Engine API snippet, see Request-Based API and “GetBlob” Idiom. ### Deep Learning Inference Engine Overview -Deep Learning Inference Engine is a part of Intel® Deep Learning Deployment Toolkit (Intel® DL Deployment Toolkit) and OpenVINO™ toolkit. Inference Engine facilitates deployment of deep learning solutions by delivering a unified, device-agnostic API. +Deep Learning Inference Engine is a part of Intel® Deep Learning Deployment Toolkit (Intel® DL Deployment Toolkit) and OpenVINO™ toolkit. Inference Engine facilitates deployment of deep learning solutions by delivering a unified, device-agnostic API. Below, there are the three main steps of the deployment process: @@ -50,7 +50,7 @@ When evaluating performance of your model with the Inference Engine, you must me ### Latency vs. Throughput -In the asynchronous case (see Request-Based API and “GetBlob” Idiom), the performance of an individual infer request is usually of less concern. Instead, you typically execute multiple requests asynchronously and measure the throughput in images per second by dividing the number of images that were processed by the processing time. +In the asynchronous case (see Request-Based API and “GetBlob” Idiom), the performance of an individual infer request is usually of less concern. Instead, you typically execute multiple requests asynchronously and measure the throughput in images per second by dividing the number of images that were processed by the processing time. In contrast, for the latency-oriented tasks, the time to a single frame is more important. Refer to the [Benchmark App](../../inference-engine/samples/benchmark_app/README.md) sample, which allows latency vs. throughput measuring. @@ -114,23 +114,23 @@ The resulting IR precision, for instance, `FP16` or `FP32`, directly affects per ## Multi-Device Execution OpenVINO™ toolkit supports automatic multi-device execution, please see [MULTI-Device plugin description](../IE_DG/supported_plugins/MULTI.md). -In the next chapter you can find the device-specific tips, while this section covers few recommendations +In the next chapter you can find the device-specific tips, while this section covers few recommendations for the multi-device execution: -- MULTI usually performs best when the fastest device is specified first in the list of the devices. - This is particularly important when the parallelism is not sufficient +- MULTI usually performs best when the fastest device is specified first in the list of the devices. + This is particularly important when the parallelism is not sufficient (e.g. the number of request in the flight is not enough to saturate all devices). 
-- It is highly recommended to query the optimal number of inference requests directly from the instance of the ExecutionNetwork - (resulted from the LoadNetwork call with the specific multi-device configuration as a parameter). -Please refer to the code of the [Benchmark App](../../inference-engine/samples/benchmark_app/README.md) sample for details. -- Notice that for example CPU+GPU execution performs better with certain knobs +- It is highly recommended to query the optimal number of inference requests directly from the instance of the ExecutionNetwork + (resulted from the LoadNetwork call with the specific multi-device configuration as a parameter). +Please refer to the code of the [Benchmark App](../../inference-engine/samples/benchmark_app/README.md) sample for details. +- Notice that for example CPU+GPU execution performs better with certain knobs which you can find in the code of the same [Benchmark App](../../inference-engine/samples/benchmark_app/README.md) sample. - One specific example is disabling GPU driver polling, which in turn requires multiple GPU streams (which is already a default for the GPU) to amortize slower + One specific example is disabling GPU driver polling, which in turn requires multiple GPU streams (which is already a default for the GPU) to amortize slower inference completion from the device to the host. -- Multi-device logic always attempts to save on the (e.g. inputs) data copies between device-agnostic, user-facing inference requests - and device-specific 'worker' requests that are being actually scheduled behind the scene. - To facilitate the copy savings, it is recommended to start the requests in the order that they were created +- Multi-device logic always attempts to save on the (e.g. inputs) data copies between device-agnostic, user-facing inference requests + and device-specific 'worker' requests that are being actually scheduled behind the scene. + To facilitate the copy savings, it is recommended to start the requests in the order that they were created (with ExecutableNetwork's CreateInferRequest). - + ## Device-Specific Optimizations @@ -171,7 +171,7 @@ Notice that on a multi-socket machine, the bare minimum of streams for a latency In addition, you can play with the batch size to find the throughput sweet spot. -If your application is hard or impossible to change in accordance with the multiple-requests logic, consider the "multiple-instance" trick to improve the throughput: +If your application is hard or impossible to change in accordance with the multiple-requests logic, consider the "multiple-instance" trick to improve the throughput: - For multi-socket execution, it is recommended to set [`KEY_CPU_THREADS_NUM`](../IE_DG/supported_plugins/CPU.md) to the number of cores per socket, and run as many instances of the application as you have sockets. - Similarly, for extremely lightweight networks (running faster than 1ms) and/or many-core machines (16+ cores), try limiting the number of CPU inference threads to just `#‍phys` cores and further, while trying to saturate the machine with running multiple instances of the application. @@ -186,15 +186,15 @@ Inference Engine relies on the [Compute Library for Deep Neural Networks (clDNN) - If your application is simultaneously using the inference on the CPU or otherwise loads the host heavily, make sure that the OpenCL driver threads do not starve. You can use [CPU configuration options](../IE_DG/supported_plugins/CPU.md) to limit number of inference threads for the CPU plugin. 
- In the GPU-only scenario, a GPU driver might occupy a CPU core with spin-looped polling for completion. If the _CPU_ utilization is a concern, consider the `KEY_CLDND_PLUGIN_THROTTLE` configuration option. -> **NOTE**: See the [Benchmark App Sample](../../inference-engine/samples/benchmark_app/README.md) code for a usage example. -Notice that while disabling the polling, this option might reduce the GPU performance, so usually this option is used with multiple [GPU streams](../IE_DG/supported_plugins/CL_DNN.md). +> **NOTE**: See the [Benchmark App Sample](../../inference-engine/samples/benchmark_app/README.md) code for a usage example. +Notice that while disabling the polling, this option might reduce the GPU performance, so usually this option is used with multiple [GPU streams](../IE_DG/supported_plugins/GPU.md). ### Intel® Movidius™ Myriad™ X Visual Processing Unit and Intel® Vision Accelerator Design with Intel® Movidius™ VPUs Since Intel® Movidius™ Myriad™ X Visual Processing Unit (Intel® Movidius™ Myriad™ 2 VPU) communicates with the host over USB, minimum four infer requests in flight are recommended to hide the data transfer costs. See Request-Based API and “GetBlob” Idiom and [Benchmark App Sample](../../inference-engine/samples/benchmark_app/README.md) for more information. -Intel® Vision Accelerator Design with Intel® Movidius™ VPUs requires to keep at least 32 inference requests in flight to fully saturate the device. +Intel® Vision Accelerator Design with Intel® Movidius™ VPUs requires to keep at least 32 inference requests in flight to fully saturate the device. ### FPGA @@ -274,7 +274,7 @@ The following tips are provided to give general guidance on optimizing execution - Generally, GPU performance is better on heavy kernels (like Convolutions) and large inputs. So if the network inference time is already too small (~1ms of execution time), using the GPU would unlikely give a boost. -- A typical strategy to start with is to test the CPU-only and GPU-only scenarios first (with samples this is plain `-d CPU` or `-d GPU`). If there are specific kernels that are not supported by the GPU, the best option to try is the `HETERO:GPU,CPU` that automatically applies default splitting (based on the plugins layers support). Then, you can play with the manual affinity settings (for example, to further minimize the number of subgraphs). +- A typical strategy to start with is to test the CPU-only and GPU-only scenarios first (with samples this is plain `-d CPU` or `-d GPU`). If there are specific kernels that are not supported by the GPU, the best option to try is the `HETERO:GPU,CPU` that automatically applies default splitting (based on the plugins layers support). Then, you can play with the manual affinity settings (for example, to further minimize the number of subgraphs). - The general affinity “rule of thumb” is to keep computationally-intensive kernels on the accelerator, and "glue" (or helper) kernels on the CPU. Notice that this includes the granularity considerations. For example, running some (custom) activation on the CPU would result in too many conversions. 
@@ -337,7 +337,7 @@ For inference on the CPU there are multiple threads binding options, see If you are building an app-level pipeline with third-party components like GStreamer*, the general guidance for NUMA machines is as follows: - Whenever possible, use at least one instance of the pipeline per NUMA node: - - Pin the _entire_ pipeline instance to the specific NUMA node at the outer-most level (for example, use Kubernetes* and/or `numactl` command with proper settings before actual GStreamer commands). + - Pin the _entire_ pipeline instance to the specific NUMA node at the outer-most level (for example, use Kubernetes* and/or `numactl` command with proper settings before actual GStreamer commands). - Disable any individual pinning by the pipeline components (e.g. set [CPU_BIND_THREADS to 'NO'](../IE_DG/supported_plugins/CPU.md)). - Limit each instance with respect to number of inference threads. Use [CPU_THREADS_NUM](../IE_DG/supported_plugins/CPU.md) or or other means (e.g. virtualization, Kubernetes*, etc), to avoid oversubscription. - If pinning instancing/pinning of the entire pipeline is not possible or desirable, relax the inference threads pinning to just 'NUMA'. @@ -416,7 +416,7 @@ If your application simultaneously executes multiple infer requests: - For FPGA and GPU, the actual work is serialized by a plugin and/or a driver anyway. -- Finally, for any VPU flavor, using multiple requests is a must for achieving good throughput. +- Finally, for any VPU flavor, using multiple requests is a must for achieving good throughput. In the Inference Engine, there is no notion of requests priorities. It is left to the user side (for example, not queuing the low priority infer request, until another higher priority is waiting). Notice that it would require additional logic to synchronize between executable networks (queues) in your application code. @@ -470,12 +470,12 @@ Example of Inference Engine calls: Notice that `Task_runNOThrow` is an Async API wrapper and it is executed in a different thread and triggers the Intel MKL-DNN execution: ![](../img/vtune_timeline.png) - + - In the Intel VTune Amplifier **Top-down view**, grouped by the **Task Domain**. Notice the `Task_runNoThrow` and `MKLDNN _INFER` that are bracketing the actual Intel MKL-DNN kernels execution: - + ![](../img/vtune_topdown_view.jpg) - + Similarly, you can use any GPU analysis in the Intel VTune Amplifier and get general correlation with Inference Engine API as well as the execution breakdown for OpenCL kernels. Just like with regular native application, further drill down in the counters is possible, however, this is mostly useful for optimizing custom kernels. Finally, with the Intel VTune Amplifier, the profiling is not limited to your user-level code (see the [corresponding section in the Intel® VTune™ Amplifier User's Guide](https://software.intel.com/en-us/vtune-amplifier-help-analyze-performance)). @@ -513,12 +513,12 @@ Since FPGA execution does not separate individual kernels, only bulk execution/d ``` subgraph1: 1. input preprocessing (mean data/FPGA):EXECUTED layerType: preprocessing realTime: 129 cpu: 129 -subgraph1: 2. input transfer to DDR:EXECUTED layerType: realTime: 201 cpu: 0 -subgraph1: 3. FPGA execute time:EXECUTED layerType: realTime: 3808 cpu: 0 subgraph1: 4. output transfer from DDR:EXECUTED layerType: realTime: 55 cpu: 0 -subgraph1: 5. FPGA output postprocessing:EXECUTED layerType: realTime: 7 cpu: 7 -subgraph1: 6. 
softmax/copy: EXECUTED layerType: realTime: 2 cpu: 2 -subgraph2: out_prob: NOT_RUN layerType: Output realTime: 0 cpu: 0 -subgraph2: prob: EXECUTED layerType: SoftMax realTime: 10 cpu: 10 +subgraph1: 2. input transfer to DDR:EXECUTED layerType: realTime: 201 cpu: 0 +subgraph1: 3. FPGA execute time:EXECUTED layerType: realTime: 3808 cpu: 0 subgraph1: 4. output transfer from DDR:EXECUTED layerType: realTime: 55 cpu: 0 +subgraph1: 5. FPGA output postprocessing:EXECUTED layerType: realTime: 7 cpu: 7 +subgraph1: 6. softmax/copy: EXECUTED layerType: realTime: 2 cpu: 2 +subgraph2: out_prob: NOT_RUN layerType: Output realTime: 0 cpu: 0 +subgraph2: prob: EXECUTED layerType: SoftMax realTime: 10 cpu: 10 Total time: 4212 microseconds ``` diff --git a/docs/snippets/GPU_Kernel.cpp b/docs/snippets/GPU_Kernel.cpp index 5f849eb6a6a..8b21a79dfe2 100644 --- a/docs/snippets/GPU_Kernel.cpp +++ b/docs/snippets/GPU_Kernel.cpp @@ -1,5 +1,4 @@ #include -#include "cldnn/cldnn_config.hpp" int main() { using namespace InferenceEngine; @@ -9,9 +8,5 @@ InferenceEngine::Core core; core.SetConfig({ { InferenceEngine::PluginConfigParams::KEY_CONFIG_FILE, "" } }, "GPU"); //! [part0] -//! [part1] -core.SetConfig({ { PluginConfigParams::KEY_DUMP_KERNELS, PluginConfigParams::YES } }, "GPU"); -//! [part1] - return 0; } diff --git a/docs/snippets/GPU_Kernels_Tuning.cpp b/docs/snippets/GPU_Kernels_Tuning.cpp deleted file mode 100644 index 25daeec5e2a..00000000000 --- a/docs/snippets/GPU_Kernels_Tuning.cpp +++ /dev/null @@ -1,14 +0,0 @@ -#include -#include "cldnn/cldnn_config.hpp" - -int main() { -using namespace InferenceEngine; -//! [part0] -Core ie; - ie.SetConfig({{ CONFIG_KEY(TUNING_MODE), CONFIG_VALUE(TUNING_CREATE) }}, "GPU"); - ie.SetConfig({{ CONFIG_KEY(TUNING_FILE), "/path/to/tuning/file.json" }}, "GPU"); - // Further LoadNetwork calls will use the specified tuning parameters -//! [part0] - -return 0; -} diff --git a/docs/snippets/GPU_RemoteBlob_API2.cpp b/docs/snippets/GPU_RemoteBlob_API2.cpp index 1bb00c17e03..13597ae4561 100644 --- a/docs/snippets/GPU_RemoteBlob_API2.cpp +++ b/docs/snippets/GPU_RemoteBlob_API2.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include int main() { @@ -28,7 +28,7 @@ auto shared_va_context = gpu::make_shared_context(ie, "GPU", disp); // compile network within a shared context ExecutableNetwork executable_network = ie.LoadNetwork(network, shared_va_context, - { { CLDNNConfigParams::KEY_CLDNN_NV12_TWO_INPUTS, + { { GPUConfigParams::KEY_GPU_NV12_TWO_INPUTS, PluginConfigParams::YES } }); diff --git a/inference-engine/include/cldnn/cldnn_config.hpp b/inference-engine/include/cldnn/cldnn_config.hpp index cbc2aef0242..3e5dc4cfb12 100644 --- a/inference-engine/include/cldnn/cldnn_config.hpp +++ b/inference-engine/include/cldnn/cldnn_config.hpp @@ -11,47 +11,11 @@ #pragma once #include "ie_plugin_config.hpp" +#include "ie_api.h" +#include "gpu/gpu_config.hpp" namespace InferenceEngine { -namespace Metrics { - -/** - * @def GPU_METRIC_KEY(name) - * @brief shortcut for defining GPU plugin metrics - */ -#define GPU_METRIC_KEY(name) METRIC_KEY(GPU_##name) -#define DECLARE_GPU_METRIC_KEY(name, ...) DECLARE_METRIC_KEY(GPU_##name, __VA_ARGS__) - -/** - * @def DECLARE_GPU_METRIC_VALUE(name) - * @brief shortcut for defining gpu metric values - */ -#define DECLARE_GPU_METRIC_VALUE(name) DECLARE_METRIC_VALUE(GPU_##name) - -/** - * @brief Metric which defines size of memory in bytes available for the device. 
For iGPU it returns host memory size, for dGPU - dedicated gpu memory size - */ -DECLARE_GPU_METRIC_KEY(DEVICE_TOTAL_MEM_SIZE, uint64_t); - -/** - * @brief Metric to get microarchitecture identifier in major.minor.revision format - */ -DECLARE_GPU_METRIC_KEY(UARCH_VERSION, std::string); - -/** - * @brief Metric to get count of execution units for current GPU - */ -DECLARE_GPU_METRIC_KEY(EXECUTION_UNITS_COUNT, int); - -/** - * @brief Possible return value for OPTIMIZATION_CAPABILITIES metric - * - "HW_MATMUL" - Defines if device has hardware block for matrix multiplication - */ -DECLARE_GPU_METRIC_VALUE(HW_MATMUL); - -} // namespace Metrics - /** * @brief GPU plugin configuration */ @@ -70,6 +34,7 @@ namespace CLDNNConfigParams { * this option should be used with an unsigned integer value (1 is lowest priority) * 0 means no priority hint is set and default queue is created. */ +INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::GPUConfigParams::GPU_PLUGIN_PRIORITY instead") DECLARE_CLDNN_CONFIG_KEY(PLUGIN_PRIORITY); /** @@ -78,22 +43,26 @@ DECLARE_CLDNN_CONFIG_KEY(PLUGIN_PRIORITY); * chapter 9.19. This option should be used with an unsigned integer value (1 is lowest energy consumption) * 0 means no throttle hint is set and default queue created. */ +INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::GPUConfigParams::GPU_PLUGIN_THROTTLE instead") DECLARE_CLDNN_CONFIG_KEY(PLUGIN_THROTTLE); /** * @brief This key controls clDNN memory pool optimization. * Turned off by default. */ +INFERENCE_ENGINE_DEPRECATED("The config key will be removed") DECLARE_CLDNN_CONFIG_KEY(MEM_POOL); /** * @brief This key defines the directory name to which clDNN graph visualization will be dumped. */ +INFERENCE_ENGINE_DEPRECATED("The config key will be removed") DECLARE_CLDNN_CONFIG_KEY(GRAPH_DUMPS_DIR); /** * @brief This key defines the directory name to which full program sources will be dumped. */ +INFERENCE_ENGINE_DEPRECATED("The config key will be removed") DECLARE_CLDNN_CONFIG_KEY(SOURCES_DUMPS_DIR); /** @@ -108,43 +77,19 @@ DECLARE_CLDNN_CONFIG_KEY(ENABLE_FP16_FOR_QUANTIZED_MODELS); * @brief This key should be set to correctly handle NV12 input without pre-processing. * Turned off by default. */ +INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::GPUConfigParams::GPU_NV12_TWO_INPUTS instead") DECLARE_CLDNN_CONFIG_KEY(NV12_TWO_INPUTS); -/** - * @brief This key sets the max number of host threads that can be used by GPU plugin on model loading. - * Default value is maximum number of threads available in the environment. - */ -DECLARE_CLDNN_CONFIG_KEY(MAX_NUM_THREADS); - -/** - * @brief Turning on this key enables to unroll recurrent layers such as TensorIterator or Loop with fixed iteration count. - * This key is turned on by default. Turning this key on will achieve better inference performance for loops with not too many iteration counts (less than 16, as a rule of thumb). - * Turning this key off will achieve better performance for both graph loading time and inference time with many iteration counts (greater than 16). - * Note that turning this key on will increase the graph loading time in proportion to the iteration counts. - * Thus, this key should be turned off if graph loading time is considered to be most important target to optimize.*/ -DECLARE_CLDNN_CONFIG_KEY(ENABLE_LOOP_UNROLLING); - } // namespace CLDNNConfigParams namespace PluginConfigParams { -/** - * @brief Optimize GPU plugin execution to maximize throughput. 
- * - * It is passed to Core::SetConfig(), this option should be used with values: - * - KEY_GPU_THROUGHPUT_AUTO creates bare minimum of streams that might improve performance in some cases, - * this option allows to enable throttle hint for opencl queue thus reduce CPU load without significant performance - * drop - * - a positive integer value creates the requested number of streams - */ -DECLARE_CONFIG_VALUE(GPU_THROUGHPUT_AUTO); -DECLARE_CONFIG_KEY(GPU_THROUGHPUT_STREAMS); - /** * @brief This key enables dumping of the kernels used by the plugin for custom layers. * * This option should be used with values: PluginConfigParams::YES or PluginConfigParams::NO (default) */ +INFERENCE_ENGINE_DEPRECATED("The config key will be removed") DECLARE_CONFIG_KEY(DUMP_KERNELS); /** @@ -159,17 +104,24 @@ DECLARE_CONFIG_KEY(DUMP_KERNELS); * * For values TUNING_CREATE and TUNING_RETUNE the file will be created if it does not exist. */ +INFERENCE_ENGINE_DEPRECATED("The config key will be removed") DECLARE_CONFIG_KEY(TUNING_MODE); +INFERENCE_ENGINE_DEPRECATED("The config value will be removed") DECLARE_CONFIG_VALUE(TUNING_CREATE); +INFERENCE_ENGINE_DEPRECATED("The config value will be removed") DECLARE_CONFIG_VALUE(TUNING_USE_EXISTING); +INFERENCE_ENGINE_DEPRECATED("The config value will be removed") DECLARE_CONFIG_VALUE(TUNING_DISABLED); +INFERENCE_ENGINE_DEPRECATED("The config value will be removed") DECLARE_CONFIG_VALUE(TUNING_UPDATE); +INFERENCE_ENGINE_DEPRECATED("The config value will be removed") DECLARE_CONFIG_VALUE(TUNING_RETUNE); /** * @brief This key defines the tuning data filename to be created/used */ +INFERENCE_ENGINE_DEPRECATED("The config key will be removed") DECLARE_CONFIG_KEY(TUNING_FILE); } // namespace PluginConfigParams diff --git a/inference-engine/include/gpu/gpu_config.hpp b/inference-engine/include/gpu/gpu_config.hpp new file mode 100644 index 00000000000..96f8754ac86 --- /dev/null +++ b/inference-engine/include/gpu/gpu_config.hpp @@ -0,0 +1,120 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +/** + * @brief A header for advanced hardware related properties for GPU plugin + * To use in SetConfig() method of plugins + * + * @file gpu_config.hpp + */ +#pragma once + +#include "ie_plugin_config.hpp" + +namespace InferenceEngine { + +namespace Metrics { + +/** + * @def GPU_METRIC_KEY(name) + * @brief shortcut for defining GPU plugin metrics + */ +#define GPU_METRIC_KEY(name) METRIC_KEY(GPU_##name) +#define DECLARE_GPU_METRIC_KEY(name, ...) DECLARE_METRIC_KEY(GPU_##name, __VA_ARGS__) + +/** + * @def DECLARE_GPU_METRIC_VALUE(name) + * @brief shortcut for defining gpu metric values + */ +#define DECLARE_GPU_METRIC_VALUE(name) DECLARE_METRIC_VALUE(GPU_##name) + +/** + * @brief Metric which defines size of memory in bytes available for the device. 
For iGPU it returns host memory size, for dGPU - dedicated gpu memory size + */ +DECLARE_GPU_METRIC_KEY(DEVICE_TOTAL_MEM_SIZE, uint64_t); + +/** + * @brief Metric to get microarchitecture identifier in major.minor.revision format + */ +DECLARE_GPU_METRIC_KEY(UARCH_VERSION, std::string); + +/** + * @brief Metric to get count of execution units for current GPU + */ +DECLARE_GPU_METRIC_KEY(EXECUTION_UNITS_COUNT, int); + +/** + * @brief Possible return value for OPTIMIZATION_CAPABILITIES metric + * - "HW_MATMUL" - Defines if device has hardware block for matrix multiplication + */ +DECLARE_GPU_METRIC_VALUE(HW_MATMUL); + +} // namespace Metrics + +/** + * @brief GPU plugin configuration + */ +namespace GPUConfigParams { + +/** + * @brief shortcut for defining configuration keys + */ +#define GPU_CONFIG_KEY(name) InferenceEngine::GPUConfigParams::_CONFIG_KEY(GPU_##name) +#define DECLARE_GPU_CONFIG_KEY(name) DECLARE_CONFIG_KEY(GPU_##name) +#define DECLARE_GPU_CONFIG_VALUE(name) DECLARE_CONFIG_VALUE(GPU_##name) + +/** + * @brief This key instructs the GPU plugin to use the OpenCL queue priority hint + * as defined in https://www.khronos.org/registry/OpenCL/specs/opencl-2.1-extensions.pdf + * this option should be used with an unsigned integer value (1 is lowest priority) + * 0 means no priority hint is set and default queue is created. + */ +DECLARE_GPU_CONFIG_KEY(PLUGIN_PRIORITY); + +/** + * @brief This key instructs the GPU plugin to use throttle hints the OpenCL queue throttle hint + * as defined in https://www.khronos.org/registry/OpenCL/specs/opencl-2.1-extensions.pdf, + * chapter 9.19. This option should be used with an unsigned integer value (1 is lowest energy consumption) + * 0 means no throttle hint is set and default queue created. + */ +DECLARE_GPU_CONFIG_KEY(PLUGIN_THROTTLE); + +/** + * @brief This key should be set to correctly handle NV12 input without pre-processing. + * Turned off by default. + */ +DECLARE_GPU_CONFIG_KEY(NV12_TWO_INPUTS); + +/** + * @brief This key sets the max number of host threads that can be used by GPU plugin on model loading. + * Default value is maximum number of threads available in the environment. + */ +DECLARE_GPU_CONFIG_KEY(MAX_NUM_THREADS); + +/** + * @brief Turning on this key enables to unroll recurrent layers such as TensorIterator or Loop with fixed iteration count. + * This key is turned on by default. Turning this key on will achieve better inference performance for loops with not too many iteration counts (less than 16, as a rule of thumb). + * Turning this key off will achieve better performance for both graph loading time and inference time with many iteration counts (greater than 16). + * Note that turning this key on will increase the graph loading time in proportion to the iteration counts. + * Thus, this key should be turned off if graph loading time is considered to be most important target to optimize.*/ +DECLARE_GPU_CONFIG_KEY(ENABLE_LOOP_UNROLLING); + +} // namespace GPUConfigParams + +namespace PluginConfigParams { + +/** + * @brief Optimize GPU plugin execution to maximize throughput. 
+ * + * It is passed to Core::SetConfig(), this option should be used with values: + * - KEY_GPU_THROUGHPUT_AUTO creates bare minimum of streams that might improve performance in some cases, + * this option allows to enable throttle hint for opencl queue thus reduce CPU load without significant performance + * drop + * - a positive integer value creates the requested number of streams + */ +DECLARE_CONFIG_VALUE(GPU_THROUGHPUT_AUTO); +DECLARE_CONFIG_KEY(GPU_THROUGHPUT_STREAMS); +} // namespace PluginConfigParams + +} // namespace InferenceEngine diff --git a/inference-engine/samples/benchmark_app/main.cpp b/inference-engine/samples/benchmark_app/main.cpp index 849dc05ad33..cd7ddc641dc 100644 --- a/inference-engine/samples/benchmark_app/main.cpp +++ b/inference-engine/samples/benchmark_app/main.cpp @@ -4,8 +4,8 @@ #include #include -#include #include +#include #include #include #include @@ -282,7 +282,7 @@ int main(int argc, char* argv[]) { << "which releases another CPU thread (that is otherwise " "used by the GPU driver for active polling)" << slog::endl; - device_config[CLDNN_CONFIG_KEY(PLUGIN_THROTTLE)] = "1"; + device_config[GPU_CONFIG_KEY(PLUGIN_THROTTLE)] = "1"; } } else if (device == "MYRIAD") { device_config[CONFIG_KEY(LOG_LEVEL)] = CONFIG_VALUE(LOG_WARNING); diff --git a/inference-engine/samples/hello_query_device/README.md b/inference-engine/samples/hello_query_device/README.md index a185147f8ec..059077c48ad 100644 --- a/inference-engine/samples/hello_query_device/README.md +++ b/inference-engine/samples/hello_query_device/README.md @@ -63,20 +63,20 @@ Available devices: SUPPORTED_METRICS : [ AVAILABLE_DEVICES SUPPORTED_METRICS FULL_DEVICE_NAME OPTIMIZATION_CAPABILITIES SUPPORTED_CONFIG_KEYS RANGE_FOR_ASYNC_INFER_REQUESTS RANGE_FOR_STREAMS ] FULL_DEVICE_NAME : Intel(R) UHD Graphics 620 (iGPU) OPTIMIZATION_CAPABILITIES : [ FP32 BIN FP16 ] - SUPPORTED_CONFIG_KEYS : [ CACHE_DIR CLDNN_ENABLE_FP16_FOR_QUANTIZED_MODELS CLDNN_GRAPH_DUMPS_DIR CLDNN_MAX_NUM_THREADS CLDNN_MEM_POOL CLDNN_NV12_TWO_INPUTS CLDNN_PLUGIN_PRIORITY CLDNN_PLUGIN_THROTTLE CLDNN_SOURCES_DUMPS_DIR CLDNN_ENABLE_LOOP_UNROLLING CONFIG_FILE DEVICE_ID DUMP_KERNELS DYN_BATCH_ENABLED EXCLUSIVE_ASYNC_REQUESTS GPU_THROUGHPUT_STREAMS PERF_COUNT TUNING_FILE TUNING_MODE ] + SUPPORTED_CONFIG_KEYS : [ CACHE_DIR CLDNN_ENABLE_FP16_FOR_QUANTIZED_MODELS CLDNN_GRAPH_DUMPS_DIR GPU_MAX_NUM_THREADS CLDNN_MEM_POOL CLDNN_NV12_TWO_INPUTS CLDNN_PLUGIN_PRIORITY CLDNN_PLUGIN_THROTTLE CLDNN_SOURCES_DUMPS_DIR GPU_ENABLE_LOOP_UNROLLING CONFIG_FILE DEVICE_ID DUMP_KERNELS DYN_BATCH_ENABLED EXCLUSIVE_ASYNC_REQUESTS GPU_THROUGHPUT_STREAMS PERF_COUNT TUNING_FILE TUNING_MODE ] RANGE_FOR_ASYNC_INFER_REQUESTS : { 1, 2, 1 } RANGE_FOR_STREAMS : { 1, 2 } Default values for device configuration keys: CACHE_DIR : "" CLDNN_ENABLE_FP16_FOR_QUANTIZED_MODELS : YES CLDNN_GRAPH_DUMPS_DIR : "" - CLDNN_MAX_NUM_THREADS : 8 CLDNN_MEM_POOL : YES CLDNN_NV12_TWO_INPUTS : NO CLDNN_PLUGIN_PRIORITY : 0 CLDNN_PLUGIN_THROTTLE : 0 CLDNN_SOURCES_DUMPS_DIR : "" - CLDNN_ENABLE_LOOP_UNROLLING : YES + GPU_MAX_NUM_THREADS : 8 + GPU_ENABLE_LOOP_UNROLLING : YES CONFIG_FILE : "" DEVICE_ID : "" DUMP_KERNELS : NO diff --git a/inference-engine/src/cldnn_engine/cldnn_config.cpp b/inference-engine/src/cldnn_engine/cldnn_config.cpp index ff5d9693522..3de19bdff87 100644 --- a/inference-engine/src/cldnn_engine/cldnn_config.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_config.cpp @@ -5,6 +5,7 @@ #include #include +#include #include "cldnn_config.h" #include 
"cpp_interfaces/interface/ie_internal_plugin_config.hpp" #include "ie_api.h" @@ -39,6 +40,7 @@ static void createDirectory(std::string _path) { } } +IE_SUPPRESS_DEPRECATED_START void Config::UpdateFromMap(const std::map& configMap) { OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "Config::UpdateFromMap"); for (auto& kvp : configMap) { @@ -69,7 +71,8 @@ void Config::UpdateFromMap(const std::map& configMap) } else { IE_THROW(NotFound) << "Unsupported property value by plugin: " << val; } - } else if (key.compare(CLDNNConfigParams::KEY_CLDNN_PLUGIN_PRIORITY) == 0) { + } else if (key.compare(GPUConfigParams::KEY_GPU_PLUGIN_PRIORITY) == 0 || + key.compare(CLDNNConfigParams::KEY_CLDNN_PLUGIN_PRIORITY) == 0) { std::stringstream ss(val); uint32_t uVal(0); ss >> uVal; @@ -93,7 +96,8 @@ void Config::UpdateFromMap(const std::map& configMap) IE_THROW(ParameterMismatch) << "Unsupported queue priority value: " << uVal; } - } else if (key.compare(CLDNNConfigParams::KEY_CLDNN_PLUGIN_THROTTLE) == 0) { + } else if (key.compare(GPUConfigParams::KEY_GPU_PLUGIN_THROTTLE) == 0 || + key.compare(CLDNNConfigParams::KEY_CLDNN_PLUGIN_THROTTLE) == 0) { std::stringstream ss(val); uint32_t uVal(0); ss >> uVal; @@ -205,7 +209,8 @@ void Config::UpdateFromMap(const std::map& configMap) } else { IE_THROW(NotFound) << "Unsupported property value by plugin: " << val; } - } else if (key.compare(CLDNNConfigParams::KEY_CLDNN_NV12_TWO_INPUTS) == 0) { + } else if (key.compare(GPUConfigParams::KEY_GPU_NV12_TWO_INPUTS) == 0 || + key.compare(CLDNNConfigParams::KEY_CLDNN_NV12_TWO_INPUTS) == 0) { if (val.compare(PluginConfigParams::YES) == 0) { nv12_two_inputs = true; } else if (val.compare(PluginConfigParams::NO) == 0) { @@ -221,7 +226,7 @@ void Config::UpdateFromMap(const std::map& configMap) } else { IE_THROW(NotFound) << "Unsupported KEY_CLDNN_ENABLE_FP16_FOR_QUANTIZED_MODELS flag value: " << val; } - } else if (key.compare(CLDNNConfigParams::KEY_CLDNN_MAX_NUM_THREADS) == 0) { + } else if (key.compare(GPUConfigParams::KEY_GPU_MAX_NUM_THREADS) == 0) { int max_threads = std::max(1, static_cast(std::thread::hardware_concurrency())); try { int val_i = std::stoi(val); @@ -231,17 +236,17 @@ void Config::UpdateFromMap(const std::map& configMap) n_threads = val_i; } } catch (const std::exception&) { - IE_THROW() << "Wrong value for property key " << CLDNNConfigParams::KEY_CLDNN_MAX_NUM_THREADS << ": " << val + IE_THROW() << "Wrong value for property key " << GPUConfigParams::KEY_GPU_MAX_NUM_THREADS << ": " << val << "\nSpecify the number of threads use for build as an integer." 
<< "\nOut of range value will be set as a default value, maximum concurrent threads."; } - } else if (key.compare(CLDNNConfigParams::KEY_CLDNN_ENABLE_LOOP_UNROLLING) == 0) { + } else if (key.compare(GPUConfigParams::KEY_GPU_ENABLE_LOOP_UNROLLING) == 0) { if (val.compare(PluginConfigParams::YES) == 0) { enable_loop_unrolling = true; } else if (val.compare(PluginConfigParams::NO) == 0) { enable_loop_unrolling = false; } else { - IE_THROW(ParameterMismatch) << "Unsupported KEY_CLDNN_ENABLE_LOOP_UNROLLING flag value: " << val; + IE_THROW(ParameterMismatch) << "Unsupported KEY_GPU_ENABLE_LOOP_UNROLLING flag value: " << val; } } else { IE_THROW(NotFound) << "Unsupported property key by plugin: " << key; @@ -297,6 +302,7 @@ void Config::adjustKeyMapValues() { default: break; } key_config_map[CLDNNConfigParams::KEY_CLDNN_PLUGIN_PRIORITY] = qp; + key_config_map[GPUConfigParams::KEY_GPU_PLUGIN_PRIORITY] = qp; } { std::string qt = "0"; @@ -307,6 +313,7 @@ void Config::adjustKeyMapValues() { default: break; } key_config_map[CLDNNConfigParams::KEY_CLDNN_PLUGIN_THROTTLE] = qt; + key_config_map[GPUConfigParams::KEY_GPU_PLUGIN_THROTTLE] = qt; } { std::string tm = PluginConfigParams::TUNING_DISABLED; @@ -328,11 +335,13 @@ void Config::adjustKeyMapValues() { key_config_map[PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS] = std::to_string(throughput_streams); key_config_map[PluginConfigParams::KEY_DEVICE_ID] = device_id; key_config_map[PluginConfigParams::KEY_CONFIG_FILE] = ""; - key_config_map[CLDNNConfigParams::KEY_CLDNN_MAX_NUM_THREADS] = std::to_string(n_threads); + key_config_map[GPUConfigParams::KEY_GPU_MAX_NUM_THREADS] = std::to_string(n_threads); if (enable_loop_unrolling) - key_config_map[CLDNNConfigParams::KEY_CLDNN_ENABLE_LOOP_UNROLLING] = PluginConfigParams::YES; + key_config_map[GPUConfigParams::KEY_GPU_ENABLE_LOOP_UNROLLING] = PluginConfigParams::YES; else - key_config_map[CLDNNConfigParams::KEY_CLDNN_ENABLE_LOOP_UNROLLING] = PluginConfigParams::NO; + key_config_map[GPUConfigParams::KEY_GPU_ENABLE_LOOP_UNROLLING] = PluginConfigParams::NO; } +IE_SUPPRESS_DEPRECATED_END + } // namespace CLDNNPlugin diff --git a/inference-engine/src/cldnn_engine/cldnn_engine.cpp b/inference-engine/src/cldnn_engine/cldnn_engine.cpp index 86b9f2e4b95..171919a8077 100644 --- a/inference-engine/src/cldnn_engine/cldnn_engine.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_engine.cpp @@ -79,7 +79,7 @@ #include "cldnn_executable_network.h" #include "cldnn_custom_layer.h" #include "cldnn_itt.h" -#include "cldnn/cldnn_config.hpp" +#include "gpu/gpu_config.hpp" #ifdef __linux__ # include diff --git a/inference-engine/src/cldnn_engine/cldnn_executable_network.cpp b/inference-engine/src/cldnn_engine/cldnn_executable_network.cpp index c2289fa9fb0..5191da35c2e 100644 --- a/inference-engine/src/cldnn_engine/cldnn_executable_network.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_executable_network.cpp @@ -16,7 +16,6 @@ #include "cldnn_itt.h" #include -#include #include "cldnn_infer_request.h" #include #include "cldnn_async_infer_request.h" diff --git a/inference-engine/src/cldnn_engine/cldnn_graph.cpp b/inference-engine/src/cldnn_engine/cldnn_graph.cpp index 04d40c9815d..1f835d8ac2c 100644 --- a/inference-engine/src/cldnn_engine/cldnn_graph.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_graph.cpp @@ -16,7 +16,6 @@ #include "cldnn_graph.h" #include "simple_math.h" #include -#include #include "cldnn_infer_request.h" #include #include diff --git 
a/inference-engine/tests/functional/plugin/gpu/remote_blob_tests/cldnn_remote_blob_tests.cpp b/inference-engine/tests/functional/plugin/gpu/remote_blob_tests/cldnn_remote_blob_tests.cpp index e6415688de2..6837c0b84c3 100644 --- a/inference-engine/tests/functional/plugin/gpu/remote_blob_tests/cldnn_remote_blob_tests.cpp +++ b/inference-engine/tests/functional/plugin/gpu/remote_blob_tests/cldnn_remote_blob_tests.cpp @@ -9,7 +9,7 @@ #include -#include +#include #include #include #include @@ -175,7 +175,7 @@ TEST_P(BatchedBlob_Test, canInputNV12) { /* XXX: is it correct to set KEY_CLDNN_NV12_TWO_INPUTS in case of remote blob? */ auto exec_net_b = ie.LoadNetwork(net_remote, CommonTestUtils::DEVICE_GPU, - { { CLDNNConfigParams::KEY_CLDNN_NV12_TWO_INPUTS, PluginConfigParams::YES} }); + { { GPUConfigParams::KEY_GPU_NV12_TWO_INPUTS, PluginConfigParams::YES} }); auto inf_req_remote = exec_net_b.CreateInferRequest(); auto cldnn_context = exec_net_b.GetContext(); cl_context ctx = std::dynamic_pointer_cast(cldnn_context)->get(); diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/config.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/config.cpp index e21d610db56..a8c039e4391 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/config.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/config.cpp @@ -4,6 +4,7 @@ #include "behavior/config.hpp" #include "cldnn/cldnn_config.hpp" +#include "gpu/gpu_config.hpp" using namespace BehaviorTestsDefinitions; namespace { @@ -12,6 +13,7 @@ namespace { InferenceEngine::Precision::FP16 }; + IE_SUPPRESS_DEPRECATED_START const std::vector> inconfigs = { {{InferenceEngine::PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS, "OFF"}}, {{InferenceEngine::PluginConfigParams::KEY_PERF_COUNT, "ON"}}, @@ -46,6 +48,7 @@ namespace { {{InferenceEngine::KEY_AUTO_DEVICE_LIST , CommonTestUtils::DEVICE_GPU}, {InferenceEngine::PluginConfigParams::KEY_DEVICE_ID, "DEVICE_UNKNOWN"}} }; + IE_SUPPRESS_DEPRECATED_END INSTANTIATE_TEST_CASE_P(smoke_BehaviorTests, IncorrectConfigTests, ::testing::Combine( @@ -73,6 +76,29 @@ namespace { {} }; + IE_SUPPRESS_DEPRECATED_START + const std::vector> conf_gpu = { + // Deprecated + {{InferenceEngine::CLDNNConfigParams::KEY_CLDNN_NV12_TWO_INPUTS, InferenceEngine::PluginConfigParams::YES}}, + {{InferenceEngine::CLDNNConfigParams::KEY_CLDNN_NV12_TWO_INPUTS, InferenceEngine::PluginConfigParams::NO}}, + {{InferenceEngine::CLDNNConfigParams::KEY_CLDNN_PLUGIN_THROTTLE, "0"}}, + {{InferenceEngine::CLDNNConfigParams::KEY_CLDNN_PLUGIN_THROTTLE, "1"}}, + {{InferenceEngine::CLDNNConfigParams::KEY_CLDNN_PLUGIN_PRIORITY, "0"}}, + {{InferenceEngine::CLDNNConfigParams::KEY_CLDNN_PLUGIN_PRIORITY, "1"}}, + + {{InferenceEngine::GPUConfigParams::KEY_GPU_NV12_TWO_INPUTS, InferenceEngine::PluginConfigParams::YES}}, + {{InferenceEngine::GPUConfigParams::KEY_GPU_NV12_TWO_INPUTS, InferenceEngine::PluginConfigParams::NO}}, + {{InferenceEngine::GPUConfigParams::KEY_GPU_PLUGIN_THROTTLE, "0"}}, + {{InferenceEngine::GPUConfigParams::KEY_GPU_PLUGIN_THROTTLE, "1"}}, + {{InferenceEngine::GPUConfigParams::KEY_GPU_PLUGIN_PRIORITY, "0"}}, + {{InferenceEngine::GPUConfigParams::KEY_GPU_PLUGIN_PRIORITY, "1"}}, + {{InferenceEngine::GPUConfigParams::KEY_GPU_MAX_NUM_THREADS, "1"}}, + {{InferenceEngine::GPUConfigParams::KEY_GPU_MAX_NUM_THREADS, "4"}}, + {{InferenceEngine::GPUConfigParams::KEY_GPU_ENABLE_LOOP_UNROLLING, 
InferenceEngine::PluginConfigParams::YES}}, + {{InferenceEngine::GPUConfigParams::KEY_GPU_ENABLE_LOOP_UNROLLING, InferenceEngine::PluginConfigParams::NO}}, + }; + IE_SUPPRESS_DEPRECATED_END + const std::vector> multiconf = { {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_GPU}} }; @@ -92,6 +118,13 @@ namespace { ::testing::ValuesIn(conf)), CorrectConfigAPITests::getTestCaseName); + INSTANTIATE_TEST_CASE_P(smoke_GPU_BehaviorTests, CorrectConfigAPITests, + ::testing::Combine( + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_GPU), + ::testing::ValuesIn(conf_gpu)), + CorrectConfigAPITests::getTestCaseName); + INSTANTIATE_TEST_CASE_P(smoke_Multi_BehaviorTests, CorrectConfigAPITests, ::testing::Combine( ::testing::ValuesIn(netPrecisions), @@ -142,4 +175,4 @@ namespace { IncorrectConfigAPITests::getTestCaseName); -} // namespace \ No newline at end of file +} // namespace diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/core_integration.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/core_integration.cpp index 3765c75864f..68b23831e47 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/core_integration.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/core_integration.cpp @@ -11,7 +11,7 @@ #endif #include "gpu/gpu_context_api_ocl.hpp" -#include "cldnn/cldnn_config.hpp" +#include "gpu/gpu_config.hpp" using namespace BehaviorTestsDefinitions; diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/infer_request_input.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/infer_request_input.cpp index e15ea827caa..59f4dd21677 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/infer_request_input.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/infer_request_input.cpp @@ -3,7 +3,7 @@ // #include "behavior/infer_request_input.hpp" -#include "cldnn/cldnn_config.hpp" +#include "gpu/gpu_config.hpp" using namespace BehaviorTestsDefinitions; namespace { diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/infer_request_output.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/infer_request_output.cpp index 6c38f5c841c..1135f6d9f7d 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/infer_request_output.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/infer_request_output.cpp @@ -3,7 +3,7 @@ // #include "behavior/infer_request_output.hpp" -#include "cldnn/cldnn_config.hpp" +#include "gpu/gpu_config.hpp" using namespace BehaviorTestsDefinitions; namespace { diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/test_plugin.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/test_plugin.cpp index 51979116646..729bf57c64a 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/test_plugin.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/test_plugin.cpp @@ -3,7 +3,7 @@ // #include "behavior/test_plugin.hpp" -#include "cldnn/cldnn_config.hpp" +#include "gpu/gpu_config.hpp" using namespace BehaviorTestsDefinitions; namespace { diff --git 
a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/multi/gpu_remote_blob_tests.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/multi/gpu_remote_blob_tests.cpp index da308c032e2..4fffb2cad6e 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/multi/gpu_remote_blob_tests.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/multi/gpu_remote_blob_tests.cpp @@ -4,7 +4,7 @@ #include #include -#include "cldnn/cldnn_config.hpp" +#include "gpu/gpu_config.hpp" #include "multi/multi_remote_blob_tests.hpp" #include "common_test_utils/test_constants.hpp" diff --git a/inference-engine/tests/functional/plugin/gpu/single_layer_tests/tensor_iterator.cpp b/inference-engine/tests/functional/plugin/gpu/single_layer_tests/tensor_iterator.cpp index aae2e0db8fa..bce1ef10691 100644 --- a/inference-engine/tests/functional/plugin/gpu/single_layer_tests/tensor_iterator.cpp +++ b/inference-engine/tests/functional/plugin/gpu/single_layer_tests/tensor_iterator.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include "common_test_utils/test_constants.hpp" #include "ie_api.h" @@ -289,8 +289,8 @@ namespace { InferenceEngine::Precision::FP16, }), // precision ::testing::ValuesIn(std::vector { - {CommonTestUtils::DEVICE_GPU, {{CLDNNConfigParams::KEY_CLDNN_ENABLE_LOOP_UNROLLING, PluginConfigParams::YES}}}, - {CommonTestUtils::DEVICE_GPU, {{CLDNNConfigParams::KEY_CLDNN_ENABLE_LOOP_UNROLLING, PluginConfigParams::NO}}} + {CommonTestUtils::DEVICE_GPU, {{GPUConfigParams::KEY_GPU_ENABLE_LOOP_UNROLLING, PluginConfigParams::YES}}}, + {CommonTestUtils::DEVICE_GPU, {{GPUConfigParams::KEY_GPU_ENABLE_LOOP_UNROLLING, PluginConfigParams::NO}}} })), // configuration TensorIteratorWithConfigTest::getTestCaseName); } // namespace diff --git a/tools/benchmark/main.py b/tools/benchmark/main.py index 29aff45742e..26ef6246f0c 100644 --- a/tools/benchmark/main.py +++ b/tools/benchmark/main.py @@ -152,7 +152,7 @@ def run(args): if MULTI_DEVICE_NAME in device_name and CPU_DEVICE_NAME in device_name: logger.warning("Turn on GPU trottling. 
Multi-device execution with the CPU + GPU performs best with GPU trottling hint, " + "which releases another CPU thread (that is otherwise used by the GPU driver for active polling)") - config[device]['CLDNN_PLUGIN_THROTTLE'] = '1' + config[device]['GPU_PLUGIN_THROTTLE'] = '1' elif device == MYRIAD_DEVICE_NAME: set_throughput_streams() config[device]['LOG_LEVEL'] = 'LOG_INFO' From 2c3abf8f42e9a47d9e701779d333a732bbb24f68 Mon Sep 17 00:00:00 2001 From: Roman Kazantsev Date: Wed, 9 Jun 2021 10:00:20 +0300 Subject: [PATCH 36/41] Use int64 for TopK indices output (#6085) Signed-off-by: Roman Kazantsev --- model-optimizer/extensions/front/onnx/top_k_ext.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/model-optimizer/extensions/front/onnx/top_k_ext.py b/model-optimizer/extensions/front/onnx/top_k_ext.py index bae2bec3176..e073c593531 100644 --- a/model-optimizer/extensions/front/onnx/top_k_ext.py +++ b/model-optimizer/extensions/front/onnx/top_k_ext.py @@ -1,6 +1,8 @@ # Copyright (C) 2018-2021 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import numpy as np + from extensions.ops.topk import TopK from mo.front.extractor import FrontExtractorOp from mo.front.onnx.extractors.utils import onnx_attr, onnx_node_has_attr @@ -18,7 +20,8 @@ class TopKExtractor(FrontExtractorOp): TopK-11 (k as input, sorting manipulations through `sorted` and `largest` attrs) """ attrs = { - 'axis': onnx_attr(node, 'axis', 'i', default=-1) + 'axis': onnx_attr(node, 'axis', 'i', default=-1), + 'index_element_type': np.int64 } if onnx_node_has_attr(node, 'k'): attrs['k'] = onnx_attr(node, 'k', 'i') From 6e2d13937aea1b6a694767b49bb4ac8508f306e8 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Wed, 9 Jun 2021 10:09:25 +0300 Subject: [PATCH 37/41] ImportNetwork with explicit device name only (#5689) * Import with explicit name * Fixed LoadHetero_MultiArchs tests * Fixed MYRIAD tests on Windows * Fixed compilation in tests * Updated tesets * Fixed test * Removed useless lines * Removed custom VPU tests, replaced with common ones * Fixed Windows * Reverted SKIP_IF_NOT_IMPLEMENTED macro --- docs/IE_PLUGIN_DG/ExecutableNetwork.md | 11 +- docs/IE_PLUGIN_DG/Plugin.md | 10 +- .../src/template_executable_network.cpp | 8 +- .../src/template_executable_network.hpp | 2 +- docs/template_plugin/src/template_plugin.cpp | 8 +- docs/template_plugin/src/template_plugin.hpp | 2 +- inference-engine/include/ie_core.hpp | 11 +- .../hetero_executable_network.cpp | 2 +- .../hetero_executable_network.hpp | 2 +- .../src/hetero_plugin/hetero_plugin.cpp | 9 +- .../src/hetero_plugin/hetero_plugin.hpp | 5 +- .../ie_iexecutable_network_internal.cpp | 21 +- .../interface/ie_iplugin_internal.cpp | 43 +- .../src/inference_engine/ie_core.cpp | 52 +-- .../ie_iexecutable_network_internal.hpp | 8 - .../interface/ie_iplugin_internal.hpp | 27 +- inference-engine/src/plugin_api/ie_icore.hpp | 12 - .../myriad_plugin/myriad_executable_network.h | 10 - .../src/vpu/myriad_plugin/myriad_plugin.cpp | 14 - .../src/vpu/myriad_plugin/myriad_plugin.h | 4 - .../inference_engine/caching_test.cpp | 377 +++++++++--------- .../skip_tests_config.cpp | 2 + .../skip_tests_config.cpp | 2 + .../behavior/core_integration.cpp | 35 +- .../include/behavior/core_integration.hpp | 49 ++- .../impl/mock_inference_plugin_internal.hpp | 7 +- .../mock_iexecutable_network_internal.hpp | 4 - .../mocks/mock_engine/mock_plugin.cpp | 10 +- .../mocks/mock_engine/mock_plugin.hpp | 8 +- .../cpp_interfaces/ie_plugin_test.cpp | 23 -- 30 files changed, 316 
insertions(+), 462 deletions(-) diff --git a/docs/IE_PLUGIN_DG/ExecutableNetwork.md b/docs/IE_PLUGIN_DG/ExecutableNetwork.md index c5bfd889857..ae82b05e4ed 100644 --- a/docs/IE_PLUGIN_DG/ExecutableNetwork.md +++ b/docs/IE_PLUGIN_DG/ExecutableNetwork.md @@ -49,20 +49,15 @@ The function accepts a const shared pointer to `ngraph::Function` object and per This constructor creates a backend specific graph by importing from a stream object: -> **NOTE**: The export of backend specific graph is done in the `ExportImpl` method, and data formats must be the same for both import and export. +> **NOTE**: The export of backend specific graph is done in the `Export` method, and data formats must be the same for both import and export. @snippet src/template_executable_network.cpp executable_network:ctor_import_stream -### `ExportImpl()` - -**Implementation details:** -Base InferenceEngine::ExecutableNetworkThreadSafeDefault class implements the public InferenceEngine::ExecutableNetworkThreadSafeDefault::Export method as following: -- Writes `_plugin->GetName()` to the `model` stream. -- Calls the `ExportImpl` method defined in a derived class to dump a backend specific graph. +### `Export()` The implementation of the method should write all data to the `model` stream, which is required to import a backend specific graph later in the `Plugin::Import` method: -@snippet src/template_executable_network.cpp executable_network:export_impl +@snippet src/template_executable_network.cpp executable_network:export ### `CreateInferRequest()` diff --git a/docs/IE_PLUGIN_DG/Plugin.md b/docs/IE_PLUGIN_DG/Plugin.md index cadc8660fd3..6003eb691fc 100644 --- a/docs/IE_PLUGIN_DG/Plugin.md +++ b/docs/IE_PLUGIN_DG/Plugin.md @@ -159,21 +159,13 @@ The snippet below provides an example of the implementation for `GetMetric`: > **NOTE**: If an unsupported metric key is passed to the function, it must throw an exception. -### `ImportNetworkImpl()` +### `ImportNetwork()` The importing network mechanism allows to import a previously exported backend specific graph and wrap it using an [ExecutableNetwork](@ref executable_network) object. This functionality is useful if backend specific graph compilation takes significant time and/or cannot be done on a target host device due to other reasons. -**Implementation details:** The base plugin class InferenceEngine::IInferencePlugin implements InferenceEngine::IInferencePlugin::ImportNetwork -as follows: exports a device type (InferenceEngine::IInferencePlugin::_pluginName) and then calls `ImportNetworkImpl`, -which is implemented in a derived class. -If a plugin cannot use the base implementation InferenceEngine::IInferencePlugin::ImportNetwork, it can override base -implementation and define an output blob structure up to its needs. This -can be useful if a plugin exports a blob in a special format for integration with other frameworks -where a common Inference Engine header from a base class implementation is not appropriate. - During export of backend specific graph using `ExecutableNetwork::Export`, a plugin may export any type of information it needs to import a compiled graph properly and check its correctness. 
For example, the export information may include: diff --git a/docs/template_plugin/src/template_executable_network.cpp b/docs/template_plugin/src/template_executable_network.cpp index e46bd63e5a0..4aba4622e50 100644 --- a/docs/template_plugin/src/template_executable_network.cpp +++ b/docs/template_plugin/src/template_executable_network.cpp @@ -175,9 +175,9 @@ InferenceEngine::Parameter TemplatePlugin::ExecutableNetwork::GetMetric(const st } // ! [executable_network:get_metric] -// ! [executable_network:export_impl] -void TemplatePlugin::ExecutableNetwork::ExportImpl(std::ostream& modelStream) { - OV_ITT_SCOPED_TASK(itt::domains::TemplatePlugin, "ExecutableNetwork::ExportImpl"); +// ! [executable_network:export] +void TemplatePlugin::ExecutableNetwork::Export(std::ostream& modelStream) { + OV_ITT_SCOPED_TASK(itt::domains::TemplatePlugin, "ExecutableNetwork::Export"); // Note: custom ngraph extensions are not supported std::map custom_opsets; @@ -198,4 +198,4 @@ void TemplatePlugin::ExecutableNetwork::ExportImpl(std::ostream& modelStream) { // TODO: implement network precision, layout, preprocessing info serialization } -// ! [executable_network:export_impl] +// ! [executable_network:export] diff --git a/docs/template_plugin/src/template_executable_network.hpp b/docs/template_plugin/src/template_executable_network.hpp index ca3bca11ba8..a68df02f958 100644 --- a/docs/template_plugin/src/template_executable_network.hpp +++ b/docs/template_plugin/src/template_executable_network.hpp @@ -30,7 +30,7 @@ public: // Methods from a base class ExecutableNetworkThreadSafeDefault - void ExportImpl(std::ostream& model) override; + void Export(std::ostream& model) override; InferenceEngine::IInferRequestInternal::Ptr CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs, InferenceEngine::OutputsDataMap networkOutputs) override; InferenceEngine::IInferRequestInternal::Ptr CreateInferRequest() override; diff --git a/docs/template_plugin/src/template_plugin.cpp b/docs/template_plugin/src/template_plugin.cpp index 87a509c8a77..a0f7a30ee17 100644 --- a/docs/template_plugin/src/template_plugin.cpp +++ b/docs/template_plugin/src/template_plugin.cpp @@ -95,14 +95,14 @@ InferenceEngine::IExecutableNetworkInternal::Ptr Plugin::LoadExeNetworkImpl(cons } // ! [plugin:load_exe_network_impl] -// ! [plugin:import_network_impl] -InferenceEngine::IExecutableNetworkInternal::Ptr Plugin::ImportNetworkImpl(std::istream& modelStream, const std::map& config) { - OV_ITT_SCOPED_TASK(itt::domains::TemplatePlugin, "Plugin::ImportNetworkImpl"); +// ! [plugin:import_network] +InferenceEngine::IExecutableNetworkInternal::Ptr Plugin::ImportNetwork(std::istream& modelStream, const std::map& config) { + OV_ITT_SCOPED_TASK(itt::domains::TemplatePlugin, "Plugin::ImportNetwork"); auto fullConfig = Configuration {config, _cfg}; return std::make_shared(modelStream, fullConfig, std::static_pointer_cast(shared_from_this())); } -// ! [plugin:import_network_impl] +// ! [plugin:import_network] // ! 
[plugin:query_network] InferenceEngine::QueryNetworkResult Plugin::QueryNetwork(const InferenceEngine::CNNNetwork& network, const ConfigMap& config) const { diff --git a/docs/template_plugin/src/template_plugin.hpp b/docs/template_plugin/src/template_plugin.hpp index ef2b506d497..71c37410ea7 100644 --- a/docs/template_plugin/src/template_plugin.hpp +++ b/docs/template_plugin/src/template_plugin.hpp @@ -28,7 +28,7 @@ public: void AddExtension(const std::shared_ptr& extension) override; InferenceEngine::Parameter GetConfig(const std::string& name, const std::map& options) const override; InferenceEngine::Parameter GetMetric(const std::string& name, const std::map& options) const override; - InferenceEngine::IExecutableNetworkInternal::Ptr ImportNetworkImpl(std::istream& model, const std::map& config) override; + InferenceEngine::IExecutableNetworkInternal::Ptr ImportNetwork(std::istream& model, const std::map& config) override; private: friend class ExecutableNetwork; diff --git a/inference-engine/include/ie_core.hpp b/inference-engine/include/ie_core.hpp index e87f8c65719..96f8d6b58af 100644 --- a/inference-engine/include/ie_core.hpp +++ b/inference-engine/include/ie_core.hpp @@ -174,9 +174,18 @@ public: * operation* * @return An executable network reference */ - ExecutableNetwork ImportNetwork(std::istream& networkModel, const std::string& deviceName = {}, + ExecutableNetwork ImportNetwork(std::istream& networkModel, const std::string& deviceName, const std::map& config = {}); + /** + * @deprecated Use Core::ImportNetwork with explicit device name + * @brief Creates an executable network from a previously exported network + * @param networkModel network model stream + * @return An executable network reference + */ + INFERENCE_ENGINE_DEPRECATED("Use Core::ImportNetwork with explicit device name") + ExecutableNetwork ImportNetwork(std::istream& networkModel); + /** * @brief Creates an executable network from a previously exported network within a specified * remote context. 
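For context, here is a minimal usage sketch of the export/import flow after this API change, assuming a device that supports Import/Export; the `MYRIAD` device string and the file names are illustrative placeholders, not taken from the patch.

```
#include <fstream>
#include <ie_core.hpp>

int main() {
    InferenceEngine::Core core;

    // Compile and export once (device and paths are placeholders)
    auto compiled = core.LoadNetwork(core.ReadNetwork("model.xml"), "MYRIAD");
    compiled.Export("model.blob");

    // Import later: the device name is now an explicit, required argument
    std::ifstream blob("model.blob", std::ios::binary);
    auto imported = core.ImportNetwork(blob, "MYRIAD");
    auto request = imported.CreateInferRequest();  // inputs still need to be set before Infer()
    return 0;
}
```

The stream-only overload `Core::ImportNetwork(std::istream&)` remains solely for backward compatibility and is marked deprecated by this change.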
diff --git a/inference-engine/src/hetero_plugin/hetero_executable_network.cpp b/inference-engine/src/hetero_plugin/hetero_executable_network.cpp index 58fb35111af..9f0135aa25e 100644 --- a/inference-engine/src/hetero_plugin/hetero_executable_network.cpp +++ b/inference-engine/src/hetero_plugin/hetero_executable_network.cpp @@ -550,7 +550,7 @@ HeteroExecutableNetwork::HeteroExecutableNetwork(std::istream& this->SetPointerToPlugin(_heteroPlugin->shared_from_this()); } -void HeteroExecutableNetwork::ExportImpl(std::ostream& heteroModel) { +void HeteroExecutableNetwork::Export(std::ostream& heteroModel) { pugi::xml_document doc; auto heteroNode = doc.append_child("hetero"); heteroNode.append_attribute("name").set_value(_name.c_str()); diff --git a/inference-engine/src/hetero_plugin/hetero_executable_network.hpp b/inference-engine/src/hetero_plugin/hetero_executable_network.hpp index 85fc8d9c19c..59574ca2ce7 100644 --- a/inference-engine/src/hetero_plugin/hetero_executable_network.hpp +++ b/inference-engine/src/hetero_plugin/hetero_executable_network.hpp @@ -56,7 +56,7 @@ public: InferenceEngine::Parameter GetMetric(const std::string &name) const override; - void ExportImpl(std::ostream& modelFile) override; + void Export(std::ostream& modelFile) override; private: void InitCNNImpl(const InferenceEngine::CNNNetwork& network); diff --git a/inference-engine/src/hetero_plugin/hetero_plugin.cpp b/inference-engine/src/hetero_plugin/hetero_plugin.cpp index 1d8647716af..09986b1e48e 100644 --- a/inference-engine/src/hetero_plugin/hetero_plugin.cpp +++ b/inference-engine/src/hetero_plugin/hetero_plugin.cpp @@ -57,13 +57,8 @@ InferenceEngine::IExecutableNetworkInternal::Ptr Engine::LoadExeNetworkImpl(cons return std::make_shared(network, mergeConfigs(_config, config), this); } -InferenceEngine::IExecutableNetworkInternal::Ptr Engine::ImportNetworkImpl(std::istream& heteroModel, const Configs& config) { - if (GetCore() == nullptr) { - IE_THROW() << "Please, work with HETERO device via InferencEngine::Core object"; - } - - return std::make_shared(heteroModel, - mergeConfigs(_config, config), this); +InferenceEngine::IExecutableNetworkInternal::Ptr Engine::ImportNetwork(std::istream& heteroModel, const std::map& config) { + return std::make_shared(heteroModel, mergeConfigs(_config, config), this); } Engine::Configs Engine::GetSupportedConfig(const Engine::Configs& config, const std::string & deviceName) const { diff --git a/inference-engine/src/hetero_plugin/hetero_plugin.hpp b/inference-engine/src/hetero_plugin/hetero_plugin.hpp index 2b5a93b829b..fbc602116d1 100644 --- a/inference-engine/src/hetero_plugin/hetero_plugin.hpp +++ b/inference-engine/src/hetero_plugin/hetero_plugin.hpp @@ -37,10 +37,11 @@ public: InferenceEngine::Parameter GetConfig(const std::string& name, const std::map & options) const override; - InferenceEngine::IExecutableNetworkInternal::Ptr ImportNetworkImpl(std::istream& heteroModel, const Configs& config) override; + InferenceEngine::IExecutableNetworkInternal::Ptr + ImportNetwork(std::istream& heteroModel, const std::map& config) override; DeviceMetaInformationMap GetDevicePlugins(const std::string& targetFallback, - const Configs & localConfig) const; + const Configs & localConfig) const; private: Configs GetSupportedConfig(const Configs& config, const std::string & deviceName) const; diff --git a/inference-engine/src/inference_engine/cpp_interfaces/interface/ie_iexecutable_network_internal.cpp 
b/inference-engine/src/inference_engine/cpp_interfaces/interface/ie_iexecutable_network_internal.cpp index bf3086551c1..6b5bb34c970 100644 --- a/inference-engine/src/inference_engine/cpp_interfaces/interface/ie_iexecutable_network_internal.cpp +++ b/inference-engine/src/inference_engine/cpp_interfaces/interface/ie_iexecutable_network_internal.cpp @@ -49,19 +49,17 @@ std::shared_ptr IExecutableNetworkInternal::CreateInferRe } void IExecutableNetworkInternal::Export(const std::string& modelFileName) { - // we need to write to stringstream first - // because in case of exception in ExportImpl the file is not created - std::stringstream strm; - ExportImpl(strm); - std::ofstream(modelFileName.c_str()) << strm.rdbuf(); + std::ofstream modelFile(modelFileName, std::ios::out | std::ios::binary); + + if (modelFile.is_open()) { + Export(modelFile); + } else { + IE_THROW() << "The " << modelFileName << " file can not be opened for Export"; + } } void IExecutableNetworkInternal::Export(std::ostream& networkModel) { - std::stringstream strm; - strm.write(exportMagic.data(), exportMagic.size()); - strm << _plugin->GetName() << std::endl; - ExportImpl(strm); - networkModel << strm.rdbuf(); + IE_THROW(NotImplemented); } CNNNetwork IExecutableNetworkInternal::GetExecGraphInfo() { @@ -97,7 +95,4 @@ std::shared_ptr IExecutableNetworkInternal::CreateInferRe IE_THROW(NotImplemented); } -void IExecutableNetworkInternal::ExportImpl(std::ostream&) { - IE_THROW(NotImplemented); -} } // namespace InferenceEngine diff --git a/inference-engine/src/inference_engine/cpp_interfaces/interface/ie_iplugin_internal.cpp b/inference-engine/src/inference_engine/cpp_interfaces/interface/ie_iplugin_internal.cpp index 5637701754e..88599aa78b3 100644 --- a/inference-engine/src/inference_engine/cpp_interfaces/interface/ie_iplugin_internal.cpp +++ b/inference-engine/src/inference_engine/cpp_interfaces/interface/ie_iplugin_internal.cpp @@ -16,24 +16,12 @@ #include #include +#include #include #include #include namespace InferenceEngine { -namespace { -void parsePluginName(std::istream& networkModel) { - ExportMagic magic = {}; - auto currentPos = networkModel.tellg(); - networkModel.read(magic.data(), magic.size()); - auto exportedWithName = (exportMagic == magic); - if (exportedWithName) { - networkModel.ignore(std::numeric_limits::max(), '\n'); - } else { - networkModel.seekg(currentPos, networkModel.beg); - } -} -} // namespace PreProcessInfo copyPreProcess(const PreProcessInfo& from) { PreProcessInfo to = from; @@ -170,22 +158,26 @@ RemoteContext::Ptr IInferencePlugin::GetDefaultContext(const ParamMap&) { IE_THROW(NotImplemented); } -std::shared_ptr IInferencePlugin::ImportNetwork(const std::string&, - const std::map&) { - IE_THROW(NotImplemented); +std::shared_ptr IInferencePlugin::ImportNetwork(const std::string& modelFileName, + const std::map& config) { + std::ifstream blobFile(modelFileName, std::ios::binary); + + if (!blobFile.is_open()) { + IE_THROW(NetworkNotRead); + } + + return ImportNetwork(blobFile, config); } std::shared_ptr IInferencePlugin::ImportNetwork(std::istream& networkModel, const std::map& config) { - parsePluginName(networkModel); - return ImportNetworkImpl(networkModel, config); + IE_THROW(NotImplemented); } std::shared_ptr IInferencePlugin::ImportNetwork(std::istream& networkModel, const std::shared_ptr& context, const std::map& config) { - parsePluginName(networkModel); - return ImportNetworkImpl(networkModel, context, config); + IE_THROW(NotImplemented); } void IInferencePlugin::SetCore(ICore* core) { 
@@ -213,17 +205,6 @@ std::shared_ptr IInferencePlugin::LoadExeNetworkImpl IE_THROW(NotImplemented); } -std::shared_ptr IInferencePlugin::ImportNetworkImpl(std::istream&, - const std::map&) { - IE_THROW(NotImplemented); -} - -std::shared_ptr IInferencePlugin::ImportNetworkImpl(std::istream&, - const std::shared_ptr&, - const std::map&) { - IE_THROW(NotImplemented); -} - void IInferencePlugin::SetExeNetworkInfo(const std::shared_ptr& exeNetwork, const ConstInputsDataMap& inputs, const ConstOutputsDataMap& outputs) { diff --git a/inference-engine/src/inference_engine/ie_core.cpp b/inference-engine/src/inference_engine/ie_core.cpp index 28563a29b62..63814215037 100644 --- a/inference-engine/src/inference_engine/ie_core.cpp +++ b/inference-engine/src/inference_engine/ie_core.cpp @@ -395,6 +395,7 @@ public: opsetNames.insert("opset4"); opsetNames.insert("opset5"); opsetNames.insert("opset6"); + opsetNames.insert("opset7"); } ~Impl() override = default; @@ -566,18 +567,6 @@ public: SoExecutableNetworkInternal ImportNetwork(std::istream& networkModel, const std::string& deviceName, const std::map& config) override { auto parsed = parseDeviceNameIntoConfig(deviceName, config); - - if (parsed._deviceName.empty()) { - ExportMagic magic = {}; - auto currentPos = networkModel.tellg(); - networkModel.read(magic.data(), magic.size()); - auto exportedWithName = (exportMagic == magic); - if (exportedWithName) { - std::getline(networkModel, parsed._deviceName); - } - networkModel.seekg(currentPos, networkModel.beg); - } - return GetCPPPluginByName(parsed._deviceName).ImportNetwork(networkModel, parsed._config); } @@ -1022,18 +1011,6 @@ void Core::AddExtension(const IExtensionPtr& extension) { ExecutableNetwork Core::ImportNetwork(const std::string& modelFileName, const std::string& deviceName, const std::map& config) { OV_ITT_SCOPED_TASK(itt::domains::IE, "Core::ImportNetwork"); - - // TODO: remove once NotImplemented exception is deprecated and not used - if (deviceName.find("HETERO") == 0) { - IE_THROW() << "HETERO device does not support ImportNetwork"; - } - if (deviceName.find("MULTI") == 0) { - IE_THROW() << "MULTI device does not support ImportNetwork"; - } - if (deviceName.find("AUTO") == 0) { - IE_THROW() << "AUTO device does not support ImportNetwork"; - } - auto parsed = parseDeviceNameIntoConfig(deviceName, config); auto exec = _impl->GetCPPPluginByName(parsed._deviceName).ImportNetwork(modelFileName, parsed._config); return { exec, exec }; @@ -1041,10 +1018,33 @@ ExecutableNetwork Core::ImportNetwork(const std::string& modelFileName, const st ExecutableNetwork Core::ImportNetwork(std::istream& networkModel, const std::string& deviceName, const std::map& config) { + OV_ITT_SCOPED_TASK(itt::domains::IE, "Core::ImportNetwork"); auto exec = _impl->ImportNetwork(networkModel, deviceName, config); return { exec, exec }; } +ExecutableNetwork Core::ImportNetwork(std::istream& networkModel) { + OV_ITT_SCOPED_TASK(itt::domains::IE, "Core::ImportNetwork"); + + using ExportMagic = std::array; + constexpr static const ExportMagic exportMagic = {{0x1, 0xE, 0xE, 0x1}}; + + std::string deviceName; + ExportMagic magic = {}; + auto currentPos = networkModel.tellg(); + networkModel.read(magic.data(), magic.size()); + if (exportMagic == magic) { + std::getline(networkModel, deviceName); + } else { + IE_THROW() << "Passed compiled stream does not contain device name. 
" + "Please, provide device name manually"; + } + networkModel.seekg(currentPos, networkModel.beg); + + auto exec = _impl->GetCPPPluginByName(deviceName).ImportNetwork(networkModel, {}); + return { exec, exec }; +} + ExecutableNetwork Core::ImportNetwork(std::istream& networkModel, const RemoteContext::Ptr& context, const std::map& config) { @@ -1124,8 +1124,8 @@ Parameter Core::GetConfig(const std::string& deviceName, const std::string& name IE_THROW() << "You can only GetConfig of the AUTO itself (without devices). " "GetConfig is also possible for the individual devices before creating the AUTO on top."; - } - } + } + } auto parsed = parseDeviceNameIntoConfig(deviceName); diff --git a/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_iexecutable_network_internal.hpp b/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_iexecutable_network_internal.hpp index 58951410383..1f3eb681e4b 100644 --- a/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_iexecutable_network_internal.hpp +++ b/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_iexecutable_network_internal.hpp @@ -140,14 +140,6 @@ protected: virtual std::shared_ptr CreateInferRequestImpl(InputsDataMap networkInputs, OutputsDataMap networkOutputs); - /** - * @brief Exports an internal hardware-dependent model to a stream. - * @note The function is called from IExecutableNetworkInternal::Export(std::ostream&), - * which performs common export first and calls this plugin-dependent implementation after. - * @param networkModel A stream to export network to. - */ - virtual void ExportImpl(std::ostream& networkModel); - InferenceEngine::InputsDataMap _networkInputs; //!< Holds information about network inputs info InferenceEngine::OutputsDataMap _networkOutputs; //!< Holds information about network outputs data diff --git a/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_iplugin_internal.hpp b/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_iplugin_internal.hpp index 22c2f7e1c08..56e3e1cf5f7 100644 --- a/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_iplugin_internal.hpp +++ b/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_iplugin_internal.hpp @@ -286,29 +286,12 @@ protected: const std::map& config); /** - * @brief Creates an executable network from an previously exported network - * @note The function is called from - * IInferencePlugin::ImportNetwork(std::istream&, const RemoteContext::Ptr&, const std::map&) - * performs common steps first and calls this plugin-dependent implementation after. - * @param networkModel Reference to network model output stream - * @param config A string -> string map of parameters - * @return An Executable network + * @brief Set input and output information to executable network. This method is used to + * set addtional information to InferenceEngine::IExecutableNetworkInternal create by device plugin. 
+ * @param exeNetwork An executable network object to set information to + * @param inputs An input information to set + * @param outputs An output information to set */ - virtual std::shared_ptr ImportNetworkImpl(std::istream& networkModel, - const std::map& config); - - /** - * @brief Imports network wit RemoteContext - * @param networkModel Reference to network model output stream - * @param context - a pointer to plugin context derived from RemoteContext class used to - * execute the network - * @param config A string -> string map of parameters - * @return An Executable network - */ - virtual std::shared_ptr ImportNetworkImpl(std::istream& networkModel, - const std::shared_ptr& context, - const std::map& config); - void SetExeNetworkInfo(const std::shared_ptr& exeNetwork, const ConstInputsDataMap& inputs, const ConstOutputsDataMap& outputs); diff --git a/inference-engine/src/plugin_api/ie_icore.hpp b/inference-engine/src/plugin_api/ie_icore.hpp index 70d7aaff3a5..fb4ac0b3423 100644 --- a/inference-engine/src/plugin_api/ie_icore.hpp +++ b/inference-engine/src/plugin_api/ie_icore.hpp @@ -141,18 +141,6 @@ public: virtual ~ICore() = default; }; -/** - * @brief Type of magic value - * @ingroup ie_dev_api_plugin_api - */ -using ExportMagic = std::array; - -/** - * @brief Magic number used by ie core to identify exported network with plugin name - * @ingroup ie_dev_api_plugin_api - */ -constexpr static const ExportMagic exportMagic = {{0x1, 0xE, 0xE, 0x1}}; - /** * @private */ diff --git a/inference-engine/src/vpu/myriad_plugin/myriad_executable_network.h b/inference-engine/src/vpu/myriad_plugin/myriad_executable_network.h index 8a6c42c4e97..22824ee5ec1 100644 --- a/inference-engine/src/vpu/myriad_plugin/myriad_executable_network.h +++ b/inference-engine/src/vpu/myriad_plugin/myriad_executable_network.h @@ -93,16 +93,6 @@ public: model.write(_graphBlob.data(), _graphBlob.size()); } - void Export(const std::string &modelFileName) override { - std::ofstream modelFile(modelFileName, std::ios::out | std::ios::binary); - - if (modelFile.is_open()) { - Export(modelFile); - } else { - IE_THROW() << "The " << modelFileName << " file can not be opened for export"; - } - } - ie::Parameter GetMetric(const std::string &name) const override; ie::CNNNetwork GetExecGraphInfo() override; diff --git a/inference-engine/src/vpu/myriad_plugin/myriad_plugin.cpp b/inference-engine/src/vpu/myriad_plugin/myriad_plugin.cpp index f61b9fbf7fb..75e7ef395d9 100644 --- a/inference-engine/src/vpu/myriad_plugin/myriad_plugin.cpp +++ b/inference-engine/src/vpu/myriad_plugin/myriad_plugin.cpp @@ -151,20 +151,6 @@ InferenceEngine::IExecutableNetworkInternal::Ptr Engine::ImportNetwork( return executableNetwork; } -InferenceEngine::IExecutableNetworkInternal::Ptr Engine::ImportNetwork( - const std::string& modelFileName, - const std::map& config) { - VPU_PROFILE(ImportNetwork); - - std::ifstream blobFile(modelFileName, std::ios::binary); - - if (!blobFile.is_open()) { - IE_THROW(NetworkNotRead); - } - - return ImportNetwork(blobFile, config); -} - InferenceEngine::Parameter Engine::GetMetric(const std::string& name, const std::map & options) const { const auto mvnc = _mvnc; diff --git a/inference-engine/src/vpu/myriad_plugin/myriad_plugin.h b/inference-engine/src/vpu/myriad_plugin/myriad_plugin.h index 07349f637e2..9fb074b5ac1 100644 --- a/inference-engine/src/vpu/myriad_plugin/myriad_plugin.h +++ b/inference-engine/src/vpu/myriad_plugin/myriad_plugin.h @@ -37,10 +37,6 @@ public: using ie::IInferencePlugin::ImportNetwork; - 
ie::IExecutableNetworkInternal::Ptr ImportNetwork( - const std::string& modelFileName, - const std::map& config) override; - ie::IExecutableNetworkInternal::Ptr ImportNetwork( std::istream& model, const std::map& config) override; diff --git a/inference-engine/tests/functional/inference_engine/caching_test.cpp b/inference-engine/tests/functional/inference_engine/caching_test.cpp index dd19dd3815d..cad8bd4428e 100644 --- a/inference-engine/tests/functional/inference_engine/caching_test.cpp +++ b/inference-engine/tests/functional/inference_engine/caching_test.cpp @@ -43,17 +43,18 @@ enum class TestLoadType { EContext, EModelName }; + using TestParam = std::tuple; // GCC4.8 limitation: have to specify type of each element in list static const std::vector loadVariants = { - TestParam { TestLoadType::ECNN, std::string("ByCNNNetwork"), false }, - TestParam { TestLoadType::EContext, std::string("ByRemoteContext"), true }, - TestParam { TestLoadType::EModelName, std::string("ByModelName"), false }, + TestParam { TestLoadType::ECNN, std::string("ByCNNNetwork"), false }, + TestParam { TestLoadType::EContext, std::string("ByRemoteContext"), true }, + TestParam { TestLoadType::EModelName, std::string("ByModelName"), false }, }; static const std::vector cacheFolders { - std::string("testCache"), + std::string("testCache"), }; std::string getTestCaseName(const testing::TestParamInfo> &obj) { @@ -100,12 +101,12 @@ public: MOCK_CONST_METHOD0(OnLoadNetworkFromFile, void(void)); - MOCK_METHOD2(ImportNetworkImpl, std::shared_ptr(std::istream& networkModel, - const std::map& config)); + MOCK_METHOD2(ImportNetwork, IExecutableNetworkInternal::Ptr(std::istream& networkModel, + const std::map& config)); - MOCK_METHOD3(ImportNetworkImpl, std::shared_ptr(std::istream& networkModel, - const RemoteContext::Ptr& context, - const std::map& config)); + MOCK_METHOD3(ImportNetwork, IExecutableNetworkInternal::Ptr(std::istream& networkModel, + const RemoteContext::Ptr& context, + const std::map& config)); MOCK_CONST_METHOD2(QueryNetwork, QueryNetworkResult(const CNNNetwork& network, const std::map& config)); @@ -120,7 +121,7 @@ class MockExecutableNetwork : public IExecutableNetworkInternal { public: MockExecutableNetwork() {} - MOCK_METHOD1(ExportImpl, void(std::ostream& networkModel)); + MOCK_METHOD1(Export, void(std::ostream& networkModel)); MOCK_METHOD0(CreateInferRequest, IInferRequestInternal::Ptr()); MOCK_CONST_METHOD0(GetInputsInfo, ConstInputsDataMap()); MOCK_CONST_METHOD0(GetOutputsInfo, ConstOutputsDataMap()); @@ -130,10 +131,10 @@ public: MOCK_METHOD1(setNetworkInputs, void(const InputsDataMap& networkInputs)); MOCK_METHOD1(setNetworkOutputs, void(const OutputsDataMap& networkOutputs)); - void Export(std::ostream& networkModel) override { - std::lock_guard guard(m_pluginMutex); - IExecutableNetworkInternal::Export(networkModel); - } + // void Export(std::ostream& networkModel) override { + // std::lock_guard guard(m_pluginMutex); + // IExecutableNetworkInternal::Export(networkModel); + // } void SetPointerToPlugin(const IInferencePlugin::Ptr& plugin) override { std::lock_guard guard(m_pluginMutex); @@ -323,13 +324,13 @@ private: ON_CALL(plugin, GetMetric(METRIC_KEY(DEVICE_ARCHITECTURE), _)). WillByDefault(Return("mock")); - ON_CALL(plugin, ImportNetworkImpl(_, _, _)). + ON_CALL(plugin, ImportNetwork(_, _, _)). WillByDefault(Invoke([&](std::istream &istr, RemoteContext::Ptr, const std::map &) { return createMockIExecutableNet(); })); - ON_CALL(plugin, ImportNetworkImpl(_, _)). 
+ ON_CALL(plugin, ImportNetwork(_, _)). WillByDefault(Invoke([&](std::istream &istr, const std::map &) { return createMockIExecutableNet(); })); @@ -403,9 +404,9 @@ TEST_P(CachingTest, TestLoad) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(1); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); m_testFunction(ie); @@ -415,9 +416,9 @@ TEST_P(CachingTest, TestLoad) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*net, ExportImpl(_)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(!m_remoteContext ? 1 : 0); + EXPECT_CALL(*net, Export(_)).Times(0); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); m_testFunction(ie); @@ -426,43 +427,43 @@ TEST_P(CachingTest, TestLoad) { } TEST_P(CachingTest, TestLoadCustomImportExport) { - const int customNumber = 1234; + const char customData[] = {1, 2, 3, 4, 5}; EXPECT_CALL(*mockPlugin, GetMetric(METRIC_KEY(SUPPORTED_METRICS), _)).Times(AnyNumber()); EXPECT_CALL(*mockPlugin, GetMetric(METRIC_KEY(IMPORT_EXPORT_SUPPORT), _)).Times(AnyNumber()); EXPECT_CALL(*mockPlugin, GetMetric(METRIC_KEY(DEVICE_ARCHITECTURE), _)).Times(AnyNumber()); - ON_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)). + ON_CALL(*mockPlugin, ImportNetwork(_, _, _)). WillByDefault(Invoke([&](std::istream& s, RemoteContext::Ptr, const std::map &) { - int a; - s >> a; - EXPECT_EQ(customNumber, a); + char a[sizeof(customData)]; + s.read(a, sizeof(customData)); + EXPECT_EQ(memcmp(a, customData, sizeof(customData)), 0); auto mock = std::make_shared(); EXPECT_CALL(*mock, GetInputsInfo()).Times(AnyNumber()).WillRepeatedly(Return(ConstInputsDataMap{})); EXPECT_CALL(*mock, GetOutputsInfo()).Times(AnyNumber()).WillRepeatedly(Return(ConstOutputsDataMap{})); return mock; })); - ON_CALL(*mockPlugin, ImportNetworkImpl(_, _)). + ON_CALL(*mockPlugin, ImportNetwork(_, _)). WillByDefault(Invoke([&](std::istream &s, const std::map &) { - int a; - s >> a; - EXPECT_EQ(customNumber, a); + char a[sizeof(customData)]; + s.read(a, sizeof(customData)); + EXPECT_EQ(memcmp(a, customData, sizeof(customData)), 0); auto mock = std::make_shared(); EXPECT_CALL(*mock, GetInputsInfo()).Times(AnyNumber()).WillRepeatedly(Return(ConstInputsDataMap{})); EXPECT_CALL(*mock, GetOutputsInfo()).Times(AnyNumber()).WillRepeatedly(Return(ConstOutputsDataMap{})); return mock; })); - ON_CALL(*net, ExportImpl(_)).WillByDefault(Invoke([&] (std::ostream& s) { - s << customNumber; + ON_CALL(*net, Export(_)).WillByDefault(Invoke([&] (std::ostream& s) { + s.write(customData, sizeof(customData)); })); { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 
1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(1); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); m_testFunction(ie); @@ -472,9 +473,9 @@ TEST_P(CachingTest, TestLoadCustomImportExport) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*net, ExportImpl(_)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(!m_remoteContext ? 1 : 0); + EXPECT_CALL(*net, Export(_)).Times(0); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); m_testFunction(ie); @@ -497,9 +498,9 @@ TEST_P(CachingTest, TestChangeLoadConfig) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(1); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); m_testFunctionWithCfg(ie, {{CUSTOM_KEY, "0"}}); @@ -509,9 +510,9 @@ TEST_P(CachingTest, TestChangeLoadConfig) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(1); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); m_testFunctionWithCfg(ie, {{CUSTOM_KEY, "1"}}); @@ -526,9 +527,9 @@ TEST_P(CachingTest, TestNoCacheEnabled) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(0); testLoad([&](Core &ie) { m_testFunction(ie); }); @@ -544,9 +545,9 @@ TEST_P(CachingTest, TestNoCacheSupported) { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, OnLoadNetworkFromFile()).Times(m_type == TestLoadType::EModelName ? 
1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(0); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); m_testFunction(ie); @@ -563,9 +564,9 @@ TEST_P(CachingTest, TestNoCacheMetricSupported) { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, OnLoadNetworkFromFile()).Times(m_type == TestLoadType::EModelName ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(0); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); m_testFunction(ie); @@ -652,8 +653,8 @@ TEST_P(CachingTest, TestNoCacheEnabled_cacheDirConfig) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); testLoad([&](Core &ie) { m_testFunction(ie); }); @@ -667,9 +668,9 @@ TEST_P(CachingTest, TestLoadChangeCacheDir) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(1); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); m_testFunction(ie); @@ -681,9 +682,9 @@ TEST_P(CachingTest, TestLoadChangeCacheDir) { MkDirGuard dir(newCacheDir); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(1); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), newCacheDir}}); m_testFunction(ie); @@ -698,9 +699,9 @@ TEST_P(CachingTest, TestClearCacheDir) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 
1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(0); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); ie.SetConfig({{CONFIG_KEY(CACHE_DIR), ""}}); @@ -716,9 +717,9 @@ TEST_P(CachingTest, TestChangeOtherConfig) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(1); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); ie.SetConfig({{"someKey", "someValue"}}); @@ -735,9 +736,9 @@ TEST_P(CachingTest, TestChangeCacheDirFailure) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(1); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); m_testFunction(ie); @@ -747,9 +748,9 @@ TEST_P(CachingTest, TestChangeCacheDirFailure) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*net, ExportImpl(_)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(!m_remoteContext ? 1 : 0); + EXPECT_CALL(*net, Export(_)).Times(0); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); EXPECT_ANY_THROW(ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir + "/" + longName}})); @@ -769,9 +770,9 @@ TEST_P(CachingTest, TestCacheDirCreateRecursive) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(1); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { EXPECT_NO_THROW(ie.SetConfig({{CONFIG_KEY(CACHE_DIR), newCacheDir3}})); EXPECT_NO_THROW(m_testFunction(ie)); @@ -797,9 +798,9 @@ TEST_P(CachingTest, TestDeviceArchitecture) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 
1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(1); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { deviceToLoad = "mock.0"; ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); @@ -810,9 +811,9 @@ TEST_P(CachingTest, TestDeviceArchitecture) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*net, ExportImpl(_)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(!m_remoteContext ? 1 : 0); + EXPECT_CALL(*net, Export(_)).Times(0); testLoad([&](Core &ie) { deviceToLoad = "mock.1"; ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); @@ -822,9 +823,9 @@ TEST_P(CachingTest, TestDeviceArchitecture) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(1); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { deviceToLoad = "mock.50"; ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); @@ -835,9 +836,9 @@ TEST_P(CachingTest, TestDeviceArchitecture) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*net, ExportImpl(_)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(!m_remoteContext ? 1 : 0); + EXPECT_CALL(*net, Export(_)).Times(0); testLoad([&](Core &ie) { deviceToLoad = "mock.51"; ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); @@ -856,9 +857,9 @@ TEST_P(CachingTest, TestNoDeviceArchitecture) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(1); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { deviceToLoad = "mock.0"; ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); @@ -869,9 +870,9 @@ TEST_P(CachingTest, TestNoDeviceArchitecture) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(m_remoteContext ? 
1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*net, ExportImpl(_)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(!m_remoteContext ? 1 : 0); + EXPECT_CALL(*net, Export(_)).Times(0); testLoad([&](Core &ie) { deviceToLoad = "mock.50"; ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); @@ -887,9 +888,9 @@ TEST_P(CachingTest, TestThrowOnExport) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(1).WillOnce(Throw(1)); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(1).WillOnce(Throw(1)); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); EXPECT_ANY_THROW(m_testFunction(ie)); @@ -906,9 +907,9 @@ TEST_P(CachingTest, TestThrowOnImport) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(1); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); m_testFunction(ie); @@ -918,13 +919,13 @@ TEST_P(CachingTest, TestThrowOnImport) { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); if (m_remoteContext) { - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(1).WillOnce(Throw(1)); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(1).WillOnce(Throw(1)); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); } else { - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(1).WillOnce(Throw(1)); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(1).WillOnce(Throw(1)); } - EXPECT_CALL(*net, ExportImpl(_)).Times(1); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); EXPECT_NO_THROW(m_testFunction(ie)); @@ -933,9 +934,9 @@ TEST_P(CachingTest, TestThrowOnImport) { { // Step 3: same load, cache is re-created on export on step 2 and shall be successfully imported now EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*net, ExportImpl(_)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(!m_remoteContext ? 
1 : 0); + EXPECT_CALL(*net, Export(_)).Times(0); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); EXPECT_NO_THROW(m_testFunction(ie)); @@ -951,9 +952,9 @@ TEST_P(CachingTest, TestNetworkModified) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(1); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { EXPECT_NO_THROW(ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}})); EXPECT_NO_THROW(m_testFunction(ie)); @@ -974,9 +975,9 @@ TEST_P(CachingTest, TestNetworkModified) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(1); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { EXPECT_NO_THROW(ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}})); EXPECT_NO_THROW(m_testFunction(ie)); @@ -985,9 +986,9 @@ TEST_P(CachingTest, TestNetworkModified) { { // Step 3: same load, should be ok now EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*net, ExportImpl(_)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(!m_remoteContext ? 1 : 0); + EXPECT_CALL(*net, Export(_)).Times(0); testLoad([&](Core &ie) { EXPECT_NO_THROW(ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}})); EXPECT_NO_THROW(m_testFunction(ie)); @@ -1003,9 +1004,9 @@ TEST_P(CachingTest, TestCacheFileCorrupted) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(1); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { EXPECT_NO_THROW(ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}})); EXPECT_NO_THROW(m_testFunction(ie)); @@ -1021,9 +1022,9 @@ TEST_P(CachingTest, TestCacheFileCorrupted) { { // Step 2. Cache is corrupted, will be silently removed EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 
1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(1); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { EXPECT_NO_THROW(ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}})); EXPECT_NO_THROW(m_testFunction(ie)); @@ -1032,9 +1033,9 @@ TEST_P(CachingTest, TestCacheFileCorrupted) { { // Step 3: same load, should be ok now due to re-creation of cache EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*net, ExportImpl(_)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(!m_remoteContext ? 1 : 0); + EXPECT_CALL(*net, Export(_)).Times(0); testLoad([&](Core &ie) { EXPECT_NO_THROW(ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}})); EXPECT_NO_THROW(m_testFunction(ie)); @@ -1050,9 +1051,9 @@ TEST_P(CachingTest, TestCacheFileOldVersion) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(1); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { EXPECT_NO_THROW(ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}})); EXPECT_NO_THROW(m_testFunction(ie)); @@ -1083,9 +1084,9 @@ TEST_P(CachingTest, TestCacheFileOldVersion) { { // Step 2. Build number mismatch, cache will be silently removed EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(1); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { EXPECT_NO_THROW(ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}})); EXPECT_NO_THROW(m_testFunction(ie)); @@ -1094,9 +1095,9 @@ TEST_P(CachingTest, TestCacheFileOldVersion) { { // Step 3: same load, should be ok now due to re-creation of cache EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*net, ExportImpl(_)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(!m_remoteContext ? 
1 : 0); + EXPECT_CALL(*net, Export(_)).Times(0); testLoad([&](Core &ie) { EXPECT_NO_THROW(ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}})); EXPECT_NO_THROW(m_testFunction(ie)); @@ -1118,9 +1119,9 @@ TEST_P(CachingTest, LoadHetero_NoCacheMetric) { for (int i = 0; i < 2; i++) { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(1); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(0); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); m_testFunction(ie); @@ -1138,9 +1139,9 @@ TEST_P(CachingTest, LoadHetero_OneDevice) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(1); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(1); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); m_testFunction(ie); @@ -1152,9 +1153,9 @@ TEST_P(CachingTest, LoadHetero_OneDevice) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(1); - EXPECT_CALL(*net, ExportImpl(_)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(1); + EXPECT_CALL(*net, Export(_)).Times(0); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); m_testFunction(ie); @@ -1172,9 +1173,9 @@ TEST_P(CachingTest, LoadHetero_TargetFallbackFromCore) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(1); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(1); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); ie.SetConfig({{"TARGET_FALLBACK", "mock"}}, CommonTestUtils::DEVICE_HETERO); @@ -1187,9 +1188,9 @@ TEST_P(CachingTest, LoadHetero_TargetFallbackFromCore) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(1); - EXPECT_CALL(*net, ExportImpl(_)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(1); + EXPECT_CALL(*net, Export(_)).Times(0); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); ie.SetConfig({{"TARGET_FALLBACK", "mock"}}, CommonTestUtils::DEVICE_HETERO); @@ -1200,20 +1201,20 @@ TEST_P(CachingTest, LoadHetero_TargetFallbackFromCore) { 
TEST_P(CachingTest, LoadHetero_MultiArchs) { EXPECT_CALL(*mockPlugin, GetMetric(_, _)).Times(AnyNumber()); - int customNumber = 1234; - ON_CALL(*mockPlugin, ImportNetworkImpl(_, _)). + const char customData[] = {1, 2, 3, 4, 5}; + ON_CALL(*mockPlugin, ImportNetwork(_, _)). WillByDefault(Invoke([&](std::istream &s, const std::map &) { - int a; - s >> a; - EXPECT_EQ(customNumber, a); + char a[sizeof(customData)]; + s.read(a, sizeof(customData)); + EXPECT_EQ(memcmp(a, customData, sizeof(customData)), 0); auto mock = std::make_shared(); EXPECT_CALL(*mock, GetInputsInfo()).Times(AnyNumber()).WillRepeatedly(Return(ConstInputsDataMap{})); EXPECT_CALL(*mock, GetOutputsInfo()).Times(AnyNumber()).WillRepeatedly(Return(ConstOutputsDataMap{})); return mock; })); - ON_CALL(*net, ExportImpl(_)).WillByDefault(Invoke([&] (std::ostream& s) { - s << customNumber; + ON_CALL(*net, Export(_)).WillByDefault(Invoke([&] (std::ostream& s) { + s.write(customData, sizeof(customData)); })); EXPECT_CALL(*mockPlugin, QueryNetwork(_, _)).Times(AnyNumber()).WillRepeatedly( Invoke([&](const CNNNetwork &network, const std::map &config) { @@ -1249,9 +1250,9 @@ TEST_P(CachingTest, LoadHetero_MultiArchs) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(AtLeast(2)); // for .1 and for .51 - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(AtLeast(2)); // for .1 and for .51 + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(AtLeast(2)); // for .1 and for .51 testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); m_testFunction(ie); @@ -1264,9 +1265,9 @@ TEST_P(CachingTest, LoadHetero_MultiArchs) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(AtLeast(2)); // for .2 and for .52 - EXPECT_CALL(*net, ExportImpl(_)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(AtLeast(2)); // for .2 and for .52 + EXPECT_CALL(*net, Export(_)).Times(0); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); m_testFunction(ie); @@ -1276,9 +1277,9 @@ TEST_P(CachingTest, LoadHetero_MultiArchs) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(AtLeast(1)); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(AtLeast(1)); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(AtLeast(1)); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); m_testFunction(ie); @@ -1305,9 +1306,9 @@ TEST_P(CachingTest, LoadHetero_MultiArchs_TargetFallback_FromCore) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(1); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, 
ExportImpl(_)).Times(1); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); ie.SetConfig({{"TARGET_FALLBACK", "mock.1"}}, CommonTestUtils::DEVICE_HETERO); @@ -1318,9 +1319,9 @@ TEST_P(CachingTest, LoadHetero_MultiArchs_TargetFallback_FromCore) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(1); - EXPECT_CALL(*net, ExportImpl(_)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(1); + EXPECT_CALL(*net, Export(_)).Times(0); testLoad([&](Core &ie) { ie.SetConfig({{"TARGET_FALLBACK", "mock.1"}}, CommonTestUtils::DEVICE_HETERO); ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); @@ -1330,9 +1331,9 @@ TEST_P(CachingTest, LoadHetero_MultiArchs_TargetFallback_FromCore) { { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(1); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(1); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { ie.SetConfig({{"TARGET_FALLBACK", "mock.51"}}, CommonTestUtils::DEVICE_HETERO); ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); @@ -1366,9 +1367,9 @@ TEST_P(CachingTest, LoadMulti_race) { EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(1); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(devCount - 1); - EXPECT_CALL(*net, ExportImpl(_)).Times(1); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(devCount - 1); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), cacheDir}}); ASSERT_NO_THROW(m_testFunction(ie)); @@ -1394,9 +1395,9 @@ TEST_P(CachingTest, Load_threads) { MkDirGuard guard(cacheDir); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(1); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(THREADS_COUNT - 1); - EXPECT_CALL(*net, ExportImpl(_)).Times(1); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(THREADS_COUNT - 1); + EXPECT_CALL(*net, Export(_)).Times(1); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), cacheDir}}); std::vector threads; @@ -1443,12 +1444,12 @@ TEST_P(CachingTest, LoadMulti_Archs) { // Load network from file shall not be called for plugins with caching supported EXPECT_CALL(*mockPlugin, OnLoadNetworkFromFile()).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(TEST_DEVICE_MAX_COUNT - 2) + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, 
_)).Times(TEST_DEVICE_MAX_COUNT - 2) .WillRepeatedly(Invoke([&](std::istream &, const std::map &) { return createMockIExecutableNet(); })); - EXPECT_CALL(*net, ExportImpl(_)).Times(2); + EXPECT_CALL(*net, Export(_)).Times(2); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); ASSERT_NO_THROW(m_testFunction(ie)); @@ -1490,9 +1491,9 @@ TEST_P(CachingTest, LoadMulti_NoCachingOnDevice) { // Load network from file shall not be called by Multi plugin for devices with caching supported EXPECT_CALL(*mockPlugin, OnLoadNetworkFromFile()).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); - EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); - EXPECT_CALL(*net, ExportImpl(_)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, ImportNetwork(_, _)).Times(0); + EXPECT_CALL(*net, Export(_)).Times(0); testLoad([&](Core &ie) { ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); ExecutableNetwork exeNet; diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp index f013e544074..3d4678bf589 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp @@ -71,6 +71,8 @@ std::vector disabledTestPatterns() { R"(.*smoke_SetBlobOfKindAUTO.*SetBlobOfKindTest.CompareWithRefs.*)", // reference doesn't cover I8, U8 cases. Issue: 55842 R"(.*Gather7LayerTest.*netPRC=I8.*)", + // need to implement Export / Import + R"(.*IEClassImportExportTestP.*)" }; #ifdef __APPLE__ // TODO: Issue 55717 diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp index 02889dd8dde..07bd2a26098 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp @@ -57,5 +57,7 @@ std::vector disabledTestPatterns() { R"(.*LSTMSequence.*CompareWithRefs.*mode=CONVERT_TO_TI_RAND_SEQ_LEN_PARAM_seq.*direction=bidirectional_clip=0.7_netPRC=FP32.*)", // TODO: Issue: 54194 R"(.*ActivationLayerTest.*SoftPlus.*)", + // need to implement Export / Import + R"(.*IEClassImportExportTestP.*)" }; } diff --git a/inference-engine/tests/functional/plugin/myriad/shared_tests_instances/behavior/core_integration.cpp b/inference-engine/tests/functional/plugin/myriad/shared_tests_instances/behavior/core_integration.cpp index 8edb52954a8..2eaa15ec866 100644 --- a/inference-engine/tests/functional/plugin/myriad/shared_tests_instances/behavior/core_integration.cpp +++ b/inference-engine/tests/functional/plugin/myriad/shared_tests_instances/behavior/core_integration.cpp @@ -38,36 +38,7 @@ INSTANTIATE_TEST_CASE_P( // IEClassNetworkTestP tests, customized to add SKIP_IF_CURRENT_TEST_IS_DISABLED() // -using IEClassNetworkTestP_VPU = IEClassNetworkTestP; - -TEST_P(IEClassNetworkTestP_VPU, smoke_ImportNetworkNoThrowWithDeviceName) { - SKIP_IF_CURRENT_TEST_IS_DISABLED(); - Core ie; - std::stringstream strm; - ExecutableNetwork executableNetwork; - ASSERT_NO_THROW(executableNetwork = ie.LoadNetwork(actualNetwork, deviceName)); - ASSERT_NO_THROW(executableNetwork.Export(strm)); - ASSERT_NO_THROW(executableNetwork = ie.ImportNetwork(strm, 
deviceName)); - ASSERT_NO_THROW(executableNetwork.CreateInferRequest()); -} - -TEST_P(IEClassNetworkTestP_VPU, smoke_ExportUsingFileNameImportFromStreamNoThrowWithDeviceName) { - SKIP_IF_CURRENT_TEST_IS_DISABLED(); - Core ie; - ExecutableNetwork executableNetwork; - std::string fileName{"ExportedNetwork"}; - { - ASSERT_NO_THROW(executableNetwork = ie.LoadNetwork(actualNetwork, deviceName)); - ASSERT_NO_THROW(executableNetwork.Export(fileName)); - } - { - std::ifstream strm(fileName); - ASSERT_NO_THROW(executableNetwork = ie.ImportNetwork(strm, deviceName)); - } - ASSERT_NO_THROW(executableNetwork.CreateInferRequest()); -} - -using IEClassNetworkTestP_VPU_GetMetric = IEClassNetworkTestP_VPU; +using IEClassNetworkTestP_VPU_GetMetric = IEClassNetworkTestP; TEST_P(IEClassNetworkTestP_VPU_GetMetric, smoke_OptimizationCapabilitiesReturnsFP16) { Core ie; @@ -86,13 +57,13 @@ INSTANTIATE_TEST_CASE_P( ::testing::ValuesIn(devices)); INSTANTIATE_TEST_CASE_P( - smoke_IEClassImportExportTestP, IEClassNetworkTestP_VPU, + smoke_IEClassImportExportTestP, IEClassImportExportTestP, ::testing::Values(std::string(CommonTestUtils::DEVICE_MYRIAD), "HETERO:" + std::string(CommonTestUtils::DEVICE_MYRIAD))); #if defined(ENABLE_MKL_DNN) && ENABLE_MKL_DNN INSTANTIATE_TEST_CASE_P( - smoke_IEClassImportExportTestP_HETERO_CPU, IEClassNetworkTestP_VPU, + smoke_IEClassImportExportTestP_HETERO_CPU, IEClassImportExportTestP, ::testing::Values("HETERO:" + std::string(CommonTestUtils::DEVICE_MYRIAD) + ",CPU")); #endif diff --git a/inference-engine/tests/functional/plugin/shared/include/behavior/core_integration.hpp b/inference-engine/tests/functional/plugin/shared/include/behavior/core_integration.hpp index 834db01006f..adcd0e525b4 100644 --- a/inference-engine/tests/functional/plugin/shared/include/behavior/core_integration.hpp +++ b/inference-engine/tests/functional/plugin/shared/include/behavior/core_integration.hpp @@ -61,6 +61,7 @@ namespace BehaviorTestsDefinitions { } \ } + class IEClassBasicTestP : public ::testing::Test, public WithParamInterface > { protected: std::string deviceName; @@ -424,7 +425,16 @@ TEST_P(IEClassBasicTestP, ImportNetworkThrows) { if (deviceName == CommonTestUtils::DEVICE_CPU || deviceName == CommonTestUtils::DEVICE_GPU) { - ASSERT_THROW(ie.ImportNetwork("model", deviceName), NotImplemented); + ASSERT_THROW(ie.ImportNetwork("model", deviceName), NetworkNotRead); + + const std::string modelName = "compiled_blob.blob"; + { + std::ofstream file(modelName); + file << "content"; + } + + EXPECT_THROW(ie.ImportNetwork(modelName, deviceName), NotImplemented); + ASSERT_EQ(0, std::remove(modelName.c_str())); } } @@ -432,13 +442,13 @@ TEST(IEClassBasicTest, smoke_ImportNetworkHeteroThrows) { SKIP_IF_CURRENT_TEST_IS_DISABLED() Core ie; - ASSERT_THROW(ie.ImportNetwork("model", CommonTestUtils::DEVICE_HETERO), Exception); + ASSERT_THROW(ie.ImportNetwork("model", CommonTestUtils::DEVICE_HETERO), NetworkNotRead); } TEST(IEClassBasicTest, smoke_ImportNetworkMultiThrows) { SKIP_IF_CURRENT_TEST_IS_DISABLED() InferenceEngine::Core ie; - ASSERT_THROW(ie.ImportNetwork("model", CommonTestUtils::DEVICE_MULTI), Exception); + ASSERT_THROW(ie.ImportNetwork("model", CommonTestUtils::DEVICE_MULTI), NetworkNotRead); } TEST_P(IEClassBasicTestP, ImportNetworkWithNullContextThrows) { @@ -474,19 +484,18 @@ TEST_P(IEClassNetworkTestP, LoadNetworkActualHeteroDevice2NoThrow) { // // ImportExportNetwork // -TEST_P(IEClassImportExportTestP, smoke_ImportNetworkNoThrowIfNoDeviceName) { + +TEST_P(IEClassImportExportTestP, 
smoke_ImportNetworkThrowsIfNoDeviceName) { SKIP_IF_CURRENT_TEST_IS_DISABLED() Core ie; std::stringstream strm; ExecutableNetwork executableNetwork; ASSERT_NO_THROW(executableNetwork = ie.LoadNetwork(actualNetwork, deviceName)); - SKIP_IF_NOT_IMPLEMENTED(executableNetwork.Export(strm)); - if (!strm.str().empty()) { - SKIP_IF_NOT_IMPLEMENTED(executableNetwork = ie.ImportNetwork(strm)); - } - if (executableNetwork) { - ASSERT_NO_THROW(executableNetwork.CreateInferRequest()); - } + ASSERT_NO_THROW(executableNetwork.Export(strm)); + + IE_SUPPRESS_DEPRECATED_START + ASSERT_THROW(executableNetwork = ie.ImportNetwork(strm), Exception); + IE_SUPPRESS_DEPRECATED_END } TEST_P(IEClassImportExportTestP, smoke_ImportNetworkNoThrowWithDeviceName) { @@ -495,11 +504,9 @@ TEST_P(IEClassImportExportTestP, smoke_ImportNetworkNoThrowWithDeviceName) { std::stringstream strm; ExecutableNetwork executableNetwork; ASSERT_NO_THROW(executableNetwork = ie.LoadNetwork(actualNetwork, deviceName)); - SKIP_IF_NOT_IMPLEMENTED(executableNetwork.Export(strm)); - SKIP_IF_NOT_IMPLEMENTED(executableNetwork = ie.ImportNetwork(strm, deviceName)); - if (executableNetwork) { - ASSERT_NO_THROW(executableNetwork.CreateInferRequest()); - } + ASSERT_NO_THROW(executableNetwork.Export(strm)); + ASSERT_NO_THROW(executableNetwork = ie.ImportNetwork(strm, deviceName)); + ASSERT_NO_THROW(executableNetwork.CreateInferRequest()); } TEST_P(IEClassImportExportTestP, smoke_ExportUsingFileNameImportFromStreamNoThrowWithDeviceName) { @@ -509,18 +516,16 @@ TEST_P(IEClassImportExportTestP, smoke_ExportUsingFileNameImportFromStreamNoThro std::string fileName{"ExportedNetwork"}; { ASSERT_NO_THROW(executableNetwork = ie.LoadNetwork(simpleNetwork, deviceName)); - SKIP_IF_NOT_IMPLEMENTED(executableNetwork.Export(fileName)); + ASSERT_NO_THROW(executableNetwork.Export(fileName)); } - if (CommonTestUtils::fileExists(fileName)) { + { { std::ifstream strm(fileName); - SKIP_IF_NOT_IMPLEMENTED(executableNetwork = ie.ImportNetwork(strm, deviceName)); + ASSERT_NO_THROW(executableNetwork = ie.ImportNetwork(strm, deviceName)); } ASSERT_EQ(0, remove(fileName.c_str())); } - if (executableNetwork) { - ASSERT_NO_THROW(executableNetwork.CreateInferRequest()); - } + ASSERT_NO_THROW(executableNetwork.CreateInferRequest()); } // diff --git a/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/cpp_interfaces/impl/mock_inference_plugin_internal.hpp b/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/cpp_interfaces/impl/mock_inference_plugin_internal.hpp index 2a68e96c19e..82e2ff61f33 100644 --- a/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/cpp_interfaces/impl/mock_inference_plugin_internal.hpp +++ b/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/cpp_interfaces/impl/mock_inference_plugin_internal.hpp @@ -32,11 +32,8 @@ public: MOCK_METHOD1(AddExtension, void(InferenceEngine::IExtensionPtr ext_ptr)); MOCK_METHOD1(SetConfig, void(const std::map &)); - using InferenceEngine::IInferencePlugin::ImportNetwork; - - std::shared_ptr ImportNetworkImpl(std::istream& stream, - const std::map &) { - std::getline(stream, importedString); + std::shared_ptr + ImportNetwork(std::istream& stream, const std::map &) { return {}; } diff --git a/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/cpp_interfaces/interface/mock_iexecutable_network_internal.hpp b/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/cpp_interfaces/interface/mock_iexecutable_network_internal.hpp index 4e221be90f7..c1cc30a944b 100644 --- 
a/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/cpp_interfaces/interface/mock_iexecutable_network_internal.hpp +++ b/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/cpp_interfaces/interface/mock_iexecutable_network_internal.hpp @@ -34,8 +34,4 @@ public: void WrapOstreamExport(std::ostream& networkModel) { IExecutableNetworkInternal::Export(networkModel); } - const std::string exportString = "MockExecutableNetworkInternal"; - void ExportImpl(std::ostream& networkModel) override { - networkModel << exportString << std::endl; - } }; diff --git a/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/mock_engine/mock_plugin.cpp b/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/mock_engine/mock_plugin.cpp index cd2e7b95f46..4408614f61c 100644 --- a/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/mock_engine/mock_plugin.cpp +++ b/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/mock_engine/mock_plugin.cpp @@ -70,8 +70,8 @@ MockPlugin::LoadExeNetworkImpl(const CNNNetwork& network, } std::shared_ptr -MockPlugin::ImportNetworkImpl(std::istream& networkModel, - const std::map& config) { +MockPlugin::ImportNetwork(std::istream& networkModel, + const std::map& config) { if (_target) { return _target->ImportNetwork(networkModel, config); } else { @@ -80,9 +80,9 @@ MockPlugin::ImportNetworkImpl(std::istream& networkModel, } std::shared_ptr -MockPlugin::ImportNetworkImpl(std::istream& networkModel, - const std::shared_ptr& context, - const std::map& config) { +MockPlugin::ImportNetwork(std::istream& networkModel, + const std::shared_ptr& context, + const std::map& config) { if (_target) { return _target->ImportNetwork(networkModel, context, config); } else { diff --git a/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/mock_engine/mock_plugin.hpp b/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/mock_engine/mock_plugin.hpp index c01a8a8d175..c2654061abd 100644 --- a/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/mock_engine/mock_plugin.hpp +++ b/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/mock_engine/mock_plugin.hpp @@ -35,13 +35,13 @@ public: const std::map &config) override; std::shared_ptr - ImportNetworkImpl(std::istream& networkModel, + ImportNetwork(std::istream& networkModel, const std::map& config) override; std::shared_ptr - ImportNetworkImpl(std::istream& networkModel, - const std::shared_ptr& context, - const std::map& config) override; + ImportNetwork(std::istream& networkModel, + const std::shared_ptr& context, + const std::map& config) override; InferenceEngine::Parameter GetMetric(const std::string& name, const std::map& options) const override; diff --git a/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_plugin_test.cpp b/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_plugin_test.cpp index 0945510d7a0..2d26c7bd0e2 100644 --- a/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_plugin_test.cpp +++ b/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_plugin_test.cpp @@ -149,29 +149,6 @@ TEST_F(InferenceEnginePluginInternalTest, failToSetNotAllocatedBlob) { } } -TEST_F(InferenceEnginePluginInternalTest, executableNetworkInternalExportsMagicAndName) { - std::stringstream strm; - ASSERT_NO_THROW(mockIExeNetworkInternal->WrapOstreamExport(strm)); - ExportMagic actualMagic = {}; - strm.read(actualMagic.data(), actualMagic.size()); - ASSERT_EQ(exportMagic, actualMagic); - std::string pluginName; - std::getline(strm, pluginName); - 
ASSERT_EQ(pluginId, pluginName); - std::string exportedString; - std::getline(strm, exportedString); - ASSERT_EQ(mockIExeNetworkInternal->exportString, exportedString); -} - -TEST_F(InferenceEnginePluginInternalTest, pluginInternalEraseMagicAndNameWhenImports) { - std::stringstream strm; - ASSERT_NO_THROW(mockIExeNetworkInternal->WrapOstreamExport(strm)); - ASSERT_NO_THROW(mock_plugin_impl->ImportNetwork(strm, {})); - ASSERT_EQ(mockIExeNetworkInternal->exportString, mock_plugin_impl->importedString); - mock_plugin_impl->importedString = {}; -} - - TEST(InferencePluginTests, throwsOnUninitializedGetVersion) { InferencePlugin plg; ASSERT_THROW(plg.GetVersion(), Exception); From cbd48cf15f7c5c0d1779a5e2c492007af318f888 Mon Sep 17 00:00:00 2001 From: Nikolay Shchegolev Date: Wed, 9 Jun 2021 10:12:58 +0300 Subject: [PATCH 38/41] [CPU] Statically found issues on Windows. (#6075) --- .../src/mkldnn_plugin/nodes/mkldnn_convert_node.cpp | 2 ++ .../src/mkldnn_plugin/nodes/mkldnn_depth_to_space_node.cpp | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.cpp index d226dd73890..678922f3a4b 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.cpp @@ -42,6 +42,8 @@ MKLDNNConvertNode::MKLDNNConvertNode(const InferenceEngine::SizeVector &dims, co addOriginalInputPrecision(inPrc); outDims.emplace_back(dims); addOriginalOutputPrecision(outPrc); + + errorPrefix = "Convert node with name '" + getName() + "'"; } void MKLDNNConvertNode::getSupportedDescriptors() { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_depth_to_space_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_depth_to_space_node.cpp index 2f97bbd2f85..38bebcd5271 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_depth_to_space_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_depth_to_space_node.cpp @@ -58,6 +58,8 @@ MKLDNNDepthToSpaceNode::MKLDNNDepthToSpaceNode(const std::shared_ptr(std::pow(blockSize, nSpatialDims)); } else { IE_THROW(NotImplemented) << errorMessage; } @@ -74,14 +76,13 @@ void MKLDNNDepthToSpaceNode::getSupportedDescriptors() { if (srcDims.size() != dstDims.size()) THROW_ERROR << "has incorrect number of input/output dimensions"; - size_t nSpatialDims = srcDims.size() - 2; - blockStep = static_cast(std::pow(blockSize, nSpatialDims)); if (srcDims[1] % blockStep) THROW_ERROR << "has block_size parameter which is incompatible with input tensor channels dimension size"; if (srcDims[1] / blockStep != dstDims[1]) THROW_ERROR << "has incompatible input/output channels"; + size_t nSpatialDims = srcDims.size() - 2; for (size_t i = 0; i < nSpatialDims; ++i) { if (srcDims[i + 2] * blockSize != dstDims[i + 2]) THROW_ERROR << "has incompatible spatial dims"; From 421465adc15521f91f6ea9dbbbb9e0bbeb0ea933 Mon Sep 17 00:00:00 2001 From: iliya mironov Date: Wed, 9 Jun 2021 12:14:39 +0300 Subject: [PATCH 39/41] Add ShapeOfConstFolding transform (#5858) * Add ShapeOfConstFolding transform * Add unit tests * Update bom file * Update transform file * Hot fix * Fix midle replaser * Update unit tests * Fix get value * Refactoring Const Folding transformation. 
Move to back * Update bom file * Remove unuse code * Add some unit tests * Refactoring unit test * Hot fix --- model-optimizer/automation/package_BOM.txt | 1 + .../extensions/back/ShapeOfConstFolding.py | 29 +++ .../back/ShapeOfConstFolding_test.py | 170 ++++++++++++++++++ 3 files changed, 200 insertions(+) create mode 100644 model-optimizer/extensions/back/ShapeOfConstFolding.py create mode 100644 model-optimizer/unit_tests/extensions/back/ShapeOfConstFolding_test.py diff --git a/model-optimizer/automation/package_BOM.txt b/model-optimizer/automation/package_BOM.txt index be0dc1c37e0..e7c779543cd 100644 --- a/model-optimizer/automation/package_BOM.txt +++ b/model-optimizer/automation/package_BOM.txt @@ -57,6 +57,7 @@ extensions/back/ReverseInputChannels.py extensions/back/RNNSequenceTypeRename.py extensions/back/ScalarConstNormalize.py extensions/back/SelectBroadcast.py +extensions/back/ShapeOfConstFolding.py extensions/back/ShuffleChannelPatternOptimization.py extensions/back/ShufflenetReLUReorder.py extensions/back/SpecialNodesFinalization.py diff --git a/model-optimizer/extensions/back/ShapeOfConstFolding.py b/model-optimizer/extensions/back/ShapeOfConstFolding.py new file mode 100644 index 00000000000..b97b46fc35d --- /dev/null +++ b/model-optimizer/extensions/back/ShapeOfConstFolding.py @@ -0,0 +1,29 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from mo.back.replacement import BackReplacementPattern +from mo.graph.graph import Graph, rename_nodes +from mo.ops.const import Const + + +class ShapeOfConstFolding(BackReplacementPattern): + """ + The transformation folds ShapeOf(Const) -> Const + """ + enabled = True + + def run_after(self): + from extensions.back.MatMulNormalizer import SmartReshape_HC_Reshape_MatMul + return [SmartReshape_HC_Reshape_MatMul] + + def find_and_replace_pattern(self, graph: Graph): + for shapeof_node in graph.get_op_nodes(op='ShapeOf'): + in_node = shapeof_node.in_port(0).get_source().node + if in_node.op == 'Const': + shapeof_node.in_port(0).disconnect() + shape_name = shapeof_node.soft_get('name', shapeof_node.id) + shape_value = shapeof_node.out_port(0).data.get_value() + shape_const_node = Const(graph, {'name': shape_name + '/ExecutionConstValue', + 'value': shape_value}).create_node() + shapeof_node.out_port(0).get_connection().set_source(shape_const_node.out_port(0)) + rename_nodes([(shapeof_node, shape_name + '/TBD'), (shape_const_node, shape_name)]) diff --git a/model-optimizer/unit_tests/extensions/back/ShapeOfConstFolding_test.py b/model-optimizer/unit_tests/extensions/back/ShapeOfConstFolding_test.py new file mode 100644 index 00000000000..562c1416342 --- /dev/null +++ b/model-optimizer/unit_tests/extensions/back/ShapeOfConstFolding_test.py @@ -0,0 +1,170 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import unittest + +import numpy as np + +from extensions.back.ShapeOfConstFolding import ShapeOfConstFolding +from mo.front.common.partial_infer.eltwise import eltwise_infer +from mo.front.common.partial_infer.utils import int64_array +from mo.utils.ir_engine.compare_graphs import compare_graphs +from unit_tests.utils.graph import build_graph + +const_value = np.random.rand(1, 3, 30, 30) +nodes_attributes = {'input': {'shape': int64_array([1, 3, 30, 30]), 'type': 'Parameter', 'kind': 'op', + 'op': 'Parameter'}, + 'input_data': {'value': None, 'shape': int64_array([1, 3, 30, 30]), 'kind': 'data'}, + 'const': {'type': 'Const', 'kind': 'op', 'op': 'Const', 'value': 
const_value}, + 'const_data': {'kind': 'data', 'value': const_value}, + 'shapeof_input': {'kind': 'op', 'op': 'ShapeOf', 'value': int64_array([1, 3, 30, 30])}, + 'shapeof_input_data': {'value': None, 'shape': None, 'kind': 'data', + 'value': int64_array([1, 3, 30, 30])}, + + 'shapeof_const': {'kind': 'op', 'op': 'ShapeOf', 'value': int64_array([1, 3, 30, 30])}, + 'shapeof_const_data': {'value': None, 'shape': None, 'kind': 'data', + 'value': int64_array([1, 3, 30, 30])}, + + 'mul': {'kind': 'op', 'op': 'Mul', 'infer': lambda node: eltwise_infer(node, lambda a, b: a * b)}, + 'mul_data': {'kind': 'data', 'value': np.array([1, 9, 900, 900])}, + 'last': {'kind': 'op', 'op': 'Result'}, + + # new nodes + 'new_const_shapeof': {'type': 'Const', 'kind': 'op', 'op': 'Const', + 'value': int64_array([1, 3, 30, 30])} + } + +const_value2 = np.random.rand(30, 30) +nodes_attributes2 = {'input': {'shape': int64_array([1, 3, 30, 30]), 'type': 'Parameter', 'kind': 'op', + 'op': 'Parameter'}, + 'input_data': {'value': None, 'shape': int64_array([1, 3, 30, 30]), 'kind': 'data'}, + + 'const': {'type': 'Const', 'kind': 'op', 'op': 'Const', 'value': const_value2}, + 'const_data': {'kind': 'data', 'value': const_value2}, + + 'shapeof_const': {'kind': 'op', 'op': 'ShapeOf', 'value': int64_array([2700, 30])}, + 'shapeof_const_data': {'value': int64_array([2700, 30]), 'shape': None, 'kind': 'data'}, + + 'gather': {'kind': 'op', 'op': 'Gather', 'batch_dims': 0}, + 'gather_data': {'kind': 'data'}, + + 'const_concat': {'type': 'Const', 'kind': 'op', 'op': 'Const', 'value': [1]}, + 'const_concat_data': {'kind': 'data', 'value': [1]}, + 'concat': {'kind': 'op', 'op': 'Concat'}, + 'concat_data': {'kind': 'data'}, + + 'reshape': {'kind': 'op', 'op': 'Reshape'}, + 'reshape_data': {'kind': 'data'}, + + 'matmul': {'kind': 'op', 'op': 'MatMul'}, + 'matmul_data': {'kind': 'data'}, + 'last': {'kind': 'op', 'op': 'Result'}, + + # new nodes + 'new_const_shapeof': {'type': 'Const', 'kind': 'op', 'op': 'Const', + 'value': int64_array([2700, 30])}, + } + + +class ShapeOfConstFoldingTests(unittest.TestCase): + def test_const_with_one_output(self): + graph = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_data', 'shapeof_input'), + ('shapeof_input', 'shapeof_input_data'), + ('shapeof_input_data', 'mul'), + ('const', 'const_data'), + ('const_data', 'shapeof_const'), + ('shapeof_const', 'shapeof_const_data'), + ('shapeof_const_data', 'mul'), + ('mul', 'mul_data'), + ('mul_data', 'last')], + { + 'input': {'shape': int64_array([1, 3, 30, 30])}, + 'input_data': {'shape': int64_array([1, 3, 30, 30])}, + 'shapeof_input': {'value': int64_array([1, 3, 30, 30])}, + 'shapeof_input_data': {'value': int64_array([1, 3, 30, 30])}, + 'const': {'value': const_value}, + 'const_data': {'value': const_value}, + 'shapeof_const': {'value': int64_array([1, 3, 30, 30])}, + 'shapeof_const_data': {'value': int64_array([1, 3, 30, 30])}, + 'mul_data': {'value': int64_array([1, 9, 900, 900])}, + }, + nodes_with_edges_only=True) + + graph_ref = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_data', 'shapeof_input'), + ('shapeof_input', 'shapeof_input_data'), + ('shapeof_input_data', 'mul'), + ('new_const_shapeof', 'shapeof_const_data'), + ('shapeof_const_data', 'mul'), + ('mul', 'mul_data'), + ('mul_data', 'last')], + { + 'input': {'shape': int64_array([1, 3, 30, 30])}, + 'input_data': {'shape': int64_array([1, 3, 30, 30])}, + 'shapeof_input': {'value': int64_array([1, 3, 30, 30])}, + 'shapeof_input_data': {'value': 
int64_array([1, 3, 30, 30])}, + 'new_const_shapeof': {'value': int64_array([1, 3, 30, 30])}, + 'shapeof_const_data': {'value': int64_array([1, 3, 30, 30])}, + 'mul_data': {'value': int64_array([1, 9, 900, 900])}, + }, + nodes_with_edges_only=True) + ShapeOfConstFolding().find_and_replace_pattern(graph) + (flag, resp) = compare_graphs(graph, graph_ref, 'last') + self.assertTrue(flag, resp) + + def test_const_with_two_outputs(self): + graph = build_graph(nodes_attributes2, + [('input', 'input_data'), + ('input_data', 'reshape'), + ('const', 'const_data'), + ('const_data', 'shapeof_const'), + ('shapeof_const', 'shapeof_const_data'), + ('shapeof_const_data', 'gather'), + ('gather', 'gather_data'), + ('const_concat', 'const_concat_data'), + ('const_concat_data', 'concat'), + ('gather_data', 'concat'), + ('concat', 'reshape'), + ('reshape', 'reshape_data'), + ('reshape_data', 'matmul'), + ('const_data', 'matmul'), + ('matmul', 'matmul_data'), + ('matmul_data', 'last') + ], + { + 'input': {'shape': int64_array([1, 3, 30, 30])}, + 'input_data': {'shape': int64_array([1, 3, 30, 30])}, + 'shapeof_const': {'value': int64_array([2700, 30])}, + 'shapeof_const_data': {'value': int64_array([2700, 30])}, + }, + nodes_with_edges_only=True) + + graph_ref = build_graph(nodes_attributes2, + [('input', 'input_data'), + ('input_data', 'reshape'), + ('new_const_shapeof', 'shapeof_const_data'), + ('shapeof_const_data', 'gather'), + ('gather', 'gather_data'), + ('const_concat', 'const_concat_data'), + ('const_concat_data', 'concat'), + ('gather_data', 'concat'), + ('concat', 'reshape'), + ('reshape', 'reshape_data'), + ('reshape_data', 'matmul'), + ('const', 'const_data'), + ('const_data', 'matmul'), + ('matmul', 'matmul_data'), + ('matmul_data', 'last')], + { + 'input': {'shape': int64_array([1, 3, 30, 30])}, + 'input_data': {'shape': int64_array([1, 3, 30, 30])}, + 'new_const_shapeof': {'value': int64_array([2700, 30])}, + 'shapeof_const_data': {'value': int64_array([2700, 30])}, + }, + nodes_with_edges_only=True) + ShapeOfConstFolding().find_and_replace_pattern(graph) + (flag, resp) = compare_graphs(graph, graph_ref, 'last') + self.assertTrue(flag, resp) From aa0c93300cc3ddd00e0cdcf0d10e1c57b16247cf Mon Sep 17 00:00:00 2001 From: Ivan Tikhonov Date: Wed, 9 Jun 2021 12:19:36 +0300 Subject: [PATCH 40/41] Update for Python API to create networks with Memory layers (#6079) * Update python API for Memory * Run unit tests on CPU only * add missed import * Update tests * fix py tests --- .../src/openvino/inference_engine/ie_api.pxd | 2 +- .../python/tests/test_InferRequest.py | 80 ++++++++++++++----- ngraph/python/src/pyngraph/function.cpp | 38 +++++++++ ngraph/python/tests/test_ngraph/test_basic.py | 22 +++++ 4 files changed, 122 insertions(+), 20 deletions(-) diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pxd b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pxd index 5d942f93050..efb389259d3 100644 --- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pxd +++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pxd @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 from .cimport ie_api_impl_defs as C -from .ie_api_impl_defs cimport CBlob, CTensorDesc, InputInfo, CPreProcessChannel, CPreProcessInfo, CExecutableNetwork +from .ie_api_impl_defs cimport CBlob, CTensorDesc, InputInfo, CPreProcessChannel, CPreProcessInfo, CExecutableNetwork, CVariableState import os diff --git 
a/inference-engine/ie_bridges/python/tests/test_InferRequest.py b/inference-engine/ie_bridges/python/tests/test_InferRequest.py index 6928944139d..af79c0ff155 100644 --- a/inference-engine/ie_bridges/python/tests/test_InferRequest.py +++ b/inference-engine/ie_bridges/python/tests/test_InferRequest.py @@ -16,6 +16,20 @@ test_net_xml, test_net_bin = model_path(is_myriad) path_to_img = image_path() +def create_function_with_memory(input_shape, data_type): + import ngraph as ng + from ngraph.impl import Function, Type + + input_data = ng.parameter(input_shape, name="input_data", dtype=data_type) + rv = ng.read_value(input_data, "var_id_667") + add = ng.add(rv, input_data, name="MemoryAdd") + node = ng.assign(add, "var_id_667") + res = ng.result(add, "res") + func = Function(results=[res], sinks=[node], parameters=[input_data], name="name") + caps = Function.to_capsule(func) + return caps + + def read_image(): import cv2 n, c, h, w = (1, 3, 32, 32) @@ -525,28 +539,56 @@ def test_resize_algorithm_work(device): assert np.allclose(res_1, res_2, atol=1e-2, rtol=1e-2) -# issue 56653 -@pytest.mark.skip(reason="Test will enable when nGraph Python API allows to create network with memory") -def test_query_state(device): - import ngraph as ng - from ngraph.impl import Function - input_data = ng.parameter([5, 7], name="input_data", dtype=np.float32) - rv = ng.read_value(input_data, "var_id_667") - #a = ng.add(rv, input_data) - node = ng.assign(rv, "var_id_667") - res = ng.result(rv, "res") - func = Function([res], sinks=[node], parameters=[input_data], name='test') - caps = Function.to_capsule(func) +@pytest.mark.parametrize("mode", ["set_init_memory_state", "reset_memory_state", "normal"]) +@pytest.mark.parametrize("data_type", ["FP32", "FP16", "I32"]) +@pytest.mark.parametrize("input_shape", [[10], [10, 10], [10, 10, 10], [2, 10, 10, 10]]) +@pytest.mark.skipif(os.environ.get("TEST_DEVICE", "CPU") != "CPU", + reason=f"Can't run test on device {os.environ.get('TEST_DEVICE', 'CPU')}, " + "Memory layers fully supported only on CPU") +def test_query_state_write_buffer(device, input_shape, data_type, mode): + ie_core = ie.IECore() + if device == "CPU": + if ie_core.get_metric(device, "FULL_DEVICE_NAME") == "arm_compute::NEON": + pytest.skip("Can't run on ARM plugin") - net = ie.IENetwork(caps) + layout = ["C", "HW", "CHW", "NCHW"] + np_data_type = {"FP32": np.float32, "FP16": np.float16, "I32": np.int32} + + from openvino.inference_engine import TensorDesc, Blob + + net = ie.IENetwork(create_function_with_memory(input_shape, np_data_type[data_type])) ie_core = ie.IECore() exec_net = ie_core.load_network(network=net, device_name=device, num_requests=1) request = exec_net.requests[0] mem_states = request.query_state() mem_state = mem_states[0] - with pytest.raises(ValueError) as e: - ones_arr = np.ones(shape=(1, 800), dtype=np.float32) - mem_state.state.buffer[:] = ones_arr - assert "assignment destination is read-only" in str(e.value) - assert mem_state.name == 'id_1' - assert mem_state.state.tensor_desc.precision == 'FP32' + + assert mem_state.name == 'var_id_667' + # todo: Uncomment after fix 45611, + # CPU plugin returns outputs and memory state in FP32 in case of FP16 original precision + #assert mem_state.state.tensor_desc.precision == data_type + + for i in range(1, 10): + if mode == "set_init_memory_state": + # create initial value + const_init = 5 + init_array = np.full(input_shape, const_init, dtype=np_data_type[mem_state.state.tensor_desc.precision]) + tensor_desc = 
TensorDesc(mem_state.state.tensor_desc.precision, input_shape, layout[len(input_shape) - 1]) + blob = Blob(tensor_desc, init_array) + mem_state.state = blob + + res = exec_net.infer({"input_data": np.full(input_shape, 1, dtype=np_data_type[data_type])}) + expected_res = np.full(input_shape, 1 + const_init, dtype=np_data_type[data_type]) + elif mode == "reset_memory_state": + # reset initial state of ReadValue to zero + mem_state.reset() + res = exec_net.infer({"input_data": np.full(input_shape, 1, dtype=np_data_type[data_type])}) + + # always ones + expected_res = np.full(input_shape, 1, dtype=np_data_type[data_type]) + else: + res = exec_net.infer({"input_data": np.full(input_shape, 1, dtype=np_data_type[data_type])}) + expected_res = np.full(input_shape, i, dtype=np_data_type[data_type]) + + assert np.allclose(res['MemoryAdd'], expected_res, atol=1e-6), \ + "Expected values: {} \n Actual values: {} \n".format(expected_res, res) diff --git a/ngraph/python/src/pyngraph/function.cpp b/ngraph/python/src/pyngraph/function.cpp index 6b60372a85a..ed84d9c458c 100644 --- a/ngraph/python/src/pyngraph/function.cpp +++ b/ngraph/python/src/pyngraph/function.cpp @@ -7,6 +7,7 @@ #include "ngraph/function.hpp" // ngraph::Function #include "ngraph/op/parameter.hpp" // ngraph::op::Parameter +#include "ngraph/op/sink.hpp" #include "pyngraph/function.hpp" namespace py = pybind11; @@ -17,6 +18,42 @@ void regclass_pyngraph_Function(py::module m) { py::class_> function(m, "Function"); function.doc() = "ngraph.impl.Function wraps ngraph::Function"; + + function.def(py::init([](const ngraph::ResultVector& res, + const std::vector>& nodes, + const ngraph::ParameterVector& params, + const std::string& name) { + ngraph::SinkVector sinks; + for (const auto& node : nodes) + { + auto sink = std::dynamic_pointer_cast(node); + NGRAPH_CHECK(sink != nullptr, "Node {} is not instance of Sink"); + sinks.push_back(sink); + } + return std::make_shared(res, sinks, params, name); + }), + py::arg("results"), + py::arg("sinks"), + py::arg("parameters"), + py::arg("name"), + R"( + Create user-defined Function which is a representation of a model. + + Parameters + ---------- + results : List[op.Result] + List of results. + + sinks : List[Node] + List of Nodes to be used as Sinks (e.g. Assign ops). + + parameters : List[op.Parameter] + List of parameters. + + name : str + String to set as function's friendly name. + )"); + function.def(py::init>&, const std::vector>&, const std::string&>(), @@ -37,6 +74,7 @@ void regclass_pyngraph_Function(py::module m) name : str String to set as function's friendly name. 
)"); + function.def(py::init&, const std::vector>&, const std::string&>(), diff --git a/ngraph/python/tests/test_ngraph/test_basic.py b/ngraph/python/tests/test_ngraph/test_basic.py index 210bcb99ae0..da6cf993d3f 100644 --- a/ngraph/python/tests/test_ngraph/test_basic.py +++ b/ngraph/python/tests/test_ngraph/test_basic.py @@ -403,3 +403,25 @@ def test_mutiple_outputs(): output = computation(input_data) assert np.equal(output, expected_output).all() + + +def test_sink_function_ctor(): + input_data = ng.parameter([2, 2], name="input_data", dtype=np.float32) + rv = ng.read_value(input_data, "var_id_667") + add = ng.add(rv, input_data, name="MemoryAdd") + node = ng.assign(add, "var_id_667") + res = ng.result(add, "res") + function = Function(results=[res], sinks=[node], parameters=[input_data], name="TestFunction") + + ordered_ops = function.get_ordered_ops() + op_types = [op.get_type_name() for op in ordered_ops] + assert op_types == ["Parameter", "ReadValue", "Add", "Assign", "Result"] + assert len(function.get_ops()) == 5 + assert function.get_output_size() == 1 + assert function.get_output_op(0).get_type_name() == "Result" + assert function.get_output_element_type(0) == input_data.get_element_type() + assert list(function.get_output_shape(0)) == [2, 2] + assert (function.get_parameters()[0].get_partial_shape()) == PartialShape([2, 2]) + assert len(function.get_parameters()) == 1 + assert len(function.get_results()) == 1 + assert function.get_friendly_name() == "TestFunction" From 8c74a0a52c1cfa4a74fa2539e0ba4d91d80f6145 Mon Sep 17 00:00:00 2001 From: Ivan Novoselov Date: Wed, 9 Jun 2021 14:27:43 +0300 Subject: [PATCH 41/41] [CPU] Fixed reorder for strided both inputs and outputs (#6082) --- inference-engine/thirdparty/mkl-dnn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inference-engine/thirdparty/mkl-dnn b/inference-engine/thirdparty/mkl-dnn index 1cb9d0615aa..87516e47dae 160000 --- a/inference-engine/thirdparty/mkl-dnn +++ b/inference-engine/thirdparty/mkl-dnn @@ -1 +1 @@ -Subproject commit 1cb9d0615aaf511b51b8f8fc3c3ff8805ad9be6c +Subproject commit 87516e47dae71fc9c326d0f3685c1572c740e127