From 2f07b982517137031f530b6940baf7b4eb745c6b Mon Sep 17 00:00:00 2001
From: Indira Salyahova
Date: Wed, 15 Dec 2021 12:12:54 +0300
Subject: [PATCH 01/27] [POT] Support layout in pot (#9060)

* support layout pot

* pylint
---
 .../pot/configs/simplified_mode_template.json |  1 +
 .../tools/pot/data_loaders/creator.py         |  1 +
 .../tools/pot/data_loaders/image_loader.py    | 29 ++++++++++++++++++-
 .../openvino/tools/pot/data_loaders/utils.py  |  6 ++--
 tools/pot/tests/test_image_loading.py         | 26 +++++++++++++++++
 5 files changed, 59 insertions(+), 4 deletions(-)

diff --git a/tools/pot/configs/simplified_mode_template.json b/tools/pot/configs/simplified_mode_template.json
index 52db5686cd4..2df4aa1e898 100644
--- a/tools/pot/configs/simplified_mode_template.json
+++ b/tools/pot/configs/simplified_mode_template.json
@@ -17,6 +17,7 @@ of all possible parameters can be found in the default_quantization_spec.json */
     "engine": {
         "type": "simplified",
+        "layout": "NCHW",               // Layout of input data. Supported layouts: ["NCHW", "NHWC", "CHW", "HWC"]
         "data_source": "PATH_TO_SOURCE" // You can specify path to directory with images. Also you can
                                         // specify template for file names to filter images to load.
                                         // Templates are unix style (This option valid only in simplified mode)
diff --git a/tools/pot/openvino/tools/pot/data_loaders/creator.py b/tools/pot/openvino/tools/pot/data_loaders/creator.py
index 14e76e92f00..f4cd1e05fa9 100644
--- a/tools/pot/openvino/tools/pot/data_loaders/creator.py
+++ b/tools/pot/openvino/tools/pot/data_loaders/creator.py
@@ -26,6 +26,7 @@ def create_data_loader(config, model):
         if tuple(in_node.shape) != (1, 3):
             data_loader = ImageLoader(config)
             data_loader.shape = in_node.shape
+            data_loader.get_layout(in_node)
             return data_loader
 
     if data_loader is None:
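Taken together with the template above, the loader can now be driven with an explicit layout. Below is a minimal, illustrative sketch, not part of the patch: the `addict` Dict and the import paths are assumed from the test file at the end of this patch, and the model/image paths are placeholders.

    # Illustrative sketch only -- not part of the patch.
    # Imports assumed from tests/test_image_loading.py usage.
    from addict import Dict

    from openvino.tools.pot.data_loaders.creator import create_data_loader
    from openvino.tools.pot.graph import load_model

    engine_config = Dict({
        "device": "CPU",
        "type": "simplified",
        "layout": "NHWC",                       # new key; omit it to let get_layout() infer
        "data_source": "./calibration_images",  # placeholder directory with images
    })

    model_config = Dict({
        "model_name": "model",
        "model": "./model.xml",                 # placeholder IR files
        "weights": "./model.bin",
    })

    model = load_model(model_config)
    data_loader = create_data_loader(engine_config, model)
    print(data_loader[0].shape)                 # e.g. (224, 224, 3) for NHWC
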
diff --git a/tools/pot/openvino/tools/pot/data_loaders/image_loader.py b/tools/pot/openvino/tools/pot/data_loaders/image_loader.py
index 4ba603555e6..d81a5586d4c 100644
--- a/tools/pot/openvino/tools/pot/data_loaders/image_loader.py
+++ b/tools/pot/openvino/tools/pot/data_loaders/image_loader.py
@@ -3,6 +3,7 @@
 
 from cv2 import imread, IMREAD_GRAYSCALE
 
+from openvino.runtime import Layout, Dimension # pylint: disable=E0611,E0401
 from ..api.data_loader import DataLoader
 from ..data_loaders.utils import prepare_image, collect_img_files
 
@@ -14,6 +15,7 @@ class ImageLoader(DataLoader):
         self._img_files = collect_img_files(config.data_source)
         self._shape = None
+        self._layout = config.get('layout', None)
         self._crop_central_fraction = config.get('central_fraction', None)
 
     def __getitem__(self, idx):
@@ -37,4 +39,29 @@ class ImageLoader(DataLoader):
         if image is None:
             raise Exception('Can not read the image: {}'.format(img_path))
 
-        return prepare_image(image, self.shape[-2:], self._crop_central_fraction)
+        return prepare_image(image, self._layout, self.shape[-2:], self._crop_central_fraction)
+
+    def get_layout(self, input_node):
+        if self._layout is not None:
+            if 'C' not in self._layout or 'H' not in self._layout or 'W' not in self._layout:
+                raise ValueError('Unexpected {} layout'.format(self._layout))
+            self._layout = Layout(self._layout)
+            return
+
+        layout_from_ir = input_node.graph.graph.get('layout', None)
+        if layout_from_ir is not None:
+            self._layout = Layout(layout_from_ir)
+            return
+
+        image_colors_dim = (Dimension(3), Dimension(1))
+        num_dims = len(self._shape)
+        if num_dims == 4:
+            if self._shape[1] in image_colors_dim:
+                self._layout = Layout("NCHW")
+            elif self._shape[3] in image_colors_dim:
+                self._layout = Layout("NHWC")
+        elif num_dims == 3:
+            if self._shape[0] in image_colors_dim:
+                self._layout = Layout("CHW")
+            elif self._shape[2] in image_colors_dim:
+                self._layout = Layout("HWC")
diff --git a/tools/pot/openvino/tools/pot/data_loaders/utils.py b/tools/pot/openvino/tools/pot/data_loaders/utils.py
index d60d5b4d1ff..fde14d66ba2 100644
--- a/tools/pot/openvino/tools/pot/data_loaders/utils.py
+++ b/tools/pot/openvino/tools/pot/data_loaders/utils.py
@@ -9,6 +9,7 @@ from pathlib import Path
 
 import numpy as np
 import cv2 as cv
+from openvino.runtime import Layout # pylint: disable=E0611,E0401
 
 from openvino.tools.pot.utils.logger import get_logger
 
 logger = get_logger(__name__)
@@ -34,12 +35,11 @@ def crop(image, central_fraction):
     return image[start_height:start_height + dst_height, start_width:start_width + dst_width]
 
 
-def prepare_image(image, dst_shape, central_fraction=None):
-
+def prepare_image(image, layout, dst_shape, central_fraction=None):
     if central_fraction:
         image = crop(image, central_fraction)
 
-    if image.shape[-1] in [3, 1]:
+    if layout == Layout('NCHW') or layout == Layout('CHW'):
         image = cv.resize(image, dst_shape[::-1])
         return image.transpose(2, 0, 1)
 
diff --git a/tools/pot/tests/test_image_loading.py b/tools/pot/tests/test_image_loading.py
index 0836e3025ff..ff82d73c3d6 100644
--- a/tools/pot/tests/test_image_loading.py
+++ b/tools/pot/tests/test_image_loading.py
@@ -44,3 +44,29 @@ def test_check_image(tmp_path, models, model_name, model_framework):
     num_images_in_dir = len(os.listdir(path_image_data))
 
     assert num_images_from_data_loader == num_images_in_dir
+
+
+TEST_MODELS_LAYOUT = [('mobilenet-v2-pytorch', 'pytorch', 'NCHW', (3, 224, 224)),
+                      ('mobilenet-v2-pytorch', 'pytorch', 'NHWC', (224, 224, 3)),
+                      ('mobilenet-v2-pytorch', 'pytorch', None, (3, 224, 224)),
+                      ('mobilenet-v1-1.0-224-tf', 'tf', None, (224, 224, 3))]
+
+
+@pytest.mark.parametrize(
+    'model_name, model_framework, layout, reference_shape', TEST_MODELS_LAYOUT,
+    ids=['{}_{}_{}'.format(m[0], m[1], m[2]) for m in TEST_MODELS_LAYOUT])
+def test_check_layout(tmp_path, models, model_name, model_framework, layout, reference_shape):
+    test_dir = Path(__file__).parent
+    path_image_data = os.path.join(test_dir, "data/image_data")
+
+    engine_config = Dict({"device": "CPU",
+                          "type": "simplified",
+                          "layout": layout,
+                          "data_source": path_image_data})
+
+    model = models.get(model_name, model_framework, tmp_path)
+    model = load_model(model.model_params)
+
+    data_loader = create_data_loader(engine_config, model)
+    image = data_loader[0]
+
+    assert image.shape == reference_shape
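The shape-based fallback in get_layout() is the subtle part of this patch: any axis equal to 3 (color) or 1 (grayscale) is taken as a candidate channel axis. A self-contained restatement of that heuristic with plain integers instead of openvino.runtime.Dimension (guess_layout is a hypothetical name, used here only for illustration):

    # Illustrative restatement of ImageLoader.get_layout()'s fallback -- not part of the patch.
    def guess_layout(shape):
        channel_dims = (3, 1)          # color or grayscale channel axis
        if len(shape) == 4:
            if shape[1] in channel_dims:
                return "NCHW"
            if shape[3] in channel_dims:
                return "NHWC"
        elif len(shape) == 3:
            if shape[0] in channel_dims:
                return "CHW"
            if shape[2] in channel_dims:
                return "HWC"
        return None                    # ambiguous: the loader keeps the layout unset

    assert guess_layout((1, 3, 224, 224)) == "NCHW"
    assert guess_layout((1, 224, 224, 3)) == "NHWC"
    assert guess_layout((224, 224, 3)) == "HWC"

Note that a shape such as (1, 3, 224, 3) matches the NCHW branch first; the explicit "layout" key exists precisely so users can override an ambiguous guess.
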
From b492b591363fe5493b02656a86ab186c910b00ab Mon Sep 17 00:00:00 2001
From: Ilya Znamenskiy
Date: Wed, 15 Dec 2021 12:17:13 +0300
Subject: [PATCH 02/27] [GPU] Fix for conv/deconv weights calculated in runtime (#8952)

---
 .../convolution/convolution_kernel_base.cpp   |   5 +-
 .../clDNN/src/binary_convolution.cpp          |   2 +-
 .../thirdparty/clDNN/src/convolution.cpp      |  13 +-
 .../thirdparty/clDNN/src/deconvolution.cpp    |  24 +-
 .../src/graph_optimizer/reorder_inputs.cpp    |  33 +-
 .../thirdparty/clDNN/src/layout_optimizer.cpp |  20 +-
 .../thirdparty/clDNN/src/network.cpp          |  18 +-
 .../thirdparty/clDNN/src/program.cpp          |  14 +-
 .../thirdparty/clDNN/src/program_node.cpp     |   3 +-
 .../tests/test_cases/convolution_gpu_test.cpp | 816 ++++++++----------
 .../tests/test_cases/fusings_gpu_test.cpp     |   8 +-
 .../clDNN/tests/test_cases/memory_test.cpp    |  50 +-
 .../intel_gpu/src/plugin/ops/convolution.cpp  |  10 +-
 src/plugins/intel_gpu/src/plugin/program.cpp  |   2 +-
 14 files changed, 474 insertions(+), 544 deletions(-)

diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_base.cpp
index e810a835807..c08ba2d292a 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_base.cpp
@@ -196,15 +196,16 @@ KernelsData ConvolutionKernelBase::GetCommonKernelsData(const Params& params,
         return {};
     }
 
+    auto preferredWeightsLayout = GetPreferredWeightsLayout(newParams);
     bool succeed = UpdateWeightsParams(newParams,
                                        options,
-                                       GetPreferredWeightsLayout(newParams),
+                                       preferredWeightsLayout,
                                        kd.weightsReorderParams,
                                        GetSupportedKey(),
                                        newParams.groups,
                                        newParams.transposed);
 
-    bool bSupportedWeightsLayout = newParams.weights.GetLayout() == GetPreferredWeightsLayout(newParams);
+    bool bSupportedWeightsLayout = newParams.weights.GetLayout() == preferredWeightsLayout;
     const bool bWeightsOK = bSupportedWeightsLayout || options.allowStaticInputReordering;
 
     if (!succeed || !bWeightsOK) {
diff --git a/inference-engine/thirdparty/clDNN/src/binary_convolution.cpp b/inference-engine/thirdparty/clDNN/src/binary_convolution.cpp
index d10ee575a93..acec60b0e5e 100644
--- a/inference-engine/thirdparty/clDNN/src/binary_convolution.cpp
+++ b/inference-engine/thirdparty/clDNN/src/binary_convolution.cpp
@@ -125,7 +125,7 @@ binary_convolution_inst::typed_primitive_inst(network& network, binary_convoluti
                           "Only one-dimensional batch size are supported");
     CLDNN_ERROR_LESS_THAN(node.id(),
                           "Weights feature maps number",
-                          (input_inst.size.feature[0] + pad.feature[0]) / split,
+                          input_inst.size.feature[0],
                           "input feature maps number",
                           filter_inst.size.feature[0],
                           "Weights/ifm mismatch");
diff --git a/inference-engine/thirdparty/clDNN/src/convolution.cpp b/inference-engine/thirdparty/clDNN/src/convolution.cpp
index 0c478a0da6c..fe1de244f9c 100644
--- a/inference-engine/thirdparty/clDNN/src/convolution.cpp
+++ b/inference-engine/thirdparty/clDNN/src/convolution.cpp
@@ -97,7 +97,7 @@ layout convolution_inst::calc_output_layout(convolution_node const& node) {
         input_layout.format == format::image_2d_weights_winograd_6x3_s1_xfbyb)
         CLDNN_ERROR_MESSAGE(
             node.id(),
-            "Input for convolution should not be in windograd weights format - it is reserved for weights only");
+            "Input for convolution should not be in winograd weights format - it is reserved for weights only");
 
     if (input_layout.format == format::winograd_2x3_s1_data) {
         CLDNN_ERROR_NOT_EQUAL(node.id(),
@@ -369,10 +369,19 @@ convolution_inst::typed_primitive_inst(network& network, convolution_node const&
                               "Only one-dimensional batch size are supported");
         CLDNN_ERROR_LESS_THAN(node.id(),
                               "Weights feature maps number",
-                              (input_inst.size.feature[0] + pad.feature[0]) / split,
+                              input_inst.size.feature[0],
                               "input feature maps number",
                               weights_ifm,
                               "Weights/ifm mismatch");
+
+        if (!argument.grouped_weights_shape && !format::is_grouped(filter_inst.format)) {
+            CLDNN_ERROR_NOT_EQUAL(node.id(),
+                                  "Weights feature maps number",
+                                  input_inst.size.feature[0],
+                                  "input feature maps number",
+                                  weights_ifm,
+                                  "Weights/ifm mismatch");
+        }
     }
 }
 }  // namespace cldnn
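The conv and deconv validation changes share one idea: drop the old split/padding arithmetic and compare feature-map counts directly, requiring an exact match when the weights are not grouped. A condensed restatement of the combined rule (Python stand-in with hypothetical names; CLDNN_ERROR_LESS_THAN fires when the first value is smaller, CLDNN_ERROR_NOT_EQUAL when the values differ):

    # Illustrative restatement of the updated conv/deconv weight checks -- not part of the patch.
    def validate_weights_ifm(input_fm, weights_ifm, grouped):
        # CLDNN_ERROR_LESS_THAN: input must supply at least as many feature maps as the weights expect.
        if input_fm < weights_ifm:
            raise ValueError("Weights/ifm mismatch")
        # New CLDNN_ERROR_NOT_EQUAL: plain (ungrouped) weights must match the input exactly.
        if not grouped and input_fm != weights_ifm:
            raise ValueError("Weights/ifm mismatch")
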
diff --git a/inference-engine/thirdparty/clDNN/src/deconvolution.cpp b/inference-engine/thirdparty/clDNN/src/deconvolution.cpp
index c506c77d058..ab11ef233f4 100644
--- a/inference-engine/thirdparty/clDNN/src/deconvolution.cpp
+++ b/inference-engine/thirdparty/clDNN/src/deconvolution.cpp
@@ -82,11 +82,11 @@ layout deconvolution_inst::calc_output_layout(deconvolution_node const& node) {
     int32_t off_factor = -2;
     size_t spatial_dims = cldnn::format::traits(input_layout.format).spatial_num;
     CLDNN_ERROR_GREATER_THAN(node.id(),
-        "number of spatial dimensions",
-        spatial_dims,
-        "expected number of dimensions",
-        3,
-        "As for now, deconvolutions with more than 3 dimensions are not supported");
+                             "number of spatial dimensions",
+                             spatial_dims,
+                             "expected number of dimensions",
+                             3,
+                             "As for now, deconvolutions with more than 3 dimensions are not supported");
 
     int32_t x = off_factor * pad.spatial[0] + (input_layout.size.spatial[0] - 1) * strd.spatial[0] + filter_size.spatial[0];
     int32_t y = 1;
@@ -208,6 +208,7 @@ deconvolution_inst::typed_primitive_inst(network& network, deconvolution_node co
                                   1,
                                   "Spatial[0] of bias should be 1. Bias isn't 1D vector.");
         }
+
         CLDNN_ERROR_NOT_EQUAL(node.id(),
                               "deconvolution padding filling value",
                               node.get_output_layout().data_padding.filling_value(),
@@ -240,10 +241,19 @@ deconvolution_inst::typed_primitive_inst(network& network, deconvolution_node co
                               "Only one-dimensional features are supported");
         CLDNN_ERROR_LESS_THAN(node.id(),
                               "Weights feature maps number",
-                              (input_inst.size.feature[0] + pad.feature[0]) / split,
+                              input_inst.size.feature[0],
                               "input feature maps number",
                               weights_ifm,
-                              "Weights/ifm mimsmatch");
+                              "Weights/ifm mismatch");
+
+        if (!argument.grouped_weights_shape && !format::is_grouped(filter_inst.format)) {
+            CLDNN_ERROR_NOT_EQUAL(node.id(),
+                                  "Weights feature maps number",
+                                  input_inst.size.feature[0],
+                                  "input feature maps number",
+                                  weights_ifm,
+                                  "Weights/ifm mismatch");
+        }
     }
 }
 }  // namespace cldnn
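The reorder pass below extends the same recovery to conv and deconv weights that are only produced at runtime (so they cannot be statically reordered): when such weights arrive in a non-simple format, a reorder to a plain planar format is inserted, chosen by rank alone. The selection rule, restated (illustrative Python stand-in for the C++ ternary, not part of the patch):

    # Mirrors `dims <= 4 ? format::bfyx : dims == 5 ? format::bfzyx : format::bfwzyx`.
    def preferred_plain_weights_format(dims):
        if dims <= 4:
            return "bfyx"      # 4D and below
        if dims == 5:
            return "bfzyx"     # 5D
        return "bfwzyx"        # 6D

    assert preferred_plain_weights_format(4) == "bfyx"
    assert preferred_plain_weights_format(5) == "bfzyx"
    assert preferred_plain_weights_format(6) == "bfwzyx"
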
diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/reorder_inputs.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/reorder_inputs.cpp
index f0869e06b2a..4e2b1892b60 100644
--- a/inference-engine/thirdparty/clDNN/src/graph_optimizer/reorder_inputs.cpp
+++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/reorder_inputs.cpp
@@ -536,7 +536,7 @@ void reorder_inputs::run(program& p, layout_optimizer& lo, reorder_factory& rf)
         }
     };
 
-    const auto reorder_input_deconvolution = [&p, &lo, &rf](typed_program_node<deconvolution>& deconv_node) {
+    const auto reorder_input_and_weights_deconvolution = [&p, &lo, &rf](typed_program_node<deconvolution>& deconv_node) {
         auto& input = deconv_node.input();
         auto input_layout = input.get_output_layout();
         auto new_format = lo.get_preferred_format(deconv_node);
@@ -547,14 +547,41 @@ void reorder_inputs::run(program& p, layout_optimizer& lo, reorder_factory& rf)
             p.add_intermediate(reorder.first, deconv_node, 0, !reorder.second);
         }
+
+        auto& weights = deconv_node.weights();
+        auto weights_layout = weights.get_output_layout();
+        if (!format::is_simple_data_format(weights_layout.format) && !weights.is_type<data>() && !weights.is_constant()) {
+            auto dims = weights_layout.format.dimension();
+            auto preferred_format = dims <= 4 ? format::bfyx : dims == 5 ? format::bfzyx : format::bfwzyx;
+            auto reorder = rf.get_reorder(weights.id(), weights_layout,
+                                          layout{ weights_layout.data_type, preferred_format, weights_layout.size });
+            if (reorder.first) {
+                p.add_intermediate(reorder.first, deconv_node, 1, !reorder.second);
+            }
+        }
     };
 
+    const auto reorder_weights_convolution = [&p, &lo, &rf](typed_program_node<convolution>& conv_node) {
+        auto& weights = conv_node.weights();
+        auto weights_layout = weights.get_output_layout();
+        if (!format::is_simple_data_format(weights_layout.format) && !weights.is_type<data>() && !weights.is_constant()) {
+            auto dims = weights_layout.format.dimension();
+            auto preferred_format = dims <= 4 ? format::bfyx : dims == 5 ? format::bfzyx : format::bfwzyx;
+            auto reorder = rf.get_reorder(weights.id(), weights_layout,
+                                          layout{ weights_layout.data_type, preferred_format, weights_layout.size });
+            if (reorder.first) {
+                p.add_intermediate(reorder.first, conv_node, 1, !reorder.second);
+            }
+        }
+    };
+
     for (auto& prim : p.get_processing_order()) {
-        program_helpers::do_for_types<detection_output, binary_convolution, deconvolution>(
+        program_helpers::do_for_types<detection_output, binary_convolution, deconvolution, convolution>(
             *prim,
             reorder_input_detection_output,
             reorder_input_binary_convolution,
-            reorder_input_deconvolution);
+            reorder_input_and_weights_deconvolution,
+            reorder_weights_convolution);
     }
 
     for (auto n : p.get_processing_order()) {
diff --git a/inference-engine/thirdparty/clDNN/src/layout_optimizer.cpp b/inference-engine/thirdparty/clDNN/src/layout_optimizer.cpp
index f2c3ae9c84d..6156ef8e8eb 100644
--- a/inference-engine/thirdparty/clDNN/src/layout_optimizer.cpp
+++ b/inference-engine/thirdparty/clDNN/src/layout_optimizer.cpp
@@ -1320,17 +1320,27 @@ impl_types layout_optimizer::get_preferred_impl_type(program_node& node, format
             impl_candidate = impl_types::ocl;
         }
 
+        size_t eltw_dep = 0;
         for (auto& fo : node.get_fused_primitives()) {
             if (fo.node->is_type<eltwise>()) {
                 auto in_layout = node.get_dependency(fo.dep_start_idx).get_output_layout();
                 auto out_layout = node.get_output_layout();
                 auto in_dt = in_layout.data_type;
                 auto out_dt = out_layout.data_type;
-                if ((out_layout.count() == in_layout.count()) &&
-                    (data_type_traits::is_floating_point(in_dt) || data_type_traits::is_floating_point(out_dt)) && in_dt != out_dt &&
-                    fo.node->as<eltwise>().get_primitive()->needs_onednn_sum_post_op(in_layout)) {
-                    impl_candidate = impl_types::ocl;
-                    break;
+                if (fo.node->as<eltwise>().get_primitive()->needs_onednn_sum_post_op(in_layout)) {
+                    if ((out_layout.count() == in_layout.count()) &&
+                        (data_type_traits::is_floating_point(in_dt) || data_type_traits::is_floating_point(out_dt)) && in_dt != out_dt) {
+                        impl_candidate = impl_types::ocl;
+                        break;
+                    }
+                    if (in_layout.size == out_layout.size && in_layout.format == out_layout.format && in_layout.data_padding == out_layout.data_padding &&
+                        data_type_traits::size_of(in_dt) == data_type_traits::size_of(out_dt)) {
+                        if (eltw_dep > 0) {
+                            impl_candidate = impl_types::ocl;
+                            break;
+                        }
+                        eltw_dep = fo.dep_start_idx;
+                    }
                 }
             } else if (fo.node->is_type<activation>()) {
                 // Some activations aren't implemented in oneDNN
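If we read the new branch correctly, it tracks at most one fusable in-place sum per node: a dtype-changing float sum, or a second byte-compatible sum after one dependency has already been recorded, pushes the node back to the OCL implementation. A condensed sketch of that reading (hypothetical helper, deliberately simplified flags; not part of the patch):

    # Illustrative reading of the layout_optimizer change above -- assumptions, not the real API.
    def pick_impl(fused_sum_posts):
        # fused_sum_posts: (same_count, float_dtype_change, byte_compatible) per fused
        # eltwise that needs a oneDNN sum post-op.
        eltw_dep_seen = False
        for same_count, float_dtype_change, byte_compatible in fused_sum_posts:
            if same_count and float_dtype_change:
                return "ocl"       # oneDNN sum cannot change floating-point dtypes here
            if byte_compatible:
                if eltw_dep_seen:
                    return "ocl"   # only one in-place sum dependency is supported
                eltw_dep_seen = True
        return "onednn"
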
diff --git a/inference-engine/thirdparty/clDNN/src/network.cpp b/inference-engine/thirdparty/clDNN/src/network.cpp
index 98b3a3160a3..febc8d61bce 100644
--- a/inference-engine/thirdparty/clDNN/src/network.cpp
+++ b/inference-engine/thirdparty/clDNN/src/network.cpp
@@ -514,15 +514,17 @@ void network::allocate_primitives() {
                     can_reuse_eltwise_mem = true;
                 }
 
-                if (_primitives.find(eltw_in.id()) != _primitives.end() && _primitives.find(node->id()) != _primitives.end()) {
-                    auto& eltw_inst = _primitives.at(eltw_in.id());
-                    auto& prim_inst = _primitives.at(node->id());
-                    auto eltw_mem_type = eltw_inst->output_memory().get_allocation_type();
-                    auto prim_mem_type = prim_inst->output_memory().get_allocation_type();
+                if (!can_reuse_eltwise_mem) {
+                    if (_primitives.find(eltw_in.id()) != _primitives.end() && _primitives.find(node->id()) != _primitives.end()) {
+                        auto& eltw_inst = _primitives.at(eltw_in.id());
+                        auto& prim_inst = _primitives.at(node->id());
+                        auto eltw_mem_type = eltw_inst->output_memory().get_allocation_type();
+                        auto prim_mem_type = prim_inst->output_memory().get_allocation_type();
 
-                    // Keep lockable memory type for `prim_inst` output if needed
-                    if (eltw_mem_type != prim_mem_type && eltw_mem_type != allocation_type::cl_mem && eltw_mem_type != allocation_type::usm_host)
-                        can_reuse_eltwise_mem = false;
+                        // Keep lockable memory type for `prim_inst` output if needed
+                        if (eltw_mem_type != prim_mem_type && eltw_mem_type != allocation_type::cl_mem && eltw_mem_type != allocation_type::usm_host)
+                            can_reuse_eltwise_mem = false;
+                    }
                 }
 
                 if (fused_op.node->as<eltwise>().get_primitive()->needs_onednn_sum_post_op(eltw_in_layout) && !can_reuse_eltwise_mem) {
diff --git a/inference-engine/thirdparty/clDNN/src/program.cpp b/inference-engine/thirdparty/clDNN/src/program.cpp
index 9d60731d1e2..ebdc3f3920e 100644
--- a/inference-engine/thirdparty/clDNN/src/program.cpp
+++ b/inference-engine/thirdparty/clDNN/src/program.cpp
@@ -741,10 +741,10 @@ program_node& program::get_or_create(std::shared_ptr<primitive> prim) {
 }
 
 void program::add_intermediate(program_node& node,
-    program_node& next,
-    size_t prev_idx,
-    bool connect_int_node_with_old_dep,
-    bool move_usrs_of_prev_to_node) {
+                               program_node& next,
+                               size_t prev_idx,
+                               bool connect_int_node_with_old_dep,
+                               bool move_usrs_of_prev_to_node) {
     if (connect_int_node_with_old_dep && !node.dependencies.empty())
         throw std::invalid_argument(
             "Node which is about to be added in between two other nodes should not have any existing dependencies");
@@ -1112,8 +1112,8 @@ void program::remove_nodes(std::vector<program_node*>& to_remove) {
 
 // TODO: break this function into number of smaller ones + add per-primitive fields (possibly use
 // primitive_inst::to_string?)
void program::dump_program(const char* stage, - bool with_full_info, - std::function const& filter) const { + bool with_full_info, + std::function const& filter) const { std::string path = get_dir_path(options); if (path.empty() || !with_full_info) { return; @@ -1230,7 +1230,7 @@ void program::save_pass_info(std::string pass_name) { } void program::add_optimized_primitive_info(primitive_id optimized_primitive_id, - std::vector replaced_with_ids) { + std::vector replaced_with_ids) { for (auto& e : optimized) { auto it = std::find_if(e.second.begin(), e.second.end(), [&optimized_primitive_id](const primitive_id& id) { return optimized_primitive_id == id; diff --git a/inference-engine/thirdparty/clDNN/src/program_node.cpp b/inference-engine/thirdparty/clDNN/src/program_node.cpp index 30aece05a5b..acc2b143bfd 100644 --- a/inference-engine/thirdparty/clDNN/src/program_node.cpp +++ b/inference-engine/thirdparty/clDNN/src/program_node.cpp @@ -428,7 +428,8 @@ dnnl::post_ops program_node::try_optimize_post_ops(dnnl::post_ops& p_ops, const // Ignore optimized operations for "previous" operation in our operation pair while (type_is_any_optimized(prev_type) && cur_post_op_idx < post_ops_size - 1) { prev_post_op_idx++; - cur_post_op_idx++; + if (prev_post_op_idx == cur_post_op_idx) + cur_post_op_idx++; prev_type = cur_post_ops[prev_post_op_idx].op_type; cur_type = cur_post_ops[cur_post_op_idx].op_type; } diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/convolution_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/convolution_gpu_test.cpp index 607164c7799..6c224fd0e67 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/convolution_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/convolution_gpu_test.cpp @@ -344,13 +344,13 @@ TEST(deformable_convolution_f32_fw_gpu, basic_deformable_convolution_def_group1_ data("biases", biases), convolution( "conv", - {"input", "trans"}, + { "input", "trans" }, { "weights" }, { "biases" }, 1, 1, { 1, 1, 1, 1 }, - tensor{{ 0, 0, 1, 1 }, 0}, + tensor{ { 0, 0, 1, 1 }, 0 }, { 1, 1, 1, 1 }, { 1, 4, 4, 4 }) ); @@ -475,13 +475,13 @@ TEST(deformable_convolution_f32_fw_gpu, basic_deformable_convolution_def_group1) data("biases", biases), convolution( "conv", - {"input", "trans"}, + { "input", "trans" }, { "weights" }, { "biases" }, 1, 1, { 1, 1, 1, 1 }, - tensor{{ 0, 0, 2, 2 }, 0}, + tensor{ { 0, 0, 2, 2 }, 0 }, { 1, 1, 2, 2 }, { 1, 4, 4, 4 }) ); @@ -638,13 +638,13 @@ TEST(deformable_convolution_f32_fw_gpu, basic_deformable_convolution) { data("biases", biases), convolution( "conv", - {"input", "trans"}, + { "input", "trans" }, { "weights" }, { "biases" }, 1, 2, { 1, 1, 1, 1 }, - tensor{{ 0, 0, 2, 2 }, 0}, + tensor{ { 0, 0, 2, 2 }, 0 }, { 1, 1, 2, 2 }, { 1, 4, 4, 4 }) ); @@ -698,8 +698,8 @@ TEST(convolution_f32_fw_gpu, basic_convolution_no_bias) { auto& engine = get_test_engine(); - auto input = engine.allocate_memory({ data_types::f32,format::yxfb,{ 1, 1, 5, 4 } }); - auto weights = engine.allocate_memory({ data_types::f32,format::bfyx,{ 1, 1, 3, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 5, 4 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 3, 2 } }); set_values(input, { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 2.0f, 2.0f, 3.0f, 4.0f, 6.0f, 3.0f, 3.0f, 3.0f, 5.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f }); set_values(weights, { 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f }); @@ -710,7 +710,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution_no_bias) { 
topology topology( input_layout("input", input->get_layout()), data("weights", weights), - convolution("conv", "input", { "weights" }, { 1,1,1,2 })); + convolution("conv", "input", { "weights" }, { 1, 1, 1, 2 })); network network(engine, topology); network.set_input_data("input", input); @@ -769,8 +769,8 @@ TEST(convolution_f32_fw_gpu, basic_convolution_int8_no_bias) { auto& engine = get_test_engine(); - auto input = engine.allocate_memory({ data_types::f32,format::bfyx,{ 1, 1, 5, 4 } }); - auto weights = engine.allocate_memory({ data_types::i8,format::bfyx,{ 1, 1, 3, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 5, 4 } }); + auto weights = engine.allocate_memory({ data_types::i8, format::bfyx, { 1, 1, 3, 2 } }); set_values(input, { 1.1f, 2.4f, 3.5f, 4.5f, 5.8f, 2.9f, 2.3f, 3.5f, 4.4f, 6.6f, @@ -784,10 +784,10 @@ TEST(convolution_f32_fw_gpu, basic_convolution_int8_no_bias) { topology topology( input_layout("input", input->get_layout()), - reorder("to_int","input", { data_types::i8,format::bfyx,{ 1, 1, 5, 4 } }), + reorder("to_int","input", { data_types::i8, format::bfyx, { 1, 1, 5, 4 } }), data("weights", weights), - convolution("conv", "to_int", { "weights" }, { 1,1,1,2 }), - reorder("output", "conv", { data_types::f32,format::bfyx,{ 1, 1, 3, 2 } })); + convolution("conv", "to_int", { "weights" }, { 1, 1, 1, 2 }), + reorder("output", "conv", { data_types::f32, format::bfyx, { 1, 1, 3, 2 } })); network network(engine, topology); network.set_input_data("input", input); @@ -839,7 +839,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution3D_no_bias) { topology topology( input_layout("input", input->get_layout()), data("weights", weights), - convolution("conv", "input", { "weights" }, { 1,1,1,2 })); + convolution("conv", "input", { "weights" }, { 1, 1, 1, 2 })); network network(engine, topology); network.set_input_data("input", input); @@ -921,9 +921,9 @@ TEST(convolution_f32_fw_gpu, basic_convolution3D) { auto& engine = get_test_engine(); - auto input = engine.allocate_memory({ data_types::f32, format::bfzyx,{ 1, 1, 4, 4, 4 } }); - auto weights = engine.allocate_memory({ data_types::f32, format::bfzyx,{ 1, 1, 2, 2, 2 } }); - auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfzyx, { 1, 1, 4, 4, 4 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::bfzyx, { 1, 1, 2, 2, 2 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1, 1 } }); set_values(input, { 1.0f, 0.0f, 1.0f, 0.0f, @@ -1009,10 +1009,10 @@ TEST(convolution_f32_fw_gpu, basic_convolution3D) { } } -TEST(convolution_f32_fw_gpu, basic_convolution3D_split2) { +TEST(convolution_f32_fw_gpu, basic_convolution3D_group2) { // data is similar as in basic_convolution3D auto& engine = get_test_engine(); - auto input = engine.allocate_memory({ data_types::f32, format::bfzyx,{ 1, 2, 4, 4, 4 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfzyx, { 1, 2, 4, 4, 4 } }); auto weights_1 = engine.allocate_memory({ data_types::f32, format::goizyx, tensor(cldnn::group(2), cldnn::batch(1), cldnn::feature(1), cldnn::spatial(2, 2, 2))}); auto biases_1 = engine.allocate_memory({ data_types::f32, format::bfyx, tensor(feature(2)) }); @@ -1078,7 +1078,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution3D_split2) { }, { { 3.0f, 4.0f, 6.0f }, - { 6.0f, 5.0f, 10.0f}, + { 6.0f, 5.0f, 10.0f }, { 9.0f, 4.0f, 1.0f } }, }, @@ -1105,7 +1105,7 @@ 
TEST(convolution_f32_fw_gpu, basic_convolution3D_split2) { input_layout("input", input->get_layout()), data("weights_1", weights_1), data("biases_1", biases_1), - convolution("conv", "input", { "weights_1" }, { "biases_1" }, 2, tensor(1), tensor(0), tensor(1), tensor{1, 2, 3, 3, 3}, data_types::f32, true)); + convolution("conv", "input", { "weights_1" }, { "biases_1" }, 2, tensor(1), tensor(0), tensor(1), tensor{ 1, 2, 3, 3, 3 }, data_types::f32, true)); network network(engine, topology); network.set_input_data("input", input); @@ -1141,138 +1141,6 @@ TEST(convolution_f32_fw_gpu, basic_convolution3D_split2) { } } -TEST(convolution_f32_fw_gpu, basic_convolution3D_group2) { - // data is similar as in basic_convolution3D_split2 - auto& engine = get_test_engine(); - auto input = engine.allocate_memory({ data_types::f32, format::bfzyx,{ 1, 2, 4, 4, 4 } }); - auto weights = engine.allocate_memory({ data_types::f32, format::bfzyx,{ 2, 1, 2, 2, 2 } }); - auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 2, 1, 1, 1 } }); - - set_values(input, { - 1.0f, 0.0f, 1.0f, 0.0f, - 1.0f, 1.0f, 3.0f, 1.0f, - 1.0f, 1.0f, 0.0f, 2.0f, - 0.0f, 2.0f, 1.0f, 1.0f, - 1.0f, 0.0f, 0.0f, 1.0f, - 2.0f, 0.0f, 1.0f, 2.0f, - 3.0f, 1.0f, 1.0f, 1.0f, - 0.0f, 0.0f, 3.0f, 1.0f, - 2.0f, 0.0f, 1.0f, 1.0f, - 3.0f, 3.0f, 1.0f, 0.0f, - 2.0f, 1.0f, 1.0f, 0.0f, - 3.0f, 2.0f, 1.0f, 2.0f, - 1.0f, 0.0f, 2.0f, 0.0f, - 1.0f, 0.0f, 3.0f, 3.0f, - 3.0f, 1.0f, 0.0f, 0.0f, - 1.0f, 1.0f, 0.0f, 2.0f, - 1.0f, 0.0f, 1.0f, 0.0f, - 1.0f, 1.0f, 3.0f, 1.0f, - 1.0f, 1.0f, 0.0f, 2.0f, - 0.0f, 2.0f, 1.0f, 1.0f, - 1.0f, 0.0f, 0.0f, 1.0f, - 2.0f, 0.0f, 1.0f, 2.0f, - 3.0f, 1.0f, 1.0f, 1.0f, - 0.0f, 0.0f, 3.0f, 1.0f, - 2.0f, 0.0f, 1.0f, 1.0f, - 3.0f, 3.0f, 1.0f, 0.0f, - 2.0f, 1.0f, 1.0f, 0.0f, - 3.0f, 2.0f, 1.0f, 2.0f, - 1.0f, 0.0f, 2.0f, 0.0f, - 1.0f, 0.0f, 3.0f, 3.0f, - 3.0f, 1.0f, 0.0f, 0.0f, - 1.0f, 1.0f, 0.0f, 2.0f, - }); - - set_values(weights, { - 0.0f, 1.0f, - 0.0f, 0.0f, - 2.0f, 1.0f, - 0.0f, 0.0f, - 0.0f, 1.0f, - 0.0f, 0.0f, - 2.0f, 1.0f, - 0.0f, 0.0f, - }); - - set_values(biases, { 1.0f, 2.0f }); - - VVVVF output_vec = { - { - { - { 3.0f, 2.0f, 2.0f }, - { 6.0f, 5.0f, 6.0f }, - { 9.0f, 4.0f, 6.0f } - }, - { - { 5.0f, 2.0f, 5.0f }, - { 10.0f, 9.0f, 5.0f }, - { 7.0f, 5.0f, 4.0f } - }, - { - { 3.0f, 4.0f, 6.0f }, - { 6.0f, 5.0f, 10.0f }, - { 9.0f, 4.0f, 1.0f } - }, - }, - { - { - { 4.0f, 3.0f, 3.0f }, - { 7.0f, 6.0f, 7.0f }, - { 10.0f, 5.0f, 7.0f } - }, - { - { 6.0f, 3.0f, 6.0f }, - { 11.0f, 10.0f, 6.0f }, - { 8.0f, 6.0f, 5.0f } - }, - { - { 4.0f, 5.0f, 7.0f }, - { 7.0f, 6.0f, 11.0f }, - { 10.0f, 5.0f, 2.0f } - }, - } - }; - - topology topology( - input_layout("input", input->get_layout()), - data("weights", weights), - data("biases", biases), - convolution("conv", "input", { "weights" }, { "biases" })); - - network network(engine, topology); - network.set_input_data("input", input); - - auto outputs = network.execute(); - EXPECT_EQ(outputs.size(), size_t(1)); - EXPECT_EQ(outputs.begin()->first, "conv"); - - auto output_memory = outputs.at("conv").get_memory(); - auto output_layout = output_memory->get_layout(); - cldnn::mem_lock output_ptr(output_memory, get_test_stream()); - - int z_size = output_layout.size.spatial[2]; - int y_size = output_layout.size.spatial[1]; - int x_size = output_layout.size.spatial[0]; - int f_size = output_layout.size.feature[0]; - int b_size = output_layout.size.batch[0]; - EXPECT_EQ(output_layout.format, format::bfzyx); - EXPECT_EQ(b_size, 1); - EXPECT_EQ(f_size, 2); - EXPECT_EQ(z_size, 3); - 
EXPECT_EQ(y_size, 3); - EXPECT_EQ(x_size, 3); - for (int f = 0; f < f_size; ++f) { - for (int z = 0; z < z_size; ++z) { - for (int y = 0; y < y_size; ++y) { - for (int x = 0; x < x_size; ++x) { - EXPECT_EQ(output_vec[f][z][y][x], - output_ptr[f * z_size * y_size * x_size + z * y_size * x_size + y * x_size + x]); - } - } - } - } -} - TEST(convolution_f32_fw_gpu, with_output_size_same_input) { auto& engine = get_test_engine(); @@ -1284,8 +1152,8 @@ TEST(convolution_f32_fw_gpu, with_output_size_same_input) { input_layout("input", input->get_layout()), data("weights", weights), data("weights2", weights2), - convolution::create_with_output_size("conv1", "input", { "weights" }, {1, 64, 160, 160}, {1, 1, 2, 2}, {0, 0, -3, -3}), - convolution::create_with_output_size("conv2", "input", { "weights2" }, {1, 64, 320, 320}, {1, 1, 1, 1}, {0, 0, -3, -3}) + convolution::create_with_output_size("conv1", "input", { "weights" }, { 1, 64, 160, 160 }, { 1, 1, 2, 2 }, { 0, 0, -3, -3 }), + convolution::create_with_output_size("conv2", "input", { "weights2" }, { 1, 64, 320, 320 }, { 1, 1, 1, 1 }, { 0, 0, -3, -3 }) ); network network(engine, topology); @@ -1315,8 +1183,8 @@ TEST(convolution_f32_fw_gpu, three_convolutions_same_weights) { auto& engine = get_test_engine(); - auto input = engine.allocate_memory({ data_types::f32, format::bfyx, {1,2,2,2} }); - auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 2,2,1,1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 2, 2 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); set_values(input, { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f }); set_values(weights, { 1.0f, 1.0f, 1.0f, 1.0f }); @@ -1398,7 +1266,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution) { input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), - convolution( "conv", "input", { "weights" }, { "biases" }, { 0,0,1,2 })); + convolution( "conv", "input", { "weights" }, { "biases" }, { 0, 0, 1, 2 })); network network(engine, topology); network.set_input_data("input", input); @@ -1462,7 +1330,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution_bfyx_weights_as_input_layout) { , { "biases" } , - { 0,0,1,2 } + { 0, 0, 1, 2 } )); cldnn::build_options options; options.set_option(cldnn::build_option::optimize_data(true)); @@ -1552,11 +1420,11 @@ TEST(convolution_f32_fw_gpu, basic_convolution_input_padding) { "input", { "weights" }, { "biases" }, - { 1,1,1,1 }, - tensor{{ 0,0,1,2 }, 0}, + { 1, 1, 1, 1 }, + tensor{ { 0, 0, 1, 2 }, 0 }, { 1, 1, 1, 1 }, "", - padding{ { 0,0,0,0 }, 0 }) + padding{ { 0, 0, 0, 0 }, 0 }) ); network network(engine, topology); @@ -1629,9 +1497,9 @@ TEST(convolution_f32_fw_gpu, basic_convolution_sym_input_padding) { auto& engine = get_test_engine(); - auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 1, 4, 3 } }); - auto weights = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 2, 2 } }); - auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 4, 3 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); set_values(input, { 1.0f, 2.0f, 3.0f, 4.0f, 2.0f, 2.0f, 3.0f, 4.0f, 3.0f, 3.0f, 3.0f, 5.0f }); set_values(weights, { 1.0f, 1.0f, 1.0f, 1.0f }); @@ -1653,13 +1521,13 @@ 
TEST(convolution_f32_fw_gpu, basic_convolution_sym_input_padding) { "input", { "weights" }, { "biases" }, - { 1,1,1,1 }, - { 0,0,0,0 }, { 1, 1, 1, 1 }, - { 0,0,1,2 }, - { 0,0,1,2 }, + { 0, 0, 0, 0 }, + { 1, 1, 1, 1 }, + { 0, 0, 1, 2 }, + { 0, 0, 1, 2 }, "", - padding{ { 0,0,0,0 }, 0 }) + padding{ { 0, 0, 0, 0 }, 0 }) ); network network(engine, topology); @@ -1726,9 +1594,9 @@ TEST(convolution_f32_fw_gpu, basic_convolution_asym_input_padding) { auto& engine = get_test_engine(); - auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 1, 4, 3 } }); - auto weights = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 2, 2 } }); - auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 4, 3 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); set_values(input, { 1.0f, 2.0f, 3.0f, 4.0f, 2.0f, 2.0f, 3.0f, 4.0f, 3.0f, 3.0f, 3.0f, 5.0f }); set_values(weights, { 1.0f, 1.0f, 1.0f, 1.0f }); @@ -1749,15 +1617,15 @@ TEST(convolution_f32_fw_gpu, basic_convolution_asym_input_padding) { convolution( "conv", "input", - {"weights"}, - {"biases"}, - {1, 1, 1, 1}, - {0, 0, 0, 0}, - {1, 1, 1, 1}, - tensor{{0, 0, 1, 2}, 0}, - tensor{{0, 0, 2, 3}, 0}, + { "weights" }, + { "biases" }, + { 1, 1, 1, 1 }, + { 0, 0, 0, 0 }, + { 1, 1, 1, 1 }, + tensor{ { 0, 0, 1, 2 }, 0 }, + tensor{ { 0, 0, 2, 3 }, 0 }, "", - padding{{0, 0, 0, 0}, 0})); + padding{ { 0, 0, 0, 0 }, 0 })); network network(engine, topology); network.set_input_data("input", input); @@ -1830,9 +1698,9 @@ TEST(convolution_f32_fw_gpu, basic_convolution_sym_input_padding_with_pad) { auto& engine = get_test_engine(); - auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 1, 4, 3 } }); - auto weights = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 2, 2 } }); - auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 4, 3 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); set_values(input, { 1.0f, 2.0f, 3.0f, 4.0f, 2.0f, 2.0f, 3.0f, 4.0f, 3.0f, 3.0f, 3.0f, 5.0f }); set_values(weights, { 1.0f, 1.0f, 1.0f, 1.0f }); @@ -1858,13 +1726,13 @@ TEST(convolution_f32_fw_gpu, basic_convolution_sym_input_padding_with_pad) { "input", { "weights" }, { "biases" }, - { 1,1,1,1 }, - { 0,0,1,2 }, { 1, 1, 1, 1 }, - { 0,0,1,2 }, - { 0,0,1,2 }, + { 0, 0, 1, 2 }, + { 1, 1, 1, 1 }, + { 0, 0, 1, 2 }, + { 0, 0, 1, 2 }, "", - padding{ { 0,0,0,0 }, 0 }) + padding{ { 0, 0, 0, 0 }, 0 }) ); network network(engine, topology); @@ -1940,9 +1808,9 @@ TEST(convolution_f32_fw_gpu, basic_convolution_asym_input_padding_with_pad) { auto& engine = get_test_engine(); - auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 1, 4, 3 } }); - auto weights = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 2, 2 } }); - auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 4, 3 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); + auto biases = engine.allocate_memory({ 
data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); set_values(input, { 1.0f, 2.0f, 3.0f, 4.0f, 2.0f, 2.0f, 3.0f, 4.0f, 3.0f, 3.0f, 3.0f, 5.0f }); set_values(weights, { 1.0f, 1.0f, 1.0f, 1.0f }); @@ -1967,15 +1835,15 @@ TEST(convolution_f32_fw_gpu, basic_convolution_asym_input_padding_with_pad) { convolution( "conv", "input", - {"weights"}, - {"biases"}, - {1, 1, 1, 1}, - tensor{{0, 0, 1, 2}, 0}, - {1, 1, 1, 1}, - tensor{{0, 0, 1, 2}, 0}, - tensor{{0, 0, 2, 3}, 0}, + { "weights" }, + { "biases" }, + { 1, 1, 1, 1 }, + tensor{ { 0, 0, 1, 2 }, 0 }, + { 1, 1, 1, 1 }, + tensor{ { 0, 0, 1, 2 }, 0 }, + tensor{ { 0, 0, 2, 3 }, 0 }, "", - padding{{0, 0, 0, 0}, 0})); + padding{ { 0, 0, 0, 0 }, 0 })); network network(engine, topology); network.set_input_data("input", input); @@ -2069,11 +1937,11 @@ TEST(convolution_f32_fw_gpu, basic_convolution_input_and_output_padding) { "input", { "weights" }, { "biases" }, - { 1,1,1,1 }, - tensor{{ 0,0,1,2 }, 0}, + { 1, 1, 1, 1 }, + tensor{ { 0, 0, 1, 2 }, 0 }, { 1, 1, 1, 1 }, "", - padding{ { 0,0,-x_pad,-y_pad }, 0 }) + padding{ { 0, 0, -x_pad, -y_pad }, 0 }) ); network network(engine, topology); @@ -2156,7 +2024,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x1x1_nopad_random) { auto& engine = get_test_engine(); auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 4, 4 } }); - //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1,{ 2, 2 }, 1 } }); + //auto output = memory::allocate({ memory::format::yxfb_f32, { 1, { 2, 2 }, 1 } }); auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); @@ -2168,7 +2036,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x1x1_nopad_random) { input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), - convolution("conv", "input", {"weights"}, {"biases"}, {1,1,2,2}) + convolution("conv", "input", { "weights" }, { "biases" }, { 1, 1, 2, 2 }) ); network network(engine, topology); @@ -2226,7 +2094,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in2x2x1x2_nopad_random) { auto& engine = get_test_engine(); auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 2, 1, 2, 2 } }); - //auto output = memory::allocate({ memory::format::yxfb_f32,{ 2,{ 1, 1 }, 1 } }); + //auto output = memory::allocate({ memory::format::yxfb_f32, { 2, { 1, 1 }, 1 } }); auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); @@ -2238,7 +2106,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in2x2x1x2_nopad_random) { input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), - convolution("conv", "input", { "weights" }, { "biases" }, { 1,1,2,2 }) + convolution("conv", "input", { "weights" }, { "biases" }, { 1, 1, 2, 2 }) ); network network(engine, topology); @@ -2284,7 +2152,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x1x1_nopad) { auto& engine = get_test_engine(); auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 4, 4 } }); - //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1,{ 2, 2 }, 1 } }); + //auto output = memory::allocate({ memory::format::yxfb_f32, { 1, { 2, 2 }, 1 } }); auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); auto biases = engine.allocate_memory({ 
data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); @@ -2296,7 +2164,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x1x1_nopad) { input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), - convolution("conv", "input", { "weights" }, { "biases" }, { 1,1,2,2 }) + convolution("conv", "input", { "weights" }, { "biases" }, { 1, 1, 2, 2 }) ); network network(engine, topology); @@ -2338,7 +2206,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in2x2x1x2_nopad) { auto& engine = get_test_engine(); auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 2, 1, 2, 2 } }); - //auto output = memory::allocate({ memory::format::yxfb_f32,{ 2,{ 1, 1 }, 1 } }); + //auto output = memory::allocate({ memory::format::yxfb_f32, { 2, { 1, 1 }, 1 } }); auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); @@ -2350,7 +2218,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in2x2x1x2_nopad) { input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), - convolution("conv", "input", { "weights" }, { "biases" }, { 1,1,2,2 } ) + convolution("conv", "input", { "weights" }, { "biases" }, { 1, 1, 2, 2 } ) ); network network(engine, topology); @@ -2390,7 +2258,7 @@ TEST(convolution_f32_fw_gpu, basic_ofm_wsiz2x1x2x1_in1x2x1_nopad) { auto& engine = get_test_engine(); auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 1, 2 } }); - //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1 ,{ 1, 1 }, 2 } }); + //auto output = memory::allocate({ memory::format::yxfb_f32, { 1, { 1, 1 }, 2 } }); auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 1, 1, 2 } }); auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 1, 1 } }); @@ -2402,7 +2270,7 @@ TEST(convolution_f32_fw_gpu, basic_ofm_wsiz2x1x2x1_in1x2x1_nopad) { input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), - convolution("conv", "input", { "weights" }, { "biases" }, { 1,1,5,5 }) + convolution("conv", "input", { "weights" }, { "biases" }, { 1, 1, 5, 5 }) ); network network(engine, topology); @@ -2449,7 +2317,7 @@ TEST(convolution_f32_fw_gpu, basic_ofm_wsiz3x2x2x1_in2x2x1_nopad) { auto& engine = get_test_engine(); auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 2, 1, 2 } }); - //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1 ,{ 1, 1 }, 3 } }); + //auto output = memory::allocate({ memory::format::yxfb_f32, { 1, { 1, 1 }, 3 } }); auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 3, 2, 1, 2 } }); auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 3, 1, 1 } }); @@ -2461,7 +2329,7 @@ TEST(convolution_f32_fw_gpu, basic_ofm_wsiz3x2x2x1_in2x2x1_nopad) { input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), - convolution("conv", "input", { "weights" }, { "biases" }, { 1,1,5,5 }) + convolution("conv", "input", { "weights" }, { "biases" }, { 1, 1, 5, 5 }) ); network network(engine, topology); @@ -2505,7 +2373,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2x1x3_wstr2x2_in2x2x1x1_nopad) { auto& engine = get_test_engine(); auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 2, 2 } }); - //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1 ,{ 1, 1 }, 3 } }); + //auto output = 
memory::allocate({ memory::format::yxfb_f32, { 1, { 1, 1 }, 3 } }); auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 3, 1, 2, 2 } }); auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 3, 1, 1 } }); @@ -2517,7 +2385,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2x1x3_wstr2x2_in2x2x1x1_nopad) { input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), - convolution("conv", "input", { "weights" }, { "biases" }, { 1,1,2,2 }) + convolution("conv", "input", { "weights" }, { "biases" }, { 1, 1, 2, 2 }) ); network network(engine, topology); @@ -2561,7 +2429,7 @@ TEST(convolution_f32_fw_gpu, wsiz3x3_wstr2x2_in2x2x1x1_zeropad) { auto& engine = get_test_engine(); auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 2, 2 } }); - //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1,{ 1, 1 }, 1 } }); + //auto output = memory::allocate({ memory::format::yxfb_f32, { 1, { 1, 1 }, 1 } }); auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 3, 3 } }); auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); @@ -2573,7 +2441,7 @@ TEST(convolution_f32_fw_gpu, wsiz3x3_wstr2x2_in2x2x1x1_zeropad) { input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), - convolution("conv", "input", { "weights" }, { "biases" }, { 1,1,2,2 }) + convolution("conv", "input", { "weights" }, { "biases" }, { 1, 1, 2, 2 }) ); network network(engine, topology); @@ -2618,7 +2486,7 @@ TEST(convolution_f32_fw_gpu, offsets_wsiz3x3_wstr2x2_in2x2x1x1_zeropad) { auto& engine = get_test_engine(); auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 2, 2 } }); - //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1 ,{ 2, 2 }, 1 } }); + //auto output = memory::allocate({ memory::format::yxfb_f32, { 1, { 2, 2 }, 1 } }); auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 3, 3 } }); auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); @@ -2635,11 +2503,11 @@ TEST(convolution_f32_fw_gpu, offsets_wsiz3x3_wstr2x2_in2x2x1x1_zeropad) { "input", { "weights" }, { "biases" }, - { 1,1,2,2 }, - tensor{{ 0,0,1,1 }, 0}, + { 1, 1, 2, 2 }, + tensor{ { 0, 0, 1, 1 }, 0 }, { 1, 1, 1, 1 }, "", - padding{ { 0,0,1,1 }, 0 }) + padding{ { 0, 0, 1, 1 }, 0 }) ); network network(engine, topology); @@ -2694,8 +2562,8 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x1_nopad_split2) { auto& engine = get_test_engine(); auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 2, 4, 4 } }); - //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1,{ 2, 2 }, 2 } }); - auto weights1 = engine.allocate_memory({ data_types::f32, format::goiyx, tensor(group(2), batch(1), feature(1), spatial(2,2))}); + //auto output = memory::allocate({ memory::format::yxfb_f32, { 1, { 2, 2 }, 2 } }); + auto weights1 = engine.allocate_memory({ data_types::f32, format::goiyx, tensor(group(2), batch(1), feature(1), spatial(2, 2))}); auto biases1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 1, 1 } }); set_values(input, { @@ -2717,9 +2585,9 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x1_nopad_split2) { { "weights1" }, { "biases1" }, 2, - { 0,0,2,2 }, - { 0,0,0,0 }, - { 1,1,1,1 }) + { 0, 0, 2, 2 }, + { 0, 0, 0, 0 }, + { 1, 1, 1, 1 }) ); network network(engine, topology); @@ -2792,8 +2660,8 @@ TEST(convolution_f32_fw_gpu, 
basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2) { auto& engine = get_test_engine(); auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 2, 2, 4, 4 } }); - //auto output = memory::allocate({ memory::format::yxfb_f32,{ 2,{ 2, 2 }, 2 } }); - auto weights1 = engine.allocate_memory({ data_types::f32, format::goiyx, tensor(group(2), batch(1), feature(1), spatial(2,2)) }); + //auto output = memory::allocate({ memory::format::yxfb_f32, { 2, { 2, 2 }, 2 } }); + auto weights1 = engine.allocate_memory({ data_types::f32, format::goiyx, tensor(group(2), batch(1), feature(1), spatial(2, 2)) }); auto biases1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 1, 1 } }); set_values(input, { @@ -2815,9 +2683,9 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2) { { "weights1" }, { "biases1" }, 2, - { 1,1,2,2 }, - { 0,0,0,0 }, - { 1,1,1,1 }) + { 1, 1, 2, 2 }, + { 0, 0, 0, 0 }, + { 1, 1, 1, 1 }) ); network network(engine, topology); @@ -2853,9 +2721,9 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x1_nopad_group2) { // data is similar as in basic_wsiz2x2_wstr2x2_in4x4x2x1_nopad_split2 auto& engine = get_test_engine(); - auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 2, 4, 4 } }); - auto weights = engine.allocate_memory({ data_types::f32, format::goiyx ,tensor(group(2), batch(1), feature(1), spatial(2,2)) }); - auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 2, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 2, 4, 4 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::goiyx, tensor(group(2), batch(1), feature(1), spatial(2, 2)) }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 1, 1 } }); set_values(input, { -0.5f, 0.5f, 1.0f, 1.5f, 0.5f, 2.3f, 2.0f, -0.4f, @@ -2879,9 +2747,9 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x1_nopad_group2) { { "weights" }, { "biases" }, 2, // number of groups - { 0,0,2,2 }, - { 0,0,0,0 }, - { 1,1,1,1 }) + { 0, 0, 2, 2 }, + { 0, 0, 0, 0 }, + { 1, 1, 1, 1 }) ); network network(engine, topology); @@ -2910,9 +2778,9 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x1_nopad_group2_bfyx) auto& engine = get_test_engine(); - auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 2, 4, 4 } }); - auto weights = engine.allocate_memory({ data_types::f32, format::goiyx ,tensor(group(2), batch(1), feature(1), spatial(2,2)) }); - auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 2, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 2, 4, 4 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::goiyx, tensor(group(2), batch(1), feature(1), spatial(2, 2)) }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 1, 1 } }); set_values(input, { -0.5f, 0.5f, 1.0f, 1.5f, 0.5f, 2.3f, 2.0f, -0.4f, @@ -2928,7 +2796,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x1_nopad_group2_bfyx) topology topology( input_layout("input", input->get_layout()), - reorder("input_1", "input", { data_types::f32,format::bfyx,{ 1, 2, 4, 4 } }), + reorder("input_1", "input", { data_types::f32, format::bfyx, { 1, 2, 4, 4 } }), data("weights", weights), data("biases", biases), convolution( @@ -2937,9 +2805,9 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x1_nopad_group2_bfyx) { "weights" }, { "biases" }, 2, // number of groups - { 0,0,2,2 
}, - { 0,0,0,0 }, - { 1,1,1,1 }) + { 0, 0, 2, 2 }, + { 0, 0, 0, 0 }, + { 1, 1, 1, 1 }) ); network network(engine, topology); @@ -2968,9 +2836,9 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group2) { auto& engine = get_test_engine(); - auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 2, 2, 4, 4 } }); - auto weights = engine.allocate_memory({ data_types::f32, format::goiyx ,tensor(group(2), batch(1), feature(1), spatial(2,2)) }); - auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 2, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 2, 2, 4, 4 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::goiyx, tensor(group(2), batch(1), feature(1), spatial(2, 2)) }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 1, 1 } }); set_values(input, { -0.5f, -0.5f, 0.5f, 0.5f, 1.0f, 1.0f, 1.5f, 1.5f, 0.5f, 0.5f, 2.3f, 2.3f, 2.0f, 2.0f, -0.4f, -0.4f, @@ -2994,9 +2862,9 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group2) { { "weights" }, { "biases" }, 2, // number of groups - { 1,1,2,2 }, - { 0,0,0,0 }, - { 1,1,1,1 }) + { 1, 1, 2, 2 }, + { 0, 0, 0, 0 }, + { 1, 1, 1, 1 }) ); network network(engine, topology); @@ -3034,7 +2902,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2_depthw auto& engine = get_test_engine(); - auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 2, 16, 4, 4 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 2, 16, 4, 4 } }); set_values(input, { -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, @@ -3055,8 +2923,8 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2_depthw -0.5f, -0.5f, 3.0f, 3.0f, -0.5f, -0.5f, 3.0f, 3.0f, -0.5f, -0.5f, 3.0f, 3.0f, -0.5f, -0.5f, 3.0f, 3.0f, -0.5f, -0.5f, 3.0f, 3.0f, -0.5f, -0.5f, 3.0f, 3.0f, -0.5f, -0.5f, 3.0f, 3.0f, -0.5f, -0.5f, 3.0f, 3.0f, }); - auto weights1 = engine.allocate_memory({ data_types::f32, format::goiyx ,tensor(group(16), batch(1), feature(1), spatial(2,2)) }); - auto biases1 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 16, 1, 1 } }); + auto weights1 = engine.allocate_memory({ data_types::f32, format::goiyx, tensor(group(16), batch(1), feature(1), spatial(2, 2)) }); + auto biases1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 16, 1, 1 } }); set_values(weights1, { -2.0f, 0.5f, 3.5f, 1.5f, -1.2f, 1.5f, 0.5f, -0.5f, -2.0f, 0.5f, 3.5f, 1.5f, -1.2f, 1.5f, 0.5f, -0.5f, @@ -3089,9 +2957,9 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2_depthw { weights_id }, { bias_id }, 16, // number of groups - { 1,1,2,2 }, - { 0,0,0,0 }, - { 1,1,1,1 }) + { 1, 1, 2, 2 }, + { 0, 0, 0, 0 }, + { 1, 1, 1, 1 }) ); network network(engine, topology); @@ -3123,7 +2991,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2_depthw // data is similar as in basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2 but with batch 1 auto& engine = get_test_engine(); - auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 16, 4, 4 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 16, 4, 4 } }); set_values(input, { -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f, 0.5f, -1.0f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, @@ 
-3144,8 +3012,8 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2_depthw 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f, 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f, }); - auto weights1 = engine.allocate_memory({ data_types::f32, format::goiyx ,tensor(group(16), batch(1), feature(1), spatial(2,2)) }); - auto biases1 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 16, 1, 1 } }); + auto weights1 = engine.allocate_memory({ data_types::f32, format::goiyx, tensor(group(16), batch(1), feature(1), spatial(2, 2)) }); + auto biases1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 16, 1, 1 } }); set_values(weights1, { -2.0f, 0.5f, 3.5f, 1.5f, -1.2f, 1.5f, 0.5f, -0.5f, -2.0f, 0.5f, 3.5f, 1.5f, -1.2f, 1.5f, 0.5f, -0.5f, @@ -3178,9 +3046,9 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2_depthw { weights_id }, { bias_id }, 16, // number of groups - { 1,1,2,2 }, - { 0,0,0,0 }, - { 1,1,1,1 }) + { 1, 1, 2, 2 }, + { 0, 0, 0, 0 }, + { 1, 1, 1, 1 }) ); network network(engine, topology); @@ -3217,7 +3085,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group16) { auto& engine = get_test_engine(); - auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 2, 16, 4, 4 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 2, 16, 4, 4 } }); set_values(input, { -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, @@ -3240,8 +3108,8 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group16) { topology topology(input_layout("input", input->get_layout())); - auto weights = engine.allocate_memory({ data_types::f32, format::goiyx ,tensor(group(16), batch(1), feature(1), spatial(2,2)) }); - auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 16, 1, 1 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::goiyx, tensor(group(16), batch(1), feature(1), spatial(2, 2)) }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 16, 1, 1 } }); set_values(weights, { @@ -3263,7 +3131,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group16) { -1.2f, 1.5f, 0.5f, -0.5f } ); - set_values(biases, { 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f}); + set_values(biases, { 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f }); topology.add( data("weights", weights), @@ -3277,9 +3145,9 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group16) { { "weights" }, { "bias" }, 16, - { 1,1,2,2 }, - { 0,0,0,0 }, - { 1,1,1,1 }) + { 1, 1, 2, 2 }, + { 0, 0, 0, 0 }, + { 1, 1, 1, 1 }) ); network network(engine, topology); @@ -3311,7 +3179,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group16_bfyx) // data is similar as in basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2_depthwise_sep_opt_bfyx auto& engine = get_test_engine(); - auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 16, 4, 4 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 16, 4, 4 } }); set_values(input, { -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f, 0.5f, -1.0f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, @@ -3334,8 +3202,8 @@ TEST(convolution_f32_fw_gpu, 
basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group16_bfyx) topology topology(input_layout("input", input->get_layout())); - auto weights = engine.allocate_memory({ data_types::f32, format::goiyx ,tensor(group(16), batch(1), feature(1), spatial(2,2)) }); - auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 16, 1, 1 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::goiyx, tensor(group(16), batch(1), feature(1), spatial(2, 2)) }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 16, 1, 1 } }); set_values(weights, { @@ -3358,7 +3226,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group16_bfyx) } ); - set_values(biases, { 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f}); + set_values(biases, { 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f }); topology.add( data("weights", weights), @@ -3372,9 +3240,9 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group16_bfyx) { "weights" }, { "bias" }, 16, - { 1,1,2,2 }, - { 0,0,0,0 }, - { 1,1,1,1 }) + { 1, 1, 2, 2 }, + { 0, 0, 0, 0 }, + { 1, 1, 1, 1 }) ); network network(engine, topology); @@ -3442,7 +3310,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group16_bfyx) auto& engine = get_test_engine(); auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 4, 1, 1 } }); - //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1,{ 1, 1 }, 4 } }); + //auto output = memory::allocate({ memory::format::yxfb_f32, { 1, { 1, 1 }, 4 } }); auto weights1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); auto biases1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 1, 1 } }); auto weights2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); @@ -3467,9 +3335,9 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group16_bfyx) "input", { "weights1", "weights2" }, { "biases1", "biases2" }, - { 1,1,2,2 }, - { 0,0,0,0 }, - { 1,1,1,1 }) + { 1, 1, 2, 2 }, + { 0, 0, 0, 0 }, + { 1, 1, 1, 1 }) ); network network(engine, topology); @@ -3523,7 +3391,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz1x1_wstr2x2_in1x1x2x1_nopad_split2) { auto& engine = get_test_engine(); auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 2, 1, 1 } }); - //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1,{ 1, 1 }, 4 } }); + //auto output = memory::allocate({ memory::format::yxfb_f32, { 1, { 1, 1 }, 4 } }); auto weights1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 1, 1, 1 } }); auto biases1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 1, 1 } }); auto weights2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 1, 1, 1 } }); @@ -3548,9 +3416,9 @@ TEST(convolution_f32_fw_gpu, basic_wsiz1x1_wstr2x2_in1x1x2x1_nopad_split2) { "input", { "weights1", "weights2" }, { "biases1", "biases2" }, - { 1,1,2,2 }, - { 0,0,0,0 }, - { 1,1,1,1 }) + { 1, 1, 2, 2 }, + { 0, 0, 0, 0 }, + { 1, 1, 1, 1 }) ); network network(engine, topology); @@ -3610,7 +3478,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz1x1_wstr2x2_in1x1x4x1_filter_1x3x2x1x1_no auto& engine = get_test_engine(); auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 4, 1, 1 } }); - //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1,{ 1, 1 }, 6 } }); + //auto output = memory::allocate({ 
memory::format::yxfb_f32, { 1, { 1, 1 }, 6 } }); auto weights1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 3, 2, 1, 1 } }); auto biases1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 3, 1, 1 } }); auto weights2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 3, 2, 1, 1 } }); @@ -3635,9 +3503,9 @@ TEST(convolution_f32_fw_gpu, basic_wsiz1x1_wstr2x2_in1x1x4x1_filter_1x3x2x1x1_no "input", { "weights1", "weights2" }, { "biases1", "biases2" }, - { 1,1,2,2 }, - { 0,0,0,0 }, - { 1,1,1,1 }) + { 1, 1, 2, 2 }, + { 0, 0, 0, 0 }, + { 1, 1, 1, 1 }) ); network network(engine, topology); @@ -3687,7 +3555,7 @@ TEST(convolution_gpu, trivial_convolution_relu) { auto& engine = get_test_engine(); auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 4, 4 } }); - //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1 ,{ 2, 2 }, 1 } }); + //auto output = memory::allocate({ memory::format::yxfb_f32, { 1, { 2, 2 }, 1 } }); auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); @@ -3709,8 +3577,8 @@ TEST(convolution_gpu, trivial_convolution_relu) { "input", { "weights" }, { "biases" }, - { 1,1,2,2 }, - { 0,0,0,0 }, + { 1, 1, 2, 2 }, + { 0, 0, 0, 0 }, { 1, 1, 1, 1 }), activation( "out", @@ -3764,7 +3632,7 @@ TEST(convolution_gpu, relu_with_negative_slope) { auto& engine = get_test_engine(); auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 4, 4 } }); - //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1 ,{ 2, 2 }, 1 } }); + //auto output = memory::allocate({ memory::format::yxfb_f32, { 1, { 2, 2 }, 1 } }); auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); @@ -3786,14 +3654,14 @@ TEST(convolution_gpu, relu_with_negative_slope) { "input", { "weights" }, { "biases" }, - { 1,1,2,2 }, - { 0,0,0,0 }, + { 1, 1, 2, 2 }, + { 0, 0, 0, 0 }, { 1, 1, 1, 1 }), activation( "out", "conv", activation_func::relu_negative_slope, - {0.1f, 0.0f} + { 0.1f, 0.0f } ) ); @@ -3820,9 +3688,9 @@ TEST(convolution_gpu, DISABLED_two_1x1_kernels_after_each_other) { extern const std::vector conv_1x1_output; - auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 16, 8, 16, 16 } }); - auto weights_conv_1 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 8, 8, 1, 1 } }); - auto weights_conv_2 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 8, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 16, 8, 16, 16 } }); + auto weights_conv_1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 8, 8, 1, 1 } }); + auto weights_conv_2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 8, 1, 1 } }); set_random_values(input); set_random_values(weights_conv_1); @@ -3909,9 +3777,9 @@ TEST(convolution_gpu, basic_yxfb_4_4_yxfb_2_2_b16_if2_of16_st2_2_p0_sp1_fp32) auto input = engine.allocate_memory({ data_types::f32, input_format, input_size }); auto weights_size = tensor( output_feature_count, input_feature_count, weights_x, weights_y ); auto weights = engine.allocate_memory({ data_types::f32, weights_format, weights_size }); - auto biases = engine.allocate_memory({ data_types::f32, biases_format, {1,output_feature_count,1,1}}); + auto biases = engine.allocate_memory({ data_types::f32, biases_format, { 1, 
output_feature_count, 1, 1 } }); - //auto output = memory::allocate({output_format, {batch_size, {output_x, output_y}, output_feature_count}}); + //auto output = memory::allocate({ output_format, { batch_size, { output_x, output_y }, output_feature_count } }); // input: std::vector input_vals_template { @@ -4014,8 +3882,8 @@ TEST(convolution_gpu, basic_yxfb_4_4_yxfb_2_2_b16_if2_of16_st2_2_p0_sp1_fp32) "input", { "weights" }, { "biases" }, - { 1,1,stride_x,stride_y }, - { 0,0,0,0 }, + { 1, 1, stride_x, stride_y }, + { 0, 0, 0, 0 }, { 1, 1, 1, 1 }), activation( "out", @@ -4060,7 +3928,7 @@ TEST(convolution_gpu, basic_yxfb_4_4_yxfb_2_2_b16_if2_of16_st2_2_p0_sp1_fp32) } void add_primitives(engine& engine, topology& topology) { - auto weights = engine.allocate_memory({ data_types::i8, format::bfyx,{ 2, 1, 3, 2 } }); + auto weights = engine.allocate_memory({ data_types::i8, format::bfyx, { 2, 1, 3, 2 } }); std::vector weights_values = { 1, 2, 1, 2, 1, 2, @@ -4068,7 +3936,7 @@ void add_primitives(engine& engine, topology& topology) { 19, 17, -1, -10, 32, 23 }; set_values(weights, weights_values); - auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 2, 1, 1 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 1, 1 } }); set_values(biases, { 1.0f, -8.0f }); topology.add( @@ -4108,7 +3976,7 @@ TEST(convolution_f32_fw_gpu, byte_activation) { // Bias: // 1 -8 auto& engine = get_test_engine(); - auto input = engine.allocate_memory({ data_types::i8, format::bfyx,{ 1, 1, 5, 4 } }); + auto input = engine.allocate_memory({ data_types::i8, format::bfyx, { 1, 1, 5, 4 } }); VVVF output_vec = { { @@ -4153,7 +4021,7 @@ TEST(convolution_f32_fw_gpu, byte_activation) { for (int f = 0; f < f_size; f++) for (int y = 0; y < y_size; ++y) { for (int x = 0; x < x_size; ++x) { - EXPECT_NEAR(output_vec[f][y][x], ((float)output_ptr[f*y_size*x_size + y * x_size + x]), 3.0f); + EXPECT_NEAR(output_vec[f][y][x], ((float)output_ptr[f * y_size * x_size + y * x_size + x]), 3.0f); } } } @@ -4161,9 +4029,9 @@ TEST(convolution_f32_fw_gpu, byte_activation) { TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_symmetric) { auto& engine = get_test_engine(); - auto input = engine.allocate_memory({ data_types::u8, format::bfyx,{ 1, 1, 5, 4 } }); - auto weights = engine.allocate_memory({ data_types::i8, format::bfyx,{ 2, 1, 3, 3 } }); - auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 2, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::u8, format::bfyx, { 1, 1, 5, 4 } }); + auto weights = engine.allocate_memory({ data_types::i8, format::bfyx, { 2, 1, 3, 3 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 1, 1 } }); set_values(input, { 1, 2, 3, 4, 5, 2, 2, 3, 4, 6, @@ -4192,7 +4060,7 @@ TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_symmetric) { input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), - convolution("conv", "input", { "weights" }, { "biases" }, tensor{ 0, 0, 2, 2 }, tensor(0), tensor{1, 1, 1, 1}, tensor{1, 2, 3, 2}), + convolution("conv", "input", { "weights" }, { "biases" }, tensor{ 0, 0, 2, 2 }, tensor(0), tensor{ 1, 1, 1, 1 }, tensor{ 1, 2, 3, 2 }), reorder("out", "conv", format::bfyx, data_types::f32)); build_options opts; @@ -4219,7 +4087,7 @@ TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_symmetric) { for (int f = 0; f < f_size; f++) for (int y = 0; y < y_size; ++y) { for (int x = 0; x < x_size; ++x) { - 
EXPECT_NEAR(output_vec[f][y][x], ((float)output_ptr[f*y_size*x_size + y * x_size + x]), 1e-5f) << + EXPECT_NEAR(output_vec[f][y][x], ((float)output_ptr[f * y_size * x_size + y * x_size + x]), 1e-5f) << " x="<get_layout()), - reorder("cvt_input", "input", {data_types::f16, input_format, input_size}), + reorder("cvt_input", "input", { data_types::f16, input_format, input_size }), data("weights", weights), - reorder("cvt_weights", "weights", {data_types::f16, weights_format, weights_size}), + reorder("cvt_weights", "weights", { data_types::f16, weights_format, weights_size }), data("biases", biases), - reorder("cvt_biases", "biases", {data_types::f16, biases_format, biases_size}), + reorder("cvt_biases", "biases", { data_types::f16, biases_format, biases_size }), convolution( "conv", "cvt_input", { "cvt_weights" }, { "cvt_biases" }, - { 1,1,stride_x,stride_y }), - reorder("output", "conv", {data_types::f32, output_format, output_size}) + { 1, 1, stride_x, stride_y }), + reorder("output", "conv", { data_types::f32, output_format, output_size }) ); network network(engine, topology); @@ -5092,7 +4960,7 @@ TEST_P(convolution_gpu_fs_byx_fsv32, fs_byx_fsv32) topology.add(data("biases_fsv", biases_mem)); auto conv_fsv = convolution("conv_fsv", "input_fsv", { "weights_fsv" }, { "biases_fsv" }, - { 1, 1, stride, stride }, tensor{{ 0, 0, pad, pad }, 0}); + { 1, 1, stride, stride }, tensor{ { 0, 0, pad, pad }, 0 }); conv_fsv.output_padding = padding({ 0, 0, output_padding, output_padding }, 0.f); topology.add(conv_fsv); @@ -5115,7 +4983,7 @@ TEST_P(convolution_gpu_fs_byx_fsv32, fs_byx_fsv32) } auto conv_fsv = convolution("conv_fsv", "input_fsv", { "weights_fsv" }, - { 1, 1, stride, stride }, tensor{{ 0, 0, pad, pad }, 0}); + { 1, 1, stride, stride }, tensor{ { 0, 0, pad, pad }, 0 }); conv_fsv.output_padding = padding({ 0, 0, output_padding, output_padding }, 0.f); topology.add(conv_fsv); @@ -5124,7 +4992,7 @@ TEST_P(convolution_gpu_fs_byx_fsv32, fs_byx_fsv32) build_options options; implementation_desc conv_impl = { format::fs_b_yx_fsv32, "" }; - options.set_option(build_option::force_implementations({ {"conv_fsv", conv_impl} })); + options.set_option(build_option::force_implementations({ { "conv_fsv", conv_impl } })); options.set_option(build_option::optimize_data(true)); network network(engine, topology, options); @@ -5194,7 +5062,7 @@ TEST(convolution_f16_fsv_gpu, convolution_f16_fsv_gpu_padding) { data("weights_fsv", weights_mem)); // add input padding by X and Y - layout w_pad(data_types::f16, format::bfyx, input_size, padding({ 0,0,1,1 }, { 0, 0, 0, 0 })); + layout w_pad(data_types::f16, format::bfyx, input_size, padding({ 0, 0, 1, 1 }, { 0, 0, 0, 0 })); topology.add(reorder("input_fsv", "input", w_pad)); // Generate bias data @@ -5225,7 +5093,7 @@ TEST(convolution_f16_fsv_gpu, convolution_f16_fsv_gpu_padding) { build_options options; implementation_desc conv_impl = { format::fs_b_yx_fsv32, "convolution_gpu_bfyx_to_fs_byx_fsv32" }; - options.set_option(build_option::force_implementations({ {"conv_fsv", conv_impl} })); + options.set_option(build_option::force_implementations({ { "conv_fsv", conv_impl } })); options.set_option(build_option::optimize_data(true)); network network(engine, topology, options); @@ -5383,7 +5251,7 @@ TEST_P(convolution_gpu_fs_byx_fsv32_crop, fs_byx_fsv32_crop) topology.add(data("biases_fsv", biases_mem)); auto conv_fsv = convolution("conv_fsv", "right_crop", { "weights_fsv" }, { "biases_fsv" }, - { 1, 1, stride, stride }, tensor{{ 0, 0, pad, pad }, 0}); + { 1, 1, stride, 
stride }, tensor{ { 0, 0, pad, pad }, 0 }); conv_fsv.output_padding = padding({ 0, 0, output_padding, output_padding }, 0.f); topology.add(conv_fsv); } @@ -5405,7 +5273,7 @@ TEST_P(convolution_gpu_fs_byx_fsv32_crop, fs_byx_fsv32_crop) } auto conv_fsv = convolution("conv_fsv", "right_crop", { "weights_fsv" }, - { 1, 1, stride, stride }, tensor{{ 0, 0, pad, pad }, 0}); + { 1, 1, stride, stride }, tensor{ { 0, 0, pad, pad }, 0 }); conv_fsv.output_padding = padding({ 0, 0, output_padding, output_padding }, 0.f); topology.add(conv_fsv); } @@ -5432,7 +5300,7 @@ TEST_P(convolution_gpu_fs_byx_fsv32_crop, fs_byx_fsv32_crop) build_options options; implementation_desc conv_impl = { format::fs_b_yx_fsv32, "convolution_gpu_bfyx_to_fs_byx_fsv32" }; - options.set_option(build_option::force_implementations({ {"conv_fsv", conv_impl} })); + options.set_option(build_option::force_implementations({ { "conv_fsv", conv_impl } })); options.set_option(build_option::optimize_data(true)); network network(engine, topology, options); @@ -5497,12 +5365,12 @@ TEST(convolution_f32_fw_gpu, convolution_int8_b_fs_yx_fsv4_to_bfyx) { topology topology_ref( input_layout("input", input->get_layout()), - reorder("to_int", "input", {data_types::i8, format::bfyx, {batch_num, input_f, input_size_x, input_size_y}}), + reorder("to_int", "input", { data_types::i8, format::bfyx, { batch_num, input_f, input_size_x, input_size_y } }), data("weights", weights), data("biases", biases), - convolution("conv", "to_int", {"weights"}, {"biases"}, {1, 1, 1, 1}, tensor{{0, 0, 2, 2}, 0}, {1, 1, 1, 1}, "", - padding{{0, 0, output_padding, output_padding}, 0}), - reorder("output", "conv", {data_types::f32, format::bfyx, {batch_num, input_f, input_size_x, input_size_y}})); + convolution("conv", "to_int", { "weights" }, { "biases" }, { 1, 1, 1, 1 }, tensor{ { 0, 0, 2, 2 }, 0 }, { 1, 1, 1, 1 }, "", + padding{ { 0, 0, output_padding, output_padding }, 0 }), + reorder("output", "conv", { data_types::f32, format::bfyx, { batch_num, input_f, input_size_x, input_size_y } })); build_options build_opt; @@ -5519,12 +5387,12 @@ TEST(convolution_f32_fw_gpu, convolution_int8_b_fs_yx_fsv4_to_bfyx) { topology topology_act( input_layout("input", input->get_layout()), - reorder("to_int", "input", { data_types::i8,format::b_fs_yx_fsv4,{ batch_num, input_f, input_size_x, input_size_y } }), + reorder("to_int", "input", { data_types::i8,format::b_fs_yx_fsv4, { batch_num, input_f, input_size_x, input_size_y } }), data("weights", weights), data("biases", biases), - convolution("conv", "to_int", { "weights" }, { "biases" }, { 1, 1, 1, 1 }, tensor{{ 0, 0, 2, 2 }, 0}, { 1, 1, 1, 1 }, "", + convolution("conv", "to_int", { "weights" }, { "biases" }, { 1, 1, 1, 1 }, tensor{ { 0, 0, 2, 2 }, 0 }, { 1, 1, 1, 1 }, "", padding{ { 0, 0, output_padding, output_padding }, 0 }), - reorder("output", "conv", { data_types::f32,format::bfyx,{ batch_num, input_f, input_size_x, input_size_y } })); + reorder("output", "conv", { data_types::f32,format::bfyx, { batch_num, input_f, input_size_x, input_size_y } })); build_options build_opt_act; @@ -5634,7 +5502,7 @@ TEST(convolution_gpu, bfyx_iyxo_5x5_fp16) topology.add(data("biases_fsv", biases_mem)); auto conv_fsv = convolution("conv_fsv", "input", { "weights_fsv" }, { "biases_fsv" }, - { 1, 1, stride, stride }, tensor{{ 0, 0, pad, pad }, 0}); + { 1, 1, stride, stride }, tensor{ { 0, 0, pad, pad }, 0 }); conv_fsv.output_padding = padding({ 0, 0, output_padding, output_padding }, 0.f); topology.add(conv_fsv); @@ -5659,7 +5527,7 @@ 
TEST(convolution_gpu, bfyx_iyxo_5x5_fp16) auto conv_fsv = convolution("conv_fsv", "input", { "weights_fsv" }, - { 1, 1, stride, stride }, tensor{{ 0, 0, pad, pad }, 0}); + { 1, 1, stride, stride }, tensor{ { 0, 0, pad, pad }, 0 }); conv_fsv.output_padding = padding({ 0, 0, output_padding, output_padding }, 0.f); topology.add(conv_fsv); @@ -5737,7 +5605,7 @@ void blockedFormatZeroCheck(cldnn::memory::ptr out_mem) { // skip on new batch if (i % (number_of_zeroes / batch_skip) == 0) zero_ind += to_skip; - if (zero_ind >= (size_t)b*f*spatials) + if (zero_ind >= (size_t) b * f * spatials) return; zero_ind += f_mod; @@ -5862,7 +5730,7 @@ TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp32) topology.add(data("biases", biases_mem)); auto conv_bsv16_fsv16 = convolution("conv_bsv16_fsv16", "input_bsv16_fsv16", { "weights" }, { "biases" }, - { 1, 1, stride, stride }, tensor{{ 0, 0, pad, pad, 0 }, 0}); + { 1, 1, stride, stride }, tensor{ { 0, 0, pad, pad, 0 }, 0 }); conv_bsv16_fsv16.output_padding = padding({ 0, 0, output_padding, output_padding, 0 }, 0.f); topology.add(conv_bsv16_fsv16); @@ -5885,7 +5753,7 @@ TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp32) } auto conv_bsv16_fsv16 = convolution("conv_bsv16_fsv16", "input_bsv16_fsv16", { "weights" }, - { 1, 1, stride, stride }, tensor{{ 0, 0, pad, pad, 0 }, 0}); + { 1, 1, stride, stride }, tensor{ { 0, 0, pad, pad, 0 }, 0 }); conv_bsv16_fsv16.output_padding = padding({ 0, 0, output_padding, output_padding, 0 }, 0.f); topology.add(conv_bsv16_fsv16); @@ -5895,7 +5763,7 @@ TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp32) build_options options; options.set_option(build_option::optimize_data(true)); - options.set_option(build_option::outputs({"conv_bsv16_fsv16", "reorder_bfzyx"})); + options.set_option(build_option::outputs({ "conv_bsv16_fsv16", "reorder_bfzyx" })); network network(engine, topology, options); network.set_input_data("input", input_mem); @@ -5999,7 +5867,7 @@ TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp16) topology.add(data("biases", biases_mem)); auto conv_bsv16_fsv16 = convolution("conv_bsv16_fsv16", "input_bsv16_fsv16", { "weights" }, { "biases" }, - { 1, 1, stride, stride }, tensor{{ 0, 0, pad, pad, 0 }, 0}); + { 1, 1, stride, stride }, tensor{ { 0, 0, pad, pad, 0 }, 0 }); conv_bsv16_fsv16.output_padding = padding({ 0, 0, output_padding, output_padding, 0 }, 0.f); topology.add(conv_bsv16_fsv16); @@ -6022,7 +5890,7 @@ TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp16) } auto conv_bsv16_fsv16 = convolution("conv_bsv16_fsv16", "input_bsv16_fsv16", { "weights" }, - { 1, 1, stride, stride }, tensor{{ 0, 0, pad, pad, 0 }, 0}); + { 1, 1, stride, stride }, tensor{ { 0, 0, pad, pad, 0 }, 0 }); conv_bsv16_fsv16.output_padding = padding({ 0, 0, output_padding, output_padding, 0 }, 0.f); topology.add(conv_bsv16_fsv16); @@ -6032,7 +5900,7 @@ TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp16) build_options options; options.set_option(build_option::optimize_data(true)); - options.set_option(build_option::outputs({"conv_bsv16_fsv16", "reorder_bfzyx"})); + options.set_option(build_option::outputs({ "conv_bsv16_fsv16", "reorder_bfzyx" })); network network(engine, topology, options); network.set_input_data("input", input_mem); @@ -6128,7 +5996,7 @@ TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp32_fused_ops) topology.add(data("biases", biases_mem)); auto conv_bsv16_fsv16 = convolution("conv_bsv16_fsv16", "input_bsv16_fsv16", { "weights" }, { "biases" }, - { 1, 1, 
stride, stride }, tensor{{ 0, 0, pad, pad, 0 }, 0}); + { 1, 1, stride, stride }, tensor{ { 0, 0, pad, pad, 0 }, 0 }); conv_bsv16_fsv16.output_padding = padding({ 0, 0, output_padding, output_padding, 0 }, 0.f); topology.add(conv_bsv16_fsv16); @@ -6151,15 +6019,15 @@ TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp32_fused_ops) } auto conv_bsv16_fsv16 = convolution("conv_bsv16_fsv16", "input_bsv16_fsv16", { "weights" }, - { 1, 1, stride, stride }, tensor{{ 0, 0, pad, pad, 0 }, 0}); + { 1, 1, stride, stride }, tensor{ { 0, 0, pad, pad, 0 }, 0 }); conv_bsv16_fsv16.output_padding = padding({ 0, 0, output_padding, output_padding, 0 }, 0.f); topology.add(conv_bsv16_fsv16); } const float scalar = 5.5f; - auto scale_mem = engine.allocate_memory({ data_types::f32, format::bfzyx, {1, 1, 1, 1, 1} }); - set_values(scale_mem, {scalar}); + auto scale_mem = engine.allocate_memory({ data_types::f32, format::bfzyx, { 1, 1, 1, 1, 1 } }); + set_values(scale_mem, { scalar }); topology.add(data("scalar", scale_mem)); topology.add(scale("scale", "conv_bsv16_fsv16", "scalar")); @@ -6168,7 +6036,7 @@ TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp32_fused_ops) build_options options; options.set_option(build_option::optimize_data(true)); - options.set_option(build_option::outputs({"conv_bsv16_fsv16", "reorder_bfzyx"})); + options.set_option(build_option::outputs({ "conv_bsv16_fsv16", "reorder_bfzyx" })); network network(engine, topology, options); network.set_input_data("input", input_mem); @@ -6296,7 +6164,7 @@ TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp32) topology.add(data("biases", biases_mem)); auto conv_bsv16_fsv16 = convolution("conv_bsv16_fsv16", "input_bsv16_fsv16", { "weights" }, { "biases" }, - { 1, 1, stride, stride }, tensor{{ 0, 0, pad, pad }, 0}); + { 1, 1, stride, stride }, tensor{ { 0, 0, pad, pad }, 0 }); conv_bsv16_fsv16.output_padding = padding({ 0, 0, output_padding, output_padding }, 0.f); topology.add(conv_bsv16_fsv16); @@ -6319,7 +6187,7 @@ TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp32) } auto conv_bsv16_fsv16 = convolution("conv_bsv16_fsv16", "input_bsv16_fsv16", { "weights" }, - { 1, 1, stride, stride }, tensor{{ 0, 0, pad, pad }, 0}); + { 1, 1, stride, stride }, tensor{ { 0, 0, pad, pad }, 0 }); conv_bsv16_fsv16.output_padding = padding({ 0, 0, output_padding, output_padding }, 0.f); topology.add(conv_bsv16_fsv16); @@ -6329,9 +6197,9 @@ TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp32) build_options options; options.set_option(build_option::optimize_data(true)); - options.set_option(build_option::outputs({"conv_bsv16_fsv16", "reorder_bfyx"})); + options.set_option(build_option::outputs({ "conv_bsv16_fsv16", "reorder_bfyx" })); implementation_desc conv_impl = { format::bs_fs_yx_bsv16_fsv16, "" }; - options.set_option(build_option::force_implementations({{"conv_bsv16_fsv16", conv_impl}})); + options.set_option(build_option::force_implementations({ { "conv_bsv16_fsv16", conv_impl } })); network network(engine, topology, options); network.set_input_data("input", input_mem); @@ -6436,7 +6304,7 @@ TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp16) topology.add(data("biases", biases_mem)); auto conv_bsv16_fsv16 = convolution("conv_bsv16_fsv16", "input_bsv16_fsv16", { "weights" }, { "biases" }, - { 1, 1, stride, stride }, tensor{{ 0, 0, pad, pad }, 0}); + { 1, 1, stride, stride }, tensor{ { 0, 0, pad, pad }, 0 }); conv_bsv16_fsv16.output_padding = padding({ 0, 0, output_padding, output_padding, 0 }, 0.f); 
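        // bs_fs_yx_bsv16_fsv16 is a doubly-blocked layout: both the batch and
        // feature dimensions are stored in blocks of 16 elements. It is the
        // format these block_layout tests force and then verify below.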
topology.add(conv_bsv16_fsv16); @@ -6459,7 +6327,7 @@ TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp16) } auto conv_bsv16_fsv16 = convolution("conv_bsv16_fsv16", "input_bsv16_fsv16", { "weights" }, - { 1, 1, stride, stride }, tensor{{ 0, 0, pad, pad }, 0}); + { 1, 1, stride, stride }, tensor{ { 0, 0, pad, pad }, 0 }); conv_bsv16_fsv16.output_padding = padding({ 0, 0, output_padding, output_padding }, 0.f); topology.add(conv_bsv16_fsv16); @@ -6469,9 +6337,9 @@ TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp16) build_options options; options.set_option(build_option::optimize_data(true)); - options.set_option(build_option::outputs({"conv_bsv16_fsv16", "reorder_bfyx"})); + options.set_option(build_option::outputs({ "conv_bsv16_fsv16", "reorder_bfyx" })); implementation_desc conv_impl = { format::bs_fs_yx_bsv16_fsv16, "" }; - options.set_option(build_option::force_implementations({{"conv_bsv16_fsv16", conv_impl}})); + options.set_option(build_option::force_implementations({ { "conv_bsv16_fsv16", conv_impl } })); network network(engine, topology, options); network.set_input_data("input", input_mem); @@ -6567,7 +6435,7 @@ TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp32_fused_ops) topology.add(data("biases", biases_mem)); auto conv_bsv16_fsv16 = convolution("conv_bsv16_fsv16", "input_bsv16_fsv16", { "weights" }, { "biases" }, - { 1, 1, stride, stride }, tensor{{ 0, 0, pad, pad }, 0}); + { 1, 1, stride, stride }, tensor{ { 0, 0, pad, pad }, 0 }); conv_bsv16_fsv16.output_padding = padding({ 0, 0, output_padding, output_padding }, 0.f); topology.add(conv_bsv16_fsv16); @@ -6590,15 +6458,15 @@ TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp32_fused_ops) } auto conv_bsv16_fsv16 = convolution("conv_bsv16_fsv16", "input_bsv16_fsv16", { "weights" }, - { 1, 1, stride, stride }, tensor{ {0, 0, pad, pad}, 0 }); + { 1, 1, stride, stride }, tensor{ { 0, 0, pad, pad }, 0 }); conv_bsv16_fsv16.output_padding = padding({ 0, 0, output_padding, output_padding }, 0.f); topology.add(conv_bsv16_fsv16); } const float scalar = 5.5f; - auto scale_mem = engine.allocate_memory({ data_types::f32, format::bfyx, {1, 1, 1, 1} }); - set_values(scale_mem, {scalar}); + auto scale_mem = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); + set_values(scale_mem, { scalar }); topology.add(data("scalar", scale_mem)); topology.add(scale("scale", "conv_bsv16_fsv16", "scalar")); @@ -6607,9 +6475,9 @@ TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp32_fused_ops) build_options options; options.set_option(build_option::optimize_data(true)); - options.set_option(build_option::outputs({"conv_bsv16_fsv16", "reorder_bfyx"})); + options.set_option(build_option::outputs({ "conv_bsv16_fsv16", "reorder_bfyx" })); implementation_desc conv_impl = { format::bs_fs_yx_bsv16_fsv16, "" }; - options.set_option(build_option::force_implementations({{"conv_bsv16_fsv16", conv_impl}})); + options.set_option(build_option::force_implementations({ { "conv_bsv16_fsv16", conv_impl } })); network network(engine, topology, options); network.set_input_data("input", input_mem); @@ -6738,7 +6606,7 @@ TEST_P(convolution_depthwise_gpu, depthwise_conv_fs_b_yx_fsv32) } auto conv_fsv = convolution("conv_fsv", "input_fsv", { "weights_fsv" }, groups, - { 1, 1, stride, stride }, tensor{{ 0, 0, pad_x, pad_y }, 0}); + { 1, 1, stride, stride }, tensor{ { 0, 0, pad_x, pad_y }, 0 }); conv_fsv.output_padding = padding({ 0, 0, output_padding, output_padding }, 0.f); topology.add(conv_fsv); @@ -6746,7 +6614,7 @@ 
TEST_P(convolution_depthwise_gpu, depthwise_conv_fs_b_yx_fsv32) build_options options; options.set_option(build_option::optimize_data(true)); implementation_desc conv_impl = { format::fs_b_yx_fsv32, "" }; - options.set_option(build_option::force_implementations({ {"conv_fsv", conv_impl} })); + options.set_option(build_option::force_implementations({ { "conv_fsv", conv_impl } })); network network(engine, topology, options); network.set_input_data("input", input_mem); @@ -6881,7 +6749,7 @@ TEST_P(convolution_depthwise_gpu_fsv16, depthwise_conv_b_fs_yx_fsv16) } auto conv_fsv = convolution("conv_fsv", "input_fsv", { "weights_fsv" }, groups, - { 1, 1, stride, stride }, tensor{{ 0, 0, pad_x, pad_y }, 0}); + { 1, 1, stride, stride }, tensor{ { 0, 0, pad_x, pad_y }, 0 }); conv_fsv.output_padding = padding({ 0, 0, output_padding, output_padding }, 0.f); topology.add(conv_fsv); @@ -6889,7 +6757,7 @@ TEST_P(convolution_depthwise_gpu_fsv16, depthwise_conv_b_fs_yx_fsv16) build_options options; options.set_option(build_option::optimize_data(true)); implementation_desc conv_impl = { format::b_fs_yx_fsv16, "" }; - options.set_option(build_option::force_implementations({ {"conv_fsv", conv_impl} })); + options.set_option(build_option::force_implementations({ { "conv_fsv", conv_impl } })); network network(engine, topology, options); network.set_input_data("input", input_mem); @@ -6937,15 +6805,15 @@ TEST(convolution_depthwise_gpu_fsv16, depthwise_conv_b_fs_yx_fsv16_in_feature_pa auto stride = tensor{ 1, 1, 1, 1 }; auto pad = tensor{ 0 }; auto dilation = tensor{ 1, 1, 1, 1 }; - auto output_size = tensor{ 1, num_groups, 1, 2}; + auto output_size = tensor{ 1, num_groups, 1, 2 }; auto input_lower_sizes = { 0, 16, 0, 0 }; auto input_upper_sizes = { 0, 64, 0, 0 }; auto& engine = get_test_engine(); auto input = engine.allocate_memory({ data_types::f32, format::bfyx, input_size }); - auto weights = engine.allocate_memory({ data_types::f32, format::goiyx, weights_size}); - auto bias = engine.allocate_memory({ data_types::f32, format::bfyx, bias_size}); + auto weights = engine.allocate_memory({ data_types::f32, format::goiyx, weights_size }); + auto bias = engine.allocate_memory({ data_types::f32, format::bfyx, bias_size }); set_values(input, { 3, -1, -1, -1, 2, -2, 2, 2, 0, 1, -5, 4, -1, 4, 1, 0, @@ -6983,7 +6851,7 @@ TEST(convolution_depthwise_gpu_fsv16, depthwise_conv_b_fs_yx_fsv16_in_feature_pa build_options options; options.set_option(build_option::optimize_data(true)); implementation_desc conv_impl = { format::b_fs_yx_fsv16, "" }; - options.set_option(build_option::force_implementations({ {"conv", conv_impl} })); + options.set_option(build_option::force_implementations({ { "conv", conv_impl } })); network network(engine, topology, options); network.set_input_data("input", input); @@ -7088,7 +6956,7 @@ TEST_P(convolution_depthwise_gpu_bfyx, depthwise_conv_bfyx) } auto conv_fsv = convolution("conv", "input", { "weights" }, groups, - { 1, 1, stride, stride }, tensor{{ 0, 0, pad_x, pad_y }, 0}); + { 1, 1, stride, stride }, tensor{ { 0, 0, pad_x, pad_y }, 0 }); conv_fsv.output_padding = padding({ 0, 0, output_padding, output_padding }, 0.f); topology.add(conv_fsv); @@ -7096,7 +6964,7 @@ TEST_P(convolution_depthwise_gpu_bfyx, depthwise_conv_bfyx) build_options options; options.set_option(build_option::optimize_data(true)); implementation_desc conv_impl = { format::bfyx, "" }; - options.set_option(build_option::force_implementations({ {"conv", conv_impl} })); + options.set_option(build_option::force_implementations({ { 
"conv", conv_impl } })); network network(engine, topology, options); network.set_input_data("input", input_mem); @@ -7385,10 +7253,10 @@ TEST_P(convolution_grouped_gpu, base) { topology topology(input_layout("input", input->get_layout()), data("weights", weights), - reorder("input_fsv", "input", {data_types::i8, input_data_format, input_size}), + reorder("input_fsv", "input", { data_types::i8, input_data_format, input_size }), convolution("conv", "input_fsv", - {"weights"}, + { "weights" }, std::vector(0), weights_zp_prim_name, input_zp_prim_name, @@ -7400,7 +7268,7 @@ TEST_P(convolution_grouped_gpu, base) { tensor(batch(1), feature(1), spatial(1, 1, 1, 1)), ref_conv_out_size, true), - reorder("out", "conv", {data_types::f32, format::bfzyx, ref_conv_out_size})); + reorder("out", "conv", { data_types::f32, format::bfzyx, ref_conv_out_size })); if (has_input_zp) topology.add(data(input_zp_prim_name[0], input_zp)); @@ -7413,9 +7281,9 @@ TEST_P(convolution_grouped_gpu, base) { build_options options; options.set_option(build_option::optimize_data(true)); - options.set_option(build_option::outputs({"conv", "out"})); - implementation_desc conv_impl = {input_data_format, impl_name}; - options.set_option(build_option::force_implementations({{"conv", conv_impl}})); + options.set_option(build_option::outputs({ "conv", "out" })); + implementation_desc conv_impl = { input_data_format, impl_name }; + options.set_option(build_option::force_implementations({ { "conv", conv_impl } })); cldnn::network network(engine, topology, options); network.set_input_data("input", input); @@ -7505,7 +7373,7 @@ TEST_P(convolution_general_gpu, conv_fp16_cases) { auto weights_size = tensor(output_f, input_f, filter_y, filter_x, 1); auto weights_data = generate_random_4d(output_f, input_f, filter_y, filter_x, -1, 1); auto weights_data_bfyx = flatten_4d(format::bfyx, weights_data); - auto weights_mem = engine.allocate_memory({data_types::f16, format::bfyx, weights_size}); + auto weights_mem = engine.allocate_memory({ data_types::f16, format::bfyx, weights_size }); set_values(weights_mem, weights_data_bfyx); // Will be used to store reference values calculated in branches depending on bias @@ -7516,7 +7384,7 @@ TEST_P(convolution_general_gpu, conv_fp16_cases) { if (with_bias) { auto biases_size = tensor(1, output_f, 1, 1); auto biases_data = generate_random_1d(output_f, -1, 1); - auto biases_mem = engine.allocate_memory({data_types::f16, format::bfyx, biases_size}); + auto biases_mem = engine.allocate_memory({ data_types::f16, format::bfyx, biases_size }); set_values(biases_mem, biases_data); for (auto bi = 0; bi < batch_num; ++bi) { @@ -7534,16 +7402,16 @@ TEST_P(convolution_general_gpu, conv_fp16_cases) { topology.add(input_layout("input", input_mem->get_layout()), data("weights_fsv", weights_mem), data("bias", biases_mem), - reorder("input_fsv", "input", {data_types::f16, input_data_format, input_size})); + reorder("input_fsv", "input", { data_types::f16, input_data_format, input_size })); auto conv_fsv = convolution("conv_fsv", "input_fsv", - {"weights_fsv"}, - {"bias"}, + { "weights_fsv" }, + { "bias" }, groups, - {1, 1, stride, stride}, - tensor{{0, 0, pad_x, pad_y}, 0}); - conv_fsv.output_padding = padding({0, 0, output_padding, output_padding}, 0.f); + { 1, 1, stride, stride }, + tensor{ { 0, 0, pad_x, pad_y }, 0 }); + conv_fsv.output_padding = padding({ 0, 0, output_padding, output_padding }, 0.f); topology.add(conv_fsv); } else { @@ -7561,21 +7429,21 @@ TEST_P(convolution_general_gpu, conv_fp16_cases) { 
topology.add(input_layout("input", input_mem->get_layout()), data("weights_fsv", weights_mem), - reorder("input_fsv", "input", {data_types::f16, input_data_format, input_size})); + reorder("input_fsv", "input", { data_types::f16, input_data_format, input_size })); auto conv_fsv = convolution("conv_fsv", "input_fsv", - {"weights_fsv"}, + { "weights_fsv" }, groups, - {1, 1, stride, stride}, - tensor{{0, 0, pad_x, pad_y}, 0}); - conv_fsv.output_padding = padding({0, 0, output_padding, output_padding}, 0.f); + { 1, 1, stride, stride }, + tensor{ { 0, 0, pad_x, pad_y }, 0 }); + conv_fsv.output_padding = padding({ 0, 0, output_padding, output_padding }, 0.f); topology.add(conv_fsv); } build_options options; options.set_option(build_option::optimize_data(true)); - implementation_desc conv_impl = {input_data_format, impl_name}; - options.set_option(build_option::force_implementations({{"conv_fsv", conv_impl}})); + implementation_desc conv_impl = { input_data_format, impl_name }; + options.set_option(build_option::force_implementations({ { "conv_fsv", conv_impl } })); network network(engine, topology, options); network.set_input_data("input", input_mem); @@ -7668,10 +7536,10 @@ TEST_P(convolution_gpu_fsv16_to_bfyx, conv_b_fs_yx_fsv16_to_bfyx_padding) // Add convolution auto input_stride = tensor(1, 1, stride, stride); - auto pad = tensor({0, 0, pad_x, pad_y}, 0); + auto pad = tensor({ 0, 0, pad_x, pad_y }, 0); auto input_dilation = tensor(1, 1, 1, 1); - auto input_padding_before = tensor({0, 0, pad_x, pad_y}, 0); - auto input_padding_after = tensor({0, 0, pad_x, pad_y}, 0); + auto input_padding_before = tensor({ 0, 0, pad_x, pad_y }, 0); + auto input_padding_after = tensor({ 0, 0, pad_x, pad_y }, 0); auto conv_fsv = convolution("conv_fsv", "input_fsv16", { "weights_fsv" }, input_stride, pad, input_dilation, input_padding_before, input_padding_after); conv_fsv.output_padding = padding({ 0, 32, 2, 2 }, 0.f); @@ -7697,7 +7565,7 @@ TEST_P(convolution_gpu_fsv16_to_bfyx, conv_b_fs_yx_fsv16_to_bfyx_padding) // Exec target network (fusing: conv+reorder) build_options options_target; implementation_desc conv_impl = { format::b_fs_yx_fsv16, "convolution_gpu_bfyx_f16" }; - options_target.set_option(build_option::force_implementations({ {"conv_fsv", conv_impl} })); + options_target.set_option(build_option::force_implementations({ { "conv_fsv", conv_impl } })); options_target.set_option(build_option::optimize_data(true)); network network_target(engine, topology, options_target); @@ -7770,9 +7638,9 @@ TEST_P(convolution_gpu_fsv16_to_bfyx, conv_b_fs_yx_fsv16_to_bfyx_different_type) // Add convolution auto input_stride = tensor(1, 1, stride, stride); - auto pad = tensor({0, 0, pad_x, pad_y}, 0); + auto pad = tensor({ 0, 0, pad_x, pad_y }, 0); auto input_dilation = tensor(1, 1, 1, 1); - auto no_padding = tensor({0, 0, pad_x, pad_y}, 0); + auto no_padding = tensor({ 0, 0, pad_x, pad_y }, 0); auto conv_fsv = convolution("conv_fsv", "input_fsv16", { "weights_fsv" }, input_stride, pad, input_dilation, no_padding, no_padding); topology.add(conv_fsv); // format 8 to 8 -> after fusing, format 8 to 3 @@ -7796,7 +7664,7 @@ TEST_P(convolution_gpu_fsv16_to_bfyx, conv_b_fs_yx_fsv16_to_bfyx_different_type) // Exec target network (fusing: conv+reorder) build_options options_target; implementation_desc conv_impl = { format::b_fs_yx_fsv16, "convolution_gpu_bfyx_f16" }; - options_target.set_option(build_option::force_implementations({ {"conv_fsv", conv_impl} })); + options_target.set_option(build_option::force_implementations({ { 
"conv_fsv", conv_impl } })); options_target.set_option(build_option::optimize_data(true)); network network_target(engine, topology, options_target); @@ -7836,7 +7704,7 @@ public: auto wei_mem = engine.allocate_memory(wei_lay); auto weights_flat = flatten_4d(format::bfyx, _weights); set_values(wei_mem, weights_flat); - layout reordered_layout = layout{input_type(), input_format(), input_size(), padding_size()}; + layout reordered_layout = layout{ input_type(), input_format(), input_size(), padding_size() }; auto topo = topology(); topo.add(input_layout("input", input_lay)); topo.add(reorder("input_reorder", "input", reordered_layout)); @@ -7866,7 +7734,7 @@ public: { weights_id }, static_cast(groups()), tensor(batch(0), feature(0), spatial(_stride_x, _stride_y)), - tensor({0, 0, _offset_x, _offset_y}, 0), + tensor({ 0, 0, _offset_x, _offset_y }, 0), tensor(batch(0), feature(0), spatial(_dilation_x, _dilation_y))); conv_prim.output_data_type = output_type(); topo.add(conv_prim); @@ -7882,7 +7750,7 @@ public: { "bias" }, static_cast(groups()), tensor(batch(0), feature(0), spatial(_stride_x, _stride_y)), - tensor({0, 0, _offset_x, _offset_y}, 0), + tensor({ 0, 0, _offset_x, _offset_y }, 0), tensor(batch(0), feature(0), spatial(_dilation_x, _dilation_y))); conv_prim.output_data_type = output_type(); topo.add(conv_prim); @@ -7902,7 +7770,7 @@ public: auto build_opts = build_options( build_option::optimize_data(true), - build_option::force_implementations({ {"conv", {input_format(), ""}} }) + build_option::force_implementations({ { "conv", { input_format(), "" } } }) ); auto prog = program::build_program(engine, topo, build_opts); @@ -8201,7 +8069,7 @@ public: auto wei_mem = engine.allocate_memory(wei_lay); auto wei_flat = flatten_4d(format::bfyx, this->_weights); set_values(wei_mem, wei_flat); - layout reordered_layout = layout{this->input_type(), this->input_format(), this->input_size(), this->padding_size()}; + layout reordered_layout = layout{ this->input_type(), this->input_format(), this->input_size(), this->padding_size() }; auto topo = topology(); topo.add(input_layout("input", input_lay)); topo.add(reorder("input_reorder", "input", reordered_layout)); @@ -8231,7 +8099,7 @@ public: { weights_id }, static_cast(this->groups()), tensor(batch(0), feature(0), spatial(this->_stride_x, this->_stride_y)), - tensor({0,0, this->_offset_x, this->_offset_y}, 0), + tensor({ 0, 0, this->_offset_x, this->_offset_y }, 0), tensor(batch(0), feature(0), spatial(this->_dilation_x, this->_dilation_y))); conv_prim.output_data_type = this->output_type(); topo.add(conv_prim); @@ -8247,7 +8115,7 @@ public: { "bias" }, static_cast(this->groups()), tensor(batch(0), feature(0), spatial(this->_stride_x, this->_stride_y)), - tensor({0,0, this->_offset_x, this->_offset_y}, 0), + tensor({ 0, 0, this->_offset_x, this->_offset_y }, 0), tensor(batch(0), feature(0), spatial(this->_dilation_x, this->_dilation_y))); conv_prim.output_data_type = this->output_type(); topo.add(conv_prim); @@ -8262,7 +8130,7 @@ public: auto build_opts = build_options( build_option::optimize_data(true), - build_option::force_implementations({ {"conv", { this->input_format(), ""}} }) + build_option::force_implementations({ { "conv", { this->input_format(), "" } } }) ); auto prog = program::build_program(engine, topo, build_opts); @@ -8851,9 +8719,9 @@ TEST_P(convolution_test, CONVOLUTION) { } INSTANTIATE_TEST_SUITE_P(DISABLED_CONVOLUTION, - convolution_test, - ::testing::ValuesIn(convolution_test::generate_all_test_params()), - 
tests::generic_test::custom_param_name_functor()); + convolution_test, + ::testing::ValuesIn(convolution_test::generate_all_test_params()), + tests::generic_test::custom_param_name_functor()); #ifdef ENABLE_ONEDNN_FOR_GPU @@ -8952,7 +8820,7 @@ TEST_P(convolution_gpu_onednn, conv_onednn_cases) { auto weights_size = tensor(output_f, input_f, filter_y, filter_x, 1); auto weights_data = generate_random_4d(output_f, input_f, filter_y, filter_x, -1, 1); auto weights_data_bfyx = flatten_4d(format::bfyx, weights_data); - auto weights_mem = engine.allocate_memory({data_types::f16, format::bfyx, weights_size}); + auto weights_mem = engine.allocate_memory({ data_types::f16, format::bfyx, weights_size }); set_values(weights_mem, weights_data_bfyx); // Will be used to store reference values calculated in branches depending on bias @@ -8963,7 +8831,7 @@ TEST_P(convolution_gpu_onednn, conv_onednn_cases) { if (with_bias) { auto biases_size = tensor(1, output_f, 1, 1); auto biases_data = generate_random_1d(output_f, -1, 1); - auto biases_mem = engine.allocate_memory({data_types::f16, format::bfyx, biases_size}); + auto biases_mem = engine.allocate_memory({ data_types::f16, format::bfyx, biases_size }); set_values(biases_mem, biases_data); for (auto bi = 0; bi < batch_num; ++bi) { @@ -8981,16 +8849,16 @@ TEST_P(convolution_gpu_onednn, conv_onednn_cases) { topology.add(input_layout("input", input_mem->get_layout()), data("weights_fsv", weights_mem), data("bias", biases_mem), - reorder("input_fsv", "input", {data_types::f16, input_data_format, input_size})); + reorder("input_fsv", "input", { data_types::f16, input_data_format, input_size })); auto conv_fsv = convolution("conv_fsv", "input_fsv", - {"weights_fsv"}, - {"bias"}, + { "weights_fsv" }, + { "bias" }, groups, - {1, 1, stride, stride}, - {0, 0, 0, 0}); - conv_fsv.output_padding = padding({0, 0, 0, 0}, 0.f); + { 1, 1, stride, stride }, + { 0, 0, 0, 0 }); + conv_fsv.output_padding = padding({ 0, 0, 0, 0 }, 0.f); topology.add(conv_fsv); } else { @@ -9008,21 +8876,21 @@ TEST_P(convolution_gpu_onednn, conv_onednn_cases) { topology.add(input_layout("input", input_mem->get_layout()), data("weights_fsv", weights_mem), - reorder("input_fsv", "input", {data_types::f16, input_data_format, input_size})); + reorder("input_fsv", "input", { data_types::f16, input_data_format, input_size })); auto conv_fsv = convolution("conv_fsv", "input_fsv", - {"weights_fsv"}, + { "weights_fsv" }, groups, - {1, 1, stride, stride}, - {0, 0, 0, 0}); - conv_fsv.output_padding = padding({0, 0, 0, 0}, 0.f); + { 1, 1, stride, stride }, + { 0, 0, 0, 0 }); + conv_fsv.output_padding = padding({ 0, 0, 0, 0 }, 0.f); topology.add(conv_fsv); } build_options options; options.set_option(build_option::optimize_data(true)); - implementation_desc conv_impl = {input_data_format, impl_name, prim_impl_types}; - options.set_option(build_option::force_implementations({{"conv_fsv", conv_impl}})); + implementation_desc conv_impl = { input_data_format, impl_name, prim_impl_types }; + options.set_option(build_option::force_implementations({ { "conv_fsv", conv_impl } })); network network(engine, topology, options); network.set_input_data("input", input_mem); @@ -9032,7 +8900,7 @@ TEST_P(convolution_gpu_onednn, conv_onednn_cases) { std::cerr << p.original_id << " " << p.kernel_id << std::endl; auto out_mem = network.get_output("conv_fsv").get_memory(); - mem_lock out_ptr{out_mem, get_test_stream()}; + mem_lock out_ptr{ out_mem, get_test_stream() }; auto out_lay = out_mem->get_layout(); 
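        // The forced implementation is expected to keep the blocked input
        // format; verify format and dimensions before comparing values.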
ASSERT_EQ(out_mem->get_layout().format, input_data_format); @@ -9075,15 +8943,15 @@ TEST(convolution_gpu_onednn, padding_for_cldnn_kernel_after_onednn) { auto weights_size = tensor(16, 16, 1, 1, 1); auto weights_data = generate_random_4d(output_f, input_f, 1, 1, -1, 1); auto weights_data_bfyx = flatten_4d(format::bfyx, weights_data); - auto weights_mem = engine.allocate_memory({data_types::f16, format::bfyx, weights_size}); + auto weights_mem = engine.allocate_memory({ data_types::f16, format::bfyx, weights_size }); set_values(weights_mem, weights_data_bfyx); auto input = input_layout("input", input_mem->get_layout()); auto weights = data("weights", weights_mem); - auto input_reorder = reorder("input_fsv", "input", {data_types::f16, format::b_fs_yx_fsv16, input_size}); + auto input_reorder = reorder("input_fsv", "input", { data_types::f16, format::b_fs_yx_fsv16, input_size }); auto conv1 = convolution("conv1", "input_fsv", { "weights" }); - auto conv2 = convolution("conv2", "conv1", { "weights" }, {1, 1, 1, 1}, {0, 0, -1, -1}, {1, 1, 1, 1}, {output_b, output_f, output_x, output_x}); - auto output_reorder = reorder("reorder", "conv2", {data_types::f32, format::bfyx, {output_b, output_f, output_x, output_x}}); + auto conv2 = convolution("conv2", "conv1", { "weights" }, { 1, 1, 1, 1 }, { 0, 0, -1, -1 }, { 1, 1, 1, 1 }, { output_b, output_f, output_x, output_x }); + auto output_reorder = reorder("reorder", "conv2", { data_types::f32, format::bfyx, { output_b, output_f, output_x, output_x } }); topology topology_test(input, weights, input_reorder, conv1, conv2, output_reorder); topology topology_ref(input, weights, input_reorder, conv1, conv2, output_reorder); @@ -9091,13 +8959,13 @@ TEST(convolution_gpu_onednn, padding_for_cldnn_kernel_after_onednn) { build_options options_test; implementation_desc conv1_impl_test = { format::b_fs_yx_fsv16, "", impl_types::onednn }; implementation_desc conv2_impl_test = { format::b_fs_yx_fsv16, "convolution_gpu_bfyx_f16", impl_types::ocl }; - options_test.set_option(build_option::force_implementations({ {"conv1", conv1_impl_test}, {"conv2", conv2_impl_test} })); + options_test.set_option(build_option::force_implementations({ { "conv1", conv1_impl_test }, { "conv2", conv2_impl_test } })); options_test.set_option(build_option::optimize_data(true)); build_options options_ref; implementation_desc conv1_impl_ref = { format::bfyx, "", impl_types::ocl }; implementation_desc conv2_impl_ref = { format::bfyx, "", impl_types::ocl }; - options_ref.set_option(build_option::force_implementations({ {"conv1", conv1_impl_ref}, {"conv2", conv2_impl_ref} })); + options_ref.set_option(build_option::force_implementations({ { "conv1", conv1_impl_ref }, { "conv2", conv2_impl_ref } })); options_ref.set_option(build_option::optimize_data(true)); network network_test(engine, topology_test, options_test); diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp index 957ca159a0a..962759bdc7c 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp @@ -681,7 +681,7 @@ TEST_P(conv_fp32_reorder_fsv16_to_bfyx_conv, basic) { reorder("reorder_fsv16", "input", format::b_fs_yx_fsv16, data_types::f32), convolution("conv_prim", "reorder_fsv16", { "weights" }, p.groups, p.stride, p.pad, p.dilation), reorder("reorder_bfyx", "conv_prim", format::bfyx, data_types::f32), - 
convolution("conv_output", "reorder_bfyx", { "weights_dw" }, 1, dw_stride, p.pad, p.dilation), + convolution("conv_output", "reorder_bfyx", { "weights_dw" }, p.out_shape.feature[0], dw_stride, p.pad, p.dilation), activation("activation", "conv_output", activation_func::abs), reorder("reorder_output", "activation", p.default_format, data_types::f32) ); @@ -10059,7 +10059,7 @@ TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_subtract, have_subtract_per_feature) data("weights_dw", get_mem(dw_weights_layout, -127, 127)), convolution("conv_prim", "input", { "weights" }, p.groups, p.stride, p.pad, p.dilation), reorder("reorder_fsv32", "conv_prim", format::fs_b_yx_fsv32, data_types::f32, values_to_subtract), - convolution("conv_output", "reorder_fsv32", { "weights_dw" }, 1, dw_stride, p.pad, p.dilation), + convolution("conv_output", "reorder_fsv32", { "weights_dw" }, p.out_shape.feature[0], dw_stride, p.pad, p.dilation), activation("activation", "conv_output", activation_func::abs) ); @@ -10088,7 +10088,7 @@ TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_fused_activation, have_fused_activat convolution("conv_prim", "input", { "weights" }, p.groups, p.stride, p.pad, p.dilation), reorder("reorder_fsv32", "conv_prim", format::fs_b_yx_fsv32, data_types::f32), activation("activation_quantize", "reorder_fsv32", activation_func::relu), - convolution("conv_output", "activation_quantize", { "weights_dw" }, 1, dw_stride, p.pad, p.dilation), + convolution("conv_output", "activation_quantize", { "weights_dw" }, p.out_shape.feature[0], dw_stride, p.pad, p.dilation), activation("activation", "conv_output", activation_func::abs) ); @@ -10116,7 +10116,7 @@ TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_data_padding, have_data_padding) { data("weights_dw", get_mem(dw_weights_layout, -127, 127)), convolution("conv_prim", "input", { "weights" }, p.groups, p.stride, p.pad, p.dilation), reorder("reorder_fsv32", "conv_prim", layout(data_types::f32, format::fs_b_yx_fsv32, dw_tensor, padding{ {0, 0, 1, 1}, 0 })), - convolution("conv_output", "reorder_fsv32", { "weights_dw" }, 1, dw_stride, p.pad, p.dilation), + convolution("conv_output", "reorder_fsv32", { "weights_dw" }, p.out_shape.feature[0], dw_stride, p.pad, p.dilation), activation("activation", "conv_output", activation_func::abs), activation("activation2", "conv_prim", activation_func::abs), eltwise("add_bias", { "activation", "activation2" }, eltwise_mode::sum) diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/memory_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/memory_test.cpp index d1c76a316c4..642fb65f603 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/memory_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/memory_test.cpp @@ -43,7 +43,7 @@ TEST(memory_tests, DISABLED_network_creation_loop) { engine eng; - memory in = memory::allocate(eng, layout{ data_types::f32, format::bfyx,{ 1, 1, 1000, 1000 } }); + memory in = memory::allocate(eng, layout{ data_types::f32, format::bfyx, { 1, 1, 1000, 1000 } }); topology tpl{ input_layout("in", in->get_layout()), @@ -66,7 +66,7 @@ TEST(memory_pool, basic_non_padded_relu_pipe) { auto x_size = 1; auto y_size = 1; - auto input = engine->allocate_memory({ data_types::f32, format::bfyx,{ tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } }); + auto input = engine->allocate_memory({ data_types::f32, format::bfyx, { tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } }); topology topology; topology.add(input_layout("input", 
input->get_layout())); @@ -86,7 +86,7 @@ TEST(memory_pool, basic_non_padded_relu_pipe) { network.set_input_data("input", input); auto outputs = network.execute(); - EXPECT_EQ(engine->get_max_used_device_memory(), (uint64_t) 64); + EXPECT_EQ(engine->get_max_used_device_memory(), (uint64_t)64); } TEST(memory_pool, basic_non_padded_relu_and_pooling_pipe) { @@ -99,13 +99,13 @@ TEST(memory_pool, basic_non_padded_relu_and_pooling_pipe) { auto x_size = 4; auto y_size = 4; - auto input = engine->allocate_memory({ data_types::f32, format::bfyx,{ tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } }); + auto input = engine->allocate_memory({ data_types::f32, format::bfyx, { tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } }); topology topology; topology.add(input_layout("input", input->get_layout())); topology.add(activation("relu", "input", activation_func::relu)); topology.add(activation("relu1", "relu", activation_func::relu)); - topology.add(pooling("pool1", "relu1",pooling_mode::max, { 1,1,3,3 }, { 1,1,2,2 })); + topology.add(pooling("pool1", "relu1", pooling_mode::max, { 1, 1, 3, 3 }, { 1, 1, 2, 2 })); topology.add(activation("relu2", "pool1", activation_func::relu)); topology.add(activation("relu3", "relu2", activation_func::relu)); topology.add(activation("relu4", "relu3", activation_func::relu)); @@ -133,7 +133,7 @@ TEST(memory_pool, multi_outputs_network) { auto x_size = 4; auto y_size = 4; - auto input = engine->allocate_memory({ data_types::f32, format::bfyx,{ tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } }); + auto input = engine->allocate_memory({ data_types::f32, format::bfyx, { tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } }); topology topology; topology.add(input_layout("input", input->get_layout())); @@ -153,7 +153,7 @@ TEST(memory_pool, multi_outputs_network) { network.set_input_data("input", input); auto outputs = network.execute(); - EXPECT_EQ(engine->get_max_used_device_memory(), (uint64_t)1536); + EXPECT_EQ(engine->get_max_used_device_memory(), (uint64_t) 1536); } TEST(memory_pool, oooq) { @@ -171,14 +171,14 @@ TEST(memory_pool, oooq) { auto x_size = 4; auto y_size = 4; - auto input = engine->allocate_memory({ data_types::f32, format::bfyx,{ tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } }); + auto input = engine->allocate_memory({ data_types::f32, format::bfyx, { tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } }); topology topology; topology.add(input_layout("input", input->get_layout())); topology.add(activation("relu1", "input", activation_func::relu)); topology.add(activation("relu2", "input", activation_func::relu)); topology.add(activation("relu3", "input", activation_func::relu)); - topology.add(concatenation("concat1", { "relu1", "relu2"},concatenation::along_f)); + topology.add(concatenation("concat1", { "relu1", "relu2" },concatenation::along_f)); topology.add(activation("relu4", "concat1", activation_func::relu)); topology.add(activation("relu5", "relu3", activation_func::relu)); topology.add(concatenation("concat2", { "relu4", "relu5" }, concatenation::along_f)); @@ -209,7 +209,7 @@ TEST(memory_pool, DISABLED_shared_mem_pool_same_topology_twice) { auto inp_x_size = 4; auto inp_y_size = 4; - auto input = engine->allocate_memory({ data_types::f32, format::bfyx,{ tensor(spatial(inp_x_size, inp_y_size), feature(feature_num), batch(batch_num)) } }); + auto input = engine->allocate_memory({ data_types::f32, 
format::bfyx, { tensor(spatial(inp_x_size, inp_y_size), feature(feature_num), batch(batch_num)) } }); set_values(input, { 1.0f, 2.5f, 3.0f, 4.0f, 5.0f, 2.0f, 2.0f, 3.0f, 6.1f, 4.7f, 1.0f, 1.0f, 8.2f, 1.0f, 2.0f, 1.0f, @@ -227,7 +227,7 @@ TEST(memory_pool, DISABLED_shared_mem_pool_same_topology_twice) { topology.add(activation("relu4", "concat1", activation_func::relu)); topology.add(activation("relu5", "relu3", activation_func::relu)); topology.add(concatenation("concat2", { "relu4", "relu5" }, concatenation::along_f)); - topology.add(activation("relu6", "concat2", activation_func::linear, {1.0f, 0.5f})); + topology.add(activation("relu6", "concat2", activation_func::linear, { 1.0f, 0.5f })); build_options bo; bo.set_option(build_option::optimize_data(true)); @@ -286,8 +286,8 @@ TEST(memory_pool, DISABLED_shared_mem_pool_same_topology_twice_weights) { auto inp_x_size = 4; auto inp_y_size = 4; - auto input= engine->allocate_memory({ data_types::f32, format::bfyx,{ tensor(spatial(inp_x_size, inp_y_size), feature(feature_num), batch(batch_num)) } }); - auto weights = engine->allocate_memory({ data_types::f32,format::bfyx,{ 1, 1, 3, 2 } }); + auto input= engine->allocate_memory({ data_types::f32, format::bfyx, { tensor(spatial(inp_x_size, inp_y_size), feature(feature_num), batch(batch_num)) } }); + auto weights = engine->allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 3, 2 } }); std::vector dummy_input_data_1 = { /*f0 xy*/ 0.8f, 0.65f, 0.1f, 1.0f, 1.0f, 0.5f, 0.11f, 0.33f, 0.66f, 0.11f, 0.22f, 0.33f, 0.99f, 0.8f, 0.7f, 0.5f, @@ -373,10 +373,10 @@ TEST(memory_pool, shared_mem_pool_diff_batches) { layout lay_batch_8 = { dt, fmt, { tensor(spatial(inp_x_size, inp_y_size), feature(feature_num), batch(batch_8)) }}; auto input_1 = engine->allocate_memory(lay_batch_1); auto input_8 = engine->allocate_memory(lay_batch_8); - auto weights = engine->allocate_memory({ dt, fmt, { 1, 1, 3, 2 } }); + auto weights = engine->allocate_memory({ dt, fmt, { 1, 3, 3, 2 } }); - std::vector dummy_input_data_1 = generate_random_1d(batch_1*feature_num*inp_x_size*inp_y_size, 0, 1); - std::vector dummy_input_data_8 = generate_random_1d(batch_8*feature_num*inp_x_size*inp_y_size, 0, 1); + std::vector dummy_input_data_1 = generate_random_1d(batch_1 * feature_num * inp_x_size * inp_y_size, 0, 1); + std::vector dummy_input_data_8 = generate_random_1d(batch_8 * feature_num * inp_x_size * inp_y_size, 0, 1); set_values(input_1, dummy_input_data_1); set_values(input_8, dummy_input_data_8); @@ -396,14 +396,14 @@ TEST(memory_pool, shared_mem_pool_diff_batches) { auto outputs = network_first.execute(); auto dev_info = engine->get_device_info(); - EXPECT_EQ(engine->get_max_used_device_memory(), (uint64_t)3928); + EXPECT_EQ(engine->get_max_used_device_memory(), (uint64_t) 4744); topo.change_input_layout("input", input_1->get_layout());//change input layout to batch=1 network network_second(*engine, topo, bo); network_second.set_input_data("input", input_1); auto outputs_second = network_second.execute(); - EXPECT_EQ(engine->get_max_used_device_memory(), (uint64_t)4328); + EXPECT_EQ(engine->get_max_used_device_memory(), (uint64_t) 5912); } TEST(memory_pool, shared_dep_two_output) { @@ -459,20 +459,20 @@ TEST(memory_pool, non_opt_intermidate_opt_after) { auto input_memory1 = engine.allocate_memory(input_layout1); auto input_memory2 = engine.allocate_memory(input_layout2); - auto scale_memory = engine.allocate_memory(layout(cldnn::data_types::f32, cldnn::format::bfyx, { 1,1,1,1 })); + auto scale_memory = 
engine.allocate_memory(layout(cldnn::data_types::f32, cldnn::format::bfyx, { 1, 1, 1, 1 })); auto data_memory = cldnn::data("scale_mem", scale_memory); set_values(input_memory1, { 1.0f, 2.0f, 3.0f, 4.0f }); set_values(input_memory2, { 5.0f, 6.0f, 7.0f, 8.0f }); - set_values(scale_memory, { 1.0f}); + set_values(scale_memory, { 1.0f }); auto reshape_tensor = cldnn::tensor(8, 1, 1, 1); auto input = cldnn::input_layout("input1", input_layout1); auto input2 = cldnn::input_layout("input2", input_layout2); auto concat = cldnn::concatenation("concat", { "input1", "input2" }, cldnn::concatenation::along_b); auto reshape = cldnn::reshape("reshape", "concat", reshape_tensor); - auto crop1 = cldnn::crop("crop1", "reshape", { 1,1,1,1 }, { 0, 0, 0, 0 }); - auto crop2 = cldnn::crop("crop2", "reshape", { 1,1,1,1 }, { 1, 0, 0, 0 }); + auto crop1 = cldnn::crop("crop1", "reshape", { 1, 1, 1, 1 }, { 0, 0, 0, 0 }); + auto crop2 = cldnn::crop("crop2", "reshape", { 1, 1, 1, 1 }, { 1, 0, 0, 0 }); auto eltwise1 = cldnn::scale("elt1", "crop1", "scale_mem"); auto eltwise2 = cldnn::scale("elt2", "crop2", "scale_mem"); @@ -508,7 +508,7 @@ TEST(memory_pool, add_mem_dep_test) { auto input_layout1 = layout(cldnn::data_types::f32, cldnn::format::bfyx, { 1, 2, 2, 2 }); auto input_memory1 = engine.allocate_memory(input_layout1); - auto scale_memory = engine.allocate_memory(layout(cldnn::data_types::f32, cldnn::format::bfyx, { 1,1,1,1 })); + auto scale_memory = engine.allocate_memory(layout(cldnn::data_types::f32, cldnn::format::bfyx, { 1, 1, 1, 1 })); auto data_memory = cldnn::data("scale_mem", scale_memory); set_values(input_memory1, { 1.0f, 2.0f, 3.0f, 4.0f, @@ -518,8 +518,8 @@ TEST(memory_pool, add_mem_dep_test) { auto input = cldnn::input_layout("input1", input_layout1); auto actv1 = cldnn::activation("input_activ1", "input1", activation_func::abs); auto actv2 = cldnn::activation("input_activ2", "input1", activation_func::abs); - auto crop1 = cldnn::crop("crop1", "input_activ1", { 1,1,2,2 }, { 0, 0, 0, 0 }); - auto crop2 = cldnn::crop("crop2", "input_activ2", { 1,1,2,2 }, { 0, 1, 0, 0 }); + auto crop1 = cldnn::crop("crop1", "input_activ1", { 1, 1, 2, 2 }, { 0, 0, 0, 0 }); + auto crop2 = cldnn::crop("crop2", "input_activ2", { 1, 1, 2, 2 }, { 0, 1, 0, 0 }); auto eltwise1 = cldnn::scale("elt1", "crop1", "scale_mem"); auto eltwise2 = cldnn::scale("elt2", "crop2", "scale_mem"); auto actv3 = cldnn::activation("out3", "elt1", activation_func::abs); diff --git a/src/plugins/intel_gpu/src/plugin/ops/convolution.cpp b/src/plugins/intel_gpu/src/plugin/ops/convolution.cpp index e04e2a390db..248a0d3758b 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/convolution.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/convolution.cpp @@ -137,11 +137,12 @@ static void CreateConvolutionBackpropDataOp(Program& p, const std::shared_ptr<ngraph::op::v1::ConvolutionBackpropData>& op) { auto weightsName = inputs[1]; auto weights_node = op->get_input_node_shared_ptr(1); - // WA: For the cases like Const(weights)->Sub(zp)->Deconv. + bool hasConstantWeights = IsNodeOnConstPath(weights_node); + // WA: For the cases like Const(weights)->Sub(zp)->Deconv. And also for the cases with real runtime weights. // Dimensions order of weights blob is IOYX, but // the selected format is OIYX by default.
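// (Illustrative only, with assumed channel counts: a deconv weights blob with I = 2, O = 8 and a
// 3x3 kernel arrives as IOYX { 2, 8, 3, 3 }, while OIYX expects { 8, 2, 3, 3 }; for weights_rank == 4
// the permute below therefore amounts to permute_order = { 1, 0, 2, 3 }, i.e. only dims 0 and 1 trade places.)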
So we need to swap (and transpose) I and O dimensions to match the format // For Constant node on input transpose is not needed, because the data is transposed on const node creation - if (IsNodeOnConstPath(weights_node) && std::dynamic_pointer_cast<ngraph::op::Constant>(weights_node) == nullptr) { + if ((hasConstantWeights && std::dynamic_pointer_cast<ngraph::op::Constant>(weights_node) == nullptr) || !hasConstantWeights) { std::string permuteName = layerName + "_cldnn_weights_permute"; auto weights_rank = op->get_input_shape(1).size(); std::vector<uint16_t> permute_order(weights_rank); @@ -195,11 +196,12 @@ static void CreateGroupConvolutionBackpropDataOp(Program& p, const std::shared_ptr<ngraph::op::v1::GroupConvolutionBackpropData>& op) { auto weightsName = inputs[1]; auto weights_node = op->get_input_node_shared_ptr(1); - // WA: For the cases like Const(weights)->Sub(zp)->Deconv. + bool hasConstWeights = IsNodeOnConstPath(weights_node); + // WA: For the cases like Const(weights)->Sub(zp)->Deconv. And also for the cases with real runtime weights. // Dimensions order of weights blob is IOYX, but // the selected format is OIYX by default. So we need to swap I and O dimensions to match the format. // For Constant node on input transpose is not needed, because the data is transposed on const node creation - if (IsNodeOnConstPath(weights_node) && std::dynamic_pointer_cast<ngraph::op::Constant>(weights_node) == nullptr) { + if ((hasConstWeights && std::dynamic_pointer_cast<ngraph::op::Constant>(weights_node) == nullptr) || !hasConstWeights) { std::string permuteName = layerName + "_cldnn_weights_permute"; auto weights_rank = op->get_input_shape(1).size(); std::vector<uint16_t> permute_order(weights_rank); diff --git a/src/plugins/intel_gpu/src/plugin/program.cpp b/src/plugins/intel_gpu/src/plugin/program.cpp index c2cc3875497..cc3fb1a6e10 100644 --- a/src/plugins/intel_gpu/src/plugin/program.cpp +++ b/src/plugins/intel_gpu/src/plugin/program.cpp @@ -346,7 +346,7 @@ bool IsNodeOnConstPath(const std::shared_ptr<ngraph::Node>& node) { std::function<bool(const std::shared_ptr<ngraph::Node>&)> is_const_node = [&nodes_processed, &is_const_node](const std::shared_ptr<ngraph::Node>& node) { if (nodes_processed.count(node)) return true; nodes_processed.insert(node); - // If input is constant, then drop if from the processing list + // If input is constant, then drop it from the processing list if (std::dynamic_pointer_cast<ngraph::op::Constant>(node) != nullptr) return true; // If the node doesn't have any parents and it's not a constant, then we deal with dynamic path From 1177d2b282a7699cc398717ccd8a35dd7a375bf0 Mon Sep 17 00:00:00 2001 From: Sergey Shlyapnikov Date: Wed, 15 Dec 2021 13:15:13 +0300 Subject: [PATCH 03/27] [GPU] Change FQ output for first Convolution (#9200) * update onednn_gpu * [GPU] Add bs_fs_yx_bsv8_fsv4 format Co-authored-by: Kim,SungEun --- .../clDNN/api/intel_gpu/runtime/tensor.hpp | 2 + .../kernel_selector/common/tensor_type.cpp | 6 +++ .../kernel_selector/common/tensor_type.h | 1 + .../include/batch_headers/fetch_data.cl | 33 ++++++++++++++ .../kernel_selector/core/common/jitter.cpp | 2 + .../core/kernel_selector_common.cpp | 1 + .../clDNN/src/impls/ocl/convolution.cpp | 5 +++ .../clDNN/src/impls/ocl/eltwise.cpp | 7 +++ .../src/impls/onednn/concatenation_onednn.cpp | 5 +++ .../src/impls/onednn/convolution_onednn.cpp | 5 +++ .../src/impls/onednn/deconvolution_onednn.cpp | 5 +++ .../clDNN/src/impls/onednn/utils.cpp | 1 + .../clDNN/src/include/to_string_utils.h | 2 + .../clDNN/src/kernel_selector_helper.cpp | 4 ++ .../thirdparty/clDNN/src/layout_optimizer.cpp | 6 ++- .../thirdparty/clDNN/src/program_helpers.cpp | 43 ++++++++----------- 16 files changed, 102 insertions(+), 26 deletions(-) diff --git
a/inference-engine/thirdparty/clDNN/api/intel_gpu/runtime/tensor.hpp b/inference-engine/thirdparty/clDNN/api/intel_gpu/runtime/tensor.hpp index 846cf6e4bf6..aeea86c190e 100644 --- a/inference-engine/thirdparty/clDNN/api/intel_gpu/runtime/tensor.hpp +++ b/inference-engine/thirdparty/clDNN/api/intel_gpu/runtime/tensor.hpp @@ -85,6 +85,7 @@ struct format { bs_fs_zyx_bsv16_fsv16, ///< format used for 3D blocked convolution (batch and features blocked by 16) bs_fs_yx_bsv16_fsv16, ///< format used for 2D blocked convolution (batch and features blocked by 16) bs_fs_yx_bsv4_fsv4, ///< format used for 2D blocked convolution (batch and features blocked by 4) + bs_fs_yx_bsv8_fsv4, ///< format used for 2D blocked convolution (batch and features blocked by 8 and 4) bs_fs_yx_bsv4_fsv2, ///< format used for 2D blocked convolution (batch blocked by 4, features blocked by 2) bs_fs_zyx_bsv4_fsv4, ///< format used for 3D blocked convolution (batch and features blocked by 4) bs_fs_zyx_bsv4_fsv2, ///< format used for 3D blocked convolution (batch blocked by 4, features blocked by 2) @@ -255,6 +256,7 @@ struct format { { bs_fs_zyx_bsv16_fsv16, { 1, 1, 3, 0, "bfzyx", "bfxyz", {{0, 16 }, {1, 16}}}}, { bs_fs_yx_bsv16_fsv16, { 1, 1, 2, 0, "bfyx", "bfxy?", {{0, 16 }, {1, 16}}}}, { bs_fs_yx_bsv4_fsv4, { 1, 1, 2, 0, "bfyx", "bfxy?", {{0, 4 }, {1, 4}}}}, + { bs_fs_yx_bsv8_fsv4, { 1, 1, 2, 0, "bfyx", "bfxy?", {{0, 8 }, {1, 4}}}}, { bs_fs_yx_bsv4_fsv2, { 1, 1, 2, 0, "bfyx", "bfxy?", {{0, 4 }, {1, 2}}}}, { bs_fs_zyx_bsv4_fsv4, { 1, 1, 3, 0, "bfzyx", "bfxyz", {{0, 4 }, {1, 4}}}}, { bs_fs_zyx_bsv4_fsv2, { 1, 1, 3, 0, "bfzyx", "bfxyz", {{0, 4 }, {1, 2}}}}, diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/common/tensor_type.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/common/tensor_type.cpp index 97d6c7da91b..ce7ec16ad4e 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/common/tensor_type.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/common/tensor_type.cpp @@ -29,6 +29,7 @@ DataTensor::DataChannelArray DataTensor::dataChannelArray {{ { DataLayout::bs_fs_zyx_bsv16_fsv16, { 0, 1, 2, -1, 3, 4 } }, { DataLayout::bs_fs_yx_bsv16_fsv16, { 0, 1, -1, -1, 2, 3 } }, { DataLayout::bs_fs_yx_bsv4_fsv4, { 0, 1, -1, -1, 2, 3 } }, + { DataLayout::bs_fs_yx_bsv8_fsv4, { 0, 1, -1, -1, 2, 3 } }, { DataLayout::bs_fs_yx_bsv4_fsv2, { 0, 1, -1, -1, 2, 3 } }, { DataLayout::bs_fs_yx_bsv32_fsv32, { 0, 1, -1, -1, 2, 3 } }, { DataLayout::bs_fs_yx_bsv32_fsv16, { 0, 1, -1, -1, 2, 3 } }, @@ -206,6 +207,11 @@ NDims DataTensor::GetSimpleDims(const std::vector& d, DataLayout l) { newDims[2] = RoundUp(newDims[2], 4); newDims[3] = RoundUp(newDims[3], 4); break; + case bs_fs_yx_bsv8_fsv4: + assert(newDims.size() == 4); + newDims[2] = RoundUp(newDims[2], 4); + newDims[3] = RoundUp(newDims[3], 8); + break; case bs_fs_yx_bsv4_fsv2: assert(newDims.size() == 4); newDims[2] = RoundUp(newDims[2], 2); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/common/tensor_type.h b/inference-engine/thirdparty/clDNN/kernel_selector/common/tensor_type.h index 7ed87ec644b..fb57e4592dc 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/common/tensor_type.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/common/tensor_type.h @@ -39,6 +39,7 @@ enum DataLayout { bs_fs_yx_bsv16_fsv16, // batch, feature, 2D spatial. Blocks of 16 batch and channels bs_fs_zyx_bsv16_fsv16, // batch, feature, 3D spatial. Blocks of 16 batch and channels bs_fs_yx_bsv4_fsv4, // batch, feature, 2D spatial. 
Blocks of 4 batch and 4 channels + bs_fs_yx_bsv8_fsv4, // batch, feature, 2D spatial. Blocks of 8 batch and 4 channels bs_fs_yx_bsv4_fsv2, // batch, feature, 2D spatial. Blocks of 4 batch and 2 channels bs_fs_yx_bsv32_fsv32, // batch, feature, 2D spatial. Blocks of 32 batch and 32 channels bs_fs_yx_bsv32_fsv16, // batch, feature, 2D spatial. Blocks of 32 batch and 16 channels diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/batch_headers/fetch_data.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/batch_headers/fetch_data.cl index b35522168b5..5af9d161ce3 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/batch_headers/fetch_data.cl +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/batch_headers/fetch_data.cl @@ -506,6 +506,22 @@ inline uint get_bs_fs_zyx_bsv_fsv_index(uint b, uint f, uint z, uint y, uint x, CAT(prefix, _PAD_BEFORE_SIZE_X), \ CAT(prefix, _PAD_AFTER_SIZE_X), 4, 4) +#define GET_DATA_BS_FS_YX_BSV8_FSV4_INDEX(prefix, b, f, y, x) \ + get_bs_fs_zyx_bsv_fsv_index( \ + b, f, 0, y, x, \ + CAT(prefix, _SIZE_X), \ + CAT(prefix, _SIZE_Y), \ + CAT(prefix, _SIZE_Z), \ + CAT(prefix, _FEATURE_NUM), \ + CAT(prefix, _PAD_BEFORE_FEATURE_NUM), \ + CAT(prefix, _PAD_AFTER_FEATURE_NUM), \ + CAT(prefix, _PAD_BEFORE_SIZE_Z), \ + CAT(prefix, _PAD_AFTER_SIZE_Z), \ + CAT(prefix, _PAD_BEFORE_SIZE_Y), \ + CAT(prefix, _PAD_AFTER_SIZE_Y), \ + CAT(prefix, _PAD_BEFORE_SIZE_X), \ + CAT(prefix, _PAD_AFTER_SIZE_X), 8, 4) + #define GET_DATA_BS_FS_YX_BSV4_FSV2_INDEX(prefix, b, f, y, x) \ get_bs_fs_zyx_bsv_fsv_index( \ b, f, 0, y, x, \ @@ -605,6 +621,23 @@ inline uint get_bs_fs_zyx_bsv_fsv_index(uint b, uint f, uint z, uint y, uint x, CAT(prefix, _PAD_BEFORE_SIZE_X), \ CAT(prefix, _PAD_AFTER_SIZE_X), 4, 4) +#define GET_DATA_BS_FS_YX_BSV8_FSV4_INDEX_SAFE(prefix, b, f, y, x) \ + get_bs_fs_zyx_bsv_fsv_index_safe( \ + b, f, 0, y, x, \ + CAT(prefix, _SIZE_X), \ + CAT(prefix, _SIZE_Y), \ + CAT(prefix, _SIZE_Z), \ + CAT(prefix, _FEATURE_NUM), \ + CAT(prefix, _BATCH_NUM), \ + CAT(prefix, _PAD_BEFORE_FEATURE_NUM), \ + CAT(prefix, _PAD_AFTER_FEATURE_NUM), \ + CAT(prefix, _PAD_BEFORE_SIZE_Z), \ + CAT(prefix, _PAD_AFTER_SIZE_Z), \ + CAT(prefix, _PAD_BEFORE_SIZE_Y), \ + CAT(prefix, _PAD_AFTER_SIZE_Y), \ + CAT(prefix, _PAD_BEFORE_SIZE_X), \ + CAT(prefix, _PAD_AFTER_SIZE_X), 8, 4) + #define GET_DATA_BS_FS_YX_BSV4_FSV2_INDEX_SAFE(prefix, b, f, y, x) \ get_bs_fs_zyx_bsv_fsv_index_safe( \ b, f, 0, y, x, \ diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.cpp index e5927422532..73f164b3659 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.cpp @@ -334,6 +334,7 @@ JitDefinitions DataTensorJitConstant::GetDefinitions() const { layout == DataLayout::fs_b_yx_fsv32 || layout == DataLayout::bs_fs_yx_bsv16_fsv16 || layout == DataLayout::bs_fs_yx_bsv4_fsv4 || + layout == DataLayout::bs_fs_yx_bsv8_fsv4 || layout == DataLayout::bs_fs_yx_bsv4_fsv2 || layout == DataLayout::bs_fs_yx_bsv32_fsv16 || layout == DataLayout::bs_fs_yx_bsv32_fsv32) { @@ -346,6 +347,7 @@ JitDefinitions DataTensorJitConstant::GetDefinitions() const { layout == DataLayout::bs_fs_yx_bsv32_fsv32 || layout == DataLayout::bs_fs_yx_bsv32_fsv16 || layout == DataLayout::bs_fs_yx_bsv4_fsv4 || + layout == DataLayout::bs_fs_yx_bsv8_fsv4 
|| layout == DataLayout::bs_fs_yx_bsv4_fsv2 || layout == DataLayout::bs_fs_yx_bsv16_fsv16) safe_index_func_val = "GET_DATA_" + layout_str + "_INDEX_SAFE(" + _name + ", b, f, y, x)"; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_common.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_common.cpp index 3491e475e07..75349b31f3e 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_common.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_common.cpp @@ -105,6 +105,7 @@ std::string toString(DataLayout l) { case kernel_selector::DataLayout::bs_fs_yx_bsv16_fsv16: return "BS_FS_YX_BSV16_FSV16"; case kernel_selector::DataLayout::bs_fs_zyx_bsv16_fsv16: return "BS_FS_ZYX_BSV16_FSV16"; case kernel_selector::DataLayout::bs_fs_yx_bsv4_fsv4: return "BS_FS_YX_BSV4_FSV4"; + case kernel_selector::DataLayout::bs_fs_yx_bsv8_fsv4: return "BS_FS_YX_BSV8_FSV4"; case kernel_selector::DataLayout::bs_fs_yx_bsv4_fsv2: return "BS_FS_YX_BSV4_FSV2"; case kernel_selector::DataLayout::bs_fs_yx_bsv32_fsv32: return "BS_FS_YX_BSV32_FSV32"; case kernel_selector::DataLayout::bs_fs_yx_bsv32_fsv16: return "BS_FS_YX_BSV32_FSV16"; diff --git a/inference-engine/thirdparty/clDNN/src/impls/ocl/convolution.cpp b/inference-engine/thirdparty/clDNN/src/impls/ocl/convolution.cpp index a0c8a0874a1..69d79e22315 100644 --- a/inference-engine/thirdparty/clDNN/src/impls/ocl/convolution.cpp +++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/convolution.cpp @@ -225,6 +225,11 @@ attach_convolution_impl::attach_convolution_impl() { std::make_tuple(data_types::u8, format::bs_fs_yx_bsv4_fsv4), std::make_tuple(data_types::i8, format::bs_fs_yx_bsv4_fsv4), + std::make_tuple(data_types::f32, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::f16, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::u8, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::i8, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::f32, format::bs_fs_yx_bsv4_fsv2), std::make_tuple(data_types::f16, format::bs_fs_yx_bsv4_fsv2), std::make_tuple(data_types::u8, format::bs_fs_yx_bsv4_fsv2), diff --git a/inference-engine/thirdparty/clDNN/src/impls/ocl/eltwise.cpp b/inference-engine/thirdparty/clDNN/src/impls/ocl/eltwise.cpp index 3e8c233e126..b15c473fb89 100644 --- a/inference-engine/thirdparty/clDNN/src/impls/ocl/eltwise.cpp +++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/eltwise.cpp @@ -214,6 +214,13 @@ attach_eltwise_impl::attach_eltwise_impl() { std::make_tuple(data_types::i32, format::bs_fs_yx_bsv4_fsv4), std::make_tuple(data_types::i64, format::bs_fs_yx_bsv4_fsv4), + std::make_tuple(data_types::f32, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::f16, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::i8, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::u8, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::i32, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::i64, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::f32, format::bs_fs_yx_bsv4_fsv2), std::make_tuple(data_types::f16, format::bs_fs_yx_bsv4_fsv2), std::make_tuple(data_types::i8, format::bs_fs_yx_bsv4_fsv2), diff --git a/inference-engine/thirdparty/clDNN/src/impls/onednn/concatenation_onednn.cpp b/inference-engine/thirdparty/clDNN/src/impls/onednn/concatenation_onednn.cpp index 2367674d762..c9e337a6466 100644 --- a/inference-engine/thirdparty/clDNN/src/impls/onednn/concatenation_onednn.cpp +++ 
b/inference-engine/thirdparty/clDNN/src/impls/onednn/concatenation_onednn.cpp @@ -119,6 +119,11 @@ attach_concatenation_onednn::attach_concatenation_onednn() { std::make_tuple(data_types::f16, format::bs_fs_yx_bsv4_fsv4), std::make_tuple(data_types::u8, format::bs_fs_yx_bsv4_fsv4), std::make_tuple(data_types::i8, format::bs_fs_yx_bsv4_fsv4), + + std::make_tuple(data_types::f32, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::f16, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::u8, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::i8, format::bs_fs_yx_bsv8_fsv4), }); } diff --git a/inference-engine/thirdparty/clDNN/src/impls/onednn/convolution_onednn.cpp b/inference-engine/thirdparty/clDNN/src/impls/onednn/convolution_onednn.cpp index c10ea0d5b5d..54e0328fdc9 100644 --- a/inference-engine/thirdparty/clDNN/src/impls/onednn/convolution_onednn.cpp +++ b/inference-engine/thirdparty/clDNN/src/impls/onednn/convolution_onednn.cpp @@ -256,6 +256,11 @@ attach_convolution_onednn::attach_convolution_onednn() { std::make_tuple(data_types::u8, format::bs_fs_yx_bsv4_fsv4), std::make_tuple(data_types::i8, format::bs_fs_yx_bsv4_fsv4), + std::make_tuple(data_types::f32, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::f16, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::u8, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::i8, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::f32, format::bs_fs_yx_bsv4_fsv2), std::make_tuple(data_types::f16, format::bs_fs_yx_bsv4_fsv2), std::make_tuple(data_types::u8, format::bs_fs_yx_bsv4_fsv2), diff --git a/inference-engine/thirdparty/clDNN/src/impls/onednn/deconvolution_onednn.cpp b/inference-engine/thirdparty/clDNN/src/impls/onednn/deconvolution_onednn.cpp index bce13ce1698..6b65c181acd 100644 --- a/inference-engine/thirdparty/clDNN/src/impls/onednn/deconvolution_onednn.cpp +++ b/inference-engine/thirdparty/clDNN/src/impls/onednn/deconvolution_onednn.cpp @@ -199,6 +199,11 @@ attach_deconvolution_onednn::attach_deconvolution_onednn() { std::make_tuple(data_types::u8, format::bs_fs_yx_bsv4_fsv4), std::make_tuple(data_types::i8, format::bs_fs_yx_bsv4_fsv4), + std::make_tuple(data_types::f32, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::f16, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::u8, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::i8, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::f32, format::bs_fs_yx_bsv4_fsv2), std::make_tuple(data_types::f16, format::bs_fs_yx_bsv4_fsv2), std::make_tuple(data_types::u8, format::bs_fs_yx_bsv4_fsv2), diff --git a/inference-engine/thirdparty/clDNN/src/impls/onednn/utils.cpp b/inference-engine/thirdparty/clDNN/src/impls/onednn/utils.cpp index a9fd1206e43..72e2effc0e1 100644 --- a/inference-engine/thirdparty/clDNN/src/impls/onednn/utils.cpp +++ b/inference-engine/thirdparty/clDNN/src/impls/onednn/utils.cpp @@ -91,6 +91,7 @@ dnnl::memory::format_tag convert_data_format(cldnn::format fmt) { case cldnn::format::bs_fs_yx_bsv16_fsv16: return dnnl::memory::format_tag::NChw16n16c; case cldnn::format::bs_fs_yx_bsv32_fsv32: return dnnl::memory::format_tag::NChw32n32c; case cldnn::format::bs_fs_yx_bsv4_fsv4: return dnnl::memory::format_tag::ABcd4a4b; + case cldnn::format::bs_fs_yx_bsv8_fsv4: return dnnl::memory::format_tag::ABcd8a4b; case cldnn::format::bs_fs_yx_bsv4_fsv2: return dnnl::memory::format_tag::ABcd4a2b; case cldnn::format::bs_fs_yx_bsv32_fsv16: return dnnl::memory::format_tag::NChw32n16c; case 
cldnn::format::bs_fs_zyx_bsv16_fsv16: return dnnl::memory::format_tag::NCdhw16n16c; diff --git a/inference-engine/thirdparty/clDNN/src/include/to_string_utils.h b/inference-engine/thirdparty/clDNN/src/include/to_string_utils.h index 66975629a08..801895c275c 100644 --- a/inference-engine/thirdparty/clDNN/src/include/to_string_utils.h +++ b/inference-engine/thirdparty/clDNN/src/include/to_string_utils.h @@ -97,6 +97,8 @@ inline std::string fmt_to_str(format fmt) { return "bs_fs_yx_bsv4_fsv2"; case format::bs_fs_yx_bsv4_fsv4: return "bs_fs_yx_bsv4_fsv4"; + case format::bs_fs_yx_bsv8_fsv4: + return "bs_fs_yx_bsv8_fsv4"; case format::bs_fs_yx_bsv32_fsv32: return "bs_fs_yx_bsv32_fsv32"; case format::b_fs_zyx_fsv16: diff --git a/inference-engine/thirdparty/clDNN/src/kernel_selector_helper.cpp b/inference-engine/thirdparty/clDNN/src/kernel_selector_helper.cpp index ac577c70f22..540e84a81ea 100644 --- a/inference-engine/thirdparty/clDNN/src/kernel_selector_helper.cpp +++ b/inference-engine/thirdparty/clDNN/src/kernel_selector_helper.cpp @@ -136,6 +136,8 @@ kernel_selector::data_layout to_data_layout(format f) { return kernel_selector::data_layout::bs_fs_yx_bsv32_fsv16; case format::bs_fs_yx_bsv4_fsv4: return kernel_selector::data_layout::bs_fs_yx_bsv4_fsv4; + case format::bs_fs_yx_bsv8_fsv4: + return kernel_selector::data_layout::bs_fs_yx_bsv8_fsv4; case format::bs_fs_yx_bsv4_fsv2: return kernel_selector::data_layout::bs_fs_yx_bsv4_fsv2; case format::bs_fs_yx_bsv32_fsv32: @@ -193,6 +195,8 @@ cldnn::format from_data_layout(kernel_selector::data_layout l) { return cldnn::format::bs_fs_yx_bsv4_fsv2; case kernel_selector::data_layout::bs_fs_yx_bsv4_fsv4: return cldnn::format::bs_fs_yx_bsv4_fsv4; + case kernel_selector::data_layout::bs_fs_yx_bsv8_fsv4: + return cldnn::format::bs_fs_yx_bsv8_fsv4; case kernel_selector::data_layout::bs_fs_yx_bsv32_fsv32: return cldnn::format::bs_fs_yx_bsv32_fsv32; case kernel_selector::data_layout::nv12: diff --git a/inference-engine/thirdparty/clDNN/src/layout_optimizer.cpp b/inference-engine/thirdparty/clDNN/src/layout_optimizer.cpp index 6156ef8e8eb..1c4518b1654 100644 --- a/inference-engine/thirdparty/clDNN/src/layout_optimizer.cpp +++ b/inference-engine/thirdparty/clDNN/src/layout_optimizer.cpp @@ -284,10 +284,11 @@ bool layout_optimizer::can_fuse_reorder(program_node& prev, program_node& next, return true; if (next.is_type() && - (fmt_prev == format::b_fs_yx_fsv4 || fmt_prev == format::bs_fs_yx_bsv4_fsv4) && + (fmt_prev == format::b_fs_yx_fsv4 || fmt_prev == format::bs_fs_yx_bsv4_fsv4 || fmt_prev == format::bs_fs_yx_bsv8_fsv4) && ((fmt_next == format::b_fs_yx_fsv32 && (prev_output_layout.size.feature[0] == 3 || prev_output_layout.size.feature[0] == 4)) || (fmt_next == format::bs_fs_yx_bsv32_fsv32 && (prev_output_layout.size.feature[0] == 3 || prev_output_layout.size.feature[0] == 4)) || (fmt_next == format::bs_fs_yx_bsv4_fsv4 && (prev_output_layout.size.feature[0] == 3 || prev_output_layout.size.feature[0] == 4)) || + (fmt_next == format::bs_fs_yx_bsv8_fsv4 && (prev_output_layout.size.feature[0] == 3 || prev_output_layout.size.feature[0] == 4)) || (fmt_next == format::b_fs_yx_fsv16 && next_output_layout.size.feature[0] >= 16 && (prev_output_layout.size.feature[0] == 3 || (prev_output_layout.size.feature[0] == 4 && (prev_dt == data_types::u8 || prev_dt == data_types::i8)))))) return true; @@ -1269,6 +1270,7 @@ impl_types layout_optimizer::get_preferred_impl_type(program_node& node, format format::bs_fs_yx_bsv32_fsv16, format::bs_fs_yx_bsv32_fsv32, 
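// For orientation, bs_fs_yx_bsv8_fsv4 tiles batch by 8 and feature by 4, storing each 32-element
// tile contiguously. A minimal sketch of the offset it implies, assuming no padding (the general,
// padded form lives in get_bs_fs_zyx_bsv_fsv_index in fetch_data.cl):
//   size_t offset(size_t b, size_t f, size_t y, size_t x, size_t F, size_t Y, size_t X) {
//       const size_t bsv = 8, fsv = 4, f_blocks = (F + fsv - 1) / fsv;
//       return ((((b / bsv) * f_blocks + f / fsv) * Y + y) * X + x) * (bsv * fsv)
//              + (b % bsv) * fsv + (f % fsv);
//   }
// which lines up with the oneDNN tag ABcd8a4b this format is mapped to in utils.cpp above.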
format::bs_fs_yx_bsv4_fsv4, + format::bs_fs_yx_bsv8_fsv4, format::bs_fs_yx_bsv4_fsv2, format::bs_fs_zyx_bsv4_fsv4, format::bs_fs_zyx_bsv4_fsv2, @@ -1463,7 +1465,7 @@ format layout_optimizer::get_preferred_format(program_node& node) { if (data_type_traits::is_floating_point(conv.get_output_layout().data_type) || ws.spatial[0] != 7 || conv.get_primitive()->groups > 1) expected = format::bfyx; else - expected = format::bs_fs_yx_bsv4_fsv4; + expected = format::bs_fs_yx_bsv8_fsv4; auto conv_output_layout = conv.get_output_layout(); auto weights_layout = conv.weights(0).get_output_layout(); diff --git a/inference-engine/thirdparty/clDNN/src/program_helpers.cpp b/inference-engine/thirdparty/clDNN/src/program_helpers.cpp index 09e3fbf6c99..bddd611cf8a 100644 --- a/inference-engine/thirdparty/clDNN/src/program_helpers.cpp +++ b/inference-engine/thirdparty/clDNN/src/program_helpers.cpp @@ -139,30 +139,25 @@ std::pair program_helpers::are_layouts_identical(layout const& l1, l return {false, false}; if (l1.get_linear_size() != l2.get_linear_size()) return {false, false}; - if ((l1.format == format::b_fs_yx_fsv4 && l2.format != format::b_fs_yx_fsv4) || - (l2.format == format::b_fs_yx_fsv4 && l1.format != format::b_fs_yx_fsv4) || - (l1.format == format::fs_b_yx_fsv32 && l2.format != format::fs_b_yx_fsv32) || - (l2.format == format::fs_b_yx_fsv32 && l1.format != format::fs_b_yx_fsv32) || - (l1.format == format::b_fs_yx_fsv16 && l2.format != format::b_fs_yx_fsv16) || - (l2.format == format::b_fs_yx_fsv16 && l1.format != format::b_fs_yx_fsv16) || - (l1.format == format::b_fs_yx_fsv32 && l2.format != format::b_fs_yx_fsv32) || - (l2.format == format::b_fs_yx_fsv32 && l1.format != format::b_fs_yx_fsv32) || - (l1.format == format::b_fs_zyx_fsv32 && l2.format != format::b_fs_zyx_fsv32) || - (l2.format == format::b_fs_zyx_fsv32 && l1.format != format::b_fs_zyx_fsv32) || - (l1.format == format::b_fs_zyx_fsv16 && l2.format != format::b_fs_zyx_fsv16) || - (l2.format == format::b_fs_zyx_fsv16 && l1.format != format::b_fs_zyx_fsv16) || - (l1.format == format::bs_fs_yx_bsv4_fsv4 && l2.format != format::bs_fs_yx_bsv4_fsv4) || - (l2.format == format::bs_fs_yx_bsv4_fsv4 && l1.format != format::bs_fs_yx_bsv4_fsv4) || - (l1.format == format::bs_fs_yx_bsv4_fsv2 && l2.format != format::bs_fs_yx_bsv4_fsv2) || - (l2.format == format::bs_fs_yx_bsv4_fsv2 && l1.format != format::bs_fs_yx_bsv4_fsv2) || - (l1.format == format::bs_fs_yx_bsv32_fsv16 && l2.format != format::bs_fs_yx_bsv32_fsv16) || - (l2.format == format::bs_fs_yx_bsv32_fsv16 && l1.format != format::bs_fs_yx_bsv32_fsv16) || - (l1.format == format::bs_fs_yx_bsv32_fsv32 && l2.format != format::bs_fs_yx_bsv32_fsv32) || - (l2.format == format::bs_fs_yx_bsv32_fsv32 && l1.format != format::bs_fs_yx_bsv32_fsv32) || - (l1.format == format::bs_fs_yx_bsv16_fsv16 && l2.format != format::bs_fs_yx_bsv16_fsv16) || - (l2.format == format::bs_fs_yx_bsv16_fsv16 && l1.format != format::bs_fs_yx_bsv16_fsv16) || - (l1.format == format::bs_fs_zyx_bsv16_fsv16 && l2.format != format::bs_fs_zyx_bsv16_fsv16) || - (l2.format == format::bs_fs_zyx_bsv16_fsv16 && l1.format != format::bs_fs_zyx_bsv16_fsv16)) + + auto check_format = [&l1, &l2](cldnn::format format) { + return (l1.format == format && l2.format != format) || + (l2.format == format && l1.format != format); + }; + + if (check_format(format::b_fs_yx_fsv4) || + check_format(format::fs_b_yx_fsv32) || + check_format(format::b_fs_yx_fsv16) || + check_format(format::b_fs_yx_fsv32) || + check_format(format::b_fs_zyx_fsv32) || + 
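// check_format(f) is true exactly when one of l1/l2 uses the blocked format f and the other
// does not; any such mismatch means the two layouts cannot be treated as identical, even
// though their linear sizes already matched above.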
check_format(format::b_fs_zyx_fsv16) || + check_format(format::bs_fs_yx_bsv4_fsv4) || + check_format(format::bs_fs_yx_bsv8_fsv4) || + check_format(format::bs_fs_yx_bsv4_fsv2) || + check_format(format::bs_fs_yx_bsv32_fsv16) || + check_format(format::bs_fs_yx_bsv32_fsv32) || + check_format(format::bs_fs_yx_bsv16_fsv16) || + check_format(format::bs_fs_zyx_bsv16_fsv16)) return {false, false}; auto l1_pitch = l1.get_pitches(); From 9aedece39896dd3a40f651f7623e278240f322e0 Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Wed, 15 Dec 2021 14:32:43 +0300 Subject: [PATCH 04/27] Fixed cpu tests location (#9224) --- .../cpu/shape_inference_test/gather_elements_shape_inference.cpp | 0 .../unit/cpu/shape_inference_test/gather_shape_inference.cpp | 0 .../unit/cpu/shape_inference_test/gather_tree_shape_inference.cpp | 0 .../unit/cpu/shape_inference_test/one_hot_shape_inference.cpp | 0 4 files changed, 0 insertions(+), 0 deletions(-) rename {inference-engine => src}/tests/unit/cpu/shape_inference_test/gather_elements_shape_inference.cpp (100%) rename {inference-engine => src}/tests/unit/cpu/shape_inference_test/gather_shape_inference.cpp (100%) rename {inference-engine => src}/tests/unit/cpu/shape_inference_test/gather_tree_shape_inference.cpp (100%) rename {inference-engine => src}/tests/unit/cpu/shape_inference_test/one_hot_shape_inference.cpp (100%) diff --git a/inference-engine/tests/unit/cpu/shape_inference_test/gather_elements_shape_inference.cpp b/src/tests/unit/cpu/shape_inference_test/gather_elements_shape_inference.cpp similarity index 100% rename from inference-engine/tests/unit/cpu/shape_inference_test/gather_elements_shape_inference.cpp rename to src/tests/unit/cpu/shape_inference_test/gather_elements_shape_inference.cpp diff --git a/inference-engine/tests/unit/cpu/shape_inference_test/gather_shape_inference.cpp b/src/tests/unit/cpu/shape_inference_test/gather_shape_inference.cpp similarity index 100% rename from inference-engine/tests/unit/cpu/shape_inference_test/gather_shape_inference.cpp rename to src/tests/unit/cpu/shape_inference_test/gather_shape_inference.cpp diff --git a/inference-engine/tests/unit/cpu/shape_inference_test/gather_tree_shape_inference.cpp b/src/tests/unit/cpu/shape_inference_test/gather_tree_shape_inference.cpp similarity index 100% rename from inference-engine/tests/unit/cpu/shape_inference_test/gather_tree_shape_inference.cpp rename to src/tests/unit/cpu/shape_inference_test/gather_tree_shape_inference.cpp diff --git a/inference-engine/tests/unit/cpu/shape_inference_test/one_hot_shape_inference.cpp b/src/tests/unit/cpu/shape_inference_test/one_hot_shape_inference.cpp similarity index 100% rename from inference-engine/tests/unit/cpu/shape_inference_test/one_hot_shape_inference.cpp rename to src/tests/unit/cpu/shape_inference_test/one_hot_shape_inference.cpp From aa457268d44242142b3461d75092a4a07c2c76d6 Mon Sep 17 00:00:00 2001 From: Vladimir Dudnik Date: Wed, 15 Dec 2021 17:58:06 +0300 Subject: [PATCH 05/27] [IE Samples] make coverity happy (#9203) * make coverity happy * apply code style --- .../c/common/opencv_c_wrapper/bmp_reader.c | 103 +++++++++++++++--- 1 file changed, 86 insertions(+), 17 deletions(-) diff --git a/samples/c/common/opencv_c_wrapper/bmp_reader.c b/samples/c/common/opencv_c_wrapper/bmp_reader.c index e4362277f67..05ec4861901 100644 --- a/samples/c/common/opencv_c_wrapper/bmp_reader.c +++ b/samples/c/common/opencv_c_wrapper/bmp_reader.c @@ -1,31 +1,72 @@ #include "bmp_reader.h" +#include #include #include int readBmpImage(const char* fileName, 
BitMap* image) { - FILE* input = fopen(fileName, "rb"); + size_t cnt; + int status = 0; + FILE* input = 0; - if (input == NULL) { - printf("[BMP] file %s is not opened\n", fileName); - return 1; + if (NULL == fileName || NULL == image) { + printf("[BMP] bad arguments\n"); + status = -1; + goto Exit; } - fread(&image->header.type, 2, 1, input); + memset(image, 0, sizeof(BitMap)); + + input = fopen(fileName, "rb"); + if (input == NULL) { + printf("[BMP] file %s is not opened\n", fileName); + status = 1; + goto Exit; + } + + cnt = fread(&image->header.type, sizeof(image->header.type), sizeof(unsigned char), input); + if (cnt != sizeof(image->header.type)) { + printf("[BMP] file read error\n"); + status = 2; + goto Exit; + } if (image->header.type != 'M' * 256 + 'B') { printf("[BMP] file is not bmp type\n"); - return 2; + status = 2; + goto Exit; } - fread(&image->header.size, 4, 1, input); - fread(&image->header.reserved, 4, 1, input); - fread(&image->header.offset, 4, 1, input); + cnt = fread(&image->header.size, sizeof(image->header.size), sizeof(unsigned char), input); + if (cnt != sizeof(image->header.size)) { + printf("[BMP] file read error\n"); + status = 2; + goto Exit; + } - fread(&image->infoHeader, sizeof(BmpInfoHeader), 1, input); + cnt = fread(&image->header.reserved, sizeof(image->header.reserved), sizeof(unsigned char), input); + if (cnt != sizeof(image->header.reserved)) { + printf("[BMP] file read error\n"); + status = 2; + goto Exit; + } + + cnt = fread(&image->header.offset, sizeof(image->header.offset), sizeof(unsigned char), input); + if (cnt != sizeof(image->header.offset)) { + printf("[BMP] file read error\n"); + status = 2; + goto Exit; + } + + cnt = fread(&image->infoHeader, sizeof(BmpInfoHeader), sizeof(unsigned char), input); + if (cnt != sizeof(image->header.offset)) { + printf("[BMP] file read error\n"); + status = 2; + goto Exit; + } image->width = image->infoHeader.width; - image->height = image->infoHeader.height; + image->height = abs(image->infoHeader.height); if (image->infoHeader.bits != 24) { printf("[BMP] 24bpp only supported. But input has: %d\n", image->infoHeader.bits); @@ -38,21 +79,49 @@ int readBmpImage(const char* fileName, BitMap* image) { } int padSize = image->width & 3; + size_t row_size = (size_t)image->width * 3; char pad[3]; - size_t size = image->width * image->height * 3; + size_t size = row_size * image->height; image->data = malloc(sizeof(char) * size); + if (NULL == image->data) { + printf("[BMP] memory allocation failed\n"); + return 5; + } - fseek(input, image->header.offset, 0); + if (0 != fseek(input, image->header.offset, SEEK_SET)) { + printf("[BMP] file seek error\n"); + status = 2; + goto Exit; + } // reading by rows in invert vertically int i; for (i = 0; i < image->height; i++) { unsigned int storeAt = image->infoHeader.height < 0 ? 
i : (unsigned int)image->height - 1 - i; - fread(image->data + image->width * 3 * storeAt, image->width * 3, 1, input); - fread(pad, padSize, 1, input); + cnt = fread(image->data + row_size * storeAt, row_size, sizeof(unsigned char), input); + if (cnt != row_size) { + printf("[BMP] file read error\n"); + status = 2; + goto Exit; + } + + cnt = fread(pad, padSize, sizeof(unsigned char), input); + if (cnt != padSize) { + printf("[BMP] file read error\n"); + status = 2; + goto Exit; + } } - fclose(input); - return 0; +Exit: + if (0 != status && NULL != image && NULL != image->data) { + free(image->data); + } + + if (NULL != input) { + fclose(input); + } + + return status; } From e6d08aef5b871d4370b5a294e792a1ee43e4695e Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Wed, 15 Dec 2021 21:21:46 +0300 Subject: [PATCH 06/27] Don't use EXCLUDE_FROM_ALL with samples targets (#9237) --- docs/template_plugin/backend/CMakeLists.txt | 15 ++++----------- docs/template_plugin/src/CMakeLists.txt | 1 - samples/c/common/opencv_c_wrapper/CMakeLists.txt | 2 +- samples/cpp/common/format_reader/CMakeLists.txt | 2 +- src/core/tests/frontend/shared/CMakeLists.txt | 2 +- src/tests/unit/vpu/CMakeLists.txt | 1 - 6 files changed, 7 insertions(+), 16 deletions(-) diff --git a/docs/template_plugin/backend/CMakeLists.txt b/docs/template_plugin/backend/CMakeLists.txt index e075a555099..df959ed86e5 100644 --- a/docs/template_plugin/backend/CMakeLists.txt +++ b/docs/template_plugin/backend/CMakeLists.txt @@ -38,17 +38,10 @@ target_include_directories(interpreter_backend PUBLIC $ Date: Thu, 16 Dec 2021 03:42:24 +0900 Subject: [PATCH 07/27] Create TopK-3 (#9106) --- .../tests/functional/op_reference/topk.cpp | 481 ++++++++++++------ 1 file changed, 321 insertions(+), 160 deletions(-) diff --git a/docs/template_plugin/tests/functional/op_reference/topk.cpp b/docs/template_plugin/tests/functional/op_reference/topk.cpp index 40907722f27..f571096dabb 100644 --- a/docs/template_plugin/tests/functional/op_reference/topk.cpp +++ b/docs/template_plugin/tests/functional/op_reference/topk.cpp @@ -4,8 +4,8 @@ #include -#include "openvino/op/topk.hpp" -#include "openvino/op/constant.hpp" +#include "openvino/opsets/opset3.hpp" +#include "openvino/opsets/opset1.hpp" #include "base_reference_test.hpp" using namespace reference_tests; @@ -15,7 +15,7 @@ namespace { struct TopKParams { TopKParams( const Tensor& A, const Tensor& k, const int64_t axis, - const op::v1::TopK::Mode mode, const op::v1::TopK::SortType sort, + const opset1::TopK::Mode mode, const opset1::TopK::SortType sort, const Tensor& result0, const Tensor& result1, const size_t outIdx, const std::string& testcaseName = "") : A(A), k(k), axis(axis), mode(mode), sort(sort), @@ -25,8 +25,8 @@ struct TopKParams { Tensor A; Tensor k; int64_t axis; - op::v1::TopK::Mode mode; - op::v1::TopK::SortType sort; + opset1::TopK::Mode mode; + opset1::TopK::SortType sort; Tensor result0; Tensor result1; size_t outIdx; @@ -71,7 +71,6 @@ struct TopKParamsResnet50 { std::string testcaseName; }; - class ReferenceTopKTestResnet50 : public testing::TestWithParam, public CommonReferenceTest { public: void SetUp() override { @@ -101,18 +100,18 @@ public: private: static std::shared_ptr CreateFunction(const TopKParamsResnet50& params) { - const auto A = std::make_shared(params.A.type, + const auto A = std::make_shared(params.A.type, params.A.shape); - const auto B = std::make_shared(A, - op::v0::Constant::create(element::i64, {}, {5}), + const auto B = std::make_shared(A, + 
opset1::Constant::create(element::i64, {}, {5}), 1, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES); - const auto C = std::make_shared(A, - op::v0::Constant::create(element::i64, {}, {1}), + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES); + const auto C = std::make_shared(A, + opset1::Constant::create(element::i64, {}, {1}), 1, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES); + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES); const auto out5_value = B->output(0); const auto out5_index = B->output(1); @@ -220,12 +219,12 @@ public: private: static std::shared_ptr CreateFunction(const TopKParams& params) { - const auto A = std::make_shared(params.A.type, + const auto A = std::make_shared(params.A.type, params.A.shape); - const auto k = op::v0::Constant::create(params.k.type, + const auto k = opset1::Constant::create(params.k.type, params.k.shape, params.k.data.data()); - const auto B = std::make_shared(A, k, params.axis, params.mode, params.sort); + const auto B = std::make_shared(A, k, params.axis, params.mode, params.sort); const auto f = std::make_shared(B->outputs(), ParameterVector{A}); return f; } @@ -253,8 +252,8 @@ std::vector generateParamsMaxMinSort() { }({128, 1000})), Tensor(ET2, {}, std::vector{5}), 1, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::NONE, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::NONE, Tensor(ET, {128, 5}, [](std::vector rshape, std::vector shape) -> std::vector{ std::vector expected_value; for (size_t i = 0; i < rshape[0]; i++) { @@ -292,8 +291,8 @@ std::vector generateParamsMaxMinSort() { }({128, 1000})), Tensor(ET2, {}, std::vector{5}), 1, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::NONE, + opset1::TopK::Mode::MIN, + opset1::TopK::SortType::NONE, Tensor(ET, {128, 5}, [](std::vector rshape) -> std::vector{ std::vector expected_value; for (size_t i = 0; i < rshape[0]; i++) { @@ -331,8 +330,8 @@ std::vector generateParamsMaxMinSort() { }({128, 1000})), Tensor(ET2, {}, std::vector{5}), 1, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {128, 5}, [](std::vector rshape, std::vector shape) -> std::vector{ std::vector expected_value; for (size_t i = 0; i < rshape[0]; i++) { @@ -366,8 +365,8 @@ std::vector generateParamsMaxMinSort() { }({128, 1000})), Tensor(ET2, {}, std::vector{5}), 1, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MIN, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {128, 5}, [](std::vector rshape) -> std::vector{ std::vector expected_value; for (size_t i = 0; i < rshape[0]; i++) { @@ -401,8 +400,8 @@ std::vector generateParamsMaxMinSort() { }({128, 1000})), Tensor(ET2, {}, std::vector{5}), 1, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_INDICES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_INDICES, Tensor(ET, {128, 5}, [](std::vector rshape, std::vector shape) -> std::vector{ std::vector expected_value; for (size_t i = 0; i < rshape[0]; i++) { @@ -440,8 +439,8 @@ std::vector generateParamsMaxMinSort() { }({128, 1000})), Tensor(ET2, {}, std::vector{5}), 1, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::SORT_INDICES, + opset1::TopK::Mode::MIN, + opset1::TopK::SortType::SORT_INDICES, Tensor(ET, {128, 5}, [](std::vector rshape) -> std::vector{ std::vector expected_value; for (size_t i = 0; i < rshape[0]; i++) { @@ -467,8 +466,8 @@ std::vector generateParamsMaxMinSort() { Tensor(ET, {5}, 
std::vector{3, 1, 2, 5, 4}), Tensor(ET2, {}, std::vector{3}), 0, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {3}, std::vector{5, 4, 3}), Tensor(ET_OUT, {3}, std::vector{3, 4, 0}), 0, @@ -478,8 +477,8 @@ std::vector generateParamsMaxMinSort() { Tensor(ET, {5}, std::vector{3, 1, 2, 5, 4}), Tensor(ET2, {}, std::vector{3}), 0, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_INDICES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_INDICES, Tensor(ET, {3}, std::vector{3, 5, 4}), Tensor(ET_OUT, {3}, std::vector{0, 3, 4}), 0, @@ -489,8 +488,8 @@ std::vector generateParamsMaxMinSort() { Tensor(ET, {5}, std::vector{3, 1, 2, 5, 4}), Tensor(ET2, {}, std::vector{3}), 0, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MIN, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {3}, std::vector{1, 2, 3}), Tensor(ET_OUT, {3}, std::vector{1, 2, 0}), 0, @@ -500,8 +499,8 @@ std::vector generateParamsMaxMinSort() { Tensor(ET, {5}, std::vector{3, 1, 2, 5, 4}), Tensor(ET2, {}, std::vector{3}), 0, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::SORT_INDICES, + opset1::TopK::Mode::MIN, + opset1::TopK::SortType::SORT_INDICES, Tensor(ET, {3}, std::vector{3, 1, 2}), Tensor(ET_OUT, {3}, std::vector{0, 1, 2}), 0, @@ -536,7 +535,7 @@ std::vector generateCombinedParamsMaxMinSort() { INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestMaxMinSort, testing::ValuesIn(generateCombinedParamsMaxMinSort()), ReferenceTopKTest::getTestCaseName); -class ReferenceTopKTestV3 : public ReferenceTopKTest { +class ReferenceTopKTestBackend : public ReferenceTopKTest { public: void SetUp() override { auto params = GetParam(); @@ -547,18 +546,18 @@ public: private: static std::shared_ptr CreateFunction(const TopKParams& params) { - const auto A = std::make_shared(params.A.type, + const auto A = std::make_shared(params.A.type, params.A.shape); - const auto k = op::v0::Constant::create(params.k.type, + const auto k = opset1::Constant::create(params.k.type, params.k.shape, params.k.data.data()); - const auto B = std::make_shared(A, k, params.axis, params.mode, params.sort); + const auto B = std::make_shared(A, k, params.axis, params.mode, params.sort); const auto f = std::make_shared(B->outputs(), ParameterVector{A}); return f; } }; -TEST_P(ReferenceTopKTestV3, CompareWithRefs) { +TEST_P(ReferenceTopKTestBackend, CompareWithRefs) { Exec(); } @@ -572,8 +571,8 @@ std::vector generateParamsV3() { Tensor(ET, {5}, std::vector{3, 1, 2, 5, 4}), Tensor(ET2, {}, std::vector{3}), 0, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {3}, std::vector{5, 4, 3}), Tensor(ET_OUT, {3}, std::vector{3, 4, 0}), 0, @@ -583,8 +582,8 @@ std::vector generateParamsV3() { Tensor(ET, {5}, std::vector{3, 1, 2, 5, 4}), Tensor(ET2, {}, std::vector{3}), 0, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_INDICES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_INDICES, Tensor(ET, {3}, std::vector{3, 5, 4}), Tensor(ET_OUT, {3}, std::vector{0, 3, 4}), 0, @@ -594,8 +593,8 @@ std::vector generateParamsV3() { Tensor(ET, {5}, std::vector{3, 1, 2, 5, 4}), Tensor(ET2, {}, std::vector{3}), 0, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MIN, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {3}, std::vector{1, 2, 3}), Tensor(ET_OUT, {3}, std::vector{1, 
2, 0}), 0, @@ -605,8 +604,8 @@ std::vector generateParamsV3() { Tensor(ET, {5}, std::vector{3, 1, 2, 5, 4}), Tensor(ET2, {}, std::vector{3}), 0, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::SORT_INDICES, + opset1::TopK::Mode::MIN, + opset1::TopK::SortType::SORT_INDICES, Tensor(ET, {3}, std::vector{3, 1, 2}), Tensor(ET_OUT, {3}, std::vector{0, 1, 2}), 0, @@ -615,7 +614,7 @@ std::vector generateParamsV3() { return params; } -std::vector generateCombinedParamsV3() { +std::vector generateCombinedParamsBackend() { const std::vector> generatedParams { generateParamsMaxMinSort(), generateParamsMaxMinSort(), @@ -638,8 +637,8 @@ std::vector generateCombinedParamsV3() { return combinedParams; } -INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestV3, - testing::ValuesIn(generateCombinedParamsV3()), ReferenceTopKTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestBackend, + testing::ValuesIn(generateCombinedParamsBackend()), ReferenceTopKTest::getTestCaseName); class ReferenceTopKTest1dMaxMin : public ReferenceTopKTest { public: @@ -673,12 +672,12 @@ public: private: static std::shared_ptr CreateFunction(const TopKParams& params, size_t out_idx) { - const auto A = std::make_shared(params.A.type, + const auto A = std::make_shared(params.A.type, params.A.shape); - const auto k = op::v0::Constant::create(params.k.type, + const auto k = opset1::Constant::create(params.k.type, params.k.shape, params.k.data.data()); - const auto B = std::make_shared(A, k, params.axis, params.mode, params.sort); + const auto B = std::make_shared(A, k, params.axis, params.mode, params.sort); const auto f = std::make_shared(OutputVector{B->output(out_idx)}, ParameterVector{A}); return f; } @@ -698,8 +697,8 @@ std::vector generateParams1dMaxMin() { Tensor(ET, {6}, std::vector{1, 2, 3, 4, 5, 6}), Tensor(ET2, {}, std::vector{6}), 0, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {6}, std::vector{6, 5, 4, 3, 2, 1}), Tensor(ET_OUT, {6}, std::vector{5, 4, 3, 2, 1, 0}), 0, @@ -709,8 +708,8 @@ std::vector generateParams1dMaxMin() { Tensor(ET, {6}, std::vector{1, 2, 3, 4, 5, 6}), Tensor(ET2, {}, std::vector{6}), 0, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {6}, std::vector{6, 5, 4, 3, 2, 1}), Tensor(ET_OUT, {6}, std::vector{5, 4, 3, 2, 1, 0}), 1, @@ -720,8 +719,8 @@ std::vector generateParams1dMaxMin() { Tensor(ET, {6}, std::vector{1, 2, 3, 4, 5, 6}), Tensor(ET2, {}, std::vector{3}), 0, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {3}, std::vector{6, 5, 4}), Tensor(ET_OUT, {3}, std::vector{5, 4, 3}), 0, @@ -731,8 +730,8 @@ std::vector generateParams1dMaxMin() { Tensor(ET, {6}, std::vector{1, 2, 3, 4, 5, 6}), Tensor(ET2, {}, std::vector{3}), 0, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {3}, std::vector{6, 5, 4}), Tensor(ET_OUT, {3}, std::vector{5, 4, 3}), 1, @@ -742,8 +741,8 @@ std::vector generateParams1dMaxMin() { Tensor(ET, {6}, std::vector{1, 2, 3, 4, 5, 6}), Tensor(ET2, {}, std::vector{1}), 0, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {1}, std::vector{6}), 
Tensor(ET_OUT, {1}, std::vector{5}), 0, @@ -753,8 +752,8 @@ std::vector generateParams1dMaxMin() { Tensor(ET, {6}, std::vector{1, 2, 3, 4, 5, 6}), Tensor(ET2, {}, std::vector{1}), 0, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {1}, std::vector{6}), Tensor(ET_OUT, {1}, std::vector{5}), 1, @@ -764,8 +763,8 @@ std::vector generateParams1dMaxMin() { Tensor(ET, {6}, std::vector{6, 5, 4, 3, 2, 1}), Tensor(ET2, {}, std::vector{6}), 0, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MIN, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {6}, std::vector{1, 2, 3, 4, 5, 6}), Tensor(ET_OUT, {6}, std::vector{5, 4, 3, 2, 1, 0}), 0, @@ -775,8 +774,8 @@ std::vector generateParams1dMaxMin() { Tensor(ET, {6}, std::vector{6, 5, 4, 3, 2, 1}), Tensor(ET2, {}, std::vector{6}), 0, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MIN, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {6}, std::vector{1, 2, 3, 4, 5, 6}), Tensor(ET_OUT, {6}, std::vector{5, 4, 3, 2, 1, 0}), 1, @@ -786,8 +785,8 @@ std::vector generateParams1dMaxMin() { Tensor(ET, {6}, std::vector{6, 5, 4, 3, 2, 1}), Tensor(ET2, {}, std::vector{3}), 0, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MIN, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {3}, std::vector{1, 2, 3}), Tensor(ET_OUT, {3}, std::vector{5, 4, 3}), 0, @@ -797,8 +796,8 @@ std::vector generateParams1dMaxMin() { Tensor(ET, {6}, std::vector{6, 5, 4, 3, 2, 1}), Tensor(ET2, {}, std::vector{3}), 0, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MIN, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {3}, std::vector{1, 2, 3}), Tensor(ET_OUT, {3}, std::vector{5, 4, 3}), 1, @@ -808,8 +807,8 @@ std::vector generateParams1dMaxMin() { Tensor(ET, {6}, std::vector{6, 5, 4, 3, 2, 1}), Tensor(ET2, {}, std::vector{1}), 0, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MIN, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {1}, std::vector{1}), Tensor(ET_OUT, {1}, std::vector{5}), 0, @@ -819,8 +818,8 @@ std::vector generateParams1dMaxMin() { Tensor(ET, {6}, std::vector{6, 5, 4, 3, 2, 1}), Tensor(ET2, {}, std::vector{1}), 0, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MIN, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {1}, std::vector{1}), Tensor(ET_OUT, {1}, std::vector{5}), 1, @@ -832,8 +831,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{3}), 1, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {2, 3, 2}, std::vector{ 10, 12, 9, 4, 8, 2, 11, 7, 6, 3, 5, 1 }), @@ -849,8 +848,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{3}), 1, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {2, 3, 2}, std::vector{ 10, 12, 9, 4, 8, 2, 11, 7, 6, 3, 5, 1 }), @@ -882,8 +881,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{2}), 1, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {2, 2, 3, 2, 4}, std::vector{ 169, 241, 177, 249, 185, 233, 170, 242, 178, 250, 186, 258, 171, 243, 179, 251, 187, 259, 172, 224, 180, 
252, 188, 260, 149, 221, 157, 229, 165, 113, 150, 222, @@ -923,8 +922,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{2}), 1, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {2, 2, 3, 2, 4}, std::vector{ 169, 241, 177, 249, 185, 233, 170, 242, 178, 250, 186, 258, 171, 243, 179, 251, 187, 259, 172, 224, 180, 252, 188, 260, 149, 221, 157, 229, 165, 113, 150, 222, @@ -948,8 +947,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{2}), 1, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {2, 2, 2}, std::vector{ 10, 12, 9, 4, 11, 7, 6, 3 }), @@ -965,8 +964,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{2}), 1, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {2, 2, 2}, std::vector{ 10, 12, 9, 4, 11, 7, 6, 3 }), @@ -982,8 +981,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{1}), 1, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {2, 1, 2}, std::vector{ 10, 12, 11, 7 }), @@ -999,8 +998,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{1}), 1, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {2, 1, 2}, std::vector{ 10, 12, 11, 7 }), @@ -1016,8 +1015,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{3}), 1, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MIN, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {2, 3, 2}, std::vector{ 8, 2, 10, 4, 12, 9, 5, 1, 6, 3, 11, 7 }), @@ -1033,8 +1032,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{3}), 1, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MIN, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {2, 3, 2}, std::vector{ 8, 2, 10, 4, 12, 9, 5, 1, 6, 3, 11, 7 }), @@ -1050,8 +1049,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{2}), 1, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MIN, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {2, 2, 2}, std::vector{ 8, 2, 10, 4, 5, 1, 6, 3 }), @@ -1067,8 +1066,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{2}), 1, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MIN, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {2, 2, 2}, std::vector{ 8, 2, 10, 4, 5, 1, 6, 3 }), @@ -1084,8 +1083,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{1}), 1, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MIN, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {2, 1, 2}, std::vector{ 8, 2, 5, 1 }), @@ -1101,8 +1100,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{1}), 1, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MIN, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {2, 1, 2}, std::vector{ 8, 2, 5, 1 }), @@ -1118,8 +1117,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{4}), 0, - op::v1::TopK::Mode::MAX, - 
op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {4, 3}, std::vector{ 12, 11, 10, 9, 8, 7, 6, 2, 5, 3, 1, 4 }), @@ -1135,8 +1134,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{4}), 0, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {4, 3}, std::vector{ 12, 11, 10, 9, 8, 7, 6, 2, 5, 3, 1, 4 }), @@ -1152,8 +1151,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{2}), 0, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {2, 3}, std::vector{ 12, 11, 10, 9, 8, 7 }), @@ -1169,8 +1168,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{2}), 0, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {2, 3}, std::vector{ 12, 11, 10, 9, 8, 7 }), @@ -1186,8 +1185,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{1}), 0, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {1, 3}, std::vector{ 12, 11, 10 }), @@ -1203,8 +1202,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{1}), 0, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {1, 3}, std::vector{ 12, 11, 10 }), @@ -1220,8 +1219,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{1}), 1, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {2, 1}, std::vector{ 4, 3 }), @@ -1237,8 +1236,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{1}), 1, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {2, 1}, std::vector{ 4, 3 }), @@ -1254,8 +1253,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{4}), 0, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MIN, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {4, 3}, std::vector{ 3, 1, 4, 6, 2, 5, 9, 8, 7, 12, 11, 10 }), @@ -1271,8 +1270,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{4}), 0, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MIN, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {4, 3}, std::vector{ 3, 1, 4, 6, 2, 5, 9, 8, 7, 12, 11, 10 }), @@ -1288,8 +1287,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{2}), 0, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MIN, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {2, 3}, std::vector{ 3, 1, 4, 6, 2, 5 }), @@ -1305,8 +1304,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{2}), 0, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MIN, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {2, 3}, std::vector{ 3, 1, 4, 6, 2, 5 }), @@ -1322,8 +1321,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{1}), 0, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::NONE, + opset1::TopK::Mode::MIN, + 
opset1::TopK::SortType::NONE, Tensor(ET, {1, 3}, std::vector{ 3, 1, 4 }), @@ -1339,8 +1338,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{1}), 0, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::NONE, + opset1::TopK::Mode::MIN, + opset1::TopK::SortType::NONE, Tensor(ET, {1, 3}, std::vector{ 3, 1, 4 }), @@ -1380,12 +1379,12 @@ INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTest1dMaxM class ReferenceTopKTestInt64 : public ReferenceTopKTest1dMaxMin { private: static std::shared_ptr CreateFunction(const TopKParams& params, size_t out_idx) { - const auto A = std::make_shared(params.A.type, + const auto A = std::make_shared(params.A.type, params.A.shape); - const auto k = op::v0::Constant::create(params.k.type, + const auto k = opset1::Constant::create(params.k.type, params.k.shape, params.k.data.data()); - const auto B = std::make_shared(A, + const auto B = std::make_shared(A, k, params.axis, params.mode, @@ -1412,8 +1411,8 @@ std::vector generateParamsInt64() { }), Tensor(ET2, {}, std::vector{3}), 1, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {2, 3, 2}, std::vector{ 10, 12, 9, 4, 8, 2, 11, 7, 6, 3, 5, 1 }), @@ -1428,8 +1427,8 @@ std::vector generateParamsInt64() { }), Tensor(ET2, {}, std::vector{3}), 1, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {2, 3, 2}, std::vector{ 10, 12, 9, 4, 8, 2, 11, 7, 6, 3, 5, 1 }), @@ -1468,12 +1467,12 @@ public: private: static std::shared_ptr CreateFunction(const TopKParams& params) { - const auto A = std::make_shared(params.A.type, + const auto A = std::make_shared(params.A.type, params.A.shape); - const auto k = op::v0::Constant::create(params.k.type, + const auto k = opset1::Constant::create(params.k.type, params.k.shape, params.k.data.data()); - const auto B = std::make_shared(A, k, params.axis, params.mode, params.sort); + const auto B = std::make_shared(A, k, params.axis, params.mode, params.sort); const auto f = std::make_shared(OutputVector{B->output(1)}, ParameterVector{A}); return f; } @@ -1493,8 +1492,8 @@ std::vector generateParamsSingleOutput() { Tensor(ET, {2, 3, 2}, std::vector{12, 2, 10, 9, 8, 4, 6, 1, 5, 3, 11, 7}), Tensor(ET2, {}, std::vector{2}), 1, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MIN, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {2, 2, 2}, std::vector{}), Tensor(ET_OUT, {2, 2, 2}, std::vector{2, 0, 1, 2, 1, 0, 0, 1}), 0, @@ -1530,19 +1529,181 @@ INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestSingle testing::ValuesIn(generateCombinedParamsSingleOutput()), ReferenceTopKTest::getTestCaseName); TEST(ReferenceTopKTestInvalid, topk_v1_invalid_strings) { - const auto data = std::make_shared(element::f32, Shape{1, 2, 3}); - const auto k = op::v0::Constant::create(element::i64, Shape{}, {1}); - EXPECT_THROW(op::v1::TopK(data, k, 0, "max", "invalid_mode"), ngraph::CheckFailure); - EXPECT_THROW(op::v1::TopK(data, k, 0, "invalid_sort", "index"), ngraph::CheckFailure); + const auto data = std::make_shared(element::f32, Shape{1, 2, 3}); + const auto k = opset1::Constant::create(element::i64, Shape{}, {1}); + EXPECT_THROW(opset1::TopK(data, k, 0, "max", "invalid_mode"), ngraph::CheckFailure); + EXPECT_THROW(opset1::TopK(data, k, 0, "invalid_sort", "index"), ngraph::CheckFailure); } TEST(ReferenceTopKTestInvalid, 
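// [Editor's note] The two Invalid tests around this point deliberately go through
// the string-taking TopK constructor: "max" and "index" are converted to the Mode
// and SortType enums at construction time, and unrecognized strings such as
// "invalid_mode"/"invalid_sort" trip the ngraph::CheckFailure these EXPECT_THROWs
// assert on. A minimal sketch of the equivalent enum-based spelling the rest of
// this file prefers (assuming the opset1 aliases already in scope here):
//
//   const auto data = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 2, 3});
//   const auto k = opset1::Constant::create(element::i64, Shape{}, {1});
//   const auto topk = std::make_shared<opset1::TopK>(
//       data, k, /*axis=*/0, opset1::TopK::Mode::MAX, opset1::TopK::SortType::SORT_VALUES);
//   // topk->output(0): the k largest values along the axis; output(1): their indices.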
topk_v1_invalid_k) { - const auto data = std::make_shared(element::f32, Shape{1, 2, 3}); - const auto k_non_scalar = op::v0::Constant::create(element::i64, Shape{2}, {1, 2}); - EXPECT_THROW(op::v1::TopK(data, k_non_scalar, 0, "max", "index"), ngraph::NodeValidationFailure); - const auto k_float = op::v0::Constant::create(element::f32, Shape{}, {1.0f}); - EXPECT_THROW(op::v1::TopK(data, k_float, 0, "max", "index"), ngraph::NodeValidationFailure); - const auto k_negative = op::v0::Constant::create(element::i8, Shape{}, {-1}); - EXPECT_THROW(op::v1::TopK(data, k_negative, 0, "max", "index"), ngraph::NodeValidationFailure); + const auto data = std::make_shared(element::f32, Shape{1, 2, 3}); + const auto k_non_scalar = opset1::Constant::create(element::i64, Shape{2}, {1, 2}); + EXPECT_THROW(opset1::TopK(data, k_non_scalar, 0, "max", "index"), ngraph::NodeValidationFailure); + const auto k_float = opset1::Constant::create(element::f32, Shape{}, {1.0f}); + EXPECT_THROW(opset1::TopK(data, k_float, 0, "max", "index"), ngraph::NodeValidationFailure); + const auto k_negative = opset1::Constant::create(element::i8, Shape{}, {-1}); + EXPECT_THROW(opset1::TopK(data, k_negative, 0, "max", "index"), ngraph::NodeValidationFailure); +} + + + + +class ReferenceTopKTestResnet50V3 : public ReferenceTopKTestResnet50 { +private: + static std::shared_ptr CreateFunction(const TopKParamsResnet50& params) { + const auto A = std::make_shared(params.A.type, + params.A.shape); + const auto B = std::make_shared(A, + opset1::Constant::create(element::i64, {}, {5}), + 1, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES); + const auto C = std::make_shared(A, + opset1::Constant::create(element::i64, {}, {1}), + 1, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES); + + const auto out5_value = B->output(0); + const auto out5_index = B->output(1); + const auto out1_value = C->output(0); + const auto out1_index = C->output(1); + const auto f = std::make_shared(OutputVector{out5_value, out5_index, out1_value, out1_index}, ParameterVector{A}); + return f; + } +}; + +TEST_P(ReferenceTopKTestResnet50V3, CompareWithRefs) { + Exec(); +} + +INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestResnet50V3, + testing::ValuesIn(generateCombinedParamsResnet50()), ReferenceTopKTestResnet50V3::getTestCaseName); + +class ReferenceTopKTestMaxMinSortV3 : public ReferenceTopKTestMaxMinSort { +private: + static std::shared_ptr CreateFunction(const TopKParams& params) { + const auto A = std::make_shared(params.A.type, + params.A.shape); + const auto k = opset1::Constant::create(params.k.type, + params.k.shape, + params.k.data.data()); + const auto B = std::make_shared(A, k, params.axis, params.mode, params.sort); + const auto f = std::make_shared(B->outputs(), ParameterVector{A}); + return f; + } +}; + +TEST_P(ReferenceTopKTestMaxMinSortV3, CompareWithRefs) { + Exec(); +} + +INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestMaxMinSortV3, + testing::ValuesIn(generateCombinedParamsMaxMinSort()), ReferenceTopKTestMaxMinSortV3::getTestCaseName); + +class ReferenceTopKTestBackendV3 : public ReferenceTopKTestBackend { +private: + static std::shared_ptr CreateFunction(const TopKParams& params) { + const auto A = std::make_shared(params.A.type, + params.A.shape); + const auto k = opset1::Constant::create(params.k.type, + params.k.shape, + params.k.data.data()); + const auto B = std::make_shared(A, k, params.axis, params.mode, params.sort); + const auto f = 
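// [Editor's note] ReferenceTopKTestResnet50V3 above is the classification-shaped
// case: one {batch, classes} tensor is fed through two TopK nodes, k=5 and k=1 on
// axis 1, so a single inference returns top-5 and top-1 scores together with their
// class indices. A sketch of the same readout under hypothetical shapes (parameter
// and TopK types follow the opset aliases used throughout this file):
//
//   const auto logits = std::make_shared<opset1::Parameter>(element::f32, Shape{8, 1000});
//   const auto top1 = std::make_shared<opset3::TopK>(
//       logits, opset1::Constant::create(element::i64, {}, {1}), 1,
//       opset1::TopK::Mode::MAX, opset1::TopK::SortType::SORT_VALUES);
//   // top1->output(0): best score per row; top1->output(1): the predicted class id.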
std::make_shared(B->outputs(), ParameterVector{A}); + return f; + } +}; + +TEST_P(ReferenceTopKTestBackendV3, CompareWithRefs) { + Exec(); +} + +INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestBackendV3, + testing::ValuesIn(generateCombinedParamsBackend()), ReferenceTopKTestBackendV3::getTestCaseName); + +class ReferenceTopKTest1dMaxMinV3 : public ReferenceTopKTest1dMaxMin { +private: + static std::shared_ptr CreateFunction(const TopKParams& params, size_t out_idx) { + const auto A = std::make_shared(params.A.type, + params.A.shape); + const auto k = opset1::Constant::create(params.k.type, + params.k.shape, + params.k.data.data()); + const auto B = std::make_shared(A, k, params.axis, params.mode, params.sort); + const auto f = std::make_shared(OutputVector{B->output(out_idx)}, ParameterVector{A}); + return f; + } +}; + +TEST_P(ReferenceTopKTest1dMaxMinV3, CompareWithRefs) { + Exec(); +} + +INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTest1dMaxMinV3, + testing::ValuesIn(generateCombinedParams1dMaxMin()), ReferenceTopKTest1dMaxMinV3::getTestCaseName); + +class ReferenceTopKTestInt64V3 : public ReferenceTopKTestInt64 { +private: + static std::shared_ptr CreateFunction(const TopKParams& params, size_t out_idx) { + const auto A = std::make_shared(params.A.type, + params.A.shape); + const auto k = opset1::Constant::create(params.k.type, + params.k.shape, + params.k.data.data()); + const auto B = std::make_shared(A, + k, + params.axis, + params.mode, + params.sort, + element::i64); + const auto f = std::make_shared(OutputVector{B->output(out_idx)}, ParameterVector{A}); + return f; + } +}; + +TEST_P(ReferenceTopKTestInt64V3, CompareWithRefs) { + Exec(); +} + +INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestInt64V3, + testing::ValuesIn(generateCombinedParamsInt64()), ReferenceTopKTestInt64V3::getTestCaseName); + +class ReferenceTopKTestSingleOutputV3 : public ReferenceTopKTestSingleOutput { +private: + static std::shared_ptr CreateFunction(const TopKParams& params) { + const auto A = std::make_shared(params.A.type, + params.A.shape); + const auto k = opset1::Constant::create(params.k.type, + params.k.shape, + params.k.data.data()); + const auto B = std::make_shared(A, k, params.axis, params.mode, params.sort); + const auto f = std::make_shared(OutputVector{B->output(1)}, ParameterVector{A}); + return f; + } +}; + +TEST_P(ReferenceTopKTestSingleOutputV3, CompareWithRefs) { + Exec(); +} + +INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestSingleOutputV3, + testing::ValuesIn(generateCombinedParamsSingleOutput()), ReferenceTopKTestSingleOutputV3::getTestCaseName); + +TEST(ReferenceTopKTestInvalidV3, topk_v3_invalid_strings) { + const auto data = std::make_shared(element::f32, Shape{1, 2, 3}); + const auto k = opset1::Constant::create(element::i64, Shape{}, {1}); + EXPECT_THROW(opset3::TopK(data, k, 0, "max", "invalid_mode"), ngraph::CheckFailure); + EXPECT_THROW(opset3::TopK(data, k, 0, "invalid_sort", "index"), ngraph::CheckFailure); +} + +TEST(ReferenceTopKTestInvalidV3, topk_v3_invalid_k) { + const auto data = std::make_shared(element::f32, Shape{1, 2, 3}); + const auto k_non_scalar = opset1::Constant::create(element::i64, Shape{2}, {1, 2}); + EXPECT_THROW(opset3::TopK(data, k_non_scalar, 0, "max", "index"), ngraph::NodeValidationFailure); + const auto k_float = opset1::Constant::create(element::f32, Shape{}, {1.0f}); + EXPECT_THROW(opset3::TopK(data, k_float, 0, "max", "index"), 
ngraph::NodeValidationFailure); + const auto k_negative = opset1::Constant::create(element::i8, Shape{}, {-1}); + EXPECT_THROW(opset3::TopK(data, k_negative, 0, "max", "index"), ngraph::NodeValidationFailure); } } // namespace From 93698483b5b023fd2d54ac16f0eab5dd6a94b637 Mon Sep 17 00:00:00 2001 From: Steve Yoo Date: Thu, 16 Dec 2021 03:45:18 +0900 Subject: [PATCH 08/27] Migrate ExtractImagePatches-3 (#8981) --- .../op_reference/extract_image_patches.cpp | 246 ++++++++++++++++++ 1 file changed, 246 insertions(+) create mode 100644 docs/template_plugin/tests/functional/op_reference/extract_image_patches.cpp diff --git a/docs/template_plugin/tests/functional/op_reference/extract_image_patches.cpp b/docs/template_plugin/tests/functional/op_reference/extract_image_patches.cpp new file mode 100644 index 00000000000..95f5571cc41 --- /dev/null +++ b/docs/template_plugin/tests/functional/op_reference/extract_image_patches.cpp @@ -0,0 +1,246 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "openvino/opsets/opset3.hpp" +#include "openvino/opsets/opset1.hpp" +#include "base_reference_test.hpp" + +using namespace reference_tests; +using namespace ov; + +namespace { +struct ExtractImagePatchesParams { + Tensor data; + Shape sizes; + Strides strides; + Shape rates; + op::PadType autoPad; + Tensor expectedResult; + std::string testcaseName; +}; + +struct Builder : ParamsBuilder { + REFERENCE_TESTS_ADD_SET_PARAM(Builder, data); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, sizes); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, strides); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, rates); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, autoPad); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, expectedResult); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, testcaseName); +}; + +class ReferenceExtractImagePatchesTest : public testing::TestWithParam, public CommonReferenceTest { +public: + void SetUp() override { + auto params = GetParam(); + function = CreateFunction(params); + inputData = {params.data.data}; + refOutData = {params.expectedResult.data}; + } + + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + auto param = obj.param; + std::ostringstream result; + result << "dType=" << param.data.type; + result << "_dShape=" << param.data.shape; + result << "_sizes=" << param.sizes; + result << "_strides=" << param.strides; + result << "_rates=" << param.rates; + result << "_autoPad=" << param.autoPad; + result << "_eType=" << param.expectedResult.type; + result << "_eShape=" << param.expectedResult.shape; + if (param.testcaseName != "") { + result << "_=" << param.testcaseName; + } + return result.str(); + } + +private: + static std::shared_ptr CreateFunction(const ExtractImagePatchesParams& params) { + const auto data = std::make_shared(params.data.type, params.data.shape); + const auto extrace_image_patches = std::make_shared(data, + params.sizes, + params.strides, + params.rates, + params.autoPad); + const auto f = std::make_shared(extrace_image_patches, ParameterVector{data}); + return f; + } +}; + +TEST_P(ReferenceExtractImagePatchesTest, CompareWithRefs) { + Exec(); +} + +template +std::vector generateParams() { + using T = typename element_type_traits::value_type; + std::vector params { + Builder {} + .data({ET, {1, 1, 10, 10}, std::vector{ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, + 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, + 51, 52, 
53, 54, 55, 56, 57, 58, 59, 60, + 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, + 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, + 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, + 91, 92, 93, 94, 95, 96, 97, 98, 99, 100}}) + .sizes({3, 3}) + .strides({5, 5}) + .rates({1, 1}) + .autoPad(op::PadType::VALID) + .expectedResult({ET, {1, 9, 2, 2}, std::vector{ + 1, 6, 51, 56, + 2, 7, 52, 57, + 3, 8, 53, 58, + 11, 16, 61, 66, + 12, 17, 62, 67, + 13, 18, 63, 68, + 21, 26, 71, 76, + 22, 27, 72, 77, + 23, 28, 73, 78}}), + + Builder {} + .data({ET, {1, 1, 10, 10}, std::vector{ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, + 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, + 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, + 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, + 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, + 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, + 91, 92, 93, 94, 95, 96, 97, 98, 99, 100}}) + .sizes({4, 4}) + .strides({8, 8}) + .rates({1, 1}) + .autoPad(op::PadType::VALID) + .expectedResult({ET, {1, 16, 1, 1}, std::vector{ + 1, 2, 3, 4, + 11, 12, 13, 14, + 21, 22, 23, 24, + 31, 32, 33, 34}}), + + Builder {} + .data({ET, {1, 1, 10, 10}, std::vector{ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, + 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, + 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, + 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, + 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, + 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, + 91, 92, 93, 94, 95, 96, 97, 98, 99, 100}}) + .sizes({4, 4}) + .strides({9, 9}) + .rates({1, 1}) + .autoPad(op::PadType::SAME_UPPER) + .expectedResult({ET, {1, 16, 2, 2}, std::vector{ + 0, 0, 0, 89, + 0, 0, 81, 90, + 0, 0, 82, 0, + 0, 0, 83, 0, + 0, 9, 0, 99, + 1, 10, 91, 100, + 2, 0, 92, 0, + 3, 0, 93, 0, + 0, 19, 0, 0, + 11, 20, 0, 0, + 12, 0, 0, 0, + 13, 0, 0, 0, + 0, 29, 0, 0, + 21, 30, 0, 0, + 22, 0, 0, 0, + 23, 0, 0, 0}}), + + Builder {} + .data({ET, {1, 1, 10, 10}, std::vector{ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, + 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, + 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, + 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, + 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, + 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, + 91, 92, 93, 94, 95, 96, 97, 98, 99, 100}}) + .sizes({3, 3}) + .strides({5, 5}) + .rates({2, 2}) + .autoPad(op::PadType::VALID) + .expectedResult({ET, {1, 9, 2, 2}, std::vector{ + 1, 6, 51, 56, + 3, 8, 53, 58, + 5, 10, 55, 60, + 21, 26, 71, 76, + 23, 28, 73, 78, + 25, 30, 75, 80, + 41, 46, 91, 96, + 43, 48, 93, 98, + 45, 50, 95, 100}}), + + Builder {} + .data({ET, {1, 2, 5, 5}, std::vector{ + 1, 2, 3, 4, 5, + 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, + 26, 27, 28, 29, 30, + 31, 32, 33, 34, 35, + 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, + 46, 47, 48, 49, 50}}) + .sizes({2, 2}) + .strides({3, 3}) + .rates({1, 1}) + .autoPad(op::PadType::VALID) + .expectedResult({ET, {1, 8, 2, 2}, std::vector{ + 1, 4, 16, 19, + 26, 29, 41, 44, + 2, 5, 17, 20, + 27, 30, 42, 45, + 6, 9, 21, 24, + 31, 34, 46, 49, + 7, 10, 22, 25, + 32, 35, 47, 50}}), + }; + return params; +} + +std::vector generateCombinedParams() { + const std::vector> generatedParams { + generateParams(), + generateParams(), + generateParams(), + generateParams(), + generateParams(), + 
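// [Editor's note] The hard-coded output shapes in the cases above all follow the
// usual patch-extraction arithmetic. With VALID padding, each spatial axis gives
//   out_dim = (in_dim - (size - 1) * rate - 1) / stride + 1   (integer division)
// and the channel count becomes depth * size_h * size_w. First case: a 10x10 input
// with sizes {3,3}, strides {5,5}, rates {1,1} yields (10 - 2 - 1)/5 + 1 = 2 per
// axis and 1*3*3 = 9 channels, hence {1, 9, 2, 2}. With SAME_UPPER the spatial
// dims are ceil(in_dim / stride) instead (ceil(10/9) = 2 in the {4,4}/{9,9} case),
// with the zero padding biased to the bottom/right, which is where the zeros in
// that case's expected tensor come from.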
generateParams(), + generateParams(), + generateParams(), + generateParams(), + generateParams(), + generateParams(), + generateParams(), + }; + std::vector combinedParams; + + for (const auto& params : generatedParams) { + combinedParams.insert(combinedParams.end(), params.begin(), params.end()); + } + return combinedParams; +} + +INSTANTIATE_TEST_SUITE_P(smoke_ExtractImagePatches_With_Hardcoded_Refs, ReferenceExtractImagePatchesTest, + testing::ValuesIn(generateCombinedParams()), ReferenceExtractImagePatchesTest::getTestCaseName); +} // namespace \ No newline at end of file From d9ecb108f115c5f84556768be42e6dd9db2b831b Mon Sep 17 00:00:00 2001 From: Steve Yoo Date: Thu, 16 Dec 2021 03:46:10 +0900 Subject: [PATCH 09/27] Create Einsum-7 (#8982) --- .../tests/functional/op_reference/einsum.cpp | 182 ++++++++++++++++++ 1 file changed, 182 insertions(+) create mode 100644 docs/template_plugin/tests/functional/op_reference/einsum.cpp diff --git a/docs/template_plugin/tests/functional/op_reference/einsum.cpp b/docs/template_plugin/tests/functional/op_reference/einsum.cpp new file mode 100644 index 00000000000..31460fd2ccf --- /dev/null +++ b/docs/template_plugin/tests/functional/op_reference/einsum.cpp @@ -0,0 +1,182 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "openvino/opsets/opset7.hpp" +#include "openvino/opsets/opset1.hpp" +#include "base_reference_test.hpp" + +using namespace reference_tests; +using namespace ov; + +namespace { +struct EinsumParams { + std::vector inputs; + std::string equation; + Tensor expectedResult; + std::string testcaseName; +}; + +struct Builder : ParamsBuilder { + REFERENCE_TESTS_ADD_SET_PARAM(Builder, inputs); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, equation); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, expectedResult); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, testcaseName); +}; + +class ReferenceEinsumTest : public testing::TestWithParam, public CommonReferenceTest { +public: + void SetUp() override { + auto params = GetParam(); + function = CreateFunction(params); + for (const auto& input_tensor : params.inputs) { + inputData.push_back(input_tensor.data); + } + refOutData = {params.expectedResult.data}; + } + + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + auto param = obj.param; + std::ostringstream result; + result << "iType=" << param.inputs[0].type; + result << "_iShape=" << param.inputs[0].shape; + result << "_equation=" << param.equation; + result << "_eType=" << param.expectedResult.type; + result << "_eShape=" << param.expectedResult.shape; + if (param.testcaseName != "") { + result << "_=" << param.testcaseName; + } + return result.str(); + } + +private: + static std::shared_ptr CreateFunction(const EinsumParams& params) { + OutputVector output_vector; + ParameterVector param_vector; + for (const auto& input_tensor : params.inputs) { + auto param = std::make_shared(input_tensor.type, input_tensor.shape); + output_vector.push_back(param); + param_vector.push_back(param); + } + const auto einsum = std::make_shared(output_vector, params.equation); + const auto f = std::make_shared(OutputVector{einsum}, param_vector); + return f; + } +}; + +TEST_P(ReferenceEinsumTest, CompareWithRefs) { + Exec(); +} + +template +std::vector generateParams() { + using T = typename element_type_traits::value_type; + std::vector params { + Builder {} + .inputs({{ET, {1, 2}, std::vector{1, 2}}, + {ET, {3, 4}, std::vector{3, 4, 5, 6, + 7, 8, 9, 10, + 11, 12, 13, 14}}}) + 
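// [Editor's note] In every case below, the equation string drives the computation:
// each letter labels one axis of the corresponding input, letters missing from the
// part after "->" are summed over, and "..." stands for any remaining axes. So
// "ab,cd->abcd" is an outer product,
//   out[a][b][c][d] = lhs[a][b] * rhs[c][d],
// while "ab,bc->ac" is plain matrix multiplication,
//   out[a][c] = sum over b of lhs[a][b] * rhs[b][c],
// which is what the einsum_matrix_multiplication case encodes: for the 2x3 and
// 3x2 inputs {1..6}, out = {22, 28, 49, 64}.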
.equation("ab,cd->abcd") + .expectedResult({ET, {1, 2, 3, 4}, std::vector{3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 6, 8, 10, 12, + 14, 16, 18, 20, 22, 24, 26, 28}}) + .testcaseName("einsum_no_reduction"), + Builder {} + .inputs({{ET, {1, 2, 3}, std::vector{1, 2, 3, 4, 5, 6}}}) + .equation("ijk->kij") + .expectedResult({ET, {3, 1, 2}, std::vector{1, 4, 2, 5, 3, 6}}) + .testcaseName("einsum_transpose"), + + Builder {} + .inputs({{ET, {2, 3}, std::vector{1, 2, 3, 4, 5, 6}}}) + .equation("ab->a") + .expectedResult({ET, {2}, std::vector{6, 15}}) + .testcaseName("einsum_reduce"), + + Builder {} + .inputs({{ET, {2, 3}, std::vector{1, 2, 3, 4, 5, 6}}, + {ET, {3, 2}, std::vector{1, 2, 3, 4, 5, 6}}}) + .equation("ab,bc->ac") + .expectedResult({ET, {2, 2}, std::vector{22, 28, 49, 64}}) + .testcaseName("einsum_matrix_multiplication"), + + Builder {} + .inputs({{ET, {2, 4}, std::vector{1, 3, 2, 7, 5, 6, 0, 1}}, + {ET, {4, 3, 1}, std::vector{1, 2, 3, 4, 5, 6, 5, 7, 3, 7, 9, 1}}, + {ET, {4, 3}, std::vector{4, 3, 1, 6, 4, 2, 2, 5, 3, 1, 9, 4}}}) + .equation("ab,bcd,bc->ca") + .expectedResult({ET, {3, 2}, std::vector{145, 171, 703, 231, 85, 91}}) + .testcaseName("einsum_multiple_multiplication"), + + Builder {} + .inputs({{ET, {2, 2, 3}, std::vector{1, 3, 2, 7, 5, 6, 3, 5, 2, 1, 0, 7}}}) + .equation("a...->...") + .expectedResult({ET, {2, 3}, std::vector{4, 8, 4, 8, 5, 13}}) + .testcaseName("einsum_ellipsis_one_input_reduction"), + + Builder {} + .inputs({{ET, {2, 2, 3}, std::vector{1, 3, 2, 7, 5, 6, 3, 5, 2, 1, 0, 7}}}) + .equation("a...->...a") + .expectedResult({ET, {2, 3, 2}, std::vector{1, 3, 3, 5, 2, 2, 7, 1, 5, 0, 6, 7}}) + .testcaseName("einsum_ellipsis_one_input_transpose"), + + Builder {} + .inputs({{ET, {2, 2, 3}, std::vector{1, 3, 2, 7, 5, 6, 3, 5, 2, 1, 0, 7}}, + {ET, {1}, std::vector{2}}}) + .equation("ab...,...->ab...") + .expectedResult({ET, {2, 2, 3}, std::vector{2, 6, 4, 14, 10, 12, 6, 10, 4, 2, 0, 14}}) + .testcaseName("einsum_ellipsis_mul_by_1dscalar"), + + Builder {} + .inputs({{ET, {1, 1, 4, 3}, std::vector{1, 3, 2, 7, 5, 6, 3, 5, 2, 1, 0, 7}}, + {ET, {3, 4, 2, 1}, std::vector{3, 1, 6, 2, 3, 10, 9, 8, 2, 9, 3, 2, + 4, 2, 3, 1, 9, 1, 11, 4, 7, 2, 3, 1}}}) + .equation("a...j,j...->a...") + .expectedResult({ET, {1, 4, 2, 4}, std::vector{27, 85, 37, 66, 30, 58, 50, 8, + 37, 123, 55, 83, 16, 48, 24, 30, + 29, 83, 43, 52, 20, 92, 44, 24, + 24, 96, 48, 30, 13, 67, 31, 15}}) + .testcaseName("einsum_ellipsis_complex_mul"), + + Builder {} + .inputs({{ET, {1, 3, 3}, std::vector{1, 2, 3, 4, 5, 6, 7, 8, 9}}}) + .equation("kii->ki") + .expectedResult({ET, {1, 3}, std::vector{1, 5, 9}}) + .testcaseName("einsum_diagonal"), + + Builder {} + .inputs({{ET, {2, 3, 3, 2, 4}, std::vector{4, 2, 5, 4, 5, 5, 1, 1, 3, 3, 1, 1, 2, 2, 4, 1, 3, 4, + 4, 5, 1, 3, 1, 3, 1, 4, 3, 5, 4, 4, 5, 4, 4, 5, 4, 2, + 2, 2, 3, 3, 1, 1, 4, 3, 4, 2, 2, 1, 1, 2, 3, 1, 1, 4, + 2, 3, 1, 3, 4, 2, 5, 5, 3, 4, 3, 4, 5, 4, 4, 5, 1, 3, + 4, 4, 5, 3, 1, 3, 2, 5, 3, 2, 5, 4, 4, 2, 4, 4, 1, 4, + 4, 5, 4, 4, 4, 2, 3, 3, 4, 2, 4, 2, 5, 1, 3, 2, 4, 3, + 5, 1, 2, 3, 1, 1, 2, 5, 1, 1, 2, 1, 4, 5, 3, 4, 1, 3, + 3, 1, 3, 2, 4, 5, 1, 1, 5, 4, 5, 2, 2, 3, 3, 1, 2, 4}}, + {ET, {3, 2, 1}, std::vector{1, 4, 4, 5, 3, 3}}}) + .equation("abbac,bad->ad") + .expectedResult({ET, {2, 1}, std::vector{123, 129}}) + .testcaseName("einsum_diagonal_with_matmul"), + }; + return params; +} + +std::vector generateCombinedParams() { + const std::vector> generatedParams { + generateParams(), + generateParams(), + }; + std::vector combinedParams; + + for (const auto& 
params : generatedParams) {
+        combinedParams.insert(combinedParams.end(), params.begin(), params.end());
+    }
+    return combinedParams;
+}
+
+INSTANTIATE_TEST_SUITE_P(smoke_Einsum_With_Hardcoded_Refs, ReferenceEinsumTest,
+    testing::ValuesIn(generateCombinedParams()), ReferenceEinsumTest::getTestCaseName);
+} // namespace
\ No newline at end of file

From 38bbc30a29680f7690fc086c4b08d642927aa4ce Mon Sep 17 00:00:00 2001
From: Mateusz Tabaka
Date: Wed, 15 Dec 2021 21:24:35 +0100
Subject: [PATCH 10/27] [ONNX] Fix memleak caused by shared_ptr cyclic dependency (#9236)

ONNXFrameworkNode had its own copy of the Graph shared_ptr so that, during the
convert phase, it could be used to produce real ngraph nodes (by
graph->make_ng_nodes(..)). But Graph also keeps ONNXFrameworkNodes in its cache
and, in consequence, holds its own shared_ptr to them, which caused a dependency
cycle. This change removes the shared_ptr from the ONNXFrameworkNode class and
moves it to the decoded function's runtime info, so the Graph is now owned in a
single place and its lifetime ends when the decoded function is destroyed.
---
 .../tests/test_frontend/test_frontend_onnx.py | 50 ++++++++++++++++---
 src/core/tests/onnx/onnx_import.in.cpp        |  6 +--
 .../onnx/frontend/src/core/graph.cpp          | 16 +++---
 .../onnx/frontend/src/core/graph.hpp          |  2 +
 .../onnx/frontend/src/onnx_framework_node.cpp |  6 ++-
 .../onnx/frontend/src/onnx_framework_node.hpp | 42 +++++++---------
 .../onnx/frontend/src/utils/onnx_internal.cpp |  9 +++-
 7 files changed, 89 insertions(+), 42 deletions(-)

diff --git a/src/bindings/python/tests/test_frontend/test_frontend_onnx.py b/src/bindings/python/tests/test_frontend/test_frontend_onnx.py
index 7c36999c9cd..f691e55bdb6 100644
--- a/src/bindings/python/tests/test_frontend/test_frontend_onnx.py
+++ b/src/bindings/python/tests/test_frontend/test_frontend_onnx.py
@@ -26,6 +26,32 @@ def create_onnx_model():
     return make_model(graph, producer_name="ngraph ONNX Importer")
 
 
+def create_onnx_model_with_subgraphs():
+    A = onnx.helper.make_tensor_value_info("A", onnx.TensorProto.FLOAT, [3])
+    B = onnx.helper.make_tensor_value_info("B", onnx.TensorProto.FLOAT, [3])
+    add_out = onnx.helper.make_tensor_value_info("add_out", onnx.TensorProto.FLOAT, [3])
+    sub_out = onnx.helper.make_tensor_value_info("sub_out", onnx.TensorProto.FLOAT, [3])
+
+    add = onnx.helper.make_node("Add", inputs=["A", "B"], outputs=["add_out"])
+    sub = onnx.helper.make_node("Sub", inputs=["A", "B"], outputs=["sub_out"])
+
+    then_body = make_graph([add], "then_body", [], [add_out])
+    else_body = make_graph([sub], "else_body", [], [sub_out])
+
+    if_node = onnx.helper.make_node(
+        "If",
+        inputs=["cond"],
+        outputs=["res"],
+        then_branch=then_body,
+        else_branch=else_body
+    )
+    cond = onnx.helper.make_tensor_value_info("cond", onnx.TensorProto.BOOL, [])
+    res = onnx.helper.make_tensor_value_info("res", onnx.TensorProto.FLOAT, [3])
+
+    graph = make_graph([if_node], "graph", [cond, A, B], [res])
+    return make_model(graph, producer_name="ngraph ONNX Importer")
+
+
 def run_function(function, *inputs, expected):
     runtime = get_runtime()
     computation = runtime.computation(function)
@@ -37,15 +63,18 @@ def run_function(function, *inputs, expected):
 
 fem = FrontEndManager()
 onnx_model_filename = "model.onnx"
+onnx_model_with_subgraphs_filename = "model_subgraphs.onnx"
 ONNX_FRONTEND_NAME = "onnx"
 
 
 def setup_module():
     onnx.save_model(create_onnx_model(), onnx_model_filename)
+    onnx.save_model(create_onnx_model_with_subgraphs(), onnx_model_with_subgraphs_filename)
 
 
 def teardown_module():
     os.remove(onnx_model_filename)
+
os.remove(onnx_model_with_subgraphs_filename) def skip_if_onnx_frontend_is_disabled(): @@ -72,17 +101,29 @@ def test_convert(): run_function(function, a, b, expected=[expected]) -def test_decode_and_convert(): +@pytest.mark.parametrize("model_filename, inputs, expected", [ + [onnx_model_filename, + [np.array([[1, 2], [3, 4]], dtype=np.float32), + np.array([[2, 3], [4, 5]], dtype=np.float32)], + np.array([[1.5, 5], [10.5, 18]], dtype=np.float32)], + [onnx_model_with_subgraphs_filename, + [np.array(False, dtype=bool), + np.array([1, 2, 3], dtype=np.float32), + np.array([2, 3, 5], dtype=np.float32)], + np.array([-1, -1, -2], dtype=np.float32)], +]) +def test_decode_and_convert(model_filename, inputs, expected): skip_if_onnx_frontend_is_disabled() fe = fem.load_by_framework(framework=ONNX_FRONTEND_NAME) assert fe - model = fe.load(onnx_model_filename) + model = fe.load(model_filename) assert model decoded_function = fe.decode(model) assert decoded_function + for op in decoded_function.get_ordered_ops(): assert op.get_type_name() in ["Parameter", "Constant", "ONNXFrameworkNode", "ONNXSubgraphFrameworkNode", "Result"] @@ -92,10 +133,7 @@ def test_decode_and_convert(): for op in decoded_function.get_ordered_ops(): assert op.get_type_name() not in ["ONNXFrameworkNode", "ONNXSubgraphFrameworkNode"] - a = np.array([[1, 2], [3, 4]], dtype=np.float32) - b = np.array([[2, 3], [4, 5]], dtype=np.float32) - expected = np.array([[1.5, 5], [10.5, 18]], dtype=np.float32) - run_function(decoded_function, a, b, expected=[expected]) + run_function(decoded_function, *inputs, expected=[expected]) def test_load_by_model(): diff --git a/src/core/tests/onnx/onnx_import.in.cpp b/src/core/tests/onnx/onnx_import.in.cpp index efac638d743..100c8a2d720 100644 --- a/src/core/tests/onnx/onnx_import.in.cpp +++ b/src/core/tests/onnx/onnx_import.in.cpp @@ -380,7 +380,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_initializer_wo_input) { test_case.run(); } -NGRAPH_TEST(onnx_${BACKEND_NAME}, onnx_expand_function) { +NGRAPH_TEST(${BACKEND_NAME}, onnx_expand_function) { const auto function = onnx_import::import_onnx_model( file_util::path_join(SERIALIZED_ZOO, "onnx/quantization/dynamicquantizelinear.onnx")); @@ -392,7 +392,7 @@ NGRAPH_TEST(onnx_${BACKEND_NAME}, onnx_expand_function) { test_case.run(); } -NGRAPH_TEST(onnx_${BACKEND_NAME}, onnx_expand_function_dependency_to_created_subgraph) { +NGRAPH_TEST(${BACKEND_NAME}, onnx_expand_function_dependency_to_created_subgraph) { const auto function = onnx_import::import_onnx_model( file_util::path_join(SERIALIZED_ZOO, "onnx/transformations/greater_or_equal.onnx")); @@ -403,7 +403,7 @@ NGRAPH_TEST(onnx_${BACKEND_NAME}, onnx_expand_function_dependency_to_created_sub test_case.run(); } -NGRAPH_TEST(onnx_${BACKEND_NAME}, onnx_expand_context_dependent_function) { +NGRAPH_TEST(${BACKEND_NAME}, onnx_expand_context_dependent_function) { auto function = onnx_import::import_onnx_model( file_util::path_join(SERIALIZED_ZOO, "onnx/transformations/softmax_crossentropy_consumed.onnx")); diff --git a/src/frontends/onnx/frontend/src/core/graph.cpp b/src/frontends/onnx/frontend/src/core/graph.cpp index 2c76c62d377..212766fc550 100644 --- a/src/frontends/onnx/frontend/src/core/graph.cpp +++ b/src/frontends/onnx/frontend/src/core/graph.cpp @@ -199,9 +199,10 @@ void Graph::decode_to_framework_nodes() { if (node.has_subgraphs()) { const auto& subgraphs = node.get_subgraphs(); auto inputs = node.get_ng_inputs(); + std::vector> functions; for (const auto& kv : subgraphs) { auto& subgraph = kv.second; - 
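// [Editor's note] This hunk is the behavioural half of the memleak fix described
// in the commit message: each subgraph's decode() result is now collected into the
// local `functions` vector (the '+' lines around this point) and handed to
// ONNXSubgraphFrameworkNode up front, instead of the node calling back into the
// Graph later. Combined with the get_inputs_from_parent() loop that follows, this
// is exactly what the new Python If-model test exercises: both branches read A and
// B from the enclosing graph, so with cond=False the Sub branch computes
// [1, 2, 3] - [2, 3, 5] == [-1, -1, -2], the expected value asserted there.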
subgraph->decode(); + functions.push_back(subgraph->decode()); for (const auto& input : subgraph->get_inputs_from_parent()) { const auto& name = input.get_node()->get_friendly_name(); if (std::find_if(inputs.begin(), inputs.end(), [&name](const Output& n) -> bool { @@ -211,10 +212,9 @@ void Graph::decode_to_framework_nodes() { } } } - framework_node = - std::make_shared(shared_from_this(), node, inputs); + framework_node = std::make_shared(node, functions, inputs); } else { - framework_node = std::make_shared(shared_from_this(), node); + framework_node = std::make_shared(node); } OutputVector ng_nodes{framework_node->outputs()}; set_friendly_names(node, ng_nodes); @@ -240,7 +240,10 @@ std::shared_ptr Graph::create_function() { std::shared_ptr Graph::decode() { decode_to_framework_nodes(); - return create_function(); + auto function = create_function(); + auto& rt_info = function->get_rt_info(); + rt_info[ONNX_GRAPH_RT_ATTRIBUTE] = shared_from_this(); + return function; } bool Graph::is_ng_node_in_cache(const std::string& name) const { @@ -399,7 +402,8 @@ void Subgraph::find_inputs_from_parent() { for (const auto& out_name : node_proto.output()) { if (m_cache->contains(out_name)) { auto node_to_replace_input = m_cache->get_node(out_name).get_node(); - if (!dynamic_cast(node_to_replace_input)) + if (!ov::is_type(node_to_replace_input) && + !ov::is_type(node_to_replace_input)) continue; auto inputs = node_to_replace_input->input_values(); for (size_t i = 0; i < inputs.size(); i++) { diff --git a/src/frontends/onnx/frontend/src/core/graph.hpp b/src/frontends/onnx/frontend/src/core/graph.hpp index a7a983b038a..0e5c2378d32 100644 --- a/src/frontends/onnx/frontend/src/core/graph.hpp +++ b/src/frontends/onnx/frontend/src/core/graph.hpp @@ -121,6 +121,8 @@ inline std::ostream& operator<<(std::ostream& outs, const Graph& graph) { return (outs << ""); } +static const char* const ONNX_GRAPH_RT_ATTRIBUTE = "onnx_graph"; + } // namespace onnx_import } // namespace ngraph diff --git a/src/frontends/onnx/frontend/src/onnx_framework_node.cpp b/src/frontends/onnx/frontend/src/onnx_framework_node.cpp index bac360586d3..22eb9b56299 100644 --- a/src/frontends/onnx/frontend/src/onnx_framework_node.cpp +++ b/src/frontends/onnx/frontend/src/onnx_framework_node.cpp @@ -21,10 +21,14 @@ namespace frontend { NGRAPH_RTTI_DEFINITION(ONNXFrameworkNode, "ONNXFrameworkNode", 1); std::shared_ptr ONNXFrameworkNode::clone_with_new_inputs(const OutputVector& inputs) const { - return std::make_shared(m_graph, m_node, inputs); + return std::make_shared(m_node, inputs); } NGRAPH_RTTI_DEFINITION(ONNXSubgraphFrameworkNode, "ONNXSubgraphFrameworkNode", 1); +std::shared_ptr ONNXSubgraphFrameworkNode::clone_with_new_inputs(const OutputVector& inputs) const { + return std::make_shared(m_node, m_functions, inputs); +} + } // namespace frontend } // namespace ngraph diff --git a/src/frontends/onnx/frontend/src/onnx_framework_node.hpp b/src/frontends/onnx/frontend/src/onnx_framework_node.hpp index 852a3f07b09..8e52dd3dd18 100644 --- a/src/frontends/onnx/frontend/src/onnx_framework_node.hpp +++ b/src/frontends/onnx/frontend/src/onnx_framework_node.hpp @@ -38,20 +38,16 @@ class ONNXFrameworkNode : public ov::op::util::FrameworkNode { public: NGRAPH_RTTI_DECLARATION; - ONNXFrameworkNode(std::shared_ptr graph, const onnx_import::Node& node) + ONNXFrameworkNode(const onnx_import::Node& node) : ov::op::util::FrameworkNode(node.get_ng_inputs(), node.get_outputs_size()), - m_node(node), - m_graph(graph) {} + m_node(node) {} - 
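// [Editor's note] The m_graph member deleted in this hunk is the root cause from
// the commit message: Graph kept shared_ptr references to its ONNXFrameworkNodes
// in m_cache while every node kept a shared_ptr to the Graph back, so neither
// refcount could ever drop to zero. A toy sketch of the cycle and of the
// replacement ownership (abbreviated names, not the literal OpenVINO types):
//
//   struct Node;
//   struct Graph { std::vector<std::shared_ptr<Node>> cache; };
//   struct Node  { std::shared_ptr<Graph> graph; };   // cycle: nothing is freed
//
//   // after this patch only the decoded Function owns the Graph:
//   //   rt_info[ONNX_GRAPH_RT_ATTRIBUTE] = shared_from_this();   (in Graph::decode)
//   // and convert_decoded_function() erases that entry when it finishes, ending
//   // the Graph's lifetime together with the conversion.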
ONNXFrameworkNode(std::shared_ptr graph, - const onnx_import::Node& node, - const OutputVector& inputs) + ONNXFrameworkNode(const onnx_import::Node& node, const OutputVector& inputs) : ov::op::util::FrameworkNode(inputs, node.get_outputs_size()), - m_node(node), - m_graph(graph) {} + m_node(node) {} - OutputVector get_ng_nodes() const { - OutputVector ng_nodes{m_graph->make_ng_nodes(m_node)}; + OutputVector get_ng_nodes(const std::shared_ptr& graph) const { + OutputVector ng_nodes{graph->make_ng_nodes(m_node)}; if (ng_nodes.size() > get_output_size()) { ng_nodes.resize(get_output_size()); } @@ -71,35 +67,31 @@ public: protected: onnx_import::Node m_node; - -private: - std::shared_ptr m_graph; }; class ONNXSubgraphFrameworkNode : public ONNXFrameworkNode { public: NGRAPH_RTTI_DECLARATION; - ONNXSubgraphFrameworkNode(std::shared_ptr graph, - const onnx_import::Node& node, + ONNXSubgraphFrameworkNode(const onnx_import::Node& node, + const std::vector>& functions, const OutputVector& inputs) - : ONNXFrameworkNode(graph, node, inputs) {} + : ONNXFrameworkNode(node, inputs), + m_functions(functions) {} void infer_inputs_from_parent() { for (auto& subgraph : m_node.get_subgraphs()) subgraph.second->infer_inputs_from_parent(); } - std::vector> get_subgraph_functions() const { - std::vector> ret; - for (const auto& kv : m_node.get_subgraphs()) { - auto& subgraph = kv.second; - ret.push_back(std::make_shared(subgraph->get_ng_outputs(), - subgraph->get_ng_parameters(), - subgraph->get_name())); - } - return ret; + const std::vector>& get_subgraph_functions() const { + return m_functions; } + + virtual std::shared_ptr clone_with_new_inputs(const OutputVector& inputs) const override; + +private: + std::vector> m_functions; }; } // namespace frontend diff --git a/src/frontends/onnx/frontend/src/utils/onnx_internal.cpp b/src/frontends/onnx/frontend/src/utils/onnx_internal.cpp index 930c8fab619..aff727c9fa8 100644 --- a/src/frontends/onnx/frontend/src/utils/onnx_internal.cpp +++ b/src/frontends/onnx/frontend/src/utils/onnx_internal.cpp @@ -60,6 +60,12 @@ void apply_transformations(ONNX_NAMESPACE::ModelProto& model_proto, const std::s } // namespace void convert_decoded_function(std::shared_ptr function) { + auto& rt_info = function->get_rt_info(); + auto it = rt_info.find(ONNX_GRAPH_RT_ATTRIBUTE); + OPENVINO_ASSERT(it != rt_info.end(), + "Could not find '" + std::string(ONNX_GRAPH_RT_ATTRIBUTE) + + "' attribute in decoded model. 
Model probably wasn't created by FrontEnd::decode function."); + auto onnx_graph = it->second.as>(); for (const auto& node : function->get_ordered_ops()) { if (auto raw_node = std::dynamic_pointer_cast(node)) { if (auto subgraph_node = std::dynamic_pointer_cast(node)) { @@ -68,7 +74,7 @@ void convert_decoded_function(std::shared_ptr function) { convert_decoded_function(function); } } - auto ng_nodes = raw_node->get_ng_nodes(); + auto ng_nodes = raw_node->get_ng_nodes(onnx_graph); replace_node(raw_node, ng_nodes); } else { // Have to revalidate node because new intpus can affect shape/type @@ -76,6 +82,7 @@ void convert_decoded_function(std::shared_ptr function) { node->revalidate_and_infer_types(); } } + rt_info.erase(it); detail::remove_dangling_parameters(function); detail::remove_dangling_results(function); } From b64329430071ab8af0cb7de75e025859b517b463 Mon Sep 17 00:00:00 2001 From: Andrey Noskov Date: Wed, 15 Dec 2021 23:39:31 +0300 Subject: [PATCH 11/27] [GNA] Added import/export test (#8769) * [GNA] Added import/export test - still need fixes * Fixed inputs and const vals * Parametrized input shape for import base test * Deleted commented code * Fixed input shape in test * Fixed SF for import --- .../import_export_batch_size.cpp | 91 +++++++++++++++++++ .../import_export_multi_inputs.cpp | 11 ++- .../import_reshape_permute_conv.cpp | 6 ++ .../import_export_tests/import_nonzero.cpp | 3 + .../import_export_base/import_export_base.hpp | 1 + .../import_export_base/import_export_base.cpp | 4 +- .../import_export_tests/import_nonzero.cpp | 5 +- .../import_reshape_permute_conv.cpp | 5 +- 8 files changed, 119 insertions(+), 7 deletions(-) create mode 100644 src/tests/functional/plugin/gna/Import_export_tests/import_export_batch_size.cpp diff --git a/src/tests/functional/plugin/gna/Import_export_tests/import_export_batch_size.cpp b/src/tests/functional/plugin/gna/Import_export_tests/import_export_batch_size.cpp new file mode 100644 index 00000000000..da0e3c88a12 --- /dev/null +++ b/src/tests/functional/plugin/gna/Import_export_tests/import_export_batch_size.cpp @@ -0,0 +1,91 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "ngraph_functions/builders.hpp" +#include "base/import_export_base/import_export_base.hpp" + +namespace LayerTestDefinitions { + +class ImportBatchTest : public FuncTestUtils::ImportNetworkTestBase { +protected: + InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo& info) const override { + return FuncTestUtils::createAndFillBlob(info.getTensorDesc(), 0.2f, -0.1f); + } + + void SetUp() override { + InferenceEngine::Precision netPrecision; + std::vector inputShape; + std::string _; + std::tie(inputShape, netPrecision, targetDevice, exportConfiguration, importConfiguration, _) = this->GetParam(); + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); + + auto params = ngraph::builder::makeParams(ngPrc, {inputShape}); + + auto mul_const_1 = ngraph::builder::makeConstant(ngPrc, { inputShape[1], 2048 }, + CommonTestUtils::generate_float_numbers(2048 * inputShape[1], -0.1f, 0.1f), false); + + auto matmul_1 = std::make_shared(params[0], mul_const_1); + auto sigmoid_1 = std::make_shared(matmul_1); + + auto mul_const_2 = ngraph::builder::makeConstant(ngPrc, { 2048, 3425 }, + CommonTestUtils::generate_float_numbers(2048 * 3425, -0.1f, 0.1f), false); + + auto matmul_2 = std::make_shared(sigmoid_1, 
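// [Editor's note] The network this fixture builds is a deliberately plain
// MatMul -> Sigmoid -> MatMul chain sized by inputShape; the point of the test is
// what the ImportNetworkTestBase base class does with it: compile for GNA under
// the export config defined below (GNA_SW_EXACT quantizes, with GNA_SCALE_FACTOR_0
// fixing the input scale), serialize the compiled blob, re-import it under the
// import config, and compare outputs. A hedged sketch of that round trip with the
// 2021-era InferenceEngine API (error handling omitted):
//
//   InferenceEngine::Core ie;
//   std::stringstream blob;
//   ie.LoadNetwork(cnnNetwork, "GNA", exportConfig).Export(blob);
//   auto imported = ie.ImportNetwork(blob, "GNA", importConfig);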
mul_const_2); + + function = std::make_shared(matmul_2, params, "ExportImportNetwork"); + } +}; + +TEST_P(ImportBatchTest, CompareWithRefImpl) { + Run(); +}; + +const std::vector> inputShapes = { + {1, 440}, + {2, 440}, + {4, 128} +}; + +const std::vector netPrecisions = { + InferenceEngine::Precision::FP32, + InferenceEngine::Precision::FP16 +}; + +const std::vector> exportConfigs = { + { + {"GNA_DEVICE_MODE", "GNA_SW_EXACT"}, + {"GNA_SCALE_FACTOR_0", "327.67"} + } +}; + +const std::vector> importConfigs = { + { + {"GNA_DEVICE_MODE", "GNA_SW_EXACT"} + } +}; + +const std::vector appHeader = { + "" +}; + +INSTANTIATE_TEST_SUITE_P(smoke_ImportNetworkBatchCase, ImportBatchTest, + ::testing::Combine( + ::testing::ValuesIn(inputShapes), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_GNA), + ::testing::ValuesIn(exportConfigs), + ::testing::ValuesIn(importConfigs), + ::testing::ValuesIn(appHeader)), + ImportBatchTest::getTestCaseName); +} // namespace LayerTestDefinitions diff --git a/src/tests/functional/plugin/gna/shared_tests_instances/import_export_tests/import_export_multi_inputs.cpp b/src/tests/functional/plugin/gna/shared_tests_instances/import_export_tests/import_export_multi_inputs.cpp index c2e381b5542..487dcf3f4e1 100644 --- a/src/tests/functional/plugin/gna/shared_tests_instances/import_export_tests/import_export_multi_inputs.cpp +++ b/src/tests/functional/plugin/gna/shared_tests_instances/import_export_tests/import_export_multi_inputs.cpp @@ -17,11 +17,12 @@ namespace LayerTestsDefinitions { class ImportMultiInput : public FuncTestUtils::ImportNetworkTestBase { protected: void SetUp() override { + std::vector inputShape; InferenceEngine::Precision netPrecision; - std::tie(netPrecision, targetDevice, exportConfiguration, importConfiguration, applicationHeader) = this->GetParam(); + std::tie(inputShape, netPrecision, targetDevice, exportConfiguration, importConfiguration, applicationHeader) = this->GetParam(); auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); - auto input = ngraph::builder::makeParams(ngPrc, {{1, 10}, {1, 10}}); + auto input = ngraph::builder::makeParams(ngPrc, {inputShape, inputShape}); auto mul1 = ngraph::builder::makeEltwise(input[0], input[1], ngraph::helpers::EltwiseTypes::ADD); auto result = std::make_shared(mul1); @@ -40,6 +41,10 @@ TEST_P(ImportMultiInputChanged, CompareWithRefImpl) { TestRun(true); }; +const std::vector> inputShape = { + {1, 10} +}; + const std::vector netPrecisions = { InferenceEngine::Precision::FP32 }; @@ -98,6 +103,7 @@ const std::vector> importConfigsUnchanged = { INSTANTIATE_TEST_SUITE_P(smoke_ImportNetworkGNA, ImportMultiInputUnchanged, ::testing::Combine( + ::testing::ValuesIn(inputShape), ::testing::ValuesIn(netPrecisions), ::testing::Values(CommonTestUtils::DEVICE_GNA), ::testing::ValuesIn(exportConfigs), @@ -107,6 +113,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ImportNetworkGNA, ImportMultiInputUnchanged, INSTANTIATE_TEST_SUITE_P(smoke_ImportNetworkGNA, ImportMultiInputChanged, ::testing::Combine( + ::testing::ValuesIn(inputShape), ::testing::ValuesIn(netPrecisions), ::testing::Values(CommonTestUtils::DEVICE_GNA), ::testing::ValuesIn(exportConfigs), diff --git a/src/tests/functional/plugin/gna/shared_tests_instances/import_export_tests/import_reshape_permute_conv.cpp b/src/tests/functional/plugin/gna/shared_tests_instances/import_export_tests/import_reshape_permute_conv.cpp index 110b0fd66fe..0f8bd3d1fc9 100644 --- 
a/src/tests/functional/plugin/gna/shared_tests_instances/import_export_tests/import_reshape_permute_conv.cpp +++ b/src/tests/functional/plugin/gna/shared_tests_instances/import_export_tests/import_reshape_permute_conv.cpp @@ -52,6 +52,10 @@ TEST_P(ImportExportGNAModelChanged, ReshapePermuteConv) { TestRun(true); }; +const std::vector> inputShapes = { + {1, 336} +}; + const std::vector netPrecisions = { InferenceEngine::Precision::FP32, InferenceEngine::Precision::FP16 @@ -92,6 +96,7 @@ const std::vector appHeaders = { INSTANTIATE_TEST_SUITE_P(smoke_ImportNetworkGNA, ImportExportGNAModelUnchanged, ::testing::Combine( + ::testing::ValuesIn(inputShapes), ::testing::ValuesIn(netPrecisions), ::testing::Values(CommonTestUtils::DEVICE_GNA), ::testing::ValuesIn(exportConfigs), @@ -101,6 +106,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ImportNetworkGNA, ImportExportGNAModelUnchanged, INSTANTIATE_TEST_SUITE_P(smoke_ImportNetworkGNA, ImportExportGNAModelChanged, ::testing::Combine( + ::testing::ValuesIn(inputShapes), ::testing::ValuesIn(netPrecisions), ::testing::Values(CommonTestUtils::DEVICE_GNA), ::testing::ValuesIn(exportConfigs), diff --git a/src/tests/functional/plugin/myriad/shared_tests_instances/import_export_tests/import_nonzero.cpp b/src/tests/functional/plugin/myriad/shared_tests_instances/import_export_tests/import_nonzero.cpp index 4555ac1ec9d..888ac6eb712 100644 --- a/src/tests/functional/plugin/myriad/shared_tests_instances/import_export_tests/import_nonzero.cpp +++ b/src/tests/functional/plugin/myriad/shared_tests_instances/import_export_tests/import_nonzero.cpp @@ -26,8 +26,11 @@ const std::vector appHeaders = { "APPLICATION_HEADER" }; +std::vector inputShape = ngraph::Shape{1000}; + INSTANTIATE_TEST_SUITE_P(smoke_ImportNetworkCase, ImportNonZero, ::testing::Combine( + ::testing::Values(inputShape), ::testing::ValuesIn(netPrecisions), ::testing::Values(CommonTestUtils::DEVICE_MYRIAD), ::testing::ValuesIn(exportConfigs), diff --git a/src/tests/functional/plugin/shared/include/base/import_export_base/import_export_base.hpp b/src/tests/functional/plugin/shared/include/base/import_export_base/import_export_base.hpp index 8795dced06f..e6ce7a46165 100644 --- a/src/tests/functional/plugin/shared/include/base/import_export_base/import_export_base.hpp +++ b/src/tests/functional/plugin/shared/include/base/import_export_base/import_export_base.hpp @@ -9,6 +9,7 @@ #include typedef std::tuple< + std::vector, // Input Shape InferenceEngine::Precision, // Network Precision std::string, // Target Device std::map, // Export Configuration diff --git a/src/tests/functional/plugin/shared/src/base/import_export_base/import_export_base.cpp b/src/tests/functional/plugin/shared/src/base/import_export_base/import_export_base.cpp index 3f7e6e0f149..be782eb6687 100644 --- a/src/tests/functional/plugin/shared/src/base/import_export_base/import_export_base.cpp +++ b/src/tests/functional/plugin/shared/src/base/import_export_base/import_export_base.cpp @@ -9,14 +9,16 @@ namespace FuncTestUtils { std::string ImportNetworkTestBase::getTestCaseName(testing::TestParamInfo obj) { + std::vector inputShape; InferenceEngine::Precision netPrecision; std::string targetDevice; std::map exportConfiguration; std::map importConfiguration; std::string appHeader; - std::tie(netPrecision, targetDevice, exportConfiguration, importConfiguration, appHeader) = obj.param; + std::tie(inputShape, netPrecision, targetDevice, exportConfiguration, importConfiguration, appHeader) = obj.param; std::ostringstream result; + result << "IS=" << 
CommonTestUtils::vec2str(inputShape) << "_"; result << "netPRC=" << netPrecision.name() << "_"; result << "targetDevice=" << targetDevice << "_"; for (auto const& configItem : exportConfiguration) { diff --git a/src/tests/functional/plugin/shared/src/import_export_tests/import_nonzero.cpp b/src/tests/functional/plugin/shared/src/import_export_tests/import_nonzero.cpp index 44ed3eff75c..64d3ad1192f 100644 --- a/src/tests/functional/plugin/shared/src/import_export_tests/import_nonzero.cpp +++ b/src/tests/functional/plugin/shared/src/import_export_tests/import_nonzero.cpp @@ -10,10 +10,11 @@ namespace LayerTestsDefinitions { void ImportNonZero::SetUp() { InferenceEngine::Precision netPrecision; - std::tie(netPrecision, targetDevice, exportConfiguration, importConfiguration, applicationHeader) = this->GetParam(); + ngraph::Shape inputShape; + std::tie(inputShape, netPrecision, targetDevice, exportConfiguration, importConfiguration, applicationHeader) = this->GetParam(); const auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); - const auto parameter = std::make_shared(ngPrc, ngraph::Shape{1000}); + const auto parameter = std::make_shared(ngPrc, inputShape); const auto nonZero = std::make_shared(parameter); function = std::make_shared(nonZero->outputs(), ngraph::ParameterVector{parameter}, "ExportImportNetwork"); diff --git a/src/tests/functional/plugin/shared/src/import_export_tests/import_reshape_permute_conv.cpp b/src/tests/functional/plugin/shared/src/import_export_tests/import_reshape_permute_conv.cpp index 5a6cb6b6ba6..664aa444854 100644 --- a/src/tests/functional/plugin/shared/src/import_export_tests/import_reshape_permute_conv.cpp +++ b/src/tests/functional/plugin/shared/src/import_export_tests/import_reshape_permute_conv.cpp @@ -9,11 +9,12 @@ namespace LayerTestsDefinitions { void ImportReshapePermuteConv::SetUp() { + std::vector inputShape; InferenceEngine::Precision netPrecision; - std::tie(netPrecision, targetDevice, exportConfiguration, importConfiguration, applicationHeader) = this->GetParam(); + std::tie(inputShape, netPrecision, targetDevice, exportConfiguration, importConfiguration, applicationHeader) = this->GetParam(); auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); - auto params = ngraph::builder::makeParams(ngPrc, { {1, 336} }); + auto params = ngraph::builder::makeParams(ngPrc, { inputShape }); std::vector outFormShapes1 = { 1, 1, 168, 2 }; auto pattern1 = std::make_shared(ngraph::element::Type_t::i64, ngraph::Shape{ 4 }, outFormShapes1); From 0b9158c2b82f5ec44eeaef76599614a2b29f8991 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dawid=20Ko=C5=BCykowski?= Date: Wed, 15 Dec 2021 21:40:43 +0100 Subject: [PATCH 12/27] Extend ONNX FE for operation Softmax-8 (#9189) --- .../onnx/softmax_axis_1_opset11.prototxt | 56 ++++++ .../softmax_axis_negative_1_opset11.prototxt | 56 ++++++ .../softmax_axis_negative_1_opset13.prototxt | 56 ++++++ src/core/tests/onnx/onnx_import.in.cpp | 162 ++++++++++++++---- .../onnx/frontend/src/op/softmax.cpp | 25 +-- 5 files changed, 296 insertions(+), 59 deletions(-) create mode 100644 src/core/tests/models/onnx/softmax_axis_1_opset11.prototxt create mode 100644 src/core/tests/models/onnx/softmax_axis_negative_1_opset11.prototxt create mode 100644 src/core/tests/models/onnx/softmax_axis_negative_1_opset13.prototxt diff --git a/src/core/tests/models/onnx/softmax_axis_1_opset11.prototxt b/src/core/tests/models/onnx/softmax_axis_1_opset11.prototxt new file mode 100644 index 00000000000..947b381db0b --- 
/dev/null +++ b/src/core/tests/models/onnx/softmax_axis_1_opset11.prototxt @@ -0,0 +1,56 @@ +ir_version: 3 +producer_name: "nGraph ONNX Importer" +graph { + node { + input: "x" + output: "y" + op_type: "Softmax" + attribute { + name: "axis" + i: 1 + type: INT + } + } + name: "test_softmax_axis_1" + input { + name: "x" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 3 + } + dim { + dim_value: 4 + } + dim { + dim_value: 5 + } + } + } + } + } + output { + name: "y" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 3 + } + dim { + dim_value: 4 + } + dim { + dim_value: 5 + } + } + } + } + } +} +opset_import { + version: 11 +} diff --git a/src/core/tests/models/onnx/softmax_axis_negative_1_opset11.prototxt b/src/core/tests/models/onnx/softmax_axis_negative_1_opset11.prototxt new file mode 100644 index 00000000000..ad9a4b72603 --- /dev/null +++ b/src/core/tests/models/onnx/softmax_axis_negative_1_opset11.prototxt @@ -0,0 +1,56 @@ +ir_version: 3 +producer_name: "nGraph ONNX Importer" +graph { + node { + input: "x" + output: "y" + op_type: "Softmax" + attribute { + name: "axis" + i: -1 + type: INT + } + } + name: "test_softmax_axis_0" + input { + name: "x" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 3 + } + dim { + dim_value: 4 + } + dim { + dim_value: 5 + } + } + } + } + } + output { + name: "y" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 3 + } + dim { + dim_value: 4 + } + dim { + dim_value: 5 + } + } + } + } + } +} +opset_import { + version: 11 +} diff --git a/src/core/tests/models/onnx/softmax_axis_negative_1_opset13.prototxt b/src/core/tests/models/onnx/softmax_axis_negative_1_opset13.prototxt new file mode 100644 index 00000000000..aff3afc2c52 --- /dev/null +++ b/src/core/tests/models/onnx/softmax_axis_negative_1_opset13.prototxt @@ -0,0 +1,56 @@ +ir_version: 3 +producer_name: "nGraph ONNX Importer" +graph { + node { + input: "x" + output: "y" + op_type: "Softmax" + attribute { + name: "axis" + i: -1 + type: INT + } + } + name: "test_softmax_axis_0" + input { + name: "x" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 3 + } + dim { + dim_value: 4 + } + dim { + dim_value: 5 + } + } + } + } + } + output { + name: "y" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 3 + } + dim { + dim_value: 4 + } + dim { + dim_value: 5 + } + } + } + } + } +} +opset_import { + version: 13 +} diff --git a/src/core/tests/onnx/onnx_import.in.cpp b/src/core/tests/onnx/onnx_import.in.cpp index 100c8a2d720..73f02233e0f 100644 --- a/src/core/tests/onnx/onnx_import.in.cpp +++ b/src/core/tests/onnx/onnx_import.in.cpp @@ -690,19 +690,24 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_1D) { } namespace { // common input for all Softmax 3D test cases (Shape = {3,4,5}) +// clang-format off const std::vector SOFTMAX_INPUT = { - 2.75793882, -0.50841322, 0.82013929, -0.62409912, -0.96136118, 0.21004745, 1.38337255, - 1.19030397, 2.0940445, -0.03551657, -0.78686039, 1.992782, 0.04300319, -0.29230777, - -0.56797112, -1.26732165, -0.61935399, 0.57670432, 0.92844898, 2.82469233, + 2.75793882, -0.50841322, 0.82013929, -0.62409912, -0.96136118, + 0.21004745, 1.38337255, 1.19030397, 2.0940445, -0.03551657, + -0.78686039, 1.992782, 0.04300319, -0.29230777, -0.56797112, + -1.26732165, -0.61935399, 0.57670432, 0.92844898, 2.82469233, - 0.98721677, -0.05100663, -1.21178917, -0.17530157, 1.40051805, -0.13259761, -1.14313018, - 0.2673723, -0.87996154, 1.29053106, 1.55, 0.8396538, 1.20729817, 
0.23727845, - -0.89113606, -1.70909842, 0.26460363, -0.70566808, 2.383518, 1.07024615, + 0.98721677, -0.05100663, -1.21178917, -0.17530157, 1.40051805, + -0.13259761, -1.14313018, 0.2673723, -0.87996154, 1.29053106, + 1.55, 0.8396538, 1.20729817, 0.23727845, -0.89113606, + -1.70909842, 0.26460363, -0.70566808, 2.383518, 1.07024615, - -1.21722605, 0.82919357, 0.55765697, 0.12657686, 0.63432172, 0.75425957, -2.43721014, - -1.24478184, 2.65316853, 1.19509542, -0.95523998, 0.5149006, -0.01151649, 0.68327026, - -0.4589638, -0.46554745, 0.21055324, 0.39266729, 2.05098086, 1.83207919}; + -1.21722605, 0.82919357, 0.55765697, 0.12657686, 0.63432172, + 0.75425957, -2.43721014, -1.24478184, 2.65316853, 1.19509542, + -0.95523998, 0.5149006, -0.01151649, 0.68327026, -0.4589638, + -0.46554745, 0.21055324, 0.39266729, 2.05098086, 1.83207919}; } // namespace +// clang-format on NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_axis_0) { auto function = onnx_import::import_onnx_model(file_util::path_join(SERIALIZED_ZOO, "onnx/softmax_axis_0.onnx")); @@ -710,19 +715,24 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_axis_0) { auto test_case = test::TestCase(function, s_device); test_case.add_input(SOFTMAX_INPUT); + // clang-format off test_case.add_expected_output( Shape{3, 4, 5}, - {0.09683057, 0.00369363, 0.01394559, 0.00329012, 0.00234823, 0.00757665, 0.02449322, - 0.02019284, 0.04985249, 0.00592694, 0.00279593, 0.04505148, 0.00641108, 0.00458466, - 0.00348007, 0.00172928, 0.00330577, 0.01093237, 0.01554086, 0.10351497, + {0.09683057, 0.00369363, 0.01394559, 0.00329012, 0.00234823, + 0.00757665, 0.02449322, 0.02019284, 0.04985249, 0.00592694, + 0.00279593, 0.04505148, 0.00641108, 0.00458466, 0.00348007, + 0.00172928, 0.00330577, 0.01093237, 0.01554086, 0.10351497, - 0.01648154, 0.00583583, 0.00182802, 0.00515374, 0.02491679, 0.00537859, 0.00195794, - 0.00802367, 0.00254737, 0.0223216, 0.02893419, 0.0142204, 0.02053893, 0.00778581, - 0.00251907, 0.00111174, 0.00800149, 0.0030324, 0.06658917, 0.0179084, + 0.01648154, 0.00583583, 0.00182802, 0.00515374, 0.02491679, + 0.00537859, 0.00195794, 0.00802367, 0.00254737, 0.0223216, + 0.02893419, 0.0142204, 0.02053893, 0.00778581, 0.00251907, + 0.00111174, 0.00800149, 0.0030324, 0.06658917, 0.0179084, - 0.00181811, 0.01407243, 0.01072611, 0.0069699, 0.01158077, 0.01305647, 0.00053677, - 0.0017687, 0.08719896, 0.02028982, 0.00236265, 0.01027717, 0.0060709, 0.01216173, - 0.00388087, 0.00385541, 0.00758048, 0.00909469, 0.04775123, 0.03836337}); + 0.00181811, 0.01407243, 0.01072611, 0.0069699, 0.01158077, + 0.01305647, 0.00053677, 0.0017687, 0.08719896, 0.02028982, + 0.00236265, 0.01027717, 0.0060709, 0.01216173, 0.00388087, + 0.00385541, 0.00758048, 0.00909469, 0.04775123, 0.03836337}); + // clang-format on test_case.run(6); } @@ -733,35 +743,113 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_axis_1) { auto test_case = test::TestCase(function, s_device); test_case.add_input(SOFTMAX_INPUT); + // clang-format off test_case.add_expected_output( Shape{3, 4, 5}, - {0.22757064, 0.00868076, 0.03277484, 0.00773243, 0.0055188, 0.0178066, 0.05756383, - 0.04745709, 0.11716303, 0.01392945, 0.00657097, 0.10587974, 0.01506727, 0.01077484, - 0.00817884, 0.00406413, 0.00776921, 0.0256932, 0.03652405, 0.24328028, + {0.22757064, 0.00868076, 0.03277484, 0.00773243, 0.0055188, + 0.0178066, 0.05756383, 0.04745709, 0.11716303, 0.01392945, + 0.00657097, 0.10587974, 0.01506727, 0.01077484, 0.00817884, + 0.00406413, 0.00776921, 0.0256932, 0.03652405, 0.24328028, - 0.06217413, 0.02201481, 
0.00689594, 0.01944171, 0.09399488, 0.02028993, 0.00738604, - 0.03026811, 0.00960958, 0.08420492, 0.10914991, 0.05364435, 0.07748005, 0.02937079, - 0.0095028, 0.00419387, 0.03018442, 0.01143929, 0.2511977, 0.06755678, + 0.06217413, 0.02201481, 0.00689594, 0.01944171, 0.09399488, + 0.02028993, 0.00738604, 0.03026811, 0.00960958, 0.08420492, + 0.10914991, 0.05364435, 0.07748005, 0.02937079, 0.0095028, + 0.00419387, 0.03018442, 0.01143929, 0.2511977, 0.06755678, - 0.00587593, 0.04548053, 0.0346656, 0.02252594, 0.03742775, 0.04219705, 0.00173478, - 0.00571623, 0.2818174, 0.06557446, 0.00763582, 0.03321466, 0.01962049, 0.03930537, - 0.01254255, 0.01246025, 0.02449929, 0.02939305, 0.15432668, 0.12398617}); + 0.00587593, 0.04548053, 0.0346656, 0.02252594, 0.03742775, + 0.04219705, 0.00173478, 0.00571623, 0.2818174, 0.06557446, + 0.00763582, 0.03321466, 0.01962049, 0.03930537, 0.01254255, + 0.01246025, 0.02449929, 0.02939305, 0.15432668, 0.12398617}); + // clang-format on test_case.run(4); } -NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_invalid_axis_1D) { - ASSERT_THROW( - onnx_import::import_onnx_model(file_util::path_join(SERIALIZED_ZOO, "onnx/softmax_invalid_axis_1D.onnx")), - ngraph::ngraph_error) - << "Softmax model with invalid axis was successfully imported while it should have thrown."; +NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_axis_1_opset11) { + auto function = + onnx_import::import_onnx_model(file_util::path_join(SERIALIZED_ZOO, "onnx/softmax_axis_1_opset11.onnx")); + + auto test_case = test::TestCase(function, s_device); + test_case.add_input(SOFTMAX_INPUT); + + // clang-format off + test_case.add_expected_output( + Shape{3, 4, 5}, + {0.88890495, 0.04825497, 0.27088348, 0.04490523, 0.02037154, + 0.06955369, 0.31998834, 0.39223197, 0.68041159, 0.05141776, + 0.02566661, 0.5885689, 0.12453075, 0.06257374, 0.03019055, + 0.01587475, 0.0431878, 0.21235381, 0.21210944, 0.89802015, + + 0.31752626, 0.19442629, 0.0546935, 0.06279221, 0.36823282, + 0.10362164, 0.06523066, 0.24006419, 0.03103672, 0.32987983, + 0.55743381, 0.473766, 0.61451431, 0.09486084, 0.03722801, + 0.02141829, 0.26657706, 0.090728, 0.81131024, 0.26465935, + + 0.08619648, 0.43343993, 0.3877785, 0.04523505, 0.15625437, + 0.61900597, 0.01653285, 0.06394322, 0.56592636, 0.27376196, + 0.11201305, 0.31654337, 0.21947994, 0.07893034, 0.05236297, + 0.18278451, 0.23348385, 0.32879834, 0.30990825, 0.5176207}); + // clang-format on + + test_case.run(4); } -NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_invalid_axis_3D) { - ASSERT_THROW( - onnx_import::import_onnx_model(file_util::path_join(SERIALIZED_ZOO, "onnx/softmax_invalid_axis_3D.onnx")), - ngraph::ngraph_error) - << "Softmax model with invalid axis was successfully imported while it should have thrown."; +NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_axis_negative_1_opset11) { + auto function = onnx_import::import_onnx_model( + file_util::path_join(SERIALIZED_ZOO, "onnx/softmax_axis_negative_1_opset11.onnx")); + + auto test_case = test::TestCase(function); + test_case.add_input(SOFTMAX_INPUT); + + // clang-format off + test_case.add_expected_output( + Shape{3, 4, 5}, + {0.88890495, 0.04825497, 0.27088348, 0.04490523, 0.02037154, + 0.06955369, 0.31998834, 0.39223197, 0.68041159, 0.05141776, + 0.02566661, 0.5885689, 0.12453075, 0.06257374, 0.03019055, + 0.01587475, 0.0431878, 0.21235381, 0.21210944, 0.89802015, + + 0.31752626, 0.19442629, 0.0546935, 0.06279221, 0.36823282, + 0.10362164, 0.06523066, 0.24006419, 0.03103672, 0.32987983, + 0.55743381, 0.473766, 0.61451431, 
0.09486084, 0.03722801, + 0.02141829, 0.26657706, 0.090728, 0.81131024, 0.26465935, + + 0.08619648, 0.43343993, 0.3877785, 0.04523505, 0.15625437, + 0.61900597, 0.01653285, 0.06394322, 0.56592636, 0.27376196, + 0.11201305, 0.31654337, 0.21947994, 0.07893034, 0.05236297, + 0.18278451, 0.23348385, 0.32879834, 0.30990825, 0.5176207}); + // clang-format on + + test_case.run(6); +} + +NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_axis_negative_1_opset13) { + auto function = onnx_import::import_onnx_model( + file_util::path_join(SERIALIZED_ZOO, "onnx/softmax_axis_negative_1_opset13.onnx")); + + auto test_case = test::TestCase(function); + test_case.add_input(SOFTMAX_INPUT); + + // clang-format off + test_case.add_expected_output( + Shape{3, 4, 5}, + {0.88890495, 0.04825497, 0.27088348, 0.04490523, 0.02037154, + 0.06955369, 0.31998834, 0.39223197, 0.68041159, 0.05141776, + 0.02566661, 0.5885689, 0.12453075, 0.06257374, 0.03019055, + 0.01587475, 0.0431878, 0.21235381, 0.21210944, 0.89802015, + + 0.31752626, 0.19442629, 0.0546935, 0.06279221, 0.36823282, + 0.10362164, 0.06523066, 0.24006419, 0.03103672, 0.32987983, + 0.55743381, 0.473766, 0.61451431, 0.09486084, 0.03722801, + 0.02141829, 0.26657706, 0.090728, 0.81131024, 0.26465935, + + 0.08619648, 0.43343993, 0.3877785, 0.04523505, 0.15625437, + 0.61900597, 0.01653285, 0.06394322, 0.56592636, 0.27376196, + 0.11201305, 0.31654337, 0.21947994, 0.07893034, 0.05236297, + 0.18278451, 0.23348385, 0.32879834, 0.30990825, 0.5176207}); + // clang-format on + + test_case.run(6); } NGRAPH_TEST(${BACKEND_NAME}, onnx_model_sub) { diff --git a/src/frontends/onnx/frontend/src/op/softmax.cpp b/src/frontends/onnx/frontend/src/op/softmax.cpp index 3aa517f3c12..ce609a52e44 100644 --- a/src/frontends/onnx/frontend/src/op/softmax.cpp +++ b/src/frontends/onnx/frontend/src/op/softmax.cpp @@ -37,17 +37,8 @@ OutputVector softmax(const Node& node) { result = default_opset::Constant::create(data.get_element_type(), Shape{}, {1}); break; } - case 1: { - // checks if the axis belongs to the allowed values set (-1 and 0 for 1D) - ngraph::normalize_axis(node.get_description(), axis, data.get_partial_shape().rank()); - result = std::make_shared(data, 0); - break; - } default: { - const auto normalized_axis = - ngraph::normalize_axis(node.get_description(), axis, data.get_partial_shape().rank()); - - result = onnx_softmax(data, normalized_axis); + result = onnx_softmax(data, axis); break; } } @@ -69,17 +60,8 @@ OutputVector softmax(const Node& node) { result = default_opset::Constant::create(data.get_element_type(), Shape{}, {1}); break; } - case 1: { - // checks if the axis belongs to the allowed values set (-1 and 0 for 1D) - ngraph::normalize_axis(node.get_description(), axis, data.get_partial_shape().rank()); - result = std::make_shared(data, 0); - break; - } default: { - const auto normalized_axis = - ngraph::normalize_axis(node.get_description(), axis, data.get_partial_shape().rank()); - - result = std::make_shared(data, normalized_axis); + result = std::make_shared(data, axis); break; } } @@ -92,9 +74,8 @@ OutputVector softmax(const Node& node) { const auto data = node.get_ng_inputs().at(0); const auto axis = node.get_attribute_value("axis", -1); - const auto normalized_axis = ngraph::normalize_axis(node.get_description(), axis, data.get_partial_shape().rank()); - return {std::make_shared(data, normalized_axis)}; + return {std::make_shared(data, axis)}; } } // namespace set_13 } // namespace op From d1e54d996112cb0eb425b65dc08904aa1ded7855 Mon Sep 17 00:00:00 2001 From: 
Irina Efode Date: Wed, 15 Dec 2021 23:43:24 +0300 Subject: [PATCH 13/27] [IE TESTS] Fix filters in report (#9232) --- .../layer_tests_summary/template/filters.js | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/tests/ie_test_utils/functional_test_utils/layer_tests_summary/template/filters.js b/src/tests/ie_test_utils/functional_test_utils/layer_tests_summary/template/filters.js index 6005190e7cc..beb86e1b65f 100644 --- a/src/tests/ie_test_utils/functional_test_utils/layer_tests_summary/template/filters.js +++ b/src/tests/ie_test_utils/functional_test_utils/layer_tests_summary/template/filters.js @@ -99,15 +99,15 @@ function filterTable() { if (implementation != 0) { if (implementation == 'ni') { $("#report #data tr:not(:hidden)").filter(function () { - $(this).toggle($(this).find('td').hasClass("not_impl")) + $(this).toggle($(this).find('td').hasClass("value " + device + " not_impl")) }); } else if (implementation == 'i') { $("#report #data tr:not(:hidden)").filter(function () { - $(this).toggle($(this).find('td').hasClass("impl")); + $(this).toggle($(this).find('td').hasClass("value " + device + " impl")); }); } else { $("#report #data tr:not(:hidden)").filter(function () { - $(this).toggle(!$(this).find('td').hasClass("not_impl") && !$(this).find('td').hasClass("impl")); + $(this).toggle(!$(this).find('td').hasClass("value")); }); } } @@ -116,19 +116,19 @@ function filterTable() { selector = []; select.forEach(item => { if (item == '100p') { - selector.push('.value:visible[crashed="0"][failed="0"][skipped="0"]'); + selector.push('.value:visible[crashed="0"][failed="0"][skipped="0"][value!="---"]'); } if (item == '100f') { - selector.push('.value:visible[passed="0"]'); + selector.push('.value:visible[passed="0"][value!="---"]'); } if (item == 'p') { - selector.push('.value:visible[passed!="0"]'); + selector.push('.value:visible[passed!="0"][value!="---"]'); } if (item == 'f') { - selector.push('.value:visible[failed!="0"]'); + selector.push('.value:visible[failed!="0"][value!="---"]'); } if (item == 'c') { - selector.push('.value:visible[crashed!="0"]'); + selector.push('.value:visible[crashed!="0"][value!="---"]'); } if (item == 's') { selector.push('.value:visible[value!="---"][skipped!="0"]'); From 40f668140ec6536b4d47e05f5185eb8ac5accf7e Mon Sep 17 00:00:00 2001 From: Mateusz Tabaka Date: Wed, 15 Dec 2021 23:52:26 +0100 Subject: [PATCH 14/27] Fix compilation error in template_plugin tests (#9248) --- .../tests/functional/op_reference/einsum.cpp | 8 ++++---- .../functional/op_reference/extract_image_patches.cpp | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/template_plugin/tests/functional/op_reference/einsum.cpp b/docs/template_plugin/tests/functional/op_reference/einsum.cpp index 31460fd2ccf..625521dc42b 100644 --- a/docs/template_plugin/tests/functional/op_reference/einsum.cpp +++ b/docs/template_plugin/tests/functional/op_reference/einsum.cpp @@ -30,7 +30,7 @@ class ReferenceEinsumTest : public testing::TestWithParam, public public: void SetUp() override { auto params = GetParam(); - function = CreateFunction(params); + function = CreateModel(params); for (const auto& input_tensor : params.inputs) { inputData.push_back(input_tensor.data); } @@ -52,7 +52,7 @@ public: } private: - static std::shared_ptr CreateFunction(const EinsumParams& params) { + static std::shared_ptr CreateModel(const EinsumParams& params) { OutputVector output_vector; ParameterVector param_vector; for (const auto& input_tensor : 
params.inputs) { @@ -61,7 +61,7 @@ private: param_vector.push_back(param); } const auto einsum = std::make_shared(output_vector, params.equation); - const auto f = std::make_shared(OutputVector{einsum}, param_vector); + const auto f = std::make_shared(OutputVector{einsum}, param_vector); return f; } }; @@ -179,4 +179,4 @@ std::vector generateCombinedParams() { INSTANTIATE_TEST_SUITE_P(smoke_Einsum_With_Hardcoded_Refs, ReferenceEinsumTest, testing::ValuesIn(generateCombinedParams()), ReferenceEinsumTest::getTestCaseName); -} // namespace \ No newline at end of file +} // namespace diff --git a/docs/template_plugin/tests/functional/op_reference/extract_image_patches.cpp b/docs/template_plugin/tests/functional/op_reference/extract_image_patches.cpp index 95f5571cc41..503880ce8ac 100644 --- a/docs/template_plugin/tests/functional/op_reference/extract_image_patches.cpp +++ b/docs/template_plugin/tests/functional/op_reference/extract_image_patches.cpp @@ -36,7 +36,7 @@ class ReferenceExtractImagePatchesTest : public testing::TestWithParam CreateFunction(const ExtractImagePatchesParams& params) { + static std::shared_ptr CreateModel(const ExtractImagePatchesParams& params) { const auto data = std::make_shared(params.data.type, params.data.shape); const auto extrace_image_patches = std::make_shared(data, params.sizes, params.strides, params.rates, params.autoPad); - const auto f = std::make_shared(extrace_image_patches, ParameterVector{data}); + const auto f = std::make_shared(extrace_image_patches, ParameterVector{data}); return f; } }; @@ -243,4 +243,4 @@ std::vector generateCombinedParams() { INSTANTIATE_TEST_SUITE_P(smoke_ExtractImagePatches_With_Hardcoded_Refs, ReferenceExtractImagePatchesTest, testing::ValuesIn(generateCombinedParams()), ReferenceExtractImagePatchesTest::getTestCaseName); -} // namespace \ No newline at end of file +} // namespace From 9b71a5fb704acc7f3360c1c7b80b1905737cada2 Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Thu, 16 Dec 2021 07:52:59 +0300 Subject: [PATCH 15/27] Fixed python tests (#9238) --- src/bindings/python/tests/test_utils/test_utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/bindings/python/tests/test_utils/test_utils.py b/src/bindings/python/tests/test_utils/test_utils.py index b312689aaa7..d8364635813 100644 --- a/src/bindings/python/tests/test_utils/test_utils.py +++ b/src/bindings/python/tests/test_utils/test_utils.py @@ -1,9 +1,9 @@ # Copyright (C) 2021 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -from openvino.runtime import Function -from openvino.runtime.impl import Shape, Type -from openvino.runtime.impl.op import Parameter +from openvino.runtime import Model +from openvino.runtime import Shape, Type +from openvino.runtime.op import Parameter import openvino.runtime.opset8 as ops @@ -11,7 +11,7 @@ def get_test_function(): element_type = Type.f32 param = Parameter(element_type, Shape([1, 3, 22, 22])) relu = ops.relu(param) - func = Function([relu], [param], "test") + func = Model([relu], [param], "test") assert func is not None return func From ea3f34c3516775724d61f926d44eeea49ecc6421 Mon Sep 17 00:00:00 2001 From: Indira Salyahova Date: Thu, 16 Dec 2021 08:51:24 +0300 Subject: [PATCH 16/27] Temporary revert test layout (#9242) * Update test_image_loading.py * Update test_image_loading.py --- tools/pot/tests/test_image_loading.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tools/pot/tests/test_image_loading.py b/tools/pot/tests/test_image_loading.py index 
ff82d73c3d6..fc520c19d48 100644 --- a/tools/pot/tests/test_image_loading.py +++ b/tools/pot/tests/test_image_loading.py @@ -46,15 +46,17 @@ def test_check_image(tmp_path, models, model_name, model_framework): assert num_images_from_data_loader == num_images_in_dir -TEST_MODELS_LAYOUT = [('mobilenet-v2-pytorch', 'pytorch', 'NCHW', (3, 224, 224)), - ('mobilenet-v2-pytorch', 'pytorch', 'NHWC', (224, 224, 3)), - ('mobilenet-v2-pytorch', 'pytorch', None, (3, 224, 224)), - ('mobilenet-v1-1.0-224-tf', 'tf', None, (224, 224, 3))] +TEST_MODELS_LAYOUT = [ + #('mobilenet-v2-pytorch', 'pytorch', 'NCHW', (3, 224, 224)), + #('mobilenet-v2-pytorch', 'pytorch', 'NHWC', (224, 224, 3)), + #('mobilenet-v2-pytorch', 'pytorch', None, (3, 224, 224)), + #('mobilenet-v1-1.0-224-tf', 'tf', None, (224, 224, 3)) +] @pytest.mark.parametrize( - 'model_name, model_framework, layout, reference_shape', TEST_MODELS, - ids=['{}_{}'.format(m[0], m[1]) for m in TEST_MODELS]) + 'model_name, model_framework, layout, reference_shape', TEST_MODELS_LAYOUT, + ids=['{}_{}_{}_{}'.format(m[0], m[1], m[2], m[3]) for m in TEST_MODELS_LAYOUT]) def test_check_layout(tmp_path, models, model_name, model_framework, layout, reference_shape): test_dir = Path(__file__).parent path_image_data = os.path.join(test_dir, "data/image_data") From d5f84ad783bbd7b1478c98ce691401dea2afa4e7 Mon Sep 17 00:00:00 2001 From: Maxim Andronov Date: Thu, 16 Dec 2021 09:53:14 +0300 Subject: [PATCH 17/27] [CPU] Deconvolution dynamism support (#8512) --- .../src/mkldnn_plugin/mkldnn_graph.cpp | 1 - .../src/mkldnn_plugin/mkldnn_node.cpp | 6 +- .../src/mkldnn_plugin/mkldnn_node.h | 10 +- .../src/mkldnn_plugin/mkldnn_primitive.h | 1 - .../mkldnn_plugin/nodes/mkldnn_conv_node.cpp | 2 +- .../nodes/mkldnn_deconv_node.cpp | 611 +++++++++++--- .../mkldnn_plugin/nodes/mkldnn_deconv_node.h | 113 ++- .../nodes/mkldnn_fullyconnected_node.cpp | 2 +- .../nodes/mkldnn_matmul_node.cpp | 2 +- .../skip_tests_config.cpp | 4 + .../convolution_backprop_data.cpp | 679 +++++++++++----- .../group_convolution_backprop_data.cpp | 744 +++++++++++++----- .../src/base/ov_subgraph.cpp | 2 + .../src/convolution_backprop_data.cpp | 8 +- .../src/group_convolution_backprop_data.cpp | 8 +- .../unit/cpu/mkldnn_memory_desc_test.cpp | 2 +- 16 files changed, 1693 insertions(+), 502 deletions(-) diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp index e4a5aa18f2f..6765a1c0b67 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp @@ -319,7 +319,6 @@ void MKLDNNGraph::InitGraph() { SortTopologically(); InitDescriptors(); - RemoveDroppedEdges(); InitOptimalPrimitiveDescriptors(); diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp index 44ea9e933d2..a0a10a93ec4 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp @@ -717,7 +717,7 @@ void MKLDNNNode::initDescriptor(const NodeConfig& config) { selectedPD->setConfig(rightConfig); } -void MKLDNNNode::prepareMemory(const NodeDesc *selected_pd, mkldnn::primitive_desc_iterator& itpd) { +void MKLDNNNode::prepareMemory(mkldnn::primitive_desc_iterator& itpd) { for (size_t i = 0; i < getChildEdges().size(); i++) { auto &dstMemPtr = getChildEdgeAt(i)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr()) @@ -1049,7 +1049,9 @@ void MKLDNNNode::setDynamicBatchLim(int lim) { } } -void 
MKLDNNNode::appendPostOpArgs(const mkldnn::primitive_attr& attr) { +void MKLDNNNode::appendPostOpArgs(const mkldnn::primitive_attr& attr, + std::unordered_map& primArgs, + const std::vector& binaryPostOpsArgs) { auto post_ops = attr.get_post_ops(); int idx = 0; for (int i = 0; i < post_ops.len(); i++) { diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.h b/inference-engine/src/mkldnn_plugin/mkldnn_node.h index aee4f876806..97517d54fbc 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_node.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.h @@ -208,7 +208,9 @@ public: return 1; } - void appendPostOpArgs(const mkldnn::primitive_attr& attr); + static void appendPostOpArgs(const mkldnn::primitive_attr& attr, + std::unordered_map& primArgs, + const std::vector& binaryPostOpsArgs); bool isFusedWith(Type type) const; @@ -425,7 +427,7 @@ public: if (impl_type == selected_pd->getImplementationType() && descsCompatible(srcDescs, selected_pd->getConfig().inConfs) && descsCompatible(dstDescs, selected_pd->getConfig().outConfs)) { - prepareMemory(selected_pd, itpd); + prepareMemory(itpd); PD prim_desc = createPd(desc); return {itpd.get()}; } @@ -722,6 +724,8 @@ protected: supportedPrimitiveDescriptors.push_back({config, implType}); } + void prepareMemory(mkldnn::primitive_desc_iterator& itpd); + bool isDynamic = false; bool inputShapesDefined() const; @@ -746,6 +750,7 @@ protected: } std::vector lastInputDims = {}; + std::shared_ptr opToShapeInfer; private: @@ -788,7 +793,6 @@ private: return PD(*selected_desc_ptr, engine); } - void prepareMemory(const NodeDesc *selected_pd, mkldnn::primitive_desc_iterator& itpd); enum LOOK { LOOK_UP = 1, LOOK_DOWN = 2 }; ConstantType checkConstant(LOOK look, std::vector& checkNodes); diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_primitive.h b/inference-engine/src/mkldnn_plugin/mkldnn_primitive.h index ffd43ee8dbd..d7e9c05a6fe 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_primitive.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_primitive.h @@ -18,7 +18,6 @@ public: operator bool() const; MKLDNNPrimitive& operator=(const std::shared_ptr& primitive); mkldnn::primitive operator*(); - void reset(mkldnn::primitive* primitive); private: diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp index e2f01e85cef..03f7700e7af 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp @@ -979,7 +979,7 @@ void MKLDNNConvolutionNode::prepareParams() { primArgs[DNNL_ARG_BIAS] = getBias(); } - appendPostOpArgs(*pAttrLocal); + appendPostOpArgs(*pAttrLocal, primArgs, binaryPostOpsArgs); } void MKLDNNConvolutionNode::executeDynamicImpl(dnnl::stream strm) { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp index 6a2c6332e38..f81e4601eab 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp @@ -13,34 +13,38 @@ #include #include "ie_parallel.hpp" #include "utils/general_utils.h" -#include #include #include #include #include "memory_desc/dnnl_blocked_memory_desc.h" #include "utils/cpu_utils.hpp" +#include +#include +#include +#include +#include "convolution_shape_inference.hpp" + using namespace mkldnn; using namespace MKLDNNPlugin; using namespace InferenceEngine; bool 
MKLDNNDeconvolutionNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { - if (isDynamicNgraphNode(op)) { - errorMessage = "Doesn't support op with dynamic shapes"; - return false; - } - if (std::dynamic_pointer_cast(op) == nullptr && std::dynamic_pointer_cast(op) == nullptr) { errorMessage = "Only opset1 ConvolutionBackpropData and GroupConvolutionBackpropData operations are supported"; return false; } - size_t ndims = op->get_input_shape(0).size(); + size_t ndims = op->get_input_partial_shape(0).rank().get_length(); if ((ndims < 3) || (ndims > 5)) { errorMessage = "Only 3D, 4D and 5D blobs are supported as input"; return false; } + if (op->get_input_partial_shape(1).is_dynamic() || (op->get_input_size() > 2 && op->get_input_partial_shape(2).is_dynamic())) { + errorMessage = "Doesn't support dynamic shapes for 'weights' and 'output_shape' inputs"; + return false; + } } catch (...) { return false; } @@ -58,15 +62,14 @@ MKLDNNDeconvolutionNode::MKLDNNDeconvolutionNode(const std::shared_ptr(op); auto groupConvBackprop = std::dynamic_pointer_cast(op); - const auto dataShape = op->get_input_shape(0); - weightDims = op->get_input_shape(1); - const auto outShape = op->get_shape(); - OC = outShape[1]; - IC = dataShape[1]; + const auto& weightDims = getWeightDims(); if (convBackprop) { algorithm = DeconvolutionCommon; + IC = weightDims[0]; + OC = weightDims[1]; + groupNum = 1; withGroups = false; @@ -78,10 +81,17 @@ MKLDNNDeconvolutionNode::MKLDNNDeconvolutionNode(const std::shared_ptrget_pads_begin(); paddingR = convBackprop->get_pads_end(); + + outputPadding = convBackprop->get_output_padding(); + + autoPad = one_of(convBackprop->get_auto_pad(), ov::op::PadType::SAME_LOWER, ov::op::PadType::SAME_UPPER); } else if (groupConvBackprop) { algorithm = DeconvolutionGrouped; groupNum = weightDims[0]; + IC = groupNum * weightDims[1]; + OC = groupNum * weightDims[2]; + withGroups = groupNum > 1; isDW = withGroups && groupNum == OC && groupNum == IC; @@ -93,10 +103,26 @@ MKLDNNDeconvolutionNode::MKLDNNDeconvolutionNode(const std::shared_ptrget_pads_begin(); paddingR = groupConvBackprop->get_pads_end(); + + outputPadding = groupConvBackprop->get_output_padding(); + + autoPad = one_of(groupConvBackprop->get_auto_pad(), ov::op::PadType::SAME_LOWER, ov::op::PadType::SAME_UPPER); } for (int i = 0; i < dilation.size(); i++) { kernel.push_back(weightDims[withGroups + 2 + i]); } + + externOutShape = inputShapes.size() == 3; + if (externOutShape && isDynamicNode()) { + bool isConstOutShape = ngraph::is_type(op->get_input_node_shared_ptr(2)); + if (isConstOutShape) { + lastOutputSpatialDims = ov::as_type(op->get_input_node_ptr(2))->cast_vector(); + } + const auto spDimsNum = getInputShapeAtPort(0).getRank() - 2; + if (getInputShapeAtPort(2).getStaticDims()[0] != spDimsNum || (isConstOutShape && lastOutputSpatialDims.size() != spDimsNum)) { + IE_THROW() << "'output_shape' input has incorrect number of elements. Expected = " << spDimsNum; + } + } } else { IE_THROW(NotImplemented) << errorMessage; } @@ -113,14 +139,6 @@ InferenceEngine::Blob::Ptr MKLDNNDeconvolutionNode::createWeiBlobAsIO(InferenceE auto const blbSize = blb->GetSize(); // WA: In int8 case, we are processing weights using internal blob. - // So we disconnect constant node containing weights from the graph and then don't use it. 
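/* A minimal sketch (not part of this patch) of the spatial-size arithmetic that
 * makeDummyInOutShape() below inverts. It assumes this file's oneDNN-style
 * convention where the stored dilation is (d - 1):
 *
 *     int64_t effKernel = (k - 1) * (dilation + 1) + 1;
 *     // forward deconvolution output size per spatial axis:
 *     int64_t out = stride * (in - 1) + effKernel - padBegin - padEnd + outPad;
 *     // inverse, used to derive a dummy input for a requested output size:
 *     int64_t in  = (out - effKernel + padBegin + padEnd - outPad) / stride + 1;
 *
 * For example, in = 5, k = 3, stride = 2, dilation = 0 and zero pads give
 * out = 2 * 4 + 3 = 11, and the inverse recovers in = 5.
 */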
- if (getParentEdges().size() == 3) { - removeEdge(getParentEdgeAt(2)); - inputShapes.erase(inputShapes.begin() + 2); - } - removeEdge(getParentEdgeAt(1)); - inputShapes.erase(inputShapes.begin() + 1); - InferenceEngine::SizeVector dimsForBlockedDesc{dims}; std::swap(dimsForBlockedDesc[withGroups + 0], dimsForBlockedDesc[withGroups + 1]); @@ -160,13 +178,16 @@ bool MKLDNNDeconvolutionNode::canBeExecutedInInt8() const { if (!withGroups && stride.back() > 3) return false; if (!impl::cpu::x64::mayiuse(impl::cpu::x64::avx512_common)) { - auto inDims = getOutputShapeAtPort(0).getStaticDims(); + const auto& inMaxDims = getOutputShapeAtPort(0).getMaxDims(); + if (std::any_of(inMaxDims.begin(), inMaxDims.end(), [](Dim dim) { return dim == Shape::UNDEFINED_DIM; })) { + return false; + } // heuristicConst = 2^26 // heuristicParam = IC^2 * SP auto heuristicConst = 67108864; auto heuristicParam = IC * IC; - for (int i = 2; i < inDims.size(); i++) - heuristicParam *= inDims[i]; + for (int i = 2; i < inMaxDims.size(); i++) + heuristicParam *= inMaxDims[i]; if (heuristicParam > heuristicConst) return false; } @@ -203,10 +224,65 @@ bool MKLDNNDeconvolutionNode::canFuse(const MKLDNNNodePtr& node) const { return (fusedWith.empty() && node->canBePerformedAsScaleShift(this)); } -void MKLDNNDeconvolutionNode::getSupportedDescriptors() { - if (!descs_fwd.empty() && !descs_bwd.empty()) - return; +void MKLDNNDeconvolutionNode::initPadding(std::shared_ptr op, const Shape &inDims, const std::vector& outSpDims) { + std::vector input_shapes{inDims.getStaticDims(), getWeightDims()}; + ov::StaticShape output_shape_input; + if (externOutShape) { + IE_ASSERT(outSpDims.size() == getInputShapeAtPort(2).getStaticDims()[0]); + input_shapes.push_back({outSpDims.size()}); + for (size_t i = 0; i < outSpDims.size(); i++) { + output_shape_input.push_back(outSpDims[i]); + } + } + if (getAlgorithm() == DeconvolutionCommon) { + auto deconv = ngraph::as_type_ptr(op); + IE_ASSERT(ov::op::v1::resolve_auto_pad_for_shape_back_prop(deconv.get(), paddingL, paddingR, input_shapes, output_shape_input, 2, 2)); + } else if (getAlgorithm() == DeconvolutionGrouped) { + auto deconv = ngraph::as_type_ptr(op); + IE_ASSERT(ov::op::v1::resolve_auto_pad_for_shape_back_prop(deconv.get(), paddingL, paddingR, input_shapes, output_shape_input, 2, 3)); + } +} + +std::pair MKLDNNDeconvolutionNode::makeDummyInOutShape() { + auto inShape = MemoryDescUtils::makeDummyShape(getInputShapeAtPort(0)); + auto outShape = getOutputShapeAtPort(0); + + if (isDynamicNode()) { + if (externOutShape) { + if (lastOutputSpatialDims.empty()) { + const auto& shape = getOutputShapeAtPort(0); + lastOutputSpatialDims.resize(shape.getRank() - 2); + + const auto& minDims = shape.getMinDims(); + const auto& maxDims = shape.getMaxDims(); + const auto& dims = shape.getDims(); + for (size_t i = 0; i < dims.size() - 2; ++i) { + lastOutputSpatialDims[i] = dims[i + 2] == Shape::UNDEFINED_DIM ? std::min(maxDims[i + 2], + std::max(minDims[i + 2], static_cast(64))) : dims[i + 2]; + } + } + ov::CoordinateDiff pb = autoPad ? ov::CoordinateDiff(paddingL.size(), 0) : paddingL; + ov::CoordinateDiff pe = autoPad ? ov::CoordinateDiff(paddingR.size(), 0) : paddingR; + + auto inputDims = inShape.getStaticDims(); + const auto& weightDims = getWeightDims(); + const size_t wghOffset = getAlgorithm() == DeconvolutionGrouped ? 
1 : 0; + for (size_t i = 0; i < inputDims.size() - 2; i++) { + inputDims[2 + i] = ((lastOutputSpatialDims[i] - (dilation[i] + 1) * + (weightDims[wghOffset + 2 + i] - 1) - 1 + pb[i] + pe[i] - outputPadding[i])) / + stride[i] + 1; + } + + inShape = Shape(inputDims); + } + initPadding(opToShapeInfer, inShape, lastOutputSpatialDims); + outShape = Shape(shapeInferInternal(inShape.getStaticDims(), lastOutputSpatialDims)); + } + return {inShape.getStaticDims(), outShape.getStaticDims()}; +} + +void MKLDNNDeconvolutionNode::getSupportedDescriptors() { isInt8 = canBeExecutedInInt8(); InferenceEngine::Precision inPrecision = getOriginalInputPrecisionAtPort(0); @@ -236,21 +312,17 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() { if (getChildEdges().empty()) IE_THROW() << errorPrefix << " has incorrect number of output edges"; - for (int i = 0; i < paddingR.size(); i++) { - int with_group = getAlgorithm() == DeconvolutionGrouped ? 1 : 0; - int krn = weightDims[with_group + 2 + i]; - int src = getOutputShapeAtPort(0).getStaticDims()[2 + i]; - int dst = getInputShapeAtPort(0).getStaticDims()[2 + i]; - - krn = (krn - 1)*(dilation[i] + 1) + 1; - int calc_dst = (src - krn + paddingL[i]) / stride[i] + 1; - paddingR[i] = (dst - calc_dst) * stride[i]; - } + VectorDims inDims, outDims; + std::tie(inDims, outDims) = makeDummyInOutShape(); + inShape = Shape(inDims); + Shape outShape(outDims); + initPaddingR(inShape, outShape); if (isInt8) { + int8WeightDims = getWeightDims(); // WA: if int8 deconvolution is supported, we create internal weights blob in IO format - std::swap(weightDims[withGroups + 0], weightDims[withGroups + 1]); - internalBlobs.push_back(createWeiBlobAsIO(weightDims)); + std::swap(int8WeightDims[withGroups + 0], int8WeightDims[withGroups + 1]); + internalBlobs.push_back(createWeiBlobAsIO(int8WeightDims)); auto format = getInputShapeAtPort(0).getRank() == 5 ? dnnl::memory::format_tag::ndhwc : dnnl::memory::format_tag::nhwc; MemoryDescPtr in_candidate = std::make_shared(getInputShapeAtPort(0), inputDataType, format); MemoryDescPtr out_candidate = std::make_shared(getOutputShapeAtPort(0), outputDataType, format); @@ -262,18 +334,31 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() { createDescriptor({in_candidate}, {out_candidate}); } } - setPostOps(attr); + setPostOps(attr, outShape.getStaticDims()); } -void MKLDNNDeconvolutionNode::setPostOps(mkldnn::primitive_attr &attr) { +void MKLDNNDeconvolutionNode::initPaddingR(const Shape &inShape, const Shape &outShape) { + for (int i = 0; i < paddingR.size(); i++) { + int with_group = getAlgorithm() == DeconvolutionGrouped ? 
1 : 0; + const auto& weightDims = getWeightDims(); + int krn = weightDims[with_group + 2 + i]; + int src = outShape.getStaticDims()[2 + i]; + int dst = inShape.getStaticDims()[2 + i]; + + krn = (krn - 1)*(dilation[i] + 1) + 1; + int calc_dst = (src - krn + paddingL[i]) / stride[i] + 1; + paddingR[i] = (dst - calc_dst) * stride[i]; + } +} + +void MKLDNNDeconvolutionNode::setPostOps(mkldnn::primitive_attr &attr, const VectorDims &dims) { mkldnn::post_ops ops; auto getBinPostOpShape = [&](){ - const auto outShape = getOutputShapeAtPort(0).getStaticDims(); const auto outShapeRank = getOutputShapeAtPort(0).getRank(); const auto chIdx = getFusingAxis(); std::vector binaryShape(outShapeRank, 1); - binaryShape[chIdx] = outShape[chIdx]; + binaryShape[chIdx] = dims[chIdx]; return binaryShape; }; @@ -282,7 +367,7 @@ void MKLDNNDeconvolutionNode::setPostOps(mkldnn::primitive_attr &attr) { // TODO [DS]: change to shape from memory constexpr int align = 16; // use legacy depthwise since backprop convolution does not support binary post ops - eltwiseNode->appendPostOps(ops, getOutputShapeAtPort(0).getStaticDims(), align); + eltwiseNode->appendPostOps(ops, dims, align); continue; } if (auto* fakeQuantizeNode = dynamic_cast(node.get())) { @@ -339,80 +424,277 @@ bool MKLDNNDeconvolutionNode::created() const { return getType() == Deconvolution; } -void MKLDNNDeconvolutionNode::createPrimitive() { - if (prim) - return; - - if (isInt8) { - auto prim_desc = createPrimitiveDescriptor(attr); - - prim.reset(new deconvolution_forward(prim_desc)); - - auto src = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); - auto dst = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); - primArgs = {{DNNL_ARG_SRC, src}, {DNNL_ARG_WEIGHTS, internalBlobMemory[0]->GetPrimitive()}, {DNNL_ARG_DST, dst}}; - } else { - auto prim_desc = createPrimitiveDescriptor(attr); - - prim.reset(new convolution_backward_data(prim_desc)); - - auto src = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); - auto weights = getParentEdgeAt(1)->getMemory().GetPrimitive(); - auto dst = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); - primArgs = {{DNNL_ARG_DIFF_DST, src}, {DNNL_ARG_WEIGHTS, weights}, {DNNL_ARG_DIFF_SRC, dst}}; +bool MKLDNNDeconvolutionNode::needShapeInfer() const { + if (inputShapesModified()) { + return true; + } + if (externOutShape) { + if (lastOutputSpatialDims != readOutputSpatialDims()) { + return true; + } } - appendPostOpArgs(attr); + return false; } -void MKLDNNDeconvolutionNode::createDescriptor(const std::vector &inputDesc, - const std::vector &outputDesc) { - const auto in_candidate = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(*inputDesc[0]); - const auto out_candidate = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(*outputDesc[0]); +std::vector MKLDNNDeconvolutionNode::shapeInfer() const { + const auto &dataMemPtr = getParentEdgesAtPort(0)[0]->getMemoryPtr(); + std::vector outSpDims; + if (externOutShape) { + outSpDims = readOutputSpatialDims(); + } + return {shapeInferInternal(dataMemPtr->getStaticDims(), outSpDims)}; +} - // grouping and autoblicking is not compatible - if ((withGroups && !isDW) && (in_candidate.blocksExtended() || out_candidate.blocksExtended())) - return; +VectorDims MKLDNNDeconvolutionNode::shapeInferInternal(const VectorDims &inDims, std::vector outSpDims) const { + std::vector inputShapes = { + inDims, + getWeightDims() + }; + std::map> inputValues; + + if (externOutShape) { + if (outSpDims.size() != getInputShapeAtPort(2).getStaticDims()[0]) { + IE_THROW() 
<< "Can't compute output shape for node with name: " << getName() + << ", because the node has 'output_shape' input, but provided output spatial dims number is incorrect"; + } + inputShapes.push_back({outSpDims.size()}); + inputValues.insert({2, std::make_shared(ngraph::element::Type_t::i32, + inputShapes.back().to_shape(), + outSpDims.data())}); + } + + std::vector outputShapes(1); + shape_inference(opToShapeInfer.get(), inputShapes, outputShapes, inputValues); + + return outputShapes.back().to_shape(); +} + +void MKLDNNDeconvolutionNode::execute(mkldnn::stream strm) { + if (!execPtr) { + IE_THROW() << "Can't execute Deconvolution node with name: " << getName() << ", because executor is not compiled"; + } + execPtr->exec(strm); + + if (externOutShape) { + lastOutputSpatialDims = readOutputSpatialDims(); + } +} + +std::shared_ptr MKLDNNDeconvolutionNode::createDefaultMkldnnDeconvDesc(const mkldnn::memory::desc& srcDesc, + const mkldnn::memory::desc& wghDesc, + const mkldnn::memory::desc& dstDesc, + bool isWinograd) const { + mkldnn::algorithm alg = isWinograd ? mkldnn::algorithm::convolution_winograd : mkldnn::algorithm::convolution_direct; + std::shared_ptr deconv_desc; + std::shared_ptr fwd_conv_pd; + std::tie(deconv_desc, fwd_conv_pd) = createDescriptorInternalDefault(srcDesc, wghDesc, dstDesc, alg); + if (fwd_conv_pd->get(true) == nullptr) { + IE_THROW() << "Forward convolution primitive descriptor is nullable for node with name: " << getName(); + } + return std::make_shared(deconv_desc, fwd_conv_pd); +} + +std::shared_ptr MKLDNNDeconvolutionNode::createInt8MkldnnDeconvDesc(const mkldnn::memory::desc& srcDesc, + const mkldnn::memory::desc& wghDesc, + const mkldnn::memory::desc& dstDesc) const { + return std::make_shared(createDescriptorInternalInt8(srcDesc, wghDesc, dstDesc)); +} + +void MKLDNNDeconvolutionNode::createDeconvPrim(std::shared_ptr desc, + MKLDNNMemoryPtr srcMemPtr, + MKLDNNMemoryPtr wghMemPtr, + MKLDNNMemoryPtr dstMemPtr, + AttrPtr attr, + impl_desc_type selectedImpl) { + auto itpd = desc->createPrimitiveDescriptorIterator(getEngine(), *attr); + + while (static_cast(itpd)) { + impl_desc_type impl_type = parse_impl_name(itpd.impl_info_str()); + + if (impl_type == selectedImpl) { + if (isInt8) { + if (internalBlobMemory.empty()) { + prepareMemory(itpd); + } + auto prim_desc = deconvolution_forward::primitive_desc(itpd.get()); + execPtr = std::make_shared(prim_desc, srcMemPtr, internalBlobMemory.front(), dstMemPtr, *attr, + binaryPostOpsArgs, getEngine()); + } else { + auto prim_desc = convolution_backward_data::primitive_desc(itpd.get()); + execPtr = std::make_shared(prim_desc, srcMemPtr, wghMemPtr, dstMemPtr, *attr, + binaryPostOpsArgs, getEngine()); + } + return; + } + + if (!itpd.next_impl()) { + auto inDesc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(srcMemPtr->getStaticDims()), + memory::data_type::f32, + memory::format_tag::any); + auto wghDesc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(wghMemPtr->getStaticDims()), + memory::data_type::f32, + memory::format_tag::any); + auto outDesc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(dstMemPtr->getStaticDims()), + memory::data_type::f32, + memory::format_tag::any); + + std::shared_ptr anyDeconvDesc = createDefaultMkldnnDeconvDesc(inDesc, wghDesc, outDesc, false); + auto anyDeconvItpd = anyDeconvDesc->createPrimitiveDescriptorIterator(getEngine(), *attr); + if (static_cast(anyDeconvItpd)) { + auto prim_desc = 
convolution_backward_data::primitive_desc(anyDeconvItpd.get()); + execPtr = std::make_shared(prim_desc, srcMemPtr, wghMemPtr, dstMemPtr, *attr, + binaryPostOpsArgs, getEngine()); + return; + } + } + } + IE_THROW() << "Primitive descriptor was not found for node " << getName() << "."; +} + +void MKLDNNDeconvolutionNode::prepareParams() { + auto srcMemPtr = getParentEdgesAtPort(0)[0]->getMemoryPtr(); + auto dstMemPtr = getChildEdgesAtPort(0)[0]->getMemoryPtr(); + if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr()) + IE_THROW() << "Destination memory didn't allocate."; + if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr()) + IE_THROW() << "Input memory didn't allocate."; + const NodeDesc *selected_pd = getSelectedPrimitiveDescriptor(); + if (selected_pd == nullptr) + IE_THROW() << "Preferable primitive descriptor is not set for node " << getName() << "."; + + auto inMemoryDesc = getParentEdgesAtPort(0).front()->getMemory().GetDescWithType(); + auto outMemoryDesc = getChildEdgesAtPort(0).front()->getMemory().GetDescWithType(); + + auto initPrimitiveAttr = [&]() { + mkldnn::primitive_attr attr; + setPostOps(attr, dstMemPtr->getStaticDims()); + return std::make_shared(std::move(attr)); + }; + + AttrPtr pAttrLocal; + + if (isDynamicNode()) { + if (!pAttr) { + pAttr = initPrimitiveAttr(); + } + pAttrLocal = pAttr; + if (autoPad || externOutShape) { + initPadding(opToShapeInfer, inMemoryDesc->getShape(), externOutShape ? readOutputSpatialDims() : std::vector{}); + } + initPaddingR(inMemoryDesc->getShape(), outMemoryDesc->getShape()); + } else { + pAttrLocal = initPrimitiveAttr(); + } + + const auto in_candidate = inMemoryDesc->getDnnlDesc(); + const auto out_candidate = outMemoryDesc->getDnnlDesc(); + + mkldnn::memory::desc wgh_candidate; + if (isInt8) { + if (internalBlobMemory.empty()) { + wgh_candidate = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(int8WeightDims), memory::data_type::s8, memory::format_tag::any); + } else { + wgh_candidate = internalBlobMemory.front()->GetDescWithType()->getDnnlDesc(); + } + } else { + wgh_candidate = getParentEdgesAtPort(1).front()->getMemory().GetDescWithType()->getDnnlDesc(); + } + + std::shared_ptr desc; + if (isInt8) { + desc = createInt8MkldnnDeconvDesc(in_candidate, wgh_candidate, out_candidate); + } else { + desc = createDefaultMkldnnDeconvDesc(in_candidate, wgh_candidate, out_candidate, + selected_pd->getImplementationType() == MKLDNNPlugin::impl_desc_type::jit_avx512_winograd); + } + + createDeconvPrim(desc, srcMemPtr, getParentEdgesAtPort(1)[0]->getMemoryPtr(), dstMemPtr, pAttrLocal, selected_pd->getImplementationType()); +} + +void MKLDNNDeconvolutionNode::createPrimitive() { + if (inputShapesDefined()) { + if (needPrepareParams()) + prepareParams(); + updateLastInputDims(); + } +} + +MKLDNNDeconvolutionNode::DefaultDeconvDescs MKLDNNDeconvolutionNode::createDescriptorInternalDefault(const mkldnn::memory::desc& in_candidate, + const mkldnn::memory::desc& wgh_candidate, + const mkldnn::memory::desc& out_candidate, + mkldnn::algorithm alg) const { auto convertDims = [] (const std::vector& orig_dims) { return memory::dims(orig_dims.begin(), orig_dims.end()); }; + std::shared_ptr conv_desc; + conv_desc = std::make_shared(prop_kind::forward_inference, alg, + out_candidate, wgh_candidate, in_candidate, + convertDims(stride), + convertDims(dilation), + convertDims(paddingL), + convertDims(paddingR)); + + std::shared_ptr deconv_desc; + deconv_desc = std::make_shared(alg, out_candidate, wgh_candidate, + in_candidate, + convertDims(stride), + 
convertDims(dilation), + convertDims(paddingL), + convertDims(paddingR)); + + auto fwd_conv_pd = std::make_shared(*conv_desc, getEngine(), true); + + return {deconv_desc, fwd_conv_pd}; +} + +MKLDNNDeconvolutionNode::Int8DeconvDesc MKLDNNDeconvolutionNode::createDescriptorInternalInt8(const mkldnn::memory::desc& in_candidate, + const mkldnn::memory::desc& wgh_candidate, + const mkldnn::memory::desc& out_candidate) const { + auto convertDims = [] (const std::vector& orig_dims) { + return memory::dims(orig_dims.begin(), orig_dims.end()); + }; + + MKLDNNDeconvolutionNode::Int8DeconvDesc deconv_desc; + deconv_desc = std::make_shared(prop_kind::forward_inference, mkldnn::algorithm::deconvolution_direct, + in_candidate, wgh_candidate, out_candidate, + convertDims(stride), convertDims(dilation), + convertDims(paddingL), convertDims(paddingR)); + return deconv_desc; +} + +void MKLDNNDeconvolutionNode::createDescriptor(const std::vector &inputDesc, + const std::vector &outputDesc) { + auto inDesc = inputDesc[0]->isDefined() ? inputDesc[0] : inputDesc[0]->cloneWithNewDims(inShape.getStaticDims()); + auto dnnlInDesc = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(*inDesc); + auto in_candidate = dnnlInDesc.getDnnlDesc(); + + auto outDesc = outputDesc[0]; + if (!outDesc->isDefined()) { + const auto outShape = shapeInferInternal(inDesc->getShape().getStaticDims(), lastOutputSpatialDims); + outDesc = outDesc->cloneWithNewDims(outShape); + } + auto dnnlOutDesc = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(*outDesc); + auto out_candidate = dnnlOutDesc.getDnnlDesc(); + + // grouping and autoblocking is not compatible + if ((withGroups && !isDW) && (dnnlInDesc.blocksExtended() || dnnlOutDesc.blocksExtended())) + return; + if (isInt8) { - mkldnn::memory::desc wgh_candidate(MKLDNNExtensionUtils::convertToDnnlDims(weightDims), memory::data_type::s8, memory::format_tag::any); - std::shared_ptr deconv_desc; - deconv_desc.reset(new deconvolution_forward::desc(prop_kind::forward_inference, mkldnn::algorithm::deconvolution_direct, - in_candidate.getDnnlDesc(), wgh_candidate, out_candidate.getDnnlDesc(), - convertDims(stride), convertDims(dilation), - convertDims(paddingL), convertDims(paddingR))); - descs.emplace_back(deconv_desc); + mkldnn::memory::desc wgh_candidate(MKLDNNExtensionUtils::convertToDnnlDims(int8WeightDims), memory::data_type::s8, memory::format_tag::any); + descs.emplace_back(createDescriptorInternalInt8(in_candidate, wgh_candidate, out_candidate)); } else { - mkldnn::memory::desc wgh_candidate(MKLDNNExtensionUtils::convertToDnnlDims(weightDims), in_candidate.getDataType(), memory::format_tag::any); + mkldnn::memory::desc wgh_candidate(MKLDNNExtensionUtils::convertToDnnlDims(getWeightDims()), + dnnlInDesc.getDataType(), memory::format_tag::any); for (auto alg : {mkldnn::algorithm::convolution_winograd, mkldnn::algorithm::convolution_direct}) { - std::shared_ptr conv_desc; - conv_desc.reset(new convolution_forward::desc(prop_kind::forward_inference, alg, - out_candidate.getDnnlDesc(), wgh_candidate, in_candidate.getDnnlDesc(), - convertDims(stride), - convertDims(dilation), - convertDims(paddingL), - convertDims(paddingR))); - - std::shared_ptr deconv_desc; - deconv_desc.reset(new convolution_backward_data::desc(alg, out_candidate.getDnnlDesc(), wgh_candidate, - in_candidate.getDnnlDesc(), - convertDims(stride), - convertDims(dilation), - convertDims(paddingL), - convertDims(paddingR))); - descs_fwd.push_back(conv_desc); - descs_bwd.push_back(deconv_desc); - - auto fwd_conv_pd = 
std::make_shared(*conv_desc, getEngine(), true); + std::shared_ptr deconv_desc; + std::shared_ptr fwd_conv_pd; + std::tie(deconv_desc, fwd_conv_pd) = createDescriptorInternalDefault(in_candidate, wgh_candidate, out_candidate, alg); if (fwd_conv_pd->get(true) == nullptr) continue; - descs.emplace_back(deconv_desc, fwd_conv_pd); } } @@ -420,15 +702,25 @@ void MKLDNNDeconvolutionNode::createDescriptor(const std::vector std::shared_ptr MKLDNNDeconvolutionNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { if (idx == 2) { - return std::make_shared(getOriginalInputPrecisionAtPort(2), Shape(getInputShapeAtPort(2).getStaticDims())); + return std::make_shared(InferenceEngine::Precision::I32, Shape(getInputShapeAtPort(2).getStaticDims())); + } else if (idx > 0 && isInt8) { + // we need to store 'weight' input as edge, + // because at this moment we can't simple replace internal blob with input, since we need to save weight data as is, but with different order + return std::make_shared(getOriginalInputPrecisionAtPort(idx), Shape(getInputShapeAtPort(idx).getStaticDims())); } auto desc = idx > 0 ? primitive_desc_it.weights_desc(idx - 1) : isInt8 ? primitive_desc_it.src_desc(idx) : primitive_desc_it.diff_dst_desc(idx); + if (getInputShapeAtPort(idx).isDynamic()) { + return MKLDNNExtensionUtils::makeUndefinedDesc(desc, getInputShapeAtPort(idx)); + } return MKLDNNExtensionUtils::makeDescriptor(desc); } std::shared_ptr MKLDNNDeconvolutionNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { auto desc = isInt8 ? primitive_desc_it.dst_desc(idx) : primitive_desc_it.diff_src_desc(idx); + if (getOutputShapeAtPort(idx).isDynamic()) { + return MKLDNNExtensionUtils::makeUndefinedDesc(desc, getOutputShapeAtPort(idx)); + } return MKLDNNExtensionUtils::makeDescriptor(desc); } @@ -446,4 +738,117 @@ InferenceEngine::Precision MKLDNNDeconvolutionNode::getRuntimePrecision() const return getMaxPrecision(inputPrecisions); } +MKLDNNDeconvolutionNode::DeconvExecutor::IntermReorder::IntermReorder(MKLDNNMemoryPtr memFrom, + const mkldnn::memory::desc& descTo, + const mkldnn::engine& engine) : m_memFrom(memFrom) { + m_memTo = std::make_shared(engine); + m_memTo->Create(MKLDNNExtensionUtils::makeDescriptor(descTo)); + m_reorder = mkldnn::reorder(m_memFrom->GetPrimitive(), m_memTo->GetPrimitive()); +} + +MKLDNNDeconvolutionNode::DeconvExecutor::IntermReorder::IntermReorder(const mkldnn::memory::desc& descFrom, + MKLDNNMemoryPtr memTo, + const mkldnn::engine& engine) : m_memTo(memTo) { + m_memFrom = std::make_shared(engine); + m_memFrom->Create(MKLDNNExtensionUtils::makeDescriptor(descFrom)); + m_reorder = mkldnn::reorder(m_memFrom->GetPrimitive(), m_memTo->GetPrimitive()); +} + +void MKLDNNDeconvolutionNode::DeconvExecutor::IntermReorder::exec(mkldnn::stream strm) { + auto src = m_memFrom->GetPrimitive(); + auto dst = m_memTo->GetPrimitive(); + m_reorder.execute(strm, src, dst); +} + +void MKLDNNDeconvolutionNode::DeconvExecutor::exec(mkldnn::stream strm) { + for (auto &inReorder : inputReorders) { + inReorder.exec(strm); + } + (*execPrim).execute(strm, primArgs); + for (auto &outReorder : outputReorders) { + outReorder.exec(strm); + } +} + +MKLDNNDeconvolutionNode::DeconvExecutorDefault::DeconvExecutorDefault(const mkldnn::convolution_backward_data::primitive_desc& pd, + MKLDNNMemoryPtr inMem, + MKLDNNMemoryPtr weightMem, + MKLDNNMemoryPtr outMem, + const mkldnn::primitive_attr &attr, + const std::vector& binPostOpsArgs, + const mkldnn::engine& engine) { + 
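/* A hedged summary (not patch code) of the pattern this constructor sets up:
 * any argument whose runtime memory descriptor differs from the one the chosen
 * primitive descriptor expects gets an IntermReorder, so exec() runs
 *
 *     reorder(src)     -> pd.diff_dst_desc()   (only if layouts differ)
 *     reorder(weights) -> pd.weights_desc()    (only if layouts differ)
 *     convolution_backward_data(primArgs)
 *     reorder to dst from pd.diff_src_desc()   (only if layouts differ)
 *
 * so the primitive always consumes and produces exactly the layouts it was
 * compiled for, independent of the layouts held by the graph edges.
 */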
execPrim.reset(new mkldnn::convolution_backward_data(pd)); + + if (inMem->GetPrimitive().get_desc() != pd.diff_dst_desc()) { + inputReorders.push_back(IntermReorder(inMem, pd.diff_dst_desc(), engine)); + primArgs[DNNL_ARG_DIFF_DST] = inputReorders.back().getToMem()->GetPrimitive(); + } else { + primArgs[DNNL_ARG_DIFF_DST] = inMem->GetPrimitive(); + } + + if (weightMem->GetPrimitive().get_desc() != pd.weights_desc()) { + inputReorders.push_back(IntermReorder(weightMem, pd.weights_desc(), engine)); + primArgs[DNNL_ARG_WEIGHTS] = inputReorders.back().getToMem()->GetPrimitive(); + } else { + primArgs[DNNL_ARG_WEIGHTS] = weightMem->GetPrimitive(); + } + + if (outMem->GetPrimitive().get_desc() != pd.diff_src_desc()) { + outputReorders.push_back(IntermReorder(pd.diff_src_desc(), outMem, engine)); + primArgs[DNNL_ARG_DIFF_SRC] = outputReorders.back().getFromMem()->GetPrimitive(); + } else { + primArgs[DNNL_ARG_DIFF_SRC] = outMem->GetPrimitive(); + } + MKLDNNNode::appendPostOpArgs(attr, primArgs, binPostOpsArgs); +} + +MKLDNNDeconvolutionNode::DeconvExecutorInt8::DeconvExecutorInt8(const mkldnn::deconvolution_forward::primitive_desc& pd, + MKLDNNMemoryPtr inMem, + MKLDNNMemoryPtr weightMem, + MKLDNNMemoryPtr outMem, + const mkldnn::primitive_attr &attr, + const std::vector& binPostOpsArgs, + const mkldnn::engine& engine) { + execPrim.reset(new mkldnn::deconvolution_forward(pd)); + + if (inMem->GetPrimitive().get_desc() != pd.src_desc()) { + inputReorders.push_back(IntermReorder(inMem, pd.src_desc(), engine)); + primArgs[DNNL_ARG_SRC] = inputReorders.back().getToMem()->GetPrimitive(); + } else { + primArgs[DNNL_ARG_SRC] = inMem->GetPrimitive(); + } + + if (weightMem->GetPrimitive().get_desc() != pd.weights_desc()) { + inputReorders.push_back(IntermReorder(weightMem, pd.weights_desc(), engine)); + primArgs[DNNL_ARG_WEIGHTS] = inputReorders.back().getToMem()->GetPrimitive(); + } else { + primArgs[DNNL_ARG_WEIGHTS] = weightMem->GetPrimitive(); + } + + if (outMem->GetPrimitive().get_desc() != pd.dst_desc()) { + outputReorders.push_back(IntermReorder(pd.dst_desc(), outMem, engine)); + primArgs[DNNL_ARG_DST] = outputReorders.back().getFromMem()->GetPrimitive(); + } else { + primArgs[DNNL_ARG_DST] = outMem->GetPrimitive(); + } + MKLDNNNode::appendPostOpArgs(attr, primArgs, binPostOpsArgs); +} + +std::vector MKLDNNDeconvolutionNode::readOutputSpatialDims() const { + if (getParentEdges().size() < 3) { + IE_THROW() << "Can't get output spatial dims. 
Inputs number = " << getParentEdges().size(); + } + const auto &shapeMemPtr = getParentEdgesAtPort(2)[0]->getMemoryPtr(); + if (!shapeMemPtr || !shapeMemPtr->GetPrimitivePtr()) { + IE_THROW() << "'output_shape' input memory is not allocated."; + } + const auto spDimsNum = getInputShapeAtPort(0).getRank() - 2; + if (shapeMemPtr->getStaticDims()[0] != spDimsNum) { + IE_THROW() << "Can't read output spatial dims, beause 'output_shape' input has incorrect number of elements"; + } + const int32_t *outShapePtr = reinterpret_cast(shapeMemPtr->GetPtr()); + std::vector outSpDims(outShapePtr, outShapePtr + shapeMemPtr->getStaticDims()[0]); + return outSpDims; +} + REG_MKLDNN_PRIM_FOR(MKLDNNDeconvolutionNode, Deconvolution); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h index 32837b4d59c..6af4a3d35ed 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h @@ -13,6 +13,10 @@ namespace MKLDNNPlugin { class MKLDNNDeconvolutionNode : public MKLDNNNode { + using DefaultDeconvDescs = std::pair, + std::shared_ptr>; + using Int8DeconvDesc = std::shared_ptr; + public: MKLDNNDeconvolutionNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); @@ -39,27 +43,120 @@ public: static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; bool canFuse(const MKLDNNNodePtr& node) const override; - const InferenceEngine::SizeVector& getWeightDims() { return weightDims; } - const std::vector& getStride() { return stride; } + const VectorDims& getWeightDims() const { return getInputShapeAtPort(1).getStaticDims(); } + const std::vector& getStride() const { return stride; } + + void prepareParams() override; + void execute(mkldnn::stream strm) override; + void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); } + bool needShapeInfer() const override; + std::vector shapeInfer() const override; private: + class DeconvExecutor { + protected: + class IntermReorder { + public: + IntermReorder(MKLDNNMemoryPtr memFrom, const mkldnn::memory::desc& descTo, const mkldnn::engine& engine); + IntermReorder(const mkldnn::memory::desc& descFrom, MKLDNNMemoryPtr memTo, const mkldnn::engine& engine); + MKLDNNMemoryPtr getFromMem() const { return m_memFrom; } + MKLDNNMemoryPtr getToMem() const { return m_memTo; } + void exec(mkldnn::stream strm); + + private: + MKLDNNMemoryPtr m_memFrom; + MKLDNNMemoryPtr m_memTo; + mkldnn::reorder m_reorder; + }; + + public: + void exec(mkldnn::stream strm); + virtual ~DeconvExecutor() = default; + + protected: + DeconvExecutor() = default; + std::vector inputReorders; + MKLDNNPrimitive execPrim; + std::vector outputReorders; + std::unordered_map primArgs; + }; + + using executorPtr = std::shared_ptr; + executorPtr execPtr = nullptr; + + class DeconvExecutorDefault : public DeconvExecutor { + public: + DeconvExecutorDefault(const mkldnn::convolution_backward_data::primitive_desc& pd, + MKLDNNMemoryPtr inMem, + MKLDNNMemoryPtr weightMem, + MKLDNNMemoryPtr outMem, + const mkldnn::primitive_attr &attr, + const std::vector& binPostOpsArgs, + const mkldnn::engine& engine); + }; + + class DeconvExecutorInt8 : public DeconvExecutor { + public: + DeconvExecutorInt8(const mkldnn::deconvolution_forward::primitive_desc& pd, + MKLDNNMemoryPtr inMem, + MKLDNNMemoryPtr weightMem, + MKLDNNMemoryPtr outMem, + const mkldnn::primitive_attr &attr, + const 
std::vector& binPostOpsArgs, + const mkldnn::engine& engine); + }; + bool withGroups = false; bool isDW = false; bool isInt8 = false; + bool autoPad = false; + bool externOutShape = false; size_t groupNum = 1; size_t IC; size_t OC; std::vector kernel; std::vector stride; std::vector dilation; - std::vector paddingL; - std::vector paddingR; - InferenceEngine::SizeVector weightDims; - std::vector> descs_fwd; - std::vector> descs_bwd; + ov::CoordinateDiff paddingL; + ov::CoordinateDiff paddingR; + ov::CoordinateDiff outputPadding; + std::vector lastOutputSpatialDims; + VectorDims int8WeightDims; + + Shape inShape; + + AttrPtr pAttr; mkldnn::primitive_attr attr; - void setPostOps(mkldnn::primitive_attr &attr); + void setPostOps(mkldnn::primitive_attr &attr, const VectorDims &dims); + + VectorDims shapeInferInternal(const VectorDims &inDims, std::vector outSpDims) const; + void initPadding(std::shared_ptr op, const Shape &inShape, const std::vector& outSpDims); + void initPaddingR(const Shape &inShape, const Shape &outShape); + std::vector readOutputSpatialDims() const; + std::pair makeDummyInOutShape(); + + DefaultDeconvDescs createDescriptorInternalDefault(const mkldnn::memory::desc& in_candidate, + const mkldnn::memory::desc& wgh_candidate, + const mkldnn::memory::desc& out_candidate, + mkldnn::algorithm alg) const; + Int8DeconvDesc createDescriptorInternalInt8(const mkldnn::memory::desc& in_candidate, + const mkldnn::memory::desc& wgh_candidate, + const mkldnn::memory::desc& out_candidate) const; + std::shared_ptr createDefaultMkldnnDeconvDesc(const mkldnn::memory::desc& srcDesc, + const mkldnn::memory::desc& wghDesc, + const mkldnn::memory::desc& dstDesc, + bool isWinograd) const; + std::shared_ptr createInt8MkldnnDeconvDesc(const mkldnn::memory::desc& srcDesc, + const mkldnn::memory::desc& wghDesc, + const mkldnn::memory::desc& dstDesc) const; + + void createDeconvPrim(std::shared_ptr desc, + MKLDNNMemoryPtr srcMemPtr, + MKLDNNMemoryPtr wghMemPtr, + MKLDNNMemoryPtr dstMemPtr, + AttrPtr attr, + impl_desc_type selectedImpl); std::string errorPrefix; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp index 8eaea33af95..97c509083a3 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp @@ -147,7 +147,7 @@ void MKLDNNFullyConnectedNode::createPrimitive() { else primArgs = {{DNNL_ARG_SRC, src}, {DNNL_ARG_WEIGHTS, getParentEdgeAt(WEIGHTS_ID)->getMemory().GetPrimitive()}, {DNNL_ARG_DST, dst}}; - appendPostOpArgs(*attr); + appendPostOpArgs(*attr, primArgs, binaryPostOpsArgs); } void MKLDNNFullyConnectedNode::execute(mkldnn::stream strm) { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.cpp index 944f65ff5f0..c443eedf2c2 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.cpp @@ -421,7 +421,7 @@ void MKLDNNMatMulNode::prepareParams() { if (withBiases) primArgs[DNNL_ARG_BIAS] = getParentEdgeAt(2)->getMemoryPtr()->GetPrimitive(); - appendPostOpArgs(*attr); + appendPostOpArgs(*attr, primArgs, binaryPostOpsArgs); } void MKLDNNMatMulNode::executeDynamicImpl(dnnl::stream strm) { diff --git a/src/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp 
b/src/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp index 55772b5938a..e9d0e22bc4e 100644 --- a/src/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp +++ b/src/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp @@ -152,6 +152,7 @@ std::vector disabledTestPatterns() { // bad accuracy R"(.*smoke_FakeQuantizeLayerCPUTest_Decompos. *IS=_TS=\(\(4\.5\.6\.7\)\)_RS=\(\(1\.1\.6\.1\)\)_\(\(1\.5\.6\.1\)\)_\(\(1\.1\.1\.1\)\)_\(\(1\.1\.6\.1\)\).*)", + // Issue: 71121 R"(.*smoke_Proposal*.*TS=\(2.*)", // TODO : CVS-69533 @@ -165,6 +166,9 @@ std::vector disabledTestPatterns() { // Failure happened on win and macos for current seeds. R"(.*CTCLossLayerTest.*CMR=1.*)", R"(.*CTCLossLayerCPUTest.*ctcMergeRepeated=1.*)", + // Issue: 71756 + R"(.*Deconv_.*D_(Blocked|DW|1x1)_.*DeconvolutionLayerCPUTest\.CompareWithRefs.*inFmts=(nChw16c|nCdhw16c)_outFmts=(nChw16c|nCdhw16c)_primitive=jit_avx512_.*Fused=Multiply\(PerChannel\)\.Add\(PerChannel\).*)", + R"(.*smoke_GroupDeconv_(2|3)D_Blocked_BF16.*S=(\(2\.2\)|\(2\.2\.2\))_PB=(\(0\.0\)|\(0\.0\.0\))_PE=(\(0\.0\)|\(0\.0\.0\))_D=(\(1\.1\)|\(1\.1\.1\))_.*_O=64_G=4.*)" }; #define FIX_62820 0 diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/convolution_backprop_data.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/convolution_backprop_data.cpp index d4a2f3d414f..52d50c6f07e 100755 --- a/src/tests/functional/plugin/cpu/single_layer_tests/convolution_backprop_data.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/convolution_backprop_data.cpp @@ -5,38 +5,78 @@ #include "test_utils/cpu_test_utils.hpp" #include "test_utils/convolution_params.hpp" #include "test_utils/fusing_test_utils.hpp" -#include "shared_test_classes/base/layer_test_utils.hpp" -#include "ngraph_functions/utils/ngraph_helpers.hpp" +#include "shared_test_classes/base/ov_subgraph.hpp" +#include "functional_test_utils/ov_tensor_utils.hpp" #include "ngraph_functions/builders.hpp" #include +#include "openvino/core/preprocess/pre_post_process.hpp" - -using namespace InferenceEngine; using namespace CPUTestUtils; +using namespace ov::test; namespace CPULayerTestsDefinitions { -using LayerTestsDefinitions::convBackpropDataSpecificParams; -using LayerTestsDefinitions::convBackpropDataLayerTestParamsSet; -typedef std::tuple< - convBackpropDataLayerTestParamsSet, - CPUSpecificParams, - fusingSpecificParams, - std::map > deconvLayerCPUTestParamsSet; +using DeconvSpecParams = LayerTestsDefinitions::convBackpropDataSpecificParams; -class DeconvolutionLayerCPUTest : public testing::WithParamInterface, - virtual public LayerTestsUtils::LayerTestsCommon, public CpuTestWithFusing { +using DeconvInputData = std::tuple>>; // values for 'output_shape' + +using DeconvLayerCPUTestParamsSet = std::tuple>; + +class DeconvolutionLayerCPUTest : public testing::WithParamInterface, + virtual public SubgraphBaseTest, public CpuTestWithFusing { public: - static std::string getTestCaseName(testing::TestParamInfo obj) { - convBackpropDataLayerTestParamsSet basicParamsSet; - CPUSpecificParams cpuParams; + static std::string getTestCaseName(testing::TestParamInfo obj) { + DeconvSpecParams basicParamsSet; + DeconvInputData inputData; + ElementType prec; fusingSpecificParams fusingParams; + CPUSpecificParams cpuParams; std::map additionalConfig; - std::tie(basicParamsSet, cpuParams, fusingParams, additionalConfig) = obj.param; + std::tie(basicParamsSet, inputData, prec, fusingParams, cpuParams, additionalConfig) = obj.param; + + 
ngraph::op::PadType padType; + InferenceEngine::SizeVector kernel, stride, dilation; + std::vector padBegin, padEnd, outPadding; + size_t convOutChannels; + std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, padType, outPadding) = basicParamsSet; + + InputShape inputShape; + ngraph::helpers::InputLayerType outShapeType; + std::vector> outShapeData; + std::tie(inputShape, outShapeType, outShapeData) = inputData; std::ostringstream result; - result << LayerTestsDefinitions::ConvolutionBackpropDataLayerTest::getTestCaseName(testing::TestParamInfo( - basicParamsSet, 0)); + result << "IS="; + result << CommonTestUtils::partialShape2str({inputShape.first}) << "_"; + result << "TS="; + for (const auto& shape : inputShape.second) { + result << "("; + result << CommonTestUtils::vec2str(shape); + result << ")_"; + } + result << "PRC=" << prec << "_"; + result << "K=" << CommonTestUtils::vec2str(kernel) << "_"; + result << "S=" << CommonTestUtils::vec2str(stride) << "_"; + result << "PB=" << CommonTestUtils::vec2str(padBegin) << "_"; + result << "PE=" << CommonTestUtils::vec2str(padEnd) << "_"; + result << "D=" << CommonTestUtils::vec2str(dilation) << "_"; + result << "OP=" << CommonTestUtils::vec2str(outPadding) << "_"; + result << "O=" << convOutChannels << "_"; + result << "AP=" << padType << "_"; + result << "OUT_SH=" << outShapeType << "_"; + result << "OUT_D="; + for (const auto& data : outShapeData) { + result << "("; + result << CommonTestUtils::vec2str(data); + result << ")_"; + } result << CPUTestsBase::getTestCaseName(cpuParams); result << CpuTestWithFusing::getTestCaseName(fusingParams); @@ -50,53 +90,159 @@ public: return result.str(); } + + void generate_inputs(const std::vector& targetInputStaticShapes) override { + inputs.clear(); + const auto& funcInputs = function->inputs(); + for (int i = 0; i < funcInputs.size(); ++i) { + const auto& funcInput = funcInputs[i]; + ov::runtime::Tensor tensor; + + if (i == 1) { + tensor = ov::runtime::Tensor(funcInput.get_element_type(), targetInputStaticShapes[i], outShapeData[inferRequestNum].data()); + } else { + tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i], 2560, 0, 256); + } + + inputs.insert({funcInput.get_node_shared_ptr(), tensor}); + } + inferRequestNum++; + } + + void init_ref_function(std::shared_ptr &funcRef, const std::vector& targetInputStaticShapes) override { + if (function->get_parameters().size() == 1) { + ngraph::helpers::resize_function(funcRef, targetInputStaticShapes); + } else { + // WA: output_shape depends on 3rd deconvolution input data + // but the reference implementation doesn't implement shape inference + // so we need to build a new ngraph function and replace the 3rd input parameter with a constant + // to get valid output shapes + funcRef = createGraph({targetInputStaticShapes[0]}, ngraph::helpers::InputLayerType::CONSTANT); + } + } + + void validate() override { + if (function->get_parameters().size() == 2) { + auto pos = std::find_if(inputs.begin(), inputs.end(), + [](const std::pair, ov::runtime::Tensor> ¶ms) { + return params.first->get_friendly_name() == "param_1"; + }); + IE_ASSERT(pos != inputs.end()); + inputs.erase(pos); + } + SubgraphBaseTest::validate(); + } + + void configure_model() override { + ov::preprocess::PrePostProcessor p(function); + { + auto& params = function->get_parameters(); + for (size_t i = 0; i < params.size(); i++) { + if (i > 0) { + continue; + } + if (inType != ov::element::Type_t::undefined) { + 
p.input(i).tensor().set_element_type(inType); + } + } + } + { + auto results = function->get_results(); + for (size_t i = 0; i < results.size(); i++) { + if (outType != ov::element::Type_t::undefined) { + p.output(i).tensor().set_element_type(outType); + } + } + } + function = p.build(); + } + + std::shared_ptr createGraph(const std::vector& inShapes, ngraph::helpers::InputLayerType outShapeType) { + auto params = ngraph::builder::makeDynamicParams(prec, {inShapes.front()}); + std::shared_ptr outShapeNode; + if (!outShapeData.empty()) { + if (outShapeType == ngraph::helpers::InputLayerType::PARAMETER) { + IE_ASSERT(inputDynamicShapes.size() == 2); + auto outShapeParam = std::make_shared(ngraph::element::i32, inputDynamicShapes.back()); + params.push_back(outShapeParam); + outShapeNode = outShapeParam; + } else { + outShapeNode = ngraph::opset8::Constant::create(ngraph::element::i32, {outShapeData[inferRequestNum].size()}, outShapeData[inferRequestNum]); + } + } + + for (size_t i = 0; i < params.size(); i++) { + params[i]->set_friendly_name(std::string("param_") + std::to_string(i)); + } + + std::shared_ptr deconv; + if (!outShapeData.empty()) { + IE_ASSERT(outShapeNode != nullptr); + deconv = ngraph::builder::makeConvolutionBackpropData(params[0], outShapeNode, prec, kernel, stride, padBegin, + padEnd, dilation, padType, convOutChannels); + } else { + deconv = ngraph::builder::makeConvolutionBackpropData(params[0], prec, kernel, stride, padBegin, + padEnd, dilation, padType, convOutChannels, false, outPadding); + } + + return makeNgraphFunction(prec, params, deconv, "DeconvCPU"); + } + protected: InferenceEngine::SizeVector kernel, stride; + void SetUp() override { - convBackpropDataLayerTestParamsSet basicParamsSet; - CPUSpecificParams cpuParams; + rel_threshold = 1e-4f; + + targetDevice = CommonTestUtils::DEVICE_CPU; + + DeconvSpecParams basicParamsSet; + DeconvInputData inputData; fusingSpecificParams fusingParams; + CPUSpecificParams cpuParams; std::map additionalConfig; - std::tie(basicParamsSet, cpuParams, fusingParams, additionalConfig) = this->GetParam(); + std::tie(basicParamsSet, inputData, prec, fusingParams, cpuParams, additionalConfig) = this->GetParam(); + + InputShape inputShape; + ngraph::helpers::InputLayerType outShapeType; + std::tie(inputShape, outShapeType, outShapeData) = inputData; configuration.insert(additionalConfig.begin(), additionalConfig.end()); - std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; std::tie(postOpMgrPtr, fusedOps) = fusingParams; - convBackpropDataSpecificParams convParams; - std::vector inputShape; - std::vector outputShape; - auto netPrecision = InferenceEngine::Precision::UNSPECIFIED; - std::tie(convParams, netPrecision, inPrc, outPrc, inLayout, outLayout, inputShape, outputShape, targetDevice) = basicParamsSet; + std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, padType, outPadding) = basicParamsSet; - if (inPrc == Precision::UNSPECIFIED) { - selectedType += std::string("_") + Precision(Precision::FP32).name(); + if (additionalConfig[InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16] == InferenceEngine::PluginConfigParams::YES) { + inType = outType = prec = ElementType::bf16; + rel_threshold = 1e-2f; } else { - selectedType += std::string("_") + inPrc.name(); + inType = outType = prec; } - ngraph::op::PadType padType; - InferenceEngine::SizeVector dilation; - std::vector padBegin, padEnd, outPadding; - size_t convOutChannels; - std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, padType, 
outPadding) = convParams; - auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); + selectedType = makeSelectedTypeStr(selectedType, prec); - auto inputParams = ngraph::builder::makeParams(ngraph::element::f32, { inputShape }); - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(inputParams)); - - auto deconvolutionNode = ngraph::builder::makeConvolutionBackpropData(paramOuts.front(), ngPrc, kernel, stride, padBegin, - padEnd, dilation, padType, convOutChannels, false, outPadding); - - if (!outputShape.empty()) { - auto outShape = ngraph::opset3::Constant::create(ngraph::element::i64, {outputShape.size()}, outputShape); - deconvolutionNode = ngraph::builder::makeConvolutionBackpropData(paramOuts.front(), outShape, ngPrc, kernel, stride, padBegin, - padEnd, dilation, padType, convOutChannels); + std::vector paramsShapes; + paramsShapes.push_back(inputShape); + if (!outShapeData.empty() && outShapeType == ngraph::helpers::InputLayerType::PARAMETER) { + const auto outShapeDims = ov::Shape{outShapeData.front().size()}; + paramsShapes.push_back(InputShape{outShapeDims, std::vector(inputShape.second.size(), outShapeDims)}); } - function = makeNgraphFunction(ngPrc, inputParams, deconvolutionNode, "convolutionBackpropData"); + init_input_shapes(paramsShapes); + + function = createGraph(inputDynamicShapes, outShapeType); } + +private: + ElementType prec; + ngraph::op::PadType padType; + InferenceEngine::SizeVector dilation; + std::vector padBegin, padEnd, outPadding; + size_t convOutChannels; + ngraph::helpers::InputLayerType outShapeType; + std::vector> outShapeData; + size_t inferRequestNum = 0; }; TEST_P(DeconvolutionLayerCPUTest, CompareWithRefs) { @@ -113,7 +259,7 @@ TEST_P(DeconvolutionLayerCPUTest, CompareWithRefs) { } } - Run(); + run(); CheckPluginRelatedResults(executableNetwork, "Deconvolution"); } @@ -126,29 +272,29 @@ const std::vector fusingParamsSet{ }; const std::map cpuEmptyPluginConfig; -const std::map cpuBF16PluginConfig = { { PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES } }; -const std::vector emptyOutputShape = { {} }; +const std::mapcpuBF16PluginConfig = { { InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16, + InferenceEngine::PluginConfigParams::YES } }; const std::vector> emptyOutputPadding = { {} }; /* ============= Deconvolution params (planar layout) ============= */ -const SizeVector numOutChannels_Planar = { 6 }; +const InferenceEngine::SizeVector numOutChannels_Planar = { 6 }; /* ============= Deconvolution params (blocked layout) ============= */ -const SizeVector numOutChannels_Blocked = { 64 }; +const InferenceEngine::SizeVector numOutChannels_Blocked = { 64 }; /* ============= Deconvolution params (2D) ============= */ -const std::vector kernels2d = { {3, 3}, {1, 1} }; -const std::vector strides2d = { {1, 1}, {2, 2} }; +const std::vector kernels2d = { {3, 3}, {1, 1} }; +const std::vector strides2d = { {1, 1}, {2, 2} }; const std::vector> padBegins2d = { {0, 0} }; const std::vector> padEnds2d = { {0, 0} }; -const std::vector dilations2d = { {1, 1} }; +const std::vector dilations2d = { {1, 1} }; /* ============= Deconvolution params (3D) ============= */ -const std::vector kernels3d = { {3, 3, 3}, {1, 1, 1} }; -const std::vector strides3d = { {1, 1, 1}, {2, 2, 2} }; +const std::vector kernels3d = { {3, 3, 3}, {1, 1, 1} }; +const std::vector strides3d = { {1, 1, 1}, {2, 2, 2} }; const std::vector> padBegins3d = { {0, 0, 0} }; const std::vector> padEnds3d = { {0, 0, 0} }; -const 
std::vector dilations3d = { {1, 1, 1} }; +const std::vector dilations3d = { {1, 1, 1} }; /* ============= */ /* INSTANCES */ @@ -164,41 +310,99 @@ const auto convParams_ExplicitPadding_Planar_2D = ::testing::Combine( ::testing::ValuesIn(emptyOutputPadding) ); +const std::vector Planar_2D_inputs_smoke = { + DeconvInputData{ + InputShape{{}, {{ 2, 12, 7, 7 }}}, + ngraph::helpers::InputLayerType::CONSTANT, + {} + }, + DeconvInputData{ + InputShape{{-1, 12, -1, -1}, {{ 2, 12, 7, 7}, { 2, 12, 5, 7}, { 1, 12, 9, 4}}}, + ngraph::helpers::InputLayerType::PARAMETER, + {{15, 15}, {9, 10}, {9, 9}} + } +}; + +const std::vector Planar_2D_inputs_nightly = { + DeconvInputData{ + InputShape{{-1, 12, -1, -1}, {{ 2, 12, 7, 7}, { 2, 12, 5, 7}, { 1, 12, 9, 4}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {} + }, + DeconvInputData{ + InputShape{{-1, 12, -1, -1}, {{ 2, 12, 7, 7}, { 2, 12, 5, 7}, { 1, 12, 9, 4}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {{15, 15}} + } +}; + INSTANTIATE_TEST_SUITE_P(smoke_Deconv_2D_Planar_FP32, DeconvolutionLayerCPUTest, ::testing::Combine( - ::testing::Combine( - convParams_ExplicitPadding_Planar_2D, - ::testing::Values(Precision::FP32), - ::testing::Values(Precision::UNSPECIFIED), - ::testing::Values(Precision::UNSPECIFIED), - ::testing::Values(Layout::ANY), - ::testing::Values(Layout::ANY), - ::testing::Values(std::vector({ 2, 12, 7, 7 })), - ::testing::ValuesIn(emptyOutputShape), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_2D})), + convParams_ExplicitPadding_Planar_2D, + ::testing::ValuesIn(Planar_2D_inputs_smoke), + ::testing::Values(ElementType::f32), ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_2D})), ::testing::Values(cpuEmptyPluginConfig)), DeconvolutionLayerCPUTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_Deconv_2D_Planar_BF16, DeconvolutionLayerCPUTest, ::testing::Combine( - ::testing::Combine( - convParams_ExplicitPadding_Planar_2D, - ::testing::Values(Precision::FP32), - ::testing::Values(Precision::BF16), - ::testing::Values(Precision::BF16), - ::testing::Values(Layout::ANY), - ::testing::Values(Layout::ANY), - ::testing::Values(std::vector({ 2, 12, 7, 7 })), - ::testing::ValuesIn(emptyOutputShape), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_2D})), + convParams_ExplicitPadding_Planar_2D, + ::testing::ValuesIn(Planar_2D_inputs_smoke), + ::testing::Values(ElementType::f32), ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_2D})), ::testing::Values(cpuBF16PluginConfig)), DeconvolutionLayerCPUTest::getTestCaseName); -/* ============= GroupDeconvolution (Planar 3D) ============= */ +INSTANTIATE_TEST_SUITE_P(nightly_Deconv_2D_Planar_FP32, DeconvolutionLayerCPUTest, + ::testing::Combine( + convParams_ExplicitPadding_Planar_2D, + ::testing::ValuesIn(Planar_2D_inputs_nightly), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_2D})), + ::testing::Values(cpuEmptyPluginConfig)), + DeconvolutionLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(nightly_Deconv_2D_Planar_BF16, DeconvolutionLayerCPUTest, + ::testing::Combine( + convParams_ExplicitPadding_Planar_2D, + ::testing::ValuesIn(Planar_2D_inputs_nightly), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_2D})), + 
::testing::Values(cpuBF16PluginConfig)), + DeconvolutionLayerCPUTest::getTestCaseName); + +/* ============= Deconvolution (Planar 3D) ============= */ +const std::vector Planar_3D_inputs_smoke = { + DeconvInputData{ + InputShape{{}, {{ 2, 12, 7, 7, 7 }}}, + ngraph::helpers::InputLayerType::CONSTANT, + {} + }, + DeconvInputData{ + InputShape{{-1, 12, -1, -1, -1}, {{ 2, 12, 7, 7, 7}, { 2, 12, 5, 7, 7}, { 1, 12, 9, 4, 9}}}, + ngraph::helpers::InputLayerType::PARAMETER, + {{15, 15, 15}, {9, 10, 10}, {9, 9, 9}} + } +}; + +const std::vector Planar_3D_inputs_nightly = { + DeconvInputData{ + InputShape{{-1, 12, -1, -1, -1}, {{ 2, 12, 7, 7, 7}, { 2, 12, 5, 7, 7}, { 1, 12, 9, 4, 9}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {} + }, + DeconvInputData{ + InputShape{{-1, 12, -1, -1, -1}, {{ 2, 12, 7, 7, 7}, { 2, 12, 5, 7, 7}, { 1, 12, 9, 4, 9}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {{15, 15, 15}} + } +}; + const auto convParams_ExplicitPadding_Planar_3D = ::testing::Combine( ::testing::ValuesIn(kernels3d), ::testing::ValuesIn(strides3d), @@ -212,39 +416,71 @@ const auto convParams_ExplicitPadding_Planar_3D = ::testing::Combine( INSTANTIATE_TEST_SUITE_P(smoke_Deconv_3D_Planar_FP32, DeconvolutionLayerCPUTest, ::testing::Combine( - ::testing::Combine( - convParams_ExplicitPadding_Planar_3D, - ::testing::Values(Precision::FP32), - ::testing::Values(Precision::UNSPECIFIED), - ::testing::Values(Precision::UNSPECIFIED), - ::testing::Values(Layout::ANY), - ::testing::Values(Layout::ANY), - ::testing::Values(std::vector({ 2, 12, 7, 7, 7 })), - ::testing::ValuesIn(emptyOutputShape), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_3D})), + convParams_ExplicitPadding_Planar_3D, + ::testing::ValuesIn(Planar_3D_inputs_smoke), + ::testing::Values(ElementType::f32), ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_3D})), ::testing::Values(cpuEmptyPluginConfig)), DeconvolutionLayerCPUTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_Deconv_3D_Planar_BF16, DeconvolutionLayerCPUTest, ::testing::Combine( - ::testing::Combine( - convParams_ExplicitPadding_Planar_3D, - ::testing::Values(Precision::FP32), - ::testing::Values(Precision::BF16), - ::testing::Values(Precision::BF16), - ::testing::Values(Layout::ANY), - ::testing::Values(Layout::ANY), - ::testing::Values(std::vector({ 2, 12, 7, 7, 7 })), - ::testing::ValuesIn(emptyOutputShape), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_3D})), + convParams_ExplicitPadding_Planar_3D, + ::testing::ValuesIn(Planar_3D_inputs_smoke), + ::testing::Values(ElementType::f32), ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_3D})), ::testing::Values(cpuBF16PluginConfig)), DeconvolutionLayerCPUTest::getTestCaseName); -/* ============= GroupDeconvolution (Blocked 2D) ============= */ +INSTANTIATE_TEST_SUITE_P(nightly_Deconv_3D_Planar_FP32, DeconvolutionLayerCPUTest, + ::testing::Combine( + convParams_ExplicitPadding_Planar_3D, + ::testing::ValuesIn(Planar_3D_inputs_nightly), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_3D})), + ::testing::Values(cpuEmptyPluginConfig)), + DeconvolutionLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(nightly_Deconv_3D_Planar_BF16, DeconvolutionLayerCPUTest, + ::testing::Combine( + convParams_ExplicitPadding_Planar_3D, + 
::testing::ValuesIn(Planar_3D_inputs_nightly), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_3D})), + ::testing::Values(cpuBF16PluginConfig)), + DeconvolutionLayerCPUTest::getTestCaseName); + +/* ============= Deconvolution (Blocked 2D) ============= */ +const std::vector Blocked_2D_inputs_smoke = { + DeconvInputData{ + InputShape{{}, {{ 2, 67, 7, 7 }}}, + ngraph::helpers::InputLayerType::CONSTANT, + {} + }, + DeconvInputData{ + InputShape{{-1, 67, -1, -1}, {{ 2, 67, 7, 7}, { 2, 67, 5, 7}, { 1, 67, 9, 4}}}, + ngraph::helpers::InputLayerType::PARAMETER, + {{15, 15}, {9, 10}, {9, 9}} + } +}; + +const std::vector Blocked_2D_inputs_nightly = { + DeconvInputData{ + InputShape{{-1, 67, -1, -1}, {{ 2, 67, 7, 7}, { 2, 67, 5, 7}, { 1, 67, 9, 4}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {} + }, + DeconvInputData{ + InputShape{{-1, 67, -1, -1}, {{ 2, 67, 7, 7}, { 2, 67, 5, 7}, { 1, 67, 9, 4}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {{15, 15}} + } +}; + const auto convParams_ExplicitPadding_Blocked_2D = ::testing::Combine( ::testing::ValuesIn(kernels2d), ::testing::ValuesIn(strides2d), @@ -258,92 +494,129 @@ const auto convParams_ExplicitPadding_Blocked_2D = ::testing::Combine( INSTANTIATE_TEST_SUITE_P(smoke_Deconv_2D_Blocked_FP32, DeconvolutionLayerCPUTest, ::testing::Combine( - ::testing::Combine( - convParams_ExplicitPadding_Blocked_2D, - ::testing::Values(Precision::FP32), - ::testing::Values(Precision::UNSPECIFIED), - ::testing::Values(Precision::UNSPECIFIED), - ::testing::Values(Layout::ANY), - ::testing::Values(Layout::ANY), - ::testing::Values(std::vector({ 2, 67, 7, 7 })), - ::testing::ValuesIn(emptyOutputShape), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D})), + convParams_ExplicitPadding_Blocked_2D, + ::testing::ValuesIn(Blocked_2D_inputs_smoke), + ::testing::Values(ElementType::f32), ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D})), ::testing::Values(cpuEmptyPluginConfig)), DeconvolutionLayerCPUTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_Deconv_2D_Blocked_BF16, DeconvolutionLayerCPUTest, ::testing::Combine( - ::testing::Combine( - convParams_ExplicitPadding_Blocked_2D, - ::testing::Values(Precision::FP32), - ::testing::Values(Precision::BF16), - ::testing::Values(Precision::BF16), - ::testing::Values(Layout::ANY), - ::testing::Values(Layout::ANY), - ::testing::Values(std::vector({ 2, 67, 7, 7 })), - ::testing::ValuesIn(emptyOutputShape), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D})), + convParams_ExplicitPadding_Blocked_2D, + ::testing::ValuesIn(Blocked_2D_inputs_smoke), + ::testing::Values(ElementType::f32), ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D})), ::testing::Values(cpuBF16PluginConfig)), DeconvolutionLayerCPUTest::getTestCaseName); -/* ============= GroupDeconvolution (Blocked 3D) ============= */ +INSTANTIATE_TEST_SUITE_P(nightly_Deconv_2D_Blocked_FP32, DeconvolutionLayerCPUTest, + ::testing::Combine( + convParams_ExplicitPadding_Blocked_2D, + ::testing::ValuesIn(Blocked_2D_inputs_nightly), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D})), + ::testing::Values(cpuEmptyPluginConfig)), + DeconvolutionLayerCPUTest::getTestCaseName); + 
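[Editorial note, not part of the patch] The test vectors above pair each dynamic input shape with explicit 'output_shape' values, which the node has to reconcile with the standard ConvolutionBackpropData size arithmetic. Below is a minimal standalone C++ sketch of that arithmetic for the case where no explicit 'output_shape' input is given; all names in it are illustrative and do not come from the plugin sources.

#include <cstddef>
#include <vector>

// Expected deconvolution (ConvolutionBackpropData) output spatial dims:
// the inverse of the convolution output-size formula.
std::vector<std::size_t> deconvOutSpatialDims(const std::vector<std::size_t>& in,
                                              const std::vector<std::size_t>& kernel,
                                              const std::vector<std::size_t>& stride,
                                              const std::vector<std::size_t>& dilation,
                                              const std::vector<std::ptrdiff_t>& padBegin,
                                              const std::vector<std::ptrdiff_t>& padEnd,
                                              const std::vector<std::ptrdiff_t>& outPadding) {
    std::vector<std::size_t> out(in.size());
    for (std::size_t i = 0; i < in.size(); ++i) {
        // out = stride * (in - 1) + dilation * (kernel - 1) + 1 - padBegin - padEnd + outputPadding
        const std::ptrdiff_t dim =
            static_cast<std::ptrdiff_t>(stride[i] * (in[i] - 1) + dilation[i] * (kernel[i] - 1) + 1)
            - padBegin[i] - padEnd[i] + outPadding[i];
        out[i] = static_cast<std::size_t>(dim);
    }
    return out;
}

Sanity check against the data above: a 7x7 input with kernel {3, 3}, stride {2, 2}, zero pads, and no output padding gives 2 * (7 - 1) + (3 - 1) + 1 = 15 per axis, i.e. the {15, 15} entries; when 'output_shape' is supplied explicitly, the spatial dims are taken from it and the node derives the paddings instead.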
+INSTANTIATE_TEST_SUITE_P(nightly_Deconv_2D_Blocked_BF16, DeconvolutionLayerCPUTest, + ::testing::Combine( + convParams_ExplicitPadding_Blocked_2D, + ::testing::ValuesIn(Blocked_2D_inputs_nightly), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D})), + ::testing::Values(cpuBF16PluginConfig)), + DeconvolutionLayerCPUTest::getTestCaseName); + +/* ============= Deconvolution (Blocked 3D) ============= */ +const std::vector Blocked_3D_inputs_smoke = { + DeconvInputData{ + InputShape{{}, {{ 2, 35, 7, 7, 7 }}}, + ngraph::helpers::InputLayerType::CONSTANT, + {} + }, + DeconvInputData{ + InputShape{{-1, 35, -1, -1, -1}, {{ 1, 35, 5, 5, 5}, { 2, 35, 5, 7, 5}}}, + ngraph::helpers::InputLayerType::PARAMETER, + {{7, 7, 7}, {7, 9, 7}} + } +}; + +const std::vector Blocked_3D_inputs_nightly = { + DeconvInputData{ + InputShape{{-1, 35, -1, -1, -1}, {{ 1, 35, 5, 5, 5}, { 2, 35, 5, 7, 5}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {} + }, + DeconvInputData{ + InputShape{{-1, 35, -1, -1, -1}, {{ 1, 35, 5, 5, 5}, { 2, 35, 5, 7, 5}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {{7, 7, 7}} + } +}; + const auto convParams_ExplicitPadding_Blocked_3D = ::testing::Combine( ::testing::ValuesIn(kernels3d), ::testing::ValuesIn(strides3d), ::testing::ValuesIn(padBegins3d), ::testing::ValuesIn(padEnds3d), ::testing::ValuesIn(dilations3d), - ::testing::ValuesIn(numOutChannels_Blocked), + ::testing::Values(32), ::testing::Values(ngraph::op::PadType::EXPLICIT), ::testing::ValuesIn(emptyOutputPadding) ); INSTANTIATE_TEST_SUITE_P(smoke_Deconv_3D_Blocked_FP32, DeconvolutionLayerCPUTest, ::testing::Combine( - ::testing::Combine( - convParams_ExplicitPadding_Blocked_3D, - ::testing::Values(Precision::FP32), - ::testing::Values(Precision::UNSPECIFIED), - ::testing::Values(Precision::UNSPECIFIED), - ::testing::Values(Layout::ANY), - ::testing::Values(Layout::ANY), - ::testing::Values(std::vector({ 2, 67, 7, 7, 7 })), - ::testing::ValuesIn(emptyOutputShape), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_3D})), + convParams_ExplicitPadding_Blocked_3D, + ::testing::ValuesIn(Blocked_3D_inputs_smoke), + ::testing::Values(ElementType::f32), ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_3D})), ::testing::Values(cpuEmptyPluginConfig)), DeconvolutionLayerCPUTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_Deconv_3D_Blocked_BF16, DeconvolutionLayerCPUTest, ::testing::Combine( - ::testing::Combine( - convParams_ExplicitPadding_Blocked_3D, - ::testing::Values(Precision::FP32), - ::testing::Values(Precision::BF16), - ::testing::Values(Precision::BF16), - ::testing::Values(Layout::ANY), - ::testing::Values(Layout::ANY), - ::testing::Values(std::vector({ 2, 67, 7, 7, 7 })), - ::testing::ValuesIn(emptyOutputShape), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_3D})), + convParams_ExplicitPadding_Blocked_3D, + ::testing::ValuesIn(Blocked_3D_inputs_smoke), + ::testing::Values(ElementType::f32), ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_3D})), + ::testing::Values(cpuBF16PluginConfig)), + DeconvolutionLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(nightly_Deconv_3D_Blocked_FP32, DeconvolutionLayerCPUTest, + ::testing::Combine( + convParams_ExplicitPadding_Blocked_3D, + 
::testing::ValuesIn(Blocked_3D_inputs_nightly), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_3D})), + ::testing::Values(cpuEmptyPluginConfig)), + DeconvolutionLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(nightly_Deconv_3D_Blocked_BF16, DeconvolutionLayerCPUTest, + ::testing::Combine( + convParams_ExplicitPadding_Blocked_3D, + ::testing::ValuesIn(Blocked_3D_inputs_nightly), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_3D})), ::testing::Values(cpuBF16PluginConfig)), DeconvolutionLayerCPUTest::getTestCaseName); /* ============= Kernel_1x1 (2D) ============= */ - const auto convParams_ExplicitPadding_1x1_2D = ::testing::Combine( - ::testing::Values(SizeVector({1, 1})), - ::testing::Values(SizeVector({1, 1})), + ::testing::Values(InferenceEngine::SizeVector({1, 1})), + ::testing::Values(InferenceEngine::SizeVector({1, 1})), ::testing::Values(std::vector({0, 0})), ::testing::Values(std::vector({0, 0})), - ::testing::Values(SizeVector({1, 1})), + ::testing::Values(InferenceEngine::SizeVector({1, 1})), ::testing::ValuesIn(numOutChannels_Blocked), ::testing::Values(ngraph::op::PadType::EXPLICIT), ::testing::ValuesIn(emptyOutputPadding) @@ -351,39 +624,89 @@ const auto convParams_ExplicitPadding_1x1_2D = ::testing::Combine( INSTANTIATE_TEST_SUITE_P(smoke_Deconv_2D_1x1_FP32, DeconvolutionLayerCPUTest, ::testing::Combine( - ::testing::Combine( - convParams_ExplicitPadding_1x1_2D, - ::testing::Values(Precision::FP32), - ::testing::Values(Precision::UNSPECIFIED), - ::testing::Values(Precision::UNSPECIFIED), - ::testing::Values(Layout::ANY), - ::testing::Values(Layout::ANY), - ::testing::Values(std::vector({ 2, 67, 7, 7 })), - ::testing::ValuesIn(emptyOutputShape), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D_1x1})), + convParams_ExplicitPadding_1x1_2D, + ::testing::ValuesIn(Blocked_2D_inputs_smoke), + ::testing::Values(ElementType::f32), ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D_1x1})), ::testing::Values(cpuEmptyPluginConfig)), DeconvolutionLayerCPUTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_Deconv_2D_1x1_BF16, DeconvolutionLayerCPUTest, ::testing::Combine( - ::testing::Combine( - convParams_ExplicitPadding_1x1_2D, - ::testing::Values(Precision::FP32), - ::testing::Values(Precision::BF16), - ::testing::Values(Precision::BF16), - ::testing::Values(Layout::ANY), - ::testing::Values(Layout::ANY), - ::testing::Values(std::vector({ 2, 67, 7, 7 })), - ::testing::ValuesIn(emptyOutputShape), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D_1x1})), + convParams_ExplicitPadding_1x1_2D, + ::testing::ValuesIn(Blocked_2D_inputs_smoke), + ::testing::Values(ElementType::f32), ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D_1x1})), ::testing::Values(cpuBF16PluginConfig)), DeconvolutionLayerCPUTest::getTestCaseName); -/* ========= */ +/* ============= Reorder + Deconvolution ============= */ +INSTANTIATE_TEST_SUITE_P(smoke_reorder_Deconv_2D, DeconvolutionLayerCPUTest, + ::testing::Combine( + ::testing::Combine(::testing::ValuesIn(kernels2d), + ::testing::Values(InferenceEngine::SizeVector{1, 1}), + ::testing::ValuesIn(padBegins2d), + ::testing::ValuesIn(padEnds2d), + 
::testing::ValuesIn(dilations2d), + ::testing::ValuesIn(numOutChannels_Blocked), + ::testing::Values(ngraph::op::PadType::EXPLICIT), + ::testing::ValuesIn(emptyOutputPadding)), + ::testing::Values(DeconvInputData{InputShape{{-1, 67, -1, -1}, {{ 2, 67, 7, 7}, { 2, 67, 5, 7}, { 1, 67, 9, 4}}}, + ngraph::helpers::InputLayerType::PARAMETER, + {{15, 15}, {9, 10}, {9, 9}}}), + ::testing::Values(ElementType::f32), + ::testing::Values(emptyFusingSpec), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D})), + ::testing::Values(cpuEmptyPluginConfig)), + DeconvolutionLayerCPUTest::getTestCaseName); + +/* ============= Deconvolution auto padding tests ============= */ +const std::vector inputs_2D_AutoPadding = { + DeconvInputData{ + InputShape{{}, {{ 2, 67, 7, 7 }}}, + ngraph::helpers::InputLayerType::CONSTANT, + {} + }, + DeconvInputData{ + InputShape{{-1, 67, -1, -1}, {{ 2, 67, 7, 7}, { 2, 67, 5, 7}, { 1, 67, 9, 4}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {} + }, + DeconvInputData{ + InputShape{{-1, 67, -1, -1}, {{ 2, 67, 7, 7}, { 2, 67, 5, 7}, { 1, 67, 9, 4}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {{15, 15}} + }, + DeconvInputData{ + InputShape{{-1, 67, -1, -1}, {{ 2, 67, 7, 7}, { 2, 67, 5, 7}, { 1, 67, 9, 4}}}, + ngraph::helpers::InputLayerType::PARAMETER, + {{15, 15}, {9, 10}, {9, 9}} + } +}; + +const auto deconvParams_AutoPadding_2D = ::testing::Combine( + ::testing::ValuesIn(kernels2d), + ::testing::ValuesIn(strides2d), + ::testing::ValuesIn(padBegins2d), + ::testing::ValuesIn(padEnds2d), + ::testing::ValuesIn(dilations2d), + ::testing::ValuesIn(numOutChannels_Blocked), + ::testing::Values(ngraph::op::PadType::SAME_UPPER, ngraph::op::PadType::SAME_LOWER), + ::testing::ValuesIn(emptyOutputPadding) +); + +INSTANTIATE_TEST_SUITE_P(smoke_Deconv_2D_AutoPadding_FP32, DeconvolutionLayerCPUTest, + ::testing::Combine( + deconvParams_AutoPadding_2D, + ::testing::ValuesIn(inputs_2D_AutoPadding), + ::testing::Values(ElementType::f32), + ::testing::Values(emptyFusingSpec), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_2D, conv_avx512_2D})), + ::testing::Values(cpuEmptyPluginConfig)), + DeconvolutionLayerCPUTest::getTestCaseName); } // namespace + } // namespace CPULayerTestsDefinitions diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/group_convolution_backprop_data.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/group_convolution_backprop_data.cpp index fb13870bc17..5ddfa950376 100755 --- a/src/tests/functional/plugin/cpu/single_layer_tests/group_convolution_backprop_data.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/group_convolution_backprop_data.cpp @@ -2,38 +2,82 @@ // SPDX-License-Identifier: Apache-2.0 // -#include #include "test_utils/cpu_test_utils.hpp" #include "test_utils/convolution_params.hpp" #include "test_utils/fusing_test_utils.hpp" +#include "shared_test_classes/base/ov_subgraph.hpp" +#include "functional_test_utils/ov_tensor_utils.hpp" +#include "ngraph_functions/builders.hpp" +#include +#include "openvino/core/preprocess/pre_post_process.hpp" -using namespace InferenceEngine; using namespace CPUTestUtils; +using namespace ov::test; namespace CPULayerTestsDefinitions { -using groupConvBackpropDataLayerTestParamsSet = LayerTestsDefinitions::groupConvBackpropLayerTestParamsSet; -using groupConvBackpropDataSpecificParams = LayerTestsDefinitions::groupConvBackpropSpecificParams; +using GroupDeconvSpecParams = LayerTestsDefinitions::groupConvBackpropSpecificParams; -typedef std::tuple< - 
groupConvBackpropDataLayerTestParamsSet, - CPUSpecificParams, - fusingSpecificParams, - std::map> groupDeconvLayerCPUTestParamsSet; +using DeconvInputData = std::tuple>>; // values for 'output_shape' -class GroupDeconvolutionLayerCPUTest : public testing::WithParamInterface, - virtual public LayerTestsUtils::LayerTestsCommon, public CpuTestWithFusing { +using GroupDeconvLayerCPUTestParamsSet = std::tuple>; + +class GroupDeconvolutionLayerCPUTest : public testing::WithParamInterface, + virtual public SubgraphBaseTest, public CpuTestWithFusing { public: - static std::string getTestCaseName(testing::TestParamInfo obj) { - groupConvBackpropDataLayerTestParamsSet basicParamsSet; + static std::string getTestCaseName(testing::TestParamInfo obj) { + GroupDeconvSpecParams basicParamsSet; + DeconvInputData inputData; + ElementType prec; CPUSpecificParams cpuParams; fusingSpecificParams fusingParams; std::map additionalConfig; - std::tie(basicParamsSet, cpuParams, fusingParams, additionalConfig) = obj.param; + std::tie(basicParamsSet, inputData, prec, fusingParams, cpuParams, additionalConfig) = obj.param; + + ngraph::op::PadType padType; + InferenceEngine::SizeVector kernel, stride, dilation; + std::vector padBegin, padEnd, outPadding; + size_t convOutChannels, groupNum; + std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, groupNum, padType, outPadding) = basicParamsSet; + + InputShape inputShape; + ngraph::helpers::InputLayerType outShapeType; + std::vector> outShapeData; + std::tie(inputShape, outShapeType, outShapeData) = inputData; std::ostringstream result; - result << LayerTestsDefinitions::GroupConvBackpropLayerTest::getTestCaseName(testing::TestParamInfo( - basicParamsSet, 0)); + result << "IS="; + result << CommonTestUtils::partialShape2str({inputShape.first}) << "_"; + result << "TS="; + for (const auto& shape : inputShape.second) { + result << "("; + result << CommonTestUtils::vec2str(shape); + result << ")_"; + } + result << "PRC=" << prec << "_"; + result << "K=" << CommonTestUtils::vec2str(kernel) << "_"; + result << "S=" << CommonTestUtils::vec2str(stride) << "_"; + result << "PB=" << CommonTestUtils::vec2str(padBegin) << "_"; + result << "PE=" << CommonTestUtils::vec2str(padEnd) << "_"; + result << "D=" << CommonTestUtils::vec2str(dilation) << "_"; + result << "OP=" << CommonTestUtils::vec2str(outPadding) << "_"; + result << "O=" << convOutChannels << "_"; + result << "G=" << groupNum << "_"; + result << "AP=" << padType << "_"; + result << "OUT_SH=" << outShapeType << "_"; + result << "OUT_D="; + for (const auto& data : outShapeData) { + result << "("; + result << CommonTestUtils::vec2str(data); + result << ")_"; + } result << CPUTestsBase::getTestCaseName(cpuParams); result << CpuTestWithFusing::getTestCaseName(fusingParams); @@ -48,54 +92,159 @@ public: return result.str(); } + void generate_inputs(const std::vector& targetInputStaticShapes) override { + inputs.clear(); + const auto& funcInputs = function->inputs(); + for (int i = 0; i < funcInputs.size(); ++i) { + const auto& funcInput = funcInputs[i]; + ov::runtime::Tensor tensor; + + if (i == 1) { + tensor = ov::runtime::Tensor(funcInput.get_element_type(), targetInputStaticShapes[i], outShapeData[inferRequestNum].data()); + } else { + tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i], 2560, 0, 256); + } + + inputs.insert({funcInput.get_node_shared_ptr(), tensor}); + } + inferRequestNum++; + } + + void init_ref_function(std::shared_ptr &funcRef, const 
std::vector& targetInputStaticShapes) override { + if (function->get_parameters().size() == 1) { + ngraph::helpers::resize_function(funcRef, targetInputStaticShapes); + } else { + // WA: output_shape depends on 3rd deconvolution input data + // but the reference implementation doesn't implement shape inference + // so we need to build a new ngraph function and replace the 3rd input parameter with a constant + // to get valid output shapes + funcRef = createGraph({targetInputStaticShapes[0]}, ngraph::helpers::InputLayerType::CONSTANT); + } + } + + void validate() override { + if (function->get_parameters().size() == 2) { + auto pos = std::find_if(inputs.begin(), inputs.end(), + [](const std::pair, ov::runtime::Tensor> ¶ms) { + return params.first->get_friendly_name() == "param_1"; + }); + IE_ASSERT(pos != inputs.end()); + inputs.erase(pos); + } + SubgraphBaseTest::validate(); + } + + void configure_model() override { + ov::preprocess::PrePostProcessor p(function); + { + auto& params = function->get_parameters(); + for (size_t i = 0; i < params.size(); i++) { + if (i > 0) { + continue; + } + if (inType != ov::element::Type_t::undefined) { + p.input(i).tensor().set_element_type(inType); + } + } + } + { + auto results = function->get_results(); + for (size_t i = 0; i < results.size(); i++) { + if (outType != ov::element::Type_t::undefined) { + p.output(i).tensor().set_element_type(outType); + } + } + } + function = p.build(); + } + + std::shared_ptr createGraph(const std::vector& inShapes, ngraph::helpers::InputLayerType outShapeType) { + auto params = ngraph::builder::makeDynamicParams(prec, {inShapes.front()}); + std::shared_ptr outShapeNode; + if (!outShapeData.empty()) { + if (outShapeType == ngraph::helpers::InputLayerType::PARAMETER) { + IE_ASSERT(inputDynamicShapes.size() == 2); + auto outShapeParam = std::make_shared(ngraph::element::i32, inputDynamicShapes.back()); + params.push_back(outShapeParam); + outShapeNode = outShapeParam; + } else { + outShapeNode = ngraph::opset8::Constant::create(ngraph::element::i32, {outShapeData[inferRequestNum].size()}, outShapeData[inferRequestNum]); + } + } + + for (size_t i = 0; i < params.size(); i++) { + params[i]->set_friendly_name(std::string("param_") + std::to_string(i)); + } + + std::shared_ptr deconv; + if (!outShapeData.empty()) { + IE_ASSERT(outShapeNode != nullptr); + deconv = ngraph::builder::makeGroupConvolutionBackpropData(params[0], outShapeNode, prec, kernel, stride, padBegin, + padEnd, dilation, padType, convOutChannels, groupNum); + } else { + deconv = ngraph::builder::makeGroupConvolutionBackpropData(params[0], prec, kernel, stride, padBegin, + padEnd, dilation, padType, convOutChannels, groupNum, false, outPadding); + } + + return makeNgraphFunction(prec, params, deconv, "GroupDeconvCPU"); + } + protected: InferenceEngine::SizeVector kernel, stride; + void SetUp() override { - groupConvBackpropDataLayerTestParamsSet basicParamsSet; + rel_threshold = 1e-4f; + + targetDevice = CommonTestUtils::DEVICE_CPU; + + GroupDeconvSpecParams basicParamsSet; + DeconvInputData inputData; CPUSpecificParams cpuParams; fusingSpecificParams fusingParams; std::map additionalConfig; - std::tie(basicParamsSet, cpuParams, fusingParams, additionalConfig) = this->GetParam(); + std::tie(basicParamsSet, inputData, prec, fusingParams, cpuParams, additionalConfig) = this->GetParam(); configuration.insert(additionalConfig.begin(), additionalConfig.end()); - - std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; std::tie(postOpMgrPtr, fusedOps) = 
fusingParams; - groupConvBackpropDataSpecificParams groupConvParams; - std::vector inputShape, outputShape; - auto netPrecision = InferenceEngine::Precision::UNSPECIFIED; - std::tie(groupConvParams, netPrecision, inPrc, outPrc, inLayout, outLayout, inputShape, outputShape, targetDevice) = basicParamsSet; + std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, groupNum, padType, outPadding) = basicParamsSet; - if (inPrc == Precision::UNSPECIFIED) { - selectedType += std::string("_") + Precision(Precision::FP32).name(); + InputShape inputShape; + ngraph::helpers::InputLayerType outShapeType; + std::tie(inputShape, outShapeType, outShapeData) = inputData; + + std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; + + if (additionalConfig[InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16] == InferenceEngine::PluginConfigParams::YES) { + inType = outType = prec = ElementType::bf16; + rel_threshold = 1e-2f; } else { - selectedType += std::string("_") + inPrc.name(); + inType = outType = prec; } - ngraph::op::PadType padType; - InferenceEngine::SizeVector dilation; - std::vector padBegin, padEnd, outputPadding; - size_t convOutChannels, numGroups; - std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, numGroups, padType, outputPadding) = groupConvParams; + selectedType = makeSelectedTypeStr(selectedType, prec); - auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); - auto params = ngraph::builder::makeParams(ngPrc, {inputShape}); - auto paramOuts = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(params)); - std::shared_ptr groupConv; - if (!outputShape.empty()) { - auto outShape = ngraph::opset3::Constant::create(ngraph::element::i64, {outputShape.size()}, outputShape); - groupConv = std::dynamic_pointer_cast( - ngraph::builder::makeGroupConvolutionBackpropData(paramOuts[0], outShape, ngPrc, kernel, stride, padBegin, - padEnd, dilation, padType, convOutChannels, numGroups, false, outputPadding)); - } else { - groupConv = std::dynamic_pointer_cast( - ngraph::builder::makeGroupConvolutionBackpropData(paramOuts[0], ngPrc, kernel, stride, padBegin, - padEnd, dilation, padType, convOutChannels, numGroups, false, outputPadding)); + std::vector paramsShapes; + paramsShapes.push_back(inputShape); + if (!outShapeData.empty() && outShapeType == ngraph::helpers::InputLayerType::PARAMETER) { + const auto outShapeDims = ov::Shape{outShapeData.front().size()}; + paramsShapes.push_back(InputShape{outShapeDims, std::vector(inputShape.second.size(), outShapeDims)}); } - function = makeNgraphFunction(ngPrc, params, groupConv, "groupConvolutionBackpropData"); + + init_input_shapes(paramsShapes); + + function = createGraph(inputDynamicShapes, outShapeType); } + +private: + ElementType prec; + ngraph::op::PadType padType; + InferenceEngine::SizeVector dilation; + std::vector padBegin, padEnd, outPadding; + size_t convOutChannels, groupNum; + ngraph::helpers::InputLayerType outShapeType; + std::vector> outShapeData; + size_t inferRequestNum = 0; }; TEST_P(GroupDeconvolutionLayerCPUTest, CompareWithRefs) { @@ -112,75 +261,80 @@ TEST_P(GroupDeconvolutionLayerCPUTest, CompareWithRefs) { } } - Run(); + run(); CheckPluginRelatedResults(executableNetwork, "Deconvolution"); } namespace { -/* GROUP CONV TEST UTILS */ -std::vector filterParamsSetForDevice(std::vector paramsSet) { - std::vector resParamsSet; - const int cpuParamsIndex = 1; - const int selectedTypeIndex = 3; - - for (auto param : paramsSet) { - auto cpuParams = 
std::get(param); - auto selectedTypeStr = std::get(cpuParams); - - if (selectedTypeStr.find("jit") != std::string::npos && !with_cpu_x86_sse42()) - continue; - if (selectedTypeStr.find("avx512") != std::string::npos && !with_cpu_x86_avx512f()) - continue; - - resParamsSet.push_back(param); - } - - return resParamsSet; -} -/* ===================== */ - /* COMMON PARAMS */ std::vector fusingParamsSet { emptyFusingSpec, fusingScaleShift, }; const std::map cpuEmptyPluginConfig; -const std::map cpuBF16PluginConfig = { { PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES } }; +const std::map cpuBF16PluginConfig = { { InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16, + InferenceEngine::PluginConfigParams::YES } }; const std::vector> emptyOutputShape = {{}}; const std::vector> emptyOutputPadding = {{}}; /* ============= GroupConvolution params (planar layout) ============= */ -const SizeVector numOutChannels_Planar = {6}; -const SizeVector numGroups_Planar = {2, 3}; +const InferenceEngine::SizeVector numOutChannels_Planar = {6}; +const InferenceEngine::SizeVector numGroups_Planar = {2, 3}; /* ============= GroupConvolution params (blocked layout) ============= */ -const SizeVector numOutChannels_Blocked = {64}; -const SizeVector numGroups_Blocked = {2, 4}; +const InferenceEngine::SizeVector numOutChannels_Blocked = {64}; +const InferenceEngine::SizeVector numGroups_Blocked = {2, 4}; /* ============= GroupConvolution params (DW) ============= */ -const SizeVector numOutChannels_DW = {32}; -const SizeVector numGroups_DW = {32}; +const InferenceEngine::SizeVector numOutChannels_DW = {32}; +const InferenceEngine::SizeVector numGroups_DW = {32}; /* ============= GroupConvolution params (2D) ============= */ -const std::vector kernels2d = {{3, 3}, {1, 1}}; -const std::vector strides2d = {{1, 1}, {2, 2}}; +const std::vector kernels2d = {{3, 3}, {1, 1}}; +const std::vector strides2d = {{1, 1}, {2, 2}}; const std::vector> padBegins2d = {{0, 0}}; const std::vector> padEnds2d = {{0, 0}}; -const std::vector dilations2d = {{1, 1}}; +const std::vector dilations2d = {{1, 1}}; /* ============= GroupConvolution params (3D) ============= */ -const std::vector kernels3d = {{3, 3, 3}, {1, 1, 1}}; -const std::vector strides3d = {{1, 1, 1}, {2, 2, 2}}; +const std::vector kernels3d = {{3, 3, 3}, {1, 1, 1}}; +const std::vector strides3d = {{1, 1, 1}, {2, 2, 2}}; const std::vector> padBegins3d = {{0, 0, 0}}; const std::vector> padEnds3d = {{0, 0, 0}}; -const std::vector dilations3d = {{1, 1, 1}}; +const std::vector dilations3d = {{1, 1, 1}}; /* ============= */ /* INSTANCES */ /* ============= GroupConvolution (Planar 2D) ============= */ +const std::vector Planar_2D_inputs_smoke = { + DeconvInputData{ + InputShape{{}, {{ 2, 12, 7, 7 }}}, + ngraph::helpers::InputLayerType::CONSTANT, + {} + }, + DeconvInputData{ + InputShape{{-1, 12, -1, -1}, {{ 2, 12, 7, 7}, { 2, 12, 5, 7}, { 1, 12, 9, 4}}}, + ngraph::helpers::InputLayerType::PARAMETER, + {{15, 15}, {9, 10}, {9, 9}} + } +}; + +const std::vector Planar_2D_inputs_nightly = { + DeconvInputData{ + InputShape{{-1, 12, -1, -1}, {{ 2, 12, 7, 7}, { 2, 12, 5, 7}, { 1, 12, 9, 4}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {} + }, + DeconvInputData{ + InputShape{{-1, 12, -1, -1}, {{ 2, 12, 7, 7}, { 2, 12, 5, 7}, { 1, 12, 9, 4}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {{15, 15}} + } +}; + const auto groupConvParams_ExplicitPadding_Planar_2D = ::testing::Combine( ::testing::ValuesIn(kernels2d), ::testing::ValuesIn(strides2d), @@ -195,39 +349,71 @@ const 
auto groupConvParams_ExplicitPadding_Planar_2D = ::testing::Combine( INSTANTIATE_TEST_SUITE_P(smoke_GroupDeconv_2D_Planar_FP32, GroupDeconvolutionLayerCPUTest, ::testing::Combine( - ::testing::Combine( - groupConvParams_ExplicitPadding_Planar_2D, - ::testing::Values(Precision::FP32), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({ 2, 12, 7, 7 })), - ::testing::ValuesIn(emptyOutputShape), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_2D})), + groupConvParams_ExplicitPadding_Planar_2D, + ::testing::ValuesIn(Planar_2D_inputs_smoke), + ::testing::Values(ElementType::f32), ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_2D})), ::testing::Values(cpuEmptyPluginConfig)), GroupDeconvolutionLayerCPUTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_GroupDeconv_2D_Planar_BF16, GroupDeconvolutionLayerCPUTest, ::testing::Combine( - ::testing::Combine( - groupConvParams_ExplicitPadding_Planar_2D, - ::testing::Values(Precision::FP32), - ::testing::Values(InferenceEngine::Precision::BF16), - ::testing::Values(InferenceEngine::Precision::BF16), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({ 2, 12, 7, 7 })), - ::testing::ValuesIn(emptyOutputShape), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_2D})), + groupConvParams_ExplicitPadding_Planar_2D, + ::testing::ValuesIn(Planar_2D_inputs_smoke), + ::testing::Values(ElementType::f32), ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_2D})), + ::testing::Values(cpuBF16PluginConfig)), + GroupDeconvolutionLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(nightly_GroupDeconv_2D_Planar_FP32, GroupDeconvolutionLayerCPUTest, + ::testing::Combine( + groupConvParams_ExplicitPadding_Planar_2D, + ::testing::ValuesIn(Planar_2D_inputs_nightly), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_2D})), + ::testing::Values(cpuEmptyPluginConfig)), + GroupDeconvolutionLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(nightly_GroupDeconv_2D_Planar_BF16, GroupDeconvolutionLayerCPUTest, + ::testing::Combine( + groupConvParams_ExplicitPadding_Planar_2D, + ::testing::ValuesIn(Planar_2D_inputs_nightly), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_2D})), ::testing::Values(cpuBF16PluginConfig)), GroupDeconvolutionLayerCPUTest::getTestCaseName); /* ============= GroupConvolution (Planar 3D) ============= */ +const std::vector Planar_3D_inputs_smoke = { + DeconvInputData{ + InputShape{{}, {{ 2, 12, 7, 7, 7 }}}, + ngraph::helpers::InputLayerType::CONSTANT, + {} + }, + DeconvInputData{ + InputShape{{-1, 12, -1, -1, -1}, {{ 2, 12, 7, 7, 7}, { 2, 12, 5, 7, 7}, { 1, 12, 9, 4, 9}}}, + ngraph::helpers::InputLayerType::PARAMETER, + {{15, 15, 15}, {9, 10, 10}, {9, 9, 9}} + } +}; + +const std::vector Planar_3D_inputs_nightly = { + DeconvInputData{ + InputShape{{-1, 12, -1, -1, -1}, {{ 2, 12, 7, 7, 7}, { 2, 12, 5, 7, 7}, { 1, 12, 9, 4, 9}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {} + }, + 
DeconvInputData{ + InputShape{{-1, 12, -1, -1, -1}, {{ 2, 12, 7, 7, 7}, { 2, 12, 5, 7, 7}, { 1, 12, 9, 4, 9}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {{15, 15, 15}} + } +}; + const auto groupConvParams_ExplicitPadding_Planar_3D = ::testing::Combine( ::testing::ValuesIn(kernels3d), ::testing::ValuesIn(strides3d), @@ -242,39 +428,71 @@ const auto groupConvParams_ExplicitPadding_Planar_3D = ::testing::Combine( INSTANTIATE_TEST_SUITE_P(smoke_GroupDeconv_3D_Planar_FP32, GroupDeconvolutionLayerCPUTest, ::testing::Combine( - ::testing::Combine( - groupConvParams_ExplicitPadding_Planar_3D, - ::testing::Values(Precision::FP32), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({ 2, 12, 7, 7, 7 })), - ::testing::ValuesIn(emptyOutputShape), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_3D})), + groupConvParams_ExplicitPadding_Planar_3D, + ::testing::ValuesIn(Planar_3D_inputs_smoke), + ::testing::Values(ElementType::f32), ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_3D})), ::testing::Values(cpuEmptyPluginConfig)), GroupDeconvolutionLayerCPUTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_GroupDeconv_3D_Planar_BF16, GroupDeconvolutionLayerCPUTest, ::testing::Combine( - ::testing::Combine( - groupConvParams_ExplicitPadding_Planar_3D, - ::testing::Values(Precision::FP32), - ::testing::Values(InferenceEngine::Precision::BF16), - ::testing::Values(InferenceEngine::Precision::BF16), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({ 2, 12, 7, 7, 7 })), - ::testing::ValuesIn(emptyOutputShape), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_3D})), + groupConvParams_ExplicitPadding_Planar_3D, + ::testing::ValuesIn(Planar_3D_inputs_smoke), + ::testing::Values(ElementType::f32), ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_3D})), + ::testing::Values(cpuBF16PluginConfig)), + GroupDeconvolutionLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(nightly_GroupDeconv_3D_Planar_FP32, GroupDeconvolutionLayerCPUTest, + ::testing::Combine( + groupConvParams_ExplicitPadding_Planar_3D, + ::testing::ValuesIn(Planar_3D_inputs_nightly), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_3D})), + ::testing::Values(cpuEmptyPluginConfig)), + GroupDeconvolutionLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(nightly_GroupDeconv_3D_Planar_BF16, GroupDeconvolutionLayerCPUTest, + ::testing::Combine( + groupConvParams_ExplicitPadding_Planar_3D, + ::testing::ValuesIn(Planar_3D_inputs_nightly), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_3D})), ::testing::Values(cpuBF16PluginConfig)), GroupDeconvolutionLayerCPUTest::getTestCaseName); /* ============= GroupConvolution (Blocked 2D) ============= */ +const std::vector Blocked_2D_inputs_smoke = { + DeconvInputData{ + InputShape{{}, {{ 2, 64, 7, 7 }}}, + ngraph::helpers::InputLayerType::CONSTANT, + {} + }, + DeconvInputData{ + InputShape{{-1, 64, -1, -1}, {{ 2, 64, 7, 7}, { 2, 64, 5, 
7}, { 1, 64, 9, 5}}}, + ngraph::helpers::InputLayerType::PARAMETER, + {{15, 15}, {9, 10}, {19, 9}} + } +}; + +const std::vector Blocked_2D_inputs_nightly = { + DeconvInputData{ + InputShape{{-1, 64, -1, -1}, {{ 2, 64, 7, 7}, { 2, 64, 5, 7}, { 1, 64, 9, 4}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {} + }, + DeconvInputData{ + InputShape{{-1, 64, -1, -1}, {{ 2, 64, 7, 7}, { 2, 64, 5, 7}, { 1, 64, 9, 4}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {{15, 15}} + } +}; + const auto groupConvParams_ExplicitPadding_Blocked_2D = ::testing::Combine( ::testing::ValuesIn(kernels2d), ::testing::ValuesIn(strides2d), @@ -289,39 +507,71 @@ const auto groupConvParams_ExplicitPadding_Blocked_2D = ::testing::Combine( INSTANTIATE_TEST_SUITE_P(smoke_GroupDeconv_2D_Blocked_FP32, GroupDeconvolutionLayerCPUTest, ::testing::Combine( - ::testing::Combine( - groupConvParams_ExplicitPadding_Blocked_2D, - ::testing::Values(Precision::FP32), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({ 2, 64, 7, 7 })), - ::testing::ValuesIn(emptyOutputShape), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D})), + groupConvParams_ExplicitPadding_Blocked_2D, + ::testing::ValuesIn(Blocked_2D_inputs_smoke), + ::testing::Values(ElementType::f32), ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D})), ::testing::Values(cpuEmptyPluginConfig)), GroupDeconvolutionLayerCPUTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_GroupDeconv_2D_Blocked_BF16, GroupDeconvolutionLayerCPUTest, ::testing::Combine( - ::testing::Combine( - groupConvParams_ExplicitPadding_Blocked_2D, - ::testing::Values(Precision::FP32), - ::testing::Values(InferenceEngine::Precision::BF16), - ::testing::Values(InferenceEngine::Precision::BF16), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({ 2, 64, 7, 7 })), - ::testing::ValuesIn(emptyOutputShape), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D})), + groupConvParams_ExplicitPadding_Blocked_2D, + ::testing::ValuesIn(Blocked_2D_inputs_smoke), + ::testing::Values(ElementType::f32), ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D})), + ::testing::Values(cpuBF16PluginConfig)), + GroupDeconvolutionLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(nightly_GroupDeconv_2D_Blocked_FP32, GroupDeconvolutionLayerCPUTest, + ::testing::Combine( + groupConvParams_ExplicitPadding_Blocked_2D, + ::testing::ValuesIn(Blocked_2D_inputs_nightly), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D})), + ::testing::Values(cpuEmptyPluginConfig)), + GroupDeconvolutionLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(nightly_GroupDeconv_2D_Blocked_BF16, GroupDeconvolutionLayerCPUTest, + ::testing::Combine( + groupConvParams_ExplicitPadding_Blocked_2D, + ::testing::ValuesIn(Blocked_2D_inputs_nightly), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D})), ::testing::Values(cpuBF16PluginConfig)), 
GroupDeconvolutionLayerCPUTest::getTestCaseName); /* ============= GroupConvolution (Blocked 3D) ============= */ +const std::vector Blocked_3D_inputs_smoke = { + DeconvInputData{ + InputShape{{}, {{ 2, 64, 7, 7, 7 }}}, + ngraph::helpers::InputLayerType::CONSTANT, + {} + }, + DeconvInputData{ + InputShape{{-1, 64, -1, -1, -1}, {{ 1, 64, 5, 5, 5}, { 2, 64, 5, 7, 5}}}, + ngraph::helpers::InputLayerType::PARAMETER, + {{7, 7, 7}, {7, 9, 7}} + } +}; + +const std::vector Blocked_3D_inputs_nightly = { + DeconvInputData{ + InputShape{{-1, 64, -1, -1, -1}, {{ 1, 64, 5, 5, 5}, { 2, 64, 5, 7, 5}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {} + }, + DeconvInputData{ + InputShape{{-1, 64, -1, -1, -1}, {{ 1, 64, 5, 5, 5}, { 2, 64, 5, 7, 5}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {{7, 7, 7}} + } +}; + const auto groupConvParams_ExplicitPadding_Blocked_3D = ::testing::Combine( ::testing::ValuesIn(kernels3d), ::testing::ValuesIn(strides3d), @@ -336,39 +586,71 @@ const auto groupConvParams_ExplicitPadding_Blocked_3D = ::testing::Combine( INSTANTIATE_TEST_SUITE_P(smoke_GroupDeconv_3D_Blocked_FP32, GroupDeconvolutionLayerCPUTest, ::testing::Combine( - ::testing::Combine( - groupConvParams_ExplicitPadding_Blocked_3D, - ::testing::Values(Precision::FP32), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({ 2, 64, 7, 7, 7 })), - ::testing::ValuesIn(emptyOutputShape), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_3D})), + groupConvParams_ExplicitPadding_Blocked_3D, + ::testing::ValuesIn(Blocked_3D_inputs_smoke), + ::testing::Values(ElementType::f32), ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_3D})), ::testing::Values(cpuEmptyPluginConfig)), GroupDeconvolutionLayerCPUTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_GroupDeconv_3D_Blocked_BF16, GroupDeconvolutionLayerCPUTest, ::testing::Combine( - ::testing::Combine( - groupConvParams_ExplicitPadding_Blocked_3D, - ::testing::Values(Precision::FP32), - ::testing::Values(InferenceEngine::Precision::BF16), - ::testing::Values(InferenceEngine::Precision::BF16), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({ 2, 64, 7, 7, 7 })), - ::testing::ValuesIn(emptyOutputShape), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_3D})), + groupConvParams_ExplicitPadding_Blocked_3D, + ::testing::ValuesIn(Blocked_3D_inputs_smoke), + ::testing::Values(ElementType::f32), ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_3D})), + ::testing::Values(cpuBF16PluginConfig)), + GroupDeconvolutionLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(nightly_GroupDeconv_3D_Blocked_FP32, GroupDeconvolutionLayerCPUTest, + ::testing::Combine( + groupConvParams_ExplicitPadding_Blocked_3D, + ::testing::ValuesIn(Blocked_3D_inputs_nightly), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_3D})), + ::testing::Values(cpuEmptyPluginConfig)), + GroupDeconvolutionLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(nightly_GroupDeconv_3D_Blocked_BF16, 
GroupDeconvolutionLayerCPUTest, + ::testing::Combine( + groupConvParams_ExplicitPadding_Blocked_3D, + ::testing::ValuesIn(Blocked_3D_inputs_nightly), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_3D})), ::testing::Values(cpuBF16PluginConfig)), GroupDeconvolutionLayerCPUTest::getTestCaseName); /* ============= GroupConvolution (DW 2D) ============= */ +const std::vector dw_2D_inputs_smoke = { + DeconvInputData{ + InputShape{{}, {{ 2, 32, 7, 7 }}}, + ngraph::helpers::InputLayerType::CONSTANT, + {} + }, + DeconvInputData{ + InputShape{{-1, 32, -1, -1}, {{ 1, 32, 5, 5}, { 2, 32, 5, 7}}}, + ngraph::helpers::InputLayerType::PARAMETER, + {{7, 7}, {7, 9}} + } +}; + +const std::vector dw_2D_inputs_nightly = { + DeconvInputData{ + InputShape{{-1, 32, -1, -1}, {{ 1, 32, 5, 5}, { 2, 32, 5, 7}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {} + }, + DeconvInputData{ + InputShape{{-1, 32, -1, -1}, {{ 1, 32, 5, 5}, { 2, 32, 5, 7}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {{7, 7}} + } +}; + const auto groupConvParams_ExplicitPadding_DW_2D = ::testing::Combine( ::testing::ValuesIn(kernels2d), ::testing::ValuesIn(strides2d), @@ -383,37 +665,111 @@ const auto groupConvParams_ExplicitPadding_DW_2D = ::testing::Combine( INSTANTIATE_TEST_SUITE_P(smoke_GroupDeconv_2D_DW_FP32, GroupDeconvolutionLayerCPUTest, ::testing::Combine( - ::testing::Combine( - groupConvParams_ExplicitPadding_DW_2D, - ::testing::Values(Precision::FP32), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({ 2, 32, 7, 7 })), - ::testing::ValuesIn(emptyOutputShape), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_dw_2D})), + groupConvParams_ExplicitPadding_DW_2D, + ::testing::ValuesIn(dw_2D_inputs_smoke), + ::testing::Values(ElementType::f32), ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_dw_2D})), ::testing::Values(cpuEmptyPluginConfig)), GroupDeconvolutionLayerCPUTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_GroupDeconv_2D_DW_BF16, GroupDeconvolutionLayerCPUTest, ::testing::Combine( - ::testing::Combine( - groupConvParams_ExplicitPadding_DW_2D, - ::testing::Values(Precision::FP32), - ::testing::Values(InferenceEngine::Precision::BF16), - ::testing::Values(InferenceEngine::Precision::BF16), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({ 2, 32, 7, 7 })), - ::testing::ValuesIn(emptyOutputShape), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_dw_2D})), + groupConvParams_ExplicitPadding_DW_2D, + ::testing::ValuesIn(dw_2D_inputs_smoke), + ::testing::Values(ElementType::f32), ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_dw_2D})), ::testing::Values(cpuBF16PluginConfig)), GroupDeconvolutionLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(nightly_GroupDeconv_2D_DW_FP32, GroupDeconvolutionLayerCPUTest, + ::testing::Combine( + groupConvParams_ExplicitPadding_DW_2D, + ::testing::ValuesIn(dw_2D_inputs_nightly), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(fusingParamsSet), + 
::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_dw_2D})), + ::testing::Values(cpuEmptyPluginConfig)), + GroupDeconvolutionLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(nightly_GroupDeconv_2D_DW_BF16, GroupDeconvolutionLayerCPUTest, + ::testing::Combine( + groupConvParams_ExplicitPadding_DW_2D, + ::testing::ValuesIn(dw_2D_inputs_nightly), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_dw_2D})), + ::testing::Values(cpuBF16PluginConfig)), + GroupDeconvolutionLayerCPUTest::getTestCaseName); + +/* ============= Reorder + GroupDeconvolution ============= */ +INSTANTIATE_TEST_SUITE_P(smoke_reorder_GroupDeconv_2D, GroupDeconvolutionLayerCPUTest, + ::testing::Combine( + ::testing::Combine(::testing::ValuesIn(kernels2d), + ::testing::Values(InferenceEngine::SizeVector{1, 1}), + ::testing::ValuesIn(padBegins2d), + ::testing::ValuesIn(padEnds2d), + ::testing::ValuesIn(dilations2d), + ::testing::ValuesIn(numOutChannels_Blocked), + ::testing::ValuesIn(numGroups_Blocked), + ::testing::Values(ngraph::op::PadType::EXPLICIT), + ::testing::ValuesIn(emptyOutputPadding)), + ::testing::Values(DeconvInputData{InputShape{{-1, 64, -1, -1}, {{ 2, 64, 7, 7}, { 2, 64, 5, 7}, { 1, 64, 9, 4}}}, + ngraph::helpers::InputLayerType::PARAMETER, + {{15, 15}, {9, 10}, {9, 9}}}), + ::testing::Values(ElementType::f32), + ::testing::Values(emptyFusingSpec), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D})), + ::testing::Values(cpuEmptyPluginConfig)), + GroupDeconvolutionLayerCPUTest::getTestCaseName); + +/* ============= GroupDeconvolution auto padding tests ============= */ +const std::vector inputs_2D_AutoPadding = { + DeconvInputData{ + InputShape{{}, {{ 2, 64, 7, 7 }}}, + ngraph::helpers::InputLayerType::CONSTANT, + {} + }, + DeconvInputData{ + InputShape{{-1, 64, -1, -1}, {{ 2, 64, 7, 7}, { 2, 64, 5, 7}, { 1, 64, 9, 4}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {} + }, + DeconvInputData{ + InputShape{{-1, 64, -1, -1}, {{ 2, 64, 7, 7}, { 2, 64, 5, 7}, { 1, 64, 9, 4}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {{15, 15}} + }, + DeconvInputData{ + InputShape{{-1, 64, -1, -1}, {{ 2, 64, 7, 7}, { 2, 64, 5, 7}, { 1, 64, 9, 5}}}, + ngraph::helpers::InputLayerType::PARAMETER, + {{15, 15}, {9, 10}, {19, 9}} + } +}; + +const auto groupDeconvParams_AutoPadding_2D = ::testing::Combine( + ::testing::ValuesIn(kernels2d), + ::testing::ValuesIn(strides2d), + ::testing::ValuesIn(padBegins2d), + ::testing::ValuesIn(padEnds2d), + ::testing::ValuesIn(dilations2d), + ::testing::ValuesIn(numOutChannels_Blocked), + ::testing::ValuesIn(numGroups_Blocked), + ::testing::Values(ngraph::op::PadType::SAME_UPPER, ngraph::op::PadType::SAME_LOWER), + ::testing::ValuesIn(emptyOutputPadding) +); + +INSTANTIATE_TEST_SUITE_P(smoke_GroupDeconv_2D_AutoPadding_FP32, GroupDeconvolutionLayerCPUTest, + ::testing::Combine( + groupDeconvParams_AutoPadding_2D, + ::testing::ValuesIn(inputs_2D_AutoPadding), + ::testing::Values(ElementType::f32), + ::testing::Values(emptyFusingSpec), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_2D, conv_avx512_2D})), + ::testing::Values(cpuEmptyPluginConfig)), + GroupDeconvolutionLayerCPUTest::getTestCaseName); + } // namespace -} // namespace CPULayerTestsDefinitions +} // namespace CPULayerTestsDefinitions \ No newline at end of file diff --git a/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp b/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp index 
851a2dad329..d6cf3cfd997 100644 --- a/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp +++ b/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp @@ -55,6 +55,8 @@ void SubgraphBaseTest::run() { try { if (!inputDynamicShapes.empty()) { // resize ngraph function according new target shape + // Note: output shapes of some nodes depend on the input data + // so for some tests we need to override this function and replace parameter with constant node to get correct output shapes init_ref_function(functionRefs, targetStaticShapeVec); } generate_inputs(targetStaticShapeVec); diff --git a/src/tests/ngraph_helpers/ngraph_functions/src/convolution_backprop_data.cpp b/src/tests/ngraph_helpers/ngraph_functions/src/convolution_backprop_data.cpp index 91d6c0fc085..0280895ba88 100644 --- a/src/tests/ngraph_helpers/ngraph_functions/src/convolution_backprop_data.cpp +++ b/src/tests/ngraph_helpers/ngraph_functions/src/convolution_backprop_data.cpp @@ -24,8 +24,8 @@ std::shared_ptr makeConvolutionBackpropData(const ngraph::Output &in const std::vector &filterWeights, const std::vector &biasesWeights) { bool randomFilterWeights = filterWeights.empty(); - auto shape = in.get_shape(); - std::vector filterWeightsShape = {shape[1], numOutChannels}; + auto shape = in.get_partial_shape(); + std::vector filterWeightsShape = {static_cast(shape[1].get_length()), numOutChannels}; filterWeightsShape.insert(filterWeightsShape.end(), filterSize.begin(), filterSize.end()); auto filterWeightsNode = makeConstant(type, filterWeightsShape, filterWeights, randomFilterWeights); @@ -74,8 +74,8 @@ std::shared_ptr makeConvolutionBackpropData(const ngraph::Output &in const std::vector &filterWeights, const std::vector &biasesWeights) { bool randomFilterWeights = filterWeights.empty(); - auto shape = in.get_shape(); - std::vector filterWeightsShape = {shape[1], numOutChannels}; + auto shape = in.get_partial_shape(); + std::vector filterWeightsShape = {static_cast(shape[1].get_length()), numOutChannels}; filterWeightsShape.insert(filterWeightsShape.end(), filterSize.begin(), filterSize.end()); auto filterWeightsNode = makeConstant(type, filterWeightsShape, filterWeights, randomFilterWeights); diff --git a/src/tests/ngraph_helpers/ngraph_functions/src/group_convolution_backprop_data.cpp b/src/tests/ngraph_helpers/ngraph_functions/src/group_convolution_backprop_data.cpp index 84c141f8a20..e7642633e92 100644 --- a/src/tests/ngraph_helpers/ngraph_functions/src/group_convolution_backprop_data.cpp +++ b/src/tests/ngraph_helpers/ngraph_functions/src/group_convolution_backprop_data.cpp @@ -25,8 +25,8 @@ std::shared_ptr makeGroupConvolutionBackpropData(const ngraph::Output &filterWeights, const std::vector &biasesWeights) { bool randomFilterWeights = filterWeights.empty(); - auto shape = in.get_shape(); - std::vector filterWeightsShape = {shape[1], numOutChannels}; + auto shape = in.get_partial_shape(); + std::vector filterWeightsShape = {static_cast(shape[1].get_length()), numOutChannels}; if (filterWeightsShape[0] % numGroups || filterWeightsShape[1] % numGroups) throw std::runtime_error("incorrect shape for GroupConvolutionBackpropData"); filterWeightsShape[0] /= numGroups; @@ -81,8 +81,8 @@ std::shared_ptr makeGroupConvolutionBackpropData(const ngraph::Output &filterWeights, const std::vector &biasesWeights) { bool randomFilterWeights = filterWeights.empty(); - auto shape = in.get_shape(); - std::vector filterWeightsShape = {shape[1], numOutChannels}; + auto shape = in.get_partial_shape(); + std::vector 
filterWeightsShape = {static_cast(shape[1].get_length()), numOutChannels}; if (filterWeightsShape[0] % numGroups || filterWeightsShape[1] % numGroups) throw std::runtime_error("incorrect shape for GroupConvolutionBackpropData"); filterWeightsShape[0] /= numGroups; diff --git a/src/tests/unit/cpu/mkldnn_memory_desc_test.cpp b/src/tests/unit/cpu/mkldnn_memory_desc_test.cpp index c4ff9feb0cb..4e25879f191 100644 --- a/src/tests/unit/cpu/mkldnn_memory_desc_test.cpp +++ b/src/tests/unit/cpu/mkldnn_memory_desc_test.cpp @@ -541,7 +541,7 @@ TEST(cloneWithParamsChange, UndefinedAndDefaultParams) { } } -TEST(makeDummyDesc, LowerBoundMoreThenDummyValie) { +TEST(makeDummyDesc, LowerBoundMoreThanDummyValue) { Shape shape(ngraph::PartialShape{1, 3, 85, {144, 1444}}); auto desc = std::make_shared(shape, mkldnn::memory::data_type::f32, mkldnn::memory::format_tag::nchw); ASSERT_FALSE(desc->isDefined()); From 58be795970b3a32107c027d14a77185f0adc10ac Mon Sep 17 00:00:00 2001 From: Alexandra Sidorova Date: Thu, 16 Dec 2021 10:24:57 +0300 Subject: [PATCH 18/27] [CPU] Added BF16 support for CumSum as well (#9030) --- .../nodes/mkldnn_cum_sum_node.cpp | 48 ++--- .../mkldnn_plugin/nodes/mkldnn_cum_sum_node.h | 7 + .../plugin/cpu/single_layer_tests/cum_sum.cpp | 183 ++++++------------ 3 files changed, 75 insertions(+), 163 deletions(-) diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.cpp index 68fd40d9a56..a99e30a1db0 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.cpp @@ -12,6 +12,7 @@ #include "ie_precision.hpp" #include #include "mkldnn_cum_sum_node.h" +#include "utils/bfloat16.hpp" using namespace MKLDNNPlugin; using namespace InferenceEngine; @@ -70,8 +71,7 @@ void MKLDNNCumSumNode::initSupportedPrimitiveDescriptors() { return; dataPrecision = getOriginalInputPrecisionAtPort(CUM_SUM_DATA); - if (dataPrecision != Precision::I8 && dataPrecision != Precision::U8 && dataPrecision != Precision::I16 && dataPrecision != Precision::I32 && - dataPrecision != Precision::FP32 && dataPrecision != Precision::I64 && dataPrecision != Precision::U64 && dataPrecision != Precision::BF16) + if (!one_of(dataPrecision, Precision::I8, Precision::U8, Precision::I16, Precision::BF16, Precision::I32, Precision::FP32, Precision::I64, Precision::U64)) IE_THROW() << errorPrefix << " has unsupported 'data' input precision: " << dataPrecision.name(); if (inputShapes.size() == numOfInputs) { @@ -95,43 +95,17 @@ void MKLDNNCumSumNode::execute(mkldnn::stream strm) { if (inputShapes.size() == numOfInputs) axis = getAxis(getParentEdgeAt(AXIS)->getMemory(), getParentEdgeAt(CUM_SUM_DATA)->getMemory()); - switch (dataPrecision) { - case Precision::I8 : { - exec(); - break; - } - case Precision::U8 : { - exec(); - break; - } - case Precision::I16 : { - exec(); - break; - } - case Precision::I32 : { - exec(); - break; - } - case Precision::FP32 : { - exec(); - break; - } - case Precision::I64 : { - exec(); - break; - } - case Precision::U64 : { - exec(); - break; - } - default : { - std::string errorMsg = errorPrefix + " has unsupported 'data' input precision: " + dataPrecision.name(); - IE_THROW() << errorMsg; - } - } + OV_SWITCH(MKLDNNPlugin, CumSumExecute, this, dataPrecision, + OV_CASE(Precision::I8, int8_t), + OV_CASE(Precision::U8, uint8_t), + OV_CASE(Precision::I16, int16_t), + OV_CASE(Precision::BF16, bfloat16_t), + OV_CASE(Precision::I32, int32_t), + 
OV_CASE(Precision::FP32, float), + OV_CASE(Precision::I64, int64_t), + OV_CASE(Precision::U64, uint64_t)) } - template void MKLDNNCumSumNode::exec() { const auto *input = reinterpret_cast(getParentEdgeAt(CUM_SUM_DATA)->getMemoryPtr()->GetPtr()); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.h index 2e5ebfaf7d8..f917a53ef34 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.h @@ -47,6 +47,13 @@ private: InferenceEngine::Precision dataPrecision; std::string errorPrefix; + + template + struct CumSumExecute { + void operator()(MKLDNNCumSumNode* node) { + node->exec(); + } + }; }; } // namespace MKLDNNPlugin diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/cum_sum.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/cum_sum.cpp index 90a1d12b5df..bd84c142971 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/cum_sum.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/cum_sum.cpp @@ -9,57 +9,55 @@ using namespace ngraph; using namespace InferenceEngine; using namespace CPUTestUtils; +using namespace ov; +using namespace test; namespace CPULayerTestsDefinitions { -using cumSumShape = std::pair, std::vector>>; using cumSumParams = std::tuple< ngraph::element::Type, // data precision - cumSumShape, // input shape + InputShape, // input shape std::int64_t, // axis bool, // exclusive bool>; // reverse -class CumSumLayerCPUTest : public testing::WithParamInterface, public ov::test::SubgraphBaseTest, public CPUTestsBase { +class CumSumLayerCPUTest : public testing::WithParamInterface, + public SubgraphBaseTest, public CPUTestsBase { public: static std::string getTestCaseName(testing::TestParamInfo obj) { ngraph::element::Type inputPrecision; - std::pair, std::vector>> shapes; + InputShape shapes; std::int64_t axis; bool exclusive; bool reverse; std::tie(inputPrecision, shapes, axis, exclusive, reverse) = obj.param; - std::ostringstream result; - result << inputPrecision << "_" << "IS=" << CommonTestUtils::partialShape2str(shapes.first) << "_" << "TS="; - for (const auto& shape : shapes.second) { - result << "("; - for (const auto& item : shape) { - result << CommonTestUtils::vec2str(item) << "_"; - } - result << ")_"; + std::ostringstream results; + results << "IS=" << CommonTestUtils::partialShape2str({shapes.first}) << "_"; + results << "TS="; + for (const auto& item : shapes.second) { + results << CommonTestUtils::vec2str(item) << "_"; } - - result << "Axis=" << axis << "_" << (exclusive ? "exclusive" : "") << "_" << (reverse ? "reverse" : ""); - return result.str(); + results << "Prc=" << inputPrecision << "_"; + results << "Axis=" << axis << "_" << (exclusive ? "exclusive" : "") << "_" << (reverse ? 
"reverse" : ""); + return results.str(); } protected: void SetUp() override { targetDevice = CommonTestUtils::DEVICE_CPU; - ngraph::element::Type inputPrecision; - std::pair, std::vector>> shapes; + InputShape shapes; std::int64_t axis; bool exclusive; bool reverse; - std::tie(inputPrecision, shapes, axis, exclusive, reverse) = this->GetParam(); + std::tie(inType, shapes, axis, exclusive, reverse) = this->GetParam(); + if (inType == ElementType::bf16) + rel_threshold = 0.05f; - for (size_t i = 0; i < shapes.second.size(); i++) { - targetStaticShapes.push_back(shapes.second[i]); - } - inputDynamicShapes = shapes.first; + selectedType = makeSelectedTypeStr("ref_any", inType); + init_input_shapes({shapes}); - auto params = ngraph::builder::makeDynamicParams(inputPrecision, { inputDynamicShapes.front() }); + auto params = ngraph::builder::makeDynamicParams(inType, inputDynamicShapes); auto axisNode = ngraph::opset1::Constant::create(ngraph::element::i32, ngraph::Shape{}, std::vector{axis})->output(0); auto cumSum = ngraph::builder::makeCumSum(params[0], axisNode, exclusive, reverse); @@ -72,15 +70,12 @@ TEST_P(CumSumLayerCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - // TODO: Should be uncommented after updating the CheckPluginRelatedResults() method - //CheckPluginRelatedResults(executableNetwork, "CumSum"); + CheckPluginRelatedResults(executableNetwork, "CumSum"); } const ngraph::element::TypeVector inputPrecision = { ngraph::element::i8, - ngraph::element::u8, - ngraph::element::i16, - ngraph::element::i32, + ngraph::element::bf16, ngraph::element::f32 }; @@ -90,97 +85,33 @@ const std::vector negativeAxes = { -1, -2, -3, -4, -5, -6 }; const std::vector exclusive = { true, false }; const std::vector reverse = { true, false }; -const std::vector inShapes = { - { - // dynamic - { - {-1} - }, - // target - { - {{16}, {18}, {12}} - } - }, - { - // dynamic - { - {-1, -1} - }, - // target - { - {{9, 15}, {18, 12}, {12, 12}} - } - }, - { - // dynamic - { - {-1, -1, -1} - }, - // target - { - {{16, 10, 12}, {18, 12, 10}, {12, 18, 10}} - } - }, - { - // dynamic - { - {-1, -1, -1, -1} - }, - // target - { - {{18, 20, 14, 12}, {19, 20, 14, 12}, {20, 22, 23, 25}} - } - }, - { - // dynamic - { - {-1, -1, -1, -1, -1} - }, - // target - { - {{2, 4, 6, 2, 4}, {3, 5, 6, 3, 5}, {1, 4, 2, 6, 8}} - } - }, - { - // dynamic - { - {-1, -1, -1, -1, -1, -1} - }, - // target - { - {{2, 4, 6, 2, 4, 2}, {3, 5, 6, 3, 5, 3}, {1, 4, 2, 6, 8, 1}} - } - }, - { - // dynamic - { - {-1, -1, -1, -1, -1, -1, -1} - }, - // target - { - {{2, 4, 6, 2, 4, 2, 4}, {3, 5, 6, 3, 5, 3, 5}, {1, 4, 2, 6, 8, 1, 4}} - } - }, - { - // dynamic - { - {{2, 5}, {3, 7}, {4, 8}, {5, 7}, {2, 5}, {3, 7}, {1, 2}} - }, - // target - { - {{2, 4, 6, 5, 4, 3, 1}, {3, 5, 6, 6, 5, 3, 1}, {5, 7, 4, 6, 3, 7, 2}} - } - }, - { - // dynamic - { - {{2, 5}, -1, {4, 8}, -1, -1, {3, 7}, -1} - }, - // target - { - {{2, 4, 6, 5, 4, 3, 1}, {3, 5, 6, 6, 5, 3, 1}, {5, 7, 4, 6, 3, 7, 2}} - } - }, +const std::vector inShapes = { + {{-1}, + {{16}, {18}, {12}}}, + + {{-1, -1}, + {{9, 15}, {18, 12}, {12, 12}}}, + + {{-1, -1, -1}, + {{16, 10, 12}, {18, 12, 10}, {12, 18, 10}}}, + + {{-1, -1, -1, -1}, + {{18, 20, 14, 12}, {19, 20, 14, 12}, {20, 22, 23, 25}}}, + + {{-1, -1, -1, -1, -1}, + {{2, 4, 6, 2, 4}, {3, 5, 6, 3, 5}, {1, 4, 2, 6, 8}}}, + + {{-1, -1, -1, -1, -1, -1}, + {{2, 4, 6, 2, 4, 2}, {3, 5, 6, 3, 5, 3}, {1, 4, 2, 6, 8, 1}}}, + + {{{-1, -1, -1, -1, -1, -1, -1}}, + {{2, 4, 6, 2, 4, 2, 4}, {3, 5, 6, 3, 5, 3, 5}, {1, 4, 2, 6, 8, 1, 4}}}, + + {{{2, 
5}, {3, 7}, {4, 8}, {5, 7}, {2, 5}, {3, 7}, {1, 2}}, + {{2, 4, 6, 5, 4, 3, 1}, {3, 5, 6, 6, 5, 3, 1}, {5, 7, 4, 6, 3, 7, 2}}}, + + {{{2, 5}, -1, {4, 8}, -1, -1, {3, 7}, -1}, + {{2, 4, 6, 5, 4, 3, 1}, {3, 5, 6, 6, 5, 3, 1}, {5, 7, 4, 6, 3, 7, 2}}} }; const auto testCasesAxis_0 = ::testing::Combine( @@ -193,7 +124,7 @@ const auto testCasesAxis_0 = ::testing::Combine( const auto testCasesAxis_1 = ::testing::Combine( ::testing::ValuesIn(inputPrecision), - ::testing::ValuesIn(std::vector(inShapes.begin() + 1, inShapes.end())), + ::testing::ValuesIn(std::vector(inShapes.begin() + 1, inShapes.end())), ::testing::Values(axes[1]), ::testing::ValuesIn(exclusive), ::testing::ValuesIn(reverse) @@ -201,7 +132,7 @@ const auto testCasesAxis_1 = ::testing::Combine( const auto testCasesAxis_2 = ::testing::Combine( ::testing::ValuesIn(inputPrecision), - ::testing::ValuesIn(std::vector(inShapes.begin() + 2, inShapes.end())), + ::testing::ValuesIn(std::vector(inShapes.begin() + 2, inShapes.end())), ::testing::Values(axes[2]), ::testing::ValuesIn(exclusive), ::testing::ValuesIn(reverse) @@ -209,7 +140,7 @@ const auto testCasesAxis_2 = ::testing::Combine( const auto testCasesAxis_3 = ::testing::Combine( ::testing::ValuesIn(inputPrecision), - ::testing::ValuesIn(std::vector(inShapes.begin() + 3, inShapes.end())), + ::testing::ValuesIn(std::vector(inShapes.begin() + 3, inShapes.end())), ::testing::Values(axes[3]), ::testing::ValuesIn(exclusive), ::testing::ValuesIn(reverse) @@ -217,7 +148,7 @@ const auto testCasesAxis_3 = ::testing::Combine( const auto testCasesAxis_4 = ::testing::Combine( ::testing::ValuesIn(inputPrecision), - ::testing::ValuesIn(std::vector(inShapes.begin() + 4, inShapes.end())), + ::testing::ValuesIn(std::vector(inShapes.begin() + 4, inShapes.end())), ::testing::Values(axes[4]), ::testing::ValuesIn(exclusive), ::testing::ValuesIn(reverse) @@ -225,7 +156,7 @@ const auto testCasesAxis_4 = ::testing::Combine( const auto testCasesAxis_5 = ::testing::Combine( ::testing::ValuesIn(inputPrecision), - ::testing::ValuesIn(std::vector(inShapes.begin() + 5, inShapes.end())), + ::testing::ValuesIn(std::vector(inShapes.begin() + 5, inShapes.end())), ::testing::Values(axes[5]), ::testing::ValuesIn(exclusive), ::testing::ValuesIn(reverse) @@ -233,7 +164,7 @@ const auto testCasesAxis_5 = ::testing::Combine( const auto testCasesAxis_6 = ::testing::Combine( ::testing::ValuesIn(inputPrecision), - ::testing::ValuesIn(std::vector(inShapes.begin() + 6, inShapes.end())), + ::testing::ValuesIn(std::vector(inShapes.begin() + 6, inShapes.end())), ::testing::Values(axes[6]), ::testing::ValuesIn(exclusive), ::testing::ValuesIn(reverse) @@ -241,7 +172,7 @@ const auto testCasesAxis_6 = ::testing::Combine( const auto testCasesAxis_negative = ::testing::Combine( ::testing::ValuesIn(inputPrecision), - ::testing::ValuesIn(std::vector(inShapes.begin() + 6, inShapes.end())), + ::testing::ValuesIn(std::vector(inShapes.begin() + 6, inShapes.end())), ::testing::ValuesIn(negativeAxes), ::testing::ValuesIn(exclusive), ::testing::ValuesIn(reverse) From 2068d5838ba5a851d3d95ab0c79927d6dcbebe43 Mon Sep 17 00:00:00 2001 From: Roman Lyamin Date: Thu, 16 Dec 2021 10:27:27 +0300 Subject: [PATCH 19/27] [GPU] Add I420toRGB/I420toBGR operations (#9204) --- .../convert_color_kernel_base.cpp | 3 + .../core/cl_kernels/convert_color_ref.cl | 50 ++-- .../test_cases/convert_color_gpu_test.cpp | 226 ++++++++++++++++-- src/core/include/openvino/op/i420_to_bgr.hpp | 2 + src/core/include/openvino/op/i420_to_rgb.hpp | 2 + src/core/src/op/i420_to_bgr.cpp | 
2 + src/core/src/op/i420_to_rgb.cpp | 2 + .../intel_gpu/plugin/primitives_list.hpp | 2 + .../src/plugin/ops/convert_color.cpp | 16 +- .../intel_gpu/src/plugin/ops/parameter.cpp | 4 +- .../intel_gpu/src/plugin/ops/result.cpp | 4 +- .../single_layer_tests/convert_color_i420.cpp | 58 +++++ 12 files changed, 327 insertions(+), 44 deletions(-) create mode 100644 src/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/convert_color_i420.cpp diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convert_color/convert_color_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convert_color/convert_color_kernel_base.cpp index 8cbd44375d2..88648ba1809 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convert_color/convert_color_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convert_color/convert_color_kernel_base.cpp @@ -43,6 +43,9 @@ JitConstants ConvertColorKernelBase::GetJitConstants(const convert_color_params& case color_format::NV12: jit.AddConstant(MakeJitConstant("CONVERT_FROM_NV12", "")); break; + case color_format::I420: + jit.AddConstant(MakeJitConstant("CONVERT_FROM_I420", "")); + break; default: IE_THROW() << "Not supported input color format"; } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convert_color_ref.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convert_color_ref.cl index 24703d15d90..2e24deb4236 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convert_color_ref.cl +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convert_color_ref.cl @@ -5,11 +5,14 @@ #include "include/batch_headers/fetch_data.cl" #include "include/batch_headers/data_types.cl" -#ifdef CONVERT_FROM_NV12 +#if defined(CONVERT_FROM_NV12) || defined(CONVERT_FROM_I420) #ifdef BUFFER_MEM -KERNEL(convert_color_ref)(const __global INPUT0_TYPE* input_y, -#if INPUTS_COUNT == 2 - const __global INPUT1_TYPE* input_uv, +KERNEL(convert_color_ref)(const __global INPUT0_TYPE* input1, +#if INPUTS_COUNT > 1 + const __global INPUT1_TYPE* input2, +#if INPUTS_COUNT == 3 + const __global INPUT2_TYPE* input3, +#endif #endif __global OUTPUT_TYPE* output) { @@ -17,16 +20,19 @@ KERNEL(convert_color_ref)(const __global INPUT0_TYPE* input_y, const uint y = get_global_id(1); const uint x = get_global_id(2); - float Y = input_y[GET_DATA_INDEX(INPUT0, b, 0, y, x)]; + float Y = input1[GET_DATA_INDEX(INPUT0, b, 0, y, x)]; -#if INPUTS_COUNT == 2 - float U = input_uv[GET_DATA_INDEX(INPUT1, b, 0, y / 2, x / 2)]; - float V = input_uv[GET_DATA_INDEX(INPUT1, b, 1, y / 2, x / 2)]; +#if INPUTS_COUNT == 3 + float U = input2[GET_DATA_INDEX(INPUT1, b, 0, y / 2, x / 2)]; + float V = input3[GET_DATA_INDEX(INPUT2, b, 0, y / 2, x / 2)]; +#elif INPUTS_COUNT == 2 + float U = input2[GET_DATA_INDEX(INPUT1, b, 0, y / 2, x / 2)]; + float V = input2[GET_DATA_INDEX(INPUT1, b, 1, y / 2, x / 2)]; #else // Single plane uint input_uv_offset = INPUT0_SIZE_X * INPUT0_SIZE_Y / 3 * 2; - float U = input_y[GET_DATA_INDEX(INPUT0, b, 0, y / 2, (x / 2) * 2) + input_uv_offset]; - float V = input_y[GET_DATA_INDEX(INPUT0, b, 1, y / 2, (x / 2) * 2) + input_uv_offset]; + float U = input1[GET_DATA_INDEX(INPUT0, b, 0, y / 2, (x / 2) * 2) + input_uv_offset]; + float V = input1[GET_DATA_INDEX(INPUT0, b, 1, y / 2, (x / 2) * 2) + input_uv_offset]; #endif float Ycomponent = mad(Y, 1.164f, -18.624f); @@ -57,9 +63,12 @@ 
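/* Summary of the plane layouts these kernels assume (a reading aid matching the
   index math above and below, not part of the kernel interface):
   - NV12, two inputs: a full-resolution Y plane plus one half-resolution plane
     with interleaved UV samples.
   - I420, three inputs: a full-resolution Y plane plus separate quarter-size U
     and V planes, each sampled at (x / 2, y / 2).
   - Single plane: the chroma rows are appended after the Y rows in one buffer,
     hence the INPUT0_SIZE_X * INPUT0_SIZE_Y / 3 * 2 offset. */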
KERNEL(convert_color_ref)(const __global INPUT0_TYPE* input_y, #ifdef SURFACE_MEM -KERNEL(convert_color_ref)(read_only image2d_t input_y, -#if INPUTS_COUNT == 2 - read_only image2d_t input_uv, +KERNEL(convert_color_ref)(read_only image2d_t input1, +#if INPUTS_COUNT > 1 + read_only image2d_t input2, +#if INPUTS_COUNT == 3 + read_only image2d_t input3, +#endif #endif __global OUTPUT_TYPE* output) { @@ -67,17 +76,22 @@ KERNEL(convert_color_ref)(read_only image2d_t input_y, const uint y = get_global_id(1); const uint x = get_global_id(2); - float4 Y = read_imagef(input_y, (int2)(x, y)); + float4 Y = read_imagef(input1, (int2)(x, y)); float Ycomponent = mad(Y.x, 296.82f, -18.624f); -#if INPUTS_COUNT == 2 - float4 UV = read_imagef(input_uv, (int2)(x / 2, y / 2)); +#if INPUTS_COUNT == 3 + float4 U = read_imagef(input2, (int2)(x / 2, y / 2)); + float4 V = read_imagef(input3, (int2)(x / 2, y / 2)); + float Ucomponent = mad(U.x, 255.0f, -128.f); + float Vcomponent = mad(V.x, 255.0f, -128.f); +#elif INPUTS_COUNT == 2 + float4 UV = read_imagef(input2, (int2)(x / 2, y / 2)); float Ucomponent = mad(UV.x, 255.0f, -128.f); float Vcomponent = mad(UV.y, 255.0f, -128.f); #else // Single plane uint input_y_offset = INPUT0_SIZE_Y / 3 * 2; - float4 U = read_imagef(input_y, (int2)((x / 2) * 2, y / 2 + input_y_offset)); - float4 V = read_imagef(input_y, (int2)((x / 2) * 2 + 1, y / 2 + input_y_offset)); + float4 U = read_imagef(input1, (int2)((x / 2) * 2, y / 2 + input_y_offset)); + float4 V = read_imagef(input1, (int2)((x / 2) * 2 + 1, y / 2 + input_y_offset)); float Ucomponent = mad(U.x, 255.0f, -128.f); float Vcomponent = mad(V.x, 255.0f, -128.f); #endif diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/convert_color_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/convert_color_gpu_test.cpp index 7bb8947b1b8..f207235b761 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/convert_color_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/convert_color_gpu_test.cpp @@ -16,15 +16,15 @@ using namespace cldnn; using namespace ::tests; template -void createReferenceData(const T* arg_y, const T* arg_uv, U* out_ptr, - size_t batch_size, size_t image_h, size_t image_w, - size_t stride_y, size_t stride_uv, bool to_rgb) { - for (int batch = 0; batch < batch_size; batch++) { +void createReferenceDataNV12(const T* arg_y, const T* arg_uv, U* out_ptr, + size_t batch_size, size_t image_h, size_t image_w, + size_t stride_y, size_t stride_uv, bool to_rgb) { + for (size_t batch = 0; batch < batch_size; ++batch) { U* out = out_ptr + batch * image_w * image_h * 3; auto y_ptr = arg_y + batch * stride_y; auto uv_ptr = arg_uv + batch * stride_uv; - for (int h = 0; h < image_h; h++) { - for (int w = 0; w < image_w; w++) { + for (size_t h = 0; h < image_h; ++h) { + for (size_t w = 0; w < image_w; ++w) { auto y_index = h * image_w + w; auto y_val = static_cast(y_ptr[y_index]); auto uv_index = (h / 2) * image_w + (w / 2) * 2; @@ -87,8 +87,8 @@ TEST(convert_color, nv12_to_rgb_two_planes_buffer_fp32) { auto outputs = network.execute(); std::vector ref_res(width * height * 3); - createReferenceData(input_y_data.data(), input_uv_data.data(), ref_res.data(), - 1, height, width, height * width, height * width / 2, true); + createReferenceDataNV12(input_y_data.data(), input_uv_data.data(), ref_res.data(), + 1, height, width, height * width, height * width / 2, true); auto output = outputs.at("convert_color").get_memory(); cldnn::mem_lock output_ptr(output, get_test_stream()); @@ 
-126,8 +126,8 @@ TEST(convert_color, nv12_to_bgr_two_planes_buffer_fp32) { auto outputs = network.execute(); std::vector ref_res(width * height * 3); - createReferenceData(input_y_data.data(), input_uv_data.data(), ref_res.data(), - 1, height, width, height * width, height * width / 2, false); + createReferenceDataNV12(input_y_data.data(), input_uv_data.data(), ref_res.data(), + 1, height, width, height * width, height * width / 2, false); auto output = outputs.at("convert_color").get_memory(); cldnn::mem_lock output_ptr(output, get_test_stream()); @@ -166,8 +166,8 @@ TEST(convert_color, nv12_to_rgb_two_planes_buffer_u8) { auto outputs = network.execute(); std::vector ref_res(width * height * 3); - createReferenceData(input_y_data.data(), input_uv_data.data(), ref_res.data(), - 1, height, width, height * width, height * width / 2, true); + createReferenceDataNV12(input_y_data.data(), input_uv_data.data(), ref_res.data(), + 1, height, width, height * width, height * width / 2, true); auto output = outputs.at("convert_color").get_memory(); cldnn::mem_lock output_ptr(output, get_test_stream()); @@ -206,8 +206,8 @@ TEST(convert_color, nv12_to_rgb_two_planes_buffer_fp16) { auto outputs = network.execute(); std::vector ref_res(width * height * 3); - createReferenceData(input_y_data.data(), input_uv_data.data(), ref_res.data(), - 1, height, width, height * width, height * width / 2, true); + createReferenceDataNV12(input_y_data.data(), input_uv_data.data(), ref_res.data(), + 1, height, width, height * width, height * width / 2, true); auto output = outputs.at("convert_color").get_memory(); cldnn::mem_lock output_ptr(output, get_test_stream()); @@ -243,8 +243,8 @@ TEST(convert_color, nv12_to_rgb_single_plane_buffer_fp32) { auto outputs = network.execute(); std::vector ref_res(width * height * 3); - createReferenceData(input_data.data(), input_data.data() + height * width, ref_res.data(), - 1, height, width, input_height * width, input_height * width, true); + createReferenceDataNV12(input_data.data(), input_data.data() + height * width, ref_res.data(), + 1, height, width, input_height * width, input_height * width, true); auto output = outputs.at("convert_color").get_memory(); cldnn::mem_lock output_ptr(output, get_test_stream()); @@ -279,8 +279,8 @@ TEST(convert_color, nv12_to_rgb_single_plane_buffer_u8) { auto outputs = network.execute(); std::vector ref_res(width * height * 3); - createReferenceData(input_data.data(), input_data.data() + height * width, ref_res.data(), - 1, height, width, input_height * width, input_height * width, true); + createReferenceDataNV12(input_data.data(), input_data.data() + height * width, ref_res.data(), + 1, height, width, input_height * width, input_height * width, true); auto output = outputs.at("convert_color").get_memory(); cldnn::mem_lock output_ptr(output, get_test_stream()); @@ -356,8 +356,8 @@ TEST(convert_color, nv12_to_rgb_two_planes_surface_u8) { auto outputs = network.execute(); std::vector reference_results(width * height * 3); - createReferenceData(data.data(), data.data() + height * width, reference_results.data(), - 1, height, width, height * width, height * width / 2, true); + createReferenceDataNV12(data.data(), data.data() + height * width, reference_results.data(), + 1, height, width, height * width, height * width / 2, true); auto output_prim = outputs.begin()->second.get_memory(); cldnn::mem_lock output_ptr(output_prim, get_test_stream()); @@ -419,8 +419,8 @@ TEST(convert_color, nv12_to_rgb_single_plane_surface_u8) { auto outputs = 
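// A worked check of yuv_pixel_to_rgb above (illustrative arithmetic only), for
// the classic "red" test pixel Y = 81, U = 90, V = 240:
//   c = 81 - 16 = 65,  d = 90 - 128 = -38,  e = 240 - 128 = 112
//   r = 1.164f * 65 + 1.596f * 112                  =  254.4 -> 254 after clipping
//   g = 1.164f * 65 - 0.391f * (-38) - 0.813f * 112 =   -0.5 ->   0
//   b = 1.164f * 65 + 2.018f * (-38)                =   -1.0 ->   0
// i.e. an almost pure red; createReferenceDataI420 then writes the triple in
// RGB or BGR order depending on rgb_color_format.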
network.execute(); std::vector reference_results(width * height * 3); - createReferenceData(input_data.data(), input_data.data() + height * width, reference_results.data(), - 1, height, width, input_height * width, input_height * width, true); + createReferenceDataNV12(input_data.data(), input_data.data() + height * width, reference_results.data(), + 1, height, width, input_height * width, input_height * width, true); auto output_prim = outputs.begin()->second.get_memory(); cldnn::mem_lock output_ptr(output_prim, get_test_stream()); @@ -429,3 +429,185 @@ TEST(convert_color, nv12_to_rgb_single_plane_surface_u8) { } checkStatus(clReleaseMemObject(nv12_image), "clReleaseMemObject"); } + +template +std::tuple yuv_pixel_to_rgb(float y_val, float u_val, float v_val) { + auto c = y_val - 16.f; + auto d = u_val - 128.f; + auto e = v_val - 128.f; + auto clip = [](float a) -> T { + if (std::is_integral()) { + return static_cast(std::min(std::max(std::round(a), 0.f), 255.f)); + } else { + return static_cast(std::min(std::max(a, 0.f), 255.f)); + } + }; + auto b = clip(1.164f * c + 2.018f * d); + auto g = clip(1.164f * c - 0.391f * d - 0.813f * e); + auto r = clip(1.164f * c + 1.596f * e); + return std::tuple{r, g, b}; +} + +template +void createReferenceDataI420(const T* arg_y, const T* arg_u, const T* arg_v, U* out_ptr, + size_t batch_size, size_t image_h, size_t image_w, + size_t stride_y, size_t stride_uv, bool rgb_color_format) { + for (size_t batch = 0; batch < batch_size; ++batch) { + U* out = out_ptr + batch * image_w * image_h * 3; + auto y_ptr = arg_y + batch * stride_y; + auto u_ptr = arg_u + batch * stride_uv; + auto v_ptr = arg_v + batch * stride_uv; + for (size_t h = 0; h < image_h; ++h) { + for (size_t w = 0; w < image_w; ++w) { + auto y_index = h * image_w + w; + auto y_val = static_cast(y_ptr[y_index]); + auto uv_index = (h / 2) * (image_w / 2) + (w / 2); + auto u_val = static_cast(u_ptr[uv_index]); + auto v_val = static_cast(v_ptr[uv_index]); + T r, g, b; + std::tie(r, g, b) = yuv_pixel_to_rgb(y_val, u_val, v_val); + if (rgb_color_format) { + out[y_index * 3] = r; + out[y_index * 3 + 1] = g; + out[y_index * 3 + 2] = b; + } else { + out[y_index * 3] = b; + out[y_index * 3 + 1] = g; + out[y_index * 3 + 2] = r; + } + } + } + } +} + +TEST(convert_color, i420_to_rgb_three_planes_buffer_fp32) { + auto& engine = get_test_engine(); + int width = 224; + int height = 448; + + auto input_y = engine.allocate_memory({ data_types::f32, format::byxf, { 1, 1, width, height } }); + auto input_u = engine.allocate_memory({ data_types::f32, format::byxf, { 1, 1, width / 2 , height / 2 } }); + auto input_v = engine.allocate_memory({ data_types::f32, format::byxf, { 1, 1, width / 2 , height / 2 } }); + + std::vector input_y_data = generate_random_1d(width * height, 0, 255); + std::vector input_u_data = generate_random_1d(width * height / 4, 0, 255); + std::vector input_v_data = generate_random_1d(width * height / 4, 0, 255); + + set_values(input_y, input_y_data); + set_values(input_u, input_u_data); + set_values(input_v, input_v_data); + + layout output_layout(data_types::f32, cldnn::format::byxf, { 1, 3, width, height }); + + topology topology; + topology.add(input_layout("input_y", input_y->get_layout())); + topology.add(input_layout("input_u", input_u->get_layout())); + topology.add(input_layout("input_v", input_v->get_layout())); + topology.add(convert_color("convert_color", { "input_y", "input_u", "input_v" }, cldnn::convert_color::color_format::I420, cldnn::convert_color::color_format::RGB, + 
cldnn::convert_color::memory_type::buffer, output_layout)); + + network network(engine, topology); + network.set_input_data("input_y", input_y); + network.set_input_data("input_u", input_u); + network.set_input_data("input_v", input_v); + + auto outputs = network.execute(); + + std::vector ref_res(width * height * 3); + createReferenceDataI420(input_y_data.data(), input_u_data.data(), input_v_data.data(), ref_res.data(), + 1, height, width, height * width, height * width / 2, true); + auto output = outputs.at("convert_color").get_memory(); + cldnn::mem_lock output_ptr(output, get_test_stream()); + + for (size_t i = 0; i < ref_res.size(); ++i) { + EXPECT_NEAR(ref_res[i], output_ptr[i], 1.001f); + } +} + +TEST(convert_color, i420_to_rgb_three_planes_surface_u8) { + int width = 224; + int height = 448; + + auto ocl_instance = std::make_shared(); + device_query query(engine_types::ocl, runtime_types::ocl, static_cast(ocl_instance->_context.get())); + auto devices = query.get_available_devices(); + + auto engine_config = cldnn::engine_configuration(); + auto engine = engine::create(engine_types::ocl, runtime_types::ocl, devices.begin()->second, engine_config); + + if (!engine->get_device_info().supports_image) { + GTEST_SKIP() << "Device doesn't support images"; + } + + int data_size = width * (height + height / 2); + std::vector data = generate_random_1d(data_size, 0, 255); + + cl_int err; + cl_image_format image_format; + image_format.image_channel_order = CL_R; + image_format.image_channel_data_type = CL_UNORM_INT8; + cl_image_desc image_desc = { CL_MEM_OBJECT_IMAGE2D, (size_t)width, (size_t)height, 0, + 0, 0, 0, 0, 0, { nullptr } }; + + cl_mem i420_image_plane_y = clCreateImage(ocl_instance->_context.get(), CL_MEM_READ_WRITE, &image_format, &image_desc, nullptr, &err); + checkStatus(err, "Creating i420 image plane_y failed"); + + image_desc.image_width = width / 2; + image_desc.image_height = height / 2; + + cl_mem i420_image_plane_u = clCreateImage(ocl_instance->_context.get(), CL_MEM_READ_WRITE, &image_format, &image_desc, nullptr, &err); + checkStatus(err, "Creating i420 image plane_u failed"); + + cl_mem i420_image_plane_v = clCreateImage(ocl_instance->_context.get(), CL_MEM_READ_WRITE, &image_format, &image_desc, nullptr, &err); + checkStatus(err, "Creating i420 image plane_v failed"); + + size_t origin[3] = { 0, 0, 0 }; + size_t y_region[3] = { (size_t)width, (size_t)height, 1 }; + size_t uv_region[3] = { (size_t)width / 2, (size_t)height / 2, 1 }; + + err = clEnqueueWriteImage(ocl_instance->_queue.get(), i420_image_plane_y, true, origin, y_region, 0, 0, &data[0], 0, nullptr, nullptr); + checkStatus(err, "Writing i420 image plane_y failed"); + + err = clEnqueueWriteImage(ocl_instance->_queue.get(), i420_image_plane_u, true, origin, uv_region, 0, 0, &data[width * height], 0, nullptr, nullptr); + checkStatus(err, "Writing i420 image plane_u failed"); + + err = clEnqueueWriteImage(ocl_instance->_queue.get(), i420_image_plane_v, true, origin, uv_region, 0, 0, &data[width * (height + height / 4)], 0, nullptr, nullptr); + checkStatus(err, "Writing i420 image plane_v failed"); + + auto input = input_layout("input", { data_types::u8, format::nv12, { 1, 1, width, height } }); + auto input2 = input_layout("input2", { data_types::u8, format::nv12, { 1, 1, width / 2, height / 2 } }); + auto input3 = input_layout("input3", { data_types::u8, format::nv12, { 1, 1, width / 2, height / 2 } }); + auto output_format = cldnn::format::byxf; + layout output_layout(data_types::f32, output_format, { 1, 3, 
width, height }); + + auto input_memory = engine->share_image(input.layout, i420_image_plane_y); + auto input_memory2 = engine->share_image(input2.layout, i420_image_plane_u); + auto input_memory3 = engine->share_image(input3.layout, i420_image_plane_v); + + topology topology; + topology.add(input); + topology.add(input2); + topology.add(input3); + topology.add(convert_color("convert_color", { "input", "input2", "input3" }, cldnn::convert_color::color_format::I420, cldnn::convert_color::color_format::RGB, + cldnn::convert_color::memory_type::image, output_layout)); + + network network(*engine, topology); + network.set_input_data("input", input_memory); + network.set_input_data("input2", input_memory2); + network.set_input_data("input3", input_memory3); + + auto outputs = network.execute(); + + std::vector reference_results(width * height * 3); + createReferenceDataI420(data.data(), data.data() + height * width, data.data() + width * (height + height / 4), reference_results.data(), + 1, height, width, height * width, height * width / 2, true); + + auto output_prim = outputs.begin()->second.get_memory(); + cldnn::mem_lock output_ptr(output_prim, get_test_stream()); + for (auto i = 0; i < reference_results.size(); i++) { + EXPECT_NEAR(reference_results[i], output_ptr[i], 1.001f); + } + checkStatus(clReleaseMemObject(i420_image_plane_y), "clReleaseMemObject"); + checkStatus(clReleaseMemObject(i420_image_plane_u), "clReleaseMemObject"); + checkStatus(clReleaseMemObject(i420_image_plane_v), "clReleaseMemObject"); +} diff --git a/src/core/include/openvino/op/i420_to_bgr.hpp b/src/core/include/openvino/op/i420_to_bgr.hpp index 59f1e661fd2..473a8e73fd7 100644 --- a/src/core/include/openvino/op/i420_to_bgr.hpp +++ b/src/core/include/openvino/op/i420_to_bgr.hpp @@ -34,6 +34,8 @@ class OPENVINO_API I420toBGR : public util::ConvertColorI420Base { public: OPENVINO_OP("I420toBGR", "opset8", util::ConvertColorI420Base); + BWDCMP_RTTI_DECLARATION; + I420toBGR() = default; /// \brief Constructs a conversion operation from input image in I420 format diff --git a/src/core/include/openvino/op/i420_to_rgb.hpp b/src/core/include/openvino/op/i420_to_rgb.hpp index c95f5de6b5c..cc0d1752de1 100644 --- a/src/core/include/openvino/op/i420_to_rgb.hpp +++ b/src/core/include/openvino/op/i420_to_rgb.hpp @@ -34,6 +34,8 @@ class OPENVINO_API I420toRGB : public util::ConvertColorI420Base { public: OPENVINO_OP("I420toRGB", "opset8", util::ConvertColorI420Base); + BWDCMP_RTTI_DECLARATION; + I420toRGB() = default; /// \brief Constructs a conversion operation from input image in I420 format diff --git a/src/core/src/op/i420_to_bgr.cpp b/src/core/src/op/i420_to_bgr.cpp index b6eac7b3c35..4e38a102100 100644 --- a/src/core/src/op/i420_to_bgr.cpp +++ b/src/core/src/op/i420_to_bgr.cpp @@ -6,6 +6,8 @@ #include "itt.hpp" +BWDCMP_RTTI_DEFINITION(ov::op::v8::I420toBGR); + ov::op::v8::I420toBGR::I420toBGR(const Output& arg) : util::ConvertColorI420Base(arg, util::ConvertColorI420Base::ColorConversion::I420_TO_BGR) { constructor_validate_and_infer_types(); diff --git a/src/core/src/op/i420_to_rgb.cpp b/src/core/src/op/i420_to_rgb.cpp index 9375c6eb921..e9a293de2b4 100644 --- a/src/core/src/op/i420_to_rgb.cpp +++ b/src/core/src/op/i420_to_rgb.cpp @@ -6,6 +6,8 @@ #include "itt.hpp" +BWDCMP_RTTI_DEFINITION(ov::op::v8::I420toRGB); + ov::op::v8::I420toRGB::I420toRGB(const Output& arg) : util::ConvertColorI420Base(arg, util::ConvertColorI420Base::ColorConversion::I420_TO_RGB) { constructor_validate_and_infer_types(); diff --git 
a/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp index 9285b5e7032..7ec7a460615 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp @@ -216,6 +216,8 @@ REGISTER_FACTORY(v8, GatherND); REGISTER_FACTORY(v8, DeformableConvolution); REGISTER_FACTORY(v8, NV12toRGB); REGISTER_FACTORY(v8, NV12toBGR); +REGISTER_FACTORY(v8, I420toRGB); +REGISTER_FACTORY(v8, I420toBGR); // --------------------------- Supported internal ops --------------------------- // REGISTER_FACTORY(internal, NonMaxSuppressionIEInternal); diff --git a/src/plugins/intel_gpu/src/plugin/ops/convert_color.cpp b/src/plugins/intel_gpu/src/plugin/ops/convert_color.cpp index a3ba0ee20a8..c867b58fe38 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/convert_color.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/convert_color.cpp @@ -44,16 +44,28 @@ static void CreateCommonConvertColorOp(Program& p, const std::shared_ptr& op) { p.ValidateInputs(op, {1, 2}); - CreateCommonConvertColorOp(p, op, cldnn::convert_color::color_format::NV12, cldnn::convert_color::color_format::RGB); + CreateCommonConvertColorOp(p, op, cldnn::convert_color::color_format::NV12, cldnn::convert_color::color_format::RGB); } static void CreateNV12toBGROp(Program& p, const std::shared_ptr& op) { p.ValidateInputs(op, {1, 2}); - CreateCommonConvertColorOp(p, op, cldnn::convert_color::color_format::NV12, cldnn::convert_color::color_format::BGR); + CreateCommonConvertColorOp(p, op, cldnn::convert_color::color_format::NV12, cldnn::convert_color::color_format::BGR); +} + +static void CreateI420toRGBOp(Program& p, const std::shared_ptr& op) { + p.ValidateInputs(op, {1, 3}); + CreateCommonConvertColorOp(p, op, cldnn::convert_color::color_format::I420, cldnn::convert_color::color_format::RGB); +} + +static void CreateI420toBGROp(Program& p, const std::shared_ptr& op) { + p.ValidateInputs(op, {1, 3}); + CreateCommonConvertColorOp(p, op, cldnn::convert_color::color_format::I420, cldnn::convert_color::color_format::BGR); } REGISTER_FACTORY_IMPL(v8, NV12toRGB); REGISTER_FACTORY_IMPL(v8, NV12toBGR); +REGISTER_FACTORY_IMPL(v8, I420toRGB); +REGISTER_FACTORY_IMPL(v8, I420toBGR); } // namespace intel_gpu } // namespace runtime diff --git a/src/plugins/intel_gpu/src/plugin/ops/parameter.cpp b/src/plugins/intel_gpu/src/plugin/ops/parameter.cpp index 1e68ba52db9..53e83826b82 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/parameter.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/parameter.cpp @@ -181,7 +181,9 @@ static void CreateParameterOp(Program& p, const std::shared_ptrget_users()) { is_convert_color_input |= ngraph::is_type(node) || - ngraph::is_type(node); + ngraph::is_type(node) || + ngraph::is_type(node) || + ngraph::is_type(node); } if (is_convert_color_input) { diff --git a/src/plugins/intel_gpu/src/plugin/ops/result.cpp b/src/plugins/intel_gpu/src/plugin/ops/result.cpp index ed1f30dd482..dbe2737ffd9 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/result.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/result.cpp @@ -41,7 +41,9 @@ static void CreateResultOp(Program& p, const std::shared_ptr(prev) || - ngraph::is_type(prev)) { + ngraph::is_type(prev) || + ngraph::is_type(prev) || + ngraph::is_type(prev)) { outputlayout = NHWC; } diff --git a/src/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/convert_color_i420.cpp 
b/src/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/convert_color_i420.cpp new file mode 100644 index 00000000000..a74aed3f77d --- /dev/null +++ b/src/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/convert_color_i420.cpp @@ -0,0 +1,58 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "single_layer_tests/convert_color_i420.hpp" +#include "common_test_utils/test_constants.hpp" + +using namespace LayerTestsDefinitions; + +namespace { + +const std::vector<ov::Shape> inShapes_nhwc = { + {1, 10, 10, 1} +}; + +const std::vector<ov::element::Type> inTypes = { + ov::element::u8, ov::element::f32 +}; + +const auto testCase_values = ::testing::Combine( + ::testing::ValuesIn(inShapes_nhwc), + ::testing::ValuesIn(inTypes), + ::testing::Bool(), + ::testing::Bool(), + ::testing::Values(CommonTestUtils::DEVICE_GPU) ); + +INSTANTIATE_TEST_SUITE_P(smoke_TestsConvertColorI420, ConvertColorI420LayerTest, testCase_values, ConvertColorI420LayerTest::getTestCaseName); + +const auto testCase_accuracy_values = ::testing::Combine( + ::testing::Values(ov::Shape{1, 16*6, 16, 1}), + ::testing::Values(ov::element::u8), + ::testing::Bool(), + ::testing::Bool(), + ::testing::Values(CommonTestUtils::DEVICE_GPU) ); + +INSTANTIATE_TEST_SUITE_P(smoke_TestsConvertColorI420_acc, + ConvertColorI420AccuracyTest, + testCase_accuracy_values, + ConvertColorI420LayerTest::getTestCaseName); + +const auto testCase_accuracy_values_nightly = ::testing::Combine( + ::testing::Values(ov::Shape{1, 256*256, 256, 1}), + ::testing::Values(ov::element::u8), + ::testing::Values(false), + ::testing::Values(true), + ::testing::Values(CommonTestUtils::DEVICE_GPU) ); + +INSTANTIATE_TEST_SUITE_P(nightly_TestsConvertColorI420_acc, + ConvertColorI420AccuracyTest, + testCase_accuracy_values_nightly, + ConvertColorI420LayerTest::getTestCaseName); + +} // namespace From 4dab2662aa7ab4b71127b1cb5ae5a24ee4172e01 Mon Sep 17 00:00:00 2001 From: Yuan Hu Date: Thu, 16 Dec 2021 15:48:04 +0800 Subject: [PATCH 20/27] [Auto plugin] add KEY_NETWORK_PRIORITY (#8146) * add KEY_NETWORK_PRIORITY Signed-off-by: Hu, Yuan2 * use uniqueName instead of fullDeviceName use lambda instead of macro Signed-off-by: Hu, Yuan2 * test draft Signed-off-by: Hu, Yuan2 * fix compile issue Signed-off-by: Hu, Yuan2 * add test config Signed-off-by: Hu, Yuan2 * add origin select test Signed-off-by: Hu, Yuan2 * perf Counter as separate member Signed-off-by: Hu, Yuan2 * add null device test case for select device Signed-off-by: Hu, Yuan2 * fix cache test failed issue Signed-off-by: Hu, Yuan2 * use a function to register priority Signed-off-by: Hu, Yuan2 * add const Signed-off-by: Hu, Yuan2 * add some comments Signed-off-by: Hu, Yuan2 * initialize AutoContext in define Signed-off-by: Hu, Yuan2 * use lock guard Signed-off-by: Hu, Yuan2 * change get() to wait() Signed-off-by: Hu, Yuan2 * add unregister in load failed Signed-off-by: Hu, Yuan2 * add const on unique name Signed-off-by: Hu, Yuan2 * fix compile issue Signed-off-by: Hu, Yuan2 * add test for ParseMetaDevices and fix logic issue Signed-off-by: Hu, Yuan2 * change key name to AUTO_NETWORK_PRIORITY Signed-off-by: Hu, Yuan2 * fix compile issue caused by API change in master Signed-off-by: Hu, Yuan2 * fix key incorrect test in ci Signed-off-by: Hu, Yuan2 * go back to origin set config code Signed-off-by: Hu, Yuan2 * fix mock test issue after Parameter changed to Any Signed-off-by: Hu, Yuan2 * fix macos compile issue Signed-off-by: Hu, Yuan2 * remove question 
comments Signed-off-by: Hu, Yuan2 * remove duplicate config Signed-off-by: Hu, Yuan2 * update test case from the hello_query_device CPU FULL_DEVICE_NAME : Intel(R) Core(TM) i7-6700 CPU @ 3.40GHz GPU FULL_DEVICE_NAME : Intel(R) Gen9 HD Graphics (iGPU) MYRIAD.3.1-ma2480 FULL_DEVICE_NAME : Intel Movidius Myriad X VPU MYRIAD.3.2-ma2480 FULL_DEVICE_NAME : Intel Movidius Myriad X VPU currently GetMetric("MYRIAD","FULL_DEVICE_NAME") will throw No KEY_DEVICE_ID Signed-off-by: Hu, Yuan2 * only GPU uses the fullDeviceName Signed-off-by: Hu, Yuan2 * fix rebase issue Signed-off-by: Hu, Yuan2 * change directory after ie test moved Signed-off-by: Hu, Yuan2 * fix windows compile unit test issue Signed-off-by: Hu, Yuan2 --- .../ie/multi-device/multi_device_config.hpp | 8 + src/plugins/auto/executable_network.cpp | 72 +++--- src/plugins/auto/executable_network.hpp | 8 + src/plugins/auto/plugin.cpp | 238 ++++++++++------- src/plugins/auto/plugin.hpp | 10 +- .../behavior/plugin/configuration_tests.cpp | 36 ++- .../behavior/plugin/configuration_tests.cpp | 29 ++- .../auto/auto_select_device_failed_test.cpp | 8 +- .../unit/auto/exec_network_get_metrics.cpp | 4 +- .../unit/auto/key_network_priority_test.cpp | 241 ++++++++++++++++++ .../unit/auto/parse_meta_device_test.cpp | 172 +++++++++++++ .../auto/plugin/mock_auto_device_plugin.hpp | 2 +- src/tests/unit/auto/select_device_test.cpp | 214 ++++++++++++++++ 13 files changed, 891 insertions(+), 151 deletions(-) create mode 100644 src/tests/unit/auto/key_network_priority_test.cpp create mode 100644 src/tests/unit/auto/parse_meta_device_test.cpp create mode 100644 src/tests/unit/auto/select_device_test.cpp diff --git a/src/inference/include/ie/multi-device/multi_device_config.hpp b/src/inference/include/ie/multi-device/multi_device_config.hpp index 0f4f2dd829b..2426ec11d6f 100644 --- a/src/inference/include/ie/multi-device/multi_device_config.hpp +++ b/src/inference/include/ie/multi-device/multi_device_config.hpp @@ -34,5 +34,13 @@ namespace MultiDeviceConfigParams { */ DECLARE_MULTI_CONFIG_KEY(DEVICE_PRIORITIES); +/** + * @brief Network priority config option. The value ranges from 0 to the maximum + * integer; when there are multiple devices, a smaller value means a higher priority, + * and 0 is the highest. The AUTO plugin dispatches the network to a device + * according to this priority value; when all devices are free, the network is + * dispatched to the strongest device even if its priority value is not 0. 
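+ * + * An illustrative sketch of passing the key at load time (assuming the usual + * DECLARE_CONFIG_KEY expansion to KEY_AUTO_NETWORK_PRIORITY; the call itself is + * not part of this header): + *     ie.LoadNetwork(network, "AUTO", + *                    {{MultiDeviceConfigParams::KEY_AUTO_NETWORK_PRIORITY, "0"}}); 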
diff --git a/src/plugins/auto/executable_network.cpp b/src/plugins/auto/executable_network.cpp
index d3375ef1405..fc23320f30d 100644
--- a/src/plugins/auto/executable_network.cpp
+++ b/src/plugins/auto/executable_network.cpp
@@ -149,11 +149,13 @@ MultiDeviceExecutableNetwork::MultiDeviceExecutableNetwork(const std::string&
                                                            const std::vector<DeviceInformation>& metaDevices,
                                                            const std::string& strDevices,
                                                            MultiDeviceInferencePlugin* plugin,
-                                                           const bool needPerfCounters)
+                                                           const AutoContext& context,
+                                                           const bool needPerfCounters)
    : _devicePriorities{metaDevices}
    , _devicePrioritiesInitial{metaDevices}
    , _needPerfCounters(needPerfCounters)
    , _multiPlugin(plugin)
+    , _context(context)
    , _workModeIsAUTO(true) {
    if (_multiPlugin->GetCore() == nullptr) {
        IE_THROW() << "Please, work with MULTI device via InferencEngine::Core object";
@@ -173,7 +175,8 @@ MultiDeviceExecutableNetwork::MultiDeviceExecutableNetwork(const std::string&
    _loadContext[ACTUALDEVICE].isEnabled = true;
    _loadContext[ACTUALDEVICE].networkPrecision = GetNetworkPrecision(network);
    _loadContext[ACTUALDEVICE].metaDevices = metaDevices;
-    _loadContext[ACTUALDEVICE].deviceInfo = _multiPlugin->SelectDevice(metaDevices, _loadContext[ACTUALDEVICE].networkPrecision);
+    _loadContext[ACTUALDEVICE].deviceInfo = _multiPlugin->SelectDevice(metaDevices,
+            _loadContext[ACTUALDEVICE].networkPrecision, _context.modelPriority);
    LOG_INFO("[AUTOPLUGIN]:select device:%s", _loadContext[ACTUALDEVICE].deviceInfo.deviceName.c_str());
    bool isActualDevCPU =
        _loadContext[ACTUALDEVICE].deviceInfo.deviceName.find("CPU") != std::string::npos;
@@ -292,6 +295,13 @@ void MultiDeviceExecutableNetwork::TryToLoadNetWork(AutoLoadContext& context,
        return;
    }

+    // The network must be reloaded, so unregister its priority first. Note a
+    // possible corner case: with dGPU, VPUX and iGPU present, if the user
+    // loads network 0 on dGPU and network 1 on VPUX and the dGPU load fails,
+    // the outcome is not deterministic: both networks may end up on VPUX, or
+    // network 0 on VPUX and network 1 on iGPU.
+    _multiPlugin->UnregisterPriority(_context.modelPriority, context.deviceInfo.uniqueName);
    // remove the current device from deviceList
    auto eraseDevice = std::find_if(deviceList.begin(), deviceList.end(),
            [device](DeviceInformation& d){
@@ -305,7 +315,8 @@ void MultiDeviceExecutableNetwork::TryToLoadNetWork(AutoLoadContext& context,
    // select next candidate device
    try {
-        context.deviceInfo = _multiPlugin->SelectDevice(deviceList, context.networkPrecision);
+        context.deviceInfo = _multiPlugin->SelectDevice(deviceList,
+                context.networkPrecision, _context.modelPriority);
    }
    catch (const std::exception& e) {
        return;
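Condensed, the fallback that starts above is: unregister the failed device's priority slot, drop the device from the candidate list, reselect, and retry. A compilable sketch of that shape with stand-in types (not the plugin's real API):

#include <algorithm>
#include <stdexcept>
#include <string>
#include <vector>

struct Device { std::string uniqueName; };

bool tryLoadOn(const Device&) { return false; }          // stand-in load attempt
Device select(const std::vector<Device>& candidates) {   // stand-in SelectDevice
    if (candidates.empty()) throw std::runtime_error("no device left");
    return candidates.front();
}

void loadWithFallback(Device current, std::vector<Device> candidates) {
    while (!tryLoadOn(current)) {
        // the failed device releases its slot so it stops blocking
        // lower-priority networks, then leaves the candidate list
        candidates.erase(std::remove_if(candidates.begin(), candidates.end(),
                [&](const Device& d) { return d.uniqueName == current.uniqueName; }),
            candidates.end());
        try {
            current = select(candidates);
        } catch (const std::exception&) {
            return;  // every candidate exhausted; give up, as the plugin does
        }
    }
}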
@@ -382,7 +393,7 @@ void MultiDeviceExecutableNetwork::WaitActualNetworkReady() const {
    // for every MultiDeviceExecutableNetwork instance
    std::call_once(_oc, [this] () {
       if (_loadContext[ACTUALDEVICE].future.valid()) {
-          _loadContext[ACTUALDEVICE].future.get();
+          _loadContext[ACTUALDEVICE].future.wait();
       }
       // if _loadContext[ACTUALDEVICE] load failed, fall back to _loadContext[CPU]
       if (!_loadContext[ACTUALDEVICE].isAlready) {
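The get()-to-wait() switch above matters because the two calls treat a stored exception differently: wait() only blocks, while get() rethrows it and invalidates the future. A minimal, runnable demonstration:

#include <future>
#include <iostream>
#include <stdexcept>

int main() {
    auto f = std::async(std::launch::async, [] {
        throw std::runtime_error("device load failed");
    });
    f.wait();      // blocks until done, does NOT throw
    try {
        f.get();   // rethrows the stored exception exactly once
    } catch (const std::exception& e) {
        std::cout << "fallback path: " << e.what() << "\n";
    }
}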
@@ -460,13 +471,17 @@ void MultiDeviceExecutableNetwork::run(Task inferPipelineTask) {
}

MultiDeviceExecutableNetwork::~MultiDeviceExecutableNetwork() {
-    // this is necessary to guarantee member destroyed after getting future
-    if (_workModeIsAUTO && _loadContext[CPU].isEnabled) {
-        _loadContext[CPU].future.get();
-        WaitActualNetworkReady();
-        // it's necessary to wait the loading network threads to stop here.
-        InferenceEngine::ExecutorManager::getInstance()->clear("AutoDeviceAsyncLoad");
-        _executor.reset();
+    if (_workModeIsAUTO) {
+        // this is necessary to guarantee member destroyed after getting future
+        if (_loadContext[CPU].isEnabled) {
+            _loadContext[CPU].future.wait();
+            WaitActualNetworkReady();
+            // it's necessary to wait the loading network threads to stop here.
+            InferenceEngine::ExecutorManager::getInstance()->clear("AutoDeviceAsyncLoad");
+            _executor.reset();
+        }
+        _multiPlugin->UnregisterPriority(_context.modelPriority,
+                _loadContext[ACTUALDEVICE].deviceInfo.uniqueName);
    }
    {
        std::lock_guard<std::mutex> lock(_mutex);
@@ -615,33 +630,32 @@ void MultiDeviceExecutableNetwork::SetConfig(const std::map<std::string, InferenceEngine::Parameter>& config) {
+            std::lock_guard<std::mutex> lockConf(_confMutex);
            _config[MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES] = priorities->second;
-            _confMutex.unlock();
        }
    }
}

InferenceEngine::Parameter MultiDeviceExecutableNetwork::GetConfig(const std::string &name) const {
-    _confMutex.lock();
-    auto it = _config.find(name);
-    if (it != _config.end()) {
-        _confMutex.unlock();
-        return it->second;
-    } else {
-        _confMutex.unlock();
-        // find config key among networks config keys
-        for (const auto& desc : _networksPerDevice) {
-            const auto& execNetwork = desc.second;
-            auto param = execNetwork->GetMetric(METRIC_KEY(SUPPORTED_CONFIG_KEYS));
-            for (auto &&configKey : param.as<std::vector<std::string>>()) {
-                if (configKey == name) {
-                    return execNetwork->GetConfig(configKey);
-                }
+    {
+        std::lock_guard<std::mutex> lock(_confMutex);
+        auto it = _config.find(name);
+        if (it != _config.end()) {
+            return it->second;
+        }
+    }
+
+    // find config key among networks config keys
+    for (const auto& desc : _networksPerDevice) {
+        const auto& execNetwork = desc.second;
+        auto param = execNetwork->GetMetric(METRIC_KEY(SUPPORTED_CONFIG_KEYS));
+        for (auto &&configKey : param.as<std::vector<std::string>>()) {
+            if (configKey == name) {
+                return execNetwork->GetConfig(configKey);
            }
        }
-        IE_THROW(NotFound) << name <<" not found in the ExecutableNetwork config";
    }
+    IE_THROW(NotFound) << name << " not found in the ExecutableNetwork config";
}

InferenceEngine::Parameter MultiDeviceExecutableNetwork::GetMetric(const std::string &name) const {

diff --git a/src/plugins/auto/executable_network.hpp b/src/plugins/auto/executable_network.hpp
index 4034569211b..45efc0450c1 100644
--- a/src/plugins/auto/executable_network.hpp
+++ b/src/plugins/auto/executable_network.hpp
@@ -43,6 +43,12 @@ struct DeviceInformation {
    std::map<std::string, std::string> config;
    int numRequestsPerDevices;
    std::string defaultDeviceID;
+    DeviceName uniqueName;
+};
+
+struct AutoContext {
+    bool needPerfCounters = {false};
+    unsigned int modelPriority = 0;
};

struct AutoLoadContext {
@@ -153,6 +159,7 @@ public:
                                const std::vector<DeviceInformation>& metaDevices,
                                const std::string& strDevices,
                                MultiDeviceInferencePlugin* plugin,
+                                const AutoContext& context,
                                const bool needPerfCounters = false);

    void SetConfig(const std::map<std::string, InferenceEngine::Parameter> &config) override;
@@ -202,6 +209,7 @@ private:
    std::shared_ptr<InferenceEngine::ICore> _core;
    InferenceEngine::IStreamsExecutor::Ptr _executor;
    MultiDeviceInferencePlugin* _multiPlugin;
+    AutoContext _context;
    bool _workModeIsAUTO = {false};
    mutable std::once_flag _oc;
    std::once_flag _firstLoadOC;
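AutoContext above is the small value object carried from config parsing down to device selection. A sketch of how it plausibly gets filled: the PERF_COUNT handling mirrors CheckConfig in this patch, while the priority parsing is an assumption (that hunk is truncated below), inferred from the behavior tests that reject "-1" and "should be int":

#include <map>
#include <stdexcept>
#include <string>

struct AutoContext {
    bool needPerfCounters = false;
    unsigned int modelPriority = 0;
};

AutoContext makeContext(const std::map<std::string, std::string>& config) {
    AutoContext ctx;
    auto perf = config.find("PERF_COUNT");
    if (perf != config.end())
        ctx.needPerfCounters = (perf->second == "YES");
    auto prio = config.find("AUTO_NETWORK_PRIORITY");
    if (prio != config.end()) {
        int value = std::stoi(prio->second);  // "should be int" -> std::invalid_argument
        if (value < 0)
            throw std::out_of_range("priority must be >= 0");  // rejects "-1"
        ctx.modelPriority = static_cast<unsigned int>(value);
    }
    return ctx;
}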
diff --git a/src/plugins/auto/plugin.cpp b/src/plugins/auto/plugin.cpp
index 4416d707e62..7e388cc7ff9 100644
--- a/src/plugins/auto/plugin.cpp
+++ b/src/plugins/auto/plugin.cpp
@@ -60,10 +60,15 @@ namespace {
    res.push_back(CONFIG_KEY_INTERNAL(MULTI_WORK_MODE_AS_AUTO));
    res.push_back(PluginConfigParams::KEY_PERF_COUNT);
    res.push_back(PluginConfigParams::KEY_EXCLUSIVE_ASYNC_REQUESTS);
+    res.push_back(MultiDeviceConfigParams::KEY_AUTO_NETWORK_PRIORITY);
    return res;
    }();
}  // namespace

+
+std::mutex MultiDeviceInferencePlugin::_mtx;
+std::map<unsigned int, std::list<std::string>> MultiDeviceInferencePlugin::_priorityMap;
+
std::map<std::string, std::string> MultiDeviceInferencePlugin::GetSupportedConfig(
    const std::map<std::string, std::string> & config, const std::string & deviceName) const {
    std::vector<std::string> supportedConfigKeys = GetCore()->GetMetric(deviceName, METRIC_KEY(SUPPORTED_CONFIG_KEYS));
@@ -137,11 +142,31 @@ std::vector<DeviceInformation> MultiDeviceInferencePlugin::ParseMetaDevices(cons
        std::string defaultDeviceID = "";
        DeviceIDParser parsed{deviceName};
-        if (parsed.getDeviceID().empty())
+        std::string deviceid = parsed.getDeviceID();
+        if (deviceid.empty()) {
            defaultDeviceID = getDefaultDeviceID(deviceName);
+            deviceid = defaultDeviceID;
+        }
+
+        std::string fullDeviceName = "";
+        std::string uniqueName = "";
+        if (parsed.getDeviceName() == "GPU") {
+            std::vector<std::string> supportedMetrics = GetCore()->GetMetric(deviceName, METRIC_KEY(SUPPORTED_METRICS));
+            if (std::find(supportedMetrics.begin(), supportedMetrics.end(), METRIC_KEY(FULL_DEVICE_NAME)) != supportedMetrics.end()) {
+                fullDeviceName = GetCore()->GetMetric(deviceName, METRIC_KEY(FULL_DEVICE_NAME)).as<std::string>();
+            }
+        }
+
+        if (fullDeviceName.empty()) {
+            uniqueName = parsed.getDeviceName() + "_" + deviceid;
+        } else {
+            uniqueName = fullDeviceName + "_" + deviceid;
+        }
+
+        LOG_DEBUG("deviceName:%s, defaultDeviceID:%s, uniqueName:%s",
+                deviceName.c_str(), defaultDeviceID.c_str(), uniqueName.c_str());
        // create meta device
-        metaDevices.push_back({ deviceName, getDeviceConfig(deviceName), numRequests, defaultDeviceID });
+        metaDevices.push_back({ deviceName, getDeviceConfig(deviceName), numRequests, defaultDeviceID, uniqueName});
    }

    return metaDevices;
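The uniqueName rule above is easiest to see with concrete values: GPUs use the FULL_DEVICE_NAME metric when available, everything else falls back to "<device>_<id>". A small sketch whose expected outputs mirror parse_meta_device_test.cpp later in this patch:

#include <iostream>
#include <string>

std::string makeUniqueName(const std::string& device, const std::string& id,
                           const std::string& fullName) {  // fullName empty if unavailable
    return (fullName.empty() ? device : fullName) + "_" + id;
}

int main() {
    std::cout << makeUniqueName("CPU", "", "") << "\n";  // "CPU_"
    std::cout << makeUniqueName("GPU", "01", "Intel(R) Gen9 HD Graphics (iGPU)") << "\n";
    // "Intel(R) Gen9 HD Graphics (iGPU)_01"; two GPUs with the same index but
    // different full names therefore stay distinguishable
}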
"" : ","; } - return std::make_shared(modelPath, network, supportDevices, strDevices, this, needPerfCounters); + return std::make_shared(modelPath, network, supportDevices, strDevices, this, context, context.needPerfCounters); } OV_ITT_SCOPED_TASK(itt::domains::MULTIPlugin, "MultiDeviceInferencePlugin::LoadNetworkImpl:MultiMode"); if (priorities == fullConfig.end()) { @@ -377,20 +402,18 @@ QueryNetworkResult MultiDeviceInferencePlugin::QueryNetwork(const CNNNetwork& return queryResult; } -DeviceInformation MultiDeviceInferencePlugin::SelectDevice(const std::vector& metaDevices, const std::string& networkPrecision) { +DeviceInformation MultiDeviceInferencePlugin::SelectDevice(const std::vector& metaDevices, + const std::string& networkPrecision, unsigned int priority) { OV_ITT_SCOPED_TASK(itt::domains::MULTIPlugin, "MultiDeviceInferencePlugin::SelectDevice"); if (metaDevices.empty()) { IE_THROW(NotFound) << "No available device to select in " << GetName() << " plugin"; } - if (metaDevices.size() == 1) { - return metaDevices.at(0); - } - std::vector CPU; - std::vector dGPU; - std::vector iGPU; - std::vector MYRIAD; - std::vector VPUX; + std::list CPU; + std::list dGPU; + std::list iGPU; + std::list MYRIAD; + std::list VPUX; for (auto& item : metaDevices) { if (item.deviceName.find("CPU") == 0) { @@ -406,96 +429,103 @@ DeviceInformation MultiDeviceInferencePlugin::SelectDevice(const std::vectorGetMetric(item.deviceName, METRIC_KEY(FULL_DEVICE_NAME)).as(); - if (gpuFullDeviceName.find("iGPU") != std::string::npos) { + auto& gpuUniqueName = item.uniqueName; + if (gpuUniqueName.find("iGPU") != std::string::npos) { iGPU.push_back(item); - } else if (gpuFullDeviceName.find("dGPU") != std::string::npos) { + } else if (gpuUniqueName.find("dGPU") != std::string::npos) { dGPU.push_back(item); } continue; } } - if (CPU.empty() && dGPU.empty() && iGPU.empty() && MYRIAD.empty() && VPUX.empty()) { - IE_THROW(NotFound) << "No available device found"; - } - // Priority of selecting device: dGPU > VPUX > iGPU > MYRIAD > CPU - if (!dGPU.empty()) { - for (auto&& item : dGPU) { - std::vector capability = GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES)); - auto supportNetwork = std::find(capability.begin(), capability.end(), networkPrecision); - if (supportNetwork != capability.end()) { - return item; - } - } - } else if (!VPUX.empty()) { - for (auto&& item : VPUX) { - std::vector capability = GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES)); - auto supportNetwork = std::find(capability.begin(), capability.end(), networkPrecision); - if (supportNetwork != capability.end()) { - return item; - } - } - } else if (!iGPU.empty()) { - for (auto&& item : iGPU) { - std::vector capability = GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES)); - auto supportNetwork = std::find(capability.begin(), capability.end(), networkPrecision); - if (supportNetwork != capability.end()) { - return item; - } - } - } else if (!MYRIAD.empty()) { - for (auto&& item : MYRIAD) { - std::vector capability = GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES)); - auto supportNetwork = std::find(capability.begin(), capability.end(), networkPrecision); - if (supportNetwork != capability.end()) { - return item; - } - } - } + std::list devices; + devices.splice(devices.end(), dGPU); + devices.splice(devices.end(), VPUX); + devices.splice(devices.end(), iGPU); + devices.splice(devices.end(), MYRIAD); - // If network is FP32 but there is no 
+    std::list<DeviceInformation> validDevices;
+
+    auto selectSupportDev = [this, &devices, &validDevices](const std::string& networkPrecision) {
+        for (auto iter = devices.begin(); iter != devices.end();) {
+            std::vector<std::string> capability = GetCore()->GetMetric(iter->deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
+            auto supportNetwork = std::find(capability.begin(), capability.end(), (networkPrecision));
+            if (supportNetwork != capability.end()) {
+                validDevices.push_back(std::move(*iter));
+                devices.erase(iter++);
+                continue;
+            }
+            iter++;
+        }
+    };
+    selectSupportDev(networkPrecision);
+    // If the network is FP32, also collect the devices that support FP16 but not FP32.
    if (networkPrecision == "FP32") {
-        if (!dGPU.empty()) {
-            for (auto&& item : dGPU) {
-                std::vector<std::string> capability = GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
-                auto supportNetwork = std::find(capability.begin(), capability.end(), "FP16");
-                if (supportNetwork != capability.end()) {
-                    return item;
-                }
-            }
-        } else if (!VPUX.empty()) {
-            for (auto&& item : VPUX) {
-                std::vector<std::string> capability = GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
-                auto supportNetwork = std::find(capability.begin(), capability.end(), "FP16");
-                if (supportNetwork != capability.end()) {
-                    return item;
-                }
-            }
-        } else if (!iGPU.empty()) {
-            for (auto&& item : iGPU) {
-                std::vector<std::string> capability = GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
-                auto supportNetwork = std::find(capability.begin(), capability.end(), "FP16");
-                if (supportNetwork != capability.end()) {
-                    return item;
-                }
-            }
-        } else if (!MYRIAD.empty()) {
-            for (auto&& item : MYRIAD) {
-                std::vector<std::string> capability = GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
-                auto supportNetwork = std::find(capability.begin(), capability.end(), "FP16");
-                if (supportNetwork != capability.end()) {
-                    return item;
-                }
+        const std::string f16 = "FP16";
+        selectSupportDev(f16);
+    }
+    // add CPU devices if any exist
+    validDevices.splice(validDevices.end(), CPU);
+
+    if (validDevices.empty()) {
+        IE_THROW() << "Cannot select any device";
+    }
+    // all available devices are now in validDevices;
+    // devices already taken by higher priorities must be removed,
+    // so remember the last device first
+    DeviceInformation lastDevice = validDevices.back();
+    {
+        // begin to filter devices
+        std::lock_guard<std::mutex> lck(_mtx);
+        for (auto && kvp : _priorityMap) {
+            if (kvp.first >= priority) {
+                continue;
            }
+            auto& filterDevices = kvp.second;
+            auto sd = std::remove_if(validDevices.begin(), validDevices.end(), [&filterDevices](DeviceInformation device) {
+                    auto iter = std::find_if(filterDevices.begin(), filterDevices.end(), [&device](std::string uniqueName) {
+                            return (uniqueName == device.uniqueName);
+                            });
+                    return iter != filterDevices.end() ? true : false;
+                    });
+            validDevices.erase(sd, validDevices.end());
        }
    }

-    if (CPU.empty()) {
-        IE_THROW() << "Cannot select any device";
+    DeviceInformation* ptrSelectDevice = NULL;
+    if (validDevices.empty()) {
+        // nothing is left after removing the devices held by higher priorities,
+        // so fall back to the last device of all available devices
+        ptrSelectDevice = &lastDevice;
+    } else {
+        // select the first device among the remaining candidates
+        ptrSelectDevice = &validDevices.front();
+    }
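An illustration of the _priorityMap bookkeeping the filter above consults; the types follow the patch, the snapshot contents are invented:

#include <list>
#include <map>
#include <string>

int main() {
    // priority level -> unique names of devices already serving that level
    std::map<unsigned int, std::list<std::string>> priorityMap = {
        {0, {"dGPU_01"}},  // a priority-0 network already occupies the dGPU
    };
    // A request with priority 1 removes every device registered under a
    // numerically smaller key from its candidates, so it skips "dGPU_01" and
    // lands on the next one (e.g. "iGPU_01"), which is the pattern asserted
    // in key_network_priority_test.cpp below.
    (void)priorityMap;
}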
-    return CPU[0];
+    // record the device priority
+    RegisterPriority(priority, ptrSelectDevice->uniqueName);
+    return *ptrSelectDevice;
+}
+
+void MultiDeviceInferencePlugin::UnregisterPriority(const unsigned int& priority,
+        const std::string& deviceName) {
+    std::lock_guard<std::mutex> lck(_mtx);
+    auto& priorityDevices = _priorityMap[priority];
+    for (auto iter = priorityDevices.begin(); iter != priorityDevices.end();) {
+        if (*iter == deviceName) {
+            priorityDevices.erase(iter);
+            break;
+        }
+        iter++;
+    }
+}
+
+void MultiDeviceInferencePlugin::RegisterPriority(const unsigned int& priority,
+        const std::string& deviceName) {
+    std::lock_guard<std::mutex> lck(_mtx);
+    auto& priorityDevices = _priorityMap[priority];
+    priorityDevices.push_back(deviceName);
}

std::string MultiDeviceInferencePlugin::GetDeviceList(const std::map<std::string, std::string>& config) const {
@@ -520,19 +550,17 @@ std::string MultiDeviceInferencePlugin::GetDeviceList(const std::map<std::string, std::string>& config) const {
void MultiDeviceInferencePlugin::CheckConfig(const std::map<std::string, std::string>& config,
-                                bool& needPerfCounters, std::map<std::string, std::string>& filterConfig) {
+                                AutoContext& context, std::map<std::string, std::string>& filterConfig) {
    // TODO need to optimize this code, too much duplicated code
    const auto perf_hints_configs = PerfHintsConfig::SupportedKeys();
    for (auto&& kvp : config) {
-        if (kvp.first.find("AUTO_") == 0) {
-            continue;
-        } else if (kvp.first == PluginConfigParams::KEY_PERF_COUNT) {
+        if (kvp.first == PluginConfigParams::KEY_PERF_COUNT) {
            if (kvp.second == PluginConfigParams::YES) {
-                needPerfCounters = true;
+                context.needPerfCounters = true;
                filterConfig.insert({kvp.first, kvp.second});
            } else if (kvp.second == PluginConfigParams::NO) {
-                needPerfCounters = false;
+                context.needPerfCounters = false;
            } else {
                IE_THROW() << "Unsupported config value: " << kvp.second
                           << " for key: " << kvp.first;
@@ -551,10 +579,24 @@ void MultiDeviceInferencePlugin::CheckConfig(const std::map

diff --git a/src/plugins/auto/plugin.hpp b/src/plugins/auto/plugin.hpp
--- a/src/plugins/auto/plugin.hpp
+++ b/src/plugins/auto/plugin.hpp
@@ -7,6 +7,7 @@
 #include
 #include
+#include <mutex>
 #include
 #include
@@ -45,7 +46,10 @@ public:
    const std::map<std::string, std::string> & config) const;

    std::string GetDeviceList(const std::map<std::string, std::string>& config) const;

-    MOCKTESTMACRO DeviceInformation SelectDevice(const std::vector<DeviceInformation>& metaDevices, const std::string& networkPrecision = METRIC_VALUE(FP32));
+    MOCKTESTMACRO DeviceInformation SelectDevice(const std::vector<DeviceInformation>& metaDevices,
+            const std::string& networkPrecision = METRIC_VALUE(FP32), unsigned int priority = 0);
+    void UnregisterPriority(const unsigned int& priority, const std::string& deviceName);
+    void RegisterPriority(const unsigned int& priority, const std::string& deviceName);

protected:
    std::map<std::string, std::string> GetSupportedConfig(const std::map<std::string, std::string>& config,
@@ -56,10 +60,12 @@ private:
                               InferenceEngine::CNNNetwork network,
                               const std::map<std::string, std::string>& config,
                               const std::string &networkPrecision = METRIC_VALUE(FP32));
-    static void CheckConfig(const std::map<std::string, std::string>& config, bool& needPerfCounters,
+    static void CheckConfig(const std::map<std::string, std::string>& config, AutoContext& context,
                            std::map<std::string, std::string>& filterConfig);
    std::vector<DeviceInformation> FilterDevice(const std::vector<DeviceInformation>& metaDevices,
                                                const std::map<std::string, std::string>& config);
+    static std::mutex _mtx;
+    static std::map<unsigned int, std::list<std::string>> _priorityMap;
};

}  // namespace MultiDevicePlugin

diff --git a/src/tests/functional/plugin/cpu/shared_tests_instances/behavior/plugin/configuration_tests.cpp b/src/tests/functional/plugin/cpu/shared_tests_instances/behavior/plugin/configuration_tests.cpp
index ff240aa56ed..1becc193cf0 100644
--- a/src/tests/functional/plugin/cpu/shared_tests_instances/behavior/plugin/configuration_tests.cpp
+++ b/src/tests/functional/plugin/cpu/shared_tests_instances/behavior/plugin/configuration_tests.cpp
@@ -96,7 +96,13 @@ namespace {
{{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU}, {InferenceEngine::PluginConfigParams::KEY_LOG_LEVEL, InferenceEngine::PluginConfigParams::LOG_DEBUG}}, {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU}, - {InferenceEngine::PluginConfigParams::KEY_LOG_LEVEL, InferenceEngine::PluginConfigParams::LOG_TRACE}} + {InferenceEngine::PluginConfigParams::KEY_LOG_LEVEL, InferenceEngine::PluginConfigParams::LOG_TRACE}}, + {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU}, + {InferenceEngine::MultiDeviceConfigParams::KEY_AUTO_NETWORK_PRIORITY, "0"}}, + {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU}, + {InferenceEngine::MultiDeviceConfigParams::KEY_AUTO_NETWORK_PRIORITY, "1"}}, + {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU}, + {InferenceEngine::MultiDeviceConfigParams::KEY_AUTO_NETWORK_PRIORITY, "2"}} }; INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, CorrectConfigTests, @@ -149,17 +155,6 @@ namespace { {InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_LIMIT, "NAN"}} }; - const std::vector> multiconf = { - {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU}, - {InferenceEngine::PluginConfigParams::KEY_PERFORMANCE_HINT, InferenceEngine::PluginConfigParams::THROUGHPUT}}, - {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU}, - {InferenceEngine::PluginConfigParams::KEY_PERFORMANCE_HINT, InferenceEngine::PluginConfigParams::LATENCY}}, - {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU}, - {InferenceEngine::PluginConfigParams::KEY_PERFORMANCE_HINT, InferenceEngine::PluginConfigParams::LATENCY}, - {InferenceEngine::PluginConfigParams::KEY_PERFORMANCE_HINT_NUM_REQUESTS, "1"}}, - {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU}} - }; - const std::vector> autoinconfigs = { {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU}, {InferenceEngine::PluginConfigParams::KEY_PERFORMANCE_HINT, "DOESN'T EXIST"}}, @@ -176,7 +171,22 @@ namespace { {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU}, {InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_LIMIT, "NAN"}}, {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU}, - {InferenceEngine::PluginConfigParams::KEY_LOG_LEVEL, "NAN"}} + {InferenceEngine::MultiDeviceConfigParams::KEY_AUTO_NETWORK_PRIORITY, "-1"}}, + {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU}, + {InferenceEngine::MultiDeviceConfigParams::KEY_AUTO_NETWORK_PRIORITY, "should be int"}}, + {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU}, + {InferenceEngine::PluginConfigParams::KEY_LOG_LEVEL, "NAN"}} + }; + + const std::vector> multiconf = { + {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU}, + {InferenceEngine::PluginConfigParams::KEY_PERFORMANCE_HINT, InferenceEngine::PluginConfigParams::THROUGHPUT}}, + {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU}, + 
{InferenceEngine::PluginConfigParams::KEY_PERFORMANCE_HINT, InferenceEngine::PluginConfigParams::LATENCY}}, + {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU}, + {InferenceEngine::PluginConfigParams::KEY_PERFORMANCE_HINT, InferenceEngine::PluginConfigParams::LATENCY}, + {InferenceEngine::PluginConfigParams::KEY_PERFORMANCE_HINT_NUM_REQUESTS, "1"}}, + {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU}} }; INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, IncorrectConfigTests, diff --git a/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/plugin/configuration_tests.cpp b/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/plugin/configuration_tests.cpp index 1cb9fc7a567..826d3f1fc47 100644 --- a/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/plugin/configuration_tests.cpp +++ b/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/plugin/configuration_tests.cpp @@ -68,6 +68,10 @@ namespace { {InferenceEngine::PluginConfigParams::KEY_DEVICE_ID, "DEVICE_UNKNOWN"}}, {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU}, {InferenceEngine::PluginConfigParams::KEY_LOG_LEVEL, "NAN"}}, + {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_GPU}, + {InferenceEngine::MultiDeviceConfigParams::KEY_AUTO_NETWORK_PRIORITY, "-1"}}, + {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_GPU}, + {InferenceEngine::MultiDeviceConfigParams::KEY_AUTO_NETWORK_PRIORITY, "should be int"}}, {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_GPU + std::string(",") + CommonTestUtils::DEVICE_CPU}, {InferenceEngine::PluginConfigParams::KEY_PERFORMANCE_HINT, "DOESN'T EXIST"}}, @@ -92,7 +96,13 @@ namespace { {InferenceEngine::PluginConfigParams::KEY_DEVICE_ID, "DEVICE_UNKNOWN"}}, {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_GPU + std::string(",") + CommonTestUtils::DEVICE_CPU}, - {InferenceEngine::PluginConfigParams::KEY_LOG_LEVEL, "NAN"}} + {InferenceEngine::PluginConfigParams::KEY_LOG_LEVEL, "NAN"}}, + {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , + CommonTestUtils::DEVICE_GPU + std::string(",") + CommonTestUtils::DEVICE_CPU}, + {InferenceEngine::MultiDeviceConfigParams::KEY_AUTO_NETWORK_PRIORITY, "-1"}}, + {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , + CommonTestUtils::DEVICE_GPU + std::string(",") + CommonTestUtils::DEVICE_CPU}, + {InferenceEngine::MultiDeviceConfigParams::KEY_AUTO_NETWORK_PRIORITY, "should be int"}} }; IE_SUPPRESS_DEPRECATED_END @@ -177,6 +187,12 @@ namespace { {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_GPU}, {InferenceEngine::PluginConfigParams::KEY_PERFORMANCE_HINT, InferenceEngine::PluginConfigParams::LATENCY}, {InferenceEngine::PluginConfigParams::KEY_PERFORMANCE_HINT_NUM_REQUESTS, "1"}}, + {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_GPU}, + {InferenceEngine::MultiDeviceConfigParams::KEY_AUTO_NETWORK_PRIORITY, "0"}}, + {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_GPU}, + {InferenceEngine::MultiDeviceConfigParams::KEY_AUTO_NETWORK_PRIORITY, "1"}}, + 
{{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_GPU},
+         {InferenceEngine::MultiDeviceConfigParams::KEY_AUTO_NETWORK_PRIORITY, "2"}},
        {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES ,
          CommonTestUtils::DEVICE_GPU + std::string(",") + CommonTestUtils::DEVICE_CPU},
         {InferenceEngine::PluginConfigParams::KEY_PERFORMANCE_HINT, InferenceEngine::PluginConfigParams::THROUGHPUT}},
@@ -204,7 +220,16 @@
            {InferenceEngine::PluginConfigParams::KEY_LOG_LEVEL, InferenceEngine::PluginConfigParams::LOG_DEBUG}},
        {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES ,
          CommonTestUtils::DEVICE_GPU + std::string(",") + CommonTestUtils::DEVICE_CPU},
-         {InferenceEngine::PluginConfigParams::KEY_LOG_LEVEL, InferenceEngine::PluginConfigParams::LOG_TRACE}}
+         {InferenceEngine::PluginConfigParams::KEY_LOG_LEVEL, InferenceEngine::PluginConfigParams::LOG_TRACE}},
+        {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES ,
+          CommonTestUtils::DEVICE_GPU + std::string(",") + CommonTestUtils::DEVICE_CPU},
+         {InferenceEngine::MultiDeviceConfigParams::KEY_AUTO_NETWORK_PRIORITY, "0"}},
+        {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES ,
+          CommonTestUtils::DEVICE_GPU + std::string(",") + CommonTestUtils::DEVICE_CPU},
+         {InferenceEngine::MultiDeviceConfigParams::KEY_AUTO_NETWORK_PRIORITY, "1"}},
+        {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES ,
+          CommonTestUtils::DEVICE_GPU + std::string(",") + CommonTestUtils::DEVICE_CPU},
+         {InferenceEngine::MultiDeviceConfigParams::KEY_AUTO_NETWORK_PRIORITY, "2"}}
    };

INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, DefaultValuesConfigTests,

diff --git a/src/tests/unit/auto/auto_select_device_failed_test.cpp b/src/tests/unit/auto/auto_select_device_failed_test.cpp
index 8925c63c301..51ade26194a 100644
--- a/src/tests/unit/auto/auto_select_device_failed_test.cpp
+++ b/src/tests/unit/auto/auto_select_device_failed_test.cpp
@@ -207,7 +207,7 @@ TEST_P(AutoLoadFailedTest, LoadCNNetWork) {
        // set the return value of SelectDevice
        // for example, if there are three devices, it will return GPU on the first call,
        // then MYRIAD, and CPU last
-        ON_CALL(*plugin, SelectDevice(Property(&std::vector<DeviceInformation>::size, Eq(selDevsSize)), _))
+        ON_CALL(*plugin, SelectDevice(Property(&std::vector<DeviceInformation>::size, Eq(selDevsSize)), _, _))
            .WillByDefault(Return(metaDevices[deviceConfigs.size() - selDevsSize]));
        devicesStr += deviceName;
        devicesStr += ((++iter) == deviceConfigs.end()) ?
"" : ","; @@ -219,16 +219,16 @@ TEST_P(AutoLoadFailedTest, LoadCNNetWork) { if (thrExcWheSelect) { selDevsSize = deviceConfigs.size(); if (selDevsSize > 1) { - ON_CALL(*plugin, SelectDevice(Property(&std::vector::size, Eq(selDevsSize - 1)), _)) + ON_CALL(*plugin, SelectDevice(Property(&std::vector::size, Eq(selDevsSize - 1)), _, _)) .WillByDefault(Throw(InferenceEngine::GeneralError{""})); } else { - ON_CALL(*plugin, SelectDevice(Property(&std::vector::size, Eq(1)), _)) + ON_CALL(*plugin, SelectDevice(Property(&std::vector::size, Eq(1)), _, _)) .WillByDefault(Throw(InferenceEngine::GeneralError{""})); } } EXPECT_CALL(*plugin, ParseMetaDevices(_, _)).Times(AtLeast(1)); - EXPECT_CALL(*plugin, SelectDevice(_, _)).Times(selectCount); + EXPECT_CALL(*plugin, SelectDevice(_, _, _)).Times(selectCount); EXPECT_CALL(*core, LoadNetwork(::testing::Matcher(_), ::testing::Matcher(_), ::testing::Matcher(_))).Times(loadCount); diff --git a/src/tests/unit/auto/exec_network_get_metrics.cpp b/src/tests/unit/auto/exec_network_get_metrics.cpp index 2916e6d9e1c..6a29fd508c1 100644 --- a/src/tests/unit/auto/exec_network_get_metrics.cpp +++ b/src/tests/unit/auto/exec_network_get_metrics.cpp @@ -173,10 +173,10 @@ TEST_P(ExecNetworkGetMetric, OPTIMAL_NUMBER_OF_INFER_REQUESTS) { metaDevices.push_back({CommonTestUtils::DEVICE_CPU, {}, cpuCustomerNum, ""}); metaDevices.push_back({CommonTestUtils::DEVICE_GPU, {}, gpuCustomerNum, ""}); - ON_CALL(*plugin, SelectDevice(_, _)).WillByDefault(Return(metaDevices[1])); + ON_CALL(*plugin, SelectDevice(_, _, _)).WillByDefault(Return(metaDevices[1])); ON_CALL(*plugin, ParseMetaDevices(_, _)).WillByDefault(Return(metaDevices)); EXPECT_CALL(*plugin, ParseMetaDevices(_, _)).Times(1); - EXPECT_CALL(*plugin, SelectDevice(_, _)).Times(1); + EXPECT_CALL(*plugin, SelectDevice(_, _, _)).Times(1); if (cpuSleep) { ON_CALL(*core, LoadNetwork(::testing::Matcher(_), diff --git a/src/tests/unit/auto/key_network_priority_test.cpp b/src/tests/unit/auto/key_network_priority_test.cpp new file mode 100644 index 00000000000..2410ba52d72 --- /dev/null +++ b/src/tests/unit/auto/key_network_priority_test.cpp @@ -0,0 +1,241 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include "unit_test_utils/mocks/cpp_interfaces/interface/mock_icore.hpp" +#include "unit_test_utils/mocks/mock_iinfer_request.hpp" +#include "unit_test_utils/mocks/cpp_interfaces/impl/mock_inference_plugin_internal.hpp" +#include "unit_test_utils/mocks/cpp_interfaces/interface/mock_iexecutable_network_internal.hpp" +#include "unit_test_utils/mocks/cpp_interfaces/interface/mock_ivariable_state_internal.hpp" +#include "unit_test_utils/mocks/cpp_interfaces/interface/mock_iinference_plugin.hpp" +#include +#include +#include +#include +#include +#include "plugin/mock_auto_device_plugin.hpp" +#include "cpp/ie_plugin.hpp" +#include "mock_common.hpp" + +using ::testing::MatcherCast; +using ::testing::AllOf; +using ::testing::Throw; +using ::testing::Matches; +using ::testing::_; +using ::testing::StrEq; +using ::testing::Return; +using ::testing::Property; +using ::testing::Eq; +using ::testing::ReturnRef; +using ::testing::AtLeast; +using ::testing::InvokeWithoutArgs; +using Config = std::map; +using namespace MockMultiDevice; + +using PriorityParams = std::tuple; //{priority, deviceUniquName} + +using ConfigParams = std::tuple< + std::string, // netPrecision + std::vector // {{priority, expect device uniqueName}} + >; +class KeyNetworkPriorityTest : public 
::testing::TestWithParam { +public: + std::shared_ptr core; + std::shared_ptr plugin; + std::vector metaDevices; + +public: + static std::string getTestCaseName(testing::TestParamInfo obj) { + std::string netPrecision; + std::vector PriorityConfigs; + std::tie(netPrecision, PriorityConfigs) = obj.param; + std::ostringstream result; + for (auto& item : PriorityConfigs) { + result << "_priority_" << std::get<0>(item); + result << "_return_" << std::get<1>(item); + } + result << "netPrecision_" << netPrecision; + return result.str(); + } + + void TearDown() override { + core.reset(); + plugin.reset(); + metaDevices.clear(); + } + + void SetUp() override { + // prepare mockicore and cnnNetwork for loading + core = std::shared_ptr(new MockICore()); + auto* origin_plugin = new MockMultiDeviceInferencePlugin(); + plugin = std::shared_ptr(origin_plugin); + // replace core with mock Icore + plugin->SetCore(core); + metaDevices = {{CommonTestUtils::DEVICE_CPU, {}, 2, "", "CPU_01"}, + {CommonTestUtils::DEVICE_GPU, {}, 2, "01", "iGPU_01"}, + {CommonTestUtils::DEVICE_GPU, {}, 2, "01", "dGPU_01"}, + {CommonTestUtils::DEVICE_MYRIAD, {}, 2, "01", "MYRIAD_01" }, + {CommonTestUtils::DEVICE_KEEMBAY, {}, 2, "01", "VPUX_01"}}; + IE_SET_METRIC(OPTIMIZATION_CAPABILITIES, cpuCability, {"FP32", "FP16", "INT8", "BIN"}); + IE_SET_METRIC(OPTIMIZATION_CAPABILITIES, gpuCability, {"FP32", "FP16", "BATCHED_BLOB", "BIN"}); + IE_SET_METRIC(OPTIMIZATION_CAPABILITIES, myriadCability, {"FP16"}); + IE_SET_METRIC(OPTIMIZATION_CAPABILITIES, vpuxCability, {"INT8"}); + ON_CALL(*core, GetMetric(StrEq(CommonTestUtils::DEVICE_CPU), + StrEq(METRIC_KEY(OPTIMIZATION_CAPABILITIES)), _)).WillByDefault(RETURN_MOCK_VALUE(cpuCability)); + ON_CALL(*core, GetMetric(StrEq(CommonTestUtils::DEVICE_GPU), + StrEq(METRIC_KEY(OPTIMIZATION_CAPABILITIES)), _)).WillByDefault(RETURN_MOCK_VALUE(gpuCability)); + ON_CALL(*core, GetMetric(StrEq(CommonTestUtils::DEVICE_MYRIAD), + StrEq(METRIC_KEY(OPTIMIZATION_CAPABILITIES)), _)).WillByDefault(RETURN_MOCK_VALUE(myriadCability)); + ON_CALL(*core, GetMetric(StrEq(CommonTestUtils::DEVICE_KEEMBAY), + StrEq(METRIC_KEY(OPTIMIZATION_CAPABILITIES)), _)).WillByDefault(RETURN_MOCK_VALUE(vpuxCability)); + ON_CALL(*plugin, SelectDevice).WillByDefault([this](const std::vector& metaDevices, + const std::string& netPrecision, unsigned int Priority) { + return plugin->MultiDeviceInferencePlugin::SelectDevice(metaDevices, netPrecision, Priority); + }); + } +}; + +TEST_P(KeyNetworkPriorityTest, SelectDevice) { + // get Parameter + std::string netPrecision; + std::vector PriorityConfigs; + std::tie(netPrecision, PriorityConfigs) = this->GetParam(); + std::vector resDevInfo; + + EXPECT_CALL(*plugin, SelectDevice(_, _, _)).Times(PriorityConfigs.size()); + EXPECT_CALL(*core, GetMetric(_, _, _)).Times(AtLeast(PriorityConfigs.size() * 4)); + + for (auto& item : PriorityConfigs) { + resDevInfo.push_back(plugin->SelectDevice(metaDevices, netPrecision, std::get<0>(item))); + } + for (unsigned int i = 0; i < PriorityConfigs.size(); i++) { + EXPECT_EQ(resDevInfo[i].uniqueName, std::get<1>(PriorityConfigs[i])); + plugin->UnregisterPriority(std::get<0>(PriorityConfigs[i]), std::get<1>(PriorityConfigs[i])); + } +} + +TEST_P(KeyNetworkPriorityTest, MultiThreadsSelectDevice) { + // get Parameter + std::string netPrecision; + std::vector PriorityConfigs; + std::tie(netPrecision, PriorityConfigs) = this->GetParam(); + std::vector resDevInfo; + std::vector> futureVect; + + EXPECT_CALL(*plugin, SelectDevice(_, _, _)).Times(PriorityConfigs.size() * 
2); + EXPECT_CALL(*core, GetMetric(_, _, _)).Times(AtLeast(PriorityConfigs.size() * 4 * 2)); + // selectdevice in multi threads, and UnregisterPriority them all, should not affect the + // Priority Map + for (auto& item : PriorityConfigs) { + unsigned int priority = std::get<0>(item); + auto future = std::async(std::launch::async, [this, &netPrecision, priority] { + auto deviceInfo = plugin->SelectDevice(metaDevices, netPrecision, priority); + plugin->UnregisterPriority(priority, deviceInfo.uniqueName); + }); + futureVect.push_back(std::move(future)); + } + + for (auto& item : futureVect) { + item.get(); + } + + for (auto& item : PriorityConfigs) { + resDevInfo.push_back(plugin->SelectDevice(metaDevices, netPrecision, std::get<0>(item))); + } + for (unsigned int i = 0; i < PriorityConfigs.size(); i++) { + EXPECT_EQ(resDevInfo[i].uniqueName, std::get<1>(PriorityConfigs[i])); + plugin->UnregisterPriority(std::get<0>(PriorityConfigs[i]), std::get<1>(PriorityConfigs[i])); + } +} + + +// ConfigParams details +// example +// ConfigParams {"FP32", {PriorityParams {0, "dGPU_01"}, +// PriorityParams {1, "iGPU_01"}, +// PriorityParams {2, "MYRIAD_01"}, +// PriorityParams {2, "MYRIAD_01"}}}, +// {netPrecision, PriorityParamsVector{{priority, expect device uniqueName}}} + +const std::vector testConfigs = { + ConfigParams {"FP32", {PriorityParams {0, "dGPU_01"}, + PriorityParams {1, "iGPU_01"}, + PriorityParams {2, "MYRIAD_01"}, + PriorityParams {2, "MYRIAD_01"}}}, + ConfigParams {"FP32", {PriorityParams {2, "dGPU_01"}, + PriorityParams {3, "iGPU_01"}, + PriorityParams {4, "MYRIAD_01"}, + PriorityParams {5, "CPU_01"}}}, + ConfigParams {"FP32", {PriorityParams {2, "dGPU_01"}, + PriorityParams {0, "dGPU_01"}, + PriorityParams {2, "iGPU_01"}, + PriorityParams {2, "iGPU_01"}}}, + ConfigParams {"FP32", {PriorityParams {2, "dGPU_01"}, + PriorityParams {0, "dGPU_01"}, + PriorityParams {2, "iGPU_01"}, + PriorityParams {3, "MYRIAD_01"}}}, + ConfigParams {"FP32", {PriorityParams {0, "dGPU_01"}, + PriorityParams {1, "iGPU_01"}, + PriorityParams {2, "MYRIAD_01"}, + PriorityParams {3, "CPU_01"}, + PriorityParams {0, "dGPU_01"}, + PriorityParams {1, "iGPU_01"}, + PriorityParams {2, "MYRIAD_01"}, + PriorityParams {3, "CPU_01"}}}, + ConfigParams {"INT8", {PriorityParams {0, "VPUX_01"}, + PriorityParams {1, "CPU_01"}, + PriorityParams {2, "CPU_01"}, + PriorityParams {2, "CPU_01"}}}, + ConfigParams {"INT8", {PriorityParams {2, "VPUX_01"}, + PriorityParams {3, "CPU_01"}, + PriorityParams {4, "CPU_01"}, + PriorityParams {5, "CPU_01"}}}, + ConfigParams {"INT8", {PriorityParams {2, "VPUX_01"}, + PriorityParams {0, "VPUX_01"}, + PriorityParams {2, "CPU_01"}, + PriorityParams {2, "CPU_01"}}}, + ConfigParams {"INT8", {PriorityParams {2, "VPUX_01"}, + PriorityParams {0, "VPUX_01"}, + PriorityParams {2, "CPU_01"}, + PriorityParams {3, "CPU_01"}}}, + ConfigParams {"INT8", {PriorityParams {0, "VPUX_01"}, + PriorityParams {1, "CPU_01"}, + PriorityParams {2, "CPU_01"}, + PriorityParams {3, "CPU_01"}, + PriorityParams {0, "VPUX_01"}, + PriorityParams {1, "CPU_01"}, + PriorityParams {2, "CPU_01"}, + PriorityParams {3, "CPU_01"}}}, + ConfigParams {"BIN", {PriorityParams {0, "dGPU_01"}, + PriorityParams {1, "iGPU_01"}, + PriorityParams {2, "CPU_01"}, + PriorityParams {2, "CPU_01"}}}, + ConfigParams {"BIN", {PriorityParams {2, "dGPU_01"}, + PriorityParams {3, "iGPU_01"}, + PriorityParams {4, "CPU_01"}, + PriorityParams {5, "CPU_01"}}}, + ConfigParams {"BIN", {PriorityParams {2, "dGPU_01"}, + PriorityParams {0, "dGPU_01"}, + 
PriorityParams {2, "iGPU_01"}, + PriorityParams {2, "iGPU_01"}}}, + ConfigParams {"BIN", {PriorityParams {2, "dGPU_01"}, + PriorityParams {0, "dGPU_01"}, + PriorityParams {2, "iGPU_01"}, + PriorityParams {3, "CPU_01"}}}, + ConfigParams {"BIN", {PriorityParams {0, "dGPU_01"}, + PriorityParams {1, "iGPU_01"}, + PriorityParams {2, "CPU_01"}, + PriorityParams {3, "CPU_01"}, + PriorityParams {0, "dGPU_01"}, + PriorityParams {1, "iGPU_01"}, + PriorityParams {2, "CPU_01"}, + PriorityParams {3, "CPU_01"}}} + }; + + +INSTANTIATE_TEST_SUITE_P(smoke_Auto_BehaviorTests, KeyNetworkPriorityTest, + ::testing::ValuesIn(testConfigs), + KeyNetworkPriorityTest::getTestCaseName); + diff --git a/src/tests/unit/auto/parse_meta_device_test.cpp b/src/tests/unit/auto/parse_meta_device_test.cpp new file mode 100644 index 00000000000..83d62f3e6a7 --- /dev/null +++ b/src/tests/unit/auto/parse_meta_device_test.cpp @@ -0,0 +1,172 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include "unit_test_utils/mocks/cpp_interfaces/interface/mock_icore.hpp" +#include "unit_test_utils/mocks/mock_iinfer_request.hpp" +#include "unit_test_utils/mocks/cpp_interfaces/impl/mock_inference_plugin_internal.hpp" +#include "unit_test_utils/mocks/cpp_interfaces/interface/mock_iexecutable_network_internal.hpp" +#include "unit_test_utils/mocks/cpp_interfaces/interface/mock_ivariable_state_internal.hpp" +#include "unit_test_utils/mocks/cpp_interfaces/interface/mock_iinference_plugin.hpp" +#include +#include +#include +#include +#include +#include "plugin/mock_auto_device_plugin.hpp" +#include "cpp/ie_plugin.hpp" +#include "mock_common.hpp" + +using ::testing::MatcherCast; +using ::testing::HasSubstr; +using ::testing::AllOf; +using ::testing::Throw; +using ::testing::Matches; +using ::testing::_; +using ::testing::StrEq; +using ::testing::StrNe; +using ::testing::Return; +using ::testing::Property; +using ::testing::Eq; +using ::testing::AnyNumber; +using ::testing::ReturnRef; +using ::testing::AtLeast; +using ::testing::InvokeWithoutArgs; +using Config = std::map; +using namespace MockMultiDevice; + +const char cpuFullDeviceName[] = "Intel(R) Core(TM) i7-6700 CPU @ 3.40GHz"; +const char igpuFullDeviceName[] = "Intel(R) Gen9 HD Graphics (iGPU)"; +// const char dgpuFullDeviceName[] = "Intel(R) Iris(R) Xe MAX Graphics (dGPU)"; +const char myriadFullDeviceName[] = "Intel Movidius Myriad X VPU"; +const char vpuxFullDeviceName[] = ""; +using ConfigParams = std::tuple< + std::string, // Priority devices + std::vector, // expect metaDevices + bool // if throw exception + >; +class ParseMetaDeviceTest : public ::testing::TestWithParam { +public: + std::shared_ptr core; + std::shared_ptr plugin; + +public: + static std::string getTestCaseName(testing::TestParamInfo obj) { + std::string priorityDevices; + std::vector metaDevices; + bool throwException; + std::tie(priorityDevices, metaDevices, throwException) = obj.param; + std::ostringstream result; + result << "priorityDevices_" << priorityDevices; + if (throwException) { + result << "_throwException_true"; + } else { + result << "_throwException_false"; + } + return result.str(); + } + + void TearDown() override { + core.reset(); + plugin.reset(); + } + + void SetUp() override { + // prepare mockicore and cnnNetwork for loading + core = std::shared_ptr(new MockICore()); + auto* origin_plugin = new MockMultiDeviceInferencePlugin(); + plugin = std::shared_ptr(origin_plugin); + // replace core with mock Icore + plugin->SetCore(core); + + 
IE_SET_METRIC(SUPPORTED_METRICS, metrics, {METRIC_KEY(SUPPORTED_CONFIG_KEYS), METRIC_KEY(FULL_DEVICE_NAME)}); + ON_CALL(*core, GetMetric(_, StrEq(METRIC_KEY(SUPPORTED_METRICS)), _)) + .WillByDefault(RETURN_MOCK_VALUE(metrics)); + + ON_CALL(*core, GetMetric(HasSubstr(CommonTestUtils::DEVICE_CPU), + StrEq(METRIC_KEY(FULL_DEVICE_NAME)), _)).WillByDefault(Return(cpuFullDeviceName)); + ON_CALL(*core, GetMetric(HasSubstr(CommonTestUtils::DEVICE_GPU), + StrEq(METRIC_KEY(FULL_DEVICE_NAME)), _)).WillByDefault(Return(igpuFullDeviceName)); + ON_CALL(*core, GetMetric(HasSubstr(CommonTestUtils::DEVICE_MYRIAD), + StrEq(METRIC_KEY(FULL_DEVICE_NAME)), _)).WillByDefault(Return(myriadFullDeviceName)); + ON_CALL(*core, GetMetric(HasSubstr(CommonTestUtils::DEVICE_KEEMBAY), + StrEq(METRIC_KEY(FULL_DEVICE_NAME)), _)).WillByDefault(Return(vpuxFullDeviceName)); + IE_SET_METRIC(SUPPORTED_CONFIG_KEYS, otherConfigKeys, {CONFIG_KEY(DEVICE_ID)}); + IE_SET_METRIC(SUPPORTED_CONFIG_KEYS, cpuConfigKeys, {}); + ON_CALL(*core, GetMetric(HasSubstr(CommonTestUtils::DEVICE_CPU), + StrEq(METRIC_KEY(SUPPORTED_CONFIG_KEYS)), _)).WillByDefault(RETURN_MOCK_VALUE(cpuConfigKeys)); + ON_CALL(*core, GetMetric(Not(HasSubstr(CommonTestUtils::DEVICE_CPU)), + StrEq(METRIC_KEY(SUPPORTED_CONFIG_KEYS)), _)).WillByDefault(RETURN_MOCK_VALUE(otherConfigKeys)); + ON_CALL(*core, GetConfig(_, StrEq(CONFIG_KEY(DEVICE_ID)))) + .WillByDefault(InvokeWithoutArgs([](){return "01";})); + + ON_CALL(*plugin, ParseMetaDevices).WillByDefault([this](const std::string& priorityDevices, + const std::map& config) { + return plugin->MultiDeviceInferencePlugin::ParseMetaDevices(priorityDevices, config); + }); + } + + void compare(std::vector& result, std::vector& expect) { + EXPECT_EQ(result.size(), expect.size()); + if (result.size() == expect.size()) { + for (unsigned int i = 0 ; i < result.size(); i++) { + EXPECT_EQ(result[i].deviceName, expect[i].deviceName); + EXPECT_EQ(result[i].uniqueName, expect[i].uniqueName); + EXPECT_EQ(result[i].numRequestsPerDevices, expect[i].numRequestsPerDevices); + EXPECT_EQ(result[i].defaultDeviceID, expect[i].defaultDeviceID); + } + } + } +}; + +TEST_P(ParseMetaDeviceTest, ParseMetaDevices) { + // get Parameter + std::string priorityDevices; + std::vector metaDevices; + bool throwException; + std::tie(priorityDevices, metaDevices, throwException) = this->GetParam(); + + EXPECT_CALL(*plugin, ParseMetaDevices(_, _)).Times(1); + EXPECT_CALL(*core, GetMetric(_, _, _)).Times(AnyNumber()); + EXPECT_CALL(*core, GetConfig(_, _)).Times(AnyNumber()); + if (throwException) { + ASSERT_ANY_THROW(plugin->ParseMetaDevices(priorityDevices, {})); + } else { + auto result = plugin->ParseMetaDevices(priorityDevices, {}); + compare(result, metaDevices); + } +} + +// ConfigParams details +// example +// ConfigParams {devicePriority, expect metaDevices, ifThrowException} + +const std::vector testConfigs = { + ConfigParams {"CPU,GPU,MYRIAD,VPUX", + {{"CPU", {}, -1, "", "CPU_"}, + {"GPU", {}, -1, "01", std::string(igpuFullDeviceName) + "_01"}, + {"MYRIAD", {}, -1, "01", "MYRIAD_01"}, + {"VPUX", {}, -1, "01", "VPUX_01"}}, false}, + ConfigParams {"CPU(1),GPU(2),MYRIAD(3),VPUX(4)", + {{"CPU", {}, 1, "", "CPU_"}, + {"GPU", {}, 2, "01", std::string(igpuFullDeviceName) + "_01"}, + {"MYRIAD", {}, 3, "01", "MYRIAD_01"}, + {"VPUX", {}, 4, "01", "VPUX_01"}}, false}, + ConfigParams {"CPU(-1),GPU,MYRIAD,VPUX", {}, true}, + ConfigParams {"CPU(NA),GPU,MYRIAD,VPUX", {}, true}, + ConfigParams {"CPU.02(3),GPU.03,MYRIAD.04,VPUX.05", + {{"CPU.02", {}, 3, "", "CPU_02"}, + 
{"GPU.03", {}, -1, "", std::string(igpuFullDeviceName) + "_03"}, + {"MYRIAD.04", {}, -1, "", "MYRIAD_04"}, + {"VPUX.05", {}, -1, "", "VPUX_05"}}, false} + }; + + +INSTANTIATE_TEST_SUITE_P(smoke_Auto_BehaviorTests, ParseMetaDeviceTest, + ::testing::ValuesIn(testConfigs), + ParseMetaDeviceTest::getTestCaseName); + +//toDo need add test for ParseMetaDevices(_, config) to check device config of +//return metaDevices diff --git a/src/tests/unit/auto/plugin/mock_auto_device_plugin.hpp b/src/tests/unit/auto/plugin/mock_auto_device_plugin.hpp index 883a5f581a8..f92bb52b187 100644 --- a/src/tests/unit/auto/plugin/mock_auto_device_plugin.hpp +++ b/src/tests/unit/auto/plugin/mock_auto_device_plugin.hpp @@ -14,7 +14,7 @@ namespace MockMultiDevice { class MockMultiDeviceInferencePlugin : public MultiDeviceInferencePlugin { public: MOCK_METHOD(DeviceInformation, SelectDevice, ((const std::vector&), - const std::string&), (override)); + const std::string&, unsigned int), (override)); MOCK_METHOD((std::vector), ParseMetaDevices, (const std::string&, (const std::map&)), (const, override)); }; diff --git a/src/tests/unit/auto/select_device_test.cpp b/src/tests/unit/auto/select_device_test.cpp new file mode 100644 index 00000000000..83d4cba279f --- /dev/null +++ b/src/tests/unit/auto/select_device_test.cpp @@ -0,0 +1,214 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include "unit_test_utils/mocks/cpp_interfaces/interface/mock_icore.hpp" +#include "unit_test_utils/mocks/mock_iinfer_request.hpp" +#include "unit_test_utils/mocks/cpp_interfaces/impl/mock_inference_plugin_internal.hpp" +#include "unit_test_utils/mocks/cpp_interfaces/interface/mock_iexecutable_network_internal.hpp" +#include "unit_test_utils/mocks/cpp_interfaces/interface/mock_ivariable_state_internal.hpp" +#include "unit_test_utils/mocks/cpp_interfaces/interface/mock_iinference_plugin.hpp" +#include +#include +#include +#include +#include +#include "plugin/mock_auto_device_plugin.hpp" +#include "cpp/ie_plugin.hpp" +#include "mock_common.hpp" + +using ::testing::MatcherCast; +using ::testing::AllOf; +using ::testing::Throw; +using ::testing::Matches; +using ::testing::_; +using ::testing::StrEq; +using ::testing::Return; +using ::testing::Property; +using ::testing::Eq; +using ::testing::ReturnRef; +using ::testing::AtLeast; +using ::testing::InvokeWithoutArgs; +using Config = std::map; +using namespace MockMultiDevice; + +using ConfigParams = std::tuple< + std::string, // netPrecision + std::vector, // metaDevices for select + DeviceInformation, // expect DeviceInformation + bool // throw exception + >; + +const DeviceInformation CPU_INFO = {CommonTestUtils::DEVICE_CPU, {}, 2, "01", "CPU_01"}; +const DeviceInformation IGPU_INFO = {CommonTestUtils::DEVICE_GPU, {}, 2, "01", "iGPU_01"}; +const DeviceInformation DGPU_INFO = {CommonTestUtils::DEVICE_GPU, {}, 2, "01", "dGPU_01"}; +const DeviceInformation MYRIAD_INFO = {CommonTestUtils::DEVICE_MYRIAD, {}, 2, "01", "MYRIAD_01" }; +const DeviceInformation KEEMBAY_INFO = {CommonTestUtils::DEVICE_KEEMBAY, {}, 2, "01", "VPUX_01" }; +const std::vector fp32DeviceVector = {DGPU_INFO, IGPU_INFO, MYRIAD_INFO, CPU_INFO}; +const std::vector fp16DeviceVector = {DGPU_INFO, IGPU_INFO, MYRIAD_INFO, CPU_INFO}; +const std::vector int8DeviceVector = {KEEMBAY_INFO, CPU_INFO}; +const std::vector binDeviceVector = {DGPU_INFO, IGPU_INFO, CPU_INFO}; +// if CPU support BATCHED_BLOB ? 
+// currently, if CPU is among the metaDevices, SelectDevice falls back to CPU when no other device supports the precision
+//const std::vector<DeviceInformation> batchedblobDeviceVector = {DGPU_INFO, IGPU_INFO};
+const std::vector<DeviceInformation> batchedblobDeviceVector = {DGPU_INFO, IGPU_INFO, CPU_INFO};
+std::map<std::string, std::vector<DeviceInformation>> devicesMap = {{"FP32", fp32DeviceVector},
+                                                                    {"FP16", fp16DeviceVector},
+                                                                    {"INT8", int8DeviceVector},
+                                                                    {"BIN", binDeviceVector},
+                                                                    {"BATCHED_BLOB", batchedblobDeviceVector}
+                                                                   };
+const std::vector<DeviceInformation> totalDevices = {DGPU_INFO, IGPU_INFO, MYRIAD_INFO, CPU_INFO, KEEMBAY_INFO};
+const std::vector<std::string> netPrecisions = {"FP32", "FP16", "INT8", "BIN", "BATCHED_BLOB"};
+std::vector<ConfigParams> testConfigs;
+
+class SelectDeviceTest : public ::testing::TestWithParam<ConfigParams> {
+public:
+    std::shared_ptr<MockICore> core;
+    std::shared_ptr<MockMultiDeviceInferencePlugin> plugin;
+
+public:
+    static std::string getTestCaseName(testing::TestParamInfo<ConfigParams> obj) {
+        std::string netPrecision;
+        std::vector<DeviceInformation> devices;
+        DeviceInformation expect;
+        bool throwExcept;
+        std::tie(netPrecision, devices, expect, throwExcept) = obj.param;
+        std::ostringstream result;
+        result << "_netPrecision_" << netPrecision;
+        for (auto& item : devices) {
+            result << "_device_" << item.uniqueName;
+        }
+        result << "_expect_" << expect.uniqueName;
+        if (throwExcept) {
+            result << "_throwExcept_true";
+        } else {
+            result << "_throwExcept_false";
+        }
+        return result.str();
+    }
+    // choose select_num devices out of `devices`, build a ConfigParams entry
+    // for each combination, and insert it into testConfigs
+    static void combine_device(const std::vector<DeviceInformation>& devices, int start,
+            int* result, int result_index, const int select_num, std::string& netPrecision) {
+        int i = 0;
+        for (i = start; i < devices.size() + 1 - result_index; i++) {
+            result[result_index - 1] = i;
+            if (result_index - 1 == 0) {
+                std::vector<DeviceInformation> metaDevices = {};
+                for (int j = select_num - 1; j >= 0; j--) {
+                    metaDevices.push_back(devices[result[j]]);
+                }
+                // Debug the combine_device
+                // for (auto& item : metaDevices) {
+                //     std::cout << item.uniqueName << "_";
+                // }
+                // std::cout << netPrecision << std::endl;
+                auto& devicesInfo = devicesMap[netPrecision];
+                bool find = false;
+                DeviceInformation expect;
+                for (auto& item : devicesInfo) {
+                    auto device = std::find_if(metaDevices.begin(), metaDevices.end(),
+                            [&item](const DeviceInformation& d)->bool{return d.uniqueName == item.uniqueName;});
+                    if (device != metaDevices.end()) {
+                        find = true;
+                        expect = item;
+                        break;
+                    }
+                }
+                testConfigs.push_back(std::make_tuple(netPrecision, metaDevices, expect, !find));
+            } else {
+                combine_device(devices, i + 1, result, result_index - 1, select_num, netPrecision);
+            }
+        }
+    }
+
+    static std::vector<ConfigParams> CreateConfigs() {
+        auto result = new int[totalDevices.size()];
+        // test every netPrecision against every possible device combination
+        // netPrecision count is 5
+        // device count is 5
+        // device combinations: 5!/5! + 5!/(4!*1!) + 5!/(3!*2!) + 5!/(2!*3!) + 5!/(1!*4!) = 31
+        // plus 1 null-device case
+        // total test config count is 32*5 = 160
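As a check on the arithmetic in the comment above, in binomial form:

\[ \sum_{k=1}^{5} \binom{5}{k} = 2^5 - 1 = 31, \qquad (31 + 1) \times 5 = 160. \]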
+        for (auto netPrecision : netPrecisions) {
+            for (int i = 1; i <= totalDevices.size(); i++) {
+                combine_device(totalDevices, 0, result, i, i, netPrecision);
+            }
+            // test null device
+            testConfigs.push_back(ConfigParams{netPrecision, {}, {}, true});
+        }
+        delete []result;
+        return testConfigs;
+    }
+
+    void compare(DeviceInformation& a, DeviceInformation& b) {
+        EXPECT_EQ(a.deviceName, b.deviceName);
+        EXPECT_EQ(a.uniqueName, b.uniqueName);
+        EXPECT_EQ(a.defaultDeviceID, b.defaultDeviceID);
+    }
+
+    void TearDown() override {
+        core.reset();
+        plugin.reset();
+    }
+
+    void SetUp() override {
+        // prepare the mock ICore and cnnNetwork for loading
+        core = std::shared_ptr<MockICore>(new MockICore());
+        auto* origin_plugin = new MockMultiDeviceInferencePlugin();
+        plugin = std::shared_ptr<MockMultiDeviceInferencePlugin>(origin_plugin);
+        // replace core with the mock ICore
+        plugin->SetCore(core);
+
+        IE_SET_METRIC(OPTIMIZATION_CAPABILITIES, cpuCability, {"FP32", "FP16", "INT8", "BIN"});
+        IE_SET_METRIC(OPTIMIZATION_CAPABILITIES, gpuCability, {"FP32", "FP16", "BATCHED_BLOB", "BIN"});
+        IE_SET_METRIC(OPTIMIZATION_CAPABILITIES, myriadCability, {"FP16"});
+        IE_SET_METRIC(OPTIMIZATION_CAPABILITIES, vpuxCability, {"INT8"});
+
+        ON_CALL(*core, GetMetric(StrEq(CommonTestUtils::DEVICE_CPU),
+                    StrEq(METRIC_KEY(OPTIMIZATION_CAPABILITIES)), _)).WillByDefault(RETURN_MOCK_VALUE(cpuCability));
+        ON_CALL(*core, GetMetric(StrEq(CommonTestUtils::DEVICE_GPU),
+                    StrEq(METRIC_KEY(OPTIMIZATION_CAPABILITIES)), _)).WillByDefault(RETURN_MOCK_VALUE(gpuCability));
+        ON_CALL(*core, GetMetric(StrEq(CommonTestUtils::DEVICE_MYRIAD),
+                    StrEq(METRIC_KEY(OPTIMIZATION_CAPABILITIES)), _)).WillByDefault(RETURN_MOCK_VALUE(myriadCability));
+        ON_CALL(*core, GetMetric(StrEq(CommonTestUtils::DEVICE_KEEMBAY),
+                    StrEq(METRIC_KEY(OPTIMIZATION_CAPABILITIES)), _)).WillByDefault(RETURN_MOCK_VALUE(vpuxCability));
+        ON_CALL(*plugin, SelectDevice).WillByDefault([this](const std::vector<DeviceInformation>& metaDevices,
+                    const std::string& netPrecision, unsigned int priority) {
+                return plugin->MultiDeviceInferencePlugin::SelectDevice(metaDevices, netPrecision, priority);
+                });
+    }
+};
+
+TEST_P(SelectDeviceTest, SelectDevice) {
+    // get Parameter
+    std::string netPrecision;
+    std::vector<DeviceInformation> devices;
+    DeviceInformation expect;
+    bool throwExcept;
+    std::tie(netPrecision, devices, expect, throwExcept) = this->GetParam();
+
+    EXPECT_CALL(*plugin, SelectDevice(_, _, _)).Times(1);
+    if (devices.size() >= 1) {
+        EXPECT_CALL(*core, GetMetric(_, _, _)).Times(AtLeast(devices.size() - 1));
+    } else {
+        EXPECT_CALL(*core, GetMetric(_, _, _)).Times(0);
+    }
+
+    if (throwExcept) {
+        ASSERT_THROW(plugin->SelectDevice(devices, netPrecision, 0), InferenceEngine::Exception);
+    } else {
+        auto result = plugin->SelectDevice(devices, netPrecision, 0);
+        compare(result, expect);
+    }
+}
+
+INSTANTIATE_TEST_SUITE_P(smoke_Auto_BehaviorTests, SelectDeviceTest,
+                ::testing::ValuesIn(SelectDeviceTest::CreateConfigs()),
+                SelectDeviceTest::getTestCaseName);

From fa05743e01063233321f365ef253748674c1128b Mon Sep 17 00:00:00 2001
From: Anton Chetverikov
Date: Thu, 16 Dec 2021 11:20:50 +0300
Subject: [PATCH 21/27] [MO] Handle new format of FP16 IRs in groupconv pass (#8921)

* Handle new format of FP16 IRs in groupconv pass
* Update condition check
* Make check more explicit
---
 .../openvino/tools/mo/utils/ir_reader/layer_to_class.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tools/mo/openvino/tools/mo/utils/ir_reader/layer_to_class.py
From fa05743e01063233321f365ef253748674c1128b Mon Sep 17 00:00:00 2001
From: Anton Chetverikov
Date: Thu, 16 Dec 2021 11:20:50 +0300
Subject: [PATCH 21/27] [MO] Handle new format of FP16 IRs in groupconv pass
 (#8921)

* Handle new format of FP16 IRs in groupconv pass

* Update condition check

* Make check more explicit
---
 .../openvino/tools/mo/utils/ir_reader/layer_to_class.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tools/mo/openvino/tools/mo/utils/ir_reader/layer_to_class.py b/tools/mo/openvino/tools/mo/utils/ir_reader/layer_to_class.py
index bbbc429fc0d..e207478e1ae 100644
--- a/tools/mo/openvino/tools/mo/utils/ir_reader/layer_to_class.py
+++ b/tools/mo/openvino/tools/mo/utils/ir_reader/layer_to_class.py
@@ -189,6 +189,14 @@ def groupconv_to_conv(op: Node):
             # We use add_destination method here to support case with multiple destinations of source port
             weights_node.in_port(0).get_source().get_connection().add_destination(op.in_port(1))
             weights_node.in_port(0).disconnect()
+        elif weights_node.type == 'Convert' and weights_node.destination_type == 'f32'\
+                and weights_node.in_port(0).get_source().node.type == 'Const':
+            # Support new FP16 IRs
+            const_node = weights_node.in_port(0).get_source().node
+            assert const_node.has_valid('value'), \
+                'Weights of GroupConv node {} have incorrect format'.format(op.name)
+            const_node.value = np.reshape(const_node.value, new_shape)
+
         else:
             assert op.in_port(1).get_source().data.get_shape() == new_shape, \
                 'Weight shape and calculated shape mismatch in GroupConv node {}.'.format(op.name)

From 460a6634fdf8d62405969c17cb77953ffb855996 Mon Sep 17 00:00:00 2001
From: Sergey Lyubimtsev
Date: Thu, 16 Dec 2021 13:01:03 +0300
Subject: [PATCH 22/27] Fix build issue for openvino wheel package on Windows
 (#9231)

* fix build issue for openvino wheel package on Windows

* revert ngraph_libs
---
 .../ie_bridges/python/wheel/.env.in           | 15 -------
 .../ie_bridges/python/wheel/CMakeLists.txt    | 44 ++++++-------------
 .../wheel/meta/openvino-dev.requirements.txt  | 28 ------------
 .../python/wheel/meta/openvino-dev.setup.cfg  | 22 ----------
 .../wheel/meta/openvino.requirements.txt      |  1 -
 .../python/wheel/meta/pypi_overview.md        | 32 --------------
 .../python/wheel/requirements-dev.txt         |  2 +-
 .../ie_bridges/python/wheel/setup.cfg         | 14 +++---
 .../ie_bridges/python/wheel/setup.py          | 34 +++++++-------
 tools/openvino_dev/setup.py                   |  2 +-
 10 files changed, 41 insertions(+), 153 deletions(-)
 delete mode 100644 inference-engine/ie_bridges/python/wheel/.env.in
 delete mode 100644 inference-engine/ie_bridges/python/wheel/meta/openvino-dev.requirements.txt
 delete mode 100644 inference-engine/ie_bridges/python/wheel/meta/openvino-dev.setup.cfg
 delete mode 100644 inference-engine/ie_bridges/python/wheel/meta/openvino.requirements.txt
 delete mode 100644 inference-engine/ie_bridges/python/wheel/meta/pypi_overview.md

diff --git a/inference-engine/ie_bridges/python/wheel/.env.in b/inference-engine/ie_bridges/python/wheel/.env.in
deleted file mode 100644
index 5dc313f6b1d..00000000000
--- a/inference-engine/ie_bridges/python/wheel/.env.in
+++ /dev/null
@@ -1,15 +0,0 @@
-WHEEL_PACKAGE_NAME=@WHEEL_PACKAGE_NAME@
-WHEEL_VERSION=@WHEEL_VERSION@
-WHEEL_BUILD=@WHEEL_BUILD@
-WHEEL_LICENCE_TYPE=@WHEEL_LICENCE_TYPE@
-WHEEL_AUTHOR=@WHEEL_AUTHOR@
-WHEEL_AUTHOR_EMAIL=@WHEEL_AUTHOR_EMAIL@
-WHEEL_DESC=@WHEEL_DESC@
-WHEEL_LICENSE=@WHEEL_LICENSE@
-WHEEL_REQUIREMENTS=@WHEEL_REQUIREMENTS@
-WHEEL_OVERVIEW=@WHEEL_OVERVIEW@
-
-CMAKE_BUILD_DIR=@CMAKE_BINARY_DIR@
-OV_RUNTIME_LIBS_DIR=@IE_CPACK_RUNTIME_PATH@
-TBB_LIBS_DIR=@TBB_LIBS_DIR@
-PY_PACKAGES_DIR=@PY_PACKAGES_DIR@
diff --git a/inference-engine/ie_bridges/python/wheel/CMakeLists.txt b/inference-engine/ie_bridges/python/wheel/CMakeLists.txt
index 53d8207c347..64728a5cd7c 100644
--- a/inference-engine/ie_bridges/python/wheel/CMakeLists.txt
+++ b/inference-engine/ie_bridges/python/wheel/CMakeLists.txt
@@ -1,40 +1,13 @@
 # Copyright (C) 2018-2021 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 #
-
-set(WHEEL_PACKAGE_NAME "openvino" CACHE
STRING "Name of the package") -set(WHEEL_LICENCE_TYPE "OSI Approved :: Apache Software License" CACHE STRING "License type for the package") -set(WHEEL_AUTHOR "Intel Corporation" CACHE STRING "Package author’s name") -set(WHEEL_AUTHOR_EMAIL "openvino_pushbot@intel.com" CACHE STRING "Email address of the package author") -set(WHEEL_DESC "Inference Engine Python* API" CACHE STRING "Short, summary description of the package") -set(WHEEL_URL "https://docs.openvinotoolkit.org/latest/index.html" CACHE STRING "Home page url") -set(WHEEL_DOWNLOAD_URL "https://github.com/openvinotoolkit/openvino/tags" CACHE STRING "Download page url") -set(WHEEL_VERSION "${IE_VERSION}" CACHE STRING "Version of this release" FORCE) set(WHEEL_BUILD "${IE_VERSION_BUILD}" CACHE STRING "Build number of this release" FORCE) -set(WHEEL_LICENSE "${CMAKE_SOURCE_DIR}/LICENSE" CACHE STRING "Wheel license file") -set(WHEEL_REQUIREMENTS "${CMAKE_CURRENT_SOURCE_DIR}/meta/openvino.requirements.txt" CACHE STRING "Wheel requirements.txt file") -set(WHEEL_OVERVIEW "${CMAKE_CURRENT_SOURCE_DIR}/meta/pypi_overview.md" CACHE STRING "Detailed description") - -set(SETUP_PY "${CMAKE_CURRENT_SOURCE_DIR}/setup.py") -set(SETUP_ENV "${CMAKE_CURRENT_SOURCE_DIR}/.env.in") -set(SETUP_ENV_OUT "${CMAKE_CURRENT_SOURCE_DIR}/.env") - set(PY_PACKAGES_DIR ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION}) set(TBB_LIBS_DIR runtime/3rdparty/tbb/lib) - -if(APPLE) - set(WHEEL_PLATFORM macosx_10_15_x86_64) -elseif(UNIX) - set(WHEEL_PLATFORM manylinux2014_x86_64) -elseif(WIN32) - set(WHEEL_PLATFORM win_amd64) +if(WIN32) set(TBB_LIBS_DIR runtime/3rdparty/tbb/bin) -else() - message(FATAL_ERROR "This platform is not supported") endif() -configure_file(${SETUP_ENV} ${SETUP_ENV_OUT} @ONLY) - if(LINUX) find_host_program(patchelf_program NAMES patchelf @@ -55,21 +28,30 @@ endforeach() execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "import wheel.bdist_wheel ; print(f'{wheel.bdist_wheel.get_abi_tag()}')" OUTPUT_VARIABLE PYTHON_ABI) execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "import wheel.vendored.packaging.tags as tags ; print(f'{tags.interpreter_name()}{tags.interpreter_version()}')" OUTPUT_VARIABLE INTERPRETER) +execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "import wheel.vendored.packaging.tags as tags ; print(f'{next(tags._platform_tags())}')" OUTPUT_VARIABLE WHEEL_PLATFORM) string(STRIP ${PYTHON_ABI} PYTHON_ABI) string(STRIP ${INTERPRETER} INTERPRETER) +string(STRIP ${WHEEL_PLATFORM} WHEEL_PLATFORM) set(openvino_wheel_name "openvino-${WHEEL_VERSION}-${WHEEL_BUILD}-${INTERPRETER}-${PYTHON_ABI}-${WHEEL_PLATFORM}.whl") set(openvino_wheels_output_dir "${CMAKE_BINARY_DIR}/wheels") set(openvino_wheel_path "${openvino_wheels_output_dir}/${openvino_wheel_name}") add_custom_command(OUTPUT ${openvino_wheel_path} + COMMAND ${CMAKE_COMMAND} -E copy_directory "${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_BINARY_DIR}" + COMMAND ${CMAKE_COMMAND} -E copy_directory "${CMAKE_SOURCE_DIR}/licensing" "${CMAKE_BINARY_DIR}/licensing" COMMAND ${CMAKE_COMMAND} -E remove_directory "${CMAKE_CURRENT_BINARY_DIR}/site-packages" - COMMAND ${PYTHON_EXECUTABLE} ${SETUP_PY} clean bdist_wheel + COMMAND ${CMAKE_COMMAND} -E env OPENVINO_VERSION=${IE_VERSION} + OPENVINO_VERSION=${IE_VERSION} + CMAKE_BUILD_DIR=${CMAKE_BINARY_DIR} + OV_RUNTIME_LIBS_DIR=${IE_CPACK_RUNTIME_PATH} + TBB_LIBS_DIR=${TBB_LIBS_DIR} + PY_PACKAGES_DIR=${PY_PACKAGES_DIR} + ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/setup.py" clean bdist_wheel --dist-dir ${openvino_wheels_output_dir} --build=${WHEEL_BUILD} 
--plat-name=${WHEEL_PLATFORM} - # COMMAND ${CMAKE_COMMAND} -E remove ${SETUP_ENV_OUT} - DEPENDS ${openvino_wheel_deps} ${SETUP_ENV_OUT} + DEPENDS ${openvino_wheel_deps} WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" COMMENT "Building Python wheel ${openvino_wheel_name}" VERBATIM) diff --git a/inference-engine/ie_bridges/python/wheel/meta/openvino-dev.requirements.txt b/inference-engine/ie_bridges/python/wheel/meta/openvino-dev.requirements.txt deleted file mode 100644 index ccc569a0194..00000000000 --- a/inference-engine/ie_bridges/python/wheel/meta/openvino-dev.requirements.txt +++ /dev/null @@ -1,28 +0,0 @@ -defusedxml>=0.7.1 -scipy~=1.5.4 -jstyleson~=0.0.2 -numpy>=1.16.6,<1.20 -addict>=2.4.0 -pandas~=1.1.5 -hyperopt~=0.1.2 -networkx~=2.5 -tqdm>=4.54.1 -texttable~=1.6.3 -py-cpuinfo>=7.0.0 -PyYAML>=5.4.1 -pillow>=8.1.2 -scikit-image>=0.17.2 -scikit-learn>=0.24.1 -yamlloader>=0.5 -shapely>=1.7.1 -nibabel>=3.2.1 -pydicom>=2.1.2 -sentencepiece>=0.1.95 -tokenizers>=0.10.1 -editdistance>=0.5.3 -parasail>=1.2.4 -fast-ctc-decode>=0.2.5 -rawpy>=0.16.0 -nltk>=3.5 -opencv-python==4.5.* -progress>=1.5 diff --git a/inference-engine/ie_bridges/python/wheel/meta/openvino-dev.setup.cfg b/inference-engine/ie_bridges/python/wheel/meta/openvino-dev.setup.cfg deleted file mode 100644 index d6789c4a084..00000000000 --- a/inference-engine/ie_bridges/python/wheel/meta/openvino-dev.setup.cfg +++ /dev/null @@ -1,22 +0,0 @@ -[options] -py_modules = - mo - mo_tf - mo_caffe - mo_mxnet - mo_onnx - mo_kaldi - -[options.package_data] - * = * - -[options.entry_points] -console_scripts = - -[metadata] -license_files = - readme* - *LICENSE* - *license* - *third-party-programs* - *EULA* diff --git a/inference-engine/ie_bridges/python/wheel/meta/openvino.requirements.txt b/inference-engine/ie_bridges/python/wheel/meta/openvino.requirements.txt deleted file mode 100644 index 63012dd1739..00000000000 --- a/inference-engine/ie_bridges/python/wheel/meta/openvino.requirements.txt +++ /dev/null @@ -1 +0,0 @@ -numpy>=1.16.6,<1.20 diff --git a/inference-engine/ie_bridges/python/wheel/meta/pypi_overview.md b/inference-engine/ie_bridges/python/wheel/meta/pypi_overview.md deleted file mode 100644 index 418a04bc16a..00000000000 --- a/inference-engine/ie_bridges/python/wheel/meta/pypi_overview.md +++ /dev/null @@ -1,32 +0,0 @@ -## OpenVINOâ„¢ Toolkit - -OpenVINOâ„¢ toolkit quickly deploys applications and solutions that emulate human vision. Based on Convolutional Neural Networks (CNNs), the toolkit extends computer vision (CV) workloads across Intel® hardware, maximizing performance. The OpenVINOâ„¢ toolkit includes the Deep Learning Deployment Toolkit (DLDT). - -OpenVINOâ„¢ toolkit: - -- Enables CNN-based deep learning inference on the edge -- Supports heterogeneous execution across an Intel® CPU, Intel® Integrated Graphics, Intel® Neural Compute Stick 2, and Intel® Vision Accelerator Design with Intel® Movidiusâ„¢ VPUs -- Speeds time-to-market via an easy-to-use library of computer vision functions and pre-optimized kernels -- Includes optimized calls for computer vision standards, including OpenCV\* and OpenCLâ„¢ - -Operating Systems: -- Ubuntu* 18.04 long-term support (LTS), 64-bit -- Windows* 10, 64-bit -- macOS* 10.15, 64-bit - -## Install the Runtime Package Using the PyPI Repository -1. Set up and update pip to the highest version: - ```sh - python3 -m pip install --upgrade pip - ``` -2. Install the Intel® distribution of OpenVINOâ„¢ toolkit: - ```sh - pip install openvino - ``` - -3. 
Verify that the package is installed: - ```sh - python3 -c "from openvino.inference_engine import IECore" - ``` - -Now you are ready to develop and run your application. \ No newline at end of file diff --git a/inference-engine/ie_bridges/python/wheel/requirements-dev.txt b/inference-engine/ie_bridges/python/wheel/requirements-dev.txt index 8c4ce47c35f..b7574b392d2 100644 --- a/inference-engine/ie_bridges/python/wheel/requirements-dev.txt +++ b/inference-engine/ie_bridges/python/wheel/requirements-dev.txt @@ -1,3 +1,3 @@ setuptools>=53.0.0 wheel>=0.36.2 -python-decouple>=3.4 + diff --git a/inference-engine/ie_bridges/python/wheel/setup.cfg b/inference-engine/ie_bridges/python/wheel/setup.cfg index abb1790b67f..c6893c93c42 100644 --- a/inference-engine/ie_bridges/python/wheel/setup.cfg +++ b/inference-engine/ie_bridges/python/wheel/setup.cfg @@ -1,7 +1,11 @@ [metadata] license_files = - readme* - *LICENSE* - *license* - *third-party-programs* - *EULA* + readme* + *LICENSE* + *license* + *third-party-programs* + ../../../../licensing/runtime-third-party-programs.txt + ../../../../licensing/tbb_third-party-programs.txt + ../../../../licensing/onednn_third-party-programs.txt + ../../../../LICENSE + diff --git a/inference-engine/ie_bridges/python/wheel/setup.py b/inference-engine/ie_bridges/python/wheel/setup.py index 517dce7560e..eb8d573dfba 100644 --- a/inference-engine/ie_bridges/python/wheel/setup.py +++ b/inference-engine/ie_bridges/python/wheel/setup.py @@ -21,7 +21,6 @@ from setuptools import setup, find_namespace_packages, Extension from setuptools.command.build_ext import build_ext from setuptools.command.build_clib import build_clib from setuptools.command.install import install -from decouple import config WHEEL_LIBS_INSTALL_DIR = os.path.join('openvino', 'libs') WHEEL_LIBS_PACKAGE = 'openvino.libs' @@ -41,10 +40,11 @@ elif machine == 'aarch64': ARCH = 'arm64' # The following variables can be defined in environment or .env file -CMAKE_BUILD_DIR = config('CMAKE_BUILD_DIR', '.') -OV_RUNTIME_LIBS_DIR = config('OV_RUNTIME_LIBS_DIR', f'runtime/{LIBS_DIR}/{ARCH}/{CONFIG}') -TBB_LIBS_DIR = config('TBB_LIBS_DIR', f'runtime/3rdparty/tbb/{LIBS_DIR}') -PY_PACKAGES_DIR = config('PY_PACKAGES_DIR', f'python/{PYTHON_VERSION}') +SCRIPT_DIR = Path(__file__).resolve().parents[0] +CMAKE_BUILD_DIR = os.getenv('CMAKE_BUILD_DIR', '.') +OV_RUNTIME_LIBS_DIR = os.getenv('OV_RUNTIME_LIBS_DIR', f'runtime/{LIBS_DIR}/{ARCH}/{CONFIG}') +TBB_LIBS_DIR = os.getenv('TBB_LIBS_DIR', f'runtime/3rdparty/tbb/{LIBS_DIR}') +PY_PACKAGES_DIR = os.getenv('PY_PACKAGES_DIR', f'python/{PYTHON_VERSION}') LIBS_RPATH = '$ORIGIN' if sys.platform == 'linux' else '@loader_path' LIB_INSTALL_CFG = { @@ -428,28 +428,28 @@ if not any(pl in sys.platform for pl in platforms): sys.exit(f'Unsupported platform: {sys.platform}, expected: linux, win32, darwin') # copy license file into the build directory -package_license = config('WHEEL_LICENSE', '') +package_license = os.getenv('WHEEL_LICENSE', SCRIPT_DIR.parents[3] / 'LICENSE') if os.path.exists(package_license): copyfile(package_license, 'LICENSE') packages = find_namespace_packages(get_package_dir(PY_INSTALL_CFG)) package_data: typing.Dict[str, list] = {} -pkg_name = config('WHEEL_PACKAGE_NAME', 'openvino') +pkg_name = os.getenv('WHEEL_PACKAGE_NAME', 'openvino') ext_modules = find_prebuilt_extensions(get_dir_list(PY_INSTALL_CFG)) if pkg_name == 'openvino' else [] setup( - version=config('WHEEL_VERSION', '0.0.0'), - build=config('WHEEL_BUILD', '000'), - 
author_email=config('WHEEL_AUTHOR_EMAIL', 'openvino_pushbot@intel.com'),
+    version=os.getenv('WHEEL_VERSION', '0.0.0'),
+    build=os.getenv('WHEEL_BUILD', '000'),
+    author_email=os.getenv('WHEEL_AUTHOR_EMAIL', 'openvino_pushbot@intel.com'),
     name=pkg_name,
-    license=config('WHEEL_LICENCE_TYPE', 'OSI Approved :: Apache Software License'),
-    author=config('WHEEL_AUTHOR', 'Intel Corporation'),
-    description=config('WHEEL_DESC', 'Inference Engine Python* API'),
-    install_requires=get_dependencies(config('WHEEL_REQUIREMENTS', 'meta/openvino.requirements.txt')),
-    long_description=get_description(config('WHEEL_OVERVIEW', 'meta/pypi_overview.md')),
+    license=os.getenv('WHEEL_LICENCE_TYPE', 'OSI Approved :: Apache Software License'),
+    author=os.getenv('WHEEL_AUTHOR', 'Intel(R) Corporation'),
+    description=os.getenv('WHEEL_DESC', 'OpenVINO(TM) Runtime'),
+    install_requires=get_dependencies(os.getenv('WHEEL_REQUIREMENTS', SCRIPT_DIR.parents[0] / 'requirements.txt')),
+    long_description=get_description(os.getenv('WHEEL_OVERVIEW', SCRIPT_DIR.parents[3] / 'docs/install_guides/pypi-openvino-rt.md')),
     long_description_content_type='text/markdown',
-    download_url=config('WHEEL_DOWNLOAD_URL', 'https://github.com/openvinotoolkit/openvino/tags'),
-    url=config('WHEEL_URL', 'https://docs.openvinotoolkit.org/latest/index.html'),
+    download_url=os.getenv('WHEEL_DOWNLOAD_URL', 'https://github.com/openvinotoolkit/openvino/tags'),
+    url=os.getenv('WHEEL_URL', 'https://docs.openvinotoolkit.org/latest/index.html'),
     cmdclass={
         'build': CustomBuild,
         'install': CustomInstall,
diff --git a/tools/openvino_dev/setup.py b/tools/openvino_dev/setup.py
index a18d6991a75..44861d6a3ed 100644
--- a/tools/openvino_dev/setup.py
+++ b/tools/openvino_dev/setup.py
@@ -195,7 +195,7 @@ setup(
     author_email='openvino_pushbot@intel.com',
     url='https://docs.openvinotoolkit.org/latest/index.html',
     download_url='https://github.com/openvinotoolkit/openvino/tags',
-    description='OpenVINO™ Developer Package',
+    description='OpenVINO(TM) Development Tools',
     long_description=get_description(SCRIPT_DIR.parents[1] / 'docs/install_guides/pypi-openvino-dev.md'),
     long_description_content_type='text/markdown',
     classifiers=[

From 2db19e6bf7749c49c19214c5528d04328d8d71a5 Mon Sep 17 00:00:00 2001
From: Vitaliy Urusovskij
Date: Thu, 16 Dec 2021 13:13:05 +0300
Subject: [PATCH 23/27] Fix c4146 warning: unary minus operator on unsigned
 type (#9153)

---
 .../template_plugin/backend/evaluates_map.cpp | 19 ++++++++++++++++++-
 src/core/reference/CMakeLists.txt             |  4 ----
 .../include/ngraph/runtime/reference/abs.hpp  |  8 +++++++-
 .../include/ngraph/runtime/reference/max.hpp  |  3 +--
 .../ngraph/runtime/reference/sigmoid.hpp      | 12 +++++++++++-
 src/core/src/op/range.cpp                     |  3 ++-
 6 files changed, 39 insertions(+), 10 deletions(-)
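The recurring fix in this patch is compile-time dispatch: an `enable_if` overload handles the types for which unary minus would hit an unsigned operand, so the warning-prone expression is never instantiated for them. A small self-contained sketch of the pattern (hypothetical `my_abs`, echoing the `abs.hpp` hunk below):

```cpp
#include <iostream>
#include <type_traits>

// For unsigned T, |x| == x, so the overload reduces to the identity
// and no negation is ever compiled for unsigned types.
template <typename T, typename std::enable_if<std::is_unsigned<T>::value, bool>::type = true>
T my_abs(T v) { return v; }

// For signed integral and floating-point T, negate when negative.
template <typename T, typename std::enable_if<!std::is_unsigned<T>::value, bool>::type = true>
T my_abs(T v) { return v < 0 ? -v : v; }

int main() {
    std::cout << my_abs(-3) << " " << my_abs(7u) << " " << my_abs(-2.5) << "\n";  // 3 7 2.5
}
```
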
Got:", element::Type(t1)); +} + +template ::value_type>::value || + std::is_same::value_type, bfloat16>::value || + std::is_same::value_type, float16>::value, + bool>::type = true> inline void evaluate(const shared_ptr& op, const HostTensorVector& outputs, const HostTensorVector& inputs) { diff --git a/src/core/reference/CMakeLists.txt b/src/core/reference/CMakeLists.txt index 0827873c4d9..82efbca3095 100644 --- a/src/core/reference/CMakeLists.txt +++ b/src/core/reference/CMakeLists.txt @@ -22,10 +22,6 @@ ie_faster_build(${TARGET_NAME} UNITY PCH PRIVATE "src/precomp.hpp") -if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") - target_compile_options(${TARGET_NAME} PUBLIC /wd4146) -endif() - target_compile_definitions(${TARGET_NAME} PRIVATE XBYAK_NO_OP_NAMES XBYAK64) if(NOT BUILD_SHARED_LIBS) diff --git a/src/core/reference/include/ngraph/runtime/reference/abs.hpp b/src/core/reference/include/ngraph/runtime/reference/abs.hpp index e3738d17ee0..6528ca0674b 100644 --- a/src/core/reference/include/ngraph/runtime/reference/abs.hpp +++ b/src/core/reference/include/ngraph/runtime/reference/abs.hpp @@ -5,11 +5,17 @@ #pragma once #include +#include namespace ngraph { namespace runtime { namespace reference { -template +template ::value, bool>::type = true> +void abs(const T* arg, T* out, size_t count) { + std::copy(arg, arg + count, out); +} + +template ::value, bool>::type = true> void abs(const T* arg, T* out, size_t count) { for (size_t i = 0; i < count; i++) { // TODO: generic "abs" doesn't work here for some reason. diff --git a/src/core/reference/include/ngraph/runtime/reference/max.hpp b/src/core/reference/include/ngraph/runtime/reference/max.hpp index 842e6e9a051..35dc081fbdf 100644 --- a/src/core/reference/include/ngraph/runtime/reference/max.hpp +++ b/src/core/reference/include/ngraph/runtime/reference/max.hpp @@ -16,8 +16,7 @@ namespace runtime { namespace reference { template void max(const T* arg, T* out, const Shape& in_shape, const AxisSet& reduction_axes) { - T minval = - std::numeric_limits::has_infinity ? T(-std::numeric_limits::infinity()) : std::numeric_limits::min(); + T minval = std::numeric_limits::lowest(); constexpr bool dont_keep_dims_in_output = false; const auto out_shape = reduce(in_shape, reduction_axes, dont_keep_dims_in_output); diff --git a/src/core/reference/include/ngraph/runtime/reference/sigmoid.hpp b/src/core/reference/include/ngraph/runtime/reference/sigmoid.hpp index baaf3db2add..bf9b0743675 100644 --- a/src/core/reference/include/ngraph/runtime/reference/sigmoid.hpp +++ b/src/core/reference/include/ngraph/runtime/reference/sigmoid.hpp @@ -6,11 +6,21 @@ #include #include +#include namespace ngraph { namespace runtime { namespace reference { -template +template ::value, bool>::type = true> +void sigmoid(const T* arg, T* out, size_t count) { + T exp_value; + for (size_t i = 0; i < count; i++) { + exp_value = std::exp(-static_cast::type>(arg[i])); + out[i] = 1 / (1 + exp_value); + } +} + +template ::value, bool>::type = true> void sigmoid(const T* arg, T* out, size_t count) { T exp_value; for (size_t i = 0; i < count; i++) { diff --git a/src/core/src/op/range.cpp b/src/core/src/op/range.cpp index 4d850eb7667..670ebe07faf 100644 --- a/src/core/src/op/range.cpp +++ b/src/core/src/op/range.cpp @@ -314,7 +314,8 @@ void static check_step(const op::v0::Range* node, T step) { template static typename std::enable_if::value, T>::type adjust_for_step_and_sign(T span, T step) { - return ceil_div(span < 0 ? -span : span, step < 0 ? -step : step); + return ceil_div(span < 0 ? 
From d10e8005c0ea32f748a3153623cb36baba5ebef0 Mon Sep 17 00:00:00 2001
From: Alexander Zhogov
Date: Thu, 16 Dec 2021 14:33:56 +0300
Subject: [PATCH 24/27] Revert "Fix build issue for openvino wheel package on
 Windows (#9231)" (#9260)

This reverts commit 460a6634fdf8d62405969c17cb77953ffb855996.
---
 .../ie_bridges/python/wheel/.env.in           | 15 +++++++
 .../ie_bridges/python/wheel/CMakeLists.txt    | 44 +++++++++++++------
 .../wheel/meta/openvino-dev.requirements.txt  | 28 ++++++++++++
 .../python/wheel/meta/openvino-dev.setup.cfg  | 22 ++++++++++
 .../wheel/meta/openvino.requirements.txt      |  1 +
 .../python/wheel/meta/pypi_overview.md        | 32 ++++++++++++++
 .../python/wheel/requirements-dev.txt         |  2 +-
 .../ie_bridges/python/wheel/setup.cfg         | 14 +++---
 .../ie_bridges/python/wheel/setup.py          | 34 +++++++-------
 tools/openvino_dev/setup.py                   |  2 +-
 10 files changed, 153 insertions(+), 41 deletions(-)
 create mode 100644 inference-engine/ie_bridges/python/wheel/.env.in
 create mode 100644 inference-engine/ie_bridges/python/wheel/meta/openvino-dev.requirements.txt
 create mode 100644 inference-engine/ie_bridges/python/wheel/meta/openvino-dev.setup.cfg
 create mode 100644 inference-engine/ie_bridges/python/wheel/meta/openvino.requirements.txt
 create mode 100644 inference-engine/ie_bridges/python/wheel/meta/pypi_overview.md

diff --git a/inference-engine/ie_bridges/python/wheel/.env.in b/inference-engine/ie_bridges/python/wheel/.env.in
new file mode 100644
index 00000000000..5dc313f6b1d
--- /dev/null
+++ b/inference-engine/ie_bridges/python/wheel/.env.in
@@ -0,0 +1,15 @@
+WHEEL_PACKAGE_NAME=@WHEEL_PACKAGE_NAME@
+WHEEL_VERSION=@WHEEL_VERSION@
+WHEEL_BUILD=@WHEEL_BUILD@
+WHEEL_LICENCE_TYPE=@WHEEL_LICENCE_TYPE@
+WHEEL_AUTHOR=@WHEEL_AUTHOR@
+WHEEL_AUTHOR_EMAIL=@WHEEL_AUTHOR_EMAIL@
+WHEEL_DESC=@WHEEL_DESC@
+WHEEL_LICENSE=@WHEEL_LICENSE@
+WHEEL_REQUIREMENTS=@WHEEL_REQUIREMENTS@
+WHEEL_OVERVIEW=@WHEEL_OVERVIEW@
+
+CMAKE_BUILD_DIR=@CMAKE_BINARY_DIR@
+OV_RUNTIME_LIBS_DIR=@IE_CPACK_RUNTIME_PATH@
+TBB_LIBS_DIR=@TBB_LIBS_DIR@
+PY_PACKAGES_DIR=@PY_PACKAGES_DIR@
diff --git a/inference-engine/ie_bridges/python/wheel/CMakeLists.txt b/inference-engine/ie_bridges/python/wheel/CMakeLists.txt
index 64728a5cd7c..53d8207c347 100644
--- a/inference-engine/ie_bridges/python/wheel/CMakeLists.txt
+++ b/inference-engine/ie_bridges/python/wheel/CMakeLists.txt
@@ -1,13 +1,40 @@
 # Copyright (C) 2018-2021 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 #
+
+set(WHEEL_PACKAGE_NAME "openvino" CACHE STRING "Name of the package")
+set(WHEEL_LICENCE_TYPE "OSI Approved :: Apache Software License" CACHE STRING "License type for the package")
+set(WHEEL_AUTHOR "Intel Corporation" CACHE STRING "Package author's name")
+set(WHEEL_AUTHOR_EMAIL "openvino_pushbot@intel.com" CACHE STRING "Email address of the package author")
+set(WHEEL_DESC "Inference Engine Python* API" CACHE STRING "Short, summary description of the package")
+set(WHEEL_URL "https://docs.openvinotoolkit.org/latest/index.html" CACHE STRING "Home page url")
+set(WHEEL_DOWNLOAD_URL "https://github.com/openvinotoolkit/openvino/tags" CACHE STRING "Download page url")
+set(WHEEL_VERSION "${IE_VERSION}" CACHE STRING "Version of this release" FORCE)
 set(WHEEL_BUILD "${IE_VERSION_BUILD}" CACHE STRING "Build number of this release" FORCE)
+set(WHEEL_LICENSE "${CMAKE_SOURCE_DIR}/LICENSE" CACHE STRING "Wheel license file")
+set(WHEEL_REQUIREMENTS "${CMAKE_CURRENT_SOURCE_DIR}/meta/openvino.requirements.txt" CACHE
STRING "Wheel requirements.txt file") +set(WHEEL_OVERVIEW "${CMAKE_CURRENT_SOURCE_DIR}/meta/pypi_overview.md" CACHE STRING "Detailed description") + +set(SETUP_PY "${CMAKE_CURRENT_SOURCE_DIR}/setup.py") +set(SETUP_ENV "${CMAKE_CURRENT_SOURCE_DIR}/.env.in") +set(SETUP_ENV_OUT "${CMAKE_CURRENT_SOURCE_DIR}/.env") + set(PY_PACKAGES_DIR ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION}) set(TBB_LIBS_DIR runtime/3rdparty/tbb/lib) -if(WIN32) + +if(APPLE) + set(WHEEL_PLATFORM macosx_10_15_x86_64) +elseif(UNIX) + set(WHEEL_PLATFORM manylinux2014_x86_64) +elseif(WIN32) + set(WHEEL_PLATFORM win_amd64) set(TBB_LIBS_DIR runtime/3rdparty/tbb/bin) +else() + message(FATAL_ERROR "This platform is not supported") endif() +configure_file(${SETUP_ENV} ${SETUP_ENV_OUT} @ONLY) + if(LINUX) find_host_program(patchelf_program NAMES patchelf @@ -28,30 +55,21 @@ endforeach() execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "import wheel.bdist_wheel ; print(f'{wheel.bdist_wheel.get_abi_tag()}')" OUTPUT_VARIABLE PYTHON_ABI) execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "import wheel.vendored.packaging.tags as tags ; print(f'{tags.interpreter_name()}{tags.interpreter_version()}')" OUTPUT_VARIABLE INTERPRETER) -execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "import wheel.vendored.packaging.tags as tags ; print(f'{next(tags._platform_tags())}')" OUTPUT_VARIABLE WHEEL_PLATFORM) string(STRIP ${PYTHON_ABI} PYTHON_ABI) string(STRIP ${INTERPRETER} INTERPRETER) -string(STRIP ${WHEEL_PLATFORM} WHEEL_PLATFORM) set(openvino_wheel_name "openvino-${WHEEL_VERSION}-${WHEEL_BUILD}-${INTERPRETER}-${PYTHON_ABI}-${WHEEL_PLATFORM}.whl") set(openvino_wheels_output_dir "${CMAKE_BINARY_DIR}/wheels") set(openvino_wheel_path "${openvino_wheels_output_dir}/${openvino_wheel_name}") add_custom_command(OUTPUT ${openvino_wheel_path} - COMMAND ${CMAKE_COMMAND} -E copy_directory "${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_BINARY_DIR}" - COMMAND ${CMAKE_COMMAND} -E copy_directory "${CMAKE_SOURCE_DIR}/licensing" "${CMAKE_BINARY_DIR}/licensing" COMMAND ${CMAKE_COMMAND} -E remove_directory "${CMAKE_CURRENT_BINARY_DIR}/site-packages" - COMMAND ${CMAKE_COMMAND} -E env OPENVINO_VERSION=${IE_VERSION} - OPENVINO_VERSION=${IE_VERSION} - CMAKE_BUILD_DIR=${CMAKE_BINARY_DIR} - OV_RUNTIME_LIBS_DIR=${IE_CPACK_RUNTIME_PATH} - TBB_LIBS_DIR=${TBB_LIBS_DIR} - PY_PACKAGES_DIR=${PY_PACKAGES_DIR} - ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/setup.py" clean bdist_wheel + COMMAND ${PYTHON_EXECUTABLE} ${SETUP_PY} clean bdist_wheel --dist-dir ${openvino_wheels_output_dir} --build=${WHEEL_BUILD} --plat-name=${WHEEL_PLATFORM} - DEPENDS ${openvino_wheel_deps} + # COMMAND ${CMAKE_COMMAND} -E remove ${SETUP_ENV_OUT} + DEPENDS ${openvino_wheel_deps} ${SETUP_ENV_OUT} WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" COMMENT "Building Python wheel ${openvino_wheel_name}" VERBATIM) diff --git a/inference-engine/ie_bridges/python/wheel/meta/openvino-dev.requirements.txt b/inference-engine/ie_bridges/python/wheel/meta/openvino-dev.requirements.txt new file mode 100644 index 00000000000..ccc569a0194 --- /dev/null +++ b/inference-engine/ie_bridges/python/wheel/meta/openvino-dev.requirements.txt @@ -0,0 +1,28 @@ +defusedxml>=0.7.1 +scipy~=1.5.4 +jstyleson~=0.0.2 +numpy>=1.16.6,<1.20 +addict>=2.4.0 +pandas~=1.1.5 +hyperopt~=0.1.2 +networkx~=2.5 +tqdm>=4.54.1 +texttable~=1.6.3 +py-cpuinfo>=7.0.0 +PyYAML>=5.4.1 +pillow>=8.1.2 +scikit-image>=0.17.2 +scikit-learn>=0.24.1 +yamlloader>=0.5 +shapely>=1.7.1 +nibabel>=3.2.1 +pydicom>=2.1.2 +sentencepiece>=0.1.95 +tokenizers>=0.10.1 
+editdistance>=0.5.3 +parasail>=1.2.4 +fast-ctc-decode>=0.2.5 +rawpy>=0.16.0 +nltk>=3.5 +opencv-python==4.5.* +progress>=1.5 diff --git a/inference-engine/ie_bridges/python/wheel/meta/openvino-dev.setup.cfg b/inference-engine/ie_bridges/python/wheel/meta/openvino-dev.setup.cfg new file mode 100644 index 00000000000..d6789c4a084 --- /dev/null +++ b/inference-engine/ie_bridges/python/wheel/meta/openvino-dev.setup.cfg @@ -0,0 +1,22 @@ +[options] +py_modules = + mo + mo_tf + mo_caffe + mo_mxnet + mo_onnx + mo_kaldi + +[options.package_data] + * = * + +[options.entry_points] +console_scripts = + +[metadata] +license_files = + readme* + *LICENSE* + *license* + *third-party-programs* + *EULA* diff --git a/inference-engine/ie_bridges/python/wheel/meta/openvino.requirements.txt b/inference-engine/ie_bridges/python/wheel/meta/openvino.requirements.txt new file mode 100644 index 00000000000..63012dd1739 --- /dev/null +++ b/inference-engine/ie_bridges/python/wheel/meta/openvino.requirements.txt @@ -0,0 +1 @@ +numpy>=1.16.6,<1.20 diff --git a/inference-engine/ie_bridges/python/wheel/meta/pypi_overview.md b/inference-engine/ie_bridges/python/wheel/meta/pypi_overview.md new file mode 100644 index 00000000000..418a04bc16a --- /dev/null +++ b/inference-engine/ie_bridges/python/wheel/meta/pypi_overview.md @@ -0,0 +1,32 @@ +## OpenVINOâ„¢ Toolkit + +OpenVINOâ„¢ toolkit quickly deploys applications and solutions that emulate human vision. Based on Convolutional Neural Networks (CNNs), the toolkit extends computer vision (CV) workloads across Intel® hardware, maximizing performance. The OpenVINOâ„¢ toolkit includes the Deep Learning Deployment Toolkit (DLDT). + +OpenVINOâ„¢ toolkit: + +- Enables CNN-based deep learning inference on the edge +- Supports heterogeneous execution across an Intel® CPU, Intel® Integrated Graphics, Intel® Neural Compute Stick 2, and Intel® Vision Accelerator Design with Intel® Movidiusâ„¢ VPUs +- Speeds time-to-market via an easy-to-use library of computer vision functions and pre-optimized kernels +- Includes optimized calls for computer vision standards, including OpenCV\* and OpenCLâ„¢ + +Operating Systems: +- Ubuntu* 18.04 long-term support (LTS), 64-bit +- Windows* 10, 64-bit +- macOS* 10.15, 64-bit + +## Install the Runtime Package Using the PyPI Repository +1. Set up and update pip to the highest version: + ```sh + python3 -m pip install --upgrade pip + ``` +2. Install the Intel® distribution of OpenVINOâ„¢ toolkit: + ```sh + pip install openvino + ``` + +3. Verify that the package is installed: + ```sh + python3 -c "from openvino.inference_engine import IECore" + ``` + +Now you are ready to develop and run your application. 
\ No newline at end of file diff --git a/inference-engine/ie_bridges/python/wheel/requirements-dev.txt b/inference-engine/ie_bridges/python/wheel/requirements-dev.txt index b7574b392d2..8c4ce47c35f 100644 --- a/inference-engine/ie_bridges/python/wheel/requirements-dev.txt +++ b/inference-engine/ie_bridges/python/wheel/requirements-dev.txt @@ -1,3 +1,3 @@ setuptools>=53.0.0 wheel>=0.36.2 - +python-decouple>=3.4 diff --git a/inference-engine/ie_bridges/python/wheel/setup.cfg b/inference-engine/ie_bridges/python/wheel/setup.cfg index c6893c93c42..abb1790b67f 100644 --- a/inference-engine/ie_bridges/python/wheel/setup.cfg +++ b/inference-engine/ie_bridges/python/wheel/setup.cfg @@ -1,11 +1,7 @@ [metadata] license_files = - readme* - *LICENSE* - *license* - *third-party-programs* - ../../../../licensing/runtime-third-party-programs.txt - ../../../../licensing/tbb_third-party-programs.txt - ../../../../licensing/onednn_third-party-programs.txt - ../../../../LICENSE - + readme* + *LICENSE* + *license* + *third-party-programs* + *EULA* diff --git a/inference-engine/ie_bridges/python/wheel/setup.py b/inference-engine/ie_bridges/python/wheel/setup.py index eb8d573dfba..517dce7560e 100644 --- a/inference-engine/ie_bridges/python/wheel/setup.py +++ b/inference-engine/ie_bridges/python/wheel/setup.py @@ -21,6 +21,7 @@ from setuptools import setup, find_namespace_packages, Extension from setuptools.command.build_ext import build_ext from setuptools.command.build_clib import build_clib from setuptools.command.install import install +from decouple import config WHEEL_LIBS_INSTALL_DIR = os.path.join('openvino', 'libs') WHEEL_LIBS_PACKAGE = 'openvino.libs' @@ -40,11 +41,10 @@ elif machine == 'aarch64': ARCH = 'arm64' # The following variables can be defined in environment or .env file -SCRIPT_DIR = Path(__file__).resolve().parents[0] -CMAKE_BUILD_DIR = os.getenv('CMAKE_BUILD_DIR', '.') -OV_RUNTIME_LIBS_DIR = os.getenv('OV_RUNTIME_LIBS_DIR', f'runtime/{LIBS_DIR}/{ARCH}/{CONFIG}') -TBB_LIBS_DIR = os.getenv('TBB_LIBS_DIR', f'runtime/3rdparty/tbb/{LIBS_DIR}') -PY_PACKAGES_DIR = os.getenv('PY_PACKAGES_DIR', f'python/{PYTHON_VERSION}') +CMAKE_BUILD_DIR = config('CMAKE_BUILD_DIR', '.') +OV_RUNTIME_LIBS_DIR = config('OV_RUNTIME_LIBS_DIR', f'runtime/{LIBS_DIR}/{ARCH}/{CONFIG}') +TBB_LIBS_DIR = config('TBB_LIBS_DIR', f'runtime/3rdparty/tbb/{LIBS_DIR}') +PY_PACKAGES_DIR = config('PY_PACKAGES_DIR', f'python/{PYTHON_VERSION}') LIBS_RPATH = '$ORIGIN' if sys.platform == 'linux' else '@loader_path' LIB_INSTALL_CFG = { @@ -428,28 +428,28 @@ if not any(pl in sys.platform for pl in platforms): sys.exit(f'Unsupported platform: {sys.platform}, expected: linux, win32, darwin') # copy license file into the build directory -package_license = os.getenv('WHEEL_LICENSE', SCRIPT_DIR.parents[3] / 'LICENSE') +package_license = config('WHEEL_LICENSE', '') if os.path.exists(package_license): copyfile(package_license, 'LICENSE') packages = find_namespace_packages(get_package_dir(PY_INSTALL_CFG)) package_data: typing.Dict[str, list] = {} -pkg_name = os.getenv('WHEEL_PACKAGE_NAME', 'openvino') +pkg_name = config('WHEEL_PACKAGE_NAME', 'openvino') ext_modules = find_prebuilt_extensions(get_dir_list(PY_INSTALL_CFG)) if pkg_name == 'openvino' else [] setup( - version=os.getenv('WHEEL_VERSION', '0.0.0'), - build=os.getenv('WHEEL_BUILD', '000'), - author_email=os.getenv('WHEEL_AUTHOR_EMAIL', 'openvino_pushbot@intel.com'), + version=config('WHEEL_VERSION', '0.0.0'), + build=config('WHEEL_BUILD', '000'), + author_email=config('WHEEL_AUTHOR_EMAIL', 
'openvino_pushbot@intel.com'), name=pkg_name, - license=os.getenv('WHEEL_LICENCE_TYPE', 'OSI Approved :: Apache Software License'), - author=os.getenv('WHEEL_AUTHOR', 'Intel(R) Corporation'), - description=os.getenv('WHEEL_DESC', 'OpenVINO(TM) Runtime'), - install_requires=get_dependencies(os.getenv('WHEEL_REQUIREMENTS', SCRIPT_DIR.parents[0] / 'requirements.txt')), - long_description=get_description(os.getenv('WHEEL_OVERVIEW', SCRIPT_DIR.parents[3] / 'docs/install_guides/pypi-openvino-rt.md')), + license=config('WHEEL_LICENCE_TYPE', 'OSI Approved :: Apache Software License'), + author=config('WHEEL_AUTHOR', 'Intel Corporation'), + description=config('WHEEL_DESC', 'Inference Engine Python* API'), + install_requires=get_dependencies(config('WHEEL_REQUIREMENTS', 'meta/openvino.requirements.txt')), + long_description=get_description(config('WHEEL_OVERVIEW', 'meta/pypi_overview.md')), long_description_content_type='text/markdown', - download_url=os.getenv('WHEEL_DOWNLOAD_URL', 'https://github.com/openvinotoolkit/openvino/tags'), - url=os.getenv('WHEEL_URL', 'https://docs.openvinotoolkit.org/latest/index.html'), + download_url=config('WHEEL_DOWNLOAD_URL', 'https://github.com/openvinotoolkit/openvino/tags'), + url=config('WHEEL_URL', 'https://docs.openvinotoolkit.org/latest/index.html'), cmdclass={ 'build': CustomBuild, 'install': CustomInstall, diff --git a/tools/openvino_dev/setup.py b/tools/openvino_dev/setup.py index 44861d6a3ed..a18d6991a75 100644 --- a/tools/openvino_dev/setup.py +++ b/tools/openvino_dev/setup.py @@ -195,7 +195,7 @@ setup( author_email='openvino_pushbot@intel.com', url='https://docs.openvinotoolkit.org/latest/index.html', download_url='https://github.com/openvinotoolkit/openvino/tags', - description='OpenVINO(TM) Development Tools', + description='OpenVINOâ„¢ Developer Package', long_description=get_description(SCRIPT_DIR.parents[1] / 'docs/install_guides/pypi-openvino-dev.md'), long_description_content_type='text/markdown', classifiers=[ From 2514c0ef38b8a46f515dbe1dd95355bd2a7652ad Mon Sep 17 00:00:00 2001 From: Sergey Shlyapnikov Date: Thu, 16 Dec 2021 15:20:28 +0300 Subject: [PATCH 25/27] [GPU] Add gemm_tiled_opt i8/u8 output support (#9202) --- .../gemm/gemm_kernel_tiled_opt.cpp | 21 ++++++++++---- .../core/cl_kernels/gemm_tiled_opt.cl | 24 +++++++-------- .../tests/test_cases/fusings_gpu_test.cpp | 29 +++++++++++++++++++ 3 files changed, 57 insertions(+), 17 deletions(-) diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_tiled_opt.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_tiled_opt.cpp index 93df406663c..9f77050b46d 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_tiled_opt.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_tiled_opt.cpp @@ -13,6 +13,8 @@ ParamsKey GemmKernelTiledOpt::GetSupportedKey() const { k.EnableInputDataType(Datatype::F32); k.EnableOutputDataType(Datatype::F16); k.EnableOutputDataType(Datatype::F32); + k.EnableOutputDataType(Datatype::INT8); + k.EnableOutputDataType(Datatype::UINT8); k.EnableInputLayout(DataLayout::bfyx); k.EnableOutputLayout(DataLayout::bfyx); k.EnableInputLayout(DataLayout::bfzyx); @@ -21,6 +23,7 @@ ParamsKey GemmKernelTiledOpt::GetSupportedKey() const { k.EnableOutputLayout(DataLayout::bfwzyx); k.EnableBatching(); + k.EnableDifferentTypes(); return k; } @@ -117,25 +120,29 @@ JitConstants GemmKernelTiledOpt::GetJitConstants(const 
gemm_params& params) cons if (tuning_data.tile_k_size > tuning_data.simd_size) { jit.AddConstants({ MakeJitConstant("A_VEC_SIZE", tuning_data.tile_k_size / tuning_data.simd_size), - MakeJitConstant("A_FLOATN", std::string("UNIT_TYPE") + toCodeString(tuning_data.tile_k_size / tuning_data.simd_size)), + MakeJitConstant("A_FLOATN", std::string("CAT(INPUT0_TYPE, ") + toCodeString(tuning_data.tile_k_size / tuning_data.simd_size) + ")"), }); } else { jit.AddConstants({ MakeJitConstant("A_VEC_SIZE", 1), - MakeJitConstant("A_FLOATN", std::string("UNIT_TYPE")), + MakeJitConstant("A_FLOATN", std::string("INPUT0_TYPE")), }); } if (tuning_data.tile_n_size > tuning_data.simd_size) { jit.AddConstants({ MakeJitConstant("B_VEC_SIZE", b_vec_size), - MakeJitConstant("B_FLOATN", std::string("UNIT_TYPE") + toCodeString(b_vec_size)), + MakeJitConstant("B_FLOATN", std::string("CAT(INPUT1_TYPE, ") + toCodeString(b_vec_size) + ")"), + MakeJitConstant("OUTPUT_TYPE_VEC", std::string("CAT(OUTPUT_TYPE, ") + toCodeString(b_vec_size) + ")"), + MakeJitConstant("ACCUMULATOR_TYPE_VEC", std::string("CAT(ACCUMULATOR_TYPE, ") + toCodeString(b_vec_size) + ")"), }); } else { b_vec_size = 1; jit.AddConstants({ - MakeJitConstant("B_VEC_SIZE", 1), - MakeJitConstant("B_FLOATN", std::string("UNIT_TYPE")), + MakeJitConstant("B_VEC_SIZE", b_vec_size), + MakeJitConstant("B_FLOATN", std::string("INPUT1_TYPE")), + MakeJitConstant("OUTPUT_TYPE_VEC", std::string("OUTPUT_TYPE")), + MakeJitConstant("ACCUMULATOR_TYPE_VEC", std::string("ACCUMULATOR_TYPE")), }); } @@ -183,6 +190,10 @@ bool GemmKernelTiledOpt::Validate(const Params& params, const optional_params& o if ((gmm_params.transpose_input0 || gmm_params.transpose_input1) && gemm_leftovers) return false; + for (size_t i = 1; i < gmm_params.inputs.size(); i++) + if (gmm_params.inputs[0].GetDType() != gmm_params.inputs[i].GetDType()) + return false; + return true; } } // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/gemm_tiled_opt.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/gemm_tiled_opt.cl index ae79242b369..cba34cdcf8c 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/gemm_tiled_opt.cl +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/gemm_tiled_opt.cl @@ -3,7 +3,7 @@ // #include "include/batch_headers/fetch_data.cl" -#include "include/unit_type.cl" +#include "include/batch_headers/data_types.cl" #define unroll_for __attribute__((opencl_unroll_hint)) for @@ -14,17 +14,17 @@ #endif // INPUT0_TYPE_SIZE == 4 #if TILE_K > SIMD_WIDTH - #define BLOCK_READ_A(ptr, offset) CAT(UNIT_BLOCK_READ, A_VEC_SIZE)(ptr, offset) + #define BLOCK_READ_A(ptr, offset) BLOCK_READN(INPUT0_TYPE, A_VEC_SIZE, ptr, offset) #else // TILE_K > SIMD_WIDTH - #define BLOCK_READ_A(ptr, offset) UNIT_BLOCK_READ(ptr, offset) + #define BLOCK_READ_A(ptr, offset) BLOCK_READN(INPUT0_TYPE, 1, ptr, offset) #endif // TILE_K > SIMD_WIDTH #if TILE_N > SIMD_WIDTH - #define BLOCK_READ_B(ptr, offset) CAT(UNIT_BLOCK_READ, B_VEC_SIZE)(ptr, offset) - #define BLOCK_WRITE_C(ptr, offset, data) CAT(UNIT_BLOCK_WRITE, B_VEC_SIZE)(ptr, offset, data) + #define BLOCK_READ_B(ptr, offset) BLOCK_READN(INPUT1_TYPE, B_VEC_SIZE, ptr, offset) + #define BLOCK_WRITE_C(ptr, offset, data) BLOCK_WRITEN(OUTPUT_TYPE, B_VEC_SIZE, ptr, offset, data) #else // TILE_N > SIMD_WIDTH - #define BLOCK_READ_B(ptr, offset) UNIT_BLOCK_READ(ptr, offset) - #define BLOCK_WRITE_C(ptr, offset, data) UNIT_BLOCK_WRITE(ptr, offset, data) + #define 
BLOCK_READ_B(ptr, offset) BLOCK_READN(INPUT1_TYPE, 1, ptr, offset) + #define BLOCK_WRITE_C(ptr, offset, data) BLOCK_WRITEN(OUTPUT_TYPE, 1, ptr, offset, data) #endif // TILE_N > SIMD_WIDTH inline uint FUNC(get_input0_batch_offset)(uint b, uint f, uint w, uint z) { @@ -294,9 +294,9 @@ KERNEL(gemm_tiled_opt)( #if TILE_N_NOT_DIVISIBLE if (b_raw_global_id < N) { #ifdef INPUT2_TYPE - OUTPUT_TYPE dequantized = TO_ACCUMULATOR_TYPE(ALPHA) * c_tile[write_id] + TO_ACCUMULATOR_TYPE(BETA) * c_ptr[sglid]; + ACCUMULATOR_TYPE dequantized = TO_ACCUMULATOR_TYPE(ALPHA) * c_tile[write_id] + TO_ACCUMULATOR_TYPE(BETA) * c_ptr[sglid]; #else // INPUT2_TYPE - OUTPUT_TYPE dequantized = TO_ACCUMULATOR_TYPE(ALPHA) * c_tile[write_id]; + ACCUMULATOR_TYPE dequantized = TO_ACCUMULATOR_TYPE(ALPHA) * c_tile[write_id]; #endif // INPUT2_TYPE #if HAS_FUSED_OPS @@ -316,9 +316,9 @@ KERNEL(gemm_tiled_opt)( #ifdef INPUT2_TYPE B_FLOATN c_val = BLOCK_READ_B(c_ptr, 0); - B_FLOATN dequantized = TO_ACCUMULATOR_TYPE(ALPHA) * c_tile[write_id] + TO_ACCUMULATOR_TYPE(BETA) * c_val; + ACCUMULATOR_TYPE_VEC dequantized = TO_ACCUMULATOR_TYPE(ALPHA) * c_tile[write_id] + TO_ACCUMULATOR_TYPE(BETA) * c_val; #else // INPUT2_TYPE - B_FLOATN dequantized = TO_ACCUMULATOR_TYPE(ALPHA) * c_tile[write_id]; + ACCUMULATOR_TYPE_VEC dequantized = TO_ACCUMULATOR_TYPE(ALPHA) * c_tile[write_id]; #endif // INPUT2_TYPE #if HAS_FUSED_OPS @@ -327,7 +327,7 @@ KERNEL(gemm_tiled_opt)( #else // FUSED_OPS_CAN_USE_PRELOAD FUSED_OPS_VEC; #endif // FUSED_OPS_CAN_USE_PRELOAD - B_FLOATN res = FUSED_OPS_RESULT_VEC; + OUTPUT_TYPE_VEC res = FUSED_OPS_RESULT_VEC; BLOCK_WRITE_C(d_ptr, 0, res); #else // HAS_FUSED_OPS BLOCK_WRITE_C(d_ptr, 0, dequantized); diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp index 962759bdc7c..35e4fe25e08 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp @@ -3264,6 +3264,35 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, gemm_2in_quantize_u8, //gemm_test_params{ CASE_GEMM_2IN_FP32_1, 3, 4 }, })); +class gemm_2in_quantize_float_in : public GemmFusingTest {}; +TEST_P(gemm_2in_quantize_float_in, basic) { + auto p = GetParam(); + create_topologies(input_layout("input0", get_input_layout(p, 0)), + input_layout("input1", get_input_layout(p, 1)), + data("in_lo", get_mem(get_per_channel_layout(p), 0)), + data("in_hi", get_mem(get_per_channel_layout(p), 1, max_random)), + data("out_lo", get_mem(get_single_element_layout(p), 0)), + data("out_hi", get_mem(get_single_element_layout(p), 255)), + gemm("gemm_prim", { "input0", "input1" }, data_types::f32), + quantize("quantize", "gemm_prim", "in_lo", "in_hi", "out_lo", "out_hi", 256, data_types::u8), + reorder("reorder_bfyx", "quantize", p.default_format, data_types::f32) + ); + + implementation_desc gemm_impl = { format::bfyx, "gemm_tiled_opt" }; + bo_fused.set_option(build_option::force_implementations({ {"gemm_prim", gemm_impl} })); + + tolerance = 1.0f; + execute(p); +} + +INSTANTIATE_TEST_SUITE_P(fusings_gpu, gemm_2in_quantize_float_in, + ::testing::ValuesIn(std::vector{ + gemm_test_params{ CASE_GEMM_2IN_FP16_1, 3, 4 }, + gemm_test_params{ CASE_GEMM_2IN_FP32_1, 3, 4 }, + gemm_test_params{ CASE_GEMM_ELTWISE_2IN_FP16_1, 3, 4 }, + gemm_test_params{ CASE_GEMM_ELTWISE_2IN_FP32_1, 3, 4 }, +})); + class gemm_2in_scale : public GemmFusingTest {}; TEST_P(gemm_2in_scale, basic) { auto p = GetParam(); From 
6ddc47a7ef0a0a72e5d55511255ed7e39d46573e Mon Sep 17 00:00:00 2001 From: Vladislav Volkov Date: Thu, 16 Dec 2021 16:58:19 +0300 Subject: [PATCH 26/27] [CPU] Convert precisions on inputs/outputs (#8805) --- .../src/mkldnn_plugin/mkldnn_plugin.cpp | 31 +- .../nodes/common/cpu_convert.cpp | 616 ++++++++++++++++-- .../mkldnn_plugin/nodes/common/cpu_convert.h | 29 +- .../nodes/mkldnn_convert_node.cpp | 21 +- .../mkldnn_plugin/nodes/mkldnn_convert_node.h | 1 + .../python/tests/test_onnx/test_backend.py | 7 - .../python/tests/test_onnx/test_zoo_models.py | 6 - .../interface/ie_iplugin_internal.cpp | 16 +- .../blob_tests/set_blob.cpp | 11 +- .../single_layer_tests/conversion.cpp | 28 +- .../skip_tests_config.cpp | 24 +- .../ov_executable_network/exec_graph_info.hpp | 8 + .../plugin/shared/src/blob_tests/set_blob.cpp | 22 +- .../src/base/layer_test_utils.cpp | 84 +-- .../src/single_layer/conversion.cpp | 3 + 15 files changed, 725 insertions(+), 182 deletions(-) diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp index 1c8b40aaa8b..a6bda152e23 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp @@ -504,23 +504,24 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, "Engine::LoadExeNetworkImpl"); // verification of supported input - InferenceEngine::InputsDataMap _networkInputs = network.getInputsInfo(); - for (const auto &ii : _networkInputs) { + for (const auto &ii : network.getInputsInfo()) { auto input_precision = ii.second->getPrecision(); - if (input_precision != InferenceEngine::Precision::FP64 && - input_precision != InferenceEngine::Precision::FP32 && - input_precision != InferenceEngine::Precision::I32 && - input_precision != InferenceEngine::Precision::U32 && - input_precision != InferenceEngine::Precision::U16 && - input_precision != InferenceEngine::Precision::I16 && - input_precision != InferenceEngine::Precision::I8 && - input_precision != InferenceEngine::Precision::U8 && - input_precision != InferenceEngine::Precision::BF16 && - input_precision != InferenceEngine::Precision::BOOL && - input_precision != InferenceEngine::Precision::I64 && - input_precision != InferenceEngine::Precision::U64) { + + using hash_t = std::hash::type>; + + static const std::unordered_set supported_precisions = { + Precision::U8, Precision::I8, + Precision::U16, Precision::I16, + Precision::U32, Precision::I32, + Precision::U64, Precision::I64, + Precision::BF16, Precision::FP16, + Precision::FP32, Precision::FP64, + Precision::BOOL + }; + + if (!supported_precisions.count(input_precision)) { IE_THROW(NotImplemented) - << "Input image format " << input_precision << " is not supported yet..."; + << "Input image format " << input_precision << " is not supported yet..."; } } diff --git a/inference-engine/src/mkldnn_plugin/nodes/common/cpu_convert.cpp b/inference-engine/src/mkldnn_plugin/nodes/common/cpu_convert.cpp index 8763b551af9..31205ad84e3 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/common/cpu_convert.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/common/cpu_convert.cpp @@ -4,27 +4,208 @@ #include "cpu_convert.h" #include "cpu_memcpy.h" -#include "utils/bfloat16.hpp" +#include +#include #include +#include +#include +#include +#include #include #include -#include +#include +using namespace MKLDNNPlugin; using namespace InferenceEngine; +using namespace dnnl::impl::cpu::x64; 
+using namespace dnnl::impl::utils; +using namespace Xbyak; namespace { -template -void convert(const void *srcPtr, void *dstPtr, const size_t size) { - if (std::is_same::value) { - cpu_memcpy(dstPtr, srcPtr, size*sizeof(dstType)); - } else { - const srcType *srcData = reinterpret_cast(srcPtr); - dstType *dstData = reinterpret_cast(dstPtr); +template +void convert_vec(jit_generator & gen, + const RegExp & src, + const RegExp & dst); - parallel_for(size, [&](size_t i) { - dstData[i] = static_cast(srcData[i]); +template <> +void convert_vec(jit_generator & gen, + const RegExp & src, + const RegExp & dst) { + auto const & f16vec = gen.xmm3; + auto const & f32vec = gen.ymm4; + + gen.movdqu(f16vec, gen.xword[src]); + gen.vcvtph2ps(f32vec, f16vec); + gen.vmovups(gen.yword[dst], f32vec); +} + +template <> +void convert_vec(jit_generator & gen, + const RegExp & src, + const RegExp & dst) { + auto const & f16vec = gen.xmm3; + auto const & f32vec = gen.ymm4; + + gen.vmovups(f32vec, gen.yword[src]); + gen.vcvtps2ph(f16vec, f32vec, 0); + gen.movdqu(gen.xword[dst], f16vec); +} + +class jit_convert_array : public jit_generator { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_convert_array) + + void generate() override { + const size_t vlen = 8u; + const size_t vlen_log2 = 3; + + auto reg_src = rax; + auto reg_dst = rbx; + auto reg_sz = rdx; + + Label tail, exit; + + preamble(); + + mov(reg_src, ptr[param1 + offsetof(args_t, src)]); + mov(reg_dst, ptr[param1 + offsetof(args_t, out)]); + mov(reg_sz, ptr[param1 + offsetof(args_t, count)]); + + xor_(rsi, rsi); + mov(r8, reg_sz); + shr(r8, vlen_log2); + + foreach(rsi, 1, r8, [&, this](const Xbyak::Reg64& idx) { + _convert_vec(*this, reg_src, reg_dst); + add(reg_src, _src_size * vlen); + add(reg_dst, _dst_size * vlen); }); + + L(tail); + + shl(rsi, vlen_log2); + sub(reg_sz, rsi); + test(reg_sz, reg_sz); + jz(exit); + + // allocate array for 8 floats on stack + sub(rsp, vlen * sizeof(float)); + mov(r8, rsp); + + vpxor(ymm4, ymm4, ymm4); + vmovups(yword[r8], ymm4); + + // Tail conversion + copy(r8, reg_src, reg_sz, _src_size); + _convert_vec(*this, r8, r8); + copy(reg_dst, r8, reg_sz, _dst_size); + + // Free the array on stack + add(rsp, vlen * sizeof(float)); + + L(exit); + + postamble(); + } + + void foreach(const Xbyak::Reg64& idx, + size_t step, + const Xbyak::Reg64& end, + std::function && fn) { + Label loop, exit; + + L(loop); + cmp(idx, end); + jge(exit); + + fn(idx); + + add(idx, step); + jmp(loop); + L(exit); + } + + void copy(const Xbyak::Reg64& dst, + const Xbyak::Reg64& src, + const Xbyak::Reg64& size, + size_t item_size) { + push(rsi); + push(r15); + + xor_(rsi, rsi); + + auto address_frame = [this](size_t size) -> const AddressFrame& { + switch (size) { + case 1: return byte; + case 2: return word; + case 4: return dword; + case 8: return qword; + default: + break; + } + return ptr; + }; + + const auto & addr_frame = address_frame(item_size); + + foreach(rsi, 1, size, [&, this](const Xbyak::Reg64& idx) { + mov(r15, addr_frame[src + idx * item_size]); + mov(addr_frame[dst + idx * item_size], r15); + }); + + pop(r15); + pop(rsi); + } + +public: + typedef struct { + const void* src; + void* out; + const size_t count; + } args_t; + + typedef void (*fn_t)(const args_t*); + + typedef void (*convert_vec_t)(jit_generator &, + const RegExp &, + const RegExp &); + + jit_convert_array(convert_vec_t convert_vec, + size_t src_size, + size_t dst_size) + : _convert_vec(convert_vec) + , _src_size(src_size) + , _dst_size(dst_size) {} + + template + static fn_t get() { + if 
(mayiuse(avx2) && cpu().has(util::Cpu::tF16C)) { + static jit_convert_array converter(convert_vec, sizeof(src_t), sizeof(dst_t)); + auto & generator = static_cast(converter); + generator.create_kernel(); + return (fn_t)generator.jit_ker(); + } + return nullptr; + } + +private: + convert_vec_t _convert_vec; + size_t _src_size; + size_t _dst_size; +}; + +template +void jit_convert(const TI* arg, TO* out, size_t count) { + using jit_impl = jit_convert_array; + static auto converter = jit_impl::get(); + + if (converter) { + typename jit_impl::args_t args = { arg, out, count }; + converter(&args); + } else { + for (size_t i = 0; i < count; ++i) { + out[i] = static_cast(arg[i]); + } } } @@ -35,84 +216,391 @@ struct PrecisionInfo { template <> struct PrecisionInfo { - using value_type = MKLDNNPlugin::bfloat16_t; + using value_type = bfloat16_t; }; +template <> +struct PrecisionInfo { + using value_type = ov::float16; +}; + +template <> +struct PrecisionInfo { + using value_type = uint8_t; +}; + +template::value + || std::is_same::value, + float, T>::type> +struct Range { + const std::tuple & fit(const Precision & prec); + +private: + std::tuple _range { + std::numeric_limits::lowest(), + std::numeric_limits::max() + }; +}; + +template +const std::tuple & Range::fit(const Precision & prec) { + if (prec.is_float()) { + double lbound, ubound; + switch (prec) { + case Precision::BF16: + lbound = static_cast(std::numeric_limits::lowest()); + ubound = static_cast(std::numeric_limits::max()); + break; + case Precision::FP16: + lbound = static_cast(std::numeric_limits::lowest()); + ubound = static_cast(std::numeric_limits::max()); + break; + case Precision::FP32: + lbound = static_cast(std::numeric_limits::lowest()); + ubound = static_cast(std::numeric_limits::max()); + break; + case Precision::FP64: + lbound = std::numeric_limits::lowest(); + ubound = std::numeric_limits::max(); + break; + default: + IE_THROW() << "Unsupported precision"; + } + std::get<0>(_range) = static_cast(std::max(static_cast(std::get<0>(_range)), lbound)); + std::get<1>(_range) = static_cast(std::min(static_cast(std::get<1>(_range)), ubound)); + } else { + int64_t lbound; + uint64_t ubound; + switch (prec) { + case Precision::BOOL: + case Precision::U8: + lbound = static_cast(std::numeric_limits::lowest()); + ubound = static_cast(std::numeric_limits::max()); + break; + case Precision::I8: + lbound = static_cast(std::numeric_limits::lowest()); + ubound = static_cast(std::numeric_limits::max()); + break; + case Precision::U16: + lbound = static_cast(std::numeric_limits::lowest()); + ubound = static_cast(std::numeric_limits::max()); + break; + case Precision::I16: + lbound = static_cast(std::numeric_limits::lowest()); + ubound = static_cast(std::numeric_limits::max()); + break; + case Precision::U32: + lbound = static_cast(std::numeric_limits::lowest()); + ubound = static_cast(std::numeric_limits::max()); + break; + case Precision::I32: + lbound = static_cast(std::numeric_limits::lowest()); + ubound = static_cast(std::numeric_limits::max()); + break; + case Precision::U64: + lbound = static_cast(std::numeric_limits::lowest()); + ubound = static_cast(std::numeric_limits::max()); + break; + case Precision::I64: + lbound = static_cast(std::numeric_limits::lowest()); + ubound = static_cast(std::numeric_limits::max()); + break; + default: + IE_THROW() << "Unsupported precision"; + } + using ltype = typename std::conditional< + std::is_floating_point::value, + double, int64_t>::type; + using utype = typename std::conditional< + 
std::is_floating_point<T>::value,
+                            double, uint64_t>::type;
+        std::get<0>(_range) = static_cast<T>(std::max(static_cast<ltype>(std::get<0>(_range)), static_cast<ltype>(lbound)));
+        std::get<1>(_range) = static_cast<T>(std::min(static_cast<utype>(std::get<1>(_range)), static_cast<utype>(ubound)));
+    }
+    return _range;
+}
+
 struct ConvertContext {
     const void *srcPtr;
     void *dstPtr;
     size_t size;
+    Precision interimPrc;
+    Precision dstPrc;
     bool converted;
+
+    template<typename T>
+    std::tuple<T, T> range() const {
+        Range<T> r;
+        r.fit(interimPrc);
+        return r.fit(dstPrc);
+    }
 };
 
 template<typename T>
-struct ConvertPrecision {
-    using src_t = typename std::tuple_element<0, T>::type;
-    using dst_t = typename std::tuple_element<1, T>::type;
+struct ConvertPrecision;
 
+template<typename src_t, typename dst_t>
+struct ConvertPrecision<std::tuple<src_t, dst_t>> {
     void operator()(ConvertContext & ctx) {
-        convert<src_t, dst_t>(ctx.srcPtr, ctx.dstPtr, ctx.size);
+        auto src = static_cast<const src_t *>(ctx.srcPtr);
+        auto dst = static_cast<dst_t *>(ctx.dstPtr);
+        src_t lbound, ubound;
+        std::tie(lbound, ubound) = ctx.range<src_t>();
+
+        if (std::is_integral<src_t>::value
+            || ctx.interimPrc.is_float()
+            || std::is_integral<dst_t>::value) {
+            parallel_for(ctx.size, [&](size_t i) {
+                dst[i] = static_cast<dst_t>(std::max(std::min(src[i], ubound), lbound));
+            });
+        } else {
+            parallel_for(ctx.size, [&](size_t i) {
+                dst[i] = static_cast<dst_t>(std::trunc(std::max(std::min(src[i], ubound), lbound)));
+            });
+        }
+
+        ctx.converted = true;
     }
 };
 
+template<>
+struct ConvertPrecision<std::tuple<float, MKLDNNPlugin::bfloat16_t>> {
+    void operator()(ConvertContext & ctx) {
+        auto src = static_cast<const float *>(ctx.srcPtr);
+        auto dst = static_cast<MKLDNNPlugin::bfloat16_t *>(ctx.dstPtr);
+
+        if (ctx.interimPrc.is_float()) {
+            parallel_for(ctx.size, [&](size_t i) {
+                dst[i] = static_cast<MKLDNNPlugin::bfloat16_t>(src[i]);
+            });
+        } else {
+            float lbound, ubound;
+            std::tie(lbound, ubound) = ctx.range<float>();
+            parallel_for(ctx.size, [&](size_t i) {
+                dst[i] = static_cast<MKLDNNPlugin::bfloat16_t>(std::trunc(std::max(std::min(src[i], ubound), lbound)));
+            });
+        }
+
+        ctx.converted = true;
+    }
+};
+
+template<>
+struct ConvertPrecision<std::tuple<MKLDNNPlugin::bfloat16_t, float>> {
+    void operator()(ConvertContext & ctx) {
+        auto src = static_cast<const MKLDNNPlugin::bfloat16_t *>(ctx.srcPtr);
+        auto dst = static_cast<float *>(ctx.dstPtr);
+
+        if (ctx.interimPrc.is_float()) {
+            parallel_for(ctx.size, [&](size_t i) {
+                dst[i] = static_cast<float>(src[i]);
+            });
+        } else {
+            float lbound, ubound;
+            std::tie(lbound, ubound) = ctx.range<float>();
+            parallel_for(ctx.size, [&](size_t i) {
+                dst[i] = std::trunc(std::max(std::min(static_cast<float>(src[i]), ubound), lbound));
+            });
+        }
+
+        ctx.converted = true;
+    }
+};
+
+template<typename src_t>
+struct ConvertPrecision<std::tuple<src_t, ov::float16>> {
+    void operator()(ConvertContext & ctx) {
+        auto src = static_cast<const src_t *>(ctx.srcPtr);
+        auto dst = static_cast<ov::float16 *>(ctx.dstPtr);
+
+        constexpr size_t batch = 64;
+        const size_t iterations = MKLDNNPlugin::div_up(ctx.size, batch);
+        typedef float batch_type[batch];
+
+        src_t lbound, ubound;
+        std::tie(lbound, ubound) = ctx.range<src_t>();
+
+        if (std::is_integral<src_t>::value
+            || ctx.interimPrc.is_float()) {
+            parallel_for(iterations, [&](size_t i) {
+                batch_type tmp;
+                const size_t offset = i * batch;
+                const size_t current_batch_size = std::min(ctx.size - offset, batch);
+                for (size_t j = 0; j < current_batch_size; ++j)         // src_t -> fp32
+                    tmp[j] = static_cast<float>(std::max(std::min(src[offset + j], ubound), lbound));
+                jit_convert(tmp, dst + offset, current_batch_size);     // fp32 -> fp16
+            });
+        } else {
+            parallel_for(iterations, [&](size_t i) {
+                batch_type tmp;
+                const size_t offset = i * batch;
+                const size_t current_batch_size = std::min(ctx.size - offset, batch);
+                for (size_t j = 0; j < current_batch_size; ++j)         // src_t -> fp32
+                    tmp[j] = static_cast<float>(std::trunc(std::max(std::min(src[offset + j], ubound), lbound)));
+                jit_convert(tmp, dst + offset, current_batch_size);     // fp32 -> fp16
+            });
+        }
+
+        ctx.converted = true;
+    }
+};
+
+template<typename dst_t>
+struct ConvertPrecision<std::tuple<ov::float16, dst_t>> {
+    void operator()(ConvertContext & ctx) {
+        auto src = static_cast<const ov::float16 *>(ctx.srcPtr);
+        auto dst = static_cast<dst_t *>(ctx.dstPtr);
+
+        constexpr size_t batch = 64;
+        const size_t iterations = MKLDNNPlugin::div_up(ctx.size, batch);
+        typedef float batch_type[batch];
+
+        float lbound, ubound;
+        std::tie(lbound, ubound) = ctx.range<float>();
+
+        if (ctx.interimPrc.is_float()
+            || std::is_integral<dst_t>::value) {
+            parallel_for(iterations, [&](size_t i) {
+                batch_type tmp;
+                const size_t offset = i * batch;
+                const size_t current_batch_size = std::min(ctx.size - offset, batch);
+                jit_convert(src + offset, tmp, current_batch_size);     // fp16 -> fp32
+                for (size_t j = 0; j < current_batch_size; ++j)         // fp32 -> dst_t
+                    dst[offset + j] = static_cast<dst_t>(std::max(std::min(tmp[j], ubound), lbound));
+            });
+        } else {
+            parallel_for(iterations, [&](size_t i) {
+                batch_type tmp;
+                const size_t offset = i * batch;
+                const size_t current_batch_size = std::min(ctx.size - offset, batch);
+                jit_convert(src + offset, tmp, current_batch_size);     // fp16 -> fp32
+                for (size_t j = 0; j < current_batch_size; ++j)         // fp32 -> dst_t
+                    dst[offset + j] = static_cast<dst_t>(std::trunc(std::max(std::min(tmp[j], ubound), lbound)));
+            });
+        }
+
+        ctx.converted = true;
+    }
+};
+
+template<>
+struct ConvertPrecision<std::tuple<ov::float16, ov::float16>> {
+    void operator()(ConvertContext & ctx) {
+        auto src = static_cast<const ov::float16 *>(ctx.srcPtr);
+        auto dst = static_cast<ov::float16 *>(ctx.dstPtr);
+
+        constexpr size_t batch = 64;
+        const size_t iterations = MKLDNNPlugin::div_up(ctx.size, batch);
+        typedef float batch_type[batch];
+
+        float lbound, ubound;
+        std::tie(lbound, ubound) = ctx.range<float>();
+
+        if (ctx.interimPrc.is_float()) {
+            cpu_memcpy(dst, src, ctx.size * sizeof(ov::float16));
+        } else {
+            parallel_for(iterations, [&](size_t i) {
+                batch_type tmp;
+                const size_t offset = i * batch;
+                const size_t current_batch_size = std::min(ctx.size - offset, batch);
+                jit_convert(src + offset, tmp, current_batch_size);     // fp16 -> fp32
+                for (size_t j = 0; j < current_batch_size; ++j)         // truncate fp32
+                    tmp[j] = std::trunc(std::max(std::min(tmp[j], ubound), lbound));
+                jit_convert(tmp, dst + offset, current_batch_size);     // fp32 -> fp16
+            });
+        }
+
+        ctx.converted = true;
+    }
+};
+
+bool isConversionTruncatesRange(const Precision & from, const Precision & to) {
+    return to.bitsSize() < from.bitsSize()
+            || (from.is_float() && !to.is_float())      // float -> integral
+            || (from.isSigned() != to.isSigned())       // signed <-> unsigned
+            || (to == Precision::BOOL && from != to);   // T -> bool
+}
+
 }   // namespace
 
 #define MKLDNN_CVT(ST, DT) OV_CASE2(Precision::ST, Precision::DT, PrecisionInfo<Precision::ST>::value_type, PrecisionInfo<Precision::DT>::value_type)
 
-void cpu_convert(const void *srcPtr, void *dstPtr, Precision srcPrc, Precision dstPrc, const size_t size) {
-    using namespace MKLDNNPlugin;
+#define MKLDNN_CVT_LIST \
+    MKLDNN_CVT(U8, I8), MKLDNN_CVT(U8, U16), MKLDNN_CVT(U8, I16), MKLDNN_CVT(U8, U32), \
+    MKLDNN_CVT(U8, I32), MKLDNN_CVT(U8, U64), MKLDNN_CVT(U8, I64), MKLDNN_CVT(U8, FP32), \
+    MKLDNN_CVT(U8, FP16), MKLDNN_CVT(U8, BF16), MKLDNN_CVT(U8, FP64), MKLDNN_CVT(U8, BOOL), \
+    MKLDNN_CVT(I8, U8), MKLDNN_CVT(I8, U16), MKLDNN_CVT(I8, I16), MKLDNN_CVT(I8, U32), \
+    MKLDNN_CVT(I8, I32), MKLDNN_CVT(I8, U64), MKLDNN_CVT(I8, I64), MKLDNN_CVT(I8, FP32), \
+    MKLDNN_CVT(I8, FP16), MKLDNN_CVT(I8, BF16), MKLDNN_CVT(I8, FP64), MKLDNN_CVT(I8, BOOL), \
+    MKLDNN_CVT(U16, U8), MKLDNN_CVT(U16, I8), MKLDNN_CVT(U16, I16), MKLDNN_CVT(U16, U32), \
+
MKLDNN_CVT(U16, I32), MKLDNN_CVT(U16, U64), MKLDNN_CVT(U16, I64), MKLDNN_CVT(U16, FP32), \ + MKLDNN_CVT(U16, FP16), MKLDNN_CVT(U16, BF16), MKLDNN_CVT(U16, FP64), MKLDNN_CVT(U16, BOOL), \ + MKLDNN_CVT(I16, U8), MKLDNN_CVT(I16, I8), MKLDNN_CVT(I16, U16), MKLDNN_CVT(I16, U32), \ + MKLDNN_CVT(I16, I32), MKLDNN_CVT(I16, U64), MKLDNN_CVT(I16, I64), MKLDNN_CVT(I16, FP32), \ + MKLDNN_CVT(I16, FP16), MKLDNN_CVT(I16, BF16), MKLDNN_CVT(I16, FP64), MKLDNN_CVT(I16, BOOL), \ + MKLDNN_CVT(U32, U8), MKLDNN_CVT(U32, I8), MKLDNN_CVT(U32, U16), MKLDNN_CVT(U32, I16), \ + MKLDNN_CVT(U32, I32), MKLDNN_CVT(U32, U64), MKLDNN_CVT(U32, I64), MKLDNN_CVT(U32, FP32), \ + MKLDNN_CVT(U32, FP16), MKLDNN_CVT(U32, BF16), MKLDNN_CVT(U32, FP64), MKLDNN_CVT(U32, BOOL), \ + MKLDNN_CVT(I32, U8), MKLDNN_CVT(I32, I8), MKLDNN_CVT(I32, U16), MKLDNN_CVT(I32, I16), \ + MKLDNN_CVT(I32, U32), MKLDNN_CVT(I32, U64), MKLDNN_CVT(I32, I64), MKLDNN_CVT(I32, FP32), \ + MKLDNN_CVT(I32, FP16), MKLDNN_CVT(I32, BF16), MKLDNN_CVT(I32, FP64), MKLDNN_CVT(I32, BOOL), \ + MKLDNN_CVT(U64, U8), MKLDNN_CVT(U64, I8), MKLDNN_CVT(U64, U16), MKLDNN_CVT(U64, I16), \ + MKLDNN_CVT(U64, U32), MKLDNN_CVT(U64, I32), MKLDNN_CVT(U64, I64), MKLDNN_CVT(U64, FP32), \ + MKLDNN_CVT(U64, FP16), MKLDNN_CVT(U64, BF16), MKLDNN_CVT(U64, FP64), MKLDNN_CVT(U64, BOOL), \ + MKLDNN_CVT(I64, U8), MKLDNN_CVT(I64, I8), MKLDNN_CVT(I64, U16), MKLDNN_CVT(I64, I16), \ + MKLDNN_CVT(I64, U32), MKLDNN_CVT(I64, I32), MKLDNN_CVT(I64, U64), MKLDNN_CVT(I64, FP32), \ + MKLDNN_CVT(I64, FP16), MKLDNN_CVT(I64, BF16), MKLDNN_CVT(I64, FP64), MKLDNN_CVT(I64, BOOL), \ + MKLDNN_CVT(FP32, U8), MKLDNN_CVT(FP32, I8), MKLDNN_CVT(FP32, U16), MKLDNN_CVT(FP32, I16), \ + MKLDNN_CVT(FP32, U32), MKLDNN_CVT(FP32, I32), MKLDNN_CVT(FP32, U64), MKLDNN_CVT(FP32, I64), \ + MKLDNN_CVT(FP32, FP16), MKLDNN_CVT(FP32, BF16), MKLDNN_CVT(FP32, FP64), MKLDNN_CVT(FP32, BOOL), \ + MKLDNN_CVT(FP16, U8), MKLDNN_CVT(FP16, I8), MKLDNN_CVT(FP16, U16), MKLDNN_CVT(FP16, I16), \ + MKLDNN_CVT(FP16, U32), MKLDNN_CVT(FP16, I32), MKLDNN_CVT(FP16, U64), MKLDNN_CVT(FP16, I64), \ + MKLDNN_CVT(FP16, FP32), MKLDNN_CVT(FP16, BF16), MKLDNN_CVT(FP16, FP64), MKLDNN_CVT(FP16, BOOL), \ + MKLDNN_CVT(BF16, U8), MKLDNN_CVT(BF16, I8), MKLDNN_CVT(BF16, U16), MKLDNN_CVT(BF16, I16), \ + MKLDNN_CVT(BF16, U32), MKLDNN_CVT(BF16, I32), MKLDNN_CVT(BF16, U64), MKLDNN_CVT(BF16, I64), \ + MKLDNN_CVT(BF16, FP32), MKLDNN_CVT(BF16, FP16), MKLDNN_CVT(BF16, FP64), MKLDNN_CVT(BF16, BOOL), \ + MKLDNN_CVT(FP64, U8), MKLDNN_CVT(FP64, I8), MKLDNN_CVT(FP64, U16), MKLDNN_CVT(FP64, I16), \ + MKLDNN_CVT(FP64, U32), MKLDNN_CVT(FP64, I32), MKLDNN_CVT(FP64, U64), MKLDNN_CVT(FP64, I64), \ + MKLDNN_CVT(FP64, FP32), MKLDNN_CVT(FP64, FP16), MKLDNN_CVT(FP64, BF16), MKLDNN_CVT(FP64, BOOL), \ + MKLDNN_CVT(BOOL, U8), MKLDNN_CVT(BOOL, I8), MKLDNN_CVT(BOOL, U16), MKLDNN_CVT(BOOL, I16), \ + MKLDNN_CVT(BOOL, U32), MKLDNN_CVT(BOOL, I32), MKLDNN_CVT(BOOL, U64), MKLDNN_CVT(BOOL, I64), \ + MKLDNN_CVT(BOOL, FP32), MKLDNN_CVT(BOOL, FP16), MKLDNN_CVT(BOOL, BF16), MKLDNN_CVT(BOOL, FP64), \ + MKLDNN_CVT(U8, U8), MKLDNN_CVT(I8, I8), MKLDNN_CVT(U16, U16), MKLDNN_CVT(I16, I16), \ + MKLDNN_CVT(U32, U32), MKLDNN_CVT(I32, I32), MKLDNN_CVT(U64, U64), MKLDNN_CVT(I64, I64), \ + MKLDNN_CVT(FP32, FP32), MKLDNN_CVT(FP16, FP16), MKLDNN_CVT(BF16, BF16), MKLDNN_CVT(FP64, FP64), \ + MKLDNN_CVT(BOOL, BOOL) +void cpu_convert(const void *srcPtr, void *dstPtr, Precision srcPrc, Precision dstPrc, const size_t size) { + cpu_convert(srcPtr, dstPtr, srcPrc, dstPrc, dstPrc, size); +} + +void cpu_convert(const void *srcPtr, + 
                 void *dstPtr,
+                 InferenceEngine::Precision srcPrc,
+                 InferenceEngine::Precision interimPrc,
+                 InferenceEngine::Precision dstPrc,
+                 const size_t size) {
     if (srcPtr == nullptr || dstPtr == nullptr)
         IE_THROW() << "cpu_convert has null data pointer";
 
-    if (srcPrc == dstPrc) {
-        cpu_memcpy(dstPtr, srcPtr, size*dstPrc.size());
-        return;
+    if (srcPrc == dstPrc && srcPrc == interimPrc) {
+        cpu_memcpy(dstPtr, srcPtr, size * dstPrc.size());
+    } else {
+        ConvertContext ctx = {
+            srcPtr,
+            dstPtr,
+            size,
+            interimPrc,
+            dstPrc,
+            false
+        };
+        OV_SWITCH(MKLDNNPlugin, ConvertPrecision, ctx, std::tie(srcPrc, dstPrc), MKLDNN_CVT_LIST);
+        if (!ctx.converted)
+            IE_THROW() << "cpu_convert can't convert from: " << srcPrc << " precision to: " << dstPrc;
     }
-
-    ConvertContext ctx = { srcPtr, dstPtr, size, false };
-
-    OV_SWITCH(MKLDNNPlugin, ConvertPrecision, ctx, std::tie(srcPrc, dstPrc),
-              MKLDNN_CVT(U8, I8), MKLDNN_CVT(U8, U16), MKLDNN_CVT(U8, I16),
-              MKLDNN_CVT(U8, I32), MKLDNN_CVT(U8, U64), MKLDNN_CVT(U8, I64),
-              MKLDNN_CVT(U8, FP32), MKLDNN_CVT(U8, BF16), MKLDNN_CVT(U8, BOOL),
-              MKLDNN_CVT(I8, U8), MKLDNN_CVT(I8, U16), MKLDNN_CVT(I8, I16),
-              MKLDNN_CVT(I8, I32), MKLDNN_CVT(I8, U64), MKLDNN_CVT(I8, I64),
-              MKLDNN_CVT(I8, FP32), MKLDNN_CVT(I8, BF16), MKLDNN_CVT(I8, BOOL),
-              MKLDNN_CVT(U16, U8), MKLDNN_CVT(U16, I8), MKLDNN_CVT(U16, I16),
-              MKLDNN_CVT(U16, I32), MKLDNN_CVT(U16, U64), MKLDNN_CVT(U16, I64),
-              MKLDNN_CVT(U16, FP32), MKLDNN_CVT(U16, BF16), MKLDNN_CVT(U16, BOOL),
-              MKLDNN_CVT(I16, U8), MKLDNN_CVT(I16, I8), MKLDNN_CVT(I16, U16),
-              MKLDNN_CVT(I16, I32), MKLDNN_CVT(I16, U64), MKLDNN_CVT(I16, I64),
-              MKLDNN_CVT(I16, FP32), MKLDNN_CVT(I16, BF16), MKLDNN_CVT(I16, BOOL),
-              MKLDNN_CVT(I32, U8), MKLDNN_CVT(I32, I8), MKLDNN_CVT(I32, U16),
-              MKLDNN_CVT(I32, I16), MKLDNN_CVT(I32, U64), MKLDNN_CVT(I32, I64),
-              MKLDNN_CVT(I32, FP32), MKLDNN_CVT(I32, BF16), MKLDNN_CVT(I32, BOOL),
-              MKLDNN_CVT(U64, U8), MKLDNN_CVT(U64, I8), MKLDNN_CVT(U64, U16),
-              MKLDNN_CVT(U64, I16), MKLDNN_CVT(U64, I32), MKLDNN_CVT(U64, I64),
-              MKLDNN_CVT(U64, FP32), MKLDNN_CVT(U64, BF16), MKLDNN_CVT(U64, BOOL),
-              MKLDNN_CVT(I64, U8), MKLDNN_CVT(I64, I8), MKLDNN_CVT(I64, U16),
-              MKLDNN_CVT(I64, I16), MKLDNN_CVT(I64, I32), MKLDNN_CVT(I64, U64),
-              MKLDNN_CVT(I64, FP32), MKLDNN_CVT(I64, BF16), MKLDNN_CVT(I64, BOOL),
-              MKLDNN_CVT(FP32, U8), MKLDNN_CVT(FP32, I8), MKLDNN_CVT(FP32, U16),
-              MKLDNN_CVT(FP32, I16), MKLDNN_CVT(FP32, I32), MKLDNN_CVT(FP32, U64),
-              MKLDNN_CVT(FP32, I64), MKLDNN_CVT(FP32, BF16), MKLDNN_CVT(FP32, BOOL),
-              MKLDNN_CVT(BF16, U8), MKLDNN_CVT(BF16, I8), MKLDNN_CVT(BF16, U16),
-              MKLDNN_CVT(BF16, I16), MKLDNN_CVT(BF16, I32), MKLDNN_CVT(BF16, U64),
-              MKLDNN_CVT(BF16, I64), MKLDNN_CVT(BF16, FP32), MKLDNN_CVT(BF16, BOOL),
-              MKLDNN_CVT(BOOL, U8), MKLDNN_CVT(BOOL, I8), MKLDNN_CVT(BOOL, U16),
-              MKLDNN_CVT(BOOL, I16), MKLDNN_CVT(BOOL, I32), MKLDNN_CVT(BOOL, U64),
-              MKLDNN_CVT(BOOL, I64), MKLDNN_CVT(BOOL, FP32), MKLDNN_CVT(BOOL, BF16),
-              MKLDNN_CVT(FP64, U8), MKLDNN_CVT(FP64, I8), MKLDNN_CVT(FP64, U16),
-              MKLDNN_CVT(FP64, I16), MKLDNN_CVT(FP64, I32), MKLDNN_CVT(FP64, U64),
-              MKLDNN_CVT(FP64, I64), MKLDNN_CVT(FP64, FP32), MKLDNN_CVT(FP64, BF16), MKLDNN_CVT(FP64, BOOL),
-              MKLDNN_CVT(U32, U8), MKLDNN_CVT(U32, I8), MKLDNN_CVT(U32, U16),
-              MKLDNN_CVT(U32, I16), MKLDNN_CVT(U32, I32), MKLDNN_CVT(U32, U64),
-              MKLDNN_CVT(U32, I64), MKLDNN_CVT(U32, FP32), MKLDNN_CVT(U32, BF16), MKLDNN_CVT(U32, BOOL));
-
-    if (!ctx.converted)
-        IE_THROW() << "cpu_convert can't convert from: " << srcPrc << " precision to: " << dstPrc;
 }
 
 #undef MKLDNN_CVT
+#undef
MKLDNN_CVT_LIST diff --git a/inference-engine/src/mkldnn_plugin/nodes/common/cpu_convert.h b/inference-engine/src/mkldnn_plugin/nodes/common/cpu_convert.h index dd4ef59a38b..8ed46cab7a0 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/common/cpu_convert.h +++ b/inference-engine/src/mkldnn_plugin/nodes/common/cpu_convert.h @@ -19,5 +19,32 @@ * number of elements in buffers to be converted * @return none. */ +void cpu_convert(const void *srcPtr, + void *dstPtr, + InferenceEngine::Precision srcPrc, + InferenceEngine::Precision dstPrc, + const size_t size); -void cpu_convert(const void *srcPtr, void *dstPtr, InferenceEngine::Precision srcPrc, InferenceEngine::Precision dstPrc, const size_t size); +/** + * @brief Copy size elements from buffer specified srcPtr pointer to buffer specified dstPtr. + * If the precisions srcPrc and dstPrc are different, a conversion from srcPrc to dstPrc is performed. + * @param srcPtr + * pointer to the buffer to convert from + * @param dstPtr + * pointer to the buffer to convert to + * @param srcPrc + * precision the buffer from which convert + * @param interimPrc + * intermediate precision used for type truncation + * @param dstPrc + * precision the buffer to which convert + * @param size + * number of elements in buffers to be converted + * @return none. + */ +void cpu_convert(const void *srcPtr, + void *dstPtr, + InferenceEngine::Precision srcPrc, + InferenceEngine::Precision interimPrc, + InferenceEngine::Precision dstPrc, + const size_t size); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.cpp index 2faf969c1ea..2a20f45ff29 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.cpp @@ -7,7 +7,8 @@ #include "common/cpu_convert.h" #include "common/blocked_desc_creator.h" #include -#include "utils/ngraph_utils.hpp" +#include +#include using namespace mkldnn; using namespace MKLDNNPlugin; @@ -26,14 +27,17 @@ bool MKLDNNConvertNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : - MKLDNNNode(op, eng, cache) { +MKLDNNConvertNode::MKLDNNConvertNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) + : MKLDNNNode(op, eng, cache) { std::string errorMessage; if (isSupportedOperation(op, errorMessage)) { errorPrefix = "Convert node with name '" + getName() + "'"; } else { IE_THROW(NotImplemented) << errorMessage; } + + auto convert = ov::as_type_ptr(op); + origPrc = details::convertPrecision(convert->get_destination_type()); } std::vector MKLDNNConvertNode::shapeInfer() const { @@ -42,7 +46,8 @@ std::vector MKLDNNConvertNode::shapeInfer() const { MKLDNNConvertNode::MKLDNNConvertNode(const Shape &shape, const InferenceEngine::Precision &inPrc, const InferenceEngine::Precision &outPrc, const std::string &nodeName, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode("Convert", nodeName, eng, cache) { + : MKLDNNNode("Convert", nodeName, eng, cache) + , origPrc(outPrc) { inputShapes.push_back(shape); addOriginalInputPrecision(inPrc); outputShapes.push_back(shape); @@ -147,7 +152,13 @@ void MKLDNNConvertNode::execute(mkldnn::stream strm) { void* srcPtr = parentMem.GetPtr(); void* dstPtr = childMem.GetPtr(); - cpu_convert(srcPtr, dstPtr, parentMem.getDesc().getPrecision(), childMem.getDesc().getPrecision(), parentPaddElemCount); + + cpu_convert(srcPtr, + dstPtr, + 
parentMem.getDesc().getPrecision(), + origPrc, + childMem.getDesc().getPrecision(), + parentPaddElemCount); } bool MKLDNNConvertNode::created() const { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.h index bffb2447280..08042187788 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.h @@ -49,6 +49,7 @@ public: private: MemoryDescPtr input; MemoryDescPtr output; + InferenceEngine::Precision origPrc; std::string errorPrefix; }; diff --git a/src/bindings/python/tests/test_onnx/test_backend.py b/src/bindings/python/tests/test_onnx/test_backend.py index a3f88676852..7385c5057ef 100644 --- a/src/bindings/python/tests/test_onnx/test_backend.py +++ b/src/bindings/python/tests/test_onnx/test_backend.py @@ -101,16 +101,9 @@ tests_expected_to_fail = [ ( xfail_issue_FLOAT_LIKE, "OnnxBackendNodeModelTest.test_cast_BFLOAT16_to_FLOAT_cpu", - "OnnxBackendNodeModelTest.test_cast_FLOAT16_to_DOUBLE_cpu", - "OnnxBackendNodeModelTest.test_cast_FLOAT16_to_FLOAT_cpu", "OnnxBackendNodeModelTest.test_cast_FLOAT_to_BFLOAT16_cpu", "OnnxBackendNodeModelTest.test_castlike_BFLOAT16_to_FLOAT_expanded_cpu", - "OnnxBackendNodeModelTest.test_castlike_FLOAT16_to_DOUBLE_expanded_cpu", - "OnnxBackendNodeModelTest.test_castlike_FLOAT16_to_FLOAT_expanded_cpu", "OnnxBackendNodeModelTest.test_castlike_FLOAT_to_BFLOAT16_expanded_cpu", - "OnnxBackendNodeModelTest.test_max_float16_cpu", - "OnnxBackendNodeModelTest.test_min_float16_cpu", - "OnnxBackendNodeModelTest.test_mod_mixed_sign_float16_cpu", ), ( xfail_issue_49207, diff --git a/src/bindings/python/tests/test_onnx/test_zoo_models.py b/src/bindings/python/tests/test_onnx/test_zoo_models.py index ee16fa4724a..ac455d76168 100644 --- a/src/bindings/python/tests/test_onnx/test_zoo_models.py +++ b/src/bindings/python/tests/test_onnx/test_zoo_models.py @@ -171,12 +171,6 @@ if len(zoo_models) > 0: test_cases = backend_test.test_cases["OnnxBackendModelExecutionTest"] if tests.MODEL_ZOO_XFAIL: execution_xfail_list = [ - # New Python API - fp16 blob - (xfail_issue_67415, "test_MSFT_opset7_fp16_inception_v1_onnxzoo_lotus_inception_v1_cpu"), - (xfail_issue_67415, "test_MSFT_opset7_fp16_shufflenet_onnxzoo_lotus_shufflenet_cpu"), - (xfail_issue_67415, "test_MSFT_opset8_fp16_inception_v1_onnxzoo_lotus_inception_v1_cpu"), - (xfail_issue_67415, "test_MSFT_opset8_fp16_shufflenet_onnxzoo_lotus_shufflenet_cpu"), - # ONNX Model Zoo (xfail_issue_39669, "test_onnx_model_zoo_text_machine_comprehension_t5_model_t5_encoder_12_t5_encoder_cpu"), (xfail_issue_39669, "test_onnx_model_zoo_text_machine_comprehension_t5_model_t5_decoder_with_lm_head_12_t5_decoder_with_lm_head_cpu"), diff --git a/src/inference/src/cpp_interfaces/interface/ie_iplugin_internal.cpp b/src/inference/src/cpp_interfaces/interface/ie_iplugin_internal.cpp index a3f27eb8ea2..945f06ef6e8 100644 --- a/src/inference/src/cpp_interfaces/interface/ie_iplugin_internal.cpp +++ b/src/inference/src/cpp_interfaces/interface/ie_iplugin_internal.cpp @@ -309,16 +309,15 @@ void IInferencePlugin::SetExeNetworkInfo(const std::shared_ptrget_output_size()); for (const auto& param : function->get_parameters()) { - auto new_param = param->copy_with_new_inputs({}); + auto new_param = ov::as_type_ptr(param->copy_with_new_inputs({})); new_param->set_friendly_name(param->get_friendly_name()); if (add_operation_names) 
new_param->output(0).get_tensor().add_names({new_param->get_friendly_name()}); // WA: use CNNNetwork's precisions since plugins sometimes override their precisions // after transformation pipeline is run - new_param->set_output_type( - 0, - InferenceEngine::details::convertPrecision(inputsInfo.at(new_param->get_friendly_name())->getPrecision()), - new_param->get_output_partial_shape(0)); + new_param->set_element_type( + InferenceEngine::details::convertPrecision(inputsInfo.at(new_param->get_friendly_name())->getPrecision())); + new_param->validate_and_infer_types(); const_params.emplace_back(new_param); } for (const auto& result : function->get_results()) { @@ -326,10 +325,9 @@ void IInferencePlugin::SetExeNetworkInfo(const std::shared_ptrget_output_partial_shape(0)); const std::string param_name = ngraph::op::util::create_ie_output_name(result->input_value(0)); fake_param->set_friendly_name(param_name); - fake_param->set_output_type( - 0, - InferenceEngine::details::convertPrecision(outputsInfo.at(param_name)->getPrecision()), - fake_param->get_output_partial_shape(0)); + fake_param->set_element_type( + InferenceEngine::details::convertPrecision(outputsInfo.at(param_name)->getPrecision())); + fake_param->validate_and_infer_types(); auto new_result = result->copy_with_new_inputs({fake_param}); new_result->set_friendly_name(result->get_friendly_name()); if (add_operation_names) { diff --git a/src/tests/functional/plugin/cpu/shared_tests_instances/blob_tests/set_blob.cpp b/src/tests/functional/plugin/cpu/shared_tests_instances/blob_tests/set_blob.cpp index 5e27ee86b77..641d25dacb8 100644 --- a/src/tests/functional/plugin/cpu/shared_tests_instances/blob_tests/set_blob.cpp +++ b/src/tests/functional/plugin/cpu/shared_tests_instances/blob_tests/set_blob.cpp @@ -8,8 +8,15 @@ using namespace BehaviorTestsDefinitions; using namespace InferenceEngine; -const std::vector precisionSet = {Precision::FP32, Precision::I16, Precision::U8, Precision::I8, Precision::U16, Precision::I32, Precision::BOOL, - Precision::I64, Precision::U64}; +const std::vector precisionSet = { + Precision::U8, Precision::I8, + Precision::U16, Precision::I16, + Precision::U32, Precision::I32, + Precision::U64, Precision::I64, + Precision::BF16, Precision::FP16, + Precision::FP32, Precision::FP64, + Precision::BOOL +}; const std::vector typeSet = {setType::INPUT, setType::OUTPUT, setType::BOTH}; diff --git a/src/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/conversion.cpp b/src/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/conversion.cpp index 03be6f86285..868f22db789 100644 --- a/src/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/conversion.cpp +++ b/src/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/conversion.cpp @@ -18,24 +18,26 @@ const std::vector conversionOpTypes = { const std::vector> inShape = {{1, 2, 3, 4}}; const std::vector netPrecisions = { - // Ticket: 59594 - // InferenceEngine::Precision::I4, - InferenceEngine::Precision::I8, - InferenceEngine::Precision::I16, - InferenceEngine::Precision::I32, - InferenceEngine::Precision::I64, - // Ticket: 59594 - // InferenceEngine::Precision::BIN, - // InferenceEngine::Precision::BOOL, - // InferenceEngine::Precision::U4, InferenceEngine::Precision::U8, + InferenceEngine::Precision::I8, InferenceEngine::Precision::U16, - // Ticket: 59594 - // InferenceEngine::Precision::U32, + InferenceEngine::Precision::I16, + InferenceEngine::Precision::U32, + InferenceEngine::Precision::I32, 
InferenceEngine::Precision::U64, + InferenceEngine::Precision::I64, InferenceEngine::Precision::BF16, InferenceEngine::Precision::FP16, - InferenceEngine::Precision::FP32}; + InferenceEngine::Precision::FP32, + InferenceEngine::Precision::FP64, + InferenceEngine::Precision::BOOL, + InferenceEngine::Precision::MIXED, + InferenceEngine::Precision::Q78, + InferenceEngine::Precision::U4, + InferenceEngine::Precision::I4, + InferenceEngine::Precision::BIN, + InferenceEngine::Precision::CUSTOM, +}; INSTANTIATE_TEST_SUITE_P(smoke_ConversionLayerTest, ConversionLayerTest, diff --git a/src/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp b/src/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp index e9d0e22bc4e..b6857ff3671 100644 --- a/src/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp +++ b/src/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp @@ -104,17 +104,6 @@ std::vector disabledTestPatterns() { // CPU plugin does not support some precisions R"(smoke_CachingSupportCase_CPU/LoadNetworkCacheTestBase.CompareWithRefImpl/ReadConcatSplitAssign_f32_batch1_CPU)", - // CPU plugin does not support some precisions - R"(.*Behavior.*OVExecGraphImportExportTest.*elementType=(i8|u32).*)", - R"(.*Behavior.*OVExecGraphImportExportTest.*elementType=(f16).*)", - R"(.*EltwiseLayerTest.*NetType=f16.*)", - - // TODO: CVS-66526 overrides i/o precisions in execution graph - // as WA we used GetInputsInfo() precisions instead of ngraph ones - // R"(.*smoke_BehaviorTests.*OVExecGraphImportExportTest.*importExportedFunction.*type=(i16|u16).*)", - // R"(.*smoke_BehaviorTests.*OVExecGraphImportExportTest.*importExportedFunction.*type=(i64|u64).*)", - // R"(.*smoke_BehaviorTests.*OVExecGraphImportExportTest.*importExportedIENetwork.*type=(i16|u16).*)", - // R"(.*smoke_BehaviorTests.*OVExecGraphImportExportTest.*importExportedIENetwork.*type=(i64|u64).*)", // CPU does not support dynamic rank // Issue: CVS-66778 @@ -168,7 +157,18 @@ std::vector disabledTestPatterns() { R"(.*CTCLossLayerCPUTest.*ctcMergeRepeated=1.*)", // Issue: 71756 R"(.*Deconv_.*D_(Blocked|DW|1x1)_.*DeconvolutionLayerCPUTest\.CompareWithRefs.*inFmts=(nChw16c|nCdhw16c)_outFmts=(nChw16c|nCdhw16c)_primitive=jit_avx512_.*Fused=Multiply\(PerChannel\)\.Add\(PerChannel\).*)", - R"(.*smoke_GroupDeconv_(2|3)D_Blocked_BF16.*S=(\(2\.2\)|\(2\.2\.2\))_PB=(\(0\.0\)|\(0\.0\.0\))_PE=(\(0\.0\)|\(0\.0\.0\))_D=(\(1\.1\)|\(1\.1\.1\))_.*_O=64_G=4.*)" + R"(.*smoke_GroupDeconv_(2|3)D_Blocked_BF16.*S=(\(2\.2\)|\(2\.2\.2\))_PB=(\(0\.0\)|\(0\.0\.0\))_PE=(\(0\.0\)|\(0\.0\.0\))_D=(\(1\.1\)|\(1\.1\.1\))_.*_O=64_G=4.*)", + // Issue: 72150 + R"(.*smoke_SetBlobCPU/SetBlobTest.CompareWithRefs/Type=.*_Device=CPU_PrecisionInNet=BOOL.*)", + // Issue: 59594 + R"(smoke_ConversionLayerTest/ConversionLayerTest.CompareWithRefs.*BOOL.*)", + R"(smoke_ConversionLayerTest/ConversionLayerTest.CompareWithRefs.*MIXED.*)", + R"(smoke_ConversionLayerTest/ConversionLayerTest.CompareWithRefs.*Q78.*)", + R"(smoke_ConversionLayerTest/ConversionLayerTest.CompareWithRefs.*U4.*)", + R"(smoke_ConversionLayerTest/ConversionLayerTest.CompareWithRefs.*I4.*)", + R"(smoke_ConversionLayerTest/ConversionLayerTest.CompareWithRefs.*BIN.*)", + R"(smoke_ConversionLayerTest/ConversionLayerTest.CompareWithRefs.*CUSTOM.*)", + R"(smoke_ConversionLayerTest/ConversionLayerTest.CompareWithRefs.*UNSPECIFIED.*)", }; #define FIX_62820 0 diff --git 
a/src/tests/functional/plugin/shared/include/behavior/ov_executable_network/exec_graph_info.hpp b/src/tests/functional/plugin/shared/include/behavior/ov_executable_network/exec_graph_info.hpp index 65aac7c4766..2478213bd7d 100644 --- a/src/tests/functional/plugin/shared/include/behavior/ov_executable_network/exec_graph_info.hpp +++ b/src/tests/functional/plugin/shared/include/behavior/ov_executable_network/exec_graph_info.hpp @@ -108,11 +108,15 @@ TEST_P(OVExecGraphImportExportTest, importExportedFunction) { importedExecNet.input(0).get_tensor().get_partial_shape()); EXPECT_EQ(function->input(0).get_tensor().get_element_type(), importedExecNet.input(0).get_tensor().get_element_type()); + EXPECT_EQ(function->input(0).get_element_type(), + importedExecNet.input(0).get_tensor().get_element_type()); EXPECT_EQ(function->input(1).get_tensor().get_names(), importedExecNet.input(1).get_tensor().get_names()); EXPECT_EQ(function->input(1).get_tensor().get_partial_shape(), importedExecNet.input(1).get_tensor().get_partial_shape()); EXPECT_EQ(function->input(1).get_tensor().get_element_type(), importedExecNet.input(1).get_tensor().get_element_type()); + EXPECT_EQ(function->input(1).get_element_type(), + importedExecNet.input(1).get_tensor().get_element_type()); EXPECT_EQ(importedExecNet.input(0).get_node(), importedExecNet.input("data1").get_node()); EXPECT_NE(importedExecNet.input(1).get_node(), importedExecNet.input("data1").get_node()); EXPECT_EQ(importedExecNet.input(1).get_node(), importedExecNet.input("data2").get_node()); @@ -125,11 +129,15 @@ TEST_P(OVExecGraphImportExportTest, importExportedFunction) { importedExecNet.output(0).get_tensor().get_partial_shape()); EXPECT_EQ(function->output(0).get_tensor().get_element_type(), importedExecNet.output(0).get_tensor().get_element_type()); + EXPECT_EQ(function->output(0).get_element_type(), + importedExecNet.output(0).get_tensor().get_element_type()); EXPECT_EQ(function->output(1).get_tensor().get_names(), importedExecNet.output(1).get_tensor().get_names()); EXPECT_EQ(function->output(1).get_tensor().get_partial_shape(), importedExecNet.output(1).get_tensor().get_partial_shape()); EXPECT_EQ(function->output(1).get_tensor().get_element_type(), importedExecNet.output(1).get_tensor().get_element_type()); + EXPECT_EQ(function->output(1).get_element_type(), + importedExecNet.output(1).get_tensor().get_element_type()); EXPECT_EQ(importedExecNet.output(0).get_node(), importedExecNet.output("relu").get_node()); EXPECT_NE(importedExecNet.output(1).get_node(), importedExecNet.output("relu").get_node()); EXPECT_EQ(importedExecNet.output(1).get_node(), importedExecNet.output("concat").get_node()); diff --git a/src/tests/functional/plugin/shared/src/blob_tests/set_blob.cpp b/src/tests/functional/plugin/shared/src/blob_tests/set_blob.cpp index 073b78c66c5..ad8e0555683 100644 --- a/src/tests/functional/plugin/shared/src/blob_tests/set_blob.cpp +++ b/src/tests/functional/plugin/shared/src/blob_tests/set_blob.cpp @@ -43,15 +43,19 @@ std::string SetBlobTest::getTestCaseName(testing::TestParamInfo o inline void fillBlob(Blob::Ptr &blob) { switch (blob->getTensorDesc().getPrecision()) { #define CASE(X) case X: CommonTestUtils::fill_data_random(blob); break; - CASE(InferenceEngine::Precision::FP32) - CASE(InferenceEngine::Precision::U8) - CASE(InferenceEngine::Precision::U16) - CASE(InferenceEngine::Precision::I8) - CASE(InferenceEngine::Precision::I16) - CASE(InferenceEngine::Precision::I64) - CASE(InferenceEngine::Precision::U64) - 
CASE(InferenceEngine::Precision::I32) - CASE(InferenceEngine::Precision::BOOL) + CASE(Precision::U8) + CASE(Precision::I8) + CASE(Precision::U16) + CASE(Precision::I16) + CASE(Precision::U32) + CASE(Precision::I32) + CASE(Precision::U64) + CASE(Precision::I64) + CASE(Precision::BF16) + CASE(Precision::FP16) + CASE(Precision::FP32) + CASE(Precision::FP64) + CASE(Precision::BOOL) #undef CASE default: IE_THROW() << "Can't fill blob with precision: " << blob->getTensorDesc().getPrecision(); diff --git a/src/tests/functional/shared_test_classes/src/base/layer_test_utils.cpp b/src/tests/functional/shared_test_classes/src/base/layer_test_utils.cpp index 67dc3d5cb2c..7292ae0eb78 100644 --- a/src/tests/functional/shared_test_classes/src/base/layer_test_utils.cpp +++ b/src/tests/functional/shared_test_classes/src/base/layer_test_utils.cpp @@ -131,54 +131,54 @@ inline void callCompare(const std::pair(reinterpret_cast(expectedBuffer), - actualBuffer, size, threshold, abs_threshold); - break; - case ngraph::element::Type_t::i32: - LayerTestsCommon::Compare(reinterpret_cast(expectedBuffer), - actualBuffer, size, threshold, abs_threshold); - break; - case ngraph::element::Type_t::i16: - LayerTestsCommon::Compare(reinterpret_cast(expectedBuffer), + case ngraph::element::Type_t::boolean: + case ngraph::element::Type_t::u8: + LayerTestsCommon::Compare(reinterpret_cast(expectedBuffer), actualBuffer, size, threshold, abs_threshold); break; case ngraph::element::Type_t::i8: LayerTestsCommon::Compare(reinterpret_cast(expectedBuffer), actualBuffer, size, threshold, abs_threshold); break; - case ngraph::element::Type_t::u64: - LayerTestsCommon::Compare(reinterpret_cast(expectedBuffer), + case ngraph::element::Type_t::u16: + LayerTestsCommon::Compare(reinterpret_cast(expectedBuffer), actualBuffer, size, threshold, abs_threshold); break; + case ngraph::element::Type_t::i16: + LayerTestsCommon::Compare(reinterpret_cast(expectedBuffer), + actualBuffer, size, threshold, abs_threshold); + break; case ngraph::element::Type_t::u32: LayerTestsCommon::Compare(reinterpret_cast(expectedBuffer), actualBuffer, size, threshold, abs_threshold); break; - case ngraph::element::Type_t::u16: - LayerTestsCommon::Compare(reinterpret_cast(expectedBuffer), - actualBuffer, size, threshold, abs_threshold); - break; - case ngraph::element::Type_t::boolean: - case ngraph::element::Type_t::u8: - LayerTestsCommon::Compare(reinterpret_cast(expectedBuffer), + case ngraph::element::Type_t::i32: + LayerTestsCommon::Compare(reinterpret_cast(expectedBuffer), actualBuffer, size, threshold, abs_threshold); break; - case ngraph::element::Type_t::f64: - LayerTestsCommon::Compare(reinterpret_cast(expectedBuffer), - actualBuffer, size, threshold, abs_threshold); + case ngraph::element::Type_t::u64: + LayerTestsCommon::Compare(reinterpret_cast(expectedBuffer), + actualBuffer, size, threshold, abs_threshold); break; - case ngraph::element::Type_t::f32: - LayerTestsCommon::Compare(reinterpret_cast(expectedBuffer), - actualBuffer, size, threshold, abs_threshold); + case ngraph::element::Type_t::i64: + LayerTestsCommon::Compare(reinterpret_cast(expectedBuffer), + actualBuffer, size, threshold, abs_threshold); + break; + case ngraph::element::Type_t::bf16: + LayerTestsCommon::Compare(reinterpret_cast(expectedBuffer), + actualBuffer, size, threshold, abs_threshold); break; case ngraph::element::Type_t::f16: LayerTestsCommon::Compare(reinterpret_cast(expectedBuffer), actualBuffer, size, threshold, abs_threshold); break; - case ngraph::element::Type_t::bf16: - 
LayerTestsCommon::Compare(reinterpret_cast(expectedBuffer), - actualBuffer, size, threshold, abs_threshold); + case ngraph::element::Type_t::f32: + LayerTestsCommon::Compare(reinterpret_cast(expectedBuffer), + actualBuffer, size, threshold, abs_threshold); + break; + case ngraph::element::Type_t::f64: + LayerTestsCommon::Compare(reinterpret_cast(expectedBuffer), + actualBuffer, size, threshold, abs_threshold); break; case ngraph::element::Type_t::i4: { auto expectedOut = ngraph::helpers::convertOutputPrecision( @@ -230,14 +230,9 @@ void LayerTestsCommon::Compare(const std::pairsize(); switch (precision) { - case InferenceEngine::Precision::FP32: - callCompare(expected, reinterpret_cast(actualBuffer), size, threshold, abs_threshold); - break; - case InferenceEngine::Precision::I32: - callCompare(expected, reinterpret_cast(actualBuffer), size, threshold, abs_threshold); - break; - case InferenceEngine::Precision::I64: - callCompare(expected, reinterpret_cast(actualBuffer), size, threshold, abs_threshold); + case InferenceEngine::Precision::BOOL: + case InferenceEngine::Precision::U8: + callCompare(expected, reinterpret_cast(actualBuffer), size, threshold, abs_threshold); break; case InferenceEngine::Precision::I8: callCompare(expected, reinterpret_cast(actualBuffer), size, threshold, abs_threshold); @@ -248,19 +243,30 @@ void LayerTestsCommon::Compare(const std::pair(expected, reinterpret_cast(actualBuffer), size, threshold, abs_threshold); break; - case InferenceEngine::Precision::BOOL: - case InferenceEngine::Precision::U8: - callCompare(expected, reinterpret_cast(actualBuffer), size, threshold, abs_threshold); + case InferenceEngine::Precision::U32: + callCompare(expected, reinterpret_cast(actualBuffer), size, threshold, abs_threshold); + break; + case InferenceEngine::Precision::I32: + callCompare(expected, reinterpret_cast(actualBuffer), size, threshold, abs_threshold); break; case InferenceEngine::Precision::U64: callCompare(expected, reinterpret_cast(actualBuffer), size, threshold, abs_threshold); break; + case InferenceEngine::Precision::I64: + callCompare(expected, reinterpret_cast(actualBuffer), size, threshold, abs_threshold); + break; case InferenceEngine::Precision::BF16: callCompare(expected, reinterpret_cast(actualBuffer), size, threshold, abs_threshold); break; case InferenceEngine::Precision::FP16: callCompare(expected, reinterpret_cast(actualBuffer), size, threshold, abs_threshold); break; + case InferenceEngine::Precision::FP32: + callCompare(expected, reinterpret_cast(actualBuffer), size, threshold, abs_threshold); + break; + case InferenceEngine::Precision::FP64: + callCompare(expected, reinterpret_cast(actualBuffer), size, threshold, abs_threshold); + break; default: FAIL() << "Comparator for " << precision << " precision isn't supported"; } diff --git a/src/tests/functional/shared_test_classes/src/single_layer/conversion.cpp b/src/tests/functional/shared_test_classes/src/single_layer/conversion.cpp index 0c5e7a387a7..1320abbd362 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/conversion.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/conversion.cpp @@ -28,6 +28,9 @@ std::string ConversionLayerTest::getTestCaseName(const testing::TestParamInfo> inputShape; From bd2e3de2953e4e379ec5cc41949a8c1af9088a49 Mon Sep 17 00:00:00 2001 From: Steve Yoo Date: Thu, 16 Dec 2021 23:48:10 +0900 Subject: [PATCH 27/27] Create LSTMCell-1 (#9073) --- .../template_plugin/backend/evaluates_map.cpp | 24 + .../template_plugin/backend/opset_int_tbl.hpp 
| 1 + .../functional/op_reference/lstm_cell.cpp | 589 ++++++++++++++---- 3 files changed, 477 insertions(+), 137 deletions(-) diff --git a/docs/template_plugin/backend/evaluates_map.cpp b/docs/template_plugin/backend/evaluates_map.cpp index 789cff5b4b1..c6a864f9727 100644 --- a/docs/template_plugin/backend/evaluates_map.cpp +++ b/docs/template_plugin/backend/evaluates_map.cpp @@ -1961,6 +1961,30 @@ bool evaluate(const shared_ptr& op, const HostTensorVector& out return true; } +template +bool evaluate(const shared_ptr& op, const HostTensorVector& outputs, const HostTensorVector& inputs) { + using T = typename element_type_traits::value_type; + runtime::reference::lstm_cell(inputs[0]->get_data_ptr(), + inputs[0]->get_shape(), + inputs[1]->get_data_ptr(), + inputs[1]->get_shape(), + inputs[2]->get_data_ptr(), + inputs[2]->get_shape(), + inputs[3]->get_data_ptr(), + inputs[3]->get_shape(), + inputs[4]->get_data_ptr(), + inputs[4]->get_shape(), + inputs[5]->get_data_ptr(), + inputs[5]->get_shape(), + outputs[0]->get_data_ptr(), + outputs[1]->get_data_ptr(), + op->get_activations()[0], + op->get_activations()[1], + op->get_activations()[2], + op->get_clip()); + return true; +} + template bool evaluate(const shared_ptr& op, const HostTensorVector& outputs, const HostTensorVector& inputs) { using T = typename element_type_traits::value_type; diff --git a/docs/template_plugin/backend/opset_int_tbl.hpp b/docs/template_plugin/backend/opset_int_tbl.hpp index fe1230d79d4..287bf9a0d11 100644 --- a/docs/template_plugin/backend/opset_int_tbl.hpp +++ b/docs/template_plugin/backend/opset_int_tbl.hpp @@ -20,6 +20,7 @@ NGRAPH_OP(Gelu, op::v0) NGRAPH_OP(GRN, op::v0) NGRAPH_OP(HardSigmoid, op::v0) NGRAPH_OP(LRN, ngraph::op::v0) +NGRAPH_OP(LSTMCell, op::v0) NGRAPH_OP(MVN, ngraph::op::v0) NGRAPH_OP(NormalizeL2, op::v0) NGRAPH_OP(PriorBox, ngraph::op::v0) diff --git a/docs/template_plugin/tests/functional/op_reference/lstm_cell.cpp b/docs/template_plugin/tests/functional/op_reference/lstm_cell.cpp index c28bbf0ba94..493224da17b 100644 --- a/docs/template_plugin/tests/functional/op_reference/lstm_cell.cpp +++ b/docs/template_plugin/tests/functional/op_reference/lstm_cell.cpp @@ -4,7 +4,8 @@ #include -#include "openvino/op/lstm_cell.hpp" +#include "openvino/opsets/opset4.hpp" +#include "openvino/opsets/opset1.hpp" #include "base_reference_test.hpp" using namespace reference_tests; @@ -12,13 +13,6 @@ using namespace ov; namespace { struct LSTMCellParams { - LSTMCellParams( - int32_t batchSize, int32_t inputSize, int32_t hiddenSize, int32_t gatesCount, - const Tensor& X, const Tensor& W, const Tensor& R, const Tensor& H_t, const Tensor& C_t, const Tensor& B, - const Tensor& Ho, const Tensor& Co, const std::string& testcaseName = "") : - batchSize(batchSize), inputSize(inputSize), hiddenSize(hiddenSize), gatesCount(gatesCount), - X(X), W(W), R(R), H_t(H_t), C_t(C_t), B(B), Ho(Ho), Co(Co), testcaseName(testcaseName) {} - int32_t batchSize; int32_t inputSize; int32_t hiddenSize; @@ -34,6 +28,22 @@ struct LSTMCellParams { std::string testcaseName; }; +struct Builder : ParamsBuilder { + REFERENCE_TESTS_ADD_SET_PARAM(Builder, batchSize); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, inputSize); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, hiddenSize); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, gatesCount); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, X); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, W); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, R); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, H_t); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, C_t); + 
REFERENCE_TESTS_ADD_SET_PARAM(Builder, B); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, Ho); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, Co); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, testcaseName); +}; + class ReferenceLSTMCellTest : public testing::TestWithParam, public CommonReferenceTest { public: void SetUp() override { @@ -63,26 +73,24 @@ public: result << "_hoType=" << param.Ho.type; result << "_hoShape=" << param.Ho.shape; result << "_coType=" << param.Co.type; + result << "_coShape=" << param.Co.shape; if (param.testcaseName != "") { - result << "_coShape=" << param.Co.shape; result << "_=" << param.testcaseName; - } else { - result << "_coShape=" << param.Co.shape; } return result.str(); } private: static std::shared_ptr CreateFunction(const LSTMCellParams& params) { - const auto X = std::make_shared(params.X.type, params.X.shape); - const auto W = std::make_shared(params.W.type, params.W.shape); - const auto R = std::make_shared(params.R.type, params.R.shape); - const auto H_t = std::make_shared(params.H_t.type, params.H_t.shape); - const auto C_t = std::make_shared(params.C_t.type, params.C_t.shape); - const auto B = std::make_shared(params.B.type, params.B.shape); + const auto X = std::make_shared(params.X.type, params.X.shape); + const auto W = std::make_shared(params.W.type, params.W.shape); + const auto R = std::make_shared(params.R.type, params.R.shape); + const auto H_t = std::make_shared(params.H_t.type, params.H_t.shape); + const auto C_t = std::make_shared(params.C_t.type, params.C_t.shape); + const auto B = std::make_shared(params.B.type, params.B.shape); const auto lstm_cell = - std::make_shared(X, + std::make_shared(X, H_t, C_t, op::util::convert_lstm_node_format(W, op::util::LSTMWeightsFormat::IOFC), @@ -107,15 +115,15 @@ public: private: static std::shared_ptr CreateFunction(const LSTMCellParams& params) { - const auto X = std::make_shared(params.X.type, params.X.shape); - const auto W = std::make_shared(params.W.type, params.W.shape); - const auto R = std::make_shared(params.R.type, params.R.shape); - const auto H_t = std::make_shared(params.H_t.type, params.H_t.shape); - const auto C_t = std::make_shared(params.C_t.type, params.C_t.shape); - const auto B = std::make_shared(params.B.type, params.B.shape); + const auto X = std::make_shared(params.X.type, params.X.shape); + const auto W = std::make_shared(params.W.type, params.W.shape); + const auto R = std::make_shared(params.R.type, params.R.shape); + const auto H_t = std::make_shared(params.H_t.type, params.H_t.shape); + const auto C_t = std::make_shared(params.C_t.type, params.C_t.shape); + const auto B = std::make_shared(params.B.type, params.B.shape); const auto lstm_cell = - std::make_shared(X, + std::make_shared(X, H_t, C_t, op::util::convert_lstm_node_format(W, op::util::LSTMWeightsFormat::IOFC), @@ -142,15 +150,15 @@ private: static std::shared_ptr CreateFunction(const LSTMCellParams& params) { const float clip_threshold = 3.5f; - const auto X = std::make_shared(params.X.type, params.X.shape); - const auto W = std::make_shared(params.W.type, params.W.shape); - const auto R = std::make_shared(params.R.type, params.R.shape); - const auto H_t = std::make_shared(params.H_t.type, params.H_t.shape); - const auto C_t = std::make_shared(params.C_t.type, params.C_t.shape); - const auto B = std::make_shared(params.B.type, params.B.shape); + const auto X = std::make_shared(params.X.type, params.X.shape); + const auto W = std::make_shared(params.W.type, params.W.shape); + const auto R = std::make_shared(params.R.type, 
params.R.shape); + const auto H_t = std::make_shared(params.H_t.type, params.H_t.shape); + const auto C_t = std::make_shared(params.C_t.type, params.C_t.shape); + const auto B = std::make_shared(params.B.type, params.B.shape); const auto lstm_cell = - std::make_shared(X, + std::make_shared(X, H_t, C_t, W, @@ -179,36 +187,130 @@ TEST_P(ReferenceLSTMCellTestBiasClip, CompareWithRefs) { Exec(); } +class ReferenceLSTMCellV1Test : public ReferenceLSTMCellTest { +private: + static std::shared_ptr CreateFunction(const LSTMCellParams& params) { + const auto X = std::make_shared(params.X.type, params.X.shape); + const auto W = std::make_shared(params.W.type, params.W.shape); + const auto R = std::make_shared(params.R.type, params.R.shape); + const auto H_t = std::make_shared(params.H_t.type, params.H_t.shape); + const auto C_t = std::make_shared(params.C_t.type, params.C_t.shape); + const auto B = std::make_shared(params.B.type, params.B.shape); + + const auto lstm_cell = + std::make_shared(X, + H_t, + C_t, + op::util::convert_lstm_node_format(W, op::util::LSTMWeightsFormat::IOFC), + op::util::convert_lstm_node_format(R, op::util::LSTMWeightsFormat::IOFC), + op::util::convert_lstm_node_format(B, op::util::LSTMWeightsFormat::IOFC), + params.hiddenSize); + + auto function = std::make_shared(lstm_cell->outputs(), ParameterVector{X, H_t, C_t, W, R, B}); + return function; + } +}; + +class ReferenceLSTMCellV1TestBiasDefaultAttrs : public ReferenceLSTMCellTestBiasDefaultAttrs { +private: + static std::shared_ptr CreateFunction(const LSTMCellParams& params) { + const auto X = std::make_shared(params.X.type, params.X.shape); + const auto W = std::make_shared(params.W.type, params.W.shape); + const auto R = std::make_shared(params.R.type, params.R.shape); + const auto H_t = std::make_shared(params.H_t.type, params.H_t.shape); + const auto C_t = std::make_shared(params.C_t.type, params.C_t.shape); + const auto B = std::make_shared(params.B.type, params.B.shape); + + const auto lstm_cell = + std::make_shared(X, + H_t, + C_t, + op::util::convert_lstm_node_format(W, op::util::LSTMWeightsFormat::IOFC), + op::util::convert_lstm_node_format(R, op::util::LSTMWeightsFormat::IOFC), + op::util::convert_lstm_node_format(B, op::util::LSTMWeightsFormat::IOFC), + params.hiddenSize); + + auto function = std::make_shared(lstm_cell->outputs(), ParameterVector{X, H_t, C_t, W, R, B}); + return function; + } +}; + +class ReferenceLSTMCellV1TestBiasClip : public ReferenceLSTMCellTestBiasClip { +private: + static std::shared_ptr CreateFunction(const LSTMCellParams& params) { + const float clip_threshold = 3.5f; + + const auto X = std::make_shared(params.X.type, params.X.shape); + const auto W = std::make_shared(params.W.type, params.W.shape); + const auto R = std::make_shared(params.R.type, params.R.shape); + const auto H_t = std::make_shared(params.H_t.type, params.H_t.shape); + const auto C_t = std::make_shared(params.C_t.type, params.C_t.shape); + const auto B = std::make_shared(params.B.type, params.B.shape); + + const auto lstm_cell = + std::make_shared(X, + H_t, + C_t, + W, + R, + B, + params.hiddenSize, + op::LSTMWeightsFormat::IFCO, + std::vector{"sigmoid", "tanh", "tanh"}, + std::vector{}, + std::vector{}, + clip_threshold); + + auto function = std::make_shared(lstm_cell->outputs(), ParameterVector{X, H_t, C_t, W, R, B}); + return function; + } +}; + +TEST_P(ReferenceLSTMCellV1Test, CompareWithRefs) { + Exec(); +} + +TEST_P(ReferenceLSTMCellV1TestBiasDefaultAttrs, CompareWithRefs) { + Exec(); +} + 
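[Editor's note] The hardcoded Ho/Co vectors in the parameter generators below come from evaluating the LSTM recurrence directly. For readers checking those numbers by hand, here is a minimal, dependency-free sketch of one LSTMCell step. It assumes the canonical f, i, c, o gate ordering that op::util::convert_lstm_node_format produces from the IOFC tensors in these tests, sigmoid on the gates and tanh on candidate and output (the defaults exercised here); lstm_cell_step and its signature are illustrative, not an OpenVINO API.

    #include <cmath>
    #include <cstddef>
    #include <vector>

    static float sigmoidf(float x) { return 1.f / (1.f + std::exp(-x)); }

    // X: [batch, in], W: [4*hid, in], R: [4*hid, hid], B: [4*hid],
    // H/C: [batch, hid]; gate blocks in W/R/B assumed stacked as f, i, c, o.
    void lstm_cell_step(const std::vector<float>& X, const std::vector<float>& W,
                        const std::vector<float>& R, const std::vector<float>& B,
                        std::vector<float>& H, std::vector<float>& C,
                        size_t batch, size_t in, size_t hid, float clip = 0.f) {
        std::vector<float> Hn(H), Cn(C);  // write to copies: H must stay frozen
        for (size_t b = 0; b < batch; ++b) {
            for (size_t h = 0; h < hid; ++h) {
                float g[4];  // f, i, c, o pre-activations
                for (size_t k = 0; k < 4; ++k) {
                    float acc = B[k * hid + h];
                    for (size_t j = 0; j < in; ++j)
                        acc += W[(k * hid + h) * in + j] * X[b * in + j];
                    for (size_t j = 0; j < hid; ++j)
                        acc += R[(k * hid + h) * hid + j] * H[b * hid + j];
                    if (clip > 0.f)  // optional clipping, as in the BiasClip cases
                        acc = std::fmax(-clip, std::fmin(clip, acc));
                    g[k] = acc;
                }
                const float f = sigmoidf(g[0]), i = sigmoidf(g[1]);
                const float c = std::tanh(g[2]), o = sigmoidf(g[3]);
                Cn[b * hid + h] = f * C[b * hid + h] + i * c;        // -> Co
                Hn[b * hid + h] = o * std::tanh(Cn[b * hid + h]);    // -> Ho
            }
        }
        H.swap(Hn);
        C.swap(Cn);
    }

Under these assumptions, feeding the converted W/R/B and the H_t/C_t values from a test case above should reproduce the corresponding Ho/Co vectors to float tolerance.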
+TEST_P(ReferenceLSTMCellV1TestBiasClip, CompareWithRefs) { + Exec(); +} + template std::vector generateParams() { using T = typename element_type_traits::value_type; std::vector params { - LSTMCellParams( - 2, 3, 3, 4, - Tensor(ET, {2, 3}, std::vector{ - 0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}), - Tensor(ET, {4 * 3, 3}, std::vector{ - 3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f, - 9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f, - 6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f, - 8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f, - 5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f, - 3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}), - Tensor(ET, {4 * 3, 3}, std::vector{ - 0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f, - 0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f, - 0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f, - 0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f, - 0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f, - 0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}), - Tensor(ET, {2, 3}, std::vector{ - 0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}), - Tensor(ET, {2, 3}, std::vector{ - 0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}), - Tensor(ET, {4 * 3}, std::vector(4 * 3, 0.f)), - Tensor(ET, {2, 3}, std::vector{0.81457126f, 0.61109227f, 0.769522f, 0.52239674f, 0.4324641f, 0.63183f}), - Tensor(ET, {2, 3}, std::vector{1.4444952f, 0.9635685f, 1.2875274f, 0.8053419f, 0.7184521f, 0.95803297f}), - "lstm_cell_zero_bias_default_attrs"), + Builder {} + .batchSize(2) + .inputSize(3) + .hiddenSize(3) + .gatesCount(4) + .X(Tensor(ET, {2, 3}, std::vector{ + 0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f})) + .W(Tensor(ET, {4 * 3, 3}, std::vector{ + 3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f, + 9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f, + 6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f, + 8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f, + 5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f, + 3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f})) + .R(Tensor(ET, {4 * 3, 3}, std::vector{ + 0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f, + 0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f, + 0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f, + 0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f, + 0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f, + 0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f})) + .H_t(Tensor(ET, {2, 3}, std::vector{ + 0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f})) + .C_t(Tensor(ET, {2, 3}, std::vector{ + 0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 
0.45384037f})) + .B(Tensor(ET, {4 * 3}, std::vector(4 * 3, 0.f))) + .Ho(Tensor(ET, {2, 3}, std::vector{0.81457126f, 0.61109227f, 0.769522f, 0.52239674f, 0.4324641f, 0.63183f})) + .Co(Tensor(ET, {2, 3}, std::vector{1.4444952f, 0.9635685f, 1.2875274f, 0.8053419f, 0.7184521f, 0.95803297f})) + .testcaseName("lstm_cell_zero_bias_default_attrs") }; return params; } @@ -232,53 +334,56 @@ template std::vector generateParamsBiasDefaultAttrs() { using T = typename element_type_traits::value_type; std::vector params { - LSTMCellParams( - 2, 3, 3, 4, - Tensor(ET, {2, 3}, std::vector{ - 0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}), - Tensor(ET, {4 * 3, 3}, std::vector{ - 3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f, - 9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f, - 6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f, - 8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f, - 5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f, - 3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}), - Tensor(ET, {4 * 3, 3}, std::vector{ - 0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f, - 0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f, - 0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f, - 0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f, - 0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f, - 0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}), - Tensor(ET, {2, 3}, std::vector{ - 0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}), - Tensor(ET, {2, 3}, std::vector{ - 0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}), - Tensor(ET, {4 * 3}, std::vector{1.07393714f, - 1.15248052f, - 1.16671345f, - 0.21450312f, - 1.2380678f, - 1.51688835f, - 0.46718366f, - 0.91810346f, - 1.1274234f, - 0.51022074f, - 1.11389844f, - 0.74174305f}), - Tensor(ET, {2, 3}, std::vector{0.81014400720596313, + Builder {} + .batchSize(2) + .inputSize(3) + .hiddenSize(3) + .gatesCount(4) + .X(Tensor(ET, {2, 3}, std::vector{ + 0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f})) + .W(Tensor(ET, {4 * 3, 3}, std::vector{ + 3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f, + 9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f, + 6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f, + 8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f, + 5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f, + 3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f})) + .R(Tensor(ET, {4 * 3, 3}, std::vector{ + 0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f, + 0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f, + 0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f, + 0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f, + 0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f, + 0.46504205f, 
0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f})) + .H_t(Tensor(ET, {2, 3}, std::vector{ + 0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f})) + .C_t(Tensor(ET, {2, 3}, std::vector{ + 0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f})) + .B(Tensor(ET, {4 * 3}, std::vector{1.07393714f, + 1.15248052f, + 1.16671345f, + 0.21450312f, + 1.2380678f, + 1.51688835f, + 0.46718366f, + 0.91810346f, + 1.1274234f, + 0.51022074f, + 1.11389844f, + 0.74174305f})) + .Ho(Tensor(ET, {2, 3}, std::vector{0.81014400720596313, 0.76665538549423218, 0.82509011030197144, 0.6479143500328064, 0.66586339473724365, - 0.74838578701019287}), - Tensor(ET, {2, 3}, std::vector{1.6800162792205811, + 0.74838578701019287})) + .Co(Tensor(ET, {2, 3}, std::vector{1.6800162792205811, 1.1150213479995728, 1.4578367471694946, 1.0649888515472412, 0.93761754035949707, - 1.3659683465957642}), - "lstm_cell_bias_default_attrs"), + 1.3659683465957642})) + .testcaseName("lstm_cell_bias_default_attrs"), }; return params; } @@ -302,53 +407,56 @@ template std::vector generateParamsBiasClip() { using T = typename element_type_traits::value_type; std::vector params { - LSTMCellParams( - 2, 3, 3, 4, - Tensor(ET, {2, 3}, std::vector{ - 0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}), - Tensor(ET, {4 * 3, 3}, std::vector{ - 3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f, - 9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f, - 6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f, - 8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f, - 5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f, - 3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}), - Tensor(ET, {4 * 3, 3}, std::vector{ - 0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f, - 0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f, - 0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f, - 0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f, - 0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f, - 0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}), - Tensor(ET, {2, 3}, std::vector{ - 0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}), - Tensor(ET, {2, 3}, std::vector{ - 0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}), - Tensor(ET, {4 * 3}, std::vector{1.07393714f, - 1.15248052f, - 1.16671345f, - 0.21450312f, - 1.2380678f, - 1.51688835f, - 0.46718366f, - 0.91810346f, - 1.1274234f, - 0.51022074f, - 1.11389844f, - 0.74174305f}), - Tensor(ET, {2, 3}, std::vector{0.81014400720596313, + Builder {} + .batchSize(2) + .inputSize(3) + .hiddenSize(3) + .gatesCount(4) + .X(Tensor(ET, {2, 3}, std::vector{ + 0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f})) + .W(Tensor(ET, {4 * 3, 3}, std::vector{ + 3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f, + 9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f, + 6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f, + 8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 
5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f, + 5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f, + 3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f})) + .R(Tensor(ET, {4 * 3, 3}, std::vector{ + 0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f, + 0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f, + 0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f, + 0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f, + 0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f, + 0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f})) + .H_t(Tensor(ET, {2, 3}, std::vector{ + 0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f})) + .C_t(Tensor(ET, {2, 3}, std::vector{ + 0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f})) + .B(Tensor(ET, {4 * 3}, std::vector{1.07393714f, + 1.15248052f, + 1.16671345f, + 0.21450312f, + 1.2380678f, + 1.51688835f, + 0.46718366f, + 0.91810346f, + 1.1274234f, + 0.51022074f, + 1.11389844f, + 0.74174305f})) + .Ho(Tensor(ET, {2, 3}, std::vector{0.81014400720596313, 0.76665538549423218, 0.82387429475784302, 0.6479143500328064, 0.66586339473724365, - 0.74838578701019287}), - Tensor(ET, {2, 3}, std::vector{1.6800162792205811, + 0.74838578701019287})) + .Co(Tensor(ET, {2, 3}, std::vector{1.6800162792205811, 1.1150213479995728, 1.4510968923568726, 1.0649888515472412, 0.93761754035949707, - 1.3659683465957642}), - "lstm_cell_bias_clip"), + 1.3659683465957642})) + .testcaseName("lstm_cell_bias_clip"), }; return params; } @@ -376,4 +484,211 @@ INSTANTIATE_TEST_SUITE_P(smoke_LSTMCell_With_Hardcoded_Refs, ReferenceLSTMCellTe INSTANTIATE_TEST_SUITE_P(smoke_LSTMCell_With_Hardcoded_Refs, ReferenceLSTMCellTestBiasClip, testing::ValuesIn(generateCombinedParamsBiasClip()), ReferenceLSTMCellTest::getTestCaseName); -} // namespace + +template +std::vector generateParamsV1() { + using T = typename element_type_traits::value_type; + std::vector params { + Builder {} + .batchSize(2) + .inputSize(3) + .hiddenSize(3) + .gatesCount(4) + .X(Tensor(ET, {2, 3}, std::vector{ + 0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f})) + .W(Tensor(ET, {4 * 3, 3}, std::vector{ + 3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f, + 9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f, + 6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f, + 8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f, + 5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f, + 3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f})) + .R(Tensor(ET, {4 * 3, 3}, std::vector{ + 0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f, + 0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f, + 0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f, + 0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f, + 0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f, + 0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f})) + .H_t(Tensor(ET, {2, 3}, std::vector{ + 0.77956f, 0.5331557f, 
+            .C_t(Tensor(ET, {2, 3}, std::vector<T>{
+                0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}))
+            .B(Tensor(ET, {4 * 3}, std::vector<T>(4 * 3, 0.f)))
+            .Ho(Tensor(ET, {2, 3}, std::vector<T>{0.81457126f, 0.61109227f, 0.769522f, 0.52239674f, 0.4324641f, 0.63183f}))
+            .Co(Tensor(ET, {2, 3}, std::vector<T>{1.4444952f, 0.9635685f, 1.2875274f, 0.8053419f, 0.7184521f, 0.95803297f}))
+            .testcaseName("lstm_cell_v1_zero_bias_default_attrs")
+    };
+    return params;
+}
+
+std::vector<LSTMCellParams> generateCombinedParamsV1() {
+    const std::vector<std::vector<LSTMCellParams>> generatedParams {
+        generateParamsV1<element::Type_t::bf16>(),
+        generateParamsV1<element::Type_t::f16>(),
+        generateParamsV1<element::Type_t::f32>(),
+        generateParamsV1<element::Type_t::f64>(),
+    };
+    std::vector<LSTMCellParams> combinedParams;
+
+    for (const auto& params : generatedParams) {
+        combinedParams.insert(combinedParams.end(), params.begin(), params.end());
+    }
+    return combinedParams;
+}
+
+template <element::Type_t ET>
+std::vector<LSTMCellParams> generateParamsBiasDefaultAttrsV1() {
+    using T = typename element_type_traits<ET>::value_type;
+    std::vector<LSTMCellParams> params {
+        Builder {}
+            .batchSize(2)
+            .inputSize(3)
+            .hiddenSize(3)
+            .gatesCount(4)
+            .X(Tensor(ET, {2, 3}, std::vector<T>{
+                0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}))
+            .W(Tensor(ET, {4 * 3, 3}, std::vector<T>{
+                3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f,
+                9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f,
+                6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f,
+                8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f,
+                5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f,
+                3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}))
+            .R(Tensor(ET, {4 * 3, 3}, std::vector<T>{
+                0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f,
+                0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f,
+                0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f,
+                0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f,
+                0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f,
+                0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}))
+            .H_t(Tensor(ET, {2, 3}, std::vector<T>{
+                0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}))
+            .C_t(Tensor(ET, {2, 3}, std::vector<T>{
+                0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}))
+            .B(Tensor(ET, {4 * 3}, std::vector<T>{1.07393714f,
+                                                  1.15248052f,
+                                                  1.16671345f,
+                                                  0.21450312f,
+                                                  1.2380678f,
+                                                  1.51688835f,
+                                                  0.46718366f,
+                                                  0.91810346f,
+                                                  1.1274234f,
+                                                  0.51022074f,
+                                                  1.11389844f,
+                                                  0.74174305f}))
+            .Ho(Tensor(ET, {2, 3}, std::vector<T>{0.81014400720596313,
+                                                  0.76665538549423218,
+                                                  0.82509011030197144,
+                                                  0.6479143500328064,
+                                                  0.66586339473724365,
+                                                  0.74838578701019287}))
+            .Co(Tensor(ET, {2, 3}, std::vector<T>{1.6800162792205811,
+                                                  1.1150213479995728,
+                                                  1.4578367471694946,
+                                                  1.0649888515472412,
+                                                  0.93761754035949707,
+                                                  1.3659683465957642}))
+            .testcaseName("lstm_cell_v1_bias_default_attrs"),
+    };
+    return params;
+}
+
+std::vector<LSTMCellParams> generateCombinedParamsBiasDefaultAttrsV1() {
+    const std::vector<std::vector<LSTMCellParams>> generatedParams {
+        generateParamsBiasDefaultAttrsV1<element::Type_t::bf16>(),
+        generateParamsBiasDefaultAttrsV1<element::Type_t::f16>(),
+        generateParamsBiasDefaultAttrsV1<element::Type_t::f32>(),
+        generateParamsBiasDefaultAttrsV1<element::Type_t::f64>(),
+    };
+    std::vector<LSTMCellParams> combinedParams;
+
+    for (const auto& params : generatedParams) {
+        combinedParams.insert(combinedParams.end(), params.begin(), params.end());
+    }
+    return combinedParams;
+}
+
+template <element::Type_t ET>
+std::vector<LSTMCellParams> generateParamsBiasClipV1() {
+    using T = typename element_type_traits<ET>::value_type;
+    std::vector<LSTMCellParams> params {
+        Builder {}
+            .batchSize(2)
+            .inputSize(3)
+            .hiddenSize(3)
+            .gatesCount(4)
+            .X(Tensor(ET, {2, 3}, std::vector<T>{
+                0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}))
+            .W(Tensor(ET, {4 * 3, 3}, std::vector<T>{
+                3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f,
+                9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f,
+                6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f,
+                8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f,
+                5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f,
+                3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}))
+            .R(Tensor(ET, {4 * 3, 3}, std::vector<T>{
+                0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f,
+                0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f,
+                0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f,
+                0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f,
+                0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f,
+                0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}))
+            .H_t(Tensor(ET, {2, 3}, std::vector<T>{
+                0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}))
+            .C_t(Tensor(ET, {2, 3}, std::vector<T>{
+                0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}))
+            .B(Tensor(ET, {4 * 3}, std::vector<T>{1.07393714f,
+                                                  1.15248052f,
+                                                  1.16671345f,
+                                                  0.21450312f,
+                                                  1.2380678f,
+                                                  1.51688835f,
+                                                  0.46718366f,
+                                                  0.91810346f,
+                                                  1.1274234f,
+                                                  0.51022074f,
+                                                  1.11389844f,
+                                                  0.74174305f}))
+            .Ho(Tensor(ET, {2, 3}, std::vector<T>{0.81014400720596313,
+                                                  0.76665538549423218,
+                                                  0.82387429475784302,
+                                                  0.6479143500328064,
+                                                  0.66586339473724365,
+                                                  0.74838578701019287}))
+            .Co(Tensor(ET, {2, 3}, std::vector<T>{1.6800162792205811,
+                                                  1.1150213479995728,
+                                                  1.4510968923568726,
+                                                  1.0649888515472412,
+                                                  0.93761754035949707,
+                                                  1.3659683465957642}))
+            .testcaseName("lstm_cell_v1_bias_clip"),
+    };
+    return params;
+}
+
+std::vector<LSTMCellParams> generateCombinedParamsBiasClipV1() {
+    const std::vector<std::vector<LSTMCellParams>> generatedParams {
+        generateParamsBiasClipV1<element::Type_t::bf16>(),
+        generateParamsBiasClipV1<element::Type_t::f16>(),
+        generateParamsBiasClipV1<element::Type_t::f32>(),
+        generateParamsBiasClipV1<element::Type_t::f64>(),
+    };
+    std::vector<LSTMCellParams> combinedParams;
+
+    for (const auto& params : generatedParams) {
+        combinedParams.insert(combinedParams.end(), params.begin(), params.end());
+    }
+    return combinedParams;
+}
+
+INSTANTIATE_TEST_SUITE_P(smoke_LSTMCellV1_With_Hardcoded_Refs, ReferenceLSTMCellV1Test,
+    testing::ValuesIn(generateCombinedParamsV1()), ReferenceLSTMCellV1Test::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_LSTMCellV1_With_Hardcoded_Refs, ReferenceLSTMCellV1TestBiasDefaultAttrs,
+    testing::ValuesIn(generateCombinedParamsBiasDefaultAttrsV1()), ReferenceLSTMCellV1Test::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_LSTMCellV1_With_Hardcoded_Refs, ReferenceLSTMCellV1TestBiasClip,
+    testing::ValuesIn(generateCombinedParamsBiasClipV1()), ReferenceLSTMCellV1Test::getTestCaseName);
+} // namespace
\ No newline at end of file
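
Note for reviewers skimming the diff: the conversion above replaces LSTMCellParams' positional constructor with a fluent builder, so each tensor is labelled by name at the call site and new fields can be appended without rewriting existing test entries. Below is a minimal, self-contained sketch of that pattern; the CellParams/CellParamsBuilder names and fields are illustrative stand-ins, not the definitions the test suite actually uses.

#include <string>
#include <utility>
#include <vector>

// Hypothetical parameter bundle, standing in for LSTMCellParams.
struct CellParams {
    int batchSize = 0;
    std::vector<float> X;
    std::string name;
};

// Fluent builder: each setter assigns one member and returns *this,
// so a test entry reads as a chain of named arguments in any order.
struct CellParamsBuilder {
    CellParams p;
    CellParamsBuilder& batchSize(int v) { p.batchSize = v; return *this; }
    CellParamsBuilder& X(std::vector<float> v) { p.X = std::move(v); return *this; }
    CellParamsBuilder& testcaseName(std::string v) { p.name = std::move(v); return *this; }
    operator CellParams() const { return p; }  // converts back when stored in the params vector
};

int main() {
    // Mirrors the call shape introduced by the patch: Builder {} .batchSize(2) ...
    std::vector<CellParams> params {
        CellParamsBuilder {}
            .batchSize(2)
            .X({0.81342685f, 0.84108883f})
            .testcaseName("lstm_cell_sketch"),
    };
    return params[0].batchSize == 2 ? 0 : 1;
}

Returning *this from every setter is what lets the calls chain, and the implicit conversion back to the parameter struct is one way to keep the brace-initialized params vectors shown in the patch unchanged in shape.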