Merge remote-tracking branch 'upstream/master' into debian-packages
@@ -287,8 +287,8 @@ if(ENABLE_INTEL_GNA)
set(GNA_HASH "cc954e67525006bf8bd353a6682e38bf208f6d74e973e0fc292850e721f17452")
endif()
if(GNA_LIBRARY_VERSION STREQUAL "GNA2")
set(GNA_VERSION "03.00.00.1377")
set(GNA_HASH "d45fb48994d8c2803a16e88e29ae48851066325b97c1c6c4a5bf4f4573d55c65")
set(GNA_VERSION "03.00.00.1455")
set(GNA_HASH "8ac1af18eb32777b00193f4f8c252ee4f8bd64a9069138b4a5aaeebd82ead464")
endif()
set(FILES_TO_EXTRACT_LIST gna_${GNA_VERSION}/include)
@@ -63,10 +63,10 @@ Caffe*-specific parameters:
-k K Path to CustomLayersMapping.xml to register custom
layers
--mean_file MEAN_FILE, -mf MEAN_FILE
Mean image to be used for the input. Should be a
[DEPRECATED] Mean image to be used for the input. Should be a
binaryproto file
--mean_file_offsets MEAN_FILE_OFFSETS, -mo MEAN_FILE_OFFSETS
Mean image offsets to be used for the input
[DEPRECATED] Mean image offsets to be used for the input
binaryproto file. When the mean image is bigger than
the expected input, it is cropped. By default, centers
of the input image and the mean image are the same and
@@ -42,7 +42,7 @@ To convert a Paddle\* model:
Parameters to convert your model:
* [Framework-agnostic parameters](Converting_Model_General.md): These parameters are used to convert a model trained with any supported framework.
> **NOTE:** `--scale`, `--scale_values`, `--mean_values`, `--mean_file` are not supported in the current version of mo_paddle.
> **NOTE:** `--scale`, `--scale_values`, `--mean_values` are not supported in the current version of mo_paddle.
### Example of Converting a Paddle* Model
Below is the example command to convert yolo v3 Paddle\* network to OpenVINO IR network with Model Optimizer.
@@ -6,7 +6,7 @@ mo --input_model INPUT_MODEL --output_dir <OUTPUT_MODEL_DIR>
```
You need to have write permissions for an output directory.
> **NOTE**: Some models require using additional arguments to specify conversion parameters, such as `--input_shape`, `--scale`, `--scale_values`, `--mean_values`, `--mean_file`. To learn about when you need to use these parameters, refer to [Converting a Model Using General Conversion Parameters](Converting_Model_General.md).
> **NOTE**: Some models require using additional arguments to specify conversion parameters, such as `--input_shape`, `--scale`, `--scale_values`, `--mean_values`. To learn about when you need to use these parameters, refer to [Converting a Model Using General Conversion Parameters](Converting_Model_General.md).
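For illustration only (the model file name and the shape and value numbers below are placeholders, not taken from this guide), a conversion command that passes such parameters might look like:

```
mo --input_model model.onnx --output_dir <OUTPUT_MODEL_DIR> --input_shape [1,3,224,224] --mean_values [123.675,116.28,103.53] --scale_values [58.395,57.12,57.375]
```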
To adjust the conversion process, you may use general parameters defined in the [Converting a Model Using General Conversion Parameters](Converting_Model_General.md) and
Framework-specific parameters for:
@@ -151,7 +151,7 @@ Usually neural network models are trained with the normalized input data. This m
In the first case, the Model Optimizer generates the IR with required pre-processing layers and Inference Engine samples may be used to infer the model.
In the second case, information about mean/scale values should be provided to the Model Optimizer to embed it to the generated IR. Model Optimizer provides a number of command line parameters to specify them: `--scale`, `--scale_values`, `--mean_values`, `--mean_file`.
In the second case, information about mean/scale values should be provided to the Model Optimizer to embed it to the generated IR. Model Optimizer provides a number of command line parameters to specify them: `--scale`, `--scale_values`, `--mean_values`.
If both mean and scale values are specified, the mean is subtracted first and then scale is applied. Input values are *divided* by the scale value(s).
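As a small worked example (the numbers are purely illustrative): with `--mean_values [127.5]` and `--scale_values [128]`, each input value `x` is pre-processed as `(x - 127.5) / 128`, so an input of 255 maps to approximately 0.996.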
@@ -2,30 +2,23 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "shape_inference.hpp"
|
||||
|
||||
#include <ngraph/runtime/host_tensor.hpp>
|
||||
#include <openvino/core/node.hpp>
|
||||
#include <ngraph/runtime/host_tensor.hpp>
|
||||
#include <openvino/opsets/opset1.hpp>
|
||||
#include <openvino/opsets/opset2.hpp>
|
||||
#include <openvino/opsets/opset4.hpp>
|
||||
#include <openvino/opsets/opset5.hpp>
|
||||
#include <openvino/opsets/opset6.hpp>
|
||||
#include <openvino/opsets/opset8.hpp>
|
||||
|
||||
#include "assign_shape_inference.hpp"
|
||||
#include "convolution_shape_inference.hpp"
|
||||
#include "experimental_detectron_detection_output_shape_inference.hpp"
|
||||
#include "experimental_detectron_prior_grid_generator_shape_inference.hpp"
|
||||
#include "fake_quantize.hpp"
|
||||
#include "lstm_cell_shape_inference.hpp"
|
||||
#include "read_value_shape_inference.hpp"
|
||||
#include "reduce_shape_inference.hpp"
|
||||
#include "shape_inference.hpp"
|
||||
#include "shape_nodes.hpp"
|
||||
#include "static_shape.hpp"
|
||||
#include "tile_shape_inference.hpp"
|
||||
#include "utils.hpp"
|
||||
#include "shape_inference.hpp"
|
||||
#include "convolution_shape_inference.hpp"
|
||||
#include "reduce_shape_inference.hpp"
|
||||
#include "shape_nodes.hpp"
|
||||
#include "fake_quantize.hpp"
|
||||
#include "experimental_detectron_detection_output_shape_inference.hpp"
|
||||
|
||||
|
||||
void shape_inference(ov::Node* op,
|
||||
const std::vector<ov::StaticShape>& input_shapes,
|
||||
@@ -34,53 +27,44 @@ void shape_inference(ov::Node* op,
|
||||
if (auto node = ov::as_type<ov::opset8::Convolution>(op)) {
|
||||
ov::CoordinateDiff pads_begin, pads_end;
|
||||
bool status = resolve_auto_pad_for_shape(node, pads_begin, pads_end, input_shapes, 2, 2);
|
||||
OPENVINO_ASSERT(status,
|
||||
"Convolution shape inference doesn't have enough information to calculate static shapes");
|
||||
OPENVINO_ASSERT(status, "Convolution shape inference doesn't have enough information to calculate static shapes");
|
||||
shape_infer(node, pads_begin, pads_end, input_shapes, output_shapes);
|
||||
} else if (auto node = ov::as_type<ov::opset8::GroupConvolution>(op)) {
|
||||
ov::CoordinateDiff pads_begin, pads_end;
|
||||
bool status = resolve_auto_pad_for_shape(node, pads_begin, pads_end, input_shapes, 2, 3);
|
||||
OPENVINO_ASSERT(status,
|
||||
"GroupConvolution shape inference doesn't have enough information to calculate static shapes");
|
||||
OPENVINO_ASSERT(status, "GroupConvolution shape inference doesn't have enough information to calculate static shapes");
|
||||
shape_infer(node, pads_begin, pads_end, input_shapes, output_shapes);
|
||||
} else if (auto node = ov::as_type<ov::opset8::ConvolutionBackpropData>(op)) {
|
||||
ov::CoordinateDiff pads_begin, pads_end;
|
||||
ov::StaticShape output_shape_input;
|
||||
if (node->get_input_size() == 3)
|
||||
get_data_as_shape<ov::StaticShape>(2, op, output_shape_input, constant_data);
|
||||
bool status =
|
||||
resolve_auto_pad_for_shape_back_prop(node, pads_begin, pads_end, input_shapes, output_shape_input, 2, 2);
|
||||
OPENVINO_ASSERT(
|
||||
status,
|
||||
"ConvolutionBackpropData shape inference doesn't have enough information to calculate static shapes");
|
||||
bool status = resolve_auto_pad_for_shape_back_prop(node, pads_begin, pads_end, input_shapes, output_shape_input, 2, 2);
|
||||
OPENVINO_ASSERT(status, "ConvolutionBackpropData shape inference doesn't have enough information to calculate static shapes");
|
||||
shape_infer(node, pads_begin, pads_end, output_shape_input, input_shapes, output_shapes);
|
||||
} else if (auto node = ov::as_type<ov::opset8::GroupConvolutionBackpropData>(op)) {
|
||||
ov::CoordinateDiff pads_begin, pads_end;
|
||||
ov::StaticShape output_shape_input;
|
||||
if (node->get_input_size() == 3)
|
||||
get_data_as_shape<ov::StaticShape>(2, op, output_shape_input, constant_data);
|
||||
bool status =
|
||||
resolve_auto_pad_for_shape_back_prop(node, pads_begin, pads_end, input_shapes, output_shape_input, 2, 3);
|
||||
OPENVINO_ASSERT(
|
||||
status,
|
||||
"GroupConvolutionBackpropData shape inference doesn't have enough information to calculate static shapes");
|
||||
bool status = resolve_auto_pad_for_shape_back_prop(node, pads_begin, pads_end, input_shapes, output_shape_input, 2, 3);
|
||||
OPENVINO_ASSERT(status, "GroupConvolutionBackpropData shape inference doesn't have enough information to calculate static shapes");
|
||||
shape_infer(node, pads_begin, pads_end, output_shape_input, input_shapes, output_shapes);
|
||||
} else if (auto node = ov::as_type<ov::op::util::ArithmeticReductionKeepDims>(op)) {
|
||||
shape_infer(node, input_shapes, output_shapes, constant_data);
|
||||
} else if (auto node = ov::as_type<ov::op::util::LogicalReductionKeepDims>(op)) {
|
||||
shape_infer(node, input_shapes, output_shapes, constant_data);
|
||||
} else if (ov::is_type<ov::op::util::UnaryElementwiseArithmetic>(op) || ov::is_type<ov::opset1::Convert>(op) ||
|
||||
ov::is_type<ov::opset1::Clamp>(op) || ov::is_type<ov::opset1::GRN>(op) ||
|
||||
ov::is_type<ov::opset1::LRN>(op) || ov::is_type<ov::opset1::LogicalNot>(op) ||
|
||||
ov::is_type<ov::opset4::Mish>(op) || ov::is_type<ov::opset2::MVN>(op) ||
|
||||
ov::is_type<ov::opset6::MVN>(op) || ov::is_type<ov::opset1::PRelu>(op) ||
|
||||
ov::is_type<ov::opset1::Relu>(op) || ov::is_type<ov::opset4::Swish>(op) ||
|
||||
ov::is_type<ov::opset1::Softmax>(op) || ov::is_type<ov::opset1::Elu>(op) ||
|
||||
ov::is_type<ov::opset5::Round>(op)) {
|
||||
} else if (ov::is_type<ov::op::util::UnaryElementwiseArithmetic>(op) ||
|
||||
ov::is_type<ov::opset1::Convert>(op) || ov::is_type<ov::opset1::Clamp>(op) ||
|
||||
ov::is_type<ov::opset1::GRN>(op) || ov::is_type<ov::opset1::LRN>(op) ||
|
||||
ov::is_type<ov::opset1::LogicalNot>(op) || ov::is_type<ov::opset4::Mish>(op) ||
|
||||
ov::is_type<ov::opset2::MVN>(op) || ov::is_type<ov::opset6::MVN>(op) ||
|
||||
ov::is_type<ov::opset1::PRelu>(op) || ov::is_type<ov::opset1::Relu>(op) ||
|
||||
ov::is_type<ov::opset4::Swish>(op) || ov::is_type<ov::opset1::Softmax>(op) ||
|
||||
ov::is_type<ov::opset1::Elu>(op) || ov::is_type<ov::opset5::Round>(op)) {
|
||||
copy_shape_infer(node, input_shapes, output_shapes);
|
||||
} else if (ov::is_type<ov::op::util::BinaryElementwiseArithmetic>(op) ||
|
||||
ov::is_type<ov::op::util::BinaryElementwiseComparison>(op) ||
|
||||
ov::is_type<ov::op::util::BinaryElementwiseLogical>(op)) {
|
||||
ov::is_type<ov::op::util::BinaryElementwiseComparison>(op) || ov::is_type<ov::op::util::BinaryElementwiseLogical>(op)) {
|
||||
eltwise_shape_infer(op, input_shapes, output_shapes);
|
||||
} else if (auto node = ov::as_type<ov::opset1::FakeQuantize>(op)) {
|
||||
shape_infer(node, input_shapes, output_shapes);
|
||||
@@ -96,30 +80,15 @@ void shape_inference(ov::Node* op,
|
||||
shape_infer(node, input_shapes, output_shapes);
|
||||
} else if (auto node = ov::as_type<ov::opset6::ExperimentalDetectronDetectionOutput>(op)) {
|
||||
shape_infer(node, input_shapes, output_shapes);
|
||||
} else if (auto node = ov::as_type<ov::opset3::Assign>(op)) {
|
||||
shape_infer(node, input_shapes, output_shapes);
|
||||
} else if (auto node = ov::as_type<ov::opset6::Assign>(op)) {
|
||||
shape_infer(node, input_shapes, output_shapes);
|
||||
} else if (auto node = ov::as_type<ov::opset6::ExperimentalDetectronPriorGridGenerator>(op)) {
|
||||
shape_infer(node, input_shapes, output_shapes);
|
||||
} else if (auto node = ov::as_type<ov::opset1::LSTMCell>(op)) {
|
||||
shape_infer(node, input_shapes, output_shapes);
|
||||
} else if (auto node = ov::as_type<ov::opset6::LSTMCell>(op)) {
|
||||
shape_infer(node, input_shapes, output_shapes);
|
||||
} else if (auto node = ov::as_type<ov::opset3::ReadValue>(op)) {
|
||||
shape_infer(node, input_shapes, output_shapes);
|
||||
} else if (auto node = ov::as_type<ov::opset6::ReadValue>(op)) {
|
||||
shape_infer(node, input_shapes, output_shapes);
|
||||
} else if (auto node = ov::as_type<ov::opset6::Tile>(op)) {
|
||||
shape_infer(node, input_shapes, output_shapes, constant_data);
|
||||
} else {
|
||||
ngraph::OutputVector new_inputs;
|
||||
for (size_t i = 0; i < op->get_input_size(); ++i) {
|
||||
if (constant_data.count(i)) {
|
||||
new_inputs.push_back(std::make_shared<ov::opset1::Constant>(constant_data.at(i)));
|
||||
} else {
|
||||
new_inputs.push_back(std::make_shared<ov::opset1::Parameter>(op->get_input_element_type(i),
|
||||
input_shapes[i].to_partial_shape()));
|
||||
new_inputs.push_back(
|
||||
std::make_shared<ov::opset1::Parameter>(
|
||||
op->get_input_element_type(i), input_shapes[i].to_partial_shape()));
|
||||
}
|
||||
}
|
||||
const auto local_op = op->clone_with_new_inputs(new_inputs);
|
||||
@@ -127,10 +96,8 @@ void shape_inference(ov::Node* op,
|
||||
|
||||
output_shapes.resize(op->get_output_size());
|
||||
for (size_t i = 0; i < output_shapes.size(); ++i) {
|
||||
const auto& partial_shape = local_op->get_output_partial_shape(i);
|
||||
OPENVINO_ASSERT(
|
||||
partial_shape.is_static(),
|
||||
"On device shape infer shouldn't support default shape infer for nodes with internal dynamism");
|
||||
const auto &partial_shape = local_op->get_output_partial_shape(i);
|
||||
OPENVINO_ASSERT(partial_shape.is_static(), "On device shape infer shouldn't support default shape infer for nodes with internal dynamism");
|
||||
output_shapes[i] = ov::StaticShape(partial_shape.to_shape());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,149 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <memory>
|
||||
#include <queue>
|
||||
|
||||
#include <ngraph/function.hpp>
|
||||
#include <ngraph/opsets/opset7.hpp>
|
||||
#include <transformations/common_optimizations/transpose_reshape_elimination_for_matmul.hpp>
|
||||
#include <transformations/op_conversions/einsum_decomposition.hpp>
|
||||
#include <transformations/init_node_info.hpp>
|
||||
#include <ngraph/pass/manager.hpp>
|
||||
|
||||
#include "common_test_utils/ngraph_test_utils.hpp"
|
||||
|
||||
|
||||
using namespace testing;
|
||||
using namespace ngraph;
|
||||
|
||||
|
||||
TEST_F(TransformationTestsF, TransposeReshapeEliminationForMatMul) {
|
||||
Shape data_shape_1{10, 2};
|
||||
Shape data_shape_2{10, 2, 25};
|
||||
{
|
||||
auto data_1 = std::make_shared<opset1::Parameter>(element::f32, data_shape_1);
|
||||
auto data_2 = std::make_shared<opset1::Parameter>(element::f32, data_shape_2);
|
||||
auto const_transpose_before = opset1::Constant::create(element::i32, Shape{3}, {1, 2, 0});
|
||||
auto transpose_before = std::make_shared<opset1::Transpose>(data_2, const_transpose_before);
|
||||
auto const_reshape_before = opset1::Constant::create(element::i32, Shape{2}, {2, 250});
|
||||
auto reshape_before = std::make_shared<opset1::Reshape>(transpose_before, const_reshape_before, false);
|
||||
auto matmul = std::make_shared<opset1::MatMul>(data_1, reshape_before);
|
||||
auto const_reshape_after = opset1::Constant::create(element::i32, Shape{3}, {10, 10, 25});
|
||||
auto reshape_after = std::make_shared<opset1::Reshape>(matmul, const_reshape_after, false);
|
||||
auto const_tranpose_after = opset1::Constant::create(element::i32, Shape{3}, {2, 0, 1});
|
||||
auto tranpose_after = std::make_shared<opset1::Transpose>(reshape_after, const_tranpose_after);
|
||||
function = std::make_shared<Function>(NodeVector{tranpose_after}, ParameterVector{data_1, data_2});
|
||||
manager.register_pass<pass::InitNodeInfo>();
|
||||
manager.register_pass<pass::TransposeReshapeEliminationForMatmul>();
|
||||
}
|
||||
{
|
||||
auto data_1 = std::make_shared<opset1::Parameter>(element::f32, data_shape_1);
|
||||
auto data_2 = std::make_shared<opset1::Parameter>(element::f32, data_shape_2);
|
||||
auto matmul = std::make_shared<opset1::MatMul>(data_1, data_2);
|
||||
function_ref = std::make_shared<Function>(NodeVector{matmul}, ParameterVector{data_1, data_2});
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(TransformationTestsF, TransposeReshapeEliminationForMatMul_TransposedA) {
|
||||
Shape data_shape_1{2, 10};
|
||||
Shape data_shape_2{10, 2, 25};
|
||||
{
|
||||
auto data_1 = std::make_shared<opset1::Parameter>(element::f32, data_shape_1);
|
||||
auto data_2 = std::make_shared<opset1::Parameter>(element::f32, data_shape_2);
|
||||
auto const_transpose_before = opset1::Constant::create(element::i32, Shape{3}, {1, 2, 0});
|
||||
auto transpose_before = std::make_shared<opset1::Transpose>(data_2, const_transpose_before);
|
||||
auto const_reshape_before = opset1::Constant::create(element::i32, Shape{2}, {2, 250});
|
||||
auto reshape_before = std::make_shared<opset1::Reshape>(transpose_before, const_reshape_before, false);
|
||||
auto matmul = std::make_shared<opset1::MatMul>(data_1, reshape_before, true, false);
|
||||
auto const_reshape_after = opset1::Constant::create(element::i32, Shape{3}, {10, 10, 25});
|
||||
auto reshape_after = std::make_shared<opset1::Reshape>(matmul, const_reshape_after, false);
|
||||
auto const_tranpose_after = opset1::Constant::create(element::i32, Shape{3}, {2, 0, 1});
|
||||
auto tranpose_after = std::make_shared<opset1::Transpose>(reshape_after, const_tranpose_after);
|
||||
function = std::make_shared<Function>(NodeVector{tranpose_after}, ParameterVector{data_1, data_2});
|
||||
manager.register_pass<pass::TransposeReshapeEliminationForMatmul>();
|
||||
}
|
||||
{
|
||||
auto data_1 = std::make_shared<opset1::Parameter>(element::f32, data_shape_1);
|
||||
auto data_2 = std::make_shared<opset1::Parameter>(element::f32, data_shape_2);
|
||||
auto matmul = std::make_shared<opset1::MatMul>(data_1, data_2, true, false);
|
||||
function_ref = std::make_shared<Function>(NodeVector{matmul}, ParameterVector{data_1, data_2});
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(TransformationTestsF, TransposeReshapeEliminationForMatMul_TransposedB) {
|
||||
Shape data_shape_1{10, 2};
|
||||
Shape data_shape_2{10, 2, 25};
|
||||
{
|
||||
auto data_1 = std::make_shared<opset1::Parameter>(element::f32, data_shape_1);
|
||||
auto data_2 = std::make_shared<opset1::Parameter>(element::f32, data_shape_2);
|
||||
auto const_transpose_before = opset1::Constant::create(element::i32, Shape{3}, {0, 2, 1});
|
||||
auto transpose_before = std::make_shared<opset1::Transpose>(data_2, const_transpose_before);
|
||||
auto const_reshape_before = opset1::Constant::create(element::i32, Shape{2}, {250, 2});
|
||||
auto reshape_before = std::make_shared<opset1::Reshape>(transpose_before, const_reshape_before, false);
|
||||
auto matmul = std::make_shared<opset1::MatMul>(data_1, reshape_before, false, true);
|
||||
auto const_reshape_after = opset1::Constant::create(element::i32, Shape{3}, {10, 10, 25});
|
||||
auto reshape_after = std::make_shared<opset1::Reshape>(matmul, const_reshape_after, false);
|
||||
auto const_tranpose_after = opset1::Constant::create(element::i32, Shape{3}, {1, 0, 2});
|
||||
auto tranpose_after = std::make_shared<opset1::Transpose>(reshape_after, const_tranpose_after);
|
||||
function = std::make_shared<Function>(NodeVector{tranpose_after}, ParameterVector{data_1, data_2});
|
||||
manager.register_pass<pass::TransposeReshapeEliminationForMatmul>();
|
||||
}
|
||||
{
|
||||
auto data_1 = std::make_shared<opset1::Parameter>(element::f32, data_shape_1);
|
||||
auto data_2 = std::make_shared<opset1::Parameter>(element::f32, data_shape_2);
|
||||
auto matmul = std::make_shared<opset1::MatMul>(data_1, data_2);
|
||||
function_ref = std::make_shared<Function>(NodeVector{matmul}, ParameterVector{data_1, data_2});
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(TransformationTestsF, TransposeReshapeEliminationForMatMul_TransposedAB) {
|
||||
Shape data_shape_1{2, 10};
|
||||
Shape data_shape_2{10, 2, 25};
|
||||
{
|
||||
auto data_1 = std::make_shared<opset1::Parameter>(element::f32, data_shape_1);
|
||||
auto data_2 = std::make_shared<opset1::Parameter>(element::f32, data_shape_2);
|
||||
auto const_transpose_before = opset1::Constant::create(element::i32, Shape{3}, {0, 2, 1});
|
||||
auto transpose_before = std::make_shared<opset1::Transpose>(data_2, const_transpose_before);
|
||||
auto const_reshape_before = opset1::Constant::create(element::i32, Shape{2}, {250, 2});
|
||||
auto reshape_before = std::make_shared<opset1::Reshape>(transpose_before, const_reshape_before, false);
|
||||
auto matmul = std::make_shared<opset1::MatMul>(data_1, reshape_before, true, true);
|
||||
auto const_reshape_after = opset1::Constant::create(element::i32, Shape{3}, {10, 10, 25});
|
||||
auto reshape_after = std::make_shared<opset1::Reshape>(matmul, const_reshape_after, false);
|
||||
auto const_tranpose_after = opset1::Constant::create(element::i32, Shape{3}, {1, 0, 2});
|
||||
auto tranpose_after = std::make_shared<opset1::Transpose>(reshape_after, const_tranpose_after);
|
||||
function = std::make_shared<Function>(NodeVector{tranpose_after}, ParameterVector{data_1, data_2});
|
||||
manager.register_pass<pass::TransposeReshapeEliminationForMatmul>();
|
||||
}
|
||||
{
|
||||
auto data_1 = std::make_shared<opset1::Parameter>(element::f32, data_shape_1);
|
||||
auto data_2 = std::make_shared<opset1::Parameter>(element::f32, data_shape_2);
|
||||
auto matmul = std::make_shared<opset1::MatMul>(data_1, data_2, true, false);
|
||||
function_ref = std::make_shared<Function>(NodeVector{matmul}, ParameterVector{data_1, data_2});
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(TransformationTestsF, TransposeReshapeEliminationForMatMul_Einsum) {
|
||||
Shape data_shape_1{5, 2};
|
||||
Shape data_shape_2{10, 2, 25};
|
||||
{
|
||||
auto data_1 = std::make_shared<opset1::Parameter>(element::f32, data_shape_1);
|
||||
auto data_2 = std::make_shared<opset1::Parameter>(element::f32, data_shape_2);
|
||||
auto einsum = std::make_shared<opset7::Einsum>(OutputVector{data_1, data_2}, "kl,mlj->mkj");
|
||||
function = std::make_shared<Function>(NodeVector{einsum}, ParameterVector{data_1, data_2});
|
||||
manager.register_pass<pass::EinsumDecomposition>();
|
||||
manager.register_pass<pass::TransposeReshapeEliminationForMatmul>();
|
||||
}
|
||||
{
|
||||
auto data_1 = std::make_shared<opset1::Parameter>(element::f32, data_shape_1);
|
||||
auto data_2 = std::make_shared<opset1::Parameter>(element::f32, data_shape_2);
|
||||
// for some cases Reshape may be first input for Matmul
|
||||
auto shape_constant = std::make_shared<opset1::Constant>(element::i64, Shape{data_shape_1.size()}, data_shape_1);
|
||||
auto reshape = std::make_shared<opset1::Reshape>(data_1, shape_constant, false);
|
||||
auto matmul = std::make_shared<opset1::MatMul>(reshape, data_2, false, false);
|
||||
function_ref = std::make_shared<Function>(NodeVector{matmul}, ParameterVector{data_1, data_2});
|
||||
}
|
||||
}
|
||||
@@ -74,6 +74,15 @@ const std::vector<std::vector<size_t >> kernels2D = {
|
||||
{7, 1},
|
||||
{3, 3},
|
||||
};
|
||||
|
||||
const std::vector<std::vector<size_t >> kernels2D_big = {
|
||||
{7, 2},
|
||||
{2, 7},
|
||||
{3, 7},
|
||||
{6, 6},
|
||||
{7, 7},
|
||||
};
|
||||
|
||||
const std::vector<std::vector<size_t >> strides2D = {
|
||||
{1, 1},
|
||||
};
|
||||
@@ -100,6 +109,16 @@ const auto conv2DParams_Kernels2D = ::testing::Combine(
|
||||
::testing::ValuesIn(numOutCannels2D),
|
||||
::testing::Values(ngraph::op::PadType::EXPLICIT)
|
||||
);
|
||||
const auto conv2DParams_Kernels2D_big = ::testing::Combine(
|
||||
::testing::ValuesIn(kernels2D_big),
|
||||
::testing::ValuesIn(strides2D),
|
||||
::testing::ValuesIn(padBegins2D),
|
||||
::testing::ValuesIn(padEnds2D),
|
||||
::testing::ValuesIn(dilations2D),
|
||||
::testing::ValuesIn(numOutCannels2D),
|
||||
::testing::Values(ngraph::op::PadType::EXPLICIT)
|
||||
);
|
||||
|
||||
const auto conv2DParams_ExplicitPadding_Height1 = ::testing::Combine(
|
||||
::testing::ValuesIn(kernelsH1),
|
||||
::testing::ValuesIn(stridesH1),
|
||||
@@ -218,4 +237,16 @@ INSTANTIATE_TEST_SUITE_P(smoke_Convolution2D_Kernels2D, GnaConvolutionLayerTest,
|
||||
::testing::Values(input2DNCHW),
|
||||
::testing::Values(CommonTestUtils::DEVICE_GNA)),
|
||||
GnaConvolutionLayerTest::getTestCaseName);
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_Convolution2D_Kernels2D_big, GnaConvolutionLayerTest,
|
||||
::testing::Combine(
|
||||
conv2DParams_Kernels2D_big,
|
||||
::testing::ValuesIn(netPrecisions),
|
||||
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
|
||||
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
|
||||
::testing::Values(InferenceEngine::Layout::ANY),
|
||||
::testing::Values(InferenceEngine::Layout::ANY),
|
||||
::testing::Values(input2DNCHW),
|
||||
::testing::Values(CommonTestUtils::DEVICE_GNA)),
|
||||
GnaConvolutionLayerTest::getTestCaseName);
|
||||
} // namespace
|
||||
|
||||
@@ -22,21 +22,39 @@ const std::vector<std::vector<size_t >> kernels2D = {
|
||||
{1, 3},
|
||||
{7, 1},
|
||||
{3, 3},
|
||||
{7, 2},
|
||||
{2, 7}
|
||||
};
|
||||
|
||||
const std::vector<std::vector<size_t >> kernels2DInvalid = {
|
||||
{1, 4},
|
||||
{2, 3},
|
||||
{3, 2},
|
||||
{9, 3},
|
||||
{1, 9},
|
||||
{1, 8},
|
||||
{8, 1},
|
||||
{4, 4},
|
||||
{8, 8},
|
||||
};
|
||||
|
||||
const std::vector<std::vector<size_t >> kernels2DInvalidFor56InC = {
|
||||
{1, 6},
|
||||
{2, 6},
|
||||
{7, 7},
|
||||
{1, 7},
|
||||
{4, 7},
|
||||
};
|
||||
|
||||
const std::vector<std::vector<size_t >> kernels2DInvalidFor120InC = {
|
||||
{1, 4},
|
||||
{8, 3},
|
||||
{7, 5},
|
||||
{1, 6},
|
||||
{4, 7},
|
||||
};
|
||||
|
||||
const std::vector<std::vector<size_t >> strides2D = {
|
||||
{1, 1},
|
||||
};
|
||||
const std::vector<std::vector<size_t >> strides2DInvalid = {
|
||||
{4, 4}, {1, 4}
|
||||
{8, 8}, {1, 8}
|
||||
};
|
||||
const std::vector<std::vector<ptrdiff_t>> padBegins2D = { {0, 0},
|
||||
};
|
||||
@@ -51,10 +69,13 @@ const std::vector<std::vector<size_t >> dilations2D = { {1, 1},
|
||||
const std::vector<std::vector<size_t >> dilations2DInvalid = { {2, 2},
|
||||
};
|
||||
const std::vector<size_t> numOutChannels2D = { 32 };
|
||||
const std::vector<size_t> numOutChannels2DInvalid = { 1, 7, 9, 400 };
|
||||
const std::vector<size_t> numOutChannels2DInvalid = { 1, 7, 9, 1032 };
|
||||
|
||||
const std::vector<std::vector<size_t>> input2DNCHWFine = { { 1, 8, 20, 16 } };
|
||||
|
||||
const std::vector<std::vector<size_t>> input2DNCHWWithInC56 = { { 1, 56, 20, 16 } };
|
||||
const std::vector<std::vector<size_t>> input2DNCHWWithInC120 = { { 1, 120, 20, 16 } };
|
||||
|
||||
const std::vector<std::vector<size_t>> input2DNCHWInvalidInputC = {
|
||||
{ 1, 7, 20, 16 },
|
||||
{ 1, 9, 20, 16 },
|
||||
@@ -80,6 +101,27 @@ const auto conv2DParametersInvalidKernel = ::testing::Combine(
|
||||
::testing::ValuesIn(numOutChannels2D),
|
||||
::testing::Values(ngraph::op::PadType::EXPLICIT)
|
||||
);
|
||||
|
||||
const auto conv2DParametersInvalidKernelFor56InC = ::testing::Combine(
|
||||
::testing::ValuesIn(kernels2DInvalidFor56InC),
|
||||
::testing::ValuesIn(strides2D),
|
||||
::testing::ValuesIn(padBegins2D),
|
||||
::testing::ValuesIn(padEnds2D),
|
||||
::testing::ValuesIn(dilations2D),
|
||||
::testing::ValuesIn(numOutChannels2D),
|
||||
::testing::Values(ngraph::op::PadType::EXPLICIT)
|
||||
);
|
||||
|
||||
const auto conv2DParametersInvalidKernelFor120InC = ::testing::Combine(
|
||||
::testing::ValuesIn(kernels2DInvalidFor120InC),
|
||||
::testing::ValuesIn(strides2D),
|
||||
::testing::ValuesIn(padBegins2D),
|
||||
::testing::ValuesIn(padEnds2D),
|
||||
::testing::ValuesIn(dilations2D),
|
||||
::testing::ValuesIn(numOutChannels2D),
|
||||
::testing::Values(ngraph::op::PadType::EXPLICIT)
|
||||
);
|
||||
|
||||
const auto conv2DParametersInvalidFilterNumber = ::testing::Combine(
|
||||
::testing::ValuesIn(kernels2D),
|
||||
::testing::ValuesIn(strides2D),
|
||||
@@ -165,6 +207,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_GnaConv2DNegativeTestInvalid##whats_wrong, GnaCon
|
||||
|
||||
GNA_NEG_INSTANTIATE(FilterNumber, InvalidFilterNumber, Fine, "Unsupported number of kernels")
|
||||
GNA_NEG_INSTANTIATE(Kernel, InvalidKernel, Fine, "Unsupported kernel shape")
|
||||
GNA_NEG_INSTANTIATE(BigKernelFor56InC, InvalidKernelFor56InC, WithInC56, "Unsupported kernel shape")
|
||||
GNA_NEG_INSTANTIATE(BigKernelFor120InC, InvalidKernelFor120InC, WithInC120, "Unsupported kernel shape")
|
||||
GNA_NEG_INSTANTIATE(InputH, Fine, InvalidInputH, "Unsupported input height")
|
||||
GNA_NEG_INSTANTIATE(InputW, Fine, InvalidInputW, "Unsupported input width")
|
||||
GNA_NEG_INSTANTIATE(InputC, Fine, InvalidInputC, "Unsupported number of input channels")
|
||||
@@ -172,4 +216,4 @@ GNA_NEG_INSTANTIATE(Padding, InvalidPadding, Fine, "Convolution's input padding
|
||||
GNA_NEG_INSTANTIATE(Stride, InvalidStride, Fine, "Unsupported convolution stride shape")
|
||||
GNA_NEG_INSTANTIATE(Dilation, InvalidDilation, Fine, "dilation is not supported on GNA")
|
||||
|
||||
} // namespace
|
||||
} // namespace
|
||||
|
||||
@@ -1,47 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <openvino/op/ops.hpp>
|
||||
#include <openvino/op/parameter.hpp>
|
||||
#include <utils/shape_inference/shape_inference.hpp>
|
||||
#include <utils/shape_inference/static_shape.hpp>
|
||||
|
||||
using namespace ov;
|
||||
template <class T>
|
||||
std::shared_ptr<T> constructGraph();
|
||||
|
||||
template <>
|
||||
std::shared_ptr<op::v3::Assign> constructGraph() {
|
||||
auto input = std::make_shared<op::v0::Parameter>(element::f32, PartialShape{-1, -1, -1, -1});
|
||||
auto read_value = std::make_shared<op::v3::ReadValue>(input, "variable_id");
|
||||
return std::make_shared<op::v3::Assign>(read_value, "variable_id");
|
||||
}
|
||||
|
||||
template <>
|
||||
std::shared_ptr<op::v6::Assign> constructGraph() {
|
||||
auto input = std::make_shared<op::v0::Parameter>(element::f32, PartialShape{-1, -1, -1, -1});
|
||||
auto variable = std::make_shared<ov::op::util::Variable>(
|
||||
ov::op::util::VariableInfo{PartialShape::dynamic(), element::dynamic, "ID"});
|
||||
auto read_value = std::make_shared<op::v6::Assign>(input, variable);
|
||||
return std::make_shared<op::v6::Assign>(read_value, variable);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void assignTest() {
|
||||
auto assign = constructGraph<T>();
|
||||
|
||||
// Test StaticShape
|
||||
std::vector<StaticShape> static_input_shapes = {StaticShape{1, 2, 64, 64}}, static_output_shapes = {StaticShape{}};
|
||||
shape_inference(assign.get(), static_input_shapes, static_output_shapes);
|
||||
ASSERT_EQ(static_input_shapes[0], (StaticShape{1, 2, 64, 64}));
|
||||
}
|
||||
|
||||
TEST(StaticShapeInferenceTest, AssignTest) {
|
||||
// Test v3 Assign
|
||||
assignTest<op::v3::Assign>();
|
||||
// Test v6 Assign
|
||||
assignTest<op::v6::Assign>();
|
||||
}
|
||||
@@ -1,37 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <openvino/op/experimental_detectron_prior_grid_generator.hpp>
|
||||
#include <openvino/op/ops.hpp>
|
||||
#include <openvino/op/parameter.hpp>
|
||||
#include <utils/shape_inference/shape_inference.hpp>
|
||||
#include <utils/shape_inference/static_shape.hpp>
|
||||
|
||||
using namespace ov;
|
||||
|
||||
TEST(StaticShapeInferenceTest, PriorGridGenerator) {
|
||||
op::v6::ExperimentalDetectronPriorGridGenerator::Attributes attrs;
|
||||
attrs.flatten = false;
|
||||
attrs.h = 0;
|
||||
attrs.w = 0;
|
||||
attrs.stride_x = 4.0f;
|
||||
attrs.stride_y = 4.0f;
|
||||
|
||||
auto priors = std::make_shared<ov::op::v0::Parameter>(element::f32, PartialShape{-1, -1});
|
||||
auto feature_map = std::make_shared<ov::op::v0::Parameter>(element::f32, PartialShape{-1, -1, -1, -1});
|
||||
auto im_data = std::make_shared<ov::op::v0::Parameter>(element::f32, PartialShape{-1, -1, -1, -1});
|
||||
|
||||
auto grid_gen =
|
||||
std::make_shared<ov::op::v6::ExperimentalDetectronPriorGridGenerator>(priors, feature_map, im_data, attrs);
|
||||
|
||||
std::vector<StaticShape> static_input_shapes = {StaticShape{3, 4},
|
||||
StaticShape{1, 256, 200, 336},
|
||||
StaticShape{1, 3, 800, 1344}},
|
||||
static_output_shapes = {StaticShape{}};
|
||||
shape_inference(grid_gen.get(), static_input_shapes, static_output_shapes);
|
||||
|
||||
ASSERT_EQ(static_output_shapes[0], StaticShape({200, 336, 3, 4}));
|
||||
}
|
||||
@@ -1,38 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <openvino/op/ops.hpp>
|
||||
#include <openvino/op/parameter.hpp>
|
||||
#include <utils/shape_inference/shape_inference.hpp>
|
||||
#include <utils/shape_inference/static_shape.hpp>
|
||||
|
||||
using namespace ov;
|
||||
|
||||
TEST(StaticShapeInferenceTest, LstmCellTest) {
|
||||
const size_t batch_size = 2;
|
||||
const size_t input_size = 3;
|
||||
const size_t hidden_size = 3;
|
||||
const size_t gates_count = 4;
|
||||
|
||||
const auto X = std::make_shared<op::v0::Parameter>(element::f32, PartialShape{-1, -1});
|
||||
const auto W = std::make_shared<op::v0::Parameter>(element::f32, PartialShape{-1, -1});
|
||||
const auto R = std::make_shared<op::v0::Parameter>(element::f32, PartialShape{-1, -1});
|
||||
const auto H_t = std::make_shared<op::v0::Parameter>(element::f32, PartialShape{-1, -1});
|
||||
const auto C_t = std::make_shared<op::v0::Parameter>(element::f32, PartialShape{-1, -1});
|
||||
const auto Bias = std::make_shared<op::v0::Parameter>(element::f32, PartialShape{-1});
|
||||
const auto lstm_cell = std::make_shared<op::v4::LSTMCell>(X, H_t, C_t, W, R, Bias, hidden_size);
|
||||
|
||||
std::vector<StaticShape> static_input_shapes = {StaticShape{batch_size, input_size},
|
||||
StaticShape{batch_size, hidden_size},
|
||||
StaticShape{batch_size, hidden_size},
|
||||
StaticShape{gates_count * hidden_size, input_size},
|
||||
StaticShape{gates_count * hidden_size, hidden_size},
|
||||
StaticShape{gates_count * hidden_size}},
|
||||
static_output_shapes = {StaticShape{}, StaticShape{}};
|
||||
shape_inference(lstm_cell.get(), static_input_shapes, static_output_shapes);
|
||||
ASSERT_EQ(static_output_shapes[0], StaticShape({batch_size, hidden_size}));
|
||||
ASSERT_EQ(static_output_shapes[1], StaticShape({batch_size, hidden_size}));
|
||||
}
|
||||
@@ -1,45 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <openvino/op/ops.hpp>
|
||||
#include <openvino/op/parameter.hpp>
|
||||
#include <utils/shape_inference/shape_inference.hpp>
|
||||
#include <utils/shape_inference/static_shape.hpp>
|
||||
|
||||
using namespace ov;
|
||||
|
||||
template <class T>
|
||||
std::shared_ptr<T> constructGraph();
|
||||
|
||||
template <>
|
||||
std::shared_ptr<op::v3::ReadValue> constructGraph() {
|
||||
auto input = std::make_shared<op::v0::Parameter>(element::f32, PartialShape{-1, -1, -1, -1});
|
||||
return std::make_shared<op::v3::ReadValue>(input, "variable_id");
|
||||
}
|
||||
|
||||
template <>
|
||||
std::shared_ptr<op::v6::ReadValue> constructGraph() {
|
||||
auto input = std::make_shared<op::v0::Parameter>(element::f32, PartialShape{-1, -1, -1, -1});
|
||||
auto variable = std::make_shared<ov::op::util::Variable>(
|
||||
ov::op::util::VariableInfo{PartialShape::dynamic(), element::dynamic, "ID"});
|
||||
return std::make_shared<op::v6::ReadValue>(input, variable);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void readValueTest() {
|
||||
auto readValue = constructGraph<T>();
|
||||
|
||||
// Test StaticShape
|
||||
std::vector<StaticShape> static_input_shapes = {StaticShape{1, 2, 64, 64}}, static_output_shapes = {StaticShape{}};
|
||||
shape_inference(readValue.get(), static_input_shapes, static_output_shapes);
|
||||
ASSERT_EQ(static_output_shapes[0], (StaticShape{1, 2, 64, 64}));
|
||||
}
|
||||
|
||||
TEST(StaticShapeInferenceTest, ReadValueTest) {
|
||||
// Test v3 ReadValue
|
||||
readValueTest<op::v3::ReadValue>();
|
||||
// Test v6 ReadValue
|
||||
readValueTest<op::v6::ReadValue>();
|
||||
}
|
||||
@@ -1,50 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <openvino/op/ops.hpp>
|
||||
#include <openvino/op/parameter.hpp>
|
||||
#include <utils/shape_inference/shape_inference.hpp>
|
||||
#include <utils/shape_inference/static_shape.hpp>
|
||||
|
||||
using namespace ov;
|
||||
|
||||
TEST(StaticShapeInferenceTest, TileTest) {
|
||||
auto param0 = std::make_shared<ov::op::v0::Parameter>(element::f32, PartialShape{-1, -1, -1});
|
||||
auto param1 = std::make_shared<ov::op::v0::Constant>(element::i64, ov::Shape{3}, std::vector<int>{3, 4, 1});
|
||||
auto tile = std::make_shared<op::v0::Tile>(param0, param1);
|
||||
// Test Static Shape
|
||||
std::vector<StaticShape> static_input_shapes = {StaticShape{6, 8, 10}, StaticShape{3}},
|
||||
static_output_shapes = {StaticShape{}};
|
||||
shape_inference(tile.get(), static_input_shapes, static_output_shapes);
|
||||
ASSERT_EQ(static_output_shapes[0], StaticShape({18, 32, 10}));
|
||||
// Test Wrong Static Shape
|
||||
std::vector<StaticShape> wrong_static_input_shapes = {StaticShape{6, 8, 10}, StaticShape{}},
|
||||
wrong_static_output_shapes = {StaticShape{}};
|
||||
|
||||
ASSERT_THROW(shape_inference(tile.get(), wrong_static_input_shapes, wrong_static_output_shapes), ov::AssertFailure);
|
||||
}
|
||||
|
||||
TEST(StaticShapeInferenceTest, TileFewRepeatsTest) {
|
||||
auto param0 = std::make_shared<ov::op::v0::Parameter>(element::f32, PartialShape{-1, -1, -1});
|
||||
auto param1 = ov::op::v0::Constant::create(element::i64, Shape{2}, {4, 1});
|
||||
auto tile = std::make_shared<op::v0::Tile>(param0, param1);
|
||||
// Test Static Shape
|
||||
std::vector<StaticShape> static_input_shapes = {StaticShape{6, 8, 10}, StaticShape{2}},
|
||||
static_output_shapes = {StaticShape{}};
|
||||
shape_inference(tile.get(), static_input_shapes, static_output_shapes);
|
||||
ASSERT_EQ(static_output_shapes[0], StaticShape({6, 32, 10}));
|
||||
}
|
||||
|
||||
TEST(StaticShapeInferenceTest, TileSmallDataRankTest) {
|
||||
auto param0 = std::make_shared<ov::op::v0::Parameter>(element::f32, PartialShape{-1, -1});
|
||||
auto param1 = ov::op::v0::Constant::create(element::i64, Shape{3}, {3, 4, 1});
|
||||
auto tile = std::make_shared<op::v0::Tile>(param0, param1);
|
||||
// Test Static Shape
|
||||
std::vector<StaticShape> static_input_shapes = {StaticShape{8, 10}, StaticShape{3}},
|
||||
static_output_shapes = {StaticShape{}};
|
||||
shape_inference(tile.get(), static_input_shapes, static_output_shapes);
|
||||
ASSERT_EQ(static_output_shapes[0], StaticShape({3, 32, 10}));
|
||||
}
|
||||
@@ -84,8 +84,8 @@ openvino_developer_export_targets(COMPONENT inference_engine TARGETS ${TARGET_NA
|
||||
|
||||
if(BUILD_SHARED_LIBS)
|
||||
install(TARGETS ${TARGET_NAME}
|
||||
RUNTIME DESTINATION ${IE_CPACK_RUNTIME_PATH} COMPONENT core
|
||||
LIBRARY DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT core
|
||||
RUNTIME DESTINATION ${IE_CPACK_RUNTIME_PATH} COMPONENT core OPTIONAL
|
||||
LIBRARY DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT core OPTIONAL
|
||||
NAMELINK_COMPONENT core_dev)
|
||||
else()
|
||||
ov_install_static_lib(${TARGET_NAME} core)
|
||||
|
||||
@@ -48,10 +48,6 @@ set_target_properties(${TARGET_NAME} PROPERTIES SOVERSION 2022.1.1)
|
||||
|
||||
set_target_properties(${TARGET_NAME} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE ${ENABLE_LTO})
|
||||
|
||||
# developer package
|
||||
|
||||
openvino_developer_export_targets(COMPONENT inference_engine TARGETS ${TARGET_NAME})
|
||||
|
||||
# install
|
||||
|
||||
# TODO: uncomment once snippets are integrated into CPU plugin
|
||||
|
||||
@@ -0,0 +1,32 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
||||
#include "transformations_visibility.hpp"
|
||||
|
||||
#include "ngraph/pass/graph_rewrite.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
|
||||
class TRANSFORMATIONS_API TransposeReshapeEliminationForMatmul;
|
||||
|
||||
} // namespace pass
|
||||
} // namespace ngraph
|
||||
|
||||
/**
|
||||
* @ingroup ie_transformation_common_api
|
||||
* @brief TransposeReshapeEliminationForMatmul transformation eliminates Transpose and Reshape which were created to
|
||||
* align input and output dimension ranks before second MatMul input and after MatMul output
|
||||
* (for example, after Einsum Decomposition inside TensorFlow 1 and nGraph EinsumDecomposition transformation)
|
||||
*/
|
||||
class ngraph::pass::TransposeReshapeEliminationForMatmul: public ngraph::pass::MatcherPass {
|
||||
public:
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
TransposeReshapeEliminationForMatmul();
|
||||
};
|
||||
@@ -51,6 +51,7 @@
|
||||
#include "transformations/common_optimizations/mul_conv_fusion.hpp"
|
||||
#include "transformations/common_optimizations/interpolate_sequence_fusion.hpp"
|
||||
#include "transformations/common_optimizations/convert_compression_only_to_legacy.hpp"
|
||||
#include <transformations/common_optimizations/transpose_reshape_elimination_for_matmul.hpp>
|
||||
#include "transformations/op_conversions/bidirectional_sequences_decomposition.hpp"
|
||||
#include "transformations/op_conversions/convert_pad_to_group_conv.hpp"
|
||||
#include "transformations/op_conversions/convert_divide.hpp"
|
||||
@@ -149,6 +150,7 @@ bool ngraph::pass::CommonOptimizations::run_on_function(std::shared_ptr<ngraph::
|
||||
decomp->add_matcher<ngraph::pass::SoftmaxDecomposition, false>();
|
||||
decomp->add_matcher<ngraph::pass::GatherNegativeConstIndicesNormalize>();
|
||||
decomp->add_matcher<ngraph::pass::DropoutWithRandomUniformReplacer>();
|
||||
decomp->add_matcher<ngraph::pass::TransposeReshapeEliminationForMatmul>();
|
||||
decomp->set_name("ngraph::pass::CommonDecompositions");
|
||||
|
||||
// CF is required after all decompositions
|
||||
|
||||
@@ -0,0 +1,175 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "transformations/common_optimizations/transpose_reshape_elimination_for_matmul.hpp"
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "ngraph/opsets/opset1.hpp"
|
||||
#include "ngraph/rt_info.hpp"
|
||||
#include "ngraph/pattern/op/wrap_type.hpp"
|
||||
#include "ngraph/validation_util.hpp"
|
||||
#include "itt.hpp"
|
||||
|
||||
namespace {
|
||||
/// \brief Check for correct Transpose orders which are before and after MatMul. Second Transpose must be back for
|
||||
/// first Transpose before MatMul
|
||||
///
|
||||
/// \param before_order Order of Transpose which is before MatMul
|
||||
/// \param after_order Order of Transpose which is after MatMul
|
||||
/// \param transposed_b true - second MatMul input is transposed, otherwise, it's not transposed
|
||||
///
|
||||
/// \return True - Transposes have right orders, otherwise, Transposes have incorrect order for transformation
|
||||
///
|
||||
bool check_transposes(const std::vector<int64_t>& before_order, const std::vector<int64_t>& after_order, const bool transposed_b) {
|
||||
const size_t rank = before_order.size();
|
||||
if (rank < 3)
|
||||
return false;
|
||||
|
||||
if (before_order.size() != after_order.size())
|
||||
return false;
|
||||
|
||||
if (transposed_b) {
|
||||
// before order must be : 0, 1, 2, ..., N-1, N-2
|
||||
std::vector<int64_t> start_order(rank);
|
||||
std::iota(start_order.begin(), start_order.begin() + rank - 2, 0);
|
||||
start_order[rank - 1] = rank - 2;
|
||||
start_order[rank - 2] = rank - 1;
|
||||
|
||||
if (before_order != start_order)
|
||||
return false;
|
||||
|
||||
// after order must be : 1, ..., N-2, 0, N-1
|
||||
std::vector<int64_t> back_order(rank);
|
||||
std::iota(back_order.begin(), back_order.begin() + rank - 2, 1);
|
||||
back_order[rank - 2] = 0;
|
||||
back_order[rank - 1] = rank - 1;
|
||||
|
||||
if (after_order != back_order)
|
||||
return false;
|
||||
} else {
|
||||
// before order must be : N-2, N-1, 0, 1, 2, ...
|
||||
std::vector<int64_t> needed_transpose_order_before(rank);
|
||||
std::iota(needed_transpose_order_before.begin() + 2, needed_transpose_order_before.end(), 0);
|
||||
needed_transpose_order_before[0] = rank - 2;
|
||||
needed_transpose_order_before[1] = rank - 1;
|
||||
|
||||
if (before_order != needed_transpose_order_before)
|
||||
return false;
|
||||
|
||||
// transpose order after matmul must be back for transpose before
|
||||
std::vector<int64_t> back_order(rank);
|
||||
for (size_t i = 0; i < rank; i++)
|
||||
back_order[i] = std::distance(after_order.begin(), std::find(after_order.begin(), after_order.end(), i));
|
||||
|
||||
if (before_order != back_order)
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// \brief Check for input Reshape which are before MatMul
|
||||
///
|
||||
/// \param reshape Reshape which is before MatMul
|
||||
/// \param new_shape New shape for Reshape
|
||||
/// \param transposed_b true - second MatMul input is transposed, otherwise, it's not transposed
|
||||
///
|
||||
/// \return True - Reshape has right new shape for reshaping, otherwise, Reshape has incorrect new shape for transformation
|
||||
///
|
||||
bool check_input_reshape(const std::shared_ptr<ngraph::opset1::Reshape>& reshape,
|
||||
const std::vector<int64_t>& new_shape, const bool transposed_b) {
|
||||
const auto input_shape = reshape->get_input_shape(0);
|
||||
const size_t input_rank = input_shape.size();
|
||||
const size_t output_rank = reshape->get_output_shape(0).size();
|
||||
if (input_rank < 3 || output_rank != 2)
|
||||
return false;
|
||||
|
||||
if (transposed_b) {
|
||||
const int64_t k = input_shape.back();
|
||||
const int64_t new_n = ov::shape_size(input_shape) / k;
|
||||
if (new_shape != std::vector<int64_t>{new_n, k})
|
||||
return false;
|
||||
} else {
|
||||
const int64_t k = input_shape.front();
|
||||
const int64_t new_n = ov::shape_size(input_shape) / k;
|
||||
if (new_shape != std::vector<int64_t>{k, -1} && new_shape != std::vector<int64_t>{k, new_n})
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
NGRAPH_RTTI_DEFINITION(ngraph::pass::TransposeReshapeEliminationForMatmul, "TransposeReshapeEliminationForMatmul", 0);
|
||||
|
||||
ngraph::pass::TransposeReshapeEliminationForMatmul::TransposeReshapeEliminationForMatmul() {
|
||||
MATCHER_SCOPE(TransposeReshapeEliminationForMatmul);
|
||||
auto input_1_pattern = ngraph::pattern::any_input([] (const Output<Node>& node) -> bool {
|
||||
const auto& shape = node.get_partial_shape();
|
||||
const auto& rank = shape.rank();
|
||||
return rank.is_static() && rank.get_length() == 2 && shape.is_static();
|
||||
});
|
||||
auto input_2_pattern = ngraph::pattern::any_input([] (const Output<Node>& node) -> bool {
|
||||
return node.get_partial_shape().is_static();
|
||||
});
|
||||
|
||||
auto const_transpose_before_pattern = ngraph::pattern::wrap_type<opset1::Constant>();
|
||||
auto transpose_before_pattern = ngraph::pattern::wrap_type<opset1::Transpose>({input_2_pattern, const_transpose_before_pattern});
|
||||
|
||||
auto const_reshape_before_pattern = ngraph::pattern::wrap_type<opset1::Constant>();
|
||||
auto reshape_before_pattern = ngraph::pattern::wrap_type<opset1::Reshape>({transpose_before_pattern, const_reshape_before_pattern});
|
||||
|
||||
auto matmul_pattern = ngraph::pattern::wrap_type<opset1::MatMul>({input_1_pattern, reshape_before_pattern});
|
||||
|
||||
auto const_reshape_after_pattern = ngraph::pattern::wrap_type<opset1::Constant>();
|
||||
auto reshape_after_pattern = ngraph::pattern::wrap_type<opset1::Reshape>({matmul_pattern, const_reshape_after_pattern});
|
||||
|
||||
auto const_transpose_after_pattern = ngraph::pattern::wrap_type<opset1::Constant>();
|
||||
auto transpose_after_pattern = ngraph::pattern::wrap_type<opset1::Transpose>({reshape_after_pattern, const_transpose_after_pattern});
|
||||
|
||||
ngraph::matcher_pass_callback callback = [=](pattern::Matcher& m) {
|
||||
const auto& pattern_value_map = m.get_pattern_value_map();
|
||||
const auto& input_1 = pattern_value_map.at(input_1_pattern);
|
||||
const auto& input_2 = pattern_value_map.at(input_2_pattern);
|
||||
|
||||
auto matmul = std::dynamic_pointer_cast<opset1::MatMul>(pattern_value_map.at(matmul_pattern).get_node_shared_ptr());
|
||||
if (!matmul)
|
||||
return false;
|
||||
const bool transposed_a = matmul->get_transpose_a();
|
||||
const bool transposed_b = matmul->get_transpose_b();
|
||||
|
||||
auto reshape_before = std::dynamic_pointer_cast<opset1::Reshape>(pattern_value_map.at(reshape_before_pattern).get_node_shared_ptr());
|
||||
auto reshape_after = std::dynamic_pointer_cast<opset1::Reshape>(pattern_value_map.at(reshape_after_pattern).get_node_shared_ptr());
|
||||
auto reshape_before_constant = std::dynamic_pointer_cast<ngraph::opset1::Constant>(
|
||||
pattern_value_map.at(const_reshape_before_pattern).get_node_shared_ptr());
|
||||
if (!reshape_before || !reshape_after || !reshape_before_constant)
|
||||
return false;
|
||||
if (!check_input_reshape(reshape_before, reshape_before_constant->cast_vector<int64_t>(), transposed_b))
|
||||
return false;
|
||||
|
||||
// check transpose order before and after matmul
|
||||
auto transpose_before = std::dynamic_pointer_cast<opset1::Transpose>(pattern_value_map.at(transpose_before_pattern).get_node_shared_ptr());
|
||||
auto transpose_after = std::dynamic_pointer_cast<opset1::Transpose>(pattern_value_map.at(transpose_after_pattern).get_node_shared_ptr());
|
||||
auto transpose_before_constant = std::dynamic_pointer_cast<ngraph::opset1::Constant>(transpose_before->get_input_node_shared_ptr(1));
|
||||
auto transpose_after_constant = std::dynamic_pointer_cast<ngraph::opset1::Constant>(transpose_after->get_input_node_shared_ptr(1));
|
||||
if (!transpose_before || !transpose_after || !transpose_before_constant || !transpose_after_constant)
|
||||
return false;
|
||||
auto transpose_before_order = transpose_before_constant->cast_vector<int64_t>();
|
||||
auto transpose_after_order = transpose_after_constant->cast_vector<int64_t>();
|
||||
// need to check that input shape is correctly contracted and output shape is correctly unpacked using transposes
|
||||
if (!check_transposes(transpose_before_order, transpose_after_order, transposed_b))
|
||||
return false;
|
||||
|
||||
const auto new_matmul = std::make_shared<opset1::MatMul>(input_1, input_2, transposed_a, false);
|
||||
new_matmul->set_friendly_name(transpose_after->get_friendly_name());
|
||||
copy_runtime_info({transpose_before, reshape_before, matmul, reshape_after, transpose_after}, new_matmul);
|
||||
replace_node(transpose_after, new_matmul);
|
||||
return true;
|
||||
};
|
||||
|
||||
auto m = std::make_shared<ngraph::pattern::Matcher>(transpose_after_pattern, matcher_name);
|
||||
this->register_matcher(m, callback);
|
||||
}
|
||||
@@ -34,8 +34,6 @@ public:
|
||||
|
||||
private:
|
||||
std::string m_variable_id;
|
||||
template <class T>
|
||||
friend void shape_infer(const Assign* op, const std::vector<T>& input_shapes, std::vector<T>& output_shapes);
|
||||
};
|
||||
} // namespace v3
|
||||
|
||||
@@ -72,10 +70,6 @@ public:
|
||||
OPENVINO_SUPPRESS_DEPRECATED_END
|
||||
bool has_evaluate() const override;
|
||||
bool constant_fold(OutputVector& output_values, const OutputVector& inputs_values) override;
|
||||
|
||||
private:
|
||||
template <class T>
|
||||
friend void shape_infer(const Assign* op, const std::vector<T>& input_shapes, std::vector<T>& output_shapes);
|
||||
};
|
||||
} // namespace v6
|
||||
} // namespace op
|
||||
|
||||
@@ -60,10 +60,8 @@ public:
|
||||
|
||||
private:
|
||||
Attributes m_attrs;
|
||||
template <class T>
|
||||
friend void shape_infer(const ExperimentalDetectronPriorGridGenerator* op,
|
||||
const std::vector<T>& input_shapes,
|
||||
std::vector<T>& output_shapes);
|
||||
|
||||
void validate();
|
||||
};
|
||||
} // namespace v6
|
||||
} // namespace op
|
||||
|
||||
@@ -241,8 +241,6 @@ private:
|
||||
|
||||
static constexpr std::size_t s_gates_count{4};
|
||||
static constexpr std::size_t s_peepholes_count{3};
|
||||
template <class T>
|
||||
friend void shape_infer(const LSTMCell* op, const std::vector<T>& input_shapes, std::vector<T>& output_shapes);
|
||||
};
|
||||
} // namespace v0
|
||||
|
||||
@@ -380,8 +378,6 @@ private:
|
||||
util::ActivationFunction m_activation_h;
|
||||
|
||||
static constexpr std::size_t s_gates_count{4};
|
||||
template <class T>
|
||||
friend void shape_infer(const LSTMCell* op, const std::vector<T>& input_shapes, std::vector<T>& output_shapes);
|
||||
};
|
||||
} // namespace v4
|
||||
} // namespace op
|
||||
|
||||
@@ -1,41 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
#pragma once
|
||||
#include <openvino/core/graph_util.hpp>
|
||||
#include <openvino/op/assign.hpp>
|
||||
|
||||
#include "utils.hpp"
|
||||
namespace ov {
|
||||
namespace op {
|
||||
namespace v3 {
|
||||
|
||||
template <class T>
|
||||
void shape_infer(const Assign* op, const std::vector<T>& input_shapes, std::vector<T>& output_shapes) {
|
||||
NODE_VALIDATION_CHECK(op, input_shapes.size() == 1 && output_shapes.size() == 1);
|
||||
const auto& input_shape = input_shapes[0];
|
||||
const auto& variable_info = op->m_variable->get_info();
|
||||
NODE_VALIDATION_CHECK(op,
|
||||
op->m_variable_id == variable_info.variable_id,
|
||||
"Variables identifiers are inconsistent.");
|
||||
const auto& arg_t = op->get_input_element_type(0);
|
||||
NODE_VALIDATION_CHECK(op, arg_t == variable_info.data_type, "Variables types are inconsistent.");
|
||||
|
||||
if (input_shape.is_static() && variable_info.data_shape.is_static()) {
|
||||
NODE_VALIDATION_CHECK(op,
|
||||
input_shape.to_shape() == variable_info.data_shape.to_shape(),
|
||||
"Variables output shapes are inconsistent.");
|
||||
}
|
||||
copy_shape_infer(op, input_shapes, output_shapes);
|
||||
}
|
||||
} // namespace v3
|
||||
|
||||
namespace v6 {
|
||||
|
||||
template <class T>
|
||||
void shape_infer(const Assign* op, const std::vector<T>& input_shapes, std::vector<T>& output_shapes) {
|
||||
copy_shape_infer(op, input_shapes, output_shapes);
|
||||
}
|
||||
} // namespace v6
|
||||
} // namespace op
|
||||
} // namespace ov
|
||||
@@ -1,76 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
#pragma once
|
||||
#include <openvino/op/experimental_detectron_prior_grid_generator.hpp>
|
||||
|
||||
namespace ov {
|
||||
namespace op {
|
||||
namespace v6 {
|
||||
|
||||
template <class T>
|
||||
void shape_infer(const ExperimentalDetectronPriorGridGenerator* op,
|
||||
const std::vector<T>& input_shapes,
|
||||
std::vector<T>& output_shapes) {
|
||||
NODE_VALIDATION_CHECK(op, input_shapes.size() == 3 && output_shapes.size() == 1);
|
||||
const auto& priors_shape = input_shapes[0];
|
||||
const auto& featmap_shape = input_shapes[1];
|
||||
const auto& im_data_shape = input_shapes[2];
|
||||
|
||||
auto& output_shape = output_shapes[0];
|
||||
size_t output_size = op->m_attrs.flatten ? 2 : 4;
|
||||
|
||||
output_shape.resize(output_size);
|
||||
output_shape[output_size - 1] = 4;
|
||||
|
||||
bool prior_rank_static = priors_shape.rank().is_static();
|
||||
bool featmap_rank_static = featmap_shape.rank().is_static();
|
||||
bool im_data_rank_static = im_data_shape.rank().is_static();
|
||||
|
||||
if (prior_rank_static) {
|
||||
NODE_VALIDATION_CHECK(op, priors_shape.size() == 2, "Priors rank must be equal to 2.");
|
||||
NODE_VALIDATION_CHECK(op,
|
||||
priors_shape[1].compatible(4),
|
||||
"The last dimension of the 'priors' input must be equal to 4. Got: ",
|
||||
priors_shape[1]);
|
||||
}
|
||||
|
||||
if (featmap_rank_static) {
|
||||
NODE_VALIDATION_CHECK(op, featmap_shape.size() == 4, "Feature_map rank must be equal to 4.");
|
||||
}
|
||||
|
||||
if (im_data_rank_static) {
|
||||
NODE_VALIDATION_CHECK(op, im_data_shape.size() == 4, "Im_data rank must be equal to 4.");
|
||||
}
|
||||
|
||||
if (featmap_rank_static && im_data_rank_static) {
|
||||
const auto& num_batches_featmap = featmap_shape[0];
|
||||
const auto& num_batches_im_data = im_data_shape[0];
|
||||
|
||||
NODE_VALIDATION_CHECK(op,
|
||||
num_batches_featmap.compatible(num_batches_im_data),
|
||||
"The first dimension of both 'feature_map' and 'im_data' must match. "
|
||||
"Feature_map: ",
|
||||
num_batches_featmap,
|
||||
"; Im_data: ",
|
||||
num_batches_im_data);
|
||||
}
|
||||
|
||||
if (op->m_attrs.flatten) {
|
||||
if (prior_rank_static && featmap_rank_static) {
|
||||
output_shape[0] = featmap_shape[2] * featmap_shape[3] * priors_shape[0];
|
||||
}
|
||||
} else {
|
||||
if (featmap_rank_static) {
|
||||
output_shape[0] = featmap_shape[2];
|
||||
output_shape[1] = featmap_shape[3];
|
||||
}
|
||||
if (prior_rank_static) {
|
||||
output_shape[2] = priors_shape[0];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace v6
|
||||
} // namespace op
|
||||
} // namespace ov
|
||||
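A concrete reading of the output-shape rule above, with invented shapes for illustration; `grid_gen` stands for an existing v6::ExperimentalDetectronPriorGridGenerator node and is an assumption of the sketch:

    // Example values only: priors {3, 4}, feature_map {1, 256, 25, 42}, im_data {1, 3, 800, 1344}
    std::vector<ov::PartialShape> input_shapes = {{3, 4}, {1, 256, 25, 42}, {1, 3, 800, 1344}};
    std::vector<ov::PartialShape> output_shapes = {ov::PartialShape{}};
    ov::op::v6::shape_infer(grid_gen.get(), input_shapes, output_shapes);
    // flatten == true  -> output_shapes[0] is {25 * 42 * 3, 4}, i.e. {3150, 4}
    // flatten == false -> output_shapes[0] is {25, 42, 3, 4}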
@@ -1,191 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
#pragma once
|
||||
#include <openvino/op/lstm_cell.hpp>
|
||||
#include "utils.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace op {
|
||||
namespace ShapeInferLSTM {
|
||||
template <class OpsType, class ShapeType>
|
||||
void lstm_shape_infer(const OpsType* op,
|
||||
const std::vector<ShapeType>& input_shapes,
|
||||
std::vector<ShapeType>& output_shapes,
|
||||
std::size_t gates_count) {
|
||||
using DimType = typename std::iterator_traits<typename ShapeType::iterator>::value_type;
|
||||
enum { X, initial_hidden_state, initial_cell_state, W, R, B };
|
||||
std::vector<bool> input_rank_static(6, false);
|
||||
bool all_rank_dynamic = false;
|
||||
bool all_rank_static = true;
|
||||
// Prepare OutShape
|
||||
auto& hidden_shape = output_shapes[0];
|
||||
auto& cell_shape = output_shapes[1];
|
||||
hidden_shape.resize(2);
|
||||
cell_shape.resize(2);
|
||||
|
||||
// If rank is dynamic, then output_shape is undefined
|
||||
for (size_t i = 0; i < input_shapes.size(); i++) {
|
||||
input_rank_static[i] = input_shapes[i].rank().is_static();
|
||||
all_rank_dynamic &= !input_rank_static[i];
|
||||
all_rank_static &= input_rank_static[i];
|
||||
}
|
||||
|
||||
if (all_rank_dynamic) {
|
||||
return;
|
||||
}
|
||||
const auto& x_pshape = input_shapes[0];
|
||||
const auto& w_pshape = input_shapes[3];
|
||||
|
||||
DimType output_batch_size;
|
||||
DimType output_hidden_size;
|
||||
bool is_batch_init = false;
|
||||
bool is_hidden_init = false;
|
||||
|
||||
// deduce batch/hidden_size
|
||||
for (size_t i = 0; i < input_shapes.size(); i++) {
|
||||
const auto& input = input_shapes[i];
|
||||
if (input_rank_static[i]) {
|
||||
// batch could be deduced from x, cell_state or hidden_state
|
||||
if (i == X || i == initial_cell_state || i == initial_hidden_state) {
|
||||
NODE_VALIDATION_CHECK(op,
|
||||
(input.size() == 2),
|
||||
"LSTMCell input rank is not correct for ",
|
||||
i,
|
||||
" input parameter. Current rank: ",
|
||||
input.size(),
|
||||
", expected: 2.");
|
||||
if (!is_batch_init) {
|
||||
output_batch_size = input[0];
|
||||
is_batch_init = true;
|
||||
} else {
|
||||
NODE_VALIDATION_CHECK(
|
||||
op,
|
||||
DimType::merge(output_batch_size, output_batch_size, input[0]),
|
||||
"Parameter batch_size not matched for X, initial_hidden_state or initial_cell_state "
|
||||
"inputs.");
|
||||
}
|
||||
if (i == initial_cell_state || i == initial_hidden_state) {
|
||||
if (!is_hidden_init) {
|
||||
output_hidden_size = input[1];
|
||||
is_hidden_init = true;
|
||||
} else {
|
||||
NODE_VALIDATION_CHECK(op,
|
||||
DimType::merge(output_hidden_size, output_hidden_size, input[1]),
|
||||
"Parameter hidden_size not matched for W, R, B, initial_hidden_state and "
|
||||
"initial_cell_state "
|
||||
"inputs.");
|
||||
}
|
||||
}
|
||||
} else if (i == W || i == R || i == B) {
|
||||
// check input dimension
|
||||
if (i == B) {
|
||||
NODE_VALIDATION_CHECK(op,
|
||||
(input.size() == 1),
|
||||
"LSTMCell input tensor dimension is not correct for ",
|
||||
i,
|
||||
" input parameter. Current input length: ",
|
||||
input.size(),
|
||||
", expected: 1.");
|
||||
if (input[0].is_static()) {
|
||||
if (!is_hidden_init) {
|
||||
output_hidden_size = input[0].get_length() / gates_count;
|
||||
is_hidden_init = true;
|
||||
} else {
|
||||
NODE_VALIDATION_CHECK(
|
||||
op,
|
||||
DimType::merge(output_hidden_size, output_hidden_size, input[0].get_length() / gates_count),
|
||||
"Parameter hidden_size not matched for W, R, B, initial_hidden_state and "
|
||||
"initial_cell_state "
|
||||
"inputs.");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
NODE_VALIDATION_CHECK(op,
|
||||
(input.size() == 2),
|
||||
"LSTMCell input rank is not correct for ",
|
||||
i,
|
||||
" input parameter. Current rank: ",
|
||||
input.size(),
|
||||
", expected: 2.");
|
||||
if (input[0].is_static()) {
|
||||
if (!is_hidden_init) {
|
||||
output_hidden_size = input[0].get_length() / gates_count;
|
||||
is_hidden_init = true;
|
||||
} else {
|
||||
NODE_VALIDATION_CHECK(
|
||||
op,
|
||||
DimType::merge(output_hidden_size, output_hidden_size, input[0].get_length() / gates_count),
|
||||
"Parameter hidden_size not matched for W, R, B, initial_hidden_state and "
|
||||
"initial_cell_state "
|
||||
"inputs.");
|
||||
}
|
||||
}
|
||||
if (i == R) {
|
||||
if (!is_hidden_init) {
|
||||
output_hidden_size = input[1];
|
||||
is_hidden_init = true;
|
||||
} else {
|
||||
NODE_VALIDATION_CHECK(op,
|
||||
DimType::merge(output_hidden_size, output_hidden_size, input[1]),
|
||||
"Parameter hidden_size not matched for W, R, B, initial_hidden_state "
|
||||
"and initial_cell_state "
|
||||
"inputs.");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Check peepholes
|
||||
if (input_shapes.size() == 7) {
|
||||
const auto& p_pshape = input_shapes[6];
|
||||
NODE_VALIDATION_CHECK(op,
|
||||
(p_pshape.rank().compatible(1)),
|
||||
"LSTMCell input tensor P shall have dimension 1D.");
|
||||
}
|
||||
|
||||
// check input size
|
||||
if (input_rank_static[X] && input_rank_static[W]) {
|
||||
NODE_VALIDATION_CHECK(op, (x_pshape[1].compatible(w_pshape[1])), "LSTMCell mismatched input_size dimension.");
|
||||
}
|
||||
|
||||
hidden_shape[0] = output_batch_size;
|
||||
hidden_shape[1] = output_hidden_size;
|
||||
cell_shape[0] = output_batch_size;
|
||||
cell_shape[1] = output_hidden_size;
|
||||
}
|
||||
|
||||
} // namespace ShapeInferLSTM
|
||||
|
||||
namespace v0 {
|
||||
using ShapeInferLSTM::lstm_shape_infer;
|
||||
template <class T>
|
||||
void shape_infer(const LSTMCell* op, const std::vector<T>& input_shapes, std::vector<T>& output_shapes) {
|
||||
NODE_VALIDATION_CHECK(op, input_shapes.size() == 7 && output_shapes.size() == 2);
|
||||
const auto& p_pshape = input_shapes[6];
|
||||
|
||||
lstm_shape_infer(op, input_shapes, output_shapes, op->s_gates_count);
|
||||
const auto& hidden_size = output_shapes[0][1];
|
||||
if (p_pshape[0].is_static() && hidden_size.is_static()) {
|
||||
NODE_VALIDATION_CHECK(op,
|
||||
p_pshape[0].compatible(hidden_size * op->s_peepholes_count),
|
||||
"Parameter hidden_size mistmatched in P input. Current value is: ",
|
||||
p_pshape[0].get_length(),
|
||||
", expected: ",
|
||||
hidden_size.get_length() * op->s_peepholes_count,
|
||||
".");
|
||||
}
|
||||
}
|
||||
} // namespace v0
|
||||
|
||||
namespace v4 {
|
||||
using ShapeInferLSTM::lstm_shape_infer;
|
||||
template <class T>
|
||||
void shape_infer(const LSTMCell* op, const std::vector<T>& input_shapes, std::vector<T>& output_shapes) {
|
||||
NODE_VALIDATION_CHECK(op, input_shapes.size() == 6 && output_shapes.size() == 2);
|
||||
lstm_shape_infer(op, input_shapes, output_shapes, op->s_gates_count);
|
||||
}
|
||||
} // namespace v4
|
||||
} // namespace op
|
||||
} // namespace ov
|
||||
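In practice the deduction above recovers both output shapes from partial information; a short sketch matching the "from x & w" case exercised in the type_prop tests further down in this diff (`cell` is an assumed, already-built opset4 LSTMCell):

    // Example values only: X {2, 3}, W {4 * 3, 3}; H_t, C_t, R and B left rank-dynamic.
    std::vector<ov::PartialShape> input_shapes = {{2, 3},
                                                  ov::PartialShape::dynamic(),
                                                  ov::PartialShape::dynamic(),
                                                  {12, 3},
                                                  ov::PartialShape::dynamic(),
                                                  ov::PartialShape::dynamic()};
    std::vector<ov::PartialShape> output_shapes = {ov::PartialShape{}, ov::PartialShape{}};
    ov::op::v4::shape_infer(cell.get(), input_shapes, output_shapes);
    // batch_size comes from X[0], hidden_size from W[0] / gates_count (12 / 4 = 3):
    // output_shapes[0] == output_shapes[1] == PartialShape{2, 3}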
@@ -1,29 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <openvino/op/read_value.hpp>
#include "utils.hpp"
namespace ov {
namespace op {

template <class OpType, class ShapeType>
void read_value_shape_infer(const OpType* op, const std::vector<ShapeType>& input_shapes, std::vector<ShapeType>& output_shapes) {
    copy_shape_infer(op, input_shapes, output_shapes);
}

namespace v3 {
template <class T>
void shape_infer(const ReadValue* op, const std::vector<T>& input_shapes, std::vector<T>& output_shapes) {
    read_value_shape_infer(op, input_shapes, output_shapes);
}
} // namespace v3

namespace v6 {
template <class T>
void shape_infer(const ReadValue* op, const std::vector<T>& input_shapes, std::vector<T>& output_shapes) {
    read_value_shape_infer(op, input_shapes, output_shapes);
}
} // namespace v6
} // namespace op
} // namespace ov
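Both opset versions simply forward to read_value_shape_infer, so the output mirrors the init-value shape; a short sketch with an invented node and shape (`read_value` is assumed, not from the diff):

    // read_value is assumed to be an existing ov::op::v6::ReadValue node.
    std::vector<ov::PartialShape> input_shapes = {{1, 3, 224, 224}};
    std::vector<ov::PartialShape> output_shapes = {ov::PartialShape{}};
    ov::op::v6::shape_infer(read_value.get(), input_shapes, output_shapes);
    // output_shapes[0] == PartialShape{1, 3, 224, 224}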
@@ -1,52 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <openvino/op/tile.hpp>

#include "utils.hpp"
namespace ov {
namespace op {
namespace v0 {

template <class T>
void shape_infer(const Tile* op,
                 const std::vector<T>& input_shapes,
                 std::vector<T>& output_shapes,
                 const std::map<size_t, std::shared_ptr<ngraph::runtime::HostTensor>>& constant_data = {}) {
    NODE_VALIDATION_CHECK(op, input_shapes.size() == 2 && output_shapes.size() == 1);
    const auto& arg_shape = input_shapes[0];
    auto& repeats_shape = input_shapes[1];
    auto& output_shape = output_shapes[0];
    using DimType = typename std::iterator_traits<typename T::iterator>::value_type;
    std::vector<int64_t> axes_val;
    NODE_VALIDATION_CHECK(op, repeats_shape.rank().compatible(1), "PartialShape of repeats must be of rank 1");

    //Get repeats
    bool axes_are_known = get_data_as_int64<T>(1, op, axes_val, constant_data);
    const auto arg_rank = arg_shape.rank();
    if (arg_rank.is_static() && (axes_are_known || repeats_shape[0].is_static())) {
        //try to specify rank
        int64_t data_rank = arg_shape.size();
        int64_t repeats_rank = axes_are_known ? axes_val.size() : repeats_shape[0].get_length();
        auto output_rank = std::max(data_rank, repeats_rank);
        output_shape.resize(output_rank);
        //if have constant axes, compute new axes
        if (axes_are_known) {
            auto remain_arg = output_rank - data_rank;
            auto remain_axes = output_rank - repeats_rank;
            for (size_t i = 0; i < output_rank; i++) {
                auto data_tmp = i < remain_arg ? DimType(1) : arg_shape[i - (remain_arg)];
                auto repeat_tmp =
                    i < remain_axes ? DimType(1) : axes_val[i - remain_axes];
                output_shape[i] = data_tmp * repeat_tmp;
            }
        }
    } else {
        //can't deduce shape, set default value
        output_shape = PartialShape::dynamic();
    }
}
} // namespace v0
} // namespace op
} // namespace ov
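To make the rank broadcasting between data and repeats concrete, a sketch with made-up values; `tile_node` and `repeats_tensor` (a HostTensor holding {1, 2, 3, 4}) are assumptions of the example, not part of the commit:

    // Example values only: data {6, 8, 10}, repeats constant {1, 2, 3, 4}.
    std::vector<ov::PartialShape> input_shapes = {{6, 8, 10}, {4}};
    std::vector<ov::PartialShape> output_shapes = {ov::PartialShape{}};
    ov::op::v0::shape_infer(tile_node.get(), input_shapes, output_shapes, {{1, repeats_tensor}});
    // Output rank is max(3, 4) = 4; the data shape is left-padded with 1s, so
    // output_shapes[0] == PartialShape{1 * 1, 6 * 2, 8 * 3, 10 * 4} == {1, 12, 24, 40}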
@@ -4,8 +4,6 @@
|
||||
|
||||
#include "ngraph/op/assign.hpp"
|
||||
|
||||
#include <assign_shape_inference.hpp>
|
||||
|
||||
#include "itt.hpp"
|
||||
#include "ngraph/op/read_value.hpp"
|
||||
#include "ngraph/op/util/variable.hpp"
|
||||
@@ -28,7 +26,7 @@ void op::v3::Assign::validate_and_infer_types() {
|
||||
NGRAPH_OP_SCOPE(v3_Assign_validate_and_infer_types);
|
||||
auto value = input_value(0);
|
||||
auto arg_t = get_input_element_type(0);
|
||||
const auto& input_shape = get_input_partial_shape(0);
|
||||
auto output_shape = get_input_partial_shape(0);
|
||||
if (!m_variable) {
|
||||
NodeVector start_nodes;
|
||||
for (const auto& input : inputs()) {
|
||||
@@ -43,10 +41,20 @@ void op::v3::Assign::validate_and_infer_types() {
|
||||
}
|
||||
NODE_VALIDATION_CHECK(this, m_variable != nullptr, "Can't find variable with id = ", m_variable_id);
|
||||
}
|
||||
std::vector<ov::PartialShape> output_shapes = {ov::PartialShape{}};
|
||||
std::vector<ov::PartialShape> input_shapes = {input_shape};
|
||||
shape_infer(this, input_shapes, output_shapes);
|
||||
set_output_type(0, arg_t, output_shapes[0]);
|
||||
|
||||
auto variable_info = m_variable->get_info();
|
||||
NODE_VALIDATION_CHECK(this, m_variable_id == variable_info.variable_id, "Variables identifiers are inconsistent.");
|
||||
NODE_VALIDATION_CHECK(this, arg_t == variable_info.data_type, "Variables types are inconsistent.");
|
||||
|
||||
if (output_shape.is_static() && variable_info.data_shape.is_static()) {
|
||||
NODE_VALIDATION_CHECK(this,
|
||||
output_shape == variable_info.data_shape,
|
||||
"Variables output shapes are inconsistent.");
|
||||
|
||||
set_output_type(0, arg_t, output_shape);
|
||||
} else {
|
||||
set_output_type(0, arg_t, ov::PartialShape::dynamic());
|
||||
}
|
||||
}
|
||||
|
||||
shared_ptr<Node> op::v3::Assign::clone_with_new_inputs(const OutputVector& new_args) const {
|
||||
@@ -70,10 +78,7 @@ op::v6::Assign::Assign(const Output<Node>& new_value, const std::shared_ptr<Vari
|
||||
void op::v6::Assign::validate_and_infer_types() {
|
||||
NGRAPH_OP_SCOPE(v6_Assign_validate_and_infer_types);
|
||||
m_variable->update({get_input_partial_shape(0), get_input_element_type(0), m_variable->get_info().variable_id});
|
||||
std::vector<ov::PartialShape> output_shapes = {ov::PartialShape{}};
|
||||
std::vector<ov::PartialShape> input_shapes = {get_input_partial_shape(0)};
|
||||
shape_infer(this, input_shapes, output_shapes);
|
||||
set_output_type(0, get_input_element_type(0), output_shapes[0]);
|
||||
set_output_type(0, get_input_element_type(0), get_input_partial_shape(0));
|
||||
}
|
||||
|
||||
shared_ptr<Node> op::v6::Assign::clone_with_new_inputs(const OutputVector& new_args) const {
|
||||
|
||||
@@ -4,7 +4,6 @@
|
||||
|
||||
#include "ngraph/op/experimental_detectron_prior_grid_generator.hpp"
|
||||
|
||||
#include <experimental_detectron_prior_grid_generator_shape_inference.hpp>
|
||||
#include <memory>
|
||||
|
||||
#include "itt.hpp"
|
||||
@@ -50,15 +49,71 @@ static constexpr size_t priors_port = 0;
|
||||
static constexpr size_t featmap_port = 1;
|
||||
static constexpr size_t im_data_port = 2;
|
||||
|
||||
void op::v6::ExperimentalDetectronPriorGridGenerator::validate() {
|
||||
auto priors_shape = get_input_partial_shape(priors_port);
|
||||
auto featmap_shape = get_input_partial_shape(featmap_port);
|
||||
auto im_data_shape = get_input_partial_shape(im_data_port);
|
||||
|
||||
if (priors_shape.rank().is_dynamic() || featmap_shape.rank().is_dynamic()) {
|
||||
return;
|
||||
}
|
||||
|
||||
NODE_VALIDATION_CHECK(this, priors_shape.rank().get_length() == 2, "Priors rank must be equal to 2.");
|
||||
|
||||
if (priors_shape[1].is_static()) {
|
||||
NODE_VALIDATION_CHECK(this,
|
||||
priors_shape[1].is_static() && priors_shape[1].get_length() == 4u,
|
||||
"The last dimension of the 'priors' input must be equal to 4. Got: ",
|
||||
priors_shape[1]);
|
||||
}
|
||||
|
||||
NODE_VALIDATION_CHECK(this, featmap_shape.rank().get_length() == 4, "Feature_map rank must be equal to 4.");
|
||||
|
||||
if (im_data_shape.rank().is_dynamic()) {
|
||||
return;
|
||||
}
|
||||
|
||||
NODE_VALIDATION_CHECK(this, im_data_shape.rank().get_length() == 4, "Im_data rank must be equal to 4.");
|
||||
|
||||
const auto num_batches_featmap = featmap_shape[0];
|
||||
const auto num_batches_im_data = im_data_shape[0];
|
||||
const auto batches_intersection = num_batches_featmap & num_batches_im_data;
|
||||
NODE_VALIDATION_CHECK(this,
|
||||
!batches_intersection.get_interval().empty(),
|
||||
"The first dimension of both 'feature_map' and 'im_data' must match. "
|
||||
"Feature_map: ",
|
||||
num_batches_featmap,
|
||||
"; Im_data: ",
|
||||
num_batches_im_data);
|
||||
}
|
||||
|
||||
void op::v6::ExperimentalDetectronPriorGridGenerator::validate_and_infer_types() {
|
||||
NGRAPH_OP_SCOPE(v6_ExperimentalDetectronPriorGridGenerator_validate_and_infer_types);
|
||||
const auto& priors_shape = get_input_partial_shape(priors_port);
|
||||
const auto& featmap_shape = get_input_partial_shape(featmap_port);
|
||||
const auto& input_et = get_input_element_type(0);
|
||||
auto priors_shape = get_input_partial_shape(priors_port);
|
||||
auto featmap_shape = get_input_partial_shape(featmap_port);
|
||||
auto input_et = get_input_element_type(0);
|
||||
|
||||
validate();
|
||||
|
||||
set_output_size(1);
|
||||
std::vector<ov::PartialShape> output_shapes = {ov::PartialShape{}};
|
||||
std::vector<ov::PartialShape> input_shapes = {priors_shape, featmap_shape, get_input_partial_shape(im_data_port)};
|
||||
shape_infer(this, input_shapes, output_shapes);
|
||||
set_output_type(0, input_et, output_shapes[0]);
|
||||
ov::PartialShape out_shape = {Dimension::dynamic(), Dimension::dynamic(), Dimension::dynamic(), 4};
|
||||
if (m_attrs.flatten) {
|
||||
out_shape = ov::PartialShape{Dimension::dynamic(), 4};
|
||||
}
|
||||
|
||||
if (priors_shape.rank().is_dynamic() || featmap_shape.rank().is_dynamic()) {
|
||||
set_output_type(0, input_et, out_shape);
|
||||
return;
|
||||
}
|
||||
|
||||
auto num_priors = priors_shape[0];
|
||||
auto featmap_height = featmap_shape[2];
|
||||
auto featmap_width = featmap_shape[3];
|
||||
|
||||
if (m_attrs.flatten) {
|
||||
out_shape = ov::PartialShape{featmap_height * featmap_width * num_priors, 4};
|
||||
} else {
|
||||
out_shape = ov::PartialShape{featmap_height, featmap_width, num_priors, 4};
|
||||
}
|
||||
set_output_type(0, input_et, out_shape);
|
||||
}
|
||||
|
||||
@@ -6,7 +6,6 @@
|
||||
|
||||
#include <cmath>
|
||||
#include <functional>
|
||||
#include <lstm_cell_shape_inference.hpp>
|
||||
|
||||
#include "itt.hpp"
|
||||
#include "ngraph/attribute_visitor.hpp"
|
||||
@@ -140,7 +139,30 @@ void op::v0::LSTMCell::validate_and_infer_types() {
|
||||
set_argument(6, get_default_peepholes_input());
|
||||
}
|
||||
|
||||
for (const auto& input : inputs()) {
|
||||
if (input.get_partial_shape().rank().is_dynamic()) {
|
||||
set_output_type(0, get_input_element_type(0), ov::PartialShape::dynamic());
|
||||
set_output_type(1, get_input_element_type(0), ov::PartialShape::dynamic());
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<ov::PartialShape> input_param{};
|
||||
|
||||
auto merged_batch_size = Dimension::dynamic();
|
||||
auto merged_hidden_size = Dimension::dynamic();
|
||||
auto result_et = element::dynamic;
|
||||
|
||||
// Copy all inputs without peephole (7th input) and initial_cell_state (2nd input)
|
||||
// information
|
||||
// for further validation
|
||||
for (size_t i = 0; i < get_input_size() - 1; i++) {
|
||||
// exclude initial_cell_state input
|
||||
if (i != 2) {
|
||||
input_param.push_back(get_input_partial_shape(i));
|
||||
}
|
||||
}
|
||||
|
||||
// Get input partial shape for all inputs
|
||||
const auto& x_pshape = get_input_partial_shape(0);
|
||||
const auto& ht_pshape = get_input_partial_shape(1);
|
||||
@@ -150,6 +172,24 @@ void op::v0::LSTMCell::validate_and_infer_types() {
|
||||
const auto& b_pshape = get_input_partial_shape(5);
|
||||
const auto& p_pshape = get_input_partial_shape(6);
|
||||
|
||||
validate_input_rank_dimension(input_param);
|
||||
|
||||
// Validate rank and dimension for initial_cell_state input
|
||||
NODE_VALIDATION_CHECK(this,
|
||||
(ct_pshape.rank().is_static()),
|
||||
"LSTMCell input tensor initial_cell_state shall have static rank.");
|
||||
|
||||
NODE_VALIDATION_CHECK(this,
|
||||
(ct_pshape.rank().get_length() == 2),
|
||||
"LSTMCell input tensor initial_cell_state shall have dimension 2D.");
|
||||
|
||||
// Validate rank and dimension for P input
|
||||
NODE_VALIDATION_CHECK(this, (p_pshape.rank().is_static()), "LSTMCell input tensor P shall have static rank.");
|
||||
|
||||
NODE_VALIDATION_CHECK(this,
|
||||
(p_pshape.rank().get_length() == 1),
|
||||
"LSTMCell input tensor P shall have dimension 1D.");
|
||||
|
||||
// Validate input element types and save result for output type
|
||||
NODE_VALIDATION_CHECK(this,
|
||||
element::Type::merge(result_et, result_et, get_input_element_type(0)) &&
|
||||
@@ -161,10 +201,65 @@ void op::v0::LSTMCell::validate_and_infer_types() {
|
||||
"Element types for X, initial_hidden_state, initial_cell_state, W, R and B do not "
|
||||
"match.");
|
||||
|
||||
std::vector<ov::PartialShape> output_shapes = {ov::PartialShape{}, ov::PartialShape{}};
|
||||
std::vector<ov::PartialShape> input_shapes =
|
||||
{x_pshape, ht_pshape, ct_pshape, w_pshape, r_pshape, b_pshape, p_pshape};
|
||||
shape_infer(this, input_shapes, output_shapes);
|
||||
// Merge batch_size dimension across all inputs to evaluate output[0] dimension
|
||||
NODE_VALIDATION_CHECK(this,
|
||||
Dimension::merge(merged_batch_size, merged_batch_size, ht_pshape[0]) &&
|
||||
Dimension::merge(merged_batch_size, merged_batch_size, ct_pshape[0]) &&
|
||||
Dimension::merge(merged_batch_size, merged_batch_size, x_pshape[0]),
|
||||
"Parameter batch_size not matched for X, initial_hidden_state or initial_cell_state "
|
||||
"inputs.");
|
||||
|
||||
// Merge hidden_size dimension across all inputs to evaluate output[1] dimension
|
||||
NODE_VALIDATION_CHECK(this,
|
||||
Dimension::merge(merged_hidden_size, merged_hidden_size, ht_pshape[1]) &&
|
||||
Dimension::merge(merged_hidden_size, merged_hidden_size, ct_pshape[1]) &&
|
||||
Dimension::merge(merged_hidden_size, merged_hidden_size, r_pshape[1]),
|
||||
"Parameter hidden_size not matched for R, initial_hidden_state and initial_cell_state "
|
||||
"inputs.");
|
||||
|
||||
// Validate hidden_size value for W, R and P inputs
|
||||
if (merged_hidden_size.is_static()) {
|
||||
if (w_pshape[0].is_static()) {
|
||||
NODE_VALIDATION_CHECK(this,
|
||||
w_pshape[0].compatible(merged_hidden_size * s_gates_count),
|
||||
"Parameter hidden_size mistmatched in W input. Current value is: ",
|
||||
w_pshape[0].get_length(),
|
||||
", expected: ",
|
||||
merged_hidden_size.get_length() * s_gates_count,
|
||||
".");
|
||||
}
|
||||
|
||||
if (r_pshape[0].is_static()) {
|
||||
NODE_VALIDATION_CHECK(this,
|
||||
r_pshape[0].compatible(merged_hidden_size * s_gates_count),
|
||||
"Parameter hidden_size mistmatched in R input. Current value is: ",
|
||||
r_pshape[0].get_length(),
|
||||
", expected: ",
|
||||
merged_hidden_size.get_length() * s_gates_count,
|
||||
".");
|
||||
}
|
||||
|
||||
if (b_pshape[0].is_static()) {
|
||||
NODE_VALIDATION_CHECK(this,
|
||||
b_pshape[0].compatible(merged_hidden_size * s_gates_count),
|
||||
"Parameter hidden_size mistmatched in B input. Current value is: ",
|
||||
b_pshape[0].get_length(),
|
||||
", expected: ",
|
||||
merged_hidden_size.get_length() * s_gates_count,
|
||||
".");
|
||||
}
|
||||
|
||||
if (p_pshape[0].is_static()) {
|
||||
NODE_VALIDATION_CHECK(this,
|
||||
p_pshape[0].compatible(merged_hidden_size * s_peepholes_count),
|
||||
"Parameter hidden_size mistmatched in P input. Current value is: ",
|
||||
p_pshape[0].get_length(),
|
||||
", expected: ",
|
||||
merged_hidden_size.get_length() * s_peepholes_count,
|
||||
".");
|
||||
}
|
||||
}
|
||||
|
||||
// Mark inputs which are relevant to output parameters
|
||||
set_input_is_relevant_to_shape(0);
|
||||
set_input_is_relevant_to_shape(1);
|
||||
@@ -173,8 +268,8 @@ void op::v0::LSTMCell::validate_and_infer_types() {
|
||||
|
||||
// Set output size, type and shape
|
||||
set_output_size(2);
|
||||
set_output_type(0, result_et, output_shapes[0]);
|
||||
set_output_type(1, result_et, output_shapes[1]);
|
||||
set_output_type(0, result_et, {merged_batch_size, merged_hidden_size});
|
||||
set_output_type(1, result_et, {merged_batch_size, merged_hidden_size});
|
||||
}
|
||||
|
||||
Output<Node> op::v0::LSTMCell::get_default_bias_input() const {
|
||||
@@ -319,7 +414,15 @@ bool ngraph::op::v4::LSTMCell::visit_attributes(AttributeVisitor& visitor) {
|
||||
|
||||
void op::v4::LSTMCell::validate_and_infer_types() {
|
||||
NGRAPH_OP_SCOPE(v4_LSTMCell_validate_and_infer_types);
|
||||
|
||||
for (const auto& input : inputs()) {
|
||||
if (input.get_partial_shape().rank().is_dynamic()) {
|
||||
set_output_type(0, get_input_element_type(0), ov::PartialShape::dynamic());
|
||||
set_output_type(1, get_input_element_type(0), ov::PartialShape::dynamic());
|
||||
return;
|
||||
}
|
||||
}
|
||||
auto merged_batch_size = Dimension::dynamic();
|
||||
auto merged_hidden_size = Dimension::dynamic();
|
||||
auto result_et = element::dynamic;
|
||||
|
||||
// Get input partial shape for all inputs
|
||||
@@ -330,6 +433,12 @@ void op::v4::LSTMCell::validate_and_infer_types() {
|
||||
const auto& r_pshape = get_input_partial_shape(4);
|
||||
const auto& b_pshape = get_input_partial_shape(5);
|
||||
|
||||
NODE_VALIDATION_CHECK(this,
|
||||
(ct_pshape.rank().get_length() == 2),
|
||||
"LSTMCell input tensor initial_cell_state shall have dimension 2D.");
|
||||
|
||||
validate_input_rank_dimension({x_pshape, ht_pshape, w_pshape, r_pshape, b_pshape});
|
||||
|
||||
// Validate input element types and save result for output type
|
||||
NODE_VALIDATION_CHECK(this,
|
||||
element::Type::merge(result_et, result_et, get_input_element_type(0)) &&
|
||||
@@ -341,9 +450,54 @@ void op::v4::LSTMCell::validate_and_infer_types() {
|
||||
"Element types for X, initial_hidden_state, initial_cell_state, W, R and B do not "
|
||||
"match.");
|
||||
|
||||
std::vector<ov::PartialShape> output_shapes = {ov::PartialShape{}, ov::PartialShape{}};
|
||||
std::vector<ov::PartialShape> input_shapes = {x_pshape, ht_pshape, ct_pshape, w_pshape, r_pshape, b_pshape};
|
||||
shape_infer(this, input_shapes, output_shapes);
|
||||
// Merge batch_size dimension across all inputs to evaluate output[0] dimension
|
||||
NODE_VALIDATION_CHECK(this,
|
||||
Dimension::merge(merged_batch_size, merged_batch_size, ht_pshape[0]) &&
|
||||
Dimension::merge(merged_batch_size, merged_batch_size, ct_pshape[0]) &&
|
||||
Dimension::merge(merged_batch_size, merged_batch_size, x_pshape[0]),
|
||||
"Parameter batch_size not matched for X, initial_hidden_state or initial_cell_state "
|
||||
"inputs.");
|
||||
|
||||
// Merge hidden_size dimension across all inputs to evaluate output[1] dimension
|
||||
NODE_VALIDATION_CHECK(this,
|
||||
Dimension::merge(merged_hidden_size, merged_hidden_size, ht_pshape[1]) &&
|
||||
Dimension::merge(merged_hidden_size, merged_hidden_size, ct_pshape[1]) &&
|
||||
Dimension::merge(merged_hidden_size, merged_hidden_size, r_pshape[1]),
|
||||
"Parameter hidden_size not matched for R, initial_hidden_state and initial_cell_state "
|
||||
"inputs.");
|
||||
|
||||
// Validate hidden_size value for W, R and P inputs
|
||||
if (merged_hidden_size.is_static()) {
|
||||
if (w_pshape[0].is_static()) {
|
||||
NODE_VALIDATION_CHECK(this,
|
||||
w_pshape[0].compatible(merged_hidden_size * s_gates_count),
|
||||
"Parameter hidden_size mistmatched in W input. Current value is: ",
|
||||
w_pshape[0].get_length(),
|
||||
", expected: ",
|
||||
merged_hidden_size.get_length() * s_gates_count,
|
||||
".");
|
||||
}
|
||||
|
||||
if (r_pshape[0].is_static()) {
|
||||
NODE_VALIDATION_CHECK(this,
|
||||
r_pshape[0].compatible(merged_hidden_size * s_gates_count),
|
||||
"Parameter hidden_size mistmatched in R input. Current value is: ",
|
||||
r_pshape[0].get_length(),
|
||||
", expected: ",
|
||||
merged_hidden_size.get_length() * s_gates_count,
|
||||
".");
|
||||
}
|
||||
|
||||
if (b_pshape[0].is_static()) {
|
||||
NODE_VALIDATION_CHECK(this,
|
||||
b_pshape[0].compatible(merged_hidden_size * s_gates_count),
|
||||
"Parameter hidden_size mistmatched in B input. Current value is: ",
|
||||
b_pshape[0].get_length(),
|
||||
", expected: ",
|
||||
merged_hidden_size.get_length() * s_gates_count,
|
||||
".");
|
||||
}
|
||||
}
|
||||
|
||||
// Mark inputs which are relevant to output parameters
|
||||
set_input_is_relevant_to_shape(0);
|
||||
@@ -353,8 +507,8 @@ void op::v4::LSTMCell::validate_and_infer_types() {
|
||||
|
||||
// Set output size, type and shape
|
||||
set_output_size(2);
|
||||
set_output_type(0, result_et, output_shapes[0]);
|
||||
set_output_type(1, result_et, output_shapes[1]);
|
||||
set_output_type(0, result_et, {merged_batch_size, merged_hidden_size});
|
||||
set_output_type(1, result_et, {merged_batch_size, merged_hidden_size});
|
||||
}
|
||||
|
||||
Output<Node> op::v4::LSTMCell::get_default_bias_input() const {
|
||||
|
||||
@@ -4,8 +4,6 @@
|
||||
|
||||
#include "ngraph/op/read_value.hpp"
|
||||
|
||||
#include <read_value_shape_inference.hpp>
|
||||
|
||||
#include "itt.hpp"
|
||||
#include "ngraph/op/util/variable_context.hpp"
|
||||
#include "ngraph/ops.hpp"
|
||||
@@ -25,13 +23,8 @@ op::v3::ReadValue::ReadValue(const Output<Node>& init_value, const std::string&
|
||||
void op::v3::ReadValue::validate_and_infer_types() {
|
||||
NGRAPH_OP_SCOPE(v3_ReadValue_validate_and_infer_types);
|
||||
auto arg_t = get_input_element_type(0);
|
||||
auto input_shape = get_input_partial_shape(0);
|
||||
auto output_shape = get_input_partial_shape(0);
|
||||
|
||||
std::vector<ov::PartialShape> output_shapes = {ov::PartialShape{}};
|
||||
std::vector<ov::PartialShape> input_shapes = {input_shape};
|
||||
shape_infer(this, input_shapes, output_shapes);
|
||||
|
||||
const auto& output_shape = output_shapes[0];
|
||||
VariableInfo info = {output_shape, arg_t, m_variable_id};
|
||||
if (m_variable == nullptr)
|
||||
m_variable = std::make_shared<Variable>(info);
|
||||
@@ -61,11 +54,7 @@ op::v6::ReadValue::ReadValue(const Output<Node>& init_value, const shared_ptr<Va
|
||||
void op::v6::ReadValue::validate_and_infer_types() {
|
||||
NGRAPH_OP_SCOPE(v6_ReadValue_validate_and_infer_types);
|
||||
const auto arg_t = get_input_element_type(0);
|
||||
auto input_shape = get_input_partial_shape(0);
|
||||
std::vector<ov::PartialShape> output_shapes = {ov::PartialShape{}};
|
||||
std::vector<ov::PartialShape> input_shapes = {input_shape};
|
||||
shape_infer(this, input_shapes, output_shapes);
|
||||
const auto& output_shape = output_shapes[0];
|
||||
auto output_shape = get_input_partial_shape(0);
|
||||
NGRAPH_CHECK(m_variable, "Variable is not initialized.");
|
||||
VariableInfo var_info = {output_shape, element::dynamic, m_variable->get_info().variable_id};
|
||||
NODE_VALIDATION_CHECK(this,
|
||||
|
||||
@@ -5,7 +5,6 @@
|
||||
#include "ngraph/op/tile.hpp"
|
||||
|
||||
#include <ngraph/validation_util.hpp>
|
||||
#include <tile_shape_inference.hpp>
|
||||
|
||||
#include "itt.hpp"
|
||||
#include "ngraph/op/constant.hpp"
|
||||
@@ -38,10 +37,37 @@ void op::v0::Tile::validate_and_infer_types() {
|
||||
"Tile repeats must have any integer element type, but has ",
|
||||
repeats_et);
|
||||
|
||||
std::vector<ov::PartialShape> output_shapes = {ov::PartialShape{}};
|
||||
std::vector<ov::PartialShape> input_shapes = {get_input_partial_shape(0), get_input_partial_shape(1)};
|
||||
shape_infer(this, input_shapes, output_shapes);
|
||||
set_output_type(0, arg_et, output_shapes[0]);
|
||||
auto arg_shape = get_input_partial_shape(0);
|
||||
auto repeats_shape = get_input_partial_shape(1);
|
||||
NODE_VALIDATION_CHECK(this, repeats_shape.rank().compatible(1), "PartialShape of repeats must be of rank 1");
|
||||
ov::PartialShape repeats_as_pshape;
|
||||
bool repeats_are_known = evaluate_as_partial_shape(get_input_source_output(1), repeats_as_pshape);
|
||||
std::vector<Dimension> repeats_value(repeats_as_pshape);
|
||||
if (repeats_are_known && !repeats_value.empty() && arg_shape.rank().is_static()) {
|
||||
std::vector<Dimension> data_shape(arg_shape);
|
||||
auto data_rank = data_shape.size();
|
||||
auto repeats_rank = repeats_value.size();
|
||||
auto output_rank = std::max(data_rank, repeats_rank);
|
||||
|
||||
// expand data shape and repeats to output rank
|
||||
data_shape.insert(data_shape.begin(), output_rank - data_rank, 1);
|
||||
repeats_value.insert(repeats_value.begin(), output_rank - repeats_rank, 1);
|
||||
|
||||
auto output_shape = ov::PartialShape::dynamic(output_rank);
|
||||
for (size_t i = 0; i < output_rank; i++)
|
||||
output_shape[i] = data_shape[i] * repeats_value[i];
|
||||
set_output_type(0, arg_et, output_shape);
|
||||
} else {
|
||||
Rank outRank = Rank::dynamic();
|
||||
if (arg_shape.rank().is_static() && repeats_shape.is_static()) {
|
||||
std::vector<Dimension> data_shape(arg_shape);
|
||||
auto data_rank = data_shape.size();
|
||||
auto repeats_rank = repeats_value.size();
|
||||
auto output_rank = std::max(data_rank, repeats_rank);
|
||||
outRank = Rank(output_rank);
|
||||
}
|
||||
set_output_type(0, arg_et, ov::PartialShape::dynamic(outRank));
|
||||
}
|
||||
|
||||
set_input_is_relevant_to_shape(0);
|
||||
set_input_is_relevant_to_shape(1);
|
||||
@@ -58,16 +84,24 @@ bool op::v0::Tile::evaluate_tile(const HostTensorVector& outputs, const HostTens
|
||||
const auto& axis = inputs[1];
|
||||
auto& output = outputs[0];
|
||||
auto repeats_val = read_index_vector(axis);
|
||||
const auto repeats_rank = repeats_val.size();
|
||||
auto repeats_rank = repeats_val.size();
|
||||
ov::Shape data_shape = data->get_shape();
|
||||
auto data_rank = data_shape.size();
|
||||
auto output_rank = std::max(data_rank, repeats_rank);
|
||||
|
||||
// expand data shape and repeats to output rank
|
||||
data_shape.insert(data_shape.begin(), output_rank - data_rank, 1);
|
||||
repeats_val.insert(repeats_val.begin(), output_rank - repeats_rank, 1);
|
||||
|
||||
ov::Shape output_shape(output_rank);
|
||||
for (size_t i = 0; i < output_rank; i++) {
|
||||
output_shape[i] = data_shape[i] * repeats_val[i];
|
||||
}
|
||||
|
||||
std::vector<ov::PartialShape> output_shapes = {ov::PartialShape{}};
|
||||
std::vector<ov::PartialShape> input_shapes = {data->get_shape(), axis->get_shape()};
|
||||
shape_infer(this, input_shapes, output_shapes, {{1, axis}});
|
||||
const auto& output_shape = output_shapes[0].to_shape();
|
||||
if (!output->get_is_allocated()) {
|
||||
output->set_shape(output_shape);
|
||||
}
|
||||
repeats_val.insert(repeats_val.begin(), output_shape.size() - repeats_rank, 1);
|
||||
|
||||
ngraph::runtime::reference::tile(data->get_data_ptr<const char>(),
|
||||
output->get_data_ptr<char>(),
|
||||
data->get_shape(),
|
||||
|
||||
@@ -53,9 +53,7 @@ TEST(type_prop, lstm_cell_invalid_input) {
|
||||
const auto lstm_cell = make_shared<opset4::LSTMCell>(X, H_t, C_t, W, R, hidden_size);
|
||||
FAIL() << "LSTMCell node was created with invalid data.";
|
||||
} catch (const NodeValidationFailure& error) {
|
||||
EXPECT_HAS_SUBSTRING(
|
||||
error.what(),
|
||||
std::string("Parameter hidden_size not matched for W, R, B, initial_hidden_state and initial_cell_state"));
|
||||
EXPECT_HAS_SUBSTRING(error.what(), std::string("Parameter hidden_size mistmatched in W input."));
|
||||
}
|
||||
|
||||
// Invalid R tensor shape.
|
||||
@@ -66,7 +64,7 @@ TEST(type_prop, lstm_cell_invalid_input) {
|
||||
FAIL() << "LSTMCell node was created with invalid data.";
|
||||
} catch (const NodeValidationFailure& error) {
|
||||
EXPECT_HAS_SUBSTRING(error.what(),
|
||||
std::string("Parameter hidden_size not matched for W, R, B, "
|
||||
std::string("Parameter hidden_size not matched for R, "
|
||||
"initial_hidden_state and initial_cell_state inputs."));
|
||||
}
|
||||
|
||||
@@ -102,7 +100,7 @@ TEST(type_prop, lstm_cell_invalid_input) {
|
||||
const auto lstm_cell = make_shared<opset4::LSTMCell>(X, H_t, C_t, W, R, B, hidden_size);
|
||||
FAIL() << "LSTMCell node was created with invalid data.";
|
||||
} catch (const NodeValidationFailure& error) {
|
||||
EXPECT_HAS_SUBSTRING(error.what(), std::string("Parameter hidden_size not matched for W, R, B"));
|
||||
EXPECT_HAS_SUBSTRING(error.what(), std::string("Parameter hidden_size mistmatched in B input."));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -140,8 +138,8 @@ TEST(type_prop, lstm_cell_dynamic_hidden_size) {
|
||||
|
||||
const auto lstm_cell = make_shared<opset4::LSTMCell>(X, H_t, C_t, W, R, 3);
|
||||
|
||||
EXPECT_EQ(lstm_cell->get_output_partial_shape(0), (PartialShape{batch_size, 3}));
|
||||
EXPECT_EQ(lstm_cell->get_output_partial_shape(1), (PartialShape{batch_size, 3}));
|
||||
EXPECT_EQ(lstm_cell->get_output_partial_shape(0), (PartialShape{batch_size, hidden_size}));
|
||||
EXPECT_EQ(lstm_cell->get_output_partial_shape(1), (PartialShape{batch_size, hidden_size}));
|
||||
EXPECT_EQ(lstm_cell->get_output_element_type(0), element::f32);
|
||||
EXPECT_EQ(lstm_cell->get_output_element_type(1), element::f32);
|
||||
}
|
||||
@@ -160,8 +158,8 @@ TEST(type_prop, lstm_cell_dynamic_inputs) {
|
||||
|
||||
const auto lstm_cell = make_shared<opset4::LSTMCell>(X, H_t, C_t, W, R, 3);
|
||||
|
||||
EXPECT_EQ(lstm_cell->get_output_partial_shape(0), (PartialShape{batch_size, 3}));
|
||||
EXPECT_EQ(lstm_cell->get_output_partial_shape(1), (PartialShape{batch_size, 3}));
|
||||
EXPECT_EQ(lstm_cell->get_output_partial_shape(0), (PartialShape{batch_size, hidden_size}));
|
||||
EXPECT_EQ(lstm_cell->get_output_partial_shape(1), (PartialShape{batch_size, hidden_size}));
|
||||
EXPECT_EQ(lstm_cell->get_output_element_type(0), element::f32);
|
||||
EXPECT_EQ(lstm_cell->get_output_element_type(1), element::f32);
|
||||
}
|
||||
@@ -226,11 +224,9 @@ TEST(type_prop, lstm_cell_invalid_input_dynamic_rank) {
|
||||
auto H_t = make_shared<opset4::Parameter>(element::f32, PartialShape{batch_size, hidden_size});
|
||||
auto C_t = make_shared<opset4::Parameter>(element::f32, PartialShape{batch_size, hidden_size});
|
||||
|
||||
auto check_dynamic_lstm = [=](const shared_ptr<opset4::LSTMCell>& lstm) -> bool {
|
||||
const int64_t target_batch_size = batch_size;
|
||||
const int64_t target_hidden_size = hidden_size;
|
||||
return lstm->output(0).get_partial_shape() == PartialShape{target_batch_size, target_hidden_size} &&
|
||||
lstm->output(1).get_partial_shape() == PartialShape{target_batch_size, target_hidden_size} &&
|
||||
auto check_dynamic_lstm = [](const shared_ptr<opset4::LSTMCell>& lstm) -> bool {
|
||||
return lstm->output(0).get_partial_shape() == PartialShape::dynamic() &&
|
||||
lstm->output(1).get_partial_shape() == PartialShape::dynamic() &&
|
||||
lstm->output(0).get_element_type() == lstm->input(0).get_element_type();
|
||||
};
|
||||
|
||||
@@ -269,61 +265,3 @@ TEST(type_prop, lstm_cell_invalid_input_dynamic_rank) {
|
||||
lstm = make_shared<opset4::LSTMCell>(X, H_t, C_t, W, R, B, hidden_size);
|
||||
EXPECT_EQ(check_dynamic_lstm(lstm), true);
|
||||
}
|
||||
|
||||
TEST(type_prop, lstm_cell_shape_from_partial) {
|
||||
const size_t batch_size = 2;
|
||||
const size_t input_size = 3;
|
||||
const size_t hidden_size = 3;
|
||||
const size_t gates_count = 4;
|
||||
|
||||
auto check_dynamic_lstm = [=](const shared_ptr<opset4::LSTMCell>& lstm) -> bool {
|
||||
const int64_t target_batch_size = batch_size;
|
||||
const int64_t target_hidden_size = hidden_size;
|
||||
return lstm->output(0).get_partial_shape() == PartialShape{target_batch_size, target_hidden_size} &&
|
||||
lstm->output(1).get_partial_shape() == PartialShape{target_batch_size, target_hidden_size} &&
|
||||
lstm->output(0).get_element_type() == lstm->input(0).get_element_type();
|
||||
};
|
||||
{
|
||||
// from h & w
|
||||
auto X = make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(Rank::dynamic()));
|
||||
auto W = make_shared<opset4::Parameter>(element::f32, PartialShape{gates_count * hidden_size, input_size});
|
||||
auto R = make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(Rank::dynamic()));
|
||||
auto H_t = make_shared<opset4::Parameter>(element::f32, PartialShape{batch_size, -1});
|
||||
auto C_t = make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(Rank::dynamic()));
|
||||
auto lstm = make_shared<opset4::LSTMCell>(X, H_t, C_t, W, R, hidden_size);
|
||||
EXPECT_EQ(check_dynamic_lstm(lstm), true);
|
||||
}
|
||||
|
||||
{
|
||||
// from x & w
|
||||
auto X = make_shared<opset4::Parameter>(element::f32, PartialShape{batch_size, input_size});
|
||||
auto W = make_shared<opset4::Parameter>(element::f32, PartialShape{gates_count * hidden_size, input_size});
|
||||
auto R = make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(Rank::dynamic()));
|
||||
auto H_t = make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(Rank::dynamic()));
|
||||
auto C_t = make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(Rank::dynamic()));
|
||||
auto lstm = make_shared<opset4::LSTMCell>(X, H_t, C_t, W, R, hidden_size);
|
||||
EXPECT_EQ(check_dynamic_lstm(lstm), true);
|
||||
}
|
||||
|
||||
{
|
||||
// only valid rank for H_t tensor.
|
||||
auto X = make_shared<opset4::Parameter>(element::f32, PartialShape{batch_size, input_size});
|
||||
auto W = make_shared<opset4::Parameter>(element::f32, PartialShape{gates_count * hidden_size, input_size});
|
||||
auto R = make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(Rank::dynamic()));
|
||||
auto H_t = make_shared<opset4::Parameter>(element::f32, PartialShape{batch_size, input_size});
|
||||
auto C_t = make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(Rank::dynamic()));
|
||||
auto lstm = make_shared<opset4::LSTMCell>(X, H_t, C_t, W, R, hidden_size);
|
||||
EXPECT_EQ(check_dynamic_lstm(lstm), true);
|
||||
}
|
||||
|
||||
{
|
||||
// batch from x, hidden from h_t
|
||||
auto X = make_shared<opset4::Parameter>(element::f32, PartialShape{batch_size, input_size});
|
||||
auto W = make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(Rank::dynamic()));
|
||||
auto R = make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(Rank::dynamic()));
|
||||
auto H_t = make_shared<opset4::Parameter>(element::f32, PartialShape{-1, hidden_size});
|
||||
auto C_t = make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(Rank::dynamic()));
|
||||
auto lstm = make_shared<opset4::LSTMCell>(X, H_t, C_t, W, R, hidden_size);
|
||||
EXPECT_EQ(check_dynamic_lstm(lstm), true);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -40,11 +40,3 @@ TEST(type_prop, tile_few_repeats_dyn_input) {
|
||||
ASSERT_EQ(top->get_element_type(), element::f32);
|
||||
ASSERT_EQ(top->get_output_partial_shape(0), (PartialShape{6, Dimension(32, 40), 10}));
|
||||
}
|
||||
|
||||
TEST(type_prop, tile_out_rank_from_repeats) {
|
||||
auto param0 = make_shared<op::Parameter>(element::f32, Shape{6, 8, 10});
|
||||
auto param1 = make_shared<op::Parameter>(element::i32, Shape{5});
|
||||
auto top = make_shared<op::v0::Tile>(param0, param1);
|
||||
ASSERT_EQ(top->get_element_type(), element::f32);
|
||||
ASSERT_EQ(top->get_output_partial_shape(0).size(), 5);
|
||||
}
|
||||
|
||||
@@ -61,35 +61,55 @@ bool VectorOrSquareLimit::isValid(const uint32_t h, const uint32_t w) const {
|
||||
std::string VectorOrSquareLimit::GetErrorOrEmpty(const uint32_t h, const uint32_t w, std::string what) const {
|
||||
std::ostringstream out;
|
||||
if (!isValid(h, w)) {
|
||||
out << "Unsupported " << what << " shape, actual WxH: " << w << "x" << h <<
|
||||
", only vertical vector up to 1x" << maxVectorHeight << ", horizontal up to " << maxVectorWidth <<
|
||||
"x1 or square up to " << maxSquare << "x" << maxSquare << " are valid\n";
|
||||
out << "Unsupported " << what << " shape, actual HxW: " << h << "x" << w <<
|
||||
", only vertical vector up to " << maxVectorHeight << "x1, horizontal up to 1x" << maxVectorWidth <<
|
||||
" or square up to " << maxSquare << "x" << maxSquare << " are valid\n";
|
||||
}
|
||||
return out.str();
|
||||
}
|
||||
|
||||
VectorOrSquareLimit VectorOrSquareLimitByChannels::GetByChannels(const uint32_t channels) const {
|
||||
return channels <= smallChannelMax ? smallChannel : bigChannel;
|
||||
|
||||
bool RectLimit::isValid(const uint32_t h, const uint32_t w) const {
|
||||
if (h >= 1 && h <= maxVectorHeight && w >= 1 && w <= maxVectorWidth) return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
bool VectorOrSquareLimitByChannels::isValid(const uint32_t h, const uint32_t w, const uint32_t channels) const {
|
||||
std::string RectLimit::GetErrorOrEmpty(const uint32_t h, const uint32_t w, std::string what) const {
|
||||
std::ostringstream out;
|
||||
if (!isValid(h, w)) {
|
||||
out << "Unsupported " << what << " shape, actual HxW: " << h << "x" << w <<
|
||||
", only rectangular shapes up to " << maxVectorHeight << "x" << maxVectorWidth << " are valid\n";
|
||||
}
|
||||
return out.str();
|
||||
}
|
||||
|
||||
RectLimit RectLimitByChannels::GetByChannels(const uint32_t channels) const {
|
||||
for (auto&& limit : limitPerChannel) {
|
||||
if (limit.first >= channels) {
|
||||
return limit.second;
|
||||
}
|
||||
}
|
||||
return RectLimit{ 0, 0 };
|
||||
}
|
||||
|
||||
bool RectLimitByChannels::isValid(const uint32_t h, const uint32_t w, const uint32_t channels) const {
|
||||
return GetByChannels(channels).isValid(h, w);
|
||||
}
|
||||
|
||||
std::string VectorOrSquareLimitByChannels::GetErrorOrEmpty(const uint32_t h, const uint32_t w,
|
||||
std::string RectLimitByChannels::GetErrorOrEmpty(const uint32_t h, const uint32_t w,
|
||||
const uint32_t channels, std::string what) const {
|
||||
return GetByChannels(channels).GetErrorOrEmpty(h, w, what);
|
||||
}
|
||||
|
||||
VectorOrSquareLimitByChannels VectorOrSquareLimitByChannelsAndPrecision::GetByPrecision(const OvGnaType precision) const {
|
||||
RectLimitByChannels RectLimitByChannelsAndPrecision::GetByPrecision(const OvGnaType precision) const {
|
||||
return precision == OvGnaTypeInt8 ? lowPrecision : defaultPrecision;
|
||||
}
|
||||
|
||||
bool VectorOrSquareLimitByChannelsAndPrecision::isValid(const uint32_t h, const uint32_t w, const OvGnaType precision, const uint32_t channels) const {
|
||||
bool RectLimitByChannelsAndPrecision::isValid(const uint32_t h, const uint32_t w, const OvGnaType precision, const uint32_t channels) const {
|
||||
return GetByPrecision(precision).isValid(h, w, channels);
|
||||
}
|
||||
|
||||
std::string VectorOrSquareLimitByChannelsAndPrecision::GetErrorOrEmpty(const uint32_t h, const uint32_t w,
|
||||
std::string RectLimitByChannelsAndPrecision::GetErrorOrEmpty(const uint32_t h, const uint32_t w,
|
||||
const OvGnaType precision, const uint32_t channels, std::string what) const {
|
||||
return GetByPrecision(precision).GetErrorOrEmpty(h, w, channels, what);
|
||||
}
|
||||
|
||||
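The per-channel lookup introduced here returns the first limit whose channel threshold is at least the requested channel count; a quick sketch against the default-precision kernelLimit table defined further below in this diff (the table values are copied from the diff, the standalone call itself is illustrative):

    // Default-precision table from the Validator below.
    RectLimitByChannels table{{{96, {7, 7}}, {136, {7, 5}}, {168, {7, 4}}, {240, {7, 3}}, {384, {7, 2}}}};
    auto limit = table.GetByChannels(100);    // first entry with limit.first >= 100 -> RectLimit{7, 5}
    bool ok = table.isValid(7, 5, 100);       // true: 7x5 fits within 7x5
    bool too_wide = table.isValid(7, 6, 100); // false: width 6 exceeds maxVectorWidth 5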
@@ -67,6 +67,13 @@ struct RangeMultipleLimit : public RangeLimit {
|
||||
std::string GetErrorOrEmpty(const uint32_t val) const;
|
||||
};
|
||||
|
||||
struct RectLimit {
|
||||
uint32_t maxVectorHeight;
|
||||
uint32_t maxVectorWidth;
|
||||
bool isValid(const uint32_t h, const uint32_t w) const;
|
||||
std::string GetErrorOrEmpty(const uint32_t h, const uint32_t w, std::string what) const;
|
||||
};
|
||||
|
||||
struct VectorOrSquareLimit {
|
||||
uint32_t maxSquare;
|
||||
uint32_t maxVectorHeight;
|
||||
@@ -75,20 +82,18 @@ struct VectorOrSquareLimit {
|
||||
std::string GetErrorOrEmpty(const uint32_t h, const uint32_t w, std::string what) const;
|
||||
};
|
||||
|
||||
struct VectorOrSquareLimitByChannels {
|
||||
uint32_t smallChannelMax;
|
||||
VectorOrSquareLimit smallChannel;
|
||||
VectorOrSquareLimit bigChannel;
|
||||
VectorOrSquareLimit GetByChannels(const uint32_t channels) const;
|
||||
struct RectLimitByChannels {
|
||||
std::vector<std::pair<uint32_t, RectLimit> > limitPerChannel;
|
||||
RectLimit GetByChannels(const uint32_t channels) const;
|
||||
bool isValid(const uint32_t h, const uint32_t w, const uint32_t channels) const;
|
||||
std::string GetErrorOrEmpty(const uint32_t h, const uint32_t w,
|
||||
const uint32_t channels, std::string what) const;
|
||||
};
|
||||
|
||||
struct VectorOrSquareLimitByChannelsAndPrecision {
|
||||
VectorOrSquareLimitByChannels lowPrecision;
|
||||
VectorOrSquareLimitByChannels defaultPrecision;
|
||||
VectorOrSquareLimitByChannels GetByPrecision(const OvGnaType precision) const;
|
||||
struct RectLimitByChannelsAndPrecision {
|
||||
RectLimitByChannels lowPrecision;
|
||||
RectLimitByChannels defaultPrecision;
|
||||
RectLimitByChannels GetByPrecision(const OvGnaType precision) const;
|
||||
bool isValid(const uint32_t h, const uint32_t w, const OvGnaType precision, const uint32_t channels) const;
|
||||
std::string GetErrorOrEmpty(const uint32_t h, const uint32_t w,
|
||||
const OvGnaType precision, const uint32_t channels, std::string what) const;
|
||||
@@ -98,11 +103,20 @@ class Validator {
|
||||
RangeLimit2D inputHWLimit{ { 16, 384, "input height"} , { 16, 240, "input width"} };
|
||||
RangeMultipleLimit inputChannelsNumberLimit{ {8, 384, "number of input channels"}, 8 };
|
||||
|
||||
RangeMultipleLimit kernelNumberLimit{ {8, 256, "number of kernels"}, 8 };
|
||||
VectorOrSquareLimitByChannelsAndPrecision kernelLimit {
|
||||
{ 240, { 3, 7, 3 }, { 2, 7, 2 } },
|
||||
{ 120, { 3, 7, 3 }, { 1, 7, 1 } } };
|
||||
VectorOrSquareLimitByChannelsAndPrecision& strideLimit = kernelLimit;
|
||||
RangeMultipleLimit kernelNumberLimit{ {8, 1024, "number of kernels"}, 8 };
|
||||
RectLimitByChannelsAndPrecision kernelLimit {
|
||||
{ { {96, {7, 7}},
|
||||
{136, {7, 5}},
|
||||
{168, {7, 4}},
|
||||
{240, {7, 3}},
|
||||
{384, {7, 2}} } },
|
||||
{ { {48, {7, 7}},
|
||||
{64, {7, 5}},
|
||||
{80, {7, 4}},
|
||||
{120, {7, 3}},
|
||||
{384, {7, 1}} } },
|
||||
};
|
||||
RectLimitByChannelsAndPrecision& strideLimit = kernelLimit;
|
||||
RangeLimit2D dilationLimit{ {convDilationHeight, convDilationHeight, "dilation height" },
|
||||
{ convDilationWidth, convDilationWidth, "dilation width" } };
|
||||
const VectorOrSquareLimit poolingWindowLimit{ 3, 1, 1 };
|
||||
|
||||
@@ -30,9 +30,10 @@ double getWeightsReducer(InferenceEngine::ConvolutionLayer& conv) {
    using KRT = std::pair<uint32_t, double>;
    // Empirically determined weights reducers for 2D Convolution
    // i.e.:
    // for kernelSize >= 14 -> 1.7
    // for kernelSize >= 9 -> 1.3
    // for kernelSize in {7, 8} -> 1.2
    const std::vector< KRT > reducers{ {9, 1.3}, {7, 1.2} };
    const std::vector< KRT > reducers{ {49, 3.0}, {36, 2.6}, {21, 2.3}, {14, 1.7}, {9, 1.3}, {7, 1.2} };
    auto reducer = 1.0;
    const auto inDepth = GetDataDimSize(conv.insData.front().lock(), InferenceEngine::DataDimName::C);
    const auto inHeight = GetDataDimSize(conv.insData.front().lock(), InferenceEngine::DataDimName::H);

@@ -135,6 +135,10 @@ def getting_samples_data_zip(url, samples_path, size_of_chunk=128):
|
||||
print("\nExtracting of samples_smoke_tests_data.zip...")
|
||||
with zipfile.ZipFile(samples_path, 'r') as samples_zip:
|
||||
samples_zip.extractall(Environment.env['smoke_tests_path'])
|
||||
nameFolder = str(Environment.env['samples_data_zip'])[Environment.env['samples_data_zip'].rfind('/')+1:][:-4]
|
||||
smoke_tests_path = os.path.join(Environment.env['smoke_tests_path'])
|
||||
if os.path.exists(os.path.join(smoke_tests_path,nameFolder)):
|
||||
os.rename(os.path.join(smoke_tests_path, nameFolder), os.path.join(smoke_tests_path, 'samples_smoke_tests_data') )
|
||||
if os.path.exists(samples_path):
|
||||
print("\nRemoving samples_smoke_tests_data.zip...")
|
||||
os.remove(samples_path)
|
||||
@@ -169,10 +173,16 @@ class SamplesCommonTestClass():
|
||||
|
||||
@staticmethod
|
||||
def reset_models_path(model):
|
||||
if ('FP32' in os.path.split(model)[0] or 'FP16' in os.path.split(model)[0]):
|
||||
model = search_model_path_recursively(config_key=Environment.env['icv_model_zoo_models'], model_name=model)
|
||||
else:
|
||||
model = os.path.join(Environment.env['public_models'], model)
|
||||
pathList = model.split(os.sep)
|
||||
modelName = pathList[len(pathList)-1]
|
||||
precision = pathList[len(pathList)-2]
|
||||
for root, subFolder, files in os.walk(Environment.env['models_path']):
|
||||
for item in files:
|
||||
if item.endswith(modelName) :
|
||||
if precision in root :
|
||||
model = str(os.path.join(root,item))
|
||||
else :
|
||||
model = os.path.join(Environment.env['models_path'], model)
|
||||
return model
|
||||
|
||||
@staticmethod
|
||||
@@ -328,10 +338,8 @@ class SamplesCommonTestClass():
|
||||
def setup_class(cls):
|
||||
getting_samples_data_zip(Environment.env['samples_data_zip'], Environment.env['samples_path'])
|
||||
assert os.environ.get('IE_APP_PATH') is not None, "IE_APP_PATH environment variable is not specified!"
|
||||
assert os.path.exists(Environment.env['public_models']), \
|
||||
"Path for public models {} is not exist!".format(Environment.env['public_models'])
|
||||
assert os.path.exists(Environment.env['icv_model_zoo_models']), \
|
||||
"Path for icv models {} is not exist!".format(Environment.env['icv_model_zoo_models'])
|
||||
assert os.path.exists(Environment.env['models_path']), \
|
||||
"Path for public models {} is not exist!".format(Environment.env['models_path'])
|
||||
assert os.path.exists(Environment.env['test_data']), \
|
||||
"Path for test data {} is not exist!".format(Environment.env['test_data'])
|
||||
cls.output_dir = Environment.env['out_directory']
|
||||
|
||||
@@ -45,7 +45,7 @@ def pytest_configure(config):
|
||||
try:
|
||||
Environment.env = fix_env_conf(yaml.safe_load(env_conf))
|
||||
# Check mandatory env variables:
|
||||
mandatory_env_varibales = ['out_directory', 'public_models', 'icv_model_zoo_models', 'test_data', 'samples_data_zip', 'smoke_tests_path', 'samples_path']
|
||||
mandatory_env_varibales = ['out_directory', 'models_path', 'test_data', 'samples_data_zip', 'smoke_tests_path', 'samples_path']
|
||||
missing_variables = []
|
||||
for variable in mandatory_env_varibales:
|
||||
if variable not in Environment.env:
|
||||
|
||||
@@ -1,9 +1,8 @@
out_directory: ${WORKSPACE}/out
public_models: ${SHARE}/models/public/
icv_model_zoo_models: ${SHARE}/models/omz_models/
models_path: ${SHARE}/models/
test_data: ${SHARE}/validation_set/
#Performance data:
perf_result_path: ${SHARE}/validation_set/performance_result/
samples_data_zip: "https://storage.openvinotoolkit.org/repositories/openvino/ci_dependencies/test/samples_smoke_tests_data.zip"
samples_data_zip: "https://storage.openvinotoolkit.org/repositories/openvino/ci_dependencies/test/2021.4/samples_smoke_tests_data_2021.4.zip"
smoke_tests_path: ${WORKSPACE}/tests/smoke_tests
samples_path: ${WORKSPACE}/tests/smoke_tests/samples_smoke_tests_data.zip

@@ -21,7 +21,7 @@ log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=s

test_data_fp32_async = get_tests \
(cmd_params={'i': [os.path.join('227x227', 'dog.bmp')],
'm': [os.path.join('squeezenet1.1', 'caffe_squeezenet_v1_1_FP32_batch_1_seqlen_[1]_v10.xml')],
'm': [os.path.join('squeezenet1.1', 'FP32', 'squeezenet1.1.xml')],
'batch': [1],
'sample_type': ['C++', 'Python'],
'd': ['CPU'],
@@ -33,7 +33,7 @@ test_data_fp32_async = get_tests \

test_data_fp32_sync = get_tests \
(cmd_params={'i': [os.path.join('227x227', 'dog.bmp')],
'm': [os.path.join('squeezenet1.1', 'caffe_squeezenet_v1_1_FP32_batch_1_seqlen_[1]_v10.xml')],
'm': [os.path.join('squeezenet1.1', 'FP32', 'squeezenet1.1.xml')],
'batch': [1],
'sample_type': ['C++', 'Python'],
'd': ['CPU'],

@@ -21,7 +21,7 @@ from common.samples_common_test_clas import get_tests
log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout)

test_data_fp32 = get_tests(cmd_params={'i': [os.path.join('227x227', 'dog.bmp')],
'm': [os.path.join('squeezenet1.1', 'caffe_squeezenet_v1_1_FP32_batch_1_seqlen_[1]_v10.xml')],
'm': [os.path.join('squeezenet1.1', 'FP32', 'squeezenet1.1.xml')],
'nt': ['1'],
'sample_type': ['C++','Python'],
'batch': [1, 2, 4],
@@ -30,7 +30,7 @@ test_data_fp32 = get_tests(cmd_params={'i': [os.path.join('227x227', 'dog.bmp')]
)

test_data_fp16 = get_tests(cmd_params={'i': [os.path.join('227x227', 'dog.bmp')],
'm': [os.path.join('squeezenet1.1', 'caffe_squeezenet_v1_1_FP16_batch_1_seqlen_[1]_v10.xml')],
'm': [os.path.join('squeezenet1.1', 'FP32', 'squeezenet1.1.xml')],
'nt': ['1'],
'sample_type': ['C++','Python'],
'batch': [1, 2, 4],

@@ -26,15 +26,13 @@ import shutil
log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout)

test_data_fp32 = get_tests(cmd_params={'i': [os.path.join('227x227', 'dog.bmp')],
'm': [os.path.join('squeezenet1.1',
'caffe_squeezenet_v1_1_FP32_batch_1_seqlen_[1]_v10.xml')],
'm': [os.path.join('squeezenet1.1', 'FP32', 'squeezenet1.1.xml')],
'd': ['CPU'],
'sample_type': ['C++', 'C']},
use_device=['d'])

test_data_fp32_unicode = get_tests(cmd_params={'i': [os.path.join('227x227', 'dog.bmp')],
'm': [os.path.join('squeezenet1.1',
'caffe_squeezenet_v1_1_FP32_batch_1_seqlen_[1]_v10.xml')],
'm': [os.path.join('squeezenet1.1', 'FP32', 'squeezenet1.1.xml')],
'd': ['CPU'],
'sample_type': ['C++', 'C']},
use_device=['d'])
@@ -91,8 +89,8 @@ class TestHello(SamplesCommonTestClass):

# Copy files
shutil.copy(Path(Environment.env['test_data']) / Path(param['i']), tmp_image_dir)
shutil.copy(Path(Environment.env['public_models']) / Path(param['m']), tmp_model_dir)
shutil.copy(Path(Environment.env['public_models']) / Path(param['m'].replace('.xml', '.bin')), tmp_model_dir)
shutil.copy(Path(Environment.env['models_path']) / 'public' / Path(param['m']), tmp_model_dir)
shutil.copy(Path(Environment.env['models_path']) / 'public' / Path(param['m'].replace('.xml', '.bin')), tmp_model_dir)

image_path = tmp_image_dir / Path(param['i']).name
original_image_name = image_path.name.split(sep='.')[0]

@@ -21,7 +21,7 @@ from common.samples_common_test_clas import SamplesCommonTestClass
log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout)

test_data_fp32 = get_tests(cmd_params={'i': [os.path.join('224x224', 'dog6.yuv')],
'm': [os.path.join('squeezenet1.1', 'caffe_squeezenet_v1_1_FP32_batch_1_seqlen_[1]_v10.xml')],
'm': [os.path.join('squeezenet1.1', 'FP32', 'squeezenet1.1.xml')],
'size': ['224x224'],
'sample_type': ['C++', 'C'],
'd': ['CPU']},

@@ -21,8 +21,7 @@ from common.specific_samples_parsers import parse_hello_reshape_ssd
log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout)

test_data_fp32 = get_tests(cmd_params={'i': [os.path.join('500x500', 'cat.bmp')],
'm': [os.path.join('ssd300',
'caffe_ssd_300_FP32_v10.xml')],
'm': [os.path.join('ssd512', 'FP32', 'ssd512.xml')],
'd': ['CPU'],
'batch': [1, 2, 4]}, use_device=['d'], use_batch=True
)

@@ -21,7 +21,7 @@ from common.common_utils import parse_avg_err
log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout)

test_data_nthreads = get_tests(cmd_params={'i': [os.path.join('ark', 'dev93_10.ark')],
'm': [os.path.join('FP32', 'wsj_dnn5b.xml')],
'm': [os.path.join('wsj', 'FP32', 'wsj_dnn5b.xml')],
'bs': [1, 2],
'o': ['res_output.ark'],
'r': [os.path.join('ark', 'dev93_scores_10.ark')],
@@ -32,7 +32,7 @@ test_data_nthreads = get_tests(cmd_params={'i': [os.path.join('ark', 'dev93_10.a
)

test_data_nthreads_negative = get_tests(cmd_params={'i': [os.path.join('ark', 'dev93_10.ark')],
'm': [os.path.join('FP32', 'wsj_dnn5b.xml')],
'm': [os.path.join('wsj', 'FP32', 'wsj_dnn5b.xml')],
'bs': [1],
'o': ['res_output.ark'],
'r': [os.path.join('ark', 'dev93_scores_10.ark')],

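For orientation only: `get_tests` is defined in `common/samples_common_test_clas.py` and is not shown in this diff, but the `cmd_params` dictionaries above imply a cross-product expansion of parameter values, roughly like this sketch:

    import itertools

    def expand_cmd_params(cmd_params):
        # Every key maps to a list of candidate values; each combination becomes one test case.
        keys = list(cmd_params)
        for combination in itertools.product(*(cmd_params[key] for key in keys)):
            yield dict(zip(keys, combination))

    # e.g. 'batch': [1, 2, 4] and 'sample_type': ['C++', 'Python'] expand into 6 parameter sets.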
@@ -30,3 +30,243 @@
precision: FP16-INT8
framework: caffe2
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/faster_rcnn_resnet101_coco/tf/FP16/faster_rcnn_resnet101_coco.xml
name: faster_rcnn_resnet101_coco
precision: FP16
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/faster_rcnn_resnet101_coco/tf/FP16-INT8/faster_rcnn_resnet101_coco.xml
name: faster_rcnn_resnet101_coco
precision: FP16-INT8
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/faster-rcnn-resnet101-coco-sparse-60-0001/tf/FP16/faster-rcnn-resnet101-coco-sparse-60-0001.xml
name: faster-rcnn-resnet101-coco-sparse-60-0001
precision: FP16
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/faster-rcnn-resnet101-coco-sparse-60-0001/tf/FP16-INT8/faster-rcnn-resnet101-coco-sparse-60-0001.xml
name: faster-rcnn-resnet101-coco-sparse-60-0001
precision: FP16-INT8
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/googlenet-v1/tf/FP16/googlenet-v1.xml
name: googlenet-v1
precision: FP16
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/googlenet-v1/tf/FP16-INT8/googlenet-v1.xml
name: googlenet-v1
precision: FP16-INT8
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/googlenet-v3/tf/FP16/googlenet-v3.xml
name: googlenet-v3
precision: FP16
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/googlenet-v3/tf/FP16-INT8/googlenet-v3.xml
name: googlenet-v3
precision: FP16-INT8
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/ssd512/caffe/FP16/ssd512.xml
name: ssd512
precision: FP16
framework: caffe
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/ssd512/caffe/FP16-INT8/ssd512.xml
name: ssd512
precision: FP16-INT8
framework: caffe
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/yolo-v2-ava-0001/tf/FP16/yolo-v2-ava-0001.xml
name: yolo-v2-ava-0001
precision: FP16
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/yolo-v2-ava-0001/tf/FP16-INT8/yolo-v2-ava-0001.xml
name: yolo-v2-ava-0001
precision: FP16-INT8
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/yolo-v2-ava-sparse-35-0001/tf/FP16/yolo-v2-ava-sparse-35-0001.xml
name: yolo-v2-ava-sparse-35-0001
precision: FP16
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/yolo-v2-ava-sparse-35-0001/tf/FP16-INT8/yolo-v2-ava-sparse-35-0001.xml
name: yolo-v2-ava-sparse-35-0001
precision: FP16-INT8
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/yolo-v2-ava-sparse-70-0001/tf/FP16/yolo-v2-ava-sparse-70-0001.xml
name: yolo-v2-ava-sparse-70-0001
precision: FP16
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/yolo-v2-ava-sparse-70-0001/tf/FP16-INT8/yolo-v2-ava-sparse-70-0001.xml
name: yolo-v2-ava-sparse-70-0001
precision: FP16-INT8
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-0001/tf/FP16/yolo-v2-tiny-ava-0001.xml
name: yolo-v2-tiny-ava-0001
precision: FP16
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-0001/tf/FP16-INT8/yolo-v2-tiny-ava-0001.xml
name: yolo-v2-tiny-ava-0001
precision: FP16-INT8
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-sparse-30-0001/tf/FP16/yolo-v2-tiny-ava-sparse-30-0001.xml
name: yolo-v2-tiny-ava-sparse-30-0001
precision: FP16
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-sparse-30-0001/tf/FP16-INT8/yolo-v2-tiny-ava-sparse-30-0001.xml
name: yolo-v2-tiny-ava-sparse-30-0001
precision: FP16-INT8
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-sparse-60-0001/tf/FP16/yolo-v2-tiny-ava-sparse-60-0001.xml
name: yolo-v2-tiny-ava-sparse-60-0001
precision: FP16
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-sparse-60-0001/tf/FP16-INT8/yolo-v2-tiny-ava-sparse-60-0001.xml
name: yolo-v2-tiny-ava-sparse-60-0001
precision: FP16-INT8
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/squeezenet1.1/caffe2/FP16/squeezenet1.1.xml
name: squeezenet1.1
precision: FP16
framework: caffe2
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/squeezenet1.1/caffe2/FP16-INT8/squeezenet1.1.xml
name: squeezenet1.1
precision: FP16-INT8
framework: caffe2
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-0001/tf/FP16/icnet-camvid-ava-0001.xml
name: icnet-camvid-ava-0001
precision: FP16
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-0001/tf/FP16-INT8/icnet-camvid-ava-0001.xml
name: icnet-camvid-ava-0001
precision: FP16-INT8
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-sparse-30-0001/tf/FP16/icnet-camvid-ava-sparse-30-0001.xml
name: icnet-camvid-ava-sparse-30-0001
precision: FP16
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-sparse-30-0001/tf/FP16-INT8/icnet-camvid-ava-sparse-30-0001.xml
name: icnet-camvid-ava-sparse-30-0001
precision: FP16-INT8
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-sparse-60-0001/tf/FP16/icnet-camvid-ava-sparse-60-0001.xml
name: icnet-camvid-ava-sparse-60-0001
precision: FP16
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-sparse-60-0001/tf/FP16-INT8/icnet-camvid-ava-sparse-60-0001.xml
name: icnet-camvid-ava-sparse-60-0001
precision: FP16-INT8
framework: tf
use_model_cache: true

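A rough sketch (the loader is an assumption; the entry layout follows `instance["device"]["name"]` as used by test_timetest.py further below) of iterating such entries once `${VPUX_MODELS_PKG}` is resolved:

    import os
    import yaml

    with open("test_config.yml") as config_file:          # file name per the conftest default below
        entries = yaml.safe_load(config_file)

    for entry in entries:
        model = entry["model"]
        model_path = os.path.expandvars(model["path"])    # resolves ${VPUX_MODELS_PKG}
        print(entry["device"]["name"], model["name"], model["precision"], model_path)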
@@ -58,15 +58,33 @@ def aggregate_stats(stats: dict):

def prepare_executable_cmd(args: dict):
"""Generate common part of cmd from arguments to execute"""
return [str(args["executable"].resolve(strict=True)),
"-m", str(args["model"].resolve(strict=True)),
"-d", args["device"]]
return [
str(args["executable"].resolve(strict=True)),
"-m", str(args["model"].resolve(strict=True)),
"-d", args["device"],
"-p", args["perf_hint"],
"-v" if args["vpu_compiler"] else "", args['vpu_compiler'] if args["vpu_compiler"] else "",
"-c" if args["cpu_cache"] else "",
]

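For illustration (paths and values are placeholders), the updated `prepare_executable_cmd` yields a flat argument list; note that switched-off options contribute empty strings rather than being dropped:

    from pathlib import Path

    args = {"executable": Path("./timetest_infer"), "model": Path("model.xml"),
            "device": "CPU", "perf_hint": "LATENCY", "cpu_cache": True, "vpu_compiler": ""}
    # prepare_executable_cmd(args) would return roughly:
    # ['<abs>/timetest_infer', '-m', '<abs>/model.xml', '-d', 'CPU', '-p', 'LATENCY', '', '', '-c']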
def get_cache_stats(flatten_data):
"""Update statistics for run with models cache"""
data_cache = {
"full_run_using_cache": flatten_data["full_run"],
"time_to_inference_using_cache": flatten_data["time_to_inference"],
"load_plugin": flatten_data["load_plugin"],
"load_network_using_cache": flatten_data["load_network"],
"first_inference": flatten_data["first_inference"],
"fill_inputs": flatten_data["fill_inputs"],
}
return data_cache

def run_timetest(args: dict, log=None):
"""Run provided executable several times and aggregate collected statistics"""
if log is None:
log = logging.getLogger('run_timetest')
log = logging.getLogger("run_timetest")

cmd_common = prepare_executable_cmd(args)

@@ -90,6 +108,9 @@ def run_timetest(args: dict, log=None):
flatten_data = {}
parse_stats(raw_data[0], flatten_data)

if run_iter > 0 and args["cpu_cache"]:
flatten_data = get_cache_stats(flatten_data)

log.debug(f"Statistics after run of executable #{run_iter}: {flatten_data}")

# Combine statistics from several runs
@@ -108,29 +129,45 @@ def run_timetest(args: dict, log=None):

def cli_parser():
"""parse command-line arguments"""
parser = argparse.ArgumentParser(description='Run timetest executable')
parser.add_argument('executable',
parser = argparse.ArgumentParser(description="Run timetest executable")
parser.add_argument("executable",
type=Path,
help='binary to execute')
parser.add_argument('-m',
help="Binary to execute")
parser.add_argument("-m",
required=True,
dest="model",
type=Path,
help='path to an .xml/.onnx file with a trained model or'
' to a .blob files with a trained compiled model')
parser.add_argument('-d',
help="Path to an .xml/.onnx file with a trained model or"
" to a .blob files with a trained compiled model")
parser.add_argument("-d",
required=True,
dest="device",
type=str,
help='target device to infer on')
parser.add_argument('-niter',
help="Target device to infer on")
parser.add_argument("-niter",
default=10,
type=check_positive_int,
help='number of times to execute binary to aggregate statistics of')
parser.add_argument('-s',
help="Number of times to execute binary to aggregate statistics of")
parser.add_argument("-s",
dest="stats_path",
type=Path,
help='path to a file to save aggregated statistics')
help="path to a file to save aggregated statistics")
parser.add_argument("-p",
dest="perf_hint",
choices=["LATENCY", "THROUGHPUT"],
default="LATENCY",
type=str,
help="Enables performance hint for specified device. Default hint is LATENCY")
exclusive_group = parser.add_mutually_exclusive_group(required=False)
exclusive_group.add_argument("-c",
dest="cpu_cache",
action="store_true",
help="Enable CPU model cache usage")
exclusive_group.add_argument("-v",
dest="vpu_compiler",
choices=["MCM", "MLIR"],
type=str,
help="Change VPUX compiler type")

args = parser.parse_args()

@@ -143,6 +180,12 @@ if __name__ == "__main__":
logging.basicConfig(format="[ %(levelname)s ] %(message)s",
level=logging.DEBUG, stream=sys.stdout)

assert not (args.cpu_cache and args.device != "CPU"), \
"The cache option is used only for the CPU device."

assert not (args.vpu_compiler and "VPUX" not in args.device), \
"The VPUX compiler option is used only for the VPUX device."

exit_code, _, aggr_stats, _ = run_timetest(
dict(args._get_kwargs()), log=logging)  # pylint: disable=protected-access
if args.stats_path:
@@ -159,15 +202,15 @@ if __name__ == "__main__":

def test_timetest_parser():
# Example of timetest yml file
raw_data_example = [{'full_run': [1, {'first_inference_latency': [2, {'load_plugin': [3]}, {
'create_exenetwork': [4, {'read_network': [5]}, {'load_network': [6]}]}]},
{'first_inference': [7, {'fill_inputs': [8]}]}]}]
raw_data_example = [{"full_run": [1, {"first_inference_latency": [2, {"load_plugin": [3]}, {
"create_exenetwork": [4, {"read_network": [5]}, {"load_network": [6]}]}]},
{"first_inference": [7, {"fill_inputs": [8]}]}]}]

# Refactoring raw data from yml
flatten_dict = {}
parse_stats(raw_data_example, flatten_dict)

expected_result = {'full_run': 1, 'first_inference_latency': 2, 'load_plugin': 3, 'create_exenetwork': 4,
'read_network': 5, 'load_network': 6, 'first_inference': 7, 'fill_inputs': 8}
expected_result = {"full_run": 1, "first_inference_latency": 2, "load_plugin": 3, "create_exenetwork": 4,
"read_network": 5, "load_network": 6, "first_inference": 7, "fill_inputs": 8}

assert flatten_dict == expected_result, "Statistics parsing is performed incorrectly!"

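A hedged usage sketch (paths are placeholders) of calling the updated script programmatically, mirroring the `__main__` block above and the argument keys built in test_timetest.py:

    import logging
    from pathlib import Path
    from scripts.run_timetest import run_timetest

    exe_args = {"executable": Path("./timetest_infer"), "model": Path("model.xml"),
                "device": "CPU", "niter": 3, "perf_hint": "LATENCY",
                "cpu_cache": False, "vpu_compiler": ""}
    exit_code, msg, aggr_stats, raw_stats = run_timetest(exe_args, log=logging)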
@@ -17,51 +17,87 @@ using namespace InferenceEngine;
* main(). The function should not throw any exceptions and responsible for
* handling it by itself.
*/
int runPipeline(const std::string &model, const std::string &device) {
auto pipeline = [](const std::string &model, const std::string &device) {
int runPipeline(const std::string &model, const std::string &device, const std::string &performanceHint,
const bool isCacheEnabled, const std::string &vpuCompiler) {
auto pipeline = [](const std::string &model, const std::string &device, const std::string &performanceHint,
const bool isCacheEnabled, const std::string &vpuCompiler) {
Core ie;
CNNNetwork cnnNetwork;
ExecutableNetwork exeNetwork;
InferRequest inferRequest;
size_t batchSize = 0;

if (!performanceHint.empty()) {
std::vector<std::string> supported_config_keys = ie.GetMetric(device, METRIC_KEY(SUPPORTED_CONFIG_KEYS));

// enables performance hint for specified device
std::string performanceConfig;
if (performanceHint == "THROUGHPUT")
performanceConfig = CONFIG_VALUE(THROUGHPUT);
else if (performanceHint == "LATENCY")
performanceConfig = CONFIG_VALUE(LATENCY);

if (std::find(supported_config_keys.begin(), supported_config_keys.end(), "PERFORMANCE_HINT") ==
supported_config_keys.end()) {
std::cerr << "Device " << device << " doesn't support config key 'PERFORMANCE_HINT'!\n"
<< "Performance config was not set.";
}
else
ie.SetConfig({{CONFIG_KEY(PERFORMANCE_HINT), performanceConfig}}, device);
}

// set config for VPUX device
std::map<std::string, std::string> vpuConfig = {};
if (vpuCompiler == "MCM")
vpuConfig = {{"VPUX_COMPILER_TYPE", "MCM"}};
else if (vpuCompiler == "MLIR")
vpuConfig = {{"VPUX_COMPILER_TYPE", "MLIR"}};

// first_inference_latency = time_to_inference + first_inference
{
SCOPED_TIMER(first_inference_latency);
SCOPED_TIMER(time_to_inference);
{
SCOPED_TIMER(load_plugin);
ie.GetVersions(device);
// enables performance hint for specified device
ie.SetConfig({{CONFIG_KEY(PERFORMANCE_HINT), CONFIG_VALUE(LATENCY)}}, device);

if (isCacheEnabled)
ie.SetConfig({{CONFIG_KEY(CACHE_DIR), "models_cache"}});
}
{
SCOPED_TIMER(create_exenetwork);
if (TimeTest::fileExt(model) == "blob") {
SCOPED_TIMER(import_network);
exeNetwork = ie.ImportNetwork(model, device);
if (!isCacheEnabled) {
SCOPED_TIMER(create_exenetwork);

if (TimeTest::fileExt(model) == "blob") {
SCOPED_TIMER(import_network);
exeNetwork = ie.ImportNetwork(model, device);
}
else {
{
SCOPED_TIMER(read_network);
cnnNetwork = ie.ReadNetwork(model);
batchSize = cnnNetwork.getBatchSize();
}

{
SCOPED_TIMER(load_network);
exeNetwork = ie.LoadNetwork(cnnNetwork, device, vpuConfig);
}
}
}
else {
{
SCOPED_TIMER(read_network);
cnnNetwork = ie.ReadNetwork(model);
batchSize = cnnNetwork.getBatchSize();
}

{
SCOPED_TIMER(load_network);
exeNetwork = ie.LoadNetwork(cnnNetwork, device);
}
SCOPED_TIMER(load_network);
exeNetwork = ie.LoadNetwork(model, device);
}
}
inferRequest = exeNetwork.CreateInferRequest();
}

{
SCOPED_TIMER(first_inference);
inferRequest = exeNetwork.CreateInferRequest();

{
SCOPED_TIMER(fill_inputs)
batchSize = batchSize != 0 ? batchSize : 1;
SCOPED_TIMER(fill_inputs);
const InferenceEngine::ConstInputsDataMap inputsInfo(exeNetwork.GetInputsInfo());
batchSize = batchSize != 0 ? batchSize : 1;
fillBlobs(inferRequest, inputsInfo, batchSize);
}
inferRequest.Infer();
@@ -69,7 +105,7 @@ int runPipeline(const std::string &model, const std::string &device) {
};

try {
pipeline(model, device);
pipeline(model, device, performanceHint, isCacheEnabled, vpuCompiler);
} catch (const InferenceEngine::Exception &iex) {
std::cerr
<< "Inference Engine pipeline failed with Inference Engine exception:\n"

@@ -1,68 +0,0 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <inference_engine.hpp>
#include <ie_plugin_config.hpp>
#include <iostream>

#include "common_utils.h"
#include "timetests_helper/timer.h"
#include "timetests_helper/utils.h"
using namespace InferenceEngine;

/**
* @brief Function that contain executable pipeline which will be called from
* main(). The function should not throw any exceptions and responsible for
* handling it by itself.
*/
int runPipeline(const std::string &model, const std::string &device) {
auto pipeline = [](const std::string &model, const std::string &device) {
Core ie;
CNNNetwork cnnNetwork;
ExecutableNetwork exeNetwork;
InferRequest inferRequest;

{
SCOPED_TIMER(first_inference_latency);
{
SCOPED_TIMER(load_plugin);
ie.GetVersions(device);
}
{
SCOPED_TIMER(load_network);
// enables cache
ie.SetConfig({{CONFIG_KEY(CACHE_DIR), "models_cache"}});
exeNetwork = ie.LoadNetwork(model, device);
}
{
SCOPED_TIMER(first_inference);
inferRequest = exeNetwork.CreateInferRequest();
{
SCOPED_TIMER(fill_inputs)
const InferenceEngine::ConstInputsDataMap inputsInfo(exeNetwork.GetInputsInfo());
fillBlobs(inferRequest, inputsInfo, 1);
}
inferRequest.Infer();
}
}
};

try {
pipeline(model, device);
} catch (const InferenceEngine::Exception &iex) {
std::cerr
<< "Inference Engine pipeline failed with Inference Engine exception:\n"
<< iex.what();
return 1;
} catch (const std::exception &ex) {
std::cerr << "Inference Engine pipeline failed with exception:\n"
<< ex.what();
return 2;
} catch (...) {
std::cerr << "Inference Engine pipeline failed\n";
return 3;
}
return 0;
}
@@ -1,84 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <inference_engine.hpp>
#include <iostream>

#include "common_utils.h"
#include "timetests_helper/timer.h"
#include "timetests_helper/utils.h"
using namespace InferenceEngine;

/**
* @brief Function that contain executable pipeline which will be called from
* main(). The function should not throw any exceptions and responsible for
* handling it by itself.
*/
int runPipeline(const std::string &model, const std::string &device) {
auto pipeline = [](const std::string &model, const std::string &device) {
Core ie;
CNNNetwork cnnNetwork;
ExecutableNetwork exeNetwork;
InferRequest inferRequest;
size_t batchSize = 0;

{
SCOPED_TIMER(first_inference_latency);
{
SCOPED_TIMER(load_plugin);
ie.GetVersions(device);
}
{
SCOPED_TIMER(create_exenetwork);
if (TimeTest::fileExt(model) == "blob") {
SCOPED_TIMER(import_network);
exeNetwork = ie.ImportNetwork(model, device);
}
else {
{
SCOPED_TIMER(read_network);
cnnNetwork = ie.ReadNetwork(model);
batchSize = cnnNetwork.getBatchSize();
}

{
SCOPED_TIMER(load_network);
exeNetwork = ie.LoadNetwork(cnnNetwork, device, {{"VPUX_COMPILER_TYPE", "MLIR"}});
}
}
}
}

{
SCOPED_TIMER(first_inference);
inferRequest = exeNetwork.CreateInferRequest();

{
SCOPED_TIMER(fill_inputs)
batchSize = batchSize != 0 ? batchSize : 1;
const InferenceEngine::ConstInputsDataMap inputsInfo(exeNetwork.GetInputsInfo());
fillBlobs(inferRequest, inputsInfo, batchSize);
}
inferRequest.Infer();
}
};

try {
pipeline(model, device);
} catch (const InferenceEngine::Exception &iex) {
std::cerr
<< "Inference Engine pipeline failed with Inference Engine exception:\n"
<< iex.what();
return 1;
} catch (const std::exception &ex) {
std::cerr << "Inference Engine pipeline failed with exception:\n"
<< ex.what();
return 2;
} catch (...) {
std::cerr << "Inference Engine pipeline failed\n";
return 3;
}
return 0;
}
@@ -26,6 +26,18 @@ static const char target_device_message[] =
"plugin. "
"The application looks for a suitable plugin for the specified device.";

/// @brief message for performance hint argument
static const char performance_hint_message[] =
"Not required. Enables performance hint for specified device. Available hints are LATENCY and THROUGHPUT.";

/// @brief message for cache argument
static const char cpu_cache_message[] =
"Not required. Use this key to run timetests with CPU models caching.";

/// @brief message for vpu argument
static const char vpu_compiler_message[] =
"Not required. Use this key to run timetests using MLIR or MCM VPUX compiler type.";

/// @brief message for statistics path argument
static const char statistics_path_message[] =
"Required. Path to a file to write statistics.";
@@ -44,6 +56,18 @@ DEFINE_string(m, "", model_message);
/// It is a required parameter
DEFINE_string(d, "", target_device_message);

/// @brief Define parameter for set performance hint for target device <br>
/// It is a non-required parameter
DEFINE_string(p, "", performance_hint_message);

/// @brief Define parameter for set CPU models caching <br>
/// It is a non-required parameter
DEFINE_bool(c, false, cpu_cache_message);

/// @brief Define parameter VPU compiler type <br>
/// It is a non-required parameter
DEFINE_string(v, "", vpu_compiler_message);

/// @brief Define parameter for set path to a file to write statistics <br>
/// It is a required parameter
DEFINE_string(s, "", statistics_path_message);
@@ -56,10 +80,13 @@ static void showUsage() {
std::cout << "TimeTests [OPTION]" << std::endl;
std::cout << "Options:" << std::endl;
std::cout << std::endl;
std::cout << " -h, --help " << help_message << std::endl;
std::cout << " -h, --help " << help_message << std::endl;
std::cout << " -m \"<path>\" " << model_message << std::endl;
std::cout << " -d \"<device>\" " << target_device_message
<< std::endl;
std::cout << " -s \"<path>\" " << statistics_path_message
<< std::endl;
std::cout << " -p \"<perf_hint>\" " << performance_hint_message << std::endl;
std::cout << " -c " << cpu_cache_message << std::endl;
std::cout << " -v \"<compiler_type>\" " << vpu_compiler_message << std::endl;
}

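As an illustration only (binary name and paths are assumptions), the flags defined above could be driven from a Python wrapper like this:

    import subprocess

    cmd = ["./timeTests", "-m", "model.xml", "-d", "CPU",
           "-p", "LATENCY",          # performance hint: LATENCY or THROUGHPUT
           "-c",                     # enable CPU models caching
           "-s", "statistics.yml"]   # file to write statistics to
    subprocess.run(cmd, check=True)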
@@ -8,7 +8,8 @@

#include <iostream>

int runPipeline(const std::string &model, const std::string &device);
int runPipeline(const std::string &model, const std::string &device, const std::string &performanceHint,
const bool isCacheEnabled, const std::string &vpuCompiler);

/**
* @brief Parses command line and check required arguments
@@ -40,7 +41,7 @@ bool parseAndCheckCommandLine(int argc, char **argv) {
*/
int _runPipeline() {
SCOPED_TIMER(full_run);
return runPipeline(FLAGS_m, FLAGS_d);
return runPipeline(FLAGS_m, FLAGS_d, FLAGS_p, FLAGS_c, FLAGS_v);
}

/**
@@ -54,4 +55,4 @@ int main(int argc, char **argv) {
StatisticsWriter::Instance().setFile(FLAGS_s);
StatisticsWriter::Instance().write();
return status;
}
}

@@ -43,7 +43,7 @@ def pytest_addoption(parser):
test_args_parser.addoption(
"--test_conf",
type=Path,
help="path to a test config",
help="Path to a test config",
default=Path(__file__).parent / "test_config.yml"
)
test_args_parser.addoption(
@@ -51,20 +51,38 @@ def pytest_addoption(parser):
required=True,
dest="executable",
type=Path,
help="path to a timetest binary to execute"
help="Path to a timetest binary to execute"
)
test_args_parser.addoption(
"--niter",
type=check_positive_int,
help="number of iterations to run executable and aggregate results",
help="Number of iterations to run executable and aggregate results",
default=3
)
test_args_parser.addoption(
"--cpu_cache",
action='store_true',
help="Enable model CPU cache usage",
)
test_args_parser.addoption(
"--perf_hint",
choices=['LATENCY', 'THROUGHPUT'],
default='LATENCY',
type=str,
help='Enables performance hint for specified device. Default hint is LATENCY'
)
test_args_parser.addoption(
"--vpu_compiler",
choices=["MCM", "MLIR"],
type=str,
help="Change VPUX compiler type",
)
db_args_parser = parser.getgroup("timetest database use")
db_args_parser.addoption(
'--db_submit',
metavar="RUN_ID",
type=str,
help='submit results to the database. ' \
help='Submit results to the database. ' \
'`RUN_ID` should be a string uniquely identifying the run' \
' (like Jenkins URL or time)'
)
@@ -79,19 +97,21 @@ def pytest_addoption(parser):
'--db_collection',
type=str,
required=is_db_used,
help='collection name in database',
help='Collection name in database',
choices=DB_COLLECTIONS
)
db_args_parser.addoption(
'--db_metadata',
type=str,
default=None,
help='path to JSON-formatted file to extract additional information')
help='Path to JSON-formatted file to extract additional information'
)
db_args_parser.addoption(
'--manifest',
type=Path,
required=is_db_used,
help='path to build manifest to extract commit information')
help='Path to build manifest to extract commit information'
)

@pytest.fixture(scope="session")
@@ -112,8 +132,26 @@ def niter(request):
return request.config.getoption('niter')

@pytest.fixture(scope="session")
def cpu_cache(request):
"""Fixture function for command-line option."""
return request.config.getoption('cpu_cache')

@pytest.fixture(scope="session")
def perf_hint(request):
"""Fixture function for command-line option."""
return request.config.getoption('perf_hint')

@pytest.fixture(scope="session")
def vpu_compiler(request):
"""Fixture function for command-line option."""
return request.config.getoption('vpu_compiler')

# -------------------- CLI options --------------------

@pytest.fixture(scope="function")
def temp_dir(pytestconfig):
"""Create temporary directory for test purposes.

@@ -34,14 +34,17 @@ from scripts.run_timetest import run_timetest
REFS_FACTOR = 1.2  # 120%

def test_timetest(instance, executable, niter, cl_cache_dir, model_cache_dir, test_info, temp_dir, validate_test_case,
prepare_db_info):
def test_timetest(instance, executable, niter, cl_cache_dir, cpu_cache, vpu_compiler, perf_hint, model_cache_dir,
test_info, temp_dir, validate_test_case, prepare_db_info):
"""Parameterized test.

:param instance: test instance. Should not be changed during test run
:param executable: timetest executable to run
:param niter: number of times to run executable
:param cl_cache_dir: directory to store OpenCL cache
:param cpu_cache: flag to enable model CPU cache
:param vpu_compiler: flag to change VPUX compiler type
:param perf_hint: performance hint (optimize device for latency or throughput settings)
:param model_cache_dir: directory to store IE model cache
:param test_info: custom `test_info` field of built-in `request` pytest fixture
:param temp_dir: path to a temporary directory. Will be cleaned up after test run
@@ -63,7 +66,10 @@ def test_timetest(instance, executable, niter, cl_cache_dir, model_cache_dir, te
"executable": Path(executable),
"model": Path(model_path),
"device": instance["device"]["name"],
"niter": niter
"niter": niter,
"perf_hint": perf_hint,
"cpu_cache": cpu_cache,
"vpu_compiler": vpu_compiler if vpu_compiler else ""
}
logging.info("Run timetest once to generate any cache")
retcode, msg, _, _ = run_timetest({**exe_args, "niter": 1}, log=logging)

@@ -5,6 +5,8 @@ import os
import sys
from datetime import datetime

from openvino.runtime import Dimension

from openvino.tools.benchmark.benchmark import Benchmark
from openvino.tools.benchmark.parameters import parse_args
from openvino.tools.benchmark.utils.constants import MULTI_DEVICE_NAME, HETERO_DEVICE_NAME, CPU_DEVICE_NAME, \
@@ -15,8 +17,8 @@ from openvino.tools.benchmark.utils.progress_bar import ProgressBar
from openvino.tools.benchmark.utils.utils import next_step, get_number_iterations, pre_post_processing, \
process_help_inference_string, print_perf_counters, dump_exec_graph, get_duration_in_milliseconds, \
get_command_line_arguments, parse_nstreams_value_per_device, parse_devices, get_inputs_info, \
print_inputs_and_outputs_info, get_batch_size, load_config, dump_config, get_latency_groups, \
check_for_static
print_inputs_and_outputs_info, get_network_batch_size, load_config, dump_config, get_latency_groups, \
check_for_static, can_measure_as_static
from openvino.tools.benchmark.utils.statistics_report import StatisticsReport, averageCntReport, detailedCntReport

@@ -225,9 +227,7 @@ def run(args):
('load network time (ms)', duration_ms)
])
app_inputs_info, _ = get_inputs_info(args.shape, args.data_shape, args.layout, args.batch_size, args.input_scale, args.input_mean, exe_network.get_runtime_function().get_parameters())
batch_size = get_batch_size(app_inputs_info)
if batch_size.is_dynamic and benchmark.api_type == 'sync':
raise Exception("Dynamic batch size is supported only in async mode")
batch_size = get_network_batch_size(app_inputs_info)
elif not is_network_compiled:
# --------------------- 4. Read the Intermediate Representation of the network -----------------------------
next_step()
@@ -262,10 +262,7 @@ def run(args):
])

# use batch size according to provided layout and shapes
batch_size = get_batch_size(app_inputs_info)
if batch_size.is_dynamic and benchmark.api_type == 'sync':
raise Exception("Dynamic batch size is supported only in async mode")

batch_size = get_network_batch_size(app_inputs_info)
logger.info(f'Network batch size: {batch_size}')

# --------------------- 6. Configuring inputs and outputs of the model --------------------------------------------------
@@ -307,10 +304,7 @@ def run(args):
('import network time (ms)', duration_ms)
])
app_inputs_info, _ = get_inputs_info(args.shape, args.data_shape, args.layout, args.batch_size, args.input_scale, args.input_mean, exe_network.get_runtime_function().get_parameters())
batch_size = get_batch_size(app_inputs_info)
if batch_size.is_dynamic and benchmark.api_type == 'sync':
raise Exception("Dynamic batch size is supported only in async mode")

batch_size = get_network_batch_size(app_inputs_info)

# --------------------- 8. Querying optimal runtime parameters --------------------------------------------------
next_step()
@@ -353,7 +347,8 @@ def run(args):
data_queue = get_input_data(paths_to_input, app_inputs_info)

static_mode = check_for_static(app_inputs_info)
if not static_mode and benchmark.api_type == 'sync':
allow_inference_only_or_sync = can_measure_as_static(app_inputs_info)
if not allow_inference_only_or_sync and benchmark.api_type == 'sync':
raise Exception("Benchmarking of the model with dynamic shapes is available for async API only."
"Please use -api async -nstreams 1 -nireq 1 to emulate sync behavior.")

@@ -362,9 +357,13 @@ def run(args):
benchmark.inference_only = True
else:
benchmark.inference_only = False
elif benchmark.inference_only and not static_mode:
elif benchmark.inference_only and not allow_inference_only_or_sync:
raise Exception("Benchmarking dynamic model available with input filling in measurement loop only!")

# update batch size in case dynamic network with one data_shape
if benchmark.inference_only and batch_size.is_dynamic:
batch_size = Dimension(data_queue.batch_sizes[data_queue.current_group_id])

benchmark.latency_groups = get_latency_groups(app_inputs_info)

if len(benchmark.latency_groups) > 1:

@@ -236,11 +236,17 @@ def get_duration_in_secs(target_device):

def check_for_static(app_input_info):
is_static = True
for info in app_input_info:
if info.is_dynamic:
return False
return is_static
return True

def can_measure_as_static(app_input_info):
for info in app_input_info:
if info.is_dynamic and (len(info.shapes) > 1 or info.original_shape.is_static):
return False
return True

def parse_devices(device_string):
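A quick sketch (the dummy object is a stand-in for `AppInputInfo`, not the real class) of how the new `can_measure_as_static` differs from `check_for_static` for a dynamic input that was given exactly one data shape:

    from types import SimpleNamespace

    dyn_input = SimpleNamespace(is_dynamic=True,
                                shapes=[(1, 3, 224, 224)],                        # one concrete data shape
                                original_shape=SimpleNamespace(is_static=False))  # genuinely dynamic model input

    print(check_for_static([dyn_input]))       # False - at least one input is dynamic
    print(can_measure_as_static([dyn_input]))  # True - single data shape on an originally dynamic input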
@@ -428,6 +434,7 @@ class AppInputInfo:
def __init__(self):
self.element_type = None
self.layout = Layout()
self.original_shape = None
self.partial_shape = None
self.data_shapes = []
self.scale = []
@@ -550,6 +557,7 @@ def get_inputs_info(shape_string, data_shape_string, layout_string, batch_size,
# Input name
info.name = input_names[i]
# Shape
info.original_shape = parameters[i].get_partial_shape()
if info.name in shape_map.keys():
info.partial_shape = parse_partial_shape(shape_map[info.name])
reshape = True
@@ -625,7 +633,7 @@ def get_inputs_info(shape_string, data_shape_string, layout_string, batch_size,
return input_info, reshape

def get_batch_size(inputs_info):
def get_network_batch_size(inputs_info):
null_dimension = Dimension(0)
batch_size = null_dimension
for info in inputs_info:

@@ -124,12 +124,14 @@ class CanonicalizePathCheckExistenceIfNeededAction(CanonicalizePathCheckExistenc

class DeprecatedCanonicalizePathCheckExistenceAction(CanonicalizePathCheckExistenceAction):
def __call__(self, parser, namespace, values, option_string=None):
super().__call__(parser, namespace, values, option_string)
dep_msg = "Use of deprecated cli option {} detected. Option use in the following releases will be fatal. ".format(
option_string)
if 'tensorflow_use_custom_operations_config' in option_string:
dep_msg += 'Please use --transformations_config cli option instead'
if 'mean_file' in option_string or 'mean_offset' in option_string:
dep_msg += 'Please use --mean_values cli option instead.'
log.error(dep_msg, extra={'is_warning': True})
super().__call__(parser, namespace, values, option_string)

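For reference, a small reconstruction (exact logger output may differ) of the warning this action builds when `--mean_file` is passed:

    option_string = '--mean_file'
    dep_msg = "Use of deprecated cli option {} detected. Option use in the following releases will be fatal. ".format(option_string)
    if 'mean_file' in option_string or 'mean_offset' in option_string:
        dep_msg += 'Please use --mean_values cli option instead.'
    print(dep_msg)
    # Use of deprecated cli option --mean_file detected. Option use in the following releases will be fatal. Please use --mean_values cli option instead.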
def readable_file(path: str):
@@ -377,7 +379,7 @@ def get_common_cli_parser(parser: argparse.ArgumentParser = None):
'the Inference Engine API in runtime may fail for such an IR.',
action='store_true', default=False)
common_group.add_argument('--keep_shape_ops',
help='The option is ignored. Expected behavior is enabled by default.',
help=argparse.SUPPRESS,
action=IgnoredAction, default=True)
common_group.add_argument('--disable_weights_compression',
help='Disable compression and store weights with original precision.',
@@ -524,11 +526,13 @@ def get_caffe_cli_parser(parser: argparse.ArgumentParser = None):
'CustomLayersMapping.xml'),
action=CanonicalizePathCheckExistenceAction)
caffe_group.add_argument('--mean_file', '-mf',
help='Mean image to be used for the input. Should be a binaryproto file',
help='[DEPRECATED] ' +
'Mean image to be used for the input. Should be a binaryproto file',
default=None,
action=CanonicalizePathCheckExistenceAction)
action=DeprecatedCanonicalizePathCheckExistenceAction)
caffe_group.add_argument('--mean_file_offsets', '-mo',
help='Mean image offsets to be used for the input binaryproto file. ' +
help='[DEPRECATED] ' +
'Mean image offsets to be used for the input binaryproto file. ' +
'When the mean image is bigger than the expected input, it is cropped. By default, centers ' +
'of the input image and the mean image are the same and the mean image is cropped by ' +
'dimensions of the input image. The format to pass this option is the following: "-mo (x,y)". In this ' +