Merge remote-tracking branch 'upstream/master'

Steve Yoo 2021-07-26 11:57:49 +09:00
commit ebdbea67cb
181 changed files with 5953 additions and 2352 deletions

View File

@@ -88,6 +88,11 @@ jobs:
python3 -m pip install -r $(REPO_DIR)/inference-engine/ie_bridges/python/wheel/requirements-dev.txt
# For running Python API tests
python3 -m pip install -r $(REPO_DIR)/inference-engine/ie_bridges/python/src/requirements-dev.txt
# For running nGraph unit tests dependent on Python frameworks
python3 -m pip install -r $(REPO_DIR)/ngraph/test/requirements_test.txt
# For MO unit tests
python3 -m pip install -r $(REPO_DIR)/model-optimizer/requirements.txt
python3 -m pip install -r $(REPO_DIR)/model-optimizer/requirements_dev.txt
# Speed up build
wget https://github.com/ninja-build/ninja/releases/download/v1.10.0/ninja-linux.zip
unzip ninja-linux.zip
@@ -109,6 +114,7 @@ jobs:
-DENABLE_WHEEL=ON
-DENABLE_TESTS=ON
-DNGRAPH_ONNX_IMPORT_ENABLE=ON
-DNGRAPH_ONNX_FRONTEND_ENABLE=ON
-DENABLE_FASTER_BUILD=ON
-DENABLE_STRICT_DEPENDENCIES=OFF
-DIE_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)/modules
@@ -149,6 +155,12 @@ jobs:
workingDirectory: $(BUILD_SAMPLES_DIR)
displayName: 'Build c samples'
- script: |
export MO_ROOT=$(INSTALL_DIR)/deployment_tools/model_optimizer
. $(SETUPVARS) -pyver 3.6 && python3 -m pytest -s $(INSTALL_DIR)/deployment_tools/model_optimizer/unit_tests --junitxml=TEST-ModelOptimizer.xml
displayName: 'Model Optimizer UT'
continueOnError: false
- script: . $(SETUPVARS) && $(INSTALL_TEST_DIR)/unit-test --gtest_print_time=1 --gtest_filter=-backend_api.config_unsupported:*IE_GPU* --gtest_output=xml:TEST-NGraphUT.xml
displayName: 'nGraph UT'
continueOnError: false

View File

@@ -95,6 +95,7 @@ jobs:
-DENABLE_SAMPLES=OFF
-DENABLE_SPEECH_DEMO=OFF
-DNGRAPH_ONNX_IMPORT_ENABLE=ON
-DNGRAPH_ONNX_FRONTEND_ENABLE=ON
-DNGRAPH_DEBUG_ENABLE=OFF
$(REPO_DIR)
workingDirectory: $(BUILD_DIR)

View File

@@ -69,6 +69,7 @@ RUN cmake .. \
-DENABLE_PYTHON=ON \
-DPYTHON_EXECUTABLE=/usr/bin/python3 \
-DNGRAPH_ONNX_IMPORT_ENABLE=ON \
-DNGRAPH_ONNX_FRONTEND_ENABLE=ON \
-DNGRAPH_DEBUG_ENABLE=OFF \
-DCMAKE_INSTALL_PREFIX=/openvino/dist \
-DNGRAPH_USE_PROTOBUF_LITE=${PROTOBUF_LITE}

View File

@@ -92,9 +92,15 @@ ie_coverage_genhtml(INFO_FILE "ngraph"
if(NGRAPH_ONNX_IMPORT_ENABLE)
ie_coverage_extract(INPUT "openvino" OUTPUT "onnx_importer"
PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/ngraph/frontend/onnx_common*"
"${OV_COVERAGE_BASE_DIRECTORY}/ngraph/frontend/onnx_editor*"
"${OV_COVERAGE_BASE_DIRECTORY}/ngraph/frontend/onnx_import*")
PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/ngraph/frontend/onnx/onnx_common*"
"${OV_COVERAGE_BASE_DIRECTORY}/ngraph/frontend/onnx/onnx_import*")
ie_coverage_genhtml(INFO_FILE "onnx_importer"
PREFIX "${OV_COVERAGE_BASE_DIRECTORY}")
endif()
if(NGRAPH_ONNX_FRONTEND_ENABLE)
ie_coverage_extract(INPUT "openvino" OUTPUT "onnx_ngraph_frontend"
PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/ngraph/frontend/onnx/frontend*")
ie_coverage_genhtml(INFO_FILE "onnx_ngraph_frontend"
PREFIX "${OV_COVERAGE_BASE_DIRECTORY}")
endif()

View File

@@ -34,7 +34,7 @@ endif()
# common sanitizer options
if (DEFINED SANITIZER_COMPILER_FLAGS)
# ensure symbols are present
set(SANITIZER_COMPILER_FLAGS "-g -fno-omit-frame-pointer")
set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -g -fno-omit-frame-pointer")
# prevent unloading libraries at runtime, so sanitizer can resolve their symbols
set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -Wl,-z,nodelete")

View File

@@ -38,8 +38,6 @@ ie_dependent_option (ENABLE_PYTHON "enables ie python bridge build" OFF "PYTHONL
find_package(PythonInterp 3 QUIET)
ie_dependent_option (ENABLE_DOCS "Build docs using Doxygen" OFF "PYTHONINTERP_FOUND" OFF)
ie_option (ENABLE_SYSTEM_PUGIXML "use the system copy of pugixml" OFF)
#
# Inference Engine specific options
#
@@ -112,7 +110,11 @@ ie_dependent_option(ENABLE_TBB_RELEASE_ONLY "Only Release TBB libraries are link
ie_option (ENABLE_SYSTEM_PUGIXML "use the system copy of pugixml" OFF)
ie_option (ENABLE_CPU_DEBUG_CAPS "enable CPU debug capabilities at runtime" OFF)
ie_option (ENABLE_DEBUG_CAPS "enable OpenVINO debug capabilities at runtime" OFF)
ie_dependent_option (ENABLE_GPU_DEBUG_CAPS "enable GPU debug capabilities at runtime" ON "ENABLE_DEBUG_CAPS" OFF)
ie_dependent_option (ENABLE_CPU_DEBUG_CAPS "enable CPU debug capabilities at runtime" ON "ENABLE_DEBUG_CAPS" OFF)
if(ANDROID OR WINDOWS_STORE OR (MSVC AND (ARM OR AARCH64)))
set(protoc_available OFF)
@@ -121,9 +123,12 @@ else()
endif()
ie_dependent_option(NGRAPH_ONNX_IMPORT_ENABLE "Enable ONNX importer" ON "protoc_available" OFF)
ie_dependent_option(NGRAPH_ONNX_FRONTEND_ENABLE "Enable ONNX FrontEnd" OFF "NGRAPH_ONNX_IMPORT_ENABLE" OFF)
ie_dependent_option(NGRAPH_PDPD_FRONTEND_ENABLE "Enable PaddlePaddle FrontEnd" ON "protoc_available" OFF)
ie_dependent_option(NGRAPH_USE_PROTOBUF_LITE "Compiles and links with protobuf-lite" OFF
"NGRAPH_ONNX_IMPORT_ENABLE OR NGRAPH_PDPD_FRONTEND_ENABLE" OFF)
ie_dependent_option(NGRAPH_USE_SYSTEM_PROTOBUF "Use system protobuf" OFF
"NGRAPH_ONNX_IMPORT_ENABLE OR NGRAPH_PDPD_FRONTEND_ENABLE" OFF)
ie_dependent_option(NGRAPH_UNIT_TEST_ENABLE "Enables ngraph unit tests" ON "ENABLE_TESTS;NOT ANDROID" OFF)
ie_dependent_option(NGRAPH_UNIT_TEST_BACKENDS_ENABLE "Control the building of unit tests using backends" ON
"NGRAPH_UNIT_TEST_ENABLE" OFF)

View File

@@ -221,6 +221,7 @@ limitations under the License.
<tab type="user" title="PriorBox-1" url="@ref openvino_docs_ops_detection_PriorBox_1"/>
<tab type="user" title="Proposal-1" url="@ref openvino_docs_ops_detection_Proposal_1"/>
<tab type="user" title="Proposal-4" url="@ref openvino_docs_ops_detection_Proposal_4"/>
<tab type="user" title="RandomUniform-8" url="@ref openvino_docs_ops_generation_RandomUniform_8"/>
<tab type="user" title="Range-1" url="@ref openvino_docs_ops_generation_Range_1"/>
<tab type="user" title="Range-4" url="@ref openvino_docs_ops_generation_Range_4"/>
<tab type="user" title="ReadValue-3" url="@ref openvino_docs_ops_infrastructure_ReadValue_3"/>

View File

@@ -0,0 +1,231 @@
## RandomUniform <a name="RandomUniform"></a> {#openvino_docs_ops_generation_RandomUniform_8}
**Versioned name**: *RandomUniform-8*
**Category**: Generation
**Short description**: *RandomUniform* operation generates a sequence of random values from a uniform distribution.
**Detailed description**:
*RandomUniform* operation generates random numbers from a uniform distribution in the range `[*minval*, *maxval*)`.
The generation algorithm is based on an underlying random integer generator that uses the Philox algorithm, a
counter-based pseudo-random generator that produces uint32 values. A single invocation of the Philox algorithm returns
four random values determined by the given *key* and *counter* values. *Key* and *counter* are initialized
with the *seed* and *seed2* attributes respectively.
\f[
key = seed\\
counter = seed2
\f]
The algorithm is described in the original paper: [Parallel Random Numbers: As Easy as 1, 2, 3](https://www.thesalmons.org/john/random123/papers/random123sc11.pdf).
The result of Philox is calculated by applying a fixed number of so-called "rounds", each of which updates *key* and *counter*.
This implementation uses the 4x32_10 version of the Philox algorithm, with 10 rounds.
Suppose *n* determines the *n*-th group of four elements of the random sequence.
In each round, *key*, *counter* and *n* are split into pairs of uint32 values:
\f[
R = cast\_to\_uint32(value)\\
L = cast\_to\_uint32(value >> 32),
\f]
where *cast\_to\_uint32* is a static cast to uint32, *value* is the uint64 input value, *L* and *R* are the uint32
result values, and >> is a bitwise right shift.
Then *n* and *counter* are updated with the following formula:
\f[
L'= mullo(R, M)\\
R' = mulhi(R, M) {\oplus} k {\oplus} L \\
mulhi(a, b) = floor((a {\times} b) / 2^{32}) \\
mullo(a, b) = (a {\times} b) \mod 2^{32}
\f]
where `{\oplus}` is bitwise XOR; *k* = `R_{key}` for updating *counter* and *k* = `L_{key}` for updating *n*;
*M* = `0xD2511F53` for updating *n* and *M* = `0xCD9E8D57` for updating *counter*.
After each round, *key* is incremented by another pair of constants:
\f[
L += 0x9E3779B9 \\
R += 0xBB67AE85
\f]
The values *L'_{n}*, *R'_{n}*, *L'_{counter}*, *R'_{counter}* are the four resulting random numbers.
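For illustration, here is a minimal C++ sketch of a single Philox 4x32 round following the formulas above; the names are illustrative, and the operation applies 10 such rounds:
```cpp
#include <cstdint>

// Halves of n, counter and key, as defined in the formulas above.
struct PhiloxState {
    uint32_t nL, nR, cL, cR, keyL, keyR;
};

inline void philoxRound(PhiloxState& s) {
    const uint64_t prodN = uint64_t{0xD2511F53} * s.nR;  // M for updating n
    const uint64_t prodC = uint64_t{0xCD9E8D57} * s.cR;  // M for updating counter
    const uint32_t nL = static_cast<uint32_t>(prodN);                        // mullo(R, M)
    const uint32_t nR = static_cast<uint32_t>(prodN >> 32) ^ s.keyL ^ s.nL;  // mulhi(R, M) ^ k ^ L
    const uint32_t cL = static_cast<uint32_t>(prodC);
    const uint32_t cR = static_cast<uint32_t>(prodC >> 32) ^ s.keyR ^ s.cL;
    s.nL = nL; s.nR = nR;
    s.cL = cL; s.cR = cR;
    s.keyL += 0x9E3779B9u;  // raise the key after each round
    s.keyR += 0xBB67AE85u;
}
```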
Float values in the range [0..1) are obtained from 32-bit integers by the following rules.
Float16 is formatted as follows: *sign* (1 bit), *exponent* (5 bits), *mantissa* (10 bits). The value is interpreted
using the following formula:
\f[
(-1)^{sign} {\times} 1.mantissa {\times} 2^{exponent - 15}
\f]
so to obtain float16 values, *sign*, *exponent* and *mantissa* are set as follows:
```
sign = 0
exponent = 15 (the biased representation of a zero exponent)
mantissa = the 10 low bits of a generated uint32 random value
```
So the resulting float16 value is:
```
x_uint16 = x // truncate the upper 16 bits
val = ((exponent << 10) | x_uint16 & 0x3ffu) - 1.0
```
where x is a generated uint32 random value.
Float32 is formatted as follows: *sign* (1 bit), *exponent* (8 bits), *mantissa* (23 bits). The value is interpreted
using the following formula:
\f[
(-1)^{sign} {\times} 1.mantissa {\times} 2^{exponent - 127}
\f]
so to obtain float32 values, *sign*, *exponent* and *mantissa* are set as follows:
```
sign = 0
exponent = 127 (the biased representation of a zero exponent)
mantissa = the 23 low bits of a generated uint32 random value
```
So the resulting float value is:
```
val = ((exponent << 23) | x & 0x7fffffu) - 1.0
```
where x is a generated uint32 random value.
Double is formatted as follows: *sign* (1 bit), *exponent* (11 bits), *mantissa* (52 bits). The value is interpreted
using the following formula:
\f[
(-1)^{sign} {\times} 1.mantissa {\times} 2^{exponent - 1023}
\f]
so to obtain double values, *sign*, *exponent* and *mantissa* are set as follows:
```
sign = 0
exponent = 1023 (the biased representation of a zero exponent)
mantissa = 52 bits taken from two concatenated uint32 values from the random integer generator
```
So the resulting double is obtained as follows:
```
mantissa_h = x0 & 0xfffffu; // upper 20 bits of mantissa
mantissa_l = x1;            // lower 32 bits of mantissa
mantissa = (mantissa_h << 32) | mantissa_l;
val = ((exponent << 52) | mantissa) - 1.0
```
where x0, x1 are generated uint32 random values.
To obtain a value in the specified range, each value is processed with the following formulas.
For float values:
\f[
result = x * (maxval - minval) + minval,
\f]
where *x* is a random float or double value in the range [0..1).
For integer values:
\f[
result = x \mod (maxval - minval) + minval,
\f]
where *x* is a uint32 random value.
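To make the float32 conversion and the range mapping concrete, here is a hedged C++ sketch (assuming IEEE-754 floats; the function name is illustrative):
```cpp
#include <cstdint>
#include <cstring>

// Build a float in [1.0, 2.0) from 23 random mantissa bits, subtract 1.0
// to land in [0, 1), then scale into [minval, maxval).
float uniformFromBits(uint32_t x, float minval, float maxval) {
    const uint32_t exponent = 127;                            // biased zero exponent
    const uint32_t bits = (exponent << 23) | (x & 0x7fffffu); // sign = 0
    float val;
    std::memcpy(&val, &bits, sizeof(val));                    // reinterpret the bits as a float
    val -= 1.0f;                                              // now in [0, 1)
    return val * (maxval - minval) + minval;
}
// Integer outputs instead use: result = x % (maxval - minval) + minval.
```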
Example 1. *RandomUniform* output with `seed` = 150, `seed2` = 10, `output_type` = f32:
```
input_shape = [ 3, 3 ]
output = [[0.7011236 0.30539632 0.93931055]
[0.9456035 0.11694777 0.50770056]
[0.5197197 0.22727466 0.991374 ]]
```
Example 2. *RandomUniform* output with `seed` = 80, `seed2` = 100, `output_type` = double:
```
input_shape = [ 2, 2 ]
minval = 2
maxval = 10
output = [[5.65927959 4.23122376]
[2.67008206 2.36423758]]
```
Example 3. *RandomUniform* output with `seed` = 80, `seed2` = 100, `output_type` = i32:
```
input_shape = [ 2, 3 ]
minval = 50
maxval = 100
output = [[65 70 56]
[59 82 92]]
```
**Attributes**:
* *output_type*
* **Description**: the type of the output. Determines the generation algorithm and affects the resulting values.
Output numbers generated for different values of *output_type* may not be equal.
* **Range of values**: "i32", "i64", "f16", "bf16", "f32", "f64".
* **Type**: string
* **Required**: *Yes*
* *seed*
* **Description**: global seed value.
* **Range of values**: positive integers
* **Type**: `int`
* **Required**: *Yes*
* *seed2*
* **Description**: operational seed value.
* **Range of values**: positive integers
* **Type**: `int`
* **Required**: *Yes*
**Inputs**:
* **1**: `shape` - 1D tensor of type *T_SHAPE* describing output shape. **Required.**
* **2**: `minval` - a scalar or 1D tensor with a single element of the type specified by the *output_type* attribute;
defines the lower bound on the range of random values to generate (inclusive). **Required.**
* **3**: `maxval` - a scalar or 1D tensor with a single element of the type specified by the *output_type* attribute;
defines the upper bound on the range of random values to generate (exclusive). **Required.**
**Outputs**:
* **1**: A tensor of the type specified by the *output_type* attribute, with the shape defined by the `shape` input tensor.
**Types**
* *T_SHAPE*: `int32` or `int64`.
*Example 1: IR example.*
```xml
<layer ... name="RandomUniform" type="RandomUniform">
<data output_type="f32" seed="234" seed2="148"/>
<input>
<port id="0" precision="I32"> <!-- shape value: [2, 3, 10] -->
<dim>3</dim>
</port>
<port id="1" precision="FP32"/> <!-- min value -->
<port id="2" precision="FP32"/> <!-- max value -->
</input>
<output>
<port id="3" precision="FP32" names="RandomUniform:0">
<dim>2</dim>
<dim>3</dim>
<dim>10</dim>
</port>
</output>
</layer>
```

View File

@@ -115,6 +115,7 @@ declared in `namespace opset8`.
* [PriorBox](detection/PriorBox_1.md)
* [Proposal](detection/Proposal_4.md)
* [PSROIPooling](detection/PSROIPooling_1.md)
* [RandomUniform](generation/RandomUniform_8.md)
* [Range](generation/Range_4.md)
* [ReLU](activation/ReLU_1.md)
* [ReadValue](infrastructure/ReadValue_3.md)

View File

@@ -0,0 +1,81 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include <ie_core.hpp>
#include <ie_ngraph_utils.hpp>
#include <ngraph/ngraph.hpp>
#include <shared_test_classes/base/layer_test_utils.hpp>
#include <vector>
#include "base_reference_test.hpp"
using namespace ngraph;
namespace reference_tests {
namespace {
struct AcoshParams {
Tensor input;
Tensor expected;
};
struct Builder : ParamsBuilder<AcoshParams> {
REFERENCE_TESTS_ADD_SET_PARAM(Builder, input);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, expected);
};
class ReferenceAcoshLayerTest : public testing::TestWithParam<AcoshParams>, public CommonReferenceTest {
public:
void SetUp() override {
auto params = GetParam();
function = CreateFunction(params.input.shape, params.input.type);
inputData = {params.input.data};
refOutData = {params.expected.data};
}
static std::string getTestCaseName(const testing::TestParamInfo<AcoshParams>& obj) {
auto param = obj.param;
std::ostringstream result;
result << "shape=" << param.input.shape << "_";
result << "type=" << param.input.type;
return result.str();
}
private:
static std::shared_ptr<Function> CreateFunction(const Shape& shape, const element::Type& type) {
const auto in = std::make_shared<op::Parameter>(type, shape);
const auto acosh = std::make_shared<op::Acosh>(in);
return std::make_shared<Function>(NodeVector {acosh}, ParameterVector {in});
}
};
TEST_P(ReferenceAcoshLayerTest, AcoshWithHardcodedRefs) {
Exec();
}
} // namespace
INSTANTIATE_TEST_SUITE_P(
smoke_Acosh_With_Hardcoded_Refs, ReferenceAcoshLayerTest,
::testing::Values(Builder {}
.input({{8}, element::f16, std::vector<ngraph::float16> {1.f, 2.f, 3.f, 4.f, 5.f, 10.f, 100.f, 1000.f}})
.expected({{8}, element::f16, std::vector<ngraph::float16> {0., 1.317, 1.763, 2.063, 2.292, 2.993, 5.298, 7.6012}}),
Builder {}
.input({{8}, element::f32, std::vector<float> {1.f, 2.f, 3.f, 4.f, 5.f, 10.f, 100.f, 1000.f}})
.expected({{8}, element::f32, std::vector<float> {0., 1.317, 1.763, 2.063, 2.292, 2.993, 5.298, 7.6012}}),
Builder {}
.input({{8}, element::i32, std::vector<int32_t> {1, 2, 3, 4, 5, 10, 100, 1000}})
.expected({{8}, element::i32, std::vector<int32_t> {0, 1, 2, 2, 2, 3, 5, 8}}),
Builder {}
.input({{8}, element::i64, std::vector<int64_t> {1, 2, 3, 4, 5, 10, 100, 1000}})
.expected({{8}, element::i64, std::vector<int64_t> {0, 1, 2, 2, 2, 3, 5, 8}}),
Builder {}
.input({{8}, element::u32, std::vector<uint32_t> {1, 2, 3, 4, 5, 10, 100, 1000}})
.expected({{8}, element::u32, std::vector<uint32_t> {0, 1, 2, 2, 2, 3, 5, 8}}),
Builder {}
.input({{8}, element::u64, std::vector<uint64_t> {1, 2, 3, 4, 5, 10, 100, 1000}})
.expected({{8}, element::u64, std::vector<uint64_t> {0, 1, 2, 2, 2, 3, 5, 8}})),
ReferenceAcoshLayerTest::getTestCaseName);
} // namespace reference_tests

View File

@@ -9,6 +9,8 @@
using namespace InferenceEngine;
namespace reference_tests {
CommonReferenceTest::CommonReferenceTest(): targetDevice("TEMPLATE") {
core = PluginCache::get().ie(targetDevice);
}
@@ -171,3 +173,5 @@ void CommonReferenceTest::ValidateBlobs(const InferenceEngine::Blob::Ptr& refBlo
FAIL() << "Comparator for " << precision << " precision isn't supported";
}
}
} // namespace reference_tests

View File

@@ -5,8 +5,12 @@
#include <ie_core.hpp>
#include <ie_ngraph_utils.hpp>
#include <ngraph/ngraph.hpp>
#include <ngraph/shape.hpp>
#include <ngraph/type/element_type.hpp>
#include <shared_test_classes/base/layer_test_utils.hpp>
namespace reference_tests {
class CommonReferenceTest {
public:
CommonReferenceTest();
@@ -51,3 +55,55 @@ InferenceEngine::Blob::Ptr CreateBlob(const ngraph::element::Type& element_type,
return blob;
}
///
/// Helper structure for building data for a single input.
///
struct Tensor {
Tensor() = default;
Tensor(const ngraph::Shape& shape, ngraph::element::Type type, const InferenceEngine::Blob::Ptr& data): shape {shape}, type {type}, data {data} {}
template <typename T>
Tensor(const ngraph::Shape& shape, ngraph::element::Type type, const std::vector<T>& data_elements)
: Tensor {shape, type, CreateBlob(type, data_elements)} {}
ngraph::Shape shape;
ngraph::element::Type type;
InferenceEngine::Blob::Ptr data;
};
///
/// Helper class for building test parameters.
///
/// e.g.:
/// struct Params {
/// Tensor i,o;
/// int mul;
/// };
/// struct TestParamsBuilder : ParamsBuilder<Params>
/// REFERENCE_TESTS_ADD_SET_PARAM(TestParamsBuilder, i);
/// REFERENCE_TESTS_ADD_SET_PARAM(TestParamsBuilder, o);
/// REFERENCE_TESTS_ADD_SET_PARAM(TestParamsBuilder, mul);
/// };
///
/// const Params p = TestParamsBuilder{}
/// .i(Tensor{{0}, i32, {1}})
/// .o(Tensor{{0}, i32, {1}})
/// .mul(10);
template <typename Params>
class ParamsBuilder {
protected:
Params params;
public:
operator Params() const {
return params;
}
};
#define REFERENCE_TESTS_ADD_SET_PARAM(builder_type, param_to_set) \
builder_type& param_to_set(decltype(params.param_to_set) t) { \
params.param_to_set = std::move(t); \
return *this; \
}
} // namespace reference_tests
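For clarity, here is a rough sketch of what `REFERENCE_TESTS_ADD_SET_PARAM` expands to for the `Builder` used in the Acosh test above (a sketch only; the real expansion is produced by the preprocessor):
```cpp
// REFERENCE_TESTS_ADD_SET_PARAM(Builder, input) generates a chainable setter:
struct Builder : ParamsBuilder<AcoshParams> {
    Builder& input(decltype(params.input) t) {
        params.input = std::move(t);
        return *this;
    }
    // REFERENCE_TESTS_ADD_SET_PARAM(Builder, expected) generates expected(...)
    // analogously, so tests can write: Builder {}.input(...).expected(...)
};
```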

View File

@@ -12,6 +12,7 @@
#include "base_reference_test.hpp"
using namespace reference_tests;
using namespace ngraph;
using namespace InferenceEngine;

View File

@@ -12,21 +12,22 @@
#include "base_reference_test.hpp"
using namespace reference_tests;
using namespace ngraph;
using namespace InferenceEngine;
namespace {
struct GrnParams {
template <class IT>
GrnParams(const float bias, const ngraph::PartialShape& shape, const ngraph::element::Type& iType, const std::vector<IT>& iValues,
GrnParams(const float bias, const PartialShape& shape, const element::Type& iType, const std::vector<IT>& iValues,
const std::vector<IT>& oValues)
: bias(bias), pshape(shape), inType(iType), outType(iType), inputData(CreateBlob(iType, iValues)), refData(CreateBlob(iType, oValues)) {}
float bias;
ngraph::PartialShape pshape;
ngraph::element::Type inType;
ngraph::element::Type outType;
InferenceEngine::Blob::Ptr inputData;
InferenceEngine::Blob::Ptr refData;
PartialShape pshape;
element::Type inType;
element::Type outType;
Blob::Ptr inputData;
Blob::Ptr refData;
};
class ReferenceGrnLayerTest : public testing::TestWithParam<GrnParams>, public CommonReferenceTest {
@@ -60,21 +61,21 @@ TEST_P(ReferenceGrnLayerTest, CompareWithHardcodedRefs) {
}
template <element::Type_t IN_ET>
std::vector<GrnParams> generateGrnParams(const ngraph::element::Type& type) {
std::vector<GrnParams> generateGrnParams(const element::Type& type) {
using T = typename element_type_traits<IN_ET>::value_type;
std::vector<GrnParams> grnParams {
// bias 1e-6 // 2D // 3D // 4D
GrnParams(1e-6, ngraph::PartialShape {3, 4}, type, std::vector<T> {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
GrnParams(1e-6, PartialShape {3, 4}, type, std::vector<T> {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
std::vector<T> {0.182574, 0.365148, 0.547723, 0.730297, 0.379049, 0.454859, 0.530669, 0.606478, 0.426162, 0.473514, 0.520865, 0.568217}),
GrnParams(1e-6, ngraph::PartialShape {2, 3, 4}, type,
GrnParams(1e-6, PartialShape {2, 3, 4}, type,
std::vector<T> {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24},
std::vector<T> {0.0966737, 0.169031, 0.224231, 0.267261, 0.483368, 0.507093, 0.523205, 0.534522, 0.870063, 0.845154, 0.822179, 0.801784,
0.433574, 0.441836, 0.449215, 0.455842, 0.566982, 0.568075, 0.569005, 0.569803, 0.700389, 0.694314, 0.688796, 0.683763}),
GrnParams(1e-6, ngraph::PartialShape {1, 2, 3, 4}, type,
GrnParams(1e-6, PartialShape {1, 2, 3, 4}, type,
std::vector<T> {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24},
std::vector<T> {0.0766965, 0.141421, 0.196116, 0.242536, 0.282166, 0.316228, 0.345705, 0.371391, 0.393919, 0.413803, 0.431455, 0.447214,
0.997055, 0.989949, 0.980581, 0.970143, 0.959365, 0.948683, 0.938343, 0.928477, 0.919145, 0.910366, 0.902134, 0.894427}),
GrnParams(1e-6, ngraph::PartialShape {2, 2, 3, 4}, type,
GrnParams(1e-6, PartialShape {2, 2, 3, 4}, type,
std::vector<T> {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48},
std::vector<T> {0.0766965, 0.141421, 0.196116, 0.242536, 0.282166, 0.316228, 0.345705, 0.371391, 0.393919, 0.413803, 0.431455, 0.447214,
@@ -82,17 +83,17 @@ std::vector<GrnParams> generateGrnParams(const ngraph::element::Type& type) {
0.559857, 0.564684, 0.56921, 0.573462, 0.577465, 0.581238, 0.584802, 0.588172, 0.591364, 0.594391, 0.597266, 0.6,
0.828589, 0.825307, 0.822192, 0.819232, 0.816416, 0.813733, 0.811176, 0.808736, 0.806405, 0.804176, 0.802043, 0.8}),
// bias 100.25 // 2D // 3D // 4D
GrnParams(100.25, ngraph::PartialShape {3, 4}, type, std::vector<T> {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
GrnParams(100.25, PartialShape {3, 4}, type, std::vector<T> {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
std::vector<T> {0.0876216, 0.175243, 0.262865, 0.350486, 0.301923, 0.362308, 0.422693, 0.483077, 0.385076, 0.427863, 0.470649, 0.513435}),
GrnParams(100.25, ngraph::PartialShape {2, 3, 4}, type,
GrnParams(100.25, PartialShape {2, 3, 4}, type,
std::vector<T> {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24},
std::vector<T> {0.0694629, 0.129032, 0.179525, 0.222137, 0.347314, 0.387097, 0.418891, 0.444273, 0.625166, 0.645161, 0.658258, 0.66641,
0.41125, 0.421303, 0.430287, 0.438356, 0.537789, 0.541675, 0.54503, 0.547945, 0.664327, 0.662047, 0.659774, 0.657534}),
GrnParams(100.25, ngraph::PartialShape {1, 2, 3, 4}, type,
GrnParams(100.25, PartialShape {1, 2, 3, 4}, type,
std::vector<T> {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24},
std::vector<T> {0.0608299, 0.115422, 0.164091, 0.207321, 0.245662, 0.279675, 0.309889, 0.336786, 0.360795, 0.38229, 0.401596, 0.418994,
0.790789, 0.807954, 0.820457, 0.829283, 0.835252, 0.839026, 0.841128, 0.841965, 0.841854, 0.841037, 0.839701, 0.837989f}),
GrnParams(100.25, ngraph::PartialShape {2, 2, 3, 4}, type,
GrnParams(100.25, PartialShape {2, 2, 3, 4}, type,
std::vector<T> {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48},
std::vector<T> {0.0608299, 0.115422, 0.164091, 0.207321, 0.245662, 0.279675, 0.309889, 0.336786, 0.360795, 0.38229, 0.401596, 0.418994,
@@ -103,9 +104,9 @@ std::vector<GrnParams> generateGrnParams(const ngraph::element::Type& type) {
}
std::vector<GrnParams> generateGrnCombinedParams() {
const std::vector<std::vector<GrnParams>> grnTypeParams {generateGrnParams<element::Type_t::bf16>(ngraph::element::bf16),
generateGrnParams<element::Type_t::f16>(ngraph::element::f16),
generateGrnParams<element::Type_t::f32>(ngraph::element::f32)};
const std::vector<std::vector<GrnParams>> grnTypeParams {generateGrnParams<element::Type_t::bf16>(element::bf16),
generateGrnParams<element::Type_t::f16>(element::f16),
generateGrnParams<element::Type_t::f32>(element::f32)};
std::vector<GrnParams> combinedParams;
std::for_each(grnTypeParams.begin(), grnTypeParams.end(), [&](std::vector<GrnParams> params) {
combinedParams.insert(combinedParams.end(), params.begin(), params.end());

View File

@@ -0,0 +1,254 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include <ie_core.hpp>
#include <ie_ngraph_utils.hpp>
#include <ngraph/ngraph.hpp>
#include <shared_test_classes/base/layer_test_utils.hpp>
#include <tuple>
#include "base_reference_test.hpp"
using namespace ngraph;
using namespace InferenceEngine;
using namespace reference_tests;
// ------------------------------ V0 ------------------------------
struct MVN1Params {
MVN1Params(const Tensor& paramInput, const ngraph::AxisSet& paramReductionAxes, const bool paramAcrossChannels, const bool paramNormalizeVariance,
const double paramEps, const Tensor& paramExpected)
: input(paramInput),
reductionAxes(paramReductionAxes),
acrossChannels(paramAcrossChannels),
normalizeVariance(paramNormalizeVariance),
eps(paramEps),
expected(paramExpected) {}
Tensor input;
ngraph::AxisSet reductionAxes;
bool acrossChannels;
bool normalizeVariance;
double eps;
Tensor expected;
};
class ReferenceMVN1LayerTest : public testing::TestWithParam<MVN1Params>, public CommonReferenceTest {
public:
void SetUp() override {
auto params = GetParam();
function = CreateFunction(params.input, params.reductionAxes, params.acrossChannels, params.normalizeVariance, params.eps);
inputData = {params.input.data};
refOutData = {params.expected.data};
}
static std::string getTestCaseName(const testing::TestParamInfo<MVN1Params>& obj) {
auto param = obj.param;
std::ostringstream result;
result << "shape=" << param.input.shape;
result << "_iType=" << param.input.type;
if (!param.reductionAxes.empty()) {
result << "_reductionAccess=" << CommonTestUtils::vec2str(param.reductionAxes.to_vector());
} else {
result << "_acrossChannels=" << (param.acrossChannels ? "TRUE" : "FALSE");
}
result << "_normalizeVariance=" << (param.normalizeVariance ? "TRUE" : "FALSE");
result << "_eps=" << param.eps;
return result.str();
}
private:
static std::shared_ptr<Function> CreateFunction(const Tensor& input, const ngraph::AxisSet& reductionAxes, const bool acrossChannels,
const bool normalizeVariance, const double eps) {
const auto in = std::make_shared<op::Parameter>(input.type, input.shape);
auto mvn = std::make_shared<op::MVN>(in, acrossChannels, normalizeVariance, eps);
if (!reductionAxes.empty()) {
mvn = std::make_shared<op::MVN>(in, reductionAxes, normalizeVariance, eps);
}
return std::make_shared<Function>(NodeVector {mvn}, ParameterVector {in});
}
};
TEST_P(ReferenceMVN1LayerTest, CompareWithHardcodedRefs) {
Exec();
}
const ngraph::AxisSet emptyReductionAxes {};
INSTANTIATE_TEST_SUITE_P(
smoke_MVN1_With_Hardcoded_Refs, ReferenceMVN1LayerTest,
::testing::Values(
// across_channels=false, variance=false
MVN1Params(Tensor {{1, 3, 3, 3}, ngraph::element::f32, std::vector<float> {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5,
6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9}},
emptyReductionAxes,
false,
false,
1e-9,
Tensor {{1, 3, 3, 3}, ngraph::element::f32, std::vector<float> {-4, -3, -2, -1, 0, 1, 2, 3, 4, -4, -3, -2, -1, 0,
1, 2, 3, 4, -4, -3, -2, -1, 0, 1, 2, 3, 4}}),
// across_channels=true, variance=false
MVN1Params(
Tensor {{1, 3, 2, 2}, ngraph::element::f32, std::vector<float> {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3}},
emptyReductionAxes,
true,
false,
1e-9,
Tensor {{1, 3, 2, 2}, ngraph::element::f32, std::vector<float> {-3.25, -2.25, -1.25, -0.25, 0.75, 1.75, 2.75, 3.75, 4.75, -3.25, -2.25, -1.25}}),
// across_channels=false, variance=true
MVN1Params(Tensor {{1, 3, 3, 3}, ngraph::element::f32, std::vector<float> {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5,
6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9}},
emptyReductionAxes,
false,
true,
1e-9,
Tensor {{1, 3, 3, 3},
ngraph::element::f32,
std::vector<float> {-1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934,
-1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934,
-1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934}}),
// across_channels=true, variance=true
MVN1Params(Tensor {{1, 3, 3, 3}, ngraph::element::f32, std::vector<float> {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5,
6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9}},
emptyReductionAxes,
true,
true,
1e-9,
Tensor {{1, 3, 3, 3},
ngraph::element::f32,
std::vector<float> {-1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934,
-1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934,
-1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934}}),
// reductionAxes, variance=false
MVN1Params(
Tensor {{1, 3, 2, 2}, ngraph::element::f32, std::vector<float> {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3}},
{1, 2, 3},
false,
false,
1e-9,
Tensor {{1, 3, 2, 2}, ngraph::element::f32, std::vector<float> {-3.25, -2.25, -1.25, -0.25, 0.75, 1.75, 2.75, 3.75, 4.75, -3.25, -2.25, -1.25}}),
// reductionAxes, variance=true
MVN1Params(Tensor {{1, 3, 3, 3}, ngraph::element::f32, std::vector<float> {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5,
6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9}},
{2, 3},
false,
true,
1e-9,
Tensor {{1, 3, 3, 3},
ngraph::element::f32,
std::vector<float> {-1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934,
-1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934,
-1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934}})),
ReferenceMVN1LayerTest::getTestCaseName);
// ------------------------------ V6 ------------------------------
struct MVN6Params {
MVN6Params(const Tensor& paramInput, const Tensor& paramReductionAxes, const bool paramNormalizeVariance, const double paramEps,
const ngraph::op::MVNEpsMode mode, const Tensor& paramExpected)
: input(paramInput),
reductionAxes(paramReductionAxes),
normalizeVariance(paramNormalizeVariance),
eps(paramEps),
epsMode(mode),
expected(paramExpected) {}
Tensor input;
Tensor reductionAxes;
bool normalizeVariance;
double eps;
ngraph::op::MVNEpsMode epsMode;
Tensor expected;
};
class ReferenceMVN6LayerTest : public testing::TestWithParam<MVN6Params>, public CommonReferenceTest {
public:
void SetUp() override {
auto params = GetParam();
function = CreateFunction(params.input, params.reductionAxes, params.normalizeVariance, params.eps, params.epsMode);
inputData = {params.input.data};
refOutData = {params.expected.data};
}
static std::string getTestCaseName(const testing::TestParamInfo<MVN6Params>& obj) {
auto param = obj.param;
std::ostringstream result;
result << "shape=" << param.input.shape;
result << "_iType=" << param.input.type;
result << "_reductionAccess=" << CommonTestUtils::vec2str(param.reductionAxes.shape);
result << "_normalizeVariance=" << (param.normalizeVariance ? "TRUE" : "FALSE");
result << "_eps=" << param.eps;
result << "_eps_mode=" << param.epsMode;
return result.str();
}
private:
static std::shared_ptr<Function> CreateFunction(const Tensor& input, const Tensor& reductionAxes, const bool normalizeVariance, const double eps,
const ngraph::op::MVNEpsMode epsMode) {
std::vector<int64_t> dataVector(reductionAxes.shape[0]);
const auto in = std::make_shared<op::Parameter>(input.type, input.shape);
auto mRef = as<InferenceEngine::MemoryBlob>(reductionAxes.data);
IE_ASSERT(mRef);
const auto refLockMemory = mRef->rmap();
const auto refBuffer = refLockMemory.as<const std::uint64_t*>();
for (size_t i = 0; i < dataVector.size(); ++i) {
dataVector[i] = refBuffer[i];
}
const auto axes = std::make_shared<op::Constant>(reductionAxes.type, reductionAxes.shape, dataVector);
auto mvn = std::make_shared<op::v6::MVN>(in, axes, normalizeVariance, eps, epsMode);
return std::make_shared<Function>(NodeVector {mvn}, ParameterVector {in});
}
};
TEST_P(ReferenceMVN6LayerTest, CompareWithHardcodedRefs) {
Exec();
}
INSTANTIATE_TEST_SUITE_P(
smoke_MVN6_With_Hardcoded_Refs, ReferenceMVN6LayerTest,
::testing::Values(
// variance=false, OUTSIDE_SQRT
MVN6Params(Tensor {{1, 3, 3, 3}, ngraph::element::f32, std::vector<float> {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5,
6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9}},
Tensor {Shape {2}, ngraph::element::i64, std::vector<int64_t> {2, 3}},
false,
1e-9,
ngraph::op::MVNEpsMode::OUTSIDE_SQRT,
Tensor {{1, 3, 3, 3}, ngraph::element::f32, std::vector<float> {-4, -3, -2, -1, 0, 1, 2, 3, 4, -4, -3, -2, -1, 0,
1, 2, 3, 4, -4, -3, -2, -1, 0, 1, 2, 3, 4}}),
// variance=true, OUTSIDE_SQRT
MVN6Params(Tensor {{1, 3, 3, 3}, ngraph::element::f32, std::vector<float> {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5,
6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9}},
Tensor {Shape {2}, ngraph::element::i64, std::vector<int64_t> {2, 3}},
true,
1e-9,
ngraph::op::MVNEpsMode::OUTSIDE_SQRT,
Tensor {{1, 3, 3, 3},
ngraph::element::f32,
std::vector<float> {-1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934,
-1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934,
-1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934}}),
// variance=true, INSIDE_SQRT
MVN6Params(Tensor {{1, 3, 3, 3}, ngraph::element::f32, std::vector<float> {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5,
6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9}},
Tensor {Shape {2}, ngraph::element::i64, std::vector<int64_t> {2, 3}},
true,
1e-9,
ngraph::op::MVNEpsMode::INSIDE_SQRT,
Tensor {{1, 3, 3, 3},
ngraph::element::f32,
std::vector<float> {-1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934,
-1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934,
-1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934}}),
// variance=true, another reductionAxes, OUTSIDE_SQRT
MVN6Params(Tensor {{1, 3, 3, 3}, ngraph::element::f32, std::vector<float>({1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5,
6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9})},
Tensor {Shape {3}, ngraph::element::i64, std::vector<int64_t>({1, 2, 3})},
true,
1e-9,
ngraph::op::MVNEpsMode::OUTSIDE_SQRT,
Tensor {{1, 3, 3, 3},
ngraph::element::f32,
std::vector<float> {-1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934,
-1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934,
-1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934}})),
ReferenceMVN6LayerTest::getTestCaseName);

View File

@@ -12,6 +12,7 @@
#include "base_reference_test.hpp"
using namespace reference_tests;
using namespace ngraph;
using namespace InferenceEngine;
@@ -33,12 +34,12 @@ struct SelectParams {
element::Type data_type;
op::AutoBroadcastSpec broadcast;
PartialShape select_input_pshape;
InferenceEngine::Blob::Ptr select_input;
Blob::Ptr select_input;
PartialShape if_input_pshape;
InferenceEngine::Blob::Ptr if_input;
Blob::Ptr if_input;
PartialShape else_input_pshape;
InferenceEngine::Blob::Ptr else_input;
InferenceEngine::Blob::Ptr expected_output;
Blob::Ptr else_input;
Blob::Ptr expected_output;
};
class ReferenceSelectLayerTest : public testing::TestWithParam<SelectParams>, public CommonReferenceTest {

View File

@@ -12,19 +12,20 @@
#include "base_reference_test.hpp"
using namespace reference_tests;
using namespace ngraph;
using namespace InferenceEngine;
struct SignParams {
template <class IT, class OT>
SignParams(const ngraph::PartialShape& shape, const ngraph::element::Type& iType, const ngraph::element::Type& oType, const std::vector<IT>& iValues,
SignParams(const PartialShape& shape, const element::Type& iType, const element::Type& oType, const std::vector<IT>& iValues,
const std::vector<OT>& oValues)
: pshape(shape), inType(iType), outType(oType), inputData(CreateBlob(iType, iValues)), refData(CreateBlob(oType, oValues)) {}
ngraph::PartialShape pshape;
ngraph::element::Type inType;
ngraph::element::Type outType;
InferenceEngine::Blob::Ptr inputData;
InferenceEngine::Blob::Ptr refData;
PartialShape pshape;
element::Type inType;
element::Type outType;
Blob::Ptr inputData;
Blob::Ptr refData;
};
class ReferenceSignLayerTest : public testing::TestWithParam<SignParams>, public CommonReferenceTest {
@@ -59,22 +60,22 @@ TEST_P(ReferenceSignLayerTest, CompareWithHardcodedRefs) {
INSTANTIATE_TEST_SUITE_P(
smoke_Sign_With_Hardcoded_Refs, ReferenceSignLayerTest,
::testing::Values(
SignParams(ngraph::PartialShape {6}, ngraph::element::f32, ngraph::element::f32,
SignParams(PartialShape {6}, element::f32, element::f32,
std::vector<float> {1, -2, 0, -4.8f, 4.8f, -0.0f},
std::vector<float> {1, -1, 0, -1, 1, 0}),
SignParams(ngraph::PartialShape {6}, ngraph::element::f16, ngraph::element::f16,
SignParams(PartialShape {6}, element::f16, element::f16,
std::vector<float16> {1, -2, 0, -4.8f, 4.8f, -0.0f},
std::vector<float16> {1, -1, 0, -1, 1, 0}),
SignParams(ngraph::PartialShape {6}, ngraph::element::u64, ngraph::element::u64,
SignParams(PartialShape {6}, element::u64, element::u64,
std::vector<uint64_t> {1, 2, 0, 4, 4, 0},
std::vector<uint64_t> {1, 1, 0, 1, 1, 0}),
SignParams(ngraph::PartialShape {6}, ngraph::element::u32, ngraph::element::u32,
SignParams(PartialShape {6}, element::u32, element::u32,
std::vector<uint32_t> {1, 2, 0, 4, 4, 0},
std::vector<uint32_t> {1, 1, 0, 1, 1, 0}),
SignParams(ngraph::PartialShape {6}, ngraph::element::i32, ngraph::element::i32,
SignParams(PartialShape {6}, element::i32, element::i32,
std::vector<int32_t> {1, -2, 0, -4, 4, -0},
std::vector<int32_t> {1, -1, 0, -1, 1, 0}),
SignParams(ngraph::PartialShape {6}, ngraph::element::i64, ngraph::element::i64,
SignParams(PartialShape {6}, element::i64, element::i64,
std::vector<int64_t> {1, -2, 0, -4, 4, -0},
std::vector<int64_t> {1, -1, 0, -1, 1, 0})),
ReferenceSignLayerTest::getTestCaseName);

View File

@@ -6,14 +6,14 @@ include_guard(GLOBAL)
set(VPU_SUPPORTED_FIRMWARES usb-ma2x8x pcie-ma2x8x)
set(VPU_SUPPORTED_FIRMWARES_HASH
"d55a824838accec31733e4d4c45e8774bdd5690da8beefe41360f1983476e3d0"
"61797a77b38fc677be4cc63d730e8871bbf169686b88eabb7066b01f9d156129")
"54a732b5fb17a0124652bc5113fa628c718a5af40621bca309471cb5ffd9271b"
"5750b2831c77ef54b8e243d3840c5ed1c9509681d55aee7e369d558cef628735")
#
# Default packages
#
set(FIRMWARE_PACKAGE_VERSION 1714)
set(FIRMWARE_PACKAGE_VERSION 1717)
set(VPU_CLC_MA2X8X_VERSION "movi-cltools-20.09.2")
#

View File

@@ -95,6 +95,7 @@ Options:
-layout Optional. Prompts how network layouts should be treated by application. For example, "input1[NCHW],input2[NC]" or "[NCHW]" in case of one input size.
-cache_dir "<path>" Optional. Enables caching of loaded models to specified directory.
-load_from_file Optional. Loads model from file directly without ReadNetwork.
-latency_percentile Optional. Defines the percentile to be reported in the latency metric. The valid range is [1, 100]. The default value is 50 (median).
CPU-specific performance options:
-nstreams "<integer>" Optional. Number of streams to use for inference on the CPU, GPU or MYRIAD devices

View File

@@ -56,6 +56,10 @@ static const char infer_num_streams_message[] = "Optional. Number of streams to
"Also, using nstreams>1 is inherently throughput-oriented option, "
"while for the best-latency estimations the number of streams should be set to 1.";
/// @brief message for latency percentile settings
static const char infer_latency_percentile_message[] =
"Optional. Defines the percentile to be reported in latency metric. The valid range is [1, 100]. The default value is 50 (median).";
/// @brief message for enforcing of BF16 execution where it is possible
static const char enforce_bf16_message[] = "Optional. By default floating point operations execution in bfloat16 precision are enforced "
"if supported by platform.\n"
@@ -189,6 +193,9 @@ DEFINE_uint32(nthreads, 0, infer_num_threads_message);
/// @brief Number of streams to use for inference on the CPU (also affects Hetero cases)
DEFINE_string(nstreams, "", infer_num_streams_message);
/// @brief The percentile which will be reported in latency metric
DEFINE_uint32(latency_percentile, 50, infer_latency_percentile_message);
/// @brief Enforces bf16 execution with bfloat16 precision on systems having this capability
DEFINE_bool(enforcebf16, false, enforce_bf16_message);
@@ -278,6 +285,7 @@ static void showUsage() {
std::cout << " -layout " << layout_message << std::endl;
std::cout << " -cache_dir \"<path>\" " << cache_dir_message << std::endl;
std::cout << " -load_from_file " << load_from_file_message << std::endl;
std::cout << " -latency_percentile " << infer_latency_percentile_message << std::endl;
std::cout << std::endl << " device-specific performance options:" << std::endl;
std::cout << " -nstreams \"<integer>\" " << infer_num_streams_message << std::endl;
std::cout << " -nthreads \"<integer>\" " << infer_num_threads_message << std::endl;

View File

@@ -52,6 +52,10 @@ bool ParseAndCheckCommandLine(int argc, char* argv[]) {
throw std::logic_error("Model is required but not set. Please set -m option.");
}
if (FLAGS_latency_percentile > 100 || FLAGS_latency_percentile < 1) {
showUsage();
throw std::logic_error("The percentile value is incorrect. The applicable values range is [1, 100].");
}
if (FLAGS_api != "async" && FLAGS_api != "sync") {
throw std::logic_error("Incorrect API. Please set -api option to `sync` or `async` value.");
}
@@ -100,11 +104,10 @@ static void next_step(const std::string additional_info = "") {
}
template <typename T>
T getMedianValue(const std::vector<T>& vec) {
T getMedianValue(const std::vector<T>& vec, std::size_t percentile) {
std::vector<T> sortedVec(vec);
std::sort(sortedVec.begin(), sortedVec.end());
return (sortedVec.size() % 2 != 0) ? sortedVec[sortedVec.size() / 2ULL]
: (sortedVec[sortedVec.size() / 2ULL] + sortedVec[sortedVec.size() / 2ULL - 1ULL]) / static_cast<T>(2.0);
return sortedVec[(sortedVec.size() / 100) * percentile];
}
/**
@@ -624,7 +627,7 @@ int main(int argc, char* argv[]) {
// wait the latest inference executions
inferRequestsQueue.waitAll();
double latency = getMedianValue<double>(inferRequestsQueue.getLatencies());
double latency = getMedianValue<double>(inferRequestsQueue.getLatencies(), FLAGS_latency_percentile);
double totalDuration = inferRequestsQueue.getDurationInMilliseconds();
double fps = (FLAGS_api == "sync") ? batchSize * 1000.0 / latency : batchSize * 1000.0 * iteration / totalDuration;
@@ -634,8 +637,14 @@
{"total number of iterations", std::to_string(iteration)},
});
if (device_name.find("MULTI") == std::string::npos) {
std::string latency_label;
if (FLAGS_latency_percentile == 50) {
latency_label = "latency (ms)";
} else {
latency_label = "latency (" + std::to_string(FLAGS_latency_percentile) + " percentile) (ms)";
}
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {
{"latency (ms)", double_to_string(latency)},
{latency_label, double_to_string(latency)},
});
}
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {{"throughput", double_to_string(fps)}});
@@ -684,8 +693,15 @@ int main(int argc, char* argv[]) {
std::cout << "Count: " << iteration << " iterations" << std::endl;
std::cout << "Duration: " << double_to_string(totalDuration) << " ms" << std::endl;
if (device_name.find("MULTI") == std::string::npos)
std::cout << "Latency: " << double_to_string(latency) << " ms" << std::endl;
if (device_name.find("MULTI") == std::string::npos) {
std::cout << "Latency";
if (FLAGS_latency_percentile == 50) {
std::cout << ": ";
} else {
std::cout << " (" << FLAGS_latency_percentile << " percentile): ";
}
std::cout << double_to_string(latency) << " ms" << std::endl;
}
std::cout << "Throughput: " << double_to_string(fps) << " FPS" << std::endl;
} catch (const std::exception& ex) {
slog::err << ex.what() << slog::endl;
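For reference, here is a hedged standalone sketch of the percentile selection performed by the reworked `getMedianValue` above; the final clamp is a defensive addition for illustration and is not part of the commit:
```cpp
#include <algorithm>
#include <cstddef>
#include <vector>

// Sort a copy of the samples, then pick the element at (size / 100) * percentile.
// With 1000 latencies and percentile = 90, this reads index 900. Assumes a
// non-empty vector.
double percentileValue(std::vector<double> samples, std::size_t percentile) {
    std::sort(samples.begin(), samples.end());
    std::size_t idx = (samples.size() / 100) * percentile;
    idx = std::min(idx, samples.size() - 1);  // clamp (illustration only)
    return samples[idx];
}
```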

View File

@@ -12,7 +12,7 @@ if(CMAKE_COMPILER_IS_GNUCC)
endif()
endif()
if(GPU_DEBUG_CONFIG)
if(ENABLE_GPU_DEBUG_CAPS)
add_definitions(-DGPU_DEBUG_CONFIG=1)
endif()

View File

@@ -194,11 +194,11 @@ REGISTER_FACTORY(v5, LSTMSequence);
//REGISTER_FACTORY(v5, NonMaxSuppression); Supported via v5 -> v5 internal conversion
REGISTER_FACTORY(v5, Round);
REGISTER_FACTORY(v5, GatherND);
REGISTER_FACTORY(v5, Loop);
// ----------------------------- Unsupported v5 ops ----------------------------- //
// REGISTER_FACTORY(v5, BatchNormInference);
// REGISTER_FACTORY(v5, GRUSequence);
// REGISTER_FACTORY(v5, Loop);
// REGISTER_FACTORY(v5, RNNSequence);
// ------------------------------ Supported v6 ops ------------------------------ //

View File

@@ -0,0 +1,227 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
///////////////////////////////////////////////////////////////////////////////////////////////////
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "cldnn_engine.h"
#include <cpp/ie_cnn_network.h>
#include "ngraph/op/loop.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/util/sub_graph_base.hpp"
#include "transformations/utils/utils.hpp"
#include "ie_ngraph_utils.hpp"
#include "cldnn/primitives/loop.hpp"
#include "cldnn/primitives/mutable_data.hpp"
#include "cldnn/primitives/data.hpp"
#include "cldnn/primitives/reorder.hpp"
#include "cldnn/graph/topology.hpp"
#include <vector>
#include <algorithm>
using Loop = ngraph::op::v5::Loop;
namespace CLDNNPlugin {
template<class DATA_TYPE>
static DATA_TYPE CreateScalarData(Program &p, const cldnn::primitive_id& id, int64_t num) {
auto mem = p.GetEngine().allocate_memory({ cldnn::data_types::i64, cldnn::format::bfyx, { 1, 1, 1, 1 } });
cldnn::mem_lock<int64_t> ptr{mem, p.GetEngine().get_program_stream()};
*ptr.begin() = num;
return {id, mem};
}
static cldnn::mutable_data CreateAdditionalOutputData(Program &p, const std::shared_ptr<ngraph::Node>& op,
const cldnn::primitive_id& id, const cldnn::primitive_id& input,
const int32_t output_idx) {
const auto precision = DataTypeFromPrecision(op->get_output_element_type(output_idx));
const auto format = DefaultFormatForDims(op->get_output_shape(output_idx).size());
const auto tensor = CldnnTensorFromIEDims(op->get_output_shape(output_idx));
cldnn::layout output_layout = cldnn::layout(precision, format, tensor);
auto mem = p.GetEngine().allocate_memory(output_layout);
auto md = cldnn::mutable_data(id, {input}, mem); // cldnn::data cannot set dependency
return md;
}
static void UpdateBackedge(std::vector<cldnn::loop::backedge_mapping>& back_edges,
const cldnn::primitive_id& old_primitive_id, const cldnn::primitive_id& new_primitive_id) {
for (auto& back_edge : back_edges) {
if (back_edge.from == old_primitive_id) {
back_edge.from = new_primitive_id;
}
}
}
static std::string GetExternalInputName(const int64_t body_parameter_index,
const std::shared_ptr<Loop>& op) {
const auto& loop_input_descs = op->get_input_descriptions();
for (const auto& loop_input_desc : loop_input_descs) {
if (loop_input_desc->m_body_parameter_index == body_parameter_index) {
auto external_node = op->get_input_node_shared_ptr(loop_input_desc->m_input_index);
return layer_type_name_ID(external_node);
}
}
return {""};
}
void CreateLoopOp(Program& p, const std::shared_ptr<Loop>& op) {
const std::string layerName = layer_type_name_ID(op);
auto inputPrimitives = p.GetInputPrimitiveIDs(op);
const auto& loop_input_descs = op->get_input_descriptions();
const auto& loop_output_descs = op->get_output_descriptions();
const auto& body_inputs = op->get_function()->get_parameters();
const auto& body_outputs = op->get_function()->get_results();
InferenceEngine::CNNNetwork body_network(op->get_function());
auto networkInputs = body_network.getInputsInfo();
auto networkOutputs = body_network.getOutputsInfo();
// Set special body ports: current_iteration input, execution condition output
auto special_body_ports = op->get_special_body_ports();
std::string body_current_iteration_id;
if (special_body_ports.current_iteration_input_idx >= 0) {
auto current_iteration_input = body_inputs.at(special_body_ports.current_iteration_input_idx);
body_current_iteration_id = layer_type_name_ID(current_iteration_input);
std::string input_name = ngraph::op::util::create_ie_output_name(current_iteration_input);
const auto networkInput = networkInputs.at(input_name);
auto precision = InferenceEngine::details::convertPrecision(current_iteration_input->get_element_type());
networkInput->setPrecision(precision);
}
cldnn::primitive_id body_execution_condition_id;
if (special_body_ports.body_condition_output_idx >= 0) {
auto body_condition_output = body_outputs.at(special_body_ports.body_condition_output_idx)->get_input_node_shared_ptr(0);
body_execution_condition_id = layer_type_name_ID(body_condition_output);
std::string output_name = ngraph::op::util::create_ie_output_name(body_condition_output);
const auto networkOutput = networkOutputs.at(output_name);
networkOutput->setPrecision(InferenceEngine::Precision::I64);
}
// get body topology from ngraph function
Program body_program(body_network, p.GetEnginePtr(), p.GetConfig(), true);
auto body_topology = *body_program.GetTopology();
// setup input_primitive_maps/ output_primitive_maps and back_edges
std::vector<cldnn::loop::io_primitive_map> input_primitive_maps;
std::vector<cldnn::loop::io_primitive_map> output_primitive_maps;
std::vector<cldnn::loop::backedge_mapping> back_edges;
// set input mapping & back edges
for (const auto& loop_input_desc : loop_input_descs) {
const cldnn::primitive_id& external_id = inputPrimitives.at(loop_input_desc->m_input_index);
auto& body_input = body_inputs.at(loop_input_desc->m_body_parameter_index);
cldnn::primitive_id internal_id = layer_type_name_ID(body_input);
// set input mapping
if (const auto& sliceInfo =
std::dynamic_pointer_cast<Loop::SliceInputDescription>(loop_input_desc)) {
// sliced input
input_primitive_maps.emplace_back(external_id, internal_id, sliceInfo->m_axis,
sliceInfo->m_start, sliceInfo->m_end, sliceInfo->m_stride);
} else {
// input without slicing
input_primitive_maps.emplace_back(external_id, internal_id);
}
// set back edges
if (const auto& mergedInput =
std::dynamic_pointer_cast<Loop::MergedInputDescription>(loop_input_desc)) {
// backedge
const auto& to = body_inputs.at(mergedInput->m_body_parameter_index);
const auto& from = body_outputs.at(mergedInput->m_body_value_index);
cldnn::primitive_id to_id = layer_type_name_ID(to);
cldnn::primitive_id from_id = layer_type_name_ID(from);
// reset output data type because the data types of the outputs of the
// body topology are always FP32 regardless of ngraph data type
{
const auto from_prim = body_topology.at(from_id);
const auto& to_ngraph_type = to->get_element_type();
const auto to_cldnn_type = DataTypeFromPrecision(to_ngraph_type);
from_prim->output_data_type = to_cldnn_type;
}
back_edges.emplace_back(from_id, to_id);
}
}
// set trip count, initial execution condition, num iteration primitives
// they should be mutable_data to prevent them from being optimized out
const cldnn::primitive_id trip_count_id = layer_type_name_ID(op->get_input_node_shared_ptr(0));
const cldnn::primitive_id execution_condition_id = layer_type_name_ID(op->get_input_node_shared_ptr(1));
const int64_t num_iterations = op->get_num_iterations();
if (num_iterations < 0) {
IE_THROW() << "loop's num_iteration cannot be negative";
}
const cldnn::primitive_id num_iteration_id = layerName + "_numIteration";
{
cldnn::mutable_data num_iteration = CreateScalarData<cldnn::mutable_data>(p, num_iteration_id, 0);
p.primitivesToIRLayersMap[num_iteration_id] = { op->get_friendly_name() };
p.primitiveIDs[num_iteration_id] = num_iteration_id;
p.AddPrimitive(num_iteration);
p.AddInnerPrimitiveToProfiler(num_iteration_id, layerName, op);
}
// set output mapping
for (const auto& loop_output_desc : loop_output_descs) {
const uint64_t output_idx = loop_output_desc->m_output_index;
// Add additional mutable_data for multiple outputs
// primitive ID should be <TI primitive ID>.<output_idx> if output_idx > 0
// otherwise primitive ID should be equal to the TI primitive ID
const std::string layerNameWithIndex = layerName + "." + std::to_string(output_idx);
std::string external_id;
if (output_idx > 0) {
cldnn::mutable_data output_data = CreateAdditionalOutputData(p, op, layerNameWithIndex, layerName, output_idx);
p.AddPrimitive(output_data);
p.AddInnerPrimitiveToProfiler(layerNameWithIndex, layerName, op);
p.primitiveIDs[layerNameWithIndex] = layerNameWithIndex;
external_id = layerNameWithIndex;
} else {
p.primitiveIDs[layerNameWithIndex] = layerName;
p.primitiveIDs[layerName] = layerName;
external_id = layerName;
}
const auto& body_output = body_outputs.at(loop_output_desc->m_body_value_index);
cldnn::primitive_id internal_id = layer_type_name_ID(body_output);
// update primitive_map
if (const auto& concatOutput =
std::dynamic_pointer_cast<Loop::ConcatOutputDescription>(loop_output_desc)) {
// output which requires concatenation
output_primitive_maps.emplace_back(external_id, internal_id, concatOutput->m_axis,
concatOutput->m_start, concatOutput->m_end, concatOutput->m_stride);
}
if (std::dynamic_pointer_cast<Loop::BodyOutputDescription>(loop_output_desc)) {
// output which requires no concatenation
output_primitive_maps.emplace_back(external_id, internal_id);
}
}
const cldnn::loop loopPrimitive(
layerName, /* layer name of this primitive (output id) */
inputPrimitives, /* inputs of this layer */
body_topology, /* body network */
trip_count_id, /* trip_count data in outer network, always same as num_iterations in TI */
execution_condition_id, /* initial_execution_condition data in outer network, always true in TI */
num_iteration_id, /* actual number of iteration data in body network */
input_primitive_maps, /* input mappings connecting outer network and inner network */
output_primitive_maps, /* output mappings connecting outer network and inner network */
back_edges, /* back edge mapping */
num_iterations, /* max iteration, i.e. length of iteration axis */
body_current_iteration_id,
body_execution_condition_id);
p.AddPrimitive(loopPrimitive);
p.AddPrimitiveToProfiler(op);
}
REGISTER_FACTORY_IMPL(v5, Loop);
} // namespace CLDNNPlugin

View File

@@ -25,6 +25,7 @@ public:
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
bool isQuantized(const std::shared_ptr<const Node>& layer) const noexcept override;
static bool canBeTransformedToGroupConvolution(const std::shared_ptr<const Node>& layer) noexcept;
static bool isDynamicOrScalar(const std::shared_ptr<const Node>& node);
void setGroupSize(const size_t groupSize);
size_t getGroupSize() const;

View File

@@ -72,7 +72,15 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con
NetworkHelper::getDequantization(reshapeFromWeights);
if (dequantization.empty()) {
const auto fqOnWeights = getFakeQuantizeOnWeights(convolutionBackpropData);
std::shared_ptr<ngraph::Node> resultConstant = NetworkHelper::fold_fake_quantize(fqOnWeights);
auto constantShape = fqOnWeights->input(1).get_partial_shape();
if (constantShape.is_dynamic() || constantShape.rank().is_dynamic()) {
return false;
}
std::shared_ptr<ngraph::Node> resultConstant = NetworkHelper::fold_fake_quantize(
fqOnWeights,
false,
(constantShape.rank().get_length() < 2) || constantShape[1] != 1ul ? 1ul : 0ul);
if (reshapeFromWeights != nullptr) {
resultConstant = fold_reshape<opset1::Reshape>(
resultConstant,

View File

@@ -42,7 +42,15 @@ bool FoldFakeQuantizeTransformation::transform(TransformationContext& context, n
return false;
}
const auto resultConstant = NetworkHelper::fold_fake_quantize(fakeQuantize, false);
const auto constantShape = fakeQuantize->input(1).get_partial_shape();
if (constantShape.is_dynamic() || constantShape.rank().is_dynamic()) {
return false;
}
std::shared_ptr<ngraph::Node> resultConstant = NetworkHelper::fold_fake_quantize(
fakeQuantize,
false,
(constantShape.rank().get_length() < 2) || constantShape[1] != 1ul ? 1ul : 0ul);
if (is_type<opset1::Constant>(resultConstant)) {
replace_node(fakeQuantize, resultConstant);
return true;

View File

@@ -47,6 +47,9 @@ bool MultiplyToGroupConvolutionTransformation::transform(TransformationContext&
}
auto dequantization = NetworkHelper::getDequantization(multiply, inputIndex);
if (dequantization.data.get_node() == nullptr) {
return false;
}
if (dequantization.subtractConvert != nullptr) {
dequantization = NetworkHelper::foldDequantization(multiply, inputIndex);
}
@@ -176,12 +179,6 @@ bool MultiplyToGroupConvolutionTransformation::canBeTransformed(const Transforma
return false;
}
const auto dequantization = NetworkHelper::getDequantization(operation, inputIndex);
if (dequantization.empty()) {
return false;
}
for (size_t i = 2; i < constShape.size(); ++i) {
if (constShape[i] != 1) {
return false;
@ -189,9 +186,13 @@ bool MultiplyToGroupConvolutionTransformation::canBeTransformed(const Transforma
}
if (updatePrecisions && restrictions.size() > 0) {
const element::Type parentPrecision = dequantization.data.get_element_type();
const auto& availablePrecisions = restrictions[0].second;
if (availablePrecisions.empty()) {
return false;
}
const auto dequantization = NetworkHelper::getDequantization(operation, inputIndex);
const element::Type parentPrecision = dequantization.data.get_element_type();
if (std::find(availablePrecisions.begin(), availablePrecisions.end(), parentPrecision) == availablePrecisions.end()) {
return false;
}
@ -221,6 +222,35 @@ bool MultiplyToGroupConvolutionTransformation::canBeTransformedToGroupConvolutio
return (pShape.rank().get_length() == 4ul) || (pShape.rank().get_length() == 5ul);
}
bool MultiplyToGroupConvolutionTransformation::isDynamicOrScalar(const std::shared_ptr<const Node>& node) {
auto getConstantIndex = [](const std::shared_ptr<const Node>& node) -> int {
if (is_type<opset1::Constant>(node->get_input_node_shared_ptr(1))) {
return 1;
}
if (is_type<opset1::Constant>(node->get_input_node_shared_ptr(0))) {
return 0;
}
return -1;
};
const int constantIndex = getConstantIndex(node);
if (constantIndex == -1) {
return false;
}
const Input<const Node> constantInput = node->input(constantIndex);
const auto shape = constantInput.get_partial_shape();
if (shape.is_dynamic() || shape.rank().is_dynamic()) {
return true;
}
if (std::all_of(shape.begin(), shape.end(), [](const Dimension& dimension) { return dimension == 1ul; })) {
return true;
}
return false;
}
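A standalone illustration of the all-dimensions-equal-one part of the predicate above (the dynamic-shape branch returns true separately); the shapes here are hypothetical:

// Hedged sketch, not library code: scalar-like means every dimension equals 1.
#include <algorithm>
#include <cassert>
#include <vector>

static bool all_ones(const std::vector<size_t>& shape) {
    return std::all_of(shape.begin(), shape.end(), [](size_t d) { return d == 1; });
}

int main() {
    assert(all_ones({}));             // empty (rank-0) shape: trivially all-ones, treated as scalar
    assert(all_ones({1, 1, 1, 1}));   // broadcastable scalar-like constant
    assert(!all_ones({1, 3, 1, 1}));  // per-channel constant: the transformation still applies
}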
void MultiplyToGroupConvolutionTransformation::setGroupSize(const size_t groupSize) {
this->groupSize = groupSize;
}

View File

@ -357,6 +357,9 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) {
lptManager.get_pass_config()->set_callback<ngraph::pass::low_precision::ConvolutionBackpropDataTransformation>([](const_node_ptr& node) -> bool {
return LayerTransformation::isAsymmetricQuantization(node) || WeightableLayerTransformation::isAsymmetricOnWeights(node);
});
lptManager.get_pass_config()->set_callback<ngraph::pass::low_precision::MultiplyToGroupConvolutionTransformation>([](const_node_ptr& node) -> bool {
return MultiplyToGroupConvolutionTransformation::isDynamicOrScalar(node);
});
lptManager.run_passes(nGraphFunc);
}
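For reference, the mechanism used here is the generic PassConfig callback: a callback returning true tells the pass to skip the matched node. A hedged usage sketch, with header paths assumed:

// Hedged sketch: wiring the callback outside the snippet above.
// Header locations are assumptions; the predicate mirrors the one registered in Transformation().
#include <memory>

#include <low_precision/multiply_to_group_convolution.hpp>  // assumed path
#include <ngraph/pass/manager.hpp>

void disable_for_dynamic_or_scalar(ngraph::pass::Manager& lptManager) {
    using const_node_ptr = const std::shared_ptr<const ngraph::Node>;
    lptManager.get_pass_config()->set_callback<ngraph::pass::low_precision::MultiplyToGroupConvolutionTransformation>(
        [](const_node_ptr& node) -> bool {
            // Returning true means: skip this transformation for the matched node.
            return ngraph::pass::low_precision::MultiplyToGroupConvolutionTransformation::isDynamicOrScalar(node);
        });
}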

View File

@ -20,6 +20,9 @@ if(ENABLE_MYRIAD)
install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/custom_kernels/
DESTINATION ${IE_CPACK_LIBRARY_PATH}/vpu_custom_kernels
COMPONENT myriad)
install(DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/vpu_custom_kernels/
DESTINATION ${IE_CPACK_LIBRARY_PATH}/vpu_custom_kernels
COMPONENT myriad)
install(DIRECTORY ${VPU_CLC_MA2X8X_ROOT}/
DESTINATION deployment_tools/tools/cl_compiler
COMPONENT myriad

View File

@ -48,10 +48,12 @@ private:
void serializeParamsImpl(BlobSerializer& serializer) const override {
auto normalize = attrs().get<int>("normalize");
auto across_channels = attrs().get<int>("across_channels");
auto across_width = attrs().get<int>("across_width");
auto eps = attrs().get<float>("eps");
serializer.append(static_cast<int32_t>(normalize));
serializer.append(static_cast<int32_t>(across_channels));
serializer.append(static_cast<int32_t>(across_width));
serializer.append(static_cast<float>(eps));
}
@ -88,11 +90,13 @@ void FrontEnd::parseMVN(const Model& model, const ie::CNNLayerPtr& layer, const
for (int i = 0; i < indicesSize; i++) {
axes.insert(getDimFromAxis(ndims, indicesPtr[i]));
}
const auto width = axes.count(Dim::W);
VPU_THROW_UNLESS(!axes.count(Dim::N) && axes.count(Dim::H) && axes.count(Dim::W),
VPU_THROW_UNLESS(!axes.count(Dim::N) && width,
"Unsupported combination of indices in layer \"%s\". "
"Only across channel and full batch supported.", layer->name);
"Only across channel, width and full batch supported.", layer->name);
const auto acrossChannels = axes.count(Dim::C) != 0;
const auto acrossWidth = width == 1 && axes.count(Dim::H) == 0;
const auto normVariance = layer->GetParamAsBool("normalize_variance");
const auto eps = layer->GetParamAsFloat("eps");
@ -104,6 +108,7 @@ void FrontEnd::parseMVN(const Model& model, const ie::CNNLayerPtr& layer, const
auto stage = model->addNewStage<MVNStage>(layer->name, StageType::MVN, layer, inputs, outputs);
stage->attrs().set<int>("normalize", normVariance);
stage->attrs().set<int>("across_channels", acrossChannels);
stage->attrs().set<int>("across_width", acrossWidth);
stage->attrs().set<float>("eps", eps);
}
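A short standalone restatement of the axis-set decoding above, using character tags in place of the vpu Dim enum:

// Hypothetical restatement: N must be absent and W present; across_channels is set
// when C is reduced, across_width when only W is reduced.
#include <cassert>
#include <set>

int main() {
    std::set<char> axes = {'H', 'W'};                        // e.g. reduction over spatial dims only
    const bool width = axes.count('W') != 0;
    assert(axes.count('N') == 0 && width);                   // mirrors the VPU_THROW_UNLESS condition
    const bool acrossChannels = axes.count('C') != 0;        // false here
    const bool acrossWidth = width && axes.count('H') == 0;  // false here: H is reduced too
    assert(!acrossChannels && !acrossWidth);                 // {H, W} -> plain spatial MVN
}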

View File

@ -24,6 +24,13 @@ using namespace testing;
using namespace ngraph;
using namespace ngraph::pass;
using const_node_ptr = const std::shared_ptr<const ngraph::Node>;
using callback_function_type = std::function<bool(const_node_ptr&)>;
bool empty_callback(const std::shared_ptr<const ngraph::Node>& node) {
return false;
}
class ConvolutionBackpropDataTransformationTestValues {
public:
class Actual {
@ -33,26 +40,31 @@ public:
builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights;
builder::subgraph::DequantizationOperations dequantizationOnWeights;
std::shared_ptr<ngraph::opset1::Constant> weights;
callback_function_type callback;
Actual() = default;
Actual(
const ngraph::element::Type& precisionBeforeDequantization,
const ngraph::builder::subgraph::DequantizationOperations& dequantizationOnActivations,
const builder::subgraph::FakeQuantizeOnWeights& fakeQuantizeOnWeights,
const std::shared_ptr<ngraph::opset1::Constant>& weights) :
const std::shared_ptr<ngraph::opset1::Constant>& weights,
const callback_function_type& callback = empty_callback) :
precisionBeforeDequantization(precisionBeforeDequantization),
dequantizationOnActivations(dequantizationOnActivations),
fakeQuantizeOnWeights(fakeQuantizeOnWeights),
weights(weights) {}
weights(weights),
callback(callback) {}
Actual(
const ngraph::element::Type& precisionBeforeDequantization,
const ngraph::builder::subgraph::DequantizationOperations& dequantizationOnActivations,
const builder::subgraph::DequantizationOperations& dequantizationOnWeights,
const std::shared_ptr<ngraph::opset1::Constant>& weights) :
const std::shared_ptr<ngraph::opset1::Constant>& weights,
const callback_function_type& callback = empty_callback) :
precisionBeforeDequantization(precisionBeforeDequantization),
dequantizationOnActivations(dequantizationOnActivations),
dequantizationOnWeights(dequantizationOnWeights),
weights(weights) {}
weights(weights),
callback(callback) {}
};
class Expected {
@ -124,10 +136,11 @@ public:
actualWeights);
SimpleLowPrecisionTransformer transform;
transform.add<ngraph::pass::low_precision::ConvolutionBackpropDataTransformation, ngraph::opset1::Convolution>(testValues.params);
transform.add<low_precision::ConvolutionBackpropDataTransformation, opset1::ConvolutionBackpropData>(testValues.params);
transform.get_pass_config()->set_callback<low_precision::ConvolutionBackpropDataTransformation>(testValues.actual.callback);
transform.transform(actualFunction);
std::shared_ptr<Node> refWeights = pass::low_precision::fold<opset1::Broadcast>(
std::shared_ptr<Node> refWeights = low_precision::fold<opset1::Broadcast>(
testValues.expected.weights,
opset1::Constant::create(
element::i64,
@ -179,7 +192,7 @@ public:
TEST_P(ConvolutionBackpropDataTransformation, CompareFunctions) {
actualFunction->validate_nodes_and_infer_types();
auto res = compare_functions(referenceFunction, actualFunction, true, true, true);
auto res = compare_functions(referenceFunction, actualFunction, true, true, false);
ASSERT_TRUE(res.first) << res.second;
}
@ -455,6 +468,27 @@ const std::vector<ConvolutionBackpropDataTransformationTestValues> testValues =
true
}
},
// issue #59593: subtract on activations, non-asymmetric
{
LayerTransformation::createParamsU8I8(),
// ActualValues
{
ngraph::element::u8,
{{ngraph::element::f32}, {128.f}, {0.01f}},
{ 255ul, Shape({ 1, 2, 1, 1 }), { 0.f }, { 254.f }, { 0.f }, { 25.4f } },
op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ 2.f }),
low_precision::LayerTransformation::isAsymmetricQuantization
},
// ExpectedValues
{
ngraph::element::u8,
{{ngraph::element::f32}, {128.f}, {0.01f}},
{},
{},
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ 2.f }),
false // weights are not folded because of callback returning true
}
},
};
INSTANTIATE_TEST_SUITE_P(

View File

@ -71,7 +71,7 @@ public:
SimpleLowPrecisionTransformer transform;
transform.add<ngraph::pass::low_precision::ConvolutionTransformation, ngraph::opset1::Convolution>(testValues.params);
if (testValues.params.supportAsymmetricQuantization == false) {
transform.set_callback<ngraph::pass::low_precision::ConvolutionTransformation>(
transform.get_pass_config()->set_callback<ngraph::pass::low_precision::ConvolutionTransformation>(
[](const std::shared_ptr<const ngraph::Node>& node) -> bool {
return ngraph::pass::low_precision::LayerTransformation::isAsymmetricQuantization(node);
});

View File

@ -84,7 +84,7 @@ public:
SimpleLowPrecisionTransformer transform;
transform.add<ngraph::pass::low_precision::GroupConvolutionTransformation, ngraph::opset1::GroupConvolution>(testValues.params);
if (testValues.params.supportAsymmetricQuantization == false) {
transform.set_callback<ngraph::pass::low_precision::GroupConvolutionTransformation>(
transform.get_pass_config()->set_callback<ngraph::pass::low_precision::GroupConvolutionTransformation>(
[](const std::shared_ptr<const ngraph::Node>& node) -> bool {
return ngraph::pass::low_precision::LayerTransformation::isAsymmetricQuantization(node);
});

View File

@ -105,7 +105,7 @@ public:
SimpleLowPrecisionTransformer transformer;
transformer.add<ngraph::pass::low_precision::MatMulTransformation, ngraph::opset1::MatMul>(testValues.params);
if (testValues.params.support3DTensorOnActivations == false) {
transformer.set_callback<ngraph::pass::low_precision::MatMulTransformation>(
transformer.get_pass_config()->set_callback<ngraph::pass::low_precision::MatMulTransformation>(
[](const std::shared_ptr<const ngraph::Node>& node) -> bool {
return ngraph::pass::low_precision::MatMulTransformation::is3DTensorOnActivations(node);
});

View File

@ -21,9 +21,10 @@ using namespace ngraph::pass;
SimpleLowPrecisionTransformer::SimpleLowPrecisionTransformer(
const std::vector<ngraph::pass::low_precision::OperationPrecisionRestriction>& precisionRestrictions,
const std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>& quantizationRestrictions) {
auto passConfig = get_pass_config();
// TODO: use one pass manager
markup = std::make_shared<ngraph::pass::Manager>();
markup = std::make_shared<ngraph::pass::Manager>(passConfig);
markup->register_pass<ngraph::pass::low_precision::MarkupCanBeQuantized>();
markup->register_pass<ngraph::pass::low_precision::MarkupPrecisions>(precisionRestrictions);
markup->register_pass<ngraph::pass::low_precision::MarkupPerTensorQuantization>(quantizationRestrictions);
@ -32,15 +33,20 @@ SimpleLowPrecisionTransformer::SimpleLowPrecisionTransformer(
markup->register_pass<ngraph::pass::low_precision::AlignQuantizationIntervals>();
markup->register_pass<ngraph::pass::low_precision::AlignQuantizationParameters>();
common = std::make_shared<ngraph::pass::Manager>();
common = std::make_shared<ngraph::pass::Manager>(passConfig);
commonGraphRewrite = common->register_pass<ngraph::pass::GraphRewrite>();
cleanup = common->register_pass<ngraph::pass::GraphRewrite>();
}
void SimpleLowPrecisionTransformer::transform(std::shared_ptr<ngraph::Function>& function) {
run_on_function(function);
}
bool SimpleLowPrecisionTransformer::run_on_function(std::shared_ptr<ngraph::Function> function) {
ngraph::pass::low_precision::TypeRelaxedReplacer pass;
pass.run_on_function(function);
markup->run_passes(function);
common->run_passes(function);
return true;
}

View File

@ -14,7 +14,7 @@
#include "low_precision/common/operation_precision_restriction.hpp"
#include "low_precision/common/operation_per_tensor_quantization_restriction.hpp"
class SimpleLowPrecisionTransformer {
class SimpleLowPrecisionTransformer : public ngraph::pass::FunctionPass{
public:
SimpleLowPrecisionTransformer(
const std::vector<ngraph::pass::low_precision::OperationPrecisionRestriction>& precisionRestrictions = {},
@ -25,12 +25,8 @@ public:
commonGraphRewrite->add_matcher<T>(TestTransformationParams::toParams(params));
}
template <class T>
void set_callback(const std::function<bool(const std::shared_ptr<const ::ngraph::Node>)>& callback) {
common->get_pass_config()->set_callback<T>(callback);
}
void transform(std::shared_ptr<ngraph::Function>& function);
bool run_on_function(std::shared_ptr<ngraph::Function> f) override;
std::shared_ptr<ngraph::pass::Manager> markup;
std::shared_ptr<ngraph::pass::Manager> common;

View File

@ -36,6 +36,7 @@ const std::map<ActivationTypes, std::vector<std::vector<float>>> activationTypes
{Clamp, {{-2.0f, 2.0f}}},
{Negative, {}},
{Acos, {}},
{Acosh, {}},
{Asin, {}},
{Asinh, {}},
{Atan, {}},

View File

@ -17,22 +17,34 @@ const std::vector<bool> normalizeVariance = {true, false};
const std::vector<std::vector<size_t>> inputShapes = {{1, 10, 5, 7, 8},
{1, 3, 8, 9, 49}};
const std::vector<ngraph::AxisSet> axes = {{1, 2, 3}, {2, 3}};
const std::vector<bool> acrossChannels = {true, false};
const std::vector<ngraph::AxisSet> emptyReductionAxes = {{}};
const std::vector<bool> emptyAcrossChannels = {{}};
const std::vector<double> epsilon = {0.000000001};
const auto MvnCases = ::testing::Combine(
const auto MvnAcrossChannels = ::testing::Combine(
::testing::ValuesIn(inputShapes), ::testing::ValuesIn(dataPrecisions),
::testing::ValuesIn(acrossChannels), ::testing::ValuesIn(normalizeVariance),
::testing::ValuesIn(epsilon),
::testing::ValuesIn(emptyReductionAxes), ::testing::ValuesIn(acrossChannels),
::testing::ValuesIn(normalizeVariance), ::testing::ValuesIn(epsilon),
::testing::Values(CommonTestUtils::DEVICE_CPU));
TEST_P(MvnLayerTest, Serialize) {
const auto MvnReductionAxes = ::testing::Combine(
::testing::ValuesIn(inputShapes), ::testing::ValuesIn(dataPrecisions),
::testing::ValuesIn(axes), ::testing::ValuesIn(emptyAcrossChannels),
::testing::ValuesIn(normalizeVariance), ::testing::ValuesIn(epsilon),
::testing::Values(CommonTestUtils::DEVICE_CPU));
TEST_P(Mvn1LayerTest, Serialize) {
Serialize();
}
INSTANTIATE_TEST_SUITE_P(smoke_MKLDNN_TestsMVN, MvnLayerTest, MvnCases,
MvnLayerTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_MKLDNN_TestsMVN_across_channels, Mvn1LayerTest, MvnAcrossChannels,
Mvn1LayerTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_MKLDNN_TestsMVN_reduction_axes, Mvn1LayerTest, MvnReductionAxes,
Mvn1LayerTest::getTestCaseName);
// ------------------- MVN-6 -------------------------------------------------

View File

@ -12,14 +12,47 @@ const std::vector<element::Type> precisions = {
element::f32
};
const std::vector< ngraph::PartialShape > inputShapes = {
{ 1ul, 4ul, 16ul, 16ul }, { 1ul, 4ul, 16ul, 16ul, 16ul }
namespace shape4d {
const std::vector<ngraph::PartialShape> inputShapes = {
{ 1ul, 3ul, 16ul, 16ul },
{ 4ul, 3ul, 16ul, 16ul }
};
const std::vector<builder::subgraph::FakeQuantizeOnData> fqOnData = {
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 10.f }, { 25.5f } },
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { -12.8f }, { 12.7f } }
const std::vector<MultiplyToGroupConvolutionTransformationParam> params = {
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
{{1.f, 2.f, 3.f}, element::f32, Shape{1, 3, 1, 1}},
"output/GroupConvolution",
"U8"
},
// Multiply with scalar is not transformed to GroupConvolution
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
{{4.f}, element::f32, Shape{1, 1, 1, 1}},
"output/GroupConvolution",
""
},
// Multiply with scalar is not transformed to GroupConvolution
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
{{4.f}, element::f32, Shape{}},
"output/GroupConvolution",
""
},
// Zero point
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f }, { -1.28f }, { 1.27f } },
{{1.f, 2.f, 3.f}, element::f32, Shape{1, 3, 1, 1}},
"output/GroupConvolution",
"U8"
},
// Zero point
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f / 2.f }, { -1.28f }, { 1.27f / 2.f} },
{{1.f, 2.f, 3.f}, element::f32, Shape{1, 3, 1, 1}},
"output/GroupConvolution",
"U8"
}
};
INSTANTIATE_TEST_SUITE_P(smoke_LPT, MultiplyToGroupConvolutionTransformation,
@ -27,6 +60,59 @@ INSTANTIATE_TEST_SUITE_P(smoke_LPT, MultiplyToGroupConvolutionTransformation,
::testing::ValuesIn(precisions),
::testing::ValuesIn(inputShapes),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::ValuesIn(fqOnData)),
::testing::ValuesIn(params)),
MultiplyToGroupConvolutionTransformation::getTestCaseName);
} // namespace shape4d
namespace shape5d {
const std::vector<ngraph::PartialShape> inputShapes = {
{ 1ul, 3ul, 16ul, 16ul, 16ul },
{ 4ul, 3ul, 16ul, 16ul, 16ul }
};
const std::vector<MultiplyToGroupConvolutionTransformationParam> params = {
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
{{1.f, 2.f, 3.f}, element::f32, Shape{1, 3, 1, 1, 1}},
"output/GroupConvolution",
"U8"
},
// Multiply with scalar is not transformed to GroupConvolution
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
{{4.f}, element::f32, Shape{1, 1, 1, 1, 1}},
"output/GroupConvolution",
""
},
// Multiply with scalar is not transformed to GroupConvolution
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
{{4.f}, element::f32, Shape{}},
"output/GroupConvolution",
""
},
// Zero point
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f }, { -1.28f }, { 1.27f } },
{{1.f, 2.f, 3.f}, element::f32, Shape{1, 3, 1, 1, 1}},
"output/GroupConvolution",
"U8"
},
// Zero point
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f / 2.f }, { -1.28f }, { 1.27f / 2.f} },
{{1.f, 2.f, 3.f}, element::f32, Shape{1, 3, 1, 1, 1}},
"output/GroupConvolution",
"U8"
}
};
INSTANTIATE_TEST_SUITE_P(smoke_LPT, MultiplyToGroupConvolutionTransformation,
::testing::Combine(
::testing::ValuesIn(precisions),
::testing::ValuesIn(inputShapes),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::ValuesIn(params)),
MultiplyToGroupConvolutionTransformation::getTestCaseName);
} // namespace shape5d
} // namespace

View File

@ -37,6 +37,7 @@ const std::map<ActivationTypes, std::vector<std::vector<float>>> activationTypes
{Clamp, {{-2.0f, 2.0f}}},
{Negative, {}},
{Acos, {}},
{Acosh, {}},
{Asin, {}},
{Asinh, {}},
{Atan, {}},
@ -66,6 +67,7 @@ const std::map<ActivationTypes, std::vector<std::vector<float>>> activationTypes
// List of operations that should be tested also with integer precision
const std::map<ActivationTypes, std::vector<std::vector<float>>> intActivationTypes = {
{Acosh, {}},
{Asinh, {}},
{Atan, {}},
{Negative, {}},

View File

@ -56,7 +56,8 @@ namespace {
Values<int64_t>(7),
Values<InferenceEngine::SizeVector>({2, 1, 4}),
Values<InferenceEngine::Precision>(Precision::FP32, Precision::I32),
Values(CommonTestUtils::DEVICE_CPU)));
Values(CommonTestUtils::DEVICE_CPU),
Values<std::map<std::string, std::string>>({})));
using namespace testing;
INSTANTIATE_TEST_SUITE_P(smoke_TrivialLoop, TrivialLoopTest,
Combine(

View File

@ -9,6 +9,9 @@
using namespace LayerTestsDefinitions;
const std::vector<bool> emptyAcrossChannels = {{}};
const std::vector<ngraph::AxisSet> emptyReductionAxes = {{}};
const std::vector<std::vector<size_t>> inputShapes = {
{8},
{1, 16},
@ -41,23 +44,35 @@ const std::vector<double> epsilon = {
0.000000001
};
const auto MvnCases = ::testing::Combine(
std::vector<InferenceEngine::Precision> dataPrecisions = {
InferenceEngine::Precision::FP16,
InferenceEngine::Precision::FP32
};
const auto MvnAcrossChannels = ::testing::Combine(
::testing::ValuesIn(inputShapes),
::testing::Values(InferenceEngine::Precision::FP32),
::testing::ValuesIn(dataPrecisions),
::testing::ValuesIn(emptyReductionAxes),
::testing::ValuesIn(acrossChannels),
::testing::ValuesIn(normalizeVariance),
::testing::ValuesIn(epsilon),
::testing::Values(CommonTestUtils::DEVICE_CPU)
);
INSTANTIATE_TEST_SUITE_P(smoke_MKLDNN_TestsMVN, MvnLayerTest, MvnCases, MvnLayerTest::getTestCaseName);
const auto MvnReductionAxes = ::testing::Combine(
::testing::ValuesIn(std::vector<std::vector<size_t>>{{1, 10, 5, 17}, {1, 3, 8, 9}}),
::testing::Values(InferenceEngine::Precision::FP32),
::testing::ValuesIn(std::vector<ngraph::AxisSet>{{1, 2, 3}, {2, 3}}),
::testing::ValuesIn(emptyAcrossChannels),
::testing::ValuesIn(normalizeVariance),
::testing::ValuesIn(epsilon),
::testing::Values(CommonTestUtils::DEVICE_CPU)
);
INSTANTIATE_TEST_SUITE_P(smoke_MKLDNN_TestsMVN_AcrossChannels, Mvn1LayerTest, MvnAcrossChannels, Mvn1LayerTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_MKLDNN_TestsMVN_ReductionAxes, Mvn1LayerTest, MvnReductionAxes, Mvn1LayerTest::getTestCaseName);
std::vector<InferenceEngine::Precision> dataPrecisions = {
InferenceEngine::Precision::FP32,
InferenceEngine::Precision::FP16
};
std::vector<InferenceEngine::Precision> idxPrecisions = {
InferenceEngine::Precision::I32,

View File

@ -13,7 +13,7 @@ using namespace CPUTestUtils;
namespace CPULayerTestsDefinitions {
typedef std::tuple<
LayerTestsDefinitions::mvnParams,
LayerTestsDefinitions::mvn1Params,
CPUSpecificParams,
fusingSpecificParams,
Precision, // CNNNetwork input precision
@ -24,14 +24,14 @@ class MvnLayerCPUTest : public testing::WithParamInterface<MvnLayerCPUTestParamS
virtual public LayerTestsUtils::LayerTestsCommon, public CpuTestWithFusing {
public:
static std::string getTestCaseName(testing::TestParamInfo<MvnLayerCPUTestParamSet> obj) {
LayerTestsDefinitions::mvnParams basicParamsSet;
LayerTestsDefinitions::mvn1Params basicParamsSet;
CPUSpecificParams cpuParams;
fusingSpecificParams fusingParams;
Precision inputPrecision, outputPrecision;
std::tie(basicParamsSet, cpuParams, fusingParams, inputPrecision, outputPrecision) = obj.param;
std::ostringstream result;
result << LayerTestsDefinitions::MvnLayerTest::getTestCaseName(testing::TestParamInfo<LayerTestsDefinitions::mvnParams>(
result << LayerTestsDefinitions::Mvn1LayerTest::getTestCaseName(testing::TestParamInfo<LayerTestsDefinitions::mvn1Params>(
basicParamsSet, 0));
result << "_" << "CNNInpPrc=" << inputPrecision.name();
@ -45,7 +45,7 @@ public:
}
protected:
void SetUp() override {
LayerTestsDefinitions::mvnParams basicParamsSet;
LayerTestsDefinitions::mvn1Params basicParamsSet;
CPUSpecificParams cpuParams;
fusingSpecificParams fusingParams;
std::tie(basicParamsSet, cpuParams, fusingParams, inPrc, outPrc) = this->GetParam();
@ -55,13 +55,17 @@ protected:
InferenceEngine::SizeVector inputShapes;
InferenceEngine::Precision netPrecision;
ngraph::AxisSet axes;
bool acrossChannels, normalizeVariance;
double eps;
std::tie(inputShapes, netPrecision, acrossChannels, normalizeVariance, eps, targetDevice) = basicParamsSet;
std::tie(inputShapes, netPrecision, axes, acrossChannels, normalizeVariance, eps, targetDevice) = basicParamsSet;
auto netPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
auto param = ngraph::builder::makeParams(netPrc, {inputShapes});
auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(param));
auto mvn = ngraph::builder::makeMVN(paramOuts[0], acrossChannels, normalizeVariance, eps);
if (!axes.empty()) {
mvn = ngraph::builder::makeMVN(paramOuts[0], axes, normalizeVariance, eps);
}
selectedType = getPrimitiveType() + "_" + inPrc.name();
@ -128,6 +132,8 @@ const std::vector<double> epsilon = {
0.000000001
};
const std::vector<ngraph::AxisSet> emptyReductionAxes = {{}};
std::vector<Precision> inpPrc = {Precision::I8, Precision::BF16, Precision::FP32};
std::vector<Precision> outPrc = {Precision::BF16, Precision::FP32};
@ -162,6 +168,7 @@ const auto Mvn3D = ::testing::Combine(
::testing::Combine(
::testing::ValuesIn(inputShapes_3D),
::testing::Values(InferenceEngine::Precision::FP32),
::testing::ValuesIn(emptyReductionAxes),
::testing::ValuesIn(acrossChannels),
::testing::ValuesIn(normalizeVariance),
::testing::ValuesIn(epsilon),
@ -177,6 +184,7 @@ const auto Mvn4D = ::testing::Combine(
::testing::Combine(
::testing::ValuesIn(inputShapes_4D),
::testing::Values(InferenceEngine::Precision::FP32),
::testing::ValuesIn(emptyReductionAxes),
::testing::ValuesIn(acrossChannels),
::testing::ValuesIn(normalizeVariance),
::testing::ValuesIn(epsilon),
@ -192,6 +200,7 @@ const auto Mvn5D = ::testing::Combine(
::testing::Combine(
::testing::ValuesIn(inputShapes_5D),
::testing::Values(InferenceEngine::Precision::FP32),
::testing::ValuesIn(emptyReductionAxes),
::testing::ValuesIn(acrossChannels),
::testing::ValuesIn(normalizeVariance),
::testing::ValuesIn(epsilon),
@ -216,6 +225,7 @@ const auto Mvn1D = ::testing::Combine(
::testing::Combine(
::testing::ValuesIn(inputShapes_1D),
::testing::Values(InferenceEngine::Precision::FP32),
::testing::ValuesIn(emptyReductionAxes),
::testing::ValuesIn(acrossChannels),
::testing::ValuesIn(normalizeVariance),
::testing::ValuesIn(epsilon),
@ -232,6 +242,7 @@ const auto Mvn2D = ::testing::Combine(
::testing::Combine(
::testing::ValuesIn(inputShapes_2D),
::testing::Values(InferenceEngine::Precision::FP32),
::testing::ValuesIn(emptyReductionAxes),
::testing::Values(false),
::testing::ValuesIn(normalizeVariance),
::testing::ValuesIn(epsilon),
@ -248,6 +259,7 @@ const auto Mvn2DTrans = ::testing::Combine(
::testing::Combine(
::testing::ValuesIn(inputShapes_2D),
::testing::Values(InferenceEngine::Precision::FP32),
::testing::ValuesIn(emptyReductionAxes),
::testing::Values(true),
::testing::ValuesIn(normalizeVariance),
::testing::ValuesIn(epsilon),

View File

@ -9,18 +9,50 @@ using namespace InferenceEngine::details;
namespace {
const std::vector<element::Type> precisions = {
element::f32,
element::f16
element::f32
};
const std::vector<ngraph::PartialShape>inputShapes = {
{ 1ul, 4ul, 16ul, 16ul }, { 1ul, 4ul, 16ul, 16ul, 16ul }
namespace shape4d {
const std::vector<ngraph::PartialShape> inputShapes = {
{ 1ul, 3ul, 16ul, 16ul },
{ 4ul, 3ul, 16ul, 16ul }
};
const std::vector<builder::subgraph::FakeQuantizeOnData> fqOnData = {
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 10.f }, { 25.5f } },
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { -12.8f }, { 12.7f } }
const std::vector<MultiplyToGroupConvolutionTransformationParam> params = {
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
{{1.f, 2.f, 3.f}, element::f32, Shape{1, 3, 1, 1}},
"output/GroupConvolution",
"U8"
},
// Multiply with scalar is transformed to GroupConvolution
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
{{4.f}, element::f32, Shape{1, 1, 1, 1}},
"output/GroupConvolution",
"U8"
},
// Multiply with scalar is transformed to GroupConvolution
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
{{4.f}, element::f32, Shape{}},
"output/GroupConvolution",
"U8"
},
// Zero point
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f }, { -1.28f }, { 1.27f } },
{{1.f, 2.f, 3.f}, element::f32, Shape{1, 3, 1, 1}},
"output/GroupConvolution",
"I8"
},
// Zero point
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f / 2.f }, { -1.28f }, { 1.27f / 2.f} },
{{1.f, 2.f, 3.f}, element::f32, Shape{1, 3, 1, 1}},
"output/GroupConvolution",
"U8"
}
};
INSTANTIATE_TEST_SUITE_P(smoke_LPT, MultiplyToGroupConvolutionTransformation,
@ -28,6 +60,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_LPT, MultiplyToGroupConvolutionTransformation,
::testing::ValuesIn(precisions),
::testing::ValuesIn(inputShapes),
::testing::Values(CommonTestUtils::DEVICE_GPU),
::testing::ValuesIn(fqOnData)),
::testing::ValuesIn(params)),
MultiplyToGroupConvolutionTransformation::getTestCaseName);
} // namespace shape4d
} // namespace

View File

@ -34,6 +34,7 @@ const std::map<ActivationTypes, std::vector<std::vector<float>>> activationTypes
{Clamp, {{-2.0f, 2.0f}}},
{Negative, {}},
{Acos, {}},
{Acosh, {}},
{Asin, {}},
{Asinh, {}},
{Atan, {}},

View File

@ -9,6 +9,8 @@
using namespace LayerTestsDefinitions;
const std::vector<ngraph::AxisSet> emptyReductionAxes = {{}};
const std::vector<std::vector<size_t>> inputShapes = {
{1, 32, 17},
{1, 37, 9},
@ -41,13 +43,14 @@ const std::vector<double> epsilon = {
const auto MvnCases = ::testing::Combine(
::testing::ValuesIn(inputShapes),
::testing::Values(InferenceEngine::Precision::FP32),
::testing::ValuesIn(emptyReductionAxes),
::testing::ValuesIn(acrossChannels),
::testing::ValuesIn(normalizeVariance),
::testing::ValuesIn(epsilon),
::testing::Values(CommonTestUtils::DEVICE_GPU)
);
INSTANTIATE_TEST_SUITE_P(smoke_CLDNN_TestsMVN, MvnLayerTest, MvnCases, MvnLayerTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_CLDNN_TestsMVN, Mvn1LayerTest, MvnCases, Mvn1LayerTest::getTestCaseName);
std::vector<InferenceEngine::Precision> dataPrecisions = {
InferenceEngine::Precision::FP32,

View File

@ -58,6 +58,9 @@ std::vector<std::string> disabledTestPatterns() {
R"(.*IEClassImportExportTestP.*)",
// TODO: Issue: 59586, NormalizeL2 output mismatch for empty axes case
R"(.*NormalizeL2LayerTest.*axes=\(\).*)"
R"(.*NormalizeL2LayerTest.*axes=\(\).*)",
// Not allowed dynamic loop tests on GPU
R"(.*smoke_StaticShapeLoop_dynamic_exit.*)"
};
}

View File

@ -0,0 +1,140 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <tuple>
#include <string>
#include <vector>
#include <memory>
#include "single_layer_tests/loop.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
using namespace InferenceEngine;
namespace {
std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP32,
InferenceEngine::Precision::I32
};
std::map<std::string, std::string> netConfigurations = {
{GPUConfigParams::KEY_GPU_ENABLE_LOOP_UNROLLING, PluginConfigParams::NO}
};
static const std::vector<std::tuple<bool, int64_t, int64_t, int64_t>> static_loop_types_axis_0 {
// GCC4.8 limitation: have to specify type of each element in list
// static_trip_count | max | dynamic_exit | axis
std::tuple<bool, int64_t, int64_t, int64_t>{ true , 10, -1, 0 }, // n_iter 10, no dynamic exit
};
std::vector<InferenceEngine::SizeVector> inputs_0 = {
{1, 4, 2}
};
INSTANTIATE_TEST_CASE_P(smoke_StaticShapeLoop_axis_0, StaticShapeLoopTest,
testing::Combine(
/* unrolling */ testing::ValuesIn(std::vector<bool>{false}),
/* static_continue_cond */ testing::Values(true),
/* args_pack */ testing::ValuesIn(static_loop_types_axis_0),
/* start_value */ testing::Values<int64_t>(0),
/* data_shape */ testing::ValuesIn(inputs_0),
/* data_prc */ testing::ValuesIn(netPrecisions),
/* device */ testing::Values<std::string>(CommonTestUtils::DEVICE_GPU),
/* configuration */ testing::Values<std::map<std::string, std::string>>(netConfigurations)),
StaticShapeLoopTest::getTestCaseName);
static const std::vector<std::tuple<bool, int64_t, int64_t, int64_t>> static_loop_types_1 {
// GCC4.8 limitation: have to specify type of each element in list
// static_trip_count | max | dynamic_exit | axis
std::tuple<bool, int64_t, int64_t, int64_t>{ true , 5, -1, 1 }, // n_iter 5, no dynamic exit
};
std::vector<InferenceEngine::SizeVector> inputs_1 = {
{2, 1, 4, 6}
};
INSTANTIATE_TEST_CASE_P(smoke_StaticShapeLoop_axis_1, StaticShapeLoopTest,
testing::Combine(
/* unrolling */ testing::ValuesIn(std::vector<bool>{false}),
/* static_continue_cond */ testing::Values(true),
/* args_pack */ testing::ValuesIn(static_loop_types_1),
/* start_value */ testing::Values<int64_t>(0),
/* data_shape */ testing::ValuesIn(inputs_1),
/* data_prc */ testing::ValuesIn(netPrecisions),
/* device */ testing::Values<std::string>(CommonTestUtils::DEVICE_GPU),
/* configuration */ testing::Values<std::map<std::string, std::string>>(netConfigurations)),
StaticShapeLoopTest::getTestCaseName);
static const std::vector<std::tuple<bool, int64_t, int64_t, int64_t>> static_loop_types_2 {
// GCC4.8 limitation: have to specify type of each element in list
// static_trip_count | max | dynamic_exit | axis
std::tuple<bool, int64_t, int64_t, int64_t>{ true , 10, -1, 2 }, // n_iter 10, no dynamic exit
};
std::vector<InferenceEngine::SizeVector> inputs_2 = {
{2, 4, 1, 6}
};
INSTANTIATE_TEST_CASE_P(smoke_StaticShapeLoop_axis_2, StaticShapeLoopTest,
testing::Combine(
/* unrolling */ testing::ValuesIn(std::vector<bool>{false}),
/* static_continue_cond */ testing::Values(true),
/* args_pack */ testing::ValuesIn(static_loop_types_2),
/* start_value */ testing::Values<int64_t>(0),
/* data_shape */ testing::ValuesIn(inputs_2),
/* data_prc */ testing::ValuesIn(netPrecisions),
/* device */ testing::Values<std::string>(CommonTestUtils::DEVICE_GPU),
/* configuration */ testing::Values<std::map<std::string, std::string>>(netConfigurations)),
StaticShapeLoopTest::getTestCaseName);
static const std::vector<std::tuple<bool, int64_t, int64_t, int64_t>> static_loop_types_no_auto_concat {
// GCC4.8 limitation: have to specify type of each element in list
// static_trip_count | max | dynamic_exit | axis
std::tuple<bool, int64_t, int64_t, int64_t>{ true , 10, -1, -1 }, // n_iter 10, no dynamic exit
};
std::vector<InferenceEngine::SizeVector> inputs_no_auto_concat = {
{4, 20, 12}
};
INSTANTIATE_TEST_CASE_P(smoke_StaticShapeLoop_no_auto_concat, StaticShapeLoopTest,
testing::Combine(
/* unrolling */ testing::ValuesIn(std::vector<bool>{false}),
/* static_continue_cond */ testing::Values(true),
/* args_pack */ testing::ValuesIn(static_loop_types_no_auto_concat),
/* start_value */ testing::Values<int64_t>(0),
/* data_shape */ testing::ValuesIn(inputs_no_auto_concat),
/* data_prc */ testing::ValuesIn(netPrecisions),
/* device */ testing::Values<std::string>(CommonTestUtils::DEVICE_GPU),
/* configuration */ testing::Values<std::map<std::string, std::string>>(netConfigurations)),
StaticShapeLoopTest::getTestCaseName);
static const std::vector<std::tuple<bool, int64_t, int64_t, int64_t>> static_loop_types_dynamic_exit {
// GCC4.8 limitation: have to specify type of each element in list
// static_trip_count | max | dynamic_exit | axis
std::tuple<bool, int64_t, int64_t, int64_t>{ true , 5, 3, -1 }, // n_iter 3, dynamic exit on 3
std::tuple<bool, int64_t, int64_t, int64_t>{ true , 5, 7, 1 }, // n_iter 5, dynamic exit not reached
std::tuple<bool, int64_t, int64_t, int64_t>{ true , -1, 5, -1 }, // n_iter 5, inf loop with dynamic exit on 5
std::tuple<bool, int64_t, int64_t, int64_t>{ false , 5, 3, -1 }, // | same with dynamic trip count
std::tuple<bool, int64_t, int64_t, int64_t>{ false , 5, 7, 1 }, // |
std::tuple<bool, int64_t, int64_t, int64_t>{ false , -1, 5, -1 } // |
};
std::vector<InferenceEngine::SizeVector> inputs_dynamic_exit = {
{4, 1, 2}
};
INSTANTIATE_TEST_CASE_P(smoke_StaticShapeLoop_dynamic_exit, StaticShapeLoopTest,
testing::Combine(
/* unrolling */ testing::ValuesIn(std::vector<bool>{false}),
/* static_continue_cond */ testing::Values(true),
/* args_pack */ testing::ValuesIn(static_loop_types_dynamic_exit),
/* start_value */ testing::Values<int64_t>(0),
/* data_shape */ testing::ValuesIn(inputs_dynamic_exit),
/* data_prc */ testing::ValuesIn(netPrecisions),
/* device */ testing::Values<std::string>(CommonTestUtils::DEVICE_GPU),
/* configuration */ testing::Values<std::map<std::string, std::string>>(netConfigurations)),
StaticShapeLoopTest::getTestCaseName);
} // namespace
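For clarity, a hedged example of composing one entry of the (static_trip_count, max_iter, dynamic_exit, axis) pack used by these suites; per the comments above, max_iter == -1 denotes an unbounded loop terminated only by the dynamic exit, and axis == -1 disables auto-concatenation of the per-iteration results:

// Standalone sketch of one args pack entry (values illustrative).
#include <cstdint>
#include <tuple>
#include <vector>

int main() {
    std::vector<std::tuple<bool, int64_t, int64_t, int64_t>> cases{
        // static trip count, infinite max_iter, dynamic exit after 5 iterations, no concat axis
        std::tuple<bool, int64_t, int64_t, int64_t>{true, -1, 5, -1},
    };
    return static_cast<int>(cases.size()) - 1;  // 0
}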

View File

@ -15,6 +15,8 @@ const std::vector<std::vector<int>> indices_4D = {
};
const std::vector<std::vector<int>> indices_3D = {
{2},
{0, 2},
{1, 2}, // equivalent MVN-1 across_channel=0
{0, 1, 2} // equivalent MVN-1 across_channel=1
};

View File

@ -8,6 +8,7 @@
#include <memory>
#include "shared_test_classes/base/low_precision_transformations/layer_transformation.hpp"
#include "lpt_ngraph_functions/common/constant.hpp"
#include "lpt_ngraph_functions/common/dequantization_operations.hpp"
#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp"
@ -15,11 +16,19 @@ using namespace ngraph;
namespace LayerTestsDefinitions {
class MultiplyToGroupConvolutionTransformationParam {
public:
builder::subgraph::FakeQuantizeOnData fqOnData;
builder::subgraph::Constant constant;
std::string layerName;
std::string expectedKernelType;
};
typedef std::tuple <
element::Type,
PartialShape,
std::string,
builder::subgraph::FakeQuantizeOnData> MultiplyToGroupConvolutionTransformationParams;
MultiplyToGroupConvolutionTransformationParam> MultiplyToGroupConvolutionTransformationParams;
class MultiplyToGroupConvolutionTransformation :
public testing::WithParamInterface<MultiplyToGroupConvolutionTransformationParams>,
@ -29,6 +38,7 @@ public:
protected:
void SetUp() override;
void Run() override;
};
} // namespace LayerTestsDefinitions

View File

@ -8,10 +8,15 @@
namespace LayerTestsDefinitions {
// DEPRECATED, remove MvnLayerTest once the KMB and ARM plugins switch to Mvn1LayerTest (#60420)
TEST_P(MvnLayerTest, CompareWithRefs) {
Run();
};
TEST_P(Mvn1LayerTest, CompareWithRefs) {
Run();
};
TEST_P(Mvn6LayerTest, CompareWithRefs) {
Run();
};

View File

@ -26,24 +26,41 @@ std::string MultiplyToGroupConvolutionTransformation::getTestCaseName(testing::T
ngraph::element::Type precision;
ngraph::PartialShape shape;
auto params = LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParamsU8I8();
builder::subgraph::FakeQuantizeOnData fqOnData;
std::tie(precision, shape, targetDevice, fqOnData) = obj.param;
MultiplyToGroupConvolutionTransformationParam param;
std::tie(precision, shape, targetDevice, param) = obj.param;
std::ostringstream result;
result << getTestCaseNameByParams(precision, shape, targetDevice, params) << "_" << fqOnData;
result << getTestCaseNameByParams(precision, shape, targetDevice, params) << "_" <<
param.fqOnData << "_" <<
param.constant << "_" <<
param.layerName << "_" <<
param.expectedKernelType;
return result.str();
}
void MultiplyToGroupConvolutionTransformation::SetUp() {
ngraph::PartialShape shape;
ngraph::element::Type precision;
builder::subgraph::FakeQuantizeOnData fqOnData;
std::tie(precision, shape, targetDevice, fqOnData) = this->GetParam();
MultiplyToGroupConvolutionTransformationParam param;
std::tie(precision, shape, targetDevice, param) = this->GetParam();
function = ngraph::builder::subgraph::MultiplyToGroupConvolutionFunction::getOriginal(
precision,
shape,
fqOnData);
param.fqOnData,
param.constant);
}
void MultiplyToGroupConvolutionTransformation::Run() {
LayerTestsCommon::Run();
const auto param = std::get<3>(GetParam());
const auto actualPrecision = getRuntimePrecision(param.layerName);
auto expectedPrecision = param.expectedKernelType;
if (expectedPrecision == "FP32" && std::get<0>(GetParam()) == ngraph::element::f16) {
expectedPrecision = "FP16";
}
EXPECT_EQ(actualPrecision, expectedPrecision);
}
TEST_P(MultiplyToGroupConvolutionTransformation, CompareWithRefImpl) {

View File

@ -39,6 +39,7 @@ static std::map<ngraph::helpers::ActivationTypes, std::string> activationNames =
{ngraph::helpers::ActivationTypes::Clamp, "Clamp"},
{ngraph::helpers::ActivationTypes::Negative, "Negative"},
{ngraph::helpers::ActivationTypes::Acos, "Acos"},
{ngraph::helpers::ActivationTypes::Acosh, "Acosh"},
{ngraph::helpers::ActivationTypes::Asin, "Asin"},
{ngraph::helpers::ActivationTypes::Asinh, "Asinh"},
{ngraph::helpers::ActivationTypes::Atan, "Atan"},

View File

@ -50,7 +50,8 @@ using StaticShapeLoopParams = typename std::tuple<
int64_t,
InferenceEngine::SizeVector,
InferenceEngine::Precision,
std::string
std::string,
std::map<std::string, std::string>
>;
/**

View File

@ -11,6 +11,7 @@
namespace LayerTestsDefinitions {
// DEPRECATED, remove MvnLayerTest once the KMB and ARM plugins switch to Mvn1LayerTest (#60420)
typedef std::tuple<
InferenceEngine::SizeVector, // Input shapes
InferenceEngine::Precision, // Input precision
@ -27,6 +28,24 @@ protected:
void SetUp() override;
};
typedef std::tuple<
InferenceEngine::SizeVector, // Input shapes
InferenceEngine::Precision, // Input precision
ngraph::AxisSet, // Reduction axes
bool, // Across channels
bool, // Normalize variance
double, // Epsilon
std::string // Device name
> mvn1Params;
class Mvn1LayerTest : public testing::WithParamInterface<mvn1Params>, virtual public LayerTestsUtils::LayerTestsCommon {
public:
static std::string getTestCaseName(const testing::TestParamInfo<mvn1Params>& obj);
protected:
void SetUp() override;
};
typedef std::tuple<
InferenceEngine::SizeVector, // Input shapes
InferenceEngine::Precision, // Data precision

View File

@ -84,6 +84,12 @@ InferenceEngine::Blob::Ptr ActivationLayerTest::GenerateInput(const InferenceEng
resolution = 32768;
break;
}
case ngraph::helpers::ActivationTypes::Acosh: {
data_start_from = 1;
data_range = 200;
resolution = 32768;
break;
}
case ngraph::helpers::ActivationTypes::Ceiling: {
data_start_from = -1000;
data_range = 2000;

View File

@ -140,6 +140,47 @@ namespace LayerTestsDefinitions {
function = std::make_shared<ngraph::Function>(ngraph::ResultVector{result0, result1, result2}, params, "loop");
}
std::string StaticShapeLoopTest::getTestCaseName(const testing::TestParamInfo<StaticShapeLoopParams> &obj) {
bool unrolling;
bool static_iter_num;
bool static_continue_cond;
int64_t max_iter_num;
int64_t dynamic_exit;
int64_t axis;
int64_t start_value;
InferenceEngine::SizeVector data_shape;
InferenceEngine::Precision data_prc;
std::string targetDevice;
auto args_pack = std::tie(static_iter_num, max_iter_num, dynamic_exit, axis);
std::map<std::string, std::string> configuration;
std::tie(
unrolling,
static_continue_cond,
args_pack,
start_value,
data_shape,
data_prc,
targetDevice,
configuration) = obj.param;
std::ostringstream result;
result << "unrolling=" << std::to_string(unrolling) << "_";
result << "static_iter_num=" << std::to_string(static_iter_num) << "_";
result << "static_continue_cond=" << std::to_string(static_continue_cond) << "_";
result << "max_iter_num=" << std::to_string(max_iter_num) << "_";
result << "dynamic_exit=" << std::to_string(dynamic_exit) << "_";
result << "axis=" << std::to_string(axis) << "_";
result << "start_value=" << std::to_string(start_value) << "_";
result << "max_iter_num=" << std::to_string(max_iter_num) << "_";
result << "IS=" << CommonTestUtils::vec2str(data_shape) << "_";
result << "netPRC=" << std::to_string(data_prc) << "_";
result << "targetDevice=" << targetDevice << "_";
auto res_str = result.str();
std::replace(res_str.begin(), res_str.end(), '-', '_');
return res_str;
}
void StaticShapeLoopTest::SetUp() {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
auto args_pack = std::tie(static_iter_num, max_iter_num, dynamic_exit, axis);
@ -150,7 +191,8 @@ namespace LayerTestsDefinitions {
start_value,
data_shape,
data_prc,
targetDevice) = GetParam();
targetDevice,
configuration) = GetParam();
const auto prc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(data_prc);
const auto ngShape = ngraph::Shape{data_shape};

View File

@ -7,6 +7,7 @@
namespace LayerTestsDefinitions {
// DEPRECATED, remove MvnLayerTest once the KMB and ARM plugins switch to Mvn1LayerTest (#60420)
std::string MvnLayerTest::getTestCaseName(const testing::TestParamInfo<mvnParams>& obj) {
InferenceEngine::SizeVector inputShapes;
InferenceEngine::Precision inputPrecision;
@ -38,6 +39,46 @@ void MvnLayerTest::SetUp() {
function = std::make_shared<ngraph::Function>(results, param, "mvn");
}
std::string Mvn1LayerTest::getTestCaseName(const testing::TestParamInfo<mvn1Params>& obj) {
InferenceEngine::SizeVector inputShapes;
InferenceEngine::Precision inputPrecision;
ngraph::AxisSet axes;
bool acrossChannels, normalizeVariance;
double eps;
std::string targetDevice;
std::tie(inputShapes, inputPrecision, axes, acrossChannels, normalizeVariance, eps, targetDevice) = obj.param;
std::ostringstream result;
result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_";
result << "Precision=" << inputPrecision.name() << "_";
if (!axes.empty()) {
result << "ReductionAccess=" << CommonTestUtils::vec2str(axes.to_vector()) << "_";
} else {
result << "AcrossChannels=" << (acrossChannels ? "TRUE" : "FALSE") << "_";
}
result << "NormalizeVariance=" << (normalizeVariance ? "TRUE" : "FALSE") << "_";
result << "Epsilon=" << eps << "_";
result << "TargetDevice=" << targetDevice;
return result.str();
}
void Mvn1LayerTest::SetUp() {
InferenceEngine::SizeVector inputShapes;
InferenceEngine::Precision inputPrecision;
ngraph::AxisSet axes;
bool acrossChannels, normalizeVariance;
double eps;
std::tie(inputShapes, inputPrecision, axes, acrossChannels, normalizeVariance, eps, targetDevice) = this->GetParam();
auto inType = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inputPrecision);
auto param = ngraph::builder::makeParams(inType, {inputShapes});
auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(param));
auto mvn = std::dynamic_pointer_cast<ngraph::op::MVN>(ngraph::builder::makeMVN(paramOuts[0], acrossChannels, normalizeVariance, eps));
if (!axes.empty()) {
mvn = std::dynamic_pointer_cast<ngraph::op::MVN>(ngraph::builder::makeMVN(paramOuts[0], axes, normalizeVariance, eps));
}
ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(mvn)};
function = std::make_shared<ngraph::Function>(results, param, "MVN1");
}
std::string Mvn6LayerTest::getTestCaseName(const testing::TestParamInfo<mvn6Params>& obj) {
InferenceEngine::SizeVector inputShapes;

View File

@ -4,6 +4,7 @@
VERIFIED_OP_REFERENCES = [
'Abs-1',
'Acos-1',
'Acosh-3',
'Add-1',
'Asin-1',
'Asinh-3',
@ -56,6 +57,7 @@ VERIFIED_OP_REFERENCES = [
'LSTMSequence-5',
'LogSoftmax-5',
'Loop-5',
'MVN-1',
'MVN-6',
'Maximum-1',
'MaxPool-1',

View File

@ -9,6 +9,8 @@
#include <ngraph/op/constant.hpp>
#include <ngraph/opsets/opset1.hpp>
#include "lpt_ngraph_functions/common/constant.hpp"
#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp"
#include "lpt_ngraph_functions/common/dequantization_operations.hpp"
namespace ngraph {
@ -26,7 +28,8 @@ public:
static std::shared_ptr<ngraph::Function> getOriginal(
const ngraph::element::Type precision,
const ngraph::PartialShape& inputShape,
const FakeQuantizeOnData& fqOnData);
const FakeQuantizeOnData& fqOnData,
const Constant& constant);
static std::shared_ptr<ngraph::Function> getReference(
const ngraph::PartialShape& inputShape,

View File

@ -38,18 +38,27 @@ std::shared_ptr<ngraph::Function> MultiplyToGroupConvolutionFunction::getOrigina
std::shared_ptr<ngraph::Function> MultiplyToGroupConvolutionFunction::getOriginal(
const ngraph::element::Type precision,
const ngraph::PartialShape& inputShape,
const FakeQuantizeOnData& fqOnData) {
const FakeQuantizeOnData& fqOnData,
const Constant& constant) {
const auto input = std::make_shared<ngraph::opset1::Parameter>(precision, inputShape);
const auto fakeQuantizeOnActivations = makeFakeQuantize(input, precision, fqOnData);
const auto reshape = std::make_shared<ngraph::opset1::Reshape>(
fakeQuantizeOnActivations,
std::make_shared<ngraph::opset1::Constant>(element::i32, Shape{ static_cast<size_t>(inputShape.rank().get_length()) }, inputShape.to_shape()),
true);
reshape->set_friendly_name("output");
const auto fakeQuantize = makeFakeQuantize(input, precision, fqOnData);
ngraph::ResultVector results{
std::make_shared<ngraph::opset1::Result>(reshape)
};
const auto rank = inputShape.rank();
assert(rank.is_static());
const size_t size = rank.get_length() - 2;
const auto maxPool = std::make_shared<opset1::MaxPool>(
fakeQuantize,
Strides(size, 1),
Shape(size, 1),
Shape(size, 0),
Shape(size, 2));
const auto multiply = std::make_shared<ngraph::opset1::Multiply>(
maxPool,
std::make_shared<ngraph::opset1::Constant>(constant.outPrecision, constant.shape, constant.values));
multiply->set_friendly_name("output");
ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(multiply)};
return std::make_shared<ngraph::Function>(results, ngraph::ParameterVector{ input }, "MultiplyToGroupConvolutionFunction");
}
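The function above now builds Parameter -> FakeQuantize -> MaxPool -> Multiply(constant), with the Multiply named "output" so the tests can query its runtime kernel. A small sanity sketch of the pooling attributes, assuming the opset1::MaxPool argument order (strides, pads_begin, pads_end, kernel):

// Hypothetical standalone check, not library code.
#include <cassert>
#include <cstddef>

int main() {
    const std::size_t rank = 4;         // e.g. input shape {1, 3, 16, 16}
    const std::size_t size = rank - 2;  // number of spatial dimensions
    assert(size == 2);                  // strides/pads/kernel vectors are all length-2

    // kernel 2, stride 1, pads_begin 1, pads_end 0 keep each spatial dim unchanged:
    const std::size_t D = 16, k = 2, s = 1, pb = 1, pe = 0;
    assert((D + pb + pe - k) / s + 1 == D);
}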

View File

@ -291,6 +291,11 @@ std::shared_ptr<ngraph::Node> makeMVN(const ngraph::Output<Node> &in,
bool normalizeVariance,
double eps);
std::shared_ptr<ngraph::Node> makeMVN(const ngraph::Output<Node> &in,
const ngraph::AxisSet &axes,
bool normalizeVariance,
double eps);
std::shared_ptr<ngraph::Node> makeMVN6(const Output<Node>& in,
const Output<Node>& axesNode,
bool normalizeVariance,

View File

@ -99,6 +99,7 @@ enum ActivationTypes {
Clamp,
Negative,
Acos,
Acosh,
Asin,
Asinh,
Atan,

View File

@ -46,6 +46,8 @@ std::shared_ptr<ngraph::Node> makeActivation(const ngraph::Output<Node> &in,
return std::make_shared<ngraph::op::Negative>(in);
case ngraph::helpers::ActivationTypes::Acos:
return std::make_shared<ngraph::op::Acos>(in);
case ngraph::helpers::ActivationTypes::Acosh:
return std::make_shared<ngraph::op::Acosh>(in);
case ngraph::helpers::ActivationTypes::Asin:
return std::make_shared<ngraph::op::Asin>(in);
case ngraph::helpers::ActivationTypes::Asinh:

View File

@ -24,6 +24,15 @@ std::shared_ptr<ngraph::Node> makeMVN(const ngraph::Output<Node> &in,
return mvnNode;
}
std::shared_ptr<ngraph::Node> makeMVN(const ngraph::Output<Node> &in,
const ngraph::AxisSet &axes,
bool normalizeVariance,
double eps) {
auto mvnNode = std::make_shared<ngraph::op::MVN>(in, axes, normalizeVariance, eps);
return mvnNode;
}
std::shared_ptr<Node> makeMVN6(const Output<Node>& in,
const Output<Node>& axesNode,
bool normalizeVariance,

View File

@ -24,7 +24,6 @@ if (ENABLE_CLDNN)
set(CLDNN__INCLUDE_TESTS OFF CACHE BOOL "" FORCE)
endif()
set(CLDNN_THREADING "${THREADING}" CACHE STRING "" FORCE)
set(GPU_DEBUG_CONFIG OFF CACHE BOOL "Enable debug config feature")
add_subdirectory(clDNN)
endif()

View File

@ -20,7 +20,7 @@ else()
add_definitions(-DCLDNN_THREADING=CLDNN_THREADING_THREADPOOL)
endif()
if(GPU_DEBUG_CONFIG)
if(ENABLE_GPU_DEBUG_CAPS)
add_definitions(-DGPU_DEBUG_CONFIG=1)
endif()

View File

@ -5,6 +5,8 @@
#include "cldnn/runtime/debug_configuration.hpp"
#include <iostream>
#include <memory>
#include <vector>
#include <sstream>
namespace cldnn {
@ -13,22 +15,81 @@ const char *debug_configuration::prefix = "GPU_Debug: ";
// Default policy is that debug_configuration will override other configuration from IE.
#ifdef GPU_DEBUG_CONFIG
static void print_option(std::string option_name, std::string option_value) {
template<typename T>
void print_option(std::string option_name, T option_value) {
GPU_DEBUG_COUT << "Config " << option_name << " = " << option_value << std::endl;
}
static void get_int_env(const std::string &var, int &val) {
if (const auto env_var = std::getenv(var.c_str())) {
val = std::stoi(env_var);
print_option(var, std::to_string(val));
static std::string to_upper_case(const std::string& var) {
std::stringstream s;
for (size_t i = 0; i < var.size(); i++) {
if (std::isupper(var[i])) {
if (i != 0) {
s << "_";
}
s << var[i];
} else {
s << static_cast<char>(std::toupper(var[i]));
}
}
return s.str();
}
static std::vector<std::string> get_possible_option_names(const std::string& var, std::vector<std::string> allowed_option_prefixes) {
std::vector<std::string> result;
for (auto& prefix : allowed_option_prefixes) {
result.push_back(prefix + var);
result.push_back(prefix + to_upper_case(var));
}
return result;
}
template <typename T>
T convert_to(const std::string &str) {
std::istringstream ss(str);
T res;
ss >> res;
return res;
}
template<typename T>
void get_debug_env_var(const std::string &var, T &val, std::vector<std::string> allowed_option_prefixes) {
bool found = false;
for (auto o : get_possible_option_names(var, allowed_option_prefixes)) {
if (const auto env_var = std::getenv(o.c_str())) {
val = convert_to<T>(env_var);
found = true;
}
}
if (found) {
print_option(var, val);
}
}
static void get_str_env(const std::string &var, std::string &val) {
if (const auto env_var = std::getenv(var.c_str())) {
val = env_var;
print_option(var, val);
}
template<typename T>
void get_gpu_debug_env_var(const std::string &var, T &val) {
return get_debug_env_var(var, val, {"OV_GPU_"});
}
template<typename T>
void get_common_debug_env_var(const std::string &var, T &val) {
// The list below is ordered from lowest to highest prefix priority.
// If an option is set several times with different prefixes, the version with the highest priority is the one actually used.
// This makes it possible to enable a global option with some value and override that value for the GPU plugin only.
// For example: OV_GPU_Verbose=2 OV_Verbose=1 ./my_app => this->verbose == 2
// In that case Verbose (level = 1) is enabled for all OV components that support the option, while the GPU plugin raises its verbose level to 2.
std::vector<std::string> allowed_option_prefixes = {
"OV_",
"OV_GPU_"
};
return get_debug_env_var(var, val, allowed_option_prefixes);
}
#endif
@ -42,13 +103,13 @@ debug_configuration::debug_configuration()
, dump_layers(std::string())
, dump_layers_dst_only(0) {
#ifdef GPU_DEBUG_CONFIG
get_int_env("OV_GPU_Verbose", verbose);
get_int_env("OV_GPU_PrintMultiKernelPerf", print_multi_kernel_perf);
get_int_env("OV_GPU_DisableUsm", disable_usm);
get_str_env("OV_GPU_DumpGraphs", dump_graphs);
get_str_env("OV_GPU_DumpLayersPath", dump_layers_path);
get_str_env("OV_GPU_DumpLayers", dump_layers);
get_int_env("OV_GPU_DumpLayersDstOnly", dump_layers_dst_only);
get_common_debug_env_var("Verbose", verbose);
get_gpu_debug_env_var("PrintMultiKernelPerf", print_multi_kernel_perf);
get_gpu_debug_env_var("DisableUsm", disable_usm);
get_gpu_debug_env_var("DumpGraphs", dump_graphs);
get_gpu_debug_env_var("DumpLayersPath", dump_layers_path);
get_gpu_debug_env_var("DumpLayers", dump_layers);
get_gpu_debug_env_var("DumpLayersDstOnly", dump_layers_dst_only);
if (dump_layers_path.length() > 0 && !disable_usm) {
disable_usm = 1;
GPU_DEBUG_COUT << "DisableUsm=1 because of DumpLayersPath" << std::endl;
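A standalone restatement of the name resolution above, showing which environment variable spellings are probed for one option; the helper body is copied from the hunk (with a cast added for safety) and the example names are illustrative:

// Hedged restatement: "DumpLayersPath" is accepted both in CamelCase and in
// UPPER_SNAKE_CASE form under each allowed prefix.
#include <cassert>
#include <cctype>
#include <sstream>
#include <string>
#include <vector>

static std::string to_upper_case(const std::string& var) {
    std::stringstream s;
    for (size_t i = 0; i < var.size(); i++) {
        if (std::isupper(static_cast<unsigned char>(var[i]))) {
            if (i != 0)
                s << "_";
            s << var[i];
        } else {
            s << static_cast<char>(std::toupper(static_cast<unsigned char>(var[i])));
        }
    }
    return s.str();
}

int main() {
    assert(to_upper_case("DumpLayersPath") == "DUMP_LAYERS_PATH");
    // For a GPU-only option both spellings are probed with the OV_GPU_ prefix:
    std::vector<std::string> names = {"OV_GPU_DumpLayersPath", "OV_GPU_DUMP_LAYERS_PATH"};
    assert(names.size() == 2);
}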

View File

@ -25,73 +25,6 @@ struct loop_impl : typed_primitive_impl<loop> {
loop_impl(const loop_impl& other) : typed_primitive_impl<loop>(other), node(other.node) {}
explicit loop_impl(const loop_node& node) : node(node) {}
// read scalar value from data primitive
static int64_t read_scalar_value(memory::ptr mem, stream& stream) {
int64_t trip_count = 0;
const layout& prim_layout = mem->get_layout();
switch (prim_layout.data_type) {
case data_types::u8: {
mem_lock<uint8_t> lock_prim_output{mem, stream};
trip_count = *lock_prim_output.data();
break;
}
case data_types::i8: {
mem_lock<int8_t> lock_prim_output{mem, stream};
trip_count = *lock_prim_output.data();
break;
}
case data_types::i32: {
mem_lock<int32_t> lock_prim_output{mem, stream};
trip_count = *lock_prim_output.data();
break;
}
case data_types::i64: {
mem_lock<int64_t> lock_prim_output{mem, stream};
trip_count = *lock_prim_output.data();
break;
}
default:
assert(false);
}
return trip_count;
}
static void write_scalar_value(memory::ptr mem, stream& stream, int64_t input) {
const layout& prim_layout = mem->get_layout();
switch (prim_layout.data_type) {
case data_types::u8: {
assert(input >= std::numeric_limits<uint8_t>::min() &&
input <= std::numeric_limits<uint8_t>::max());
mem_lock<uint8_t> lock_prim_output{mem, stream};
*lock_prim_output.data() = static_cast<uint8_t>(input);
break;
}
case data_types::i8: {
assert(input >= std::numeric_limits<int8_t>::min() &&
input <= std::numeric_limits<int8_t>::max());
mem_lock<int8_t> lock_prim_output{mem, stream};
*lock_prim_output.data() = static_cast<int8_t>(input);
break;
}
case data_types::i32: {
assert(input >= std::numeric_limits<int32_t>::min() &&
input <= std::numeric_limits<int32_t>::max());
mem_lock<int32_t> lock_prim_output{mem, stream};
*lock_prim_output.data() = static_cast<int32_t>(input);
break;
}
case data_types::i64: {
mem_lock<int64_t> lock_prim_output{mem, stream};
*lock_prim_output.data() = input;
break;
}
default:
assert(false);
}
}
event::ptr execute_impl(const std::vector<event::ptr>& events, loop_inst& instance) override {
auto& outer_network = instance.get_network();
auto& stream = outer_network.get_stream();
@ -104,29 +37,37 @@ struct loop_impl : typed_primitive_impl<loop> {
instance.preprocess_output_memory();
instance.preprocess_input_memory();
instance.preprocess_backedge_memory();
// set input data for current_iteration primitive if current_iteration is used
if (node.is_current_iteration_used()) {
const primitive_id& current_iteration_id = node.get_current_iteration_id();
auto current_iteration_prim = body_network->get_primitive(current_iteration_id);
auto input_layout_prim = std::dynamic_pointer_cast<input_layout_inst>(current_iteration_prim);
if (input_layout_prim == nullptr) {
CLDNN_ERROR_MESSAGE(node.id(), "current_iteration primitive is not input_layout");
}
const auto& backedge_mapping = instance.get_current_iteration_backedge_mapping();
input_layout_prim->set_data(backedge_mapping.initial_mem);
}
instance.preproc_memories_done = true;
}
// read trip_count from outer network
bool update_num_iterations = false;
const primitive_id& trip_count_id = node.get_trip_count_id();
memory::ptr trip_count_mem = outer_network.get_primitive(trip_count_id)->output_memory_ptr();
int64_t trip_count = read_scalar_value(trip_count_mem, stream);
int64_t trip_count = loop_node::read_scalar_value(trip_count_mem, stream);
if (trip_count < 0) {
const int64_t max_iteration = node.get_max_iteration();
trip_count = max_iteration;
update_num_iterations = true;
}
// read initial execution condition from outer network
const primitive_id& initial_execution_id = node.get_initial_execution_id();
memory::ptr initial_execution_mem = outer_network.get_primitive(initial_execution_id)->output_memory_ptr();
int64_t execution_condition = read_scalar_value(initial_execution_mem, stream);
// shortcut of current_iteration memory in body network (slice of input)
memory::ptr current_iteration_mem = nullptr;
if (node.is_current_iteration_used()) {
const primitive_id& current_iteration_id = node.get_current_iteration_id();
current_iteration_mem = body_network->get_primitive(current_iteration_id)->output_memory_ptr();
}
int64_t execution_condition = loop_node::read_scalar_value(initial_execution_mem, stream);
// shortcut of execution_condition memory in body network
memory::ptr execution_condition_mem = nullptr;
@ -135,11 +76,6 @@ struct loop_impl : typed_primitive_impl<loop> {
execution_condition_mem = body_network->get_primitive(condition_id)->output_memory_ptr();
}
int64_t current_iteration = 0;
if (node.is_current_iteration_used()) {
write_scalar_value(current_iteration_mem, stream, current_iteration);
}
const auto& concatenated_input_mem_mappings = instance.concatenated_input_mem_mappings;
const auto& concatenated_output_mem_mappings = instance.concatenated_output_mem_mappings;
@ -155,12 +91,12 @@ struct loop_impl : typed_primitive_impl<loop> {
}
std::vector<event::ptr> loop_carried_dep(events.begin(), events.end());
while (current_iteration < trip_count && execution_condition) {
int64_t current_iteration_idx = 0;
while (current_iteration_idx < trip_count && execution_condition) {
// Copy & Set sliced input memory
for (size_t i = 0; i < concatenated_input_mem_mappings.size(); ++i) {
const auto& concatenated_input = concatenated_input_mem_mappings.at(i);
memory::ptr mem = concatenated_input.get_sliced_mem(current_iteration);
memory::ptr mem = concatenated_input.get_sliced_mem(current_iteration_idx);
if (mem) {
concatenated_input.sliced_data_prim->set_output_memory(mem);
} else {
@ -170,12 +106,12 @@ struct loop_impl : typed_primitive_impl<loop> {
// Set backedges
for (const auto& backedge_memory_mapping : instance.backedge_memory_mappings) {
backedge_memory_mapping.setup_iteration(current_iteration);
backedge_memory_mapping.setup_iteration(current_iteration_idx);
}
// Set sliced output memory
for (const auto& concat_output_mem_mapping : concatenated_output_mem_mappings) {
concat_output_mem_mapping.setup_concatenated_output_memory(current_iteration);
concat_output_mem_mapping.setup_concatenated_output_memory(current_iteration_idx);
}
// execute body network
@ -187,17 +123,16 @@ struct loop_impl : typed_primitive_impl<loop> {
loop_carried_dep.emplace_back(body_event);
}
//TODO: "curreint_iteration primitive and execution_condition is prepared
//as they are presented in the ngraph opset document for loop operation.
//However they are not being used yet and only TensorIterator which has fixed sequence length is being validated.
if (node.is_current_iteration_used()) {
write_scalar_value(current_iteration_mem, stream, current_iteration);
}
//TODO: execution_condition is prepared as it is presented in the
// ngraph opset document for the loop operation.
// However it is not being used yet and only TensorIterator, which
// has a fixed sequence length, is being validated.
if (node.is_execution_condition_used()) {
execution_condition = read_scalar_value(execution_condition_mem, stream);
execution_condition = loop_node::read_scalar_value(execution_condition_mem, stream);
}
// update index & execution condition for the next iteration
++current_iteration;
++current_iteration_idx;
}
body_network->reset_execution();
@ -208,9 +143,21 @@ struct loop_impl : typed_primitive_impl<loop> {
concat_output.restore_concatenated_mem();
}
const primitive_id& num_iteration_id = node.get_num_iteration_id();
memory::ptr num_actual_iterations_mem = outer_network.get_primitive(num_iteration_id)->output_memory_ptr();
write_scalar_value(num_actual_iterations_mem, stream, current_iteration);
if (update_num_iterations) {
// update num_iterations (actual number of iterations)
int64_t actual_iterations = 0;
if (node.is_current_iteration_used()) {
const auto& backedge_mapping = instance.get_current_iteration_backedge_mapping();
auto current_iteration_mem = backedge_mapping.from_primitive->output_memory_ptr();
actual_iterations = loop_node::read_scalar_value(current_iteration_mem, stream);
} else {
actual_iterations = current_iteration_idx;
}
const primitive_id& num_iteration_id = node.get_num_iteration_id();
memory::ptr num_actual_iterations_mem = outer_network.get_primitive(num_iteration_id)->output_memory_ptr();
loop_node::write_scalar_value(num_actual_iterations_mem, stream, actual_iterations);
}
ev->set();
return ev;

View File

@ -7,7 +7,9 @@
#include "cldnn/primitives/loop.hpp"
#include "cldnn/primitives/mutable_data.hpp"
#include "cldnn/primitives/data.hpp"
#include "cldnn/primitives/input_layout.hpp"
#include "cldnn/primitives/eltwise.hpp"
#include "cldnn/runtime/memory.hpp"
#include "cldnn/runtime/error_handler.hpp"
@ -27,24 +29,10 @@ private:
std::vector<loop::io_primitive_map> input_primitive_maps;
std::vector<loop::io_primitive_map> output_primitive_maps;
std::vector<cldnn::loop::backedge_mapping> back_edges;
mutable std::vector<loop::backedge_mapping> back_edges;
bool use_current_iteration;
bool use_execution_condition;
mutable program_impl::ptr body_program;
mutable std::map<primitive_id, memory::ptr> backedge_mem_impls;
mutable std::map<primitive_id, std::shared_ptr<mutable_data>> backedge_layers;
mutable std::map<primitive_id, std::shared_ptr<memory>> backedge_mem;
mutable bool output_is_backedge;
void setup_internal_mutabledata_node(primitive_id md_id, layout md_layout, std::vector<primitive_id> md_inputs_id = {}, uint32_t net_id = 0) const {
if (body.get_primitives().count(md_id) == 0) {
backedge_mem_impls[md_id] = get_program().get_engine().allocate_memory(md_layout, net_id);
backedge_mem[md_id] = backedge_mem_impls[md_id];
backedge_layers[md_id] = std::make_shared<mutable_data>(md_id, md_inputs_id, backedge_mem[md_id]);
body.add(backedge_layers[md_id]);
}
}
public:
typed_program_node(std::shared_ptr<primitive> prim, program_impl& prog) :
@ -63,7 +51,6 @@ public:
int64_t get_max_iteration() const { return max_iteration; }
program_impl::ptr get_body_program() const { return body_program; }
bool is_output_working_as_backedge() const { return output_is_backedge; }
bool is_current_iteration_used() const { return use_current_iteration; }
bool is_execution_condition_used() const { return use_execution_condition; }
@ -99,19 +86,95 @@ public:
static size_t convert_to_raw_axis(size_t axis, size_t ndim) {
// convert between bfyx, bfzyx, bfwzyx and tensor.size.raw
assert(axis < ndim);
if (axis >= ndim) {
throw std::runtime_error("axis should be less than ndim");
}
if (axis < 2) {
return axis;
}
return (ndim - 1) - (axis - 2);
}
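A minimal Python sketch (illustrative, mirroring the C++ above) of the axis mapping: batch and feature keep their positions, while spatial axes are reversed into tensor.size.raw order:
def convert_to_raw_axis(axis: int, ndim: int) -> int:
    if axis >= ndim:
        raise ValueError("axis should be less than ndim")
    if axis < 2:          # batch (b) and feature (f) keep their positions
        return axis
    return (ndim - 1) - (axis - 2)  # spatial axes are stored innermost-first
assert [convert_to_raw_axis(a, 4) for a in range(4)] == [0, 1, 3, 2]     # bfyx
assert [convert_to_raw_axis(a, 5) for a in range(5)] == [0, 1, 4, 3, 2]  # bfzyx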
// read scalar value from a data primitive
static int64_t read_scalar_value(memory::ptr mem, stream& stream) {
int64_t trip_count = 0;
const layout& prim_layout = mem->get_layout();
switch (prim_layout.data_type) {
case data_types::u8: {
mem_lock<uint8_t> lock_prim_output{mem, stream};
trip_count = *lock_prim_output.data();
break;
}
case data_types::i8: {
mem_lock<int8_t> lock_prim_output{mem, stream};
trip_count = *lock_prim_output.data();
break;
}
case data_types::i32: {
mem_lock<int32_t> lock_prim_output{mem, stream};
trip_count = *lock_prim_output.data();
break;
}
case data_types::i64: {
mem_lock<int64_t> lock_prim_output{mem, stream};
trip_count = *lock_prim_output.data();
break;
}
default:
throw std::runtime_error("Invalid data type : " + data_type_traits::name(prim_layout.data_type));
}
return trip_count;
}
template<typename T>
static inline void validate_input_value(int64_t input) {
if (input < std::numeric_limits<T>::min() || input > std::numeric_limits<T>::max()) {
throw std::runtime_error("Invalid data value : " + std::to_string(input));
}
}
static void write_scalar_value(memory::ptr mem, stream& stream, int64_t input) {
const layout& prim_layout = mem->get_layout();
switch (prim_layout.data_type) {
case data_types::u8: {
validate_input_value<uint8_t>(input);
mem_lock<uint8_t> lock_prim_output{mem, stream};
lock_prim_output[0] = static_cast<uint8_t>(input);
break;
}
case data_types::i8: {
validate_input_value<int8_t>(input);
mem_lock<int8_t> lock_prim_output{mem, stream};
lock_prim_output[0] = static_cast<int8_t>(input);
break;
}
case data_types::i32: {
validate_input_value<int32_t>(input);
mem_lock<int32_t> lock_prim_output{mem, stream};
lock_prim_output[0] = static_cast<int32_t>(input);
break;
}
case data_types::i64: {
mem_lock<int64_t> lock_prim_output{mem, stream};
lock_prim_output[0] = input;
break;
}
default:
throw std::runtime_error("Invalid data type : " + data_type_traits::name(prim_layout.data_type));
}
}
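A Python sketch (an illustration of the contract, not part of the patch) of the read/write pair above: the scalar is stored in the primitive's native integer type, and out-of-range writes are rejected:
import numpy as np
def write_scalar(buf: np.ndarray, value: int) -> None:
    info = np.iinfo(buf.dtype)  # numeric limits of the target type
    if not (info.min <= value <= info.max):
        raise RuntimeError("Invalid data value : " + str(value))
    buf[0] = value
def read_scalar(buf: np.ndarray) -> int:
    return int(buf[0])
buf = np.zeros(1, dtype=np.int8)
write_scalar(buf, 127)          # ok: fits into i8
assert read_scalar(buf) == 127  # round-trips unchanged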
layout calc_body_input_layout(const loop::io_primitive_map& inputDesc) const {
const auto& dependency_list = this->get_dependencies();
auto input = std::find_if(dependency_list.begin(), dependency_list.end(), [&inputDesc](const program_node* p){
return p->id() == inputDesc.external_id;
});
assert(input != dependency_list.end());
if (input == dependency_list.end()) {
throw std::runtime_error("Can't find input from dependency_list");
}
layout calculated_layout = (*input)->get_output_layout();
auto shape = calculated_layout.size.sizes(calculated_layout.format);
@ -164,6 +227,7 @@ public:
static bool is_integer(const data_types& data_type) {
switch (data_type) {
case data_types::u8:
case data_types::i8:
case data_types::i32:
case data_types::i64:
@ -173,54 +237,73 @@ public:
}
}
void process_single_int_input(const primitive_id& id) const {
void process_current_iteration() const {
const primitive_id& current_iteration_id = get_current_iteration_id();
if (current_iteration_id.empty()) {
return;
}
const topology_map& body_topology_map = body.get_primitives();
if (!id.empty()) {
// add input_layout if not exist
if (body_topology_map.count(id)) {
layout body_input_layout(data_types::i32, format::bfyx, {1, 1, 1, 1});
body.add(std::make_shared<input_layout>(id, body_input_layout));
const layout body_input_layout(data_types::i64, format::bfyx, {1, 1, 1, 1});
// add current_iteration primitive if it does not exist in the body
if (body_topology_map.find(current_iteration_id) == body_topology_map.end()) {
body.add(std::make_shared<input_layout>(current_iteration_id, body_input_layout));
} else {
const auto& body_input_prim = body.at(current_iteration_id);
const auto input_layout_prim = std::dynamic_pointer_cast<input_layout>(body_input_prim);
if (!input_layout_prim) {
CLDNN_ERROR_MESSAGE(this->id(), "current_iteration primitive should be cldnn::input_layout");
} else {
const auto& body_input_prim = body.at(id);
CLDNN_ERROR_BOOL(this->id(), "Error while building body program",
body_input_prim->type != input_layout::type_id(),
id + " is not cldnn::input_layout");
const auto input_layout_prim = static_cast<const input_layout*>(body_input_prim.get());
CLDNN_ERROR_BOOL(this->id(), "Error while building body program",
!static_cast<bool>(input_layout_prim->output_data_type),
"data_type of " + id + " is not specified");
CLDNN_ERROR_BOOL(this->id(), "Error while building body program",
!is_integer(*input_layout_prim->output_data_type),
id + " is not integer type");
CLDNN_ERROR_BOOL(this->id(), "Error while building body program",
input_layout_prim->layout.count() != 1,
id + " should have 1 element");
input_layout_prim->change_layout(body_input_layout);
}
}
// add increment data: constant 1
// it is used to update current_iteration in the body network
const primitive_id increment_value_id = current_iteration_id + "_inc";
auto mem = get_program().get_engine().allocate_memory(body_input_layout);
auto& stream = get_program().get_stream();
write_scalar_value(mem, stream, 1);
body.add(std::make_shared<data>(increment_value_id, mem));
// add eltwise sum updating current_iteration with incremental data
const primitive_id updated_current_iteration_id = current_iteration_id + "_update";
body.add(std::make_shared<eltwise>(updated_current_iteration_id,
current_iteration_id, increment_value_id, eltwise_mode::sum));
// set backedge
back_edges.emplace_back(updated_current_iteration_id, current_iteration_id);
}
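Schematically, the body sub-graph assembled above looks as follows (a Python sketch; "iter" stands for current_iteration_id, and the "_inc"/"_update" suffixes mirror the primitive ids created in the code):
body_topology = {
    "iter":        ("input_layout", []),                  # i64 scalar, set per iteration
    "iter_inc":    ("data", 1),                           # constant increment
    "iter_update": ("eltwise_sum", ["iter", "iter_inc"])  # iter + 1
}
back_edges = [("iter_update", "iter")]  # feeds the sum back as next iteration's value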
void process_single_int_output(const primitive_id& id) const {
// add mutable if not exist
const topology_map& body_topology_map = body.get_primitives();
layout body_output_layout(data_types::i64, format::bfyx, {1, 1, 1, 1});
if (!id.empty()) {
auto body_output = body_topology_map.find(id);
if (body_output == body_topology_map.end()) {
auto mem = get_program().get_engine().allocate_memory(body_output_layout);
auto md = std::make_shared<data>(id, mem);
body.add(md);
} else {
auto body_output_prim = body.at(body_output->first);
auto mem = get_program().get_engine().allocate_memory(body_output_layout);
body_output_prim.reset(new mutable_data(body_output->first, mem));
}
}
}
void build_body_program() const {
const std::vector<cldnn::program_node *>& deps = get_dependencies();
// setup internal inputs
const primitive_id& trip_count_id = get_trip_count_id();
const primitive_id& initial_execution = get_initial_execution_id();
const primitive_id& num_iteration = get_num_iteration_id();
for (const cldnn::program_node * dep : deps) {
const primitive_id& id = dep->id();
if (id == trip_count_id || id == initial_execution || id == num_iteration) {
continue;
}
for (const auto& pm : input_primitive_maps) {
layout calculated_layout = calc_body_input_layout(pm);
const primitive_id& internal_input_id = pm.internal_id;
for (const auto& pm : input_primitive_maps) {
layout calculated_layout = calc_body_input_layout(pm);
const primitive_id& internal_input_id = pm.internal_id;
// add inputs for body network if they do not exist
if (body.get_primitives().count(internal_input_id) == 0) {
body.add(std::make_shared<input_layout>(internal_input_id, calculated_layout));
} else {
body.change_input_layout(internal_input_id, calculated_layout);
}
// add inputs for body network if they do not exist
if (body.get_primitives().count(internal_input_id) == 0) {
body.add(std::make_shared<input_layout>(internal_input_id, calculated_layout));
} else {
body.change_input_layout(internal_input_id, calculated_layout);
}
}
@ -230,39 +313,35 @@ public:
}
std::set<primitive_id> output_names;
output_names.insert(output_primitive_maps.front().internal_id);
const auto& back_edges_list = this->get_primitive()->back_edges;
// add current_iteration_id in body network, and condition_id if it exists
process_single_int_input(get_current_iteration_id());
process_single_int_input(get_condition_id());
process_current_iteration();
process_single_int_output(get_condition_id());
// setup outputs for backedges
for (auto& back_edge : back_edges_list) {
for (auto& back_edge : back_edges) {
// check whether the back_edge.to has its corresponding io_primitive_map
const auto& input_map = std::find_if(input_primitive_maps.begin(), input_primitive_maps.end(),
[&](const loop::io_primitive_map& pm) {
return pm.internal_id == back_edge.to;
});
if (input_map == input_primitive_maps.end()) {
// a backedge which is current_iteration does not have an
// input primitive map because its initial value is always
// zero and the value will be set in execute_impl()
if (back_edge.to != get_current_iteration_id() && input_map == input_primitive_maps.end()) {
std::string msg = "No primitive mapping for backedge (internal_id: " + back_edge.to + ')';
CLDNN_ERROR_MESSAGE(this->id(), msg.c_str());
}
for (const auto& prim : body.get_primitives()) {
if (prim.first != back_edge.from) {
continue;
}
const auto dependencies_ref = prim.second->dependencies();
std::vector<primitive_id> dep_pids(dependencies_ref.size());
for (const auto& dep : dependencies_ref) {
dep_pids.emplace_back(dep.get());
}
setup_internal_mutabledata_node(back_edge.from, calc_body_input_layout(*input_map), dep_pids);
}
output_names.insert(back_edge.from);
}
// if execution_condition_id is specified, we need to add the id in build_option::outputs
if (!get_condition_id().empty()) {
output_names.insert(get_condition_id());
}
auto opts = get_program().get_options();
std::vector<primitive_id> output_names_vec(output_names.begin(), output_names.end());
opts.set_option(build_option::outputs(output_names_vec));
@ -310,6 +389,7 @@ public:
from_primitive(from_primitive),
to_primitive(to_primitive),
from_mems(from_mems),
initial_mem(initial_mem),
stream(stream),
type(type),
total_bytes(initial_mem->get_layout().bytes_count()) {
@ -396,7 +476,10 @@ private:
bytes_iteration_initial_offset(initial_offset * bytes_iteration) {}
static int64_t get_batch_size(layout mem_layout, int64_t axis) {
assert(axis >= 0);
if (axis < 0) {
throw std::runtime_error("axis should be positive integer or zero");
}
int64_t batch_size = 1;
for (int64_t i = 0; i < axis; ++i) {
batch_size *= mem_layout.size.raw[i];
@ -472,6 +555,7 @@ private:
std::vector<concatenated_memory_mapping> concatenated_output_mem_mappings;
static std::string to_string(const loop_node& node);
size_t current_iteration_backedge_mapping_idx = 0;
public:
typed_primitive_inst(network_impl& network, const loop_node& node);
@ -479,6 +563,12 @@ public:
void preprocess_input_memory();
void preprocess_output_memory();
void preprocess_backedge_memory();
const backedge_memory_mapping& get_current_iteration_backedge_mapping() const {
if (!node.is_current_iteration_used()) {
CLDNN_ERROR_MESSAGE(node.id(), "no backedge mapping for current_iteration");
}
return backedge_memory_mappings.at(current_iteration_backedge_mapping_idx);
}
private:
network_impl::ptr body_network;

View File

@ -279,12 +279,24 @@ void loop_inst::preprocess_backedge_memory() {
for (const auto& back_edge : back_edges) {
// find the corresponding input of the backedge
const auto input_map_ptrs = node.find_io_primitive_maps(back_edge.to, false);
assert(input_map_ptrs.size() == 1);
const auto& input_map = input_map_ptrs.front();
auto backedged_sliced_output_mems = get_sliced_mem(back_edge.from);
const auto backedge_to_prim = body_network->get_primitive(back_edge.to);
const auto backedge_from_prim = body_network->get_primitive(back_edge.from);
memory::ptr initial_mem = get_external_memory(input_map->external_id);
memory::ptr initial_mem;
if (back_edge.to == node.get_current_iteration_id()) {
const layout current_iteration_layout = backedge_to_prim->output_memory().get_layout();
initial_mem = get_network().get_engine().allocate_memory(current_iteration_layout);
auto& stream = get_network().get_stream();
loop_node::write_scalar_value(initial_mem, stream, 0);
current_iteration_backedge_mapping_idx = backedge_memory_mappings.size();
} else {
if (input_map_ptrs.empty()) {
CLDNN_ERROR_MESSAGE(id(), "no input_mapping for backedged input");
}
initial_mem = get_external_memory(input_map_ptrs.front()->external_id);
}
auto backedged_sliced_output_mems = get_sliced_mem(back_edge.from);
if (backedged_sliced_output_mems.empty()) {
// backedge output which does not need concatenation
// input memory = output memory = loop output memory

View File

@ -945,14 +945,17 @@ bool program_impl::extract_and_remove(program_node& node) {
// update primitive_map of loop primitive,
// if extracted node is input of loop
for (const auto user : node.users) {
for (const auto& user : node.users) {
if (user->is_type<loop>()) {
loop_node& loop = *user;
loop.update_primitive_map(node.id(), input.id());
}
if (node.dependencies.front()->is_type<loop>()) {
loop_node& loop = *node.dependencies.front();
loop.update_primitive_map(node.id(), user->id());
for (auto& dep : node.dependencies) {
if (dep->is_type<loop>()) {
loop_node& loop = *dep;
loop.update_primitive_map(node.id(), user->id());
}
}
}
input.users.remove(&node);

View File

@ -90,9 +90,6 @@ TEST(loop_gpu, basic_no_concat)
EXPECT_EQ(output_layout.size.spatial[0], 4);
EXPECT_EQ(output_layout.size.spatial[1], 5);
mem_lock<int32_t> ptr{num_iteration_mem, get_test_stream()};
EXPECT_EQ(ptr[0], trip_count);
// value check
mem_lock<float> output_ptr{output, get_test_stream()};
EXPECT_EQ(output_ptr.size(), input_data.size());
@ -164,10 +161,6 @@ TEST(loop_gpu, basic_concat)
EXPECT_EQ(output_layout.size.spatial[0], 4);
EXPECT_EQ(output_layout.size.spatial[1], 5);
mem_lock<int32_t> ptr{num_iteration_mem, get_test_stream()};
const int32_t actual_iterations = ptr[0];
EXPECT_EQ(actual_iterations, trip_count);
// value check
mem_lock<float> output_ptr{output, get_test_stream()};
for (size_t i=0, iend = input_data.size(); i<iend; ++i) {
@ -303,14 +296,6 @@ TEST(loop_gpu, basic_concat_nested)
EXPECT_EQ(output_layout.size.spatial[0], 4);
EXPECT_EQ(output_layout.size.spatial[1], 5);
// check trip count = actual iteration
mem_lock<int64_t> inner_num_iteration_ptr{inner_num_iteration_mem, get_test_stream()};
int64_t inner_actual_iterations = inner_num_iteration_ptr[0];
EXPECT_EQ(inner_actual_iterations, inner_trip_count);
mem_lock<int64_t> num_iteration_ptr{num_iteration_mem, get_test_stream()};
int64_t actual_iterations = num_iteration_ptr[0];
EXPECT_EQ(actual_iterations, outer_trip_count);
// check output values
EXPECT_EQ(output_layout.count(), expected.size());
mem_lock<float> output_ptr{output, get_test_stream()};

View File

@ -49,3 +49,8 @@ install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/unit_tests
DESTINATION deployment_tools/model_optimizer
COMPONENT tests
EXCLUDE_FROM_ALL)
install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/automation
DESTINATION deployment_tools/model_optimizer
COMPONENT tests
EXCLUDE_FROM_ALL)

View File

@ -29,7 +29,6 @@ extensions/back/GroupedConvWeightsNormalize.py
extensions/back/insert_compatibility_l2normalization.py
extensions/back/InterpolateReshape.py
extensions/back/kaldi_remove_memory_output.py
extensions/back/LayoutChangeForEinsum.py
extensions/back/LayoutChangeForGatherND.py
extensions/back/LeakyReLUMutation.py
extensions/back/LinearToLinearONNXReplacer.py
@ -597,6 +596,7 @@ extensions/middle/InsertSelect.py
extensions/middle/InterpolateSequenceToInterpolate.py
extensions/middle/L2NormFusing.py
extensions/middle/LayoutChangeForConstantShapePaths.py
extensions/middle/LayoutChangeForEinsum.py
extensions/middle/LeakyReluPattern.py
extensions/middle/LSTMRNNSequenceToTensorIterator.py
extensions/middle/MakeKaldiConstReshapable.py
@ -1070,6 +1070,7 @@ mo/utils/ir_reader/extenders/topk_extender.py
mo/utils/ir_reader/extenders/variadic_split_extender.py
mo/utils/ir_reader/layer_to_class.py
mo/utils/ir_reader/restore_graph.py
mo/utils/json_schema.py
mo/utils/logger.py
mo/utils/model_analysis.py
mo/utils/pipeline_config.py

View File

@ -1,12 +1,14 @@
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
from extensions.middle.InsertLayoutPropagationTransposes import is_input_data_in_correct_layout, \
is_output_data_in_correct_layout
from extensions.ops.einsum import Einsum
from mo.back.replacement import BackReplacementPattern
from mo.graph.graph import Graph
from mo.middle.replacement import MiddleReplacementPattern
class LayoutChangeForEinsum(BackReplacementPattern):
class LayoutChangeForEinsum(MiddleReplacementPattern):
"""
The transformation adjusts Einsum equation to NCHW layout.
Subscripts for tensor of rank greater than three must be adjusted
@ -19,7 +21,15 @@ class LayoutChangeForEinsum(BackReplacementPattern):
"""
enabled = True
force_shape_inference = True
graph_condition = [lambda graph: graph.graph['fw'] == 'tf']
graph_condition = [lambda graph: graph.graph['layout'] == 'NHWC']
def run_after(self):
from extensions.middle.MarkSubgraphsWithCorrectLayout import MarkSubGraphsWithCorrectLayout
return [MarkSubGraphsWithCorrectLayout]
def run_before(self):
from extensions.middle.InsertLayoutPropagationTransposes import InsertLayoutPropagationTranspose
return [InsertLayoutPropagationTranspose]
def find_and_replace_pattern(self, graph: Graph):
import extensions.middle.InsertLayoutPropagationTransposes as InsertTransposes
@ -31,27 +41,35 @@ class LayoutChangeForEinsum(BackReplacementPattern):
connected_in_ports = [port for port in einsum.in_ports().values() if not port.disconnected()]
num_inputs = len(connected_in_ports)
# compute a mask of inputs of rank greater than 3 that are required original layout (NCHW)
# due to presence of ellipsis covering multiple tail dimensions in the corresponding input subscript
# check if the correct_data_layout attribute is set for inputs and output
# this attribute can be set by the MarkSubGraphsWithCorrectLayout transformation,
# for example, when Einsum is located near a MatMul operation in the graph
input_correct_layout_mask = []
for input_ind in range(num_inputs):
input_correct_layout_mask.append(is_input_data_in_correct_layout(einsum, input_ind))
is_output_layout_correct = is_output_data_in_correct_layout(einsum, 0)
# compute a mask of which inputs/output are already adjusted to the required layout;
# those that are not adjusted require an inserted Transpose
input_ranks = [len(einsum.in_port(port_idx).data.get_shape()) for port_idx in range(num_inputs)]
output_rank = len(einsum.out_port(0).data.get_shape())
permuted_equation, is_inputs_permuted, is_output_permuted = Einsum.adjust_equation_with_NCHW_layout(
permuted_equation, are_inputs_adjusted, is_output_adjusted = Einsum.adjust_equation_with_NCHW_layout(
einsum_name,
equation,
input_ranks,
output_rank)
assert len(is_inputs_permuted) == num_inputs
output_rank, input_correct_layout_mask, is_output_layout_correct)
assert len(are_inputs_adjusted) == num_inputs
# setup adjusted equation
einsum.equation = permuted_equation
# insert Transpose node to get NHWC layout back (for inputs) that is required due to specifics of equation
for input_ind in range(num_inputs):
if not is_inputs_permuted[input_ind]:
if not are_inputs_adjusted[input_ind]:
# that means Einsum can only accept input in NHWC layout
# so the inserted transpose before the Einsum will convert the layout to NHWC
InsertTransposes.insert_transpose(graph, einsum.in_port(input_ind), before_input=True)
if not is_output_permuted:
if not is_output_adjusted:
# that means Einsum can only generate output in NHWC layout
# so the inserted transpose followed after the output will convert the layout back into NCHW layout
InsertTransposes.insert_transpose(graph, einsum.out_port(0), before_input=False)

View File

@ -137,7 +137,8 @@ class Einsum(Op):
return labels
@staticmethod
def adjust_equation_with_NCHW_layout(node_name: str, equation: str, input_ranks: list, output_rank: int) -> (
def adjust_equation_with_NCHW_layout(node_name: str, equation: str, input_ranks: list, output_rank: int,
input_correct_layout_mask: list, output_correct_layout_mask: bool) -> (
str, list, bool):
"""
In order to satisfy NCHW layout, subscripts for tensors with rank greater than three must be adjusted by moving labels
@ -151,11 +152,13 @@ class Einsum(Op):
:param output_rank: output rank
:param input_correct_layout_mask: boolean mask of inputs that are already in the correct layout
:param output_correct_layout_mask: True if the output is already in the correct layout
:return: adjusted equation, boolean mask for inputs, and boolean flag if output subscript is adjusted
"""
is_inputs_permuted = []
is_inputs_adjusted = []
input_subscripts, output_subscript = Einsum.parse_equation(node_name, equation)
num_inputs = len(input_ranks)
assert len(input_subscripts) == num_inputs, "The number of inputs must match the number " \
"of input subscripts"
assert len(input_correct_layout_mask) == num_inputs, "The number of inputs must match the number " \
"of elements in the input_correct_layout_mask list"
# permute labels in input subscripts and mark inputs for which inference in NCHW layout is acceptable
# in case ellipsis covering multiple dimensions in the end, the permutation is impossible
@ -166,31 +169,35 @@ class Einsum(Op):
input_rank = input_ranks[input_ind]
labels = Einsum.extract_subscript_labels(node_name, input_subscript)
num_broadcasted_dims = input_rank - len(labels) + 1
if input_rank > 3 and (labels[-1] != "..." or labels[-1] == "..." and num_broadcasted_dims == 1):
is_inputs_permuted.append(True)
if input_correct_layout_mask[input_ind]:
is_inputs_adjusted.append(True)
elif input_rank > 3 and (labels[-1] != "..." or labels[-1] == "..." and num_broadcasted_dims == 1):
is_inputs_adjusted.append(True)
labels.insert(1, labels[-1])
del labels[-1]
else:
is_inputs_permuted.append(False)
is_inputs_adjusted.append(False)
permuted_input_subscript = ''.join(labels)
permuted_input_subscripts.append(permuted_input_subscript)
# perform the same procedure for the output subscript as for the inputs subscripts
labels = Einsum.extract_subscript_labels(node_name, output_subscript)
num_broadcasted_dims = output_rank - len(labels) + 1
if output_rank > 3 and (labels[-1] != "..." or labels[-1] == "..." and num_broadcasted_dims == 1):
is_output_permuted = True
if output_correct_layout_mask:
is_output_adjusted = True
elif output_rank > 3 and (labels[-1] != "..." or labels[-1] == "..." and num_broadcasted_dims == 1):
is_output_adjusted = True
labels.insert(1, labels[-1])
del labels[-1]
else:
is_output_permuted = False
is_output_adjusted = False
permuted_output_subscript = ''.join(labels)
# concatenate the left and right hands of the resulted equation
left_hand = ','.join(permuted_input_subscripts)
right_hand = permuted_output_subscript
permuted_equation = left_hand + "->" + right_hand
return permuted_equation, is_inputs_permuted, is_output_permuted
return permuted_equation, is_inputs_adjusted, is_output_adjusted
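For a concrete feel of the adjustment above: for a rank-4 tensor the last subscript label is moved to position 1, so an NHWC-style subscript "abcd" becomes the NCHW-style "adbc"; inputs whose mask entry is set keep their subscript untouched. A minimal sketch of the label move:
labels = list("abcd")
labels.insert(1, labels[-1])  # move the innermost label next to the batch label
del labels[-1]
assert ''.join(labels) == "adbc"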
@staticmethod
def infer(node: Node):

View File

@ -1,6 +1,7 @@
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import fastjsonschema as json_validate
import json
import logging as log
import os
@ -9,7 +10,8 @@ from re import compile, match
from mo.graph.graph import Node, Graph
from mo.utils.error import Error
from mo.utils.graph import nodes_matching_name_pattern, sub_graph_between_nodes
from mo.utils.utils import refer_to_faq_msg
from mo.utils.json_schema import schema_dict
from mo.utils.utils import get_mo_root_dir, refer_to_faq_msg
class CustomReplacementDescriptor(object):
@ -297,12 +299,12 @@ class CustomReplacementDescriptorScope(CustomReplacementDescriptor):
log.debug("Node {} doesn't have output edges. Consider it output".format(node_name))
output_tensors.add((generate_pattern_for_node(graph, pattern, node_name), 0))
if not self.has('inputs'):
if not self.has('inputs') or len(self._replacement_desc['inputs']) == 0:
self._replacement_desc['inputs'] = [[{'node': desc[0], 'port': desc[1]} for desc in inp]
for inp in sorted(input_nodes_mapping.values())]
log.debug('Updated inputs of sub-graph for instance "{}"'.format(self.instances))
if not self.has('outputs'):
if not self.has('outputs') or len(self._replacement_desc['outputs']) == 0:
self._replacement_desc['outputs'] = [{'node': node, 'port': port} for node, port in sorted(output_tensors)]
log.debug('Updated outputs of sub-graph for instance "{}"'.format(self.instances))
@ -342,13 +344,8 @@ def parse_custom_replacement_config_file(file_name: str):
if not os.path.exists(file_name):
raise Error("Custom replacements configuration file '{}' does not exist. ".format(file_name) +
refer_to_faq_msg(69))
try:
with open(file_name, 'r') as f:
data = json.load(f)
except Exception as exc:
raise Error("Failed to parse custom replacements configuration file '{}': {}. ".format(file_name, exc) +
refer_to_faq_msg(70)) from exc
data = load_and_validate_json_config(file_name)
result = list()
validation_errors = list()
for attrs in data:
@ -394,3 +391,22 @@ def generate_pattern_for_node(graph: Graph, sub_graph_pattern: str, node_name: s
raise RuntimeError('The pattern that uniquely identifies node "{}" using sub-graph pattern "{}" has not been found'.
format(node_name, sub_graph_pattern))
def load_and_validate_json_config(config_file_name: str):
"""
Reads and validates the custom replacement configuration file config_file_name.
:param config_file_name: name of the file to read from.
:return: data deserialized from the json config file.
"""
try:
with open(config_file_name, 'r') as f:
json_config = json.load(f)
validator = json_validate.compile(schema_dict)
validator(json_config)
except Exception as e:
raise Error("Failed to parse custom replacements configuration file '{}': {}. ".format(config_file_name, e) +
refer_to_faq_msg(70)) from e
return json_config
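A usage sketch for the helper above (the config path is illustrative):
from mo.utils.custom_replacement_config import load_and_validate_json_config
data = load_and_validate_json_config('extensions/front/tf/some_support.json')
for replacement_desc in data:
    print(replacement_desc['id'], replacement_desc['match_kind'])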

View File

@ -0,0 +1,129 @@
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
schema_dict = {
"definitions": {},
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Root",
"type": "array",
"default": [],
"items": {
"$id": "#root/items",
"title": "Items",
"type": "object",
"required": [
"id",
"match_kind"
],
"properties": {
"custom_attributes": {
"$id": "#root/items/custom_attributes",
"title": "Custom_attributes",
"type": "object",
"properties": {
}
},
"id": {
"$id": "#root/items/id",
"title": "Id",
"type": "string",
"pattern": "^.*$",
"minLength": 1
},
"inputs": {
"$id": "#root/items/inputs",
"title": "Inputs",
"type": "array",
"default": [],
"items": {
"$id": "#root/items/inputs/items",
"title": "Items",
"type": "array",
"default": [],
"items": {
"$id": "#root/items/inputs/items/items",
"title": "Items",
"type": "object",
"properties": {
"node": {
"$id": "#root/items/inputs/items/items/node",
"title": "Node",
"type": "string",
"default": "",
"pattern": "^.*$"
},
"port": {
"$id": "#root/items/inputs/items/items/port",
"title": "Port",
"type": "integer",
"default": 0
}
},
"required": ["node", "port"]
}
}
},
"instances": {
"$id": "#root/items/instances",
"title": "Instances",
"type": ["array", "object"],
"items": {
"$id": "#root/items/instances/items",
"title": "Items",
"type": "string",
"default": "",
"pattern": "^.*$"
}
},
"match_kind": {
"$id": "#root/items/match_kind",
"title": "Match_kind",
"type": "string",
"enum": ["points", "scope", "general"],
"default": "points",
"pattern": "^.*$"
},
"outputs": {
"$id": "#root/items/outputs",
"title": "Outputs",
"type": "array",
"default": [],
"items": {
"$id": "#root/items/outputs/items",
"title": "Items",
"type": "object",
"properties": {
"node": {
"$id": "#root/items/outputs/items/node",
"title": "Node",
"type": "string",
"default": "",
"pattern": "^.*$"
},
"port": {
"$id": "#root/items/outputs/items/port",
"title": "Port",
"type": "integer",
"default": 0
}
},
"required": ["node", "port"]
}
},
"include_inputs_to_sub_graph": {
"$id": "#root/items/include_inputs_to_sub_graph",
"title": "Include_inputs_to_sub_graph",
"type": "boolean",
"default": False
},
"include_outputs_to_sub_graph": {
"$id": "#root/items/include_outputs_to_sub_graph",
"title": "Include_outputs_to_sub_graph",
"type": "boolean",
"default": False
}
}
}
}
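A minimal sketch of consuming schema_dict above with fastjsonschema (the config content is illustrative):
import fastjsonschema
from mo.utils.json_schema import schema_dict
validate = fastjsonschema.compile(schema_dict)
validate([{"id": "MySubgraph", "match_kind": "general", "custom_attributes": {}}])  # passes
# validate([{"id": "", "match_kind": "general"}])  # raises: "id" requires minLength 1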

View File

@ -70,9 +70,10 @@ if __name__ == "__main__": # pragma: no cover
if argv.input_model and argv.saved_model_dir:
print("[ ERROR ] Both keys were provided --input_model and --input_dir. Please, provide only one of them")
sys.exit(1)
graph_def, _ = load_tf_graph_def(graph_file_name=argv.input_model, is_binary=not argv.text,
checkpoint=argv.input_checkpoint,
model_dir=argv.saved_model_dir, saved_model_tags=argv.saved_model_tags)
tags = argv.saved_model_tags.split(",")
graph_def, _, _ = load_tf_graph_def(graph_file_name=argv.input_model, is_binary=not argv.text,
checkpoint=argv.input_checkpoint,
model_dir=argv.saved_model_dir, saved_model_tags=tags)
summary = summarize_graph(graph_def)
print("{} input(s) detected:".format(len(summary['inputs'])))
for input in summary['inputs']:

View File

@ -8,3 +8,4 @@ onnx>=1.8.1
defusedxml>=0.7.1
urllib3>=1.26.4
requests>=2.25.1
fastjsonschema~=2.15.1

View File

@ -3,3 +3,4 @@ numpy>=1.16.6,<1.20
protobuf>=3.15.6
defusedxml>=0.7.1
requests>=2.25.1
fastjsonschema~=2.15.1

View File

@ -6,3 +6,4 @@ test-generator==0.1.1
defusedxml>=0.5.0
requests>=2.20.0
pytest>=6.2.4
fastjsonschema~=2.15.1

View File

@ -2,3 +2,4 @@ networkx~=2.5
numpy>=1.16.6,<1.20
defusedxml>=0.7.1
requests>=2.25.1
fastjsonschema~=2.15.1

View File

@ -5,3 +5,4 @@ numpy>=1.16.6,<1.20
defusedxml>=0.7.1
urllib3>=1.26.4
requests>=2.25.1
fastjsonschema~=2.15.1

View File

@ -3,3 +3,4 @@ networkx~=2.5
numpy>=1.16.6,<1.20
defusedxml>=0.7.1
requests>=2.25.1
fastjsonschema~=2.15.1

View File

@ -4,3 +4,4 @@ networkx~=2.5
numpy>=1.16.6,<1.19
defusedxml>=0.7.1
requests>=2.25.1
fastjsonschema~=2.15.1

View File

@ -3,3 +3,4 @@ networkx~=2.5
numpy>=1.16.6,<1.20
defusedxml>=0.7.1
requests>=2.25.1
fastjsonschema~=2.15.1

View File

@ -5,7 +5,7 @@ import unittest
import numpy as np
from extensions.back.LayoutChangeForEinsum import LayoutChangeForEinsum
from extensions.middle.LayoutChangeForEinsum import LayoutChangeForEinsum
from mo.front.common.partial_infer.utils import int64_array
from mo.utils.ir_engine.compare_graphs import compare_graphs
from unit_tests.utils.graph import build_graph, result, regular_op_with_shaped_data, valued_const_with_data, connect
@ -47,7 +47,7 @@ class LayoutChangeForEinsumTests(unittest.TestCase):
# this input does not require additional transpose
# since the corresponding subscript can be adjusted
'placeholder_2_d': {'shape': np.array([3, 8, 5, 7])},
# [3, 5, 10, 12] - NHWC, [3, 12, 5, 10] - NCHW
# [3, 8, 10, 12] - NHWC, [3, 12, 8, 10] - NCHW
# the third input must be transposed to NHWC layout
# since ellipsis covers multiple dimensions in the end
# the corresponding subscript is not changed
@ -60,7 +60,7 @@ class LayoutChangeForEinsumTests(unittest.TestCase):
# and additional transpose to NCHW will be inserted
'einsum_d': {'shape': np.array([2, 12, 7, 8, 10])},
}, nodes_with_edges_only=True)
graph.graph['fw'] = 'tf'
graph.graph['layout'] = 'NHWC'
graph_ref = build_graph(nodes_attributes,
[*connect('placeholder_3', '0:transpose_1'),
@ -80,3 +80,46 @@ class LayoutChangeForEinsumTests(unittest.TestCase):
LayoutChangeForEinsum().find_and_replace_pattern(graph)
(flag, resp) = compare_graphs(graph, graph_ref, 'output', check_op_attrs=True)
self.assertTrue(flag, resp)
def test_no_adjustment_layout_einsum(self):
graph = build_graph(nodes_attributes,
[*connect('placeholder_1', '0:einsum'),
*connect('placeholder_2', '1:einsum'),
*connect('placeholder_3', '2:einsum'),
*connect('einsum', 'output')],
{ # this input stays as is since it is of a rank equal to 3
'placeholder_1_d': {'shape': np.array([2, 3, 5])},
# [3, 5, 7, 8] - NHWC
# this input does not require additional transpose
# since the corresponding layout is correct
'placeholder_2_d': {'shape': np.array([3, 5, 7, 8])},
# [3, 8, 10, 12] - NHWC
# this input does not require additional transpose
# since the corresponding layout is correct
'placeholder_3_d': {'shape': np.array([3, 8, 10, 12])},
# equation is still for NHWC layout
'einsum': {'equation': "abc,bcde,bc...->ade...",
'correct_in_data_layout': [0, 1, 2],
'correct_out_data_layout': [0]},
# [2, 7, 8, 10, 12] - NHWC
# this output does not require additional transpose
# since the corresponding layout is correct
'einsum_d': {'shape': np.array([2, 7, 8, 10, 12])},
}, nodes_with_edges_only=True)
graph.graph['layout'] = 'NHWC'
graph_ref = build_graph(nodes_attributes,
[*connect('placeholder_1', '0:einsum'),
*connect('placeholder_2', '1:einsum'),
*connect('placeholder_3', '2:einsum'),
*connect('einsum', 'output')],
{'placeholder_1_d': {'shape': np.array([2, 3, 5])},
'placeholder_2_d': {'shape': np.array([3, 5, 7, 8])},
'placeholder_3_d': {'shape': np.array([3, 8, 10, 12])},
'einsum': {'equation': "abc,bcde,bc...->ade..."},
'einsum_d': {'shape': np.array([2, 7, 8, 10, 12])}
})
LayoutChangeForEinsum().find_and_replace_pattern(graph)
(flag, resp) = compare_graphs(graph, graph_ref, 'output', check_op_attrs=True)
self.assertTrue(flag, resp)

View File

@ -28,7 +28,7 @@ class TestNoInferenceEngine(unittest.TestCase):
def test_frontends():
setup_env()
args = [sys.executable, '-m', 'pytest',
'frontend_ngraph_test_actual.py', '-s']
os.path.join(os.path.dirname(__file__), 'frontend_ngraph_test_actual.py'), '-s']
status = subprocess.run(args, env=os.environ)
assert not status.returncode
@ -37,7 +37,7 @@ def test_frontends():
def test_main_test():
setup_env()
args = [sys.executable, '-m', 'pytest',
'main_test_actual.py', '-s']
os.path.join(os.path.dirname(__file__), 'main_test_actual.py'), '-s']
status = subprocess.run(args, env=os.environ)
assert not status.returncode

View File

@ -0,0 +1,40 @@
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import os
import unittest
from fnmatch import fnmatch
from generator import generator, generate
from mo.utils.custom_replacement_config import load_and_validate_json_config
from mo.utils.error import Error
from mo.utils.utils import get_mo_root_dir
def get_json_configs(mo_root_dir):
config_path = os.path.join(mo_root_dir, 'extensions', 'front')
pattern = "*.json"
config_files_list = []
for path, subdirs, files in os.walk(config_path):
for name in files:
if fnmatch(name, pattern):
config_files_list.append((os.path.join(path, name),))
return config_files_list
@generator
class TestSchema(unittest.TestCase):
base_dir = get_mo_root_dir()
schema_file = os.path.join(base_dir, 'mo', 'utils', 'schema.json')
transformation_configs = get_json_configs(base_dir)
test_json1 = '[{"id": "", "match_kind": "general", "custom_attributes": {}}]'
test_json2 = '[{"id": "someid", "match_kind": "abc", "custom_attributes": {}}]'
@generate(*transformation_configs)
def test_schema_file(self, transformation_config):
self.assertTrue(load_and_validate_json_config(transformation_config))
def test_schema_id_empty(self):
self.assertRaises(Error, load_and_validate_json_config, self.test_json1)
def test_schema_match_kind_wrong(self):
self.assertRaises(Error, load_and_validate_json_config, self.test_json2)

View File

@ -15,7 +15,13 @@ add_library(${TARGET_FE_NAME} SHARED ${LIBRARY_SRC} ${LIBRARY_HEADERS})
target_include_directories(${TARGET_FE_NAME} PRIVATE ".")
target_link_libraries(${TARGET_FE_NAME} PRIVATE frontend_manager)
target_link_libraries(${TARGET_FE_NAME} PRIVATE ngraph::frontend_manager::static)
target_link_libraries(${TARGET_FE_NAME} PUBLIC ngraph PRIVATE ngraph::builder)
add_clang_format_target(${TARGET_FE_NAME}_clang FOR_TARGETS ${TARGET_FE_NAME})
set(NGRAPH_INSTALL_LIB "deployment_tools/ngraph/lib")
install(TARGETS ${TARGET_FE_NAME}
RUNTIME DESTINATION ${NGRAPH_INSTALL_LIB} COMPONENT tests EXCLUDE_FROM_ALL
LIBRARY DESTINATION ${NGRAPH_INSTALL_LIB} COMPONENT tests EXCLUDE_FROM_ALL)

View File

@ -41,3 +41,7 @@ set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY_OLD})
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY_OLD})
set(CMAKE_COMPILE_PDB_OUTPUT_DIRECTORY ${CMAKE_COMPILE_PDB_OUTPUT_DIRECTORY_OLD})
set(CMAKE_PDB_OUTPUT_DIRECTORY ${CMAKE_PDB_OUTPUT_DIRECTORY_OLD})
install(TARGETS ${PYBIND_FE_NAME}
DESTINATION python/${PYTHON_VERSION}
COMPONENT tests EXCLUDE_FROM_ALL)

View File

@ -19,8 +19,8 @@ namespace ngraph
class NGRAPH_API Acosh : public util::UnaryElementwiseArithmetic
{
public:
static constexpr NodeTypeInfo type_info{"Acosh", 3};
const NodeTypeInfo& get_type_info() const override { return type_info; }
NGRAPH_RTTI_DECLARATION;
/// \brief Constructs an Acosh operation.
Acosh() = default;
/// \brief Constructs an Acosh operation.

View File

@ -69,7 +69,7 @@ namespace ngraph
void set_reduction_axes(AxisSet axes) { m_reduction_axes = axes; }
private:
double m_eps = 1e-9;
double m_eps;
bool m_across_channels;
bool m_normalize_variance;
AxisSet m_reduction_axes;
@ -128,9 +128,9 @@ namespace ngraph
MVNEpsMode get_eps_mode() const { return m_eps_mode; }
private:
bool m_normalize_variance = true;
float m_eps = (float)1e-6;
MVNEpsMode m_eps_mode = MVNEpsMode::INSIDE_SQRT;
bool m_normalize_variance;
float m_eps;
MVNEpsMode m_eps_mode;
};
} // namespace v6
} // namespace op

View File

@ -30,13 +30,11 @@ namespace ngraph
std::shared_ptr<Node>
clone_with_new_inputs(const OutputVector& new_args) const override;
/// \return the body of the iteration
std::shared_ptr<Function> get_body() const { return m_body; }
std::shared_ptr<Function> get_body() const { return m_bodies[0]; }
/// \param body set the body of the iteration
void set_body(const std::shared_ptr<Function>& body) { m_body = body; }
void set_body(const std::shared_ptr<Function>& body) { set_function(body); }
void validate_and_infer_types() override;
void revalidate_and_infer_types_for_body_ops();
/// \return the body of the iteration
std::shared_ptr<Function> get_function() override;
private:
void try_to_set_num_iterations_if_no_slice_inputs();

View File

@ -0,0 +1,366 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ngraph/op/parameter.hpp>
#include "ngraph/op/op.hpp"
namespace ngraph
{
namespace op
{
namespace util
{
/// \brief Abstract base class for sub-graph based ops, i.e. ops that contain
/// one or more sub-graphs
///
class NGRAPH_API MultiSubGraphOp : public Op
{
public:
NGRAPH_RTTI_DECLARATION;
/// \brief Abstract class describes a connection between a MultiSubGraphOp input and
/// the body.
class InputDescription
{
protected:
///
/// \brief Constructs a new instance.
///
/// \param input_index Position of the MultiSubGraphOp input
/// \param body_parameter_index Body parameter to receive input
///
InputDescription(uint64_t input_index, uint64_t body_parameter_index);
InputDescription() = default;
public:
using type_info_t = DiscreteTypeInfo;
virtual ~InputDescription() = default;
virtual std::shared_ptr<InputDescription> copy() const = 0;
virtual const type_info_t& get_type_info() const = 0;
uint64_t m_input_index{0};
uint64_t m_body_parameter_index{0};
};
/// \brief Abstract class describes how a MultiSubGraphOp output is produced from
/// the body.
class OutputDescription
{
protected:
///
/// \brief Constructs a new instance.
///
/// \param body_value_index A body value that produces the output
/// \param output_index The MultiSubGraphOp output index
///
OutputDescription(uint64_t body_value_index, uint64_t output_index);
OutputDescription() = default;
public:
using type_info_t = DiscreteTypeInfo;
virtual ~OutputDescription() = default;
virtual std::shared_ptr<OutputDescription> copy() const = 0;
virtual const type_info_t& get_type_info() const = 0;
uint64_t m_body_value_index{0};
uint64_t m_output_index{0};
};
///
/// \brief Describes a body input formed from slices of an input to
/// MultiSubGraphOp.
///
class NGRAPH_API SliceInputDescription : public InputDescription
{
public:
NGRAPH_RTTI_DECLARATION;
///
/// \brief Constructs a new instance.
///
/// \param input_index Position of the MultiSubGraphOp input
/// \param body_parameter_index Body parameter position to receive input
/// \param start First index for slices
/// \param stride Step amount for slices
/// \param part_size Width of slices
/// \param end Last index for slices
/// \param axis Axis being sliced
///
SliceInputDescription(uint64_t input_index,
uint64_t body_parameter_index,
int64_t start,
int64_t stride,
int64_t part_size,
int64_t end,
int64_t axis);
SliceInputDescription() = default;
std::shared_ptr<InputDescription> copy() const override;
int64_t m_start{0};
int64_t m_stride{0};
int64_t m_part_size{0};
int64_t m_end{0};
int64_t m_axis{0};
};
///
/// \brief Describes a body input initialized from a MultiSubGraphOp input
/// on the first iteration, and then a body output thereafter.
///
class NGRAPH_API MergedInputDescription : public InputDescription
{
public:
NGRAPH_RTTI_DECLARATION;
///
/// \brief Constructs a new instance.
///
/// \param input_index Position of the MultiSubGraphOp input
/// supplying a value to body_parameter for
/// the initial iteration.
/// \param body_parameter_index Body parameter position to receive input.
/// \param body_value_index Body value to supply body_parameter for
/// successive
/// iterations.
///
MergedInputDescription(uint64_t input_index,
uint64_t body_parameter_index,
uint64_t body_value_index);
MergedInputDescription() = default;
std::shared_ptr<InputDescription> copy() const override;
uint64_t m_body_value_index{0};
};
/// \brief Produces an output by concatenating an output from each iteration
class NGRAPH_API ConcatOutputDescription : public OutputDescription
{
public:
NGRAPH_RTTI_DECLARATION;
///
/// \brief Constructs a new instance.
///
/// \param body_value_index A body value that produces the output
/// \param output_index The MultiSubGraphOp output index
/// \param start First index for slices
/// \param stride Step amount for slices
/// \param part_size Width of slices
/// \param end Last index for slices
/// \param axis Axis being sliced
///
ConcatOutputDescription(uint64_t body_value_index,
uint64_t output_index,
int64_t start,
int64_t stride,
int64_t part_size,
int64_t end,
int64_t axis);
ConcatOutputDescription() = default;
std::shared_ptr<OutputDescription> copy() const override;
int64_t m_start{0};
int64_t m_stride{0};
int64_t m_part_size{0};
int64_t m_end{0};
int64_t m_axis{0};
};
/// \brief Describes a body input that is invariant across iterations
class NGRAPH_API InvariantInputDescription : public InputDescription
{
public:
NGRAPH_RTTI_DECLARATION;
///
/// \brief Constructs a new instance.
///
/// \param input_index Position of the MultiSubGraphOp input
/// \param body_parameter_index Body parameter to receive input
///
InvariantInputDescription(uint64_t input_index, uint64_t body_parameter_index);
InvariantInputDescription() = default;
std::shared_ptr<InputDescription> copy() const override;
};
/// \brief Produces an output from a specific iteration
class NGRAPH_API BodyOutputDescription : public MultiSubGraphOp::OutputDescription
{
public:
NGRAPH_RTTI_DECLARATION;
///
/// \brief Constructs a new instance.
///
/// \param body_value_index A body value that produces the output
/// \param output_index The SubGraphOp output index
/// \param iteration which iteration (typically -1, final) will
/// supply the value
///
BodyOutputDescription(uint64_t body_value_index,
uint64_t output_index,
int64_t iteration = -1);
BodyOutputDescription() = default;
std::shared_ptr<MultiSubGraphOp::OutputDescription> copy() const override;
int64_t m_iteration{0};
};
using MultiSubgraphInputDescriptionPtr =
std::shared_ptr<MultiSubGraphOp::InputDescription>;
using MultiSubgraphOutputDescriptionPtr =
std::shared_ptr<MultiSubGraphOp::OutputDescription>;
using MultiSubgraphInputDescriptionVector =
std::vector<MultiSubgraphInputDescriptionPtr>;
using MultiSubgraphOutputDescriptionVector =
std::vector<MultiSubgraphOutputDescriptionPtr>;
/// \brief Gets internal sub-graph by index in MultiSubGraphOp
///
/// \param index sub-graph's index in op
/// \return pointer to ngraph::Function with sub-graph
virtual const std::shared_ptr<Function>& get_function(int index) const
{
return m_bodies[index];
};
/// \brief Adds sub-graph to MultiSubGraphOp
///
/// \param index index of new sub-graph
/// \param func new sub-graph as ngraph::Function
virtual void set_function(int index, const std::shared_ptr<Function>& func)
{
m_bodies[index] = func;
}
/// \brief Gets vector with connections between operation inputs
/// and internal sub-graph parameters
///
/// \param index index of internal sub-graph
/// \return vector of input descriptions
const MultiSubgraphInputDescriptionVector& get_input_descriptions(int index) const
{
return m_input_descriptions[index];
}
/// \brief Gets vector with connections between operation inputs
/// and internal sub-graph parameters
///
/// \param index index of internal sub-graph
/// \return vector of input descriptions
MultiSubgraphInputDescriptionVector& get_input_descriptions(int index)
{
return m_input_descriptions[index];
}
/// \brief Gets vector with connections between operation outputs
/// and internal sub-graph results
///
/// \param index index of internal sub-graph
/// \return vector of output descriptions
const MultiSubgraphOutputDescriptionVector& get_output_descriptions(int index) const
{
return m_output_descriptions[index];
}
/// \brief Gets vector with connections between operation outputs
/// and internal sub-graph results
///
/// \param index index of internal sub-graph
/// \return vector of output descriptions
MultiSubgraphOutputDescriptionVector& get_output_descriptions(int index)
{
return m_output_descriptions[index];
}
/// \brief Sets vector with connections between operation inputs
/// and internal sub-graph parameters
///
/// \param index index of internal sub-graph
/// \param inputs vector of input descriptions
void set_input_descriptions(int index,
const MultiSubgraphInputDescriptionVector& inputs)
{
m_input_descriptions[index] = inputs;
}
/// \brief Sets vector with connections between operation outputs
/// and internal sub-graph results
///
/// \param index index of internal sub-graph
/// \param outputs vector of output descriptions
void set_output_descriptions(int index,
const MultiSubgraphOutputDescriptionVector& outputs)
{
m_output_descriptions[index] = outputs;
}
///
/// \brief Set input descriptions for MultiSubGraphOp input.
///
/// \param value The value supplied as an input to the block.
/// \param bodies_parameters vector of bodies parameters.
virtual void set_invariant_inputs(const Output<Node>& value,
const ParameterVector& bodies_parameters);
///
/// \brief Set output descriptions for MultiSubGraphOp output.
///
/// \param bodies_results vector of bodies results for one output.
/// \return Output node for bodies_results.
virtual Output<Node> set_body_outputs(const ResultVector& bodies_results);
MultiSubGraphOp(const MultiSubGraphOp&) = delete;
MultiSubGraphOp(MultiSubGraphOp&&) = default;
MultiSubGraphOp& operator=(const MultiSubGraphOp&) = delete;
MultiSubGraphOp& operator=(MultiSubGraphOp&&) = default;
protected:
// Find an input corresponding to value, adding one if necessary.
Input<Node> input_for_value(const Output<Node>& value);
MultiSubGraphOp(size_t number_of_bodies);
MultiSubGraphOp() = default;
MultiSubGraphOp(const OutputVector& args, size_t number_of_bodies);
explicit MultiSubGraphOp(const OutputVector& args);
std::vector<std::shared_ptr<Function>> m_bodies;
std::vector<MultiSubgraphInputDescriptionVector> m_input_descriptions;
std::vector<MultiSubgraphOutputDescriptionVector> m_output_descriptions;
};
using MultiSubgraphInputDescriptionPtr =
util::MultiSubGraphOp::MultiSubgraphInputDescriptionPtr;
using MultiSubgraphOutputDescriptionPtr =
util::MultiSubGraphOp::MultiSubgraphOutputDescriptionPtr;
using MultiSubgraphInputDescriptionVector =
util::MultiSubGraphOp::MultiSubgraphInputDescriptionVector;
using MultiSubgraphOutputDescriptionVector =
util::MultiSubGraphOp::MultiSubgraphOutputDescriptionVector;
} // namespace util
} // namespace op
template <>
class NGRAPH_API AttributeAdapter<
std::vector<std::shared_ptr<ngraph::op::util::MultiSubGraphOp::InputDescription>>>
: public DirectValueAccessor<
std::vector<std::shared_ptr<ngraph::op::util::MultiSubGraphOp::InputDescription>>>
{
public:
AttributeAdapter(
std::vector<std::shared_ptr<ngraph::op::util::MultiSubGraphOp::InputDescription>>&
value)
: DirectValueAccessor<std::vector<
std::shared_ptr<ngraph::op::util::MultiSubGraphOp::InputDescription>>>(value)
{
}
NGRAPH_RTTI_DECLARATION;
};
template <>
class NGRAPH_API AttributeAdapter<
std::vector<std::shared_ptr<ngraph::op::util::MultiSubGraphOp::OutputDescription>>>
: public DirectValueAccessor<
std::vector<std::shared_ptr<ngraph::op::util::MultiSubGraphOp::OutputDescription>>>
{
public:
AttributeAdapter(
std::vector<std::shared_ptr<ngraph::op::util::MultiSubGraphOp::OutputDescription>>&
value)
: DirectValueAccessor<std::vector<
std::shared_ptr<ngraph::op::util::MultiSubGraphOp::OutputDescription>>>(value)
{
}
NGRAPH_RTTI_DECLARATION;
};
} // namespace ngraph

View File

@ -5,7 +5,7 @@
#pragma once
#include <ngraph/op/parameter.hpp>
#include "ngraph/op/op.hpp"
#include "ngraph/op/util/multi_subgraph_base.hpp"
namespace ngraph
{
@ -13,226 +13,46 @@ namespace ngraph
{
namespace util
{
/// \brief Abstract base class for sub-graph based ops, i.e. ops that have a sub-graph
/// \brief Abstract base class for sub-graph based ops, i.e. ops that have only one
/// sub-graph
///
class NGRAPH_API SubGraphOp : public Op
class NGRAPH_API SubGraphOp : public MultiSubGraphOp
{
public:
NGRAPH_RTTI_DECLARATION;
/// \brief Describes a connection between a SubGraphOp input and the body.
class InputDescription
virtual const std::shared_ptr<Function>& get_function() const
{
protected:
///
/// \brief Constructs a new instance.
///
/// \param input_index Position of the SubGraphOp input
/// \param body_parameter_index Body parameter to receive input
///
InputDescription(uint64_t input_index, uint64_t body_parameter_index);
InputDescription() = default;
public:
using type_info_t = DiscreteTypeInfo;
virtual ~InputDescription() = default;
virtual std::shared_ptr<InputDescription> copy() const = 0;
virtual const type_info_t& get_type_info() const = 0;
uint64_t m_input_index{0};
uint64_t m_body_parameter_index{0};
return m_bodies[0];
};
///
/// \brief Describes a body input formed from slices of an input to
/// SubGraphOp.
///
class NGRAPH_API SliceInputDescription : public InputDescription
virtual void set_function(const std::shared_ptr<Function>& func)
{
public:
static constexpr type_info_t type_info{"SliceInputDescription", 0};
const type_info_t& get_type_info() const override { return type_info; }
///
/// \brief Constructs a new instance.
///
/// \param input_index Position of the SubGraphOp input
/// \param body_parameter_index Body parameter position to receive input
/// \param start First index for slices
/// \param stride Step amount for slices
/// \param part_size Width of slices
/// \param end Last index for slices
/// \param axis Axis being sliced
///
SliceInputDescription(uint64_t input_index,
uint64_t body_parameter_index,
int64_t start,
int64_t stride,
int64_t part_size,
int64_t end,
int64_t axis);
SliceInputDescription() = default;
std::shared_ptr<InputDescription> copy() const override;
int64_t m_start{0};
int64_t m_stride{0};
int64_t m_part_size{0};
int64_t m_end{0};
int64_t m_axis{0};
m_bodies[0] = func;
};
///
/// \brief Describes a body input initialized from a SubGraphOp input on
/// the first iteration, and then a body output thereafter.
///
class NGRAPH_API MergedInputDescription : public InputDescription
{
public:
static constexpr type_info_t type_info{"MergedInputDescription", 0};
const type_info_t& get_type_info() const override { return type_info; }
///
/// \brief Constructs a new instance.
///
/// \param input_index Position of the SubGraphOp input
/// supplying a value to body_parameter for
/// the initial iteration.
/// \param body_parameter_index Body parameter position to receive input.
/// \param body_value_index Body value to supply body_parameter for
/// successive
/// iterations.
///
MergedInputDescription(uint64_t input_index,
uint64_t body_parameter_index,
uint64_t body_value_index);
MergedInputDescription() = default;
std::shared_ptr<InputDescription> copy() const override;
uint64_t m_body_value_index{0};
};
///
/// \brief Describes a body input initialized from a SubGraphOp input on
/// the first iteration, and invariant thereafter.
///
class NGRAPH_API InvariantInputDescription : public InputDescription
{
public:
static constexpr type_info_t type_info{"InvariantInputDescription", 0};
const type_info_t& get_type_info() const override { return type_info; }
///
/// \brief Constructs a new instance.
///
/// \param input_index Position of the SubGraphOp input
/// \param body_parameter_index Body parameter to receive input
///
InvariantInputDescription(uint64_t input_index, uint64_t body_parameter_index);
InvariantInputDescription() = default;
std::shared_ptr<InputDescription> copy() const override;
};
/// \brief Describes how a SubGraphOp output is produced from the body.
class OutputDescription
{
protected:
///
/// \brief Constructs a new instance.
///
/// \param body_value_index A body value that produces the output
/// \param output_index The SubGraphOp output index
///
OutputDescription(uint64_t body_value_index, uint64_t output_index);
OutputDescription() = default;
public:
using type_info_t = DiscreteTypeInfo;
virtual ~OutputDescription() = default;
virtual std::shared_ptr<OutputDescription> copy() const = 0;
virtual const type_info_t& get_type_info() const = 0;
uint64_t m_body_value_index{0};
uint64_t m_output_index{0};
};
/// \brief Produces an output by concatenating an output from each iteration
class NGRAPH_API ConcatOutputDescription : public OutputDescription
{
public:
static constexpr type_info_t type_info{"ConcatOutputDescription", 0};
const type_info_t& get_type_info() const override { return type_info; }
///
/// \brief Constructs a new instance.
///
/// \param body_value_index A body value that produces the output
/// \param output_index The SubGraphOp output index
/// \param start First index for slices
/// \param stride Step amount for slices
/// \param part_size Width of slices
/// \param end Last index for slices
/// \param axis Axis being sliced
///
ConcatOutputDescription(uint64_t body_value_index,
uint64_t output_index,
int64_t start,
int64_t stride,
int64_t part_size,
int64_t end,
int64_t axis);
ConcatOutputDescription() = default;
std::shared_ptr<OutputDescription> copy() const override;
int64_t m_start{0};
int64_t m_stride{0};
int64_t m_part_size{0};
int64_t m_end{0};
int64_t m_axis{0};
};
/// \brief Produces an output from a specific iteration
class NGRAPH_API BodyOutputDescription : public OutputDescription
{
public:
static constexpr type_info_t type_info{"BodyOutputDescription", 0};
const type_info_t& get_type_info() const override { return type_info; }
///
/// \brief Constructs a new instance.
///
/// \param body_value_index A body value that produces the output
/// \param output_index The SubGraphOp output index
/// \param iteration which iteration (typically -1, final) will
/// supply the value
///
BodyOutputDescription(uint64_t body_value_index,
uint64_t output_index,
int64_t iteration);
BodyOutputDescription() = default;
std::shared_ptr<OutputDescription> copy() const override;
int64_t m_iteration{0};
};
virtual std::shared_ptr<Function> get_function() { return m_body; };
virtual std::shared_ptr<const Function> get_function() const { return m_body; };
virtual void set_function(const std::shared_ptr<Function>& func) { m_body = func; };
/// \return a reference to the input descriptions.
const std::vector<std::shared_ptr<InputDescription>>& get_input_descriptions() const
{
return m_input_descriptions;
return m_input_descriptions[0];
}
/// \return a reference to the input descriptions. Can add input descriptions
/// before
/// validation.
std::vector<std::shared_ptr<InputDescription>>& get_input_descriptions()
{
return m_input_descriptions;
return m_input_descriptions[0];
}
/// \return a reference to the output descriptions.
const std::vector<std::shared_ptr<OutputDescription>>&
get_output_descriptions() const
{
return m_output_descriptions;
return m_output_descriptions[0];
}
/// \return a reference to the output descriptions. Can add output descriptions
/// before
/// validation.
std::vector<std::shared_ptr<OutputDescription>>& get_output_descriptions()
{
return m_output_descriptions;
return m_output_descriptions[0];
}
///
@ -324,15 +144,13 @@ namespace ngraph
// Find an input corresponding to value, adding one if necessary.
Input<Node> input_for_value(const Output<Node>& value);
SubGraphOp() = default;
SubGraphOp();
explicit SubGraphOp(const OutputVector& args);
std::shared_ptr<Function> m_body;
std::vector<std::shared_ptr<op::util::SubGraphOp::InputDescription>>
m_input_descriptions;
std::vector<std::shared_ptr<op::util::SubGraphOp::OutputDescription>>
m_output_descriptions;
private:
using MultiSubGraphOp::get_function;
using MultiSubGraphOp::set_function;
};
using InputDescriptionPtr = std::shared_ptr<util::SubGraphOp::InputDescription>;
using OutputDescriptionPtr = std::shared_ptr<util::SubGraphOp::OutputDescription>;
@ -341,47 +159,4 @@ namespace ngraph
} // namespace util
} // namespace op
template <>
class NGRAPH_API AttributeAdapter<
std::vector<std::shared_ptr<ngraph::op::util::SubGraphOp::InputDescription>>>
: public DirectValueAccessor<
std::vector<std::shared_ptr<ngraph::op::util::SubGraphOp::InputDescription>>>
{
public:
AttributeAdapter(
std::vector<std::shared_ptr<ngraph::op::util::SubGraphOp::InputDescription>>& value)
: DirectValueAccessor<
std::vector<std::shared_ptr<ngraph::op::util::SubGraphOp::InputDescription>>>(
value)
{
}
static constexpr DiscreteTypeInfo type_info{
"AttributeAdapter<std::vector<std::shared_ptr<ngraph::op::util::SubGraphOp::"
"InputDescription>>>",
0};
const DiscreteTypeInfo& get_type_info() const override { return type_info; }
};
template <>
class NGRAPH_API AttributeAdapter<
std::vector<std::shared_ptr<ngraph::op::util::SubGraphOp::OutputDescription>>>
: public DirectValueAccessor<
std::vector<std::shared_ptr<ngraph::op::util::SubGraphOp::OutputDescription>>>
{
public:
AttributeAdapter(
std::vector<std::shared_ptr<ngraph::op::util::SubGraphOp::OutputDescription>>& value)
: DirectValueAccessor<
std::vector<std::shared_ptr<ngraph::op::util::SubGraphOp::OutputDescription>>>(
value)
{
}
static constexpr DiscreteTypeInfo type_info{
"AttributeAdapter<std::vector<std::shared_ptr<ngraph::op::util::SubGraphOp::"
"OutputDescription>>>",
0};
const DiscreteTypeInfo& get_type_info() const override { return type_info; }
};
} // namespace ngraph
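
SubGraphOp is now a thin single-body adapter over MultiSubGraphOp: get_function/set_function alias m_bodies[0], and the description getters alias index 0 of the base vectors. The per-iteration helpers declared in the portion of this header elided above (set_sliced_input, set_merged_input, set_invariant_input, get_iter_value, get_concatenated_slices, per the @ -324,15 hunk) still build the Slice/Merged/Invariant input and Body/Concat output descriptions whose definitions moved to the base class. A sketch with opset1 TensorIterator, assuming those helper signatures:

#include <ngraph/ngraph.hpp>
#include <ngraph/opsets/opset1.hpp>

using namespace ngraph;

int main()
{
    // Outer graph inputs: a [10, 3] sequence and a [1, 3] initial sum.
    auto X = std::make_shared<opset1::Parameter>(element::f32, Shape{10, 3});
    auto init = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 3});

    // Body of one iteration: sum_t = x_t + sum_{t-1}.
    auto x_t = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 3});
    auto acc = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 3});
    auto sum = std::make_shared<opset1::Add>(x_t, acc);
    auto body = std::make_shared<Function>(OutputVector{sum},
                                           ParameterVector{x_t, acc});

    auto ti = std::make_shared<opset1::TensorIterator>();
    ti->set_function(body); // SubGraphOp::set_function, i.e. m_bodies[0]

    // SliceInputDescription: one [1, 3] slice of X per iteration along axis 0.
    ti->set_sliced_input(x_t, X, 0, 1, 1, -1, 0);
    // MergedInputDescription: acc starts from init, then reuses sum.
    ti->set_merged_input(acc, init, sum);

    // BodyOutputDescription: value of sum at the final (-1) iteration.
    auto last = ti->get_iter_value(sum, -1);
    // ConcatOutputDescription: every iteration's sum, stacked along axis 0.
    auto all = ti->get_concatenated_slices(sum, 0, 1, 1, -1, 0);

    auto f = std::make_shared<Function>(OutputVector{last, all},
                                        ParameterVector{X, init});
    return 0;
}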


@ -13,7 +13,8 @@ namespace ngraph
{
namespace reference
{
template <typename T>
template <typename T,
typename std::enable_if<!std::is_integral<T>::value, bool>::type = true>
void acosh(const T* arg, T* out, size_t count)
{
for (size_t i = 0; i < count; i++)
@ -21,6 +22,16 @@ namespace ngraph
out[i] = std::acosh(arg[i]);
}
}
template <typename T,
typename std::enable_if<std::is_integral<T>::value, bool>::type = true>
void acosh(const T* arg, T* out, size_t count)
{
for (size_t i = 0; i < count; i++)
{
out[i] = std::roundl(std::acosh(arg[i]));
}
}
} // namespace reference
} // namespace runtime
} // namespace ngraph
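
The enable_if pair dispatches on the element type: floating-point types keep the plain std::acosh path, while integral types now round the result to the nearest integer instead of truncating it in the implicit conversion. A small usage sketch; the include path is assumed from this header's location:

#include <cstdint>
#include <vector>
#include "ngraph/runtime/reference/acosh.hpp" // assumed install path of this header

int main()
{
    // Floating-point elements take the first overload, plain std::acosh.
    std::vector<float> fin{1.0f, 2.0f, 10.0f};
    std::vector<float> fout(fin.size());
    ngraph::runtime::reference::acosh(fin.data(), fout.data(), fin.size());

    // Integral elements take the new overload: acosh(10) ~= 2.9932, so the
    // rounded result is 3 where a bare int conversion would have produced 2.
    std::vector<int32_t> iin{1, 2, 10};
    std::vector<int32_t> iout(iin.size());
    ngraph::runtime::reference::acosh(iin.data(), iout.data(), iin.size());
    return 0;
}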
