Merge remote-tracking branch 'upstream/master'

Steve Yoo 2021-07-26 11:57:49 +09:00
commit ebdbea67cb
181 changed files with 5953 additions and 2352 deletions

View File

@@ -88,6 +88,11 @@ jobs:
     python3 -m pip install -r $(REPO_DIR)/inference-engine/ie_bridges/python/wheel/requirements-dev.txt
     # For running Python API tests
     python3 -m pip install -r $(REPO_DIR)/inference-engine/ie_bridges/python/src/requirements-dev.txt
+    # For running nGraph unit tests dependent on Python frameworks
+    python3 -m pip install -r $(REPO_DIR)/ngraph/test/requirements_test.txt
+    # For MO unit tests
+    python3 -m pip install -r $(REPO_DIR)/model-optimizer/requirements.txt
+    python3 -m pip install -r $(REPO_DIR)/model-optimizer/requirements_dev.txt
     # Speed up build
     wget https://github.com/ninja-build/ninja/releases/download/v1.10.0/ninja-linux.zip
     unzip ninja-linux.zip
@@ -109,6 +114,7 @@ jobs:
     -DENABLE_WHEEL=ON
     -DENABLE_TESTS=ON
     -DNGRAPH_ONNX_IMPORT_ENABLE=ON
+    -DNGRAPH_ONNX_FRONTEND_ENABLE=ON
     -DENABLE_FASTER_BUILD=ON
     -DENABLE_STRICT_DEPENDENCIES=OFF
     -DIE_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)/modules
@@ -149,6 +155,12 @@ jobs:
     workingDirectory: $(BUILD_SAMPLES_DIR)
     displayName: 'Build c samples'
+  - script: |
+      export MO_ROOT=$(INSTALL_DIR)/deployment_tools/model_optimizer
+      . $(SETUPVARS) -pyver 3.6 && python3 -m pytest -s $(INSTALL_DIR)/deployment_tools/model_optimizer/unit_tests --junitxml=TEST-ModelOptimizer.xml
+    displayName: 'Model Optimizer UT'
+    continueOnError: false
   - script: . $(SETUPVARS) && $(INSTALL_TEST_DIR)/unit-test --gtest_print_time=1 --gtest_filter=-backend_api.config_unsupported:*IE_GPU* --gtest_output=xml:TEST-NGraphUT.xml
     displayName: 'nGraph UT'
     continueOnError: false

View File

@@ -95,6 +95,7 @@ jobs:
     -DENABLE_SAMPLES=OFF
     -DENABLE_SPEECH_DEMO=OFF
     -DNGRAPH_ONNX_IMPORT_ENABLE=ON
+    -DNGRAPH_ONNX_FRONTEND_ENABLE=ON
     -DNGRAPH_DEBUG_ENABLE=OFF
     $(REPO_DIR)
   workingDirectory: $(BUILD_DIR)

View File

@@ -69,6 +69,7 @@ RUN cmake .. \
     -DENABLE_PYTHON=ON \
     -DPYTHON_EXECUTABLE=/usr/bin/python3 \
     -DNGRAPH_ONNX_IMPORT_ENABLE=ON \
+    -DNGRAPH_ONNX_FRONTEND_ENABLE=ON \
     -DNGRAPH_DEBUG_ENABLE=OFF \
     -DCMAKE_INSTALL_PREFIX=/openvino/dist \
     -DNGRAPH_USE_PROTOBUF_LITE=${PROTOBUF_LITE}

View File

@@ -92,9 +92,15 @@ ie_coverage_genhtml(INFO_FILE "ngraph"
 if(NGRAPH_ONNX_IMPORT_ENABLE)
     ie_coverage_extract(INPUT "openvino" OUTPUT "onnx_importer"
-                        PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/ngraph/frontend/onnx_common*"
-                                 "${OV_COVERAGE_BASE_DIRECTORY}/ngraph/frontend/onnx_editor*"
-                                 "${OV_COVERAGE_BASE_DIRECTORY}/ngraph/frontend/onnx_import*")
+                        PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/ngraph/frontend/onnx/onnx_common*"
+                                 "${OV_COVERAGE_BASE_DIRECTORY}/ngraph/frontend/onnx/onnx_import*")
     ie_coverage_genhtml(INFO_FILE "onnx_importer"
                         PREFIX "${OV_COVERAGE_BASE_DIRECTORY}")
 endif()
+
+if(NGRAPH_ONNX_FRONTEND_ENABLE)
+    ie_coverage_extract(INPUT "openvino" OUTPUT "onnx_ngraph_frontend"
+                        PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/ngraph/frontend/onnx/frontend*")
+    ie_coverage_genhtml(INFO_FILE "onnx_ngraph_frontend"
+                        PREFIX "${OV_COVERAGE_BASE_DIRECTORY}")
+endif()

View File

@@ -34,7 +34,7 @@ endif()
 # common sanitizer options
 if (DEFINED SANITIZER_COMPILER_FLAGS)
     # ensure symbols are present
-    set(SANITIZER_COMPILER_FLAGS "-g -fno-omit-frame-pointer")
+    set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -g -fno-omit-frame-pointer")
     # prevent unloading libraries at runtime, so sanitizer can resolve their symbols
     set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -Wl,-z,nodelete")

View File

@@ -38,8 +38,6 @@ ie_dependent_option (ENABLE_PYTHON "enables ie python bridge build" OFF "PYTHONL
 find_package(PythonInterp 3 QUIET)
 ie_dependent_option (ENABLE_DOCS "Build docs using Doxygen" OFF "PYTHONINTERP_FOUND" OFF)

-ie_option (ENABLE_SYSTEM_PUGIXML "use the system copy of pugixml" OFF)
-
 #
 # Inference Engine specific options
 #
@@ -112,7 +110,11 @@ ie_dependent_option(ENABLE_TBB_RELEASE_ONLY "Only Release TBB libraries are link
 ie_option (ENABLE_SYSTEM_PUGIXML "use the system copy of pugixml" OFF)

-ie_option (ENABLE_CPU_DEBUG_CAPS "enable CPU debug capabilities at runtime" OFF)
+ie_option (ENABLE_DEBUG_CAPS "enable OpenVINO debug capabilities at runtime" OFF)
+ie_dependent_option (ENABLE_GPU_DEBUG_CAPS "enable GPU debug capabilities at runtime" ON "ENABLE_DEBUG_CAPS" OFF)
+ie_dependent_option (ENABLE_CPU_DEBUG_CAPS "enable CPU debug capabilities at runtime" ON "ENABLE_DEBUG_CAPS" OFF)

 if(ANDROID OR WINDOWS_STORE OR (MSVC AND (ARM OR AARCH64)))
     set(protoc_available OFF)
@@ -121,9 +123,12 @@ else()
 endif()

 ie_dependent_option(NGRAPH_ONNX_IMPORT_ENABLE "Enable ONNX importer" ON "protoc_available" OFF)
+ie_dependent_option(NGRAPH_ONNX_FRONTEND_ENABLE "Enable ONNX FrontEnd" OFF "NGRAPH_ONNX_IMPORT_ENABLE" OFF)
 ie_dependent_option(NGRAPH_PDPD_FRONTEND_ENABLE "Enable PaddlePaddle FrontEnd" ON "protoc_available" OFF)
 ie_dependent_option(NGRAPH_USE_PROTOBUF_LITE "Compiles and links with protobuf-lite" OFF
                     "NGRAPH_ONNX_IMPORT_ENABLE OR NGRAPH_PDPD_FRONTEND_ENABLE" OFF)
+ie_dependent_option(NGRAPH_USE_SYSTEM_PROTOBUF "Use system protobuf" OFF
+                    "NGRAPH_ONNX_IMPORT_ENABLE OR NGRAPH_PDPD_FRONTEND_ENABLE" OFF)
 ie_dependent_option(NGRAPH_UNIT_TEST_ENABLE "Enables ngraph unit tests" ON "ENABLE_TESTS;NOT ANDROID" OFF)
 ie_dependent_option(NGRAPH_UNIT_TEST_BACKENDS_ENABLE "Control the building of unit tests using backends" ON
                     "NGRAPH_UNIT_TEST_ENABLE" OFF)

View File

@@ -221,6 +221,7 @@ limitations under the License.
     <tab type="user" title="PriorBox-1" url="@ref openvino_docs_ops_detection_PriorBox_1"/>
     <tab type="user" title="Proposal-1" url="@ref openvino_docs_ops_detection_Proposal_1"/>
     <tab type="user" title="Proposal-4" url="@ref openvino_docs_ops_detection_Proposal_4"/>
+    <tab type="user" title="RandomUniform-8" url="@ref openvino_docs_ops_generation_RandomUniform_8"/>
     <tab type="user" title="Range-1" url="@ref openvino_docs_ops_generation_Range_1"/>
     <tab type="user" title="Range-4" url="@ref openvino_docs_ops_generation_Range_4"/>
     <tab type="user" title="ReadValue-3" url="@ref openvino_docs_ops_infrastructure_ReadValue_3"/>

View File

@@ -0,0 +1,231 @@
## RandomUniform <a name="RandomUniform"></a> {#openvino_docs_ops_generation_RandomUniform_8}

**Versioned name**: *RandomUniform-8*

**Category**: Generation

**Short description**: *RandomUniform* operation generates a sequence of random values from a uniform distribution.

**Detailed description**:

*RandomUniform* operation generates random numbers from a uniform distribution in the range `[minval, maxval)`.
The generation algorithm is based on an underlying random integer generator that uses the Philox algorithm. Philox is
a counter-based pseudo-random generator that produces uint32 values. A single invocation of the Philox algorithm returns
four random result values, depending on the given *key* and *counter* values, which are initialized
with the *seed* and *seed2* attributes respectively:

\f[
key = seed\\
counter = seed2
\f]

Link to the original paper: [Parallel Random Numbers: As Easy as 1, 2, 3](https://www.thesalmons.org/john/random123/papers/random123sc11.pdf).

The result of Philox is calculated by applying a fixed number of *key* and *counter* updates, the so-called "rounds".
This implementation uses the 4x32_10 version of the Philox algorithm, where the number of rounds is 10.

Suppose we have *n*, which determines the *n*-th group of four elements of the random sequence.
In each round, *key*, *counter* and *n* are split into pairs of uint32 values:

\f[
R = cast\_to\_uint32(value)\\
L = cast\_to\_uint32(value >> 32),
\f]

where *cast\_to\_uint32* is a static cast to uint32, *value* is the uint64 input value, *L* and *R* are the resulting
uint32 values, and >> is a bitwise right shift.

Then *n* and *counter* are updated with the following formulas:

\f[
L'= mullo(R, M)\\
R' = mulhi(R, M) {\oplus} k {\oplus} L \\
mulhi(a, b) = floor((a {\times} b) / 2^{32}) \\
mullo(a, b) = (a {\times} b) \mod 2^{32}
\f]

where \f${\oplus}\f$ is bitwise xor; *k* is \f$R_{key}\f$ for updating *counter* and \f$L_{key}\f$ for updating *n*;
*M* is `0xD2511F53` for updating *n* and `0xCD9E8D57` for updating *counter*.

After each round, the *key* is updated by adding a pair of constants:

\f[
L += 0x9E3779B9 \\
R += 0xBB67AE85
\f]

The values \f$L'_{n}\f$, \f$R'_{n}\f$, \f$L'_{counter}\f$, \f$R'_{counter}\f$ are the four resulting random numbers.
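For illustration only, the generation scheme described above can be sketched in C++ as follows; the helper names and the (L, R) pair layout are ours, not part of the specification, and the actual plugin implementation may differ in detail:

```cpp
#include <array>
#include <cstdint>

// One update step for an (L, R) pair, as described above; M and k are chosen
// depending on whether the pair being updated is n or counter.
static void philox_step(uint32_t& L, uint32_t& R, uint32_t M, uint32_t k) {
    const uint64_t product = static_cast<uint64_t>(R) * M;
    const uint32_t newL = static_cast<uint32_t>(product);                // mullo(R, M)
    const uint32_t newR = static_cast<uint32_t>(product >> 32) ^ k ^ L;  // mulhi(R, M) ^ k ^ L
    L = newL;
    R = newR;
}

// Produces the n-th group of four random uint32 values (4x32_10: ten rounds).
std::array<uint32_t, 4> philox_4x32_10(uint64_t seed, uint64_t seed2, uint64_t n) {
    uint32_t key_R = static_cast<uint32_t>(seed),  key_L = static_cast<uint32_t>(seed >> 32);
    uint32_t n_R   = static_cast<uint32_t>(n),     n_L   = static_cast<uint32_t>(n >> 32);
    uint32_t cnt_R = static_cast<uint32_t>(seed2), cnt_L = static_cast<uint32_t>(seed2 >> 32);
    for (int round = 0; round < 10; ++round) {
        philox_step(n_L, n_R, 0xD2511F53u, key_L);      // k = L_key, M for n
        philox_step(cnt_L, cnt_R, 0xCD9E8D57u, key_R);  // k = R_key, M for counter
        key_L += 0x9E3779B9u;                           // per-round key schedule
        key_R += 0xBB67AE85u;
    }
    return {n_L, n_R, cnt_L, cnt_R};
}
```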
Float values in the range [0..1) are obtained from 32-bit integers by the following rules.

Float16 is formatted as follows: *sign* (1 bit), *exponent* (5 bits), *mantissa* (10 bits). The value is interpreted
using the following formula:

\f[
(-1)^{sign} \cdot 1.mantissa \cdot 2^{exponent - 15}
\f]

so to obtain float16 values, *sign*, *exponent* and *mantissa* are set as follows:
```
sign = 0
exponent = 15 - representation of a zero exponent.
mantissa = 10 right bits from the generated uint32 random value.
```
So the resulting float16 value is:
```
x_uint16 = x // Truncate the upper 16 bits.
val = ((exponent << 10) | x_uint16 & 0x3ffu) - 1.0,
```
where x is the generated uint32 random value.

Float32 is formatted as follows: *sign* (1 bit), *exponent* (8 bits), *mantissa* (23 bits). The value is interpreted
using the following formula:

\f[
(-1)^{sign} \cdot 1.mantissa \cdot 2^{exponent - 127}
\f]

so to obtain float32 values, *sign*, *exponent* and *mantissa* are set as follows:
```
sign = 0
exponent = 127 - representation of a zero exponent.
mantissa = 23 right bits from the generated uint32 random value.
```
So the resulting float32 value is:
```
val = ((exponent << 23) | x & 0x7fffffu) - 1.0,
```
where x is the generated uint32 random value.
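A minimal C++ sketch of this uint32-to-float32 conversion (the function name is ours; shown for illustration):

```cpp
#include <cstdint>
#include <cstring>

// Keep 23 random mantissa bits, force exponent 127 (a value in [1, 2)),
// reinterpret the bits as float and subtract 1.0 to land in [0, 1).
float uint32_to_unit_float(uint32_t x) {
    const uint32_t bits = (127u << 23) | (x & 0x7fffffu);
    float result;
    std::memcpy(&result, &bits, sizeof(result));  // bit-level reinterpretation
    return result - 1.0f;
}
```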
Double is formatted as follows: *sign* (1 bit), *exponent* (11 bits), *mantissa* (52 bits). The value is interpreted
using the following formula:

\f[
(-1)^{sign} \cdot 1.mantissa \cdot 2^{exponent - 1023}
\f]

so to obtain double values, *sign*, *exponent* and *mantissa* are set as follows:
```
sign = 0
exponent = 1023 - representation of a zero exponent.
mantissa = 52 right bits from two concatenated uint32 values from the random integer generator.
```
So the resulting double is obtained as follows:
```
mantissa_h = x0 & 0xfffffu;  // upper 20 bits of mantissa
mantissa_l = x1;             // lower 32 bits of mantissa
mantissa = (mantissa_h << 32) | mantissa_l;
val = ((exponent << 52) | mantissa) - 1.0,
```
where x0 and x1 are uint32 generated random values.
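The double case can be sketched the same way, assuming x0 supplies the upper 20 mantissa bits and x1 the lower 32 (names ours, for illustration):

```cpp
#include <cstdint>
#include <cstring>

double uint32x2_to_unit_double(uint32_t x0, uint32_t x1) {
    const uint64_t mantissa = (static_cast<uint64_t>(x0 & 0xfffffu) << 32) | x1;
    const uint64_t bits = (1023ull << 52) | mantissa;  // exponent 1023 -> value in [1, 2)
    double result;
    std::memcpy(&result, &bits, sizeof(result));
    return result - 1.0;  // uniformly in [0, 1)
}
```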
To obtain a value in the specified range, each generated value is transformed with the following formulas.

For float values:

\f[
result = x \cdot (maxval - minval) + minval,
\f]

where *x* is a random float or double value in [0..1).

For integer values:

\f[
result = x \mod (maxval - minval) + minval,
\f]

where *x* is a uint32 random value.
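Illustrative C++ equivalents of the two range-mapping formulas (function names are ours):

```cpp
#include <cstdint>

float scale_float(float x, float minval, float maxval) {
    return x * (maxval - minval) + minval;  // x is uniform in [0, 1)
}

int32_t scale_int(uint32_t x, int32_t minval, int32_t maxval) {
    return static_cast<int32_t>(x % static_cast<uint32_t>(maxval - minval)) + minval;
}
```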
Example 1. *RandomUniform* output with `seed` = 150, `seed2` = 10, `output_type` = f32:

```
input_shape = [ 3, 3 ]
output = [[0.7011236  0.30539632 0.93931055]
          [0.9456035  0.11694777 0.50770056]
          [0.5197197  0.22727466 0.991374  ]]
```

Example 2. *RandomUniform* output with `seed` = 80, `seed2` = 100, `output_type` = double:

```
input_shape = [ 2, 2 ]
minval = 2
maxval = 10
output = [[5.65927959 4.23122376]
          [2.67008206 2.36423758]]
```

Example 3. *RandomUniform* output with `seed` = 80, `seed2` = 100, `output_type` = i32:

```
input_shape = [ 2, 3 ]
minval = 50
maxval = 100
output = [[65 70 56]
          [59 82 92]]
```

**Attributes**:

* *output_type*
  * **Description**: the type of the output. Determines the generation algorithm and affects the resulting values.
    Output numbers generated for different values of *output_type* may not be equal.
  * **Range of values**: "i32", "i64", "f16", "bf16", "f32", "f64".
  * **Type**: string
  * **Required**: *Yes*

* *seed*
  * **Description**: global seed value.
  * **Range of values**: positive integers
  * **Type**: `int`
  * **Required**: *Yes*

* *seed2*
  * **Description**: operational seed value.
  * **Range of values**: positive integers
  * **Type**: `int`
  * **Required**: *Yes*

**Inputs**:

* **1**: `shape` - 1D tensor of type *T_SHAPE* describing the output shape. **Required.**

* **2**: `minval` - scalar or 1D tensor with a single element of the type specified by the *output_type* attribute;
defines the lower bound (inclusive) of the range of random values to generate. **Required.**

* **3**: `maxval` - scalar or 1D tensor with a single element of the type specified by the *output_type* attribute;
defines the upper bound (exclusive) of the range of random values to generate. **Required.**

**Outputs**:

* **1**: A tensor of the type specified by the *output_type* attribute, with the shape defined by the `shape` input tensor.

**Types**

* *T_SHAPE*: `int32` or `int64`.

*Example 1: IR example.*

```xml
<layer ... name="RandomUniform" type="RandomUniform">
    <data output_type="f32" seed="234" seed2="148"/>
    <input>
        <port id="0" precision="I32">  <!-- shape value: [2, 3, 10] -->
            <dim>3</dim>
        </port>
        <port id="1" precision="FP32"/>  <!-- min value -->
        <port id="2" precision="FP32"/>  <!-- max value -->
    </input>
    <output>
        <port id="3" precision="FP32" names="RandomUniform:0">
            <dim>2</dim>
            <dim>3</dim>
            <dim>10</dim>
        </port>
    </output>
</layer>
```

View File

@@ -115,6 +115,7 @@ declared in `namespace opset8`.
 * [PriorBox](detection/PriorBox_1.md)
 * [Proposal](detection/Proposal_4.md)
 * [PSROIPooling](detection/PSROIPooling_1.md)
+* [RandomUniform](generation/RandomUniform_8.md)
 * [Range](generation/Range_4.md)
 * [ReLU](activation/ReLU_1.md)
 * [ReadValue](infrastructure/ReadValue_3.md)

View File

@@ -0,0 +1,81 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include <ie_core.hpp>
#include <ie_ngraph_utils.hpp>
#include <ngraph/ngraph.hpp>
#include <shared_test_classes/base/layer_test_utils.hpp>
#include <vector>
#include "base_reference_test.hpp"
using namespace ngraph;
namespace reference_tests {
namespace {
struct AcoshParams {
Tensor input;
Tensor expected;
};
struct Builder : ParamsBuilder<AcoshParams> {
REFERENCE_TESTS_ADD_SET_PARAM(Builder, input);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, expected);
};
class ReferenceAcoshLayerTest : public testing::TestWithParam<AcoshParams>, public CommonReferenceTest {
public:
void SetUp() override {
auto params = GetParam();
function = CreateFunction(params.input.shape, params.input.type);
inputData = {params.input.data};
refOutData = {params.expected.data};
}
static std::string getTestCaseName(const testing::TestParamInfo<AcoshParams>& obj) {
auto param = obj.param;
std::ostringstream result;
result << "shape=" << param.input.shape << "_";
result << "type=" << param.input.type;
return result.str();
}
private:
static std::shared_ptr<Function> CreateFunction(const Shape& shape, const element::Type& type) {
const auto in = std::make_shared<op::Parameter>(type, shape);
const auto acosh = std::make_shared<op::Acosh>(in);
return std::make_shared<Function>(NodeVector {acosh}, ParameterVector {in});
}
};
TEST_P(ReferenceAcoshLayerTest, AcoshWithHardcodedRefs) {
Exec();
}
} // namespace
INSTANTIATE_TEST_SUITE_P(
smoke_Acosh_With_Hardcoded_Refs, ReferenceAcoshLayerTest,
::testing::Values(Builder {}
.input({{8}, element::f16, std::vector<ngraph::float16> {1.f, 2.f, 3.f, 4.f, 5.f, 10.f, 100.f, 1000.f}})
.expected({{8}, element::f16, std::vector<ngraph::float16> {0., 1.317, 1.763, 2.063, 2.292, 2.993, 5.298, 7.6012}}),
Builder {}
.input({{8}, element::f32, std::vector<float> {1.f, 2.f, 3.f, 4.f, 5.f, 10.f, 100.f, 1000.f}})
.expected({{8}, element::f32, std::vector<float> {0., 1.317, 1.763, 2.063, 2.292, 2.993, 5.298, 7.6012}}),
Builder {}
.input({{8}, element::i32, std::vector<int32_t> {1, 2, 3, 4, 5, 10, 100, 1000}})
.expected({{8}, element::i32, std::vector<int32_t> {0, 1, 2, 2, 2, 3, 5, 8}}),
Builder {}
.input({{8}, element::i64, std::vector<int64_t> {1, 2, 3, 4, 5, 10, 100, 1000}})
.expected({{8}, element::i64, std::vector<int64_t> {0, 1, 2, 2, 2, 3, 5, 8}}),
Builder {}
.input({{8}, element::u32, std::vector<uint32_t> {1, 2, 3, 4, 5, 10, 100, 1000}})
.expected({{8}, element::u32, std::vector<uint32_t> {0, 1, 2, 2, 2, 3, 5, 8}}),
Builder {}
.input({{8}, element::u64, std::vector<uint64_t> {1, 2, 3, 4, 5, 10, 100, 1000}})
.expected({{8}, element::u64, std::vector<uint64_t> {0, 1, 2, 2, 2, 3, 5, 8}})),
ReferenceAcoshLayerTest::getTestCaseName);
} // namespace reference_tests

View File

@@ -9,6 +9,8 @@
 using namespace InferenceEngine;

+namespace reference_tests {
+
 CommonReferenceTest::CommonReferenceTest(): targetDevice("TEMPLATE") {
     core = PluginCache::get().ie(targetDevice);
 }
@@ -171,3 +173,5 @@ void CommonReferenceTest::ValidateBlobs(const InferenceEngine::Blob::Ptr& refBlo
         FAIL() << "Comparator for " << precision << " precision isn't supported";
     }
 }
+
+} // namespace reference_tests

View File

@@ -5,8 +5,12 @@
 #include <ie_core.hpp>
 #include <ie_ngraph_utils.hpp>
 #include <ngraph/ngraph.hpp>
+#include <ngraph/shape.hpp>
+#include <ngraph/type/element_type.hpp>
 #include <shared_test_classes/base/layer_test_utils.hpp>

+namespace reference_tests {
+
 class CommonReferenceTest {
 public:
     CommonReferenceTest();
@@ -51,3 +55,55 @@ InferenceEngine::Blob::Ptr CreateBlob(const ngraph::element::Type& element_type,
     return blob;
 }

+///
+/// Class which helps to build data for a single input
+///
+struct Tensor {
+    Tensor() = default;
+
+    Tensor(const ngraph::Shape& shape, ngraph::element::Type type, const InferenceEngine::Blob::Ptr& data): shape {shape}, type {type}, data {data} {}
+
+    template <typename T>
+    Tensor(const ngraph::Shape& shape, ngraph::element::Type type, const std::vector<T>& data_elements)
+        : Tensor {shape, type, CreateBlob(type, data_elements)} {}
+
+    ngraph::Shape shape;
+    ngraph::element::Type type;
+    InferenceEngine::Blob::Ptr data;
+};
+
+///
+/// Class which helps to build test parameters.
+///
+/// e.g.:
+/// struct Params {
+///     Tensor i, o;
+///     int mul;
+/// };
+/// struct TestParamsBuilder : ParamsBuilder<Params>
+///     REFERENCE_TESTS_ADD_SET_PARAM(TestParamsBuilder, i);
+///     REFERENCE_TESTS_ADD_SET_PARAM(TestParamsBuilder, o);
+///     REFERENCE_TESTS_ADD_SET_PARAM(TestParamsBuilder, mul);
+/// };
+///
+/// const Params p = TestParamsBuilder{}
+///                      .i(Tensor{{0}, i32, {1}})
+///                      .o(Tensor{{0}, i32, {1}})
+///                      .mul(10);
+template <typename Params>
+class ParamsBuilder {
+protected:
+    Params params;
+
+public:
+    operator Params() const {
+        return params;
+    }
+};
+#define REFERENCE_TESTS_ADD_SET_PARAM(builder_type, param_to_set)  \
+    builder_type& param_to_set(decltype(params.param_to_set) t) {  \
+        params.param_to_set = std::move(t);                        \
+        return *this;                                              \
+    }
+
+} // namespace reference_tests
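For reference, each `REFERENCE_TESTS_ADD_SET_PARAM` use above expands into a fluent setter on the builder; for example, `REFERENCE_TESTS_ADD_SET_PARAM(Builder, input)` produces roughly:

```cpp
// Approximate expansion of REFERENCE_TESTS_ADD_SET_PARAM(Builder, input):
Builder& input(decltype(params.input) t) {
    params.input = std::move(t);
    return *this;
}
```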

View File

@@ -12,6 +12,7 @@
 #include "base_reference_test.hpp"

+using namespace reference_tests;
 using namespace ngraph;
 using namespace InferenceEngine;

View File

@@ -12,21 +12,22 @@
 #include "base_reference_test.hpp"

+using namespace reference_tests;
 using namespace ngraph;
 using namespace InferenceEngine;

 namespace {
 struct GrnParams {
     template <class IT>
-    GrnParams(const float bias, const ngraph::PartialShape& shape, const ngraph::element::Type& iType, const std::vector<IT>& iValues,
+    GrnParams(const float bias, const PartialShape& shape, const element::Type& iType, const std::vector<IT>& iValues,
               const std::vector<IT>& oValues)
         : bias(bias), pshape(shape), inType(iType), outType(iType), inputData(CreateBlob(iType, iValues)), refData(CreateBlob(iType, oValues)) {}
     float bias;
-    ngraph::PartialShape pshape;
-    ngraph::element::Type inType;
-    ngraph::element::Type outType;
-    InferenceEngine::Blob::Ptr inputData;
-    InferenceEngine::Blob::Ptr refData;
+    PartialShape pshape;
+    element::Type inType;
+    element::Type outType;
+    Blob::Ptr inputData;
+    Blob::Ptr refData;
 };

 class ReferenceGrnLayerTest : public testing::TestWithParam<GrnParams>, public CommonReferenceTest {
@@ -60,21 +61,21 @@ TEST_P(ReferenceGrnLayerTest, CompareWithHardcodedRefs) {
 }

 template <element::Type_t IN_ET>
-std::vector<GrnParams> generateGrnParams(const ngraph::element::Type& type) {
+std::vector<GrnParams> generateGrnParams(const element::Type& type) {
     using T = typename element_type_traits<IN_ET>::value_type;
     std::vector<GrnParams> grnParams {
         // bias 1e-6 // 2D // 3D // 4D
-        GrnParams(1e-6, ngraph::PartialShape {3, 4}, type, std::vector<T> {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
+        GrnParams(1e-6, PartialShape {3, 4}, type, std::vector<T> {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
                   std::vector<T> {0.182574, 0.365148, 0.547723, 0.730297, 0.379049, 0.454859, 0.530669, 0.606478, 0.426162, 0.473514, 0.520865, 0.568217}),
-        GrnParams(1e-6, ngraph::PartialShape {2, 3, 4}, type,
+        GrnParams(1e-6, PartialShape {2, 3, 4}, type,
                   std::vector<T> {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24},
                   std::vector<T> {0.0966737, 0.169031, 0.224231, 0.267261, 0.483368, 0.507093, 0.523205, 0.534522, 0.870063, 0.845154, 0.822179, 0.801784,
                                   0.433574, 0.441836, 0.449215, 0.455842, 0.566982, 0.568075, 0.569005, 0.569803, 0.700389, 0.694314, 0.688796, 0.683763}),
-        GrnParams(1e-6, ngraph::PartialShape {1, 2, 3, 4}, type,
+        GrnParams(1e-6, PartialShape {1, 2, 3, 4}, type,
                   std::vector<T> {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24},
                   std::vector<T> {0.0766965, 0.141421, 0.196116, 0.242536, 0.282166, 0.316228, 0.345705, 0.371391, 0.393919, 0.413803, 0.431455, 0.447214,
                                   0.997055, 0.989949, 0.980581, 0.970143, 0.959365, 0.948683, 0.938343, 0.928477, 0.919145, 0.910366, 0.902134, 0.894427}),
-        GrnParams(1e-6, ngraph::PartialShape {2, 2, 3, 4}, type,
+        GrnParams(1e-6, PartialShape {2, 2, 3, 4}, type,
                   std::vector<T> {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
                                   25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48},
                   std::vector<T> {0.0766965, 0.141421, 0.196116, 0.242536, 0.282166, 0.316228, 0.345705, 0.371391, 0.393919, 0.413803, 0.431455, 0.447214,
@@ -82,17 +83,17 @@ std::vector<GrnParams> generateGrnParams(const ngraph::element::Type& type) {
                                   0.559857, 0.564684, 0.56921, 0.573462, 0.577465, 0.581238, 0.584802, 0.588172, 0.591364, 0.594391, 0.597266, 0.6,
                                   0.828589, 0.825307, 0.822192, 0.819232, 0.816416, 0.813733, 0.811176, 0.808736, 0.806405, 0.804176, 0.802043, 0.8}),
         // bias 100.25 // 2D // 3D // 4D
-        GrnParams(100.25, ngraph::PartialShape {3, 4}, type, std::vector<T> {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
+        GrnParams(100.25, PartialShape {3, 4}, type, std::vector<T> {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
                   std::vector<T> {0.0876216, 0.175243, 0.262865, 0.350486, 0.301923, 0.362308, 0.422693, 0.483077, 0.385076, 0.427863, 0.470649, 0.513435}),
-        GrnParams(100.25, ngraph::PartialShape {2, 3, 4}, type,
+        GrnParams(100.25, PartialShape {2, 3, 4}, type,
                   std::vector<T> {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24},
                   std::vector<T> {0.0694629, 0.129032, 0.179525, 0.222137, 0.347314, 0.387097, 0.418891, 0.444273, 0.625166, 0.645161, 0.658258, 0.66641,
                                   0.41125, 0.421303, 0.430287, 0.438356, 0.537789, 0.541675, 0.54503, 0.547945, 0.664327, 0.662047, 0.659774, 0.657534}),
-        GrnParams(100.25, ngraph::PartialShape {1, 2, 3, 4}, type,
+        GrnParams(100.25, PartialShape {1, 2, 3, 4}, type,
                   std::vector<T> {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24},
                   std::vector<T> {0.0608299, 0.115422, 0.164091, 0.207321, 0.245662, 0.279675, 0.309889, 0.336786, 0.360795, 0.38229, 0.401596, 0.418994,
                                   0.790789, 0.807954, 0.820457, 0.829283, 0.835252, 0.839026, 0.841128, 0.841965, 0.841854, 0.841037, 0.839701, 0.837989f}),
-        GrnParams(100.25, ngraph::PartialShape {2, 2, 3, 4}, type,
+        GrnParams(100.25, PartialShape {2, 2, 3, 4}, type,
                   std::vector<T> {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
                                   25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48},
                   std::vector<T> {0.0608299, 0.115422, 0.164091, 0.207321, 0.245662, 0.279675, 0.309889, 0.336786, 0.360795, 0.38229, 0.401596, 0.418994,
@@ -103,9 +104,9 @@ std::vector<GrnParams> generateGrnParams(const ngraph::element::Type& type) {
 }

 std::vector<GrnParams> generateGrnCombinedParams() {
-    const std::vector<std::vector<GrnParams>> grnTypeParams {generateGrnParams<element::Type_t::bf16>(ngraph::element::bf16),
-                                                             generateGrnParams<element::Type_t::f16>(ngraph::element::f16),
-                                                             generateGrnParams<element::Type_t::f32>(ngraph::element::f32)};
+    const std::vector<std::vector<GrnParams>> grnTypeParams {generateGrnParams<element::Type_t::bf16>(element::bf16),
+                                                             generateGrnParams<element::Type_t::f16>(element::f16),
+                                                             generateGrnParams<element::Type_t::f32>(element::f32)};
     std::vector<GrnParams> combinedParams;
     std::for_each(grnTypeParams.begin(), grnTypeParams.end(), [&](std::vector<GrnParams> params) {
         combinedParams.insert(combinedParams.end(), params.begin(), params.end());

View File

@@ -0,0 +1,254 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include <ie_core.hpp>
#include <ie_ngraph_utils.hpp>
#include <ngraph/ngraph.hpp>
#include <shared_test_classes/base/layer_test_utils.hpp>
#include <tuple>
#include "base_reference_test.hpp"
using namespace ngraph;
using namespace InferenceEngine;
using namespace reference_tests;
// ------------------------------ V0 ------------------------------
struct MVN1Params {
MVN1Params(const Tensor& paramInput, const ngraph::AxisSet& paramReductionAxes, const bool paramAcrossChannels, const bool paramNormalizeVariance,
const double paramEps, const Tensor& paramExpected)
: input(paramInput),
reductionAxes(paramReductionAxes),
acrossChannels(paramAcrossChannels),
normalizeVariance(paramNormalizeVariance),
eps(paramEps),
expected(paramExpected) {}
Tensor input;
ngraph::AxisSet reductionAxes;
bool acrossChannels;
bool normalizeVariance;
double eps;
Tensor expected;
};
class ReferenceMVN1LayerTest : public testing::TestWithParam<MVN1Params>, public CommonReferenceTest {
public:
void SetUp() override {
auto params = GetParam();
function = CreateFunction(params.input, params.reductionAxes, params.acrossChannels, params.normalizeVariance, params.eps);
inputData = {params.input.data};
refOutData = {params.expected.data};
}
static std::string getTestCaseName(const testing::TestParamInfo<MVN1Params>& obj) {
auto param = obj.param;
std::ostringstream result;
result << "shape=" << param.input.shape;
result << "_iType=" << param.input.type;
if (!param.reductionAxes.empty()) {
result << "_reductionAccess=" << CommonTestUtils::vec2str(param.reductionAxes.to_vector());
} else {
result << "_acrossChannels=" << (param.acrossChannels ? "TRUE" : "FALSE");
}
result << "_normalizeVariance=" << (param.normalizeVariance ? "TRUE" : "FALSE");
result << "_eps=" << param.eps;
return result.str();
}
private:
static std::shared_ptr<Function> CreateFunction(const Tensor& input, const ngraph::AxisSet& reductionAxes, const bool acrossChannels,
const bool normalizeVariance, const double eps) {
const auto in = std::make_shared<op::Parameter>(input.type, input.shape);
auto mvn = std::make_shared<op::MVN>(in, acrossChannels, normalizeVariance, eps);
if (!reductionAxes.empty()) {
mvn = std::make_shared<op::MVN>(in, reductionAxes, normalizeVariance, eps);
}
return std::make_shared<Function>(NodeVector {mvn}, ParameterVector {in});
}
};
TEST_P(ReferenceMVN1LayerTest, CompareWithHardcodedRefs) {
Exec();
}
const ngraph::AxisSet emptyReductionAxes {};
INSTANTIATE_TEST_SUITE_P(
smoke_MVN1_With_Hardcoded_Refs, ReferenceMVN1LayerTest,
::testing::Values(
// across_channels=false, variance=false
MVN1Params(Tensor {{1, 3, 3, 3}, ngraph::element::f32, std::vector<float> {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5,
6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9}},
emptyReductionAxes,
false,
false,
1e-9,
Tensor {{1, 3, 3, 3}, ngraph::element::f32, std::vector<float> {-4, -3, -2, -1, 0, 1, 2, 3, 4, -4, -3, -2, -1, 0,
1, 2, 3, 4, -4, -3, -2, -1, 0, 1, 2, 3, 4}}),
// across_channels=true, variance=false
MVN1Params(
Tensor {{1, 3, 2, 2}, ngraph::element::f32, std::vector<float> {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3}},
emptyReductionAxes,
true,
false,
1e-9,
Tensor {{1, 3, 2, 2}, ngraph::element::f32, std::vector<float> {-3.25, -2.25, -1.25, -0.25, 0.75, 1.75, 2.75, 3.75, 4.75, -3.25, -2.25, -1.25}}),
// across_channels=false, variance=true
MVN1Params(Tensor {{1, 3, 3, 3}, ngraph::element::f32, std::vector<float> {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5,
6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9}},
emptyReductionAxes,
false,
true,
1e-9,
Tensor {{1, 3, 3, 3},
ngraph::element::f32,
std::vector<float> {-1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934,
-1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934,
-1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934}}),
// across_channels=true, variance=true
MVN1Params(Tensor {{1, 3, 3, 3}, ngraph::element::f32, std::vector<float> {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5,
6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9}},
emptyReductionAxes,
true,
true,
1e-9,
Tensor {{1, 3, 3, 3},
ngraph::element::f32,
std::vector<float> {-1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934,
-1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934,
-1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934}}),
// reductionAxes, variance=false
MVN1Params(
Tensor {{1, 3, 2, 2}, ngraph::element::f32, std::vector<float> {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3}},
{1, 2, 3},
false,
false,
1e-9,
Tensor {{1, 3, 2, 2}, ngraph::element::f32, std::vector<float> {-3.25, -2.25, -1.25, -0.25, 0.75, 1.75, 2.75, 3.75, 4.75, -3.25, -2.25, -1.25}}),
// reductionAxes, variance=true
MVN1Params(Tensor {{1, 3, 3, 3}, ngraph::element::f32, std::vector<float> {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5,
6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9}},
{2, 3},
false,
true,
1e-9,
Tensor {{1, 3, 3, 3},
ngraph::element::f32,
std::vector<float> {-1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934,
-1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934,
-1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934}})),
ReferenceMVN1LayerTest::getTestCaseName);
// ------------------------------ V6 ------------------------------
struct MVN6Params {
MVN6Params(const Tensor& paramInput, const Tensor& paramReductionAxes, const bool paramNormalizeVariance, const double paramEps,
const ngraph::op::MVNEpsMode mode, const Tensor& paramExpected)
: input(paramInput),
reductionAxes(paramReductionAxes),
normalizeVariance(paramNormalizeVariance),
eps(paramEps),
epsMode(mode),
expected(paramExpected) {}
Tensor input;
Tensor reductionAxes;
bool normalizeVariance;
double eps;
ngraph::op::MVNEpsMode epsMode;
Tensor expected;
};
class ReferenceMVN6LayerTest : public testing::TestWithParam<MVN6Params>, public CommonReferenceTest {
public:
void SetUp() override {
auto params = GetParam();
function = CreateFunction(params.input, params.reductionAxes, params.normalizeVariance, params.eps, params.epsMode);
inputData = {params.input.data};
refOutData = {params.expected.data};
}
static std::string getTestCaseName(const testing::TestParamInfo<MVN6Params>& obj) {
auto param = obj.param;
std::ostringstream result;
result << "shape=" << param.input.shape;
result << "_iType=" << param.input.type;
result << "_reductionAccess=" << CommonTestUtils::vec2str(param.reductionAxes.shape);
result << "_normalizeVariance=" << (param.normalizeVariance ? "TRUE" : "FALSE");
result << "_eps=" << param.eps;
result << "_eps_mode=" << param.epsMode;
return result.str();
}
private:
static std::shared_ptr<Function> CreateFunction(const Tensor& input, const Tensor& reductionAxes, const bool normalizeVariance, const double eps,
const ngraph::op::MVNEpsMode epsMode) {
std::vector<int64_t> dataVector(reductionAxes.shape[0]);
const auto in = std::make_shared<op::Parameter>(input.type, input.shape);
auto mRef = as<InferenceEngine::MemoryBlob>(reductionAxes.data);
IE_ASSERT(mRef);
const auto refLockMemory = mRef->rmap();
const auto refBuffer = refLockMemory.as<const std::uint64_t*>();
for (size_t i = 0; i < dataVector.size(); ++i) {
dataVector[i] = refBuffer[i];
}
const auto axes = std::make_shared<op::Constant>(reductionAxes.type, reductionAxes.shape, dataVector);
auto mvn = std::make_shared<op::v6::MVN>(in, axes, normalizeVariance, eps, epsMode);
return std::make_shared<Function>(NodeVector {mvn}, ParameterVector {in});
}
};
TEST_P(ReferenceMVN6LayerTest, CompareWithHardcodedRefs) {
Exec();
}
INSTANTIATE_TEST_SUITE_P(
smoke_MVN6_With_Hardcoded_Refs, ReferenceMVN6LayerTest,
::testing::Values(
// variance=false, OUTSIDE_SQRT
MVN6Params(Tensor {{1, 3, 3, 3}, ngraph::element::f32, std::vector<float> {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5,
6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9}},
Tensor {Shape {2}, ngraph::element::i64, std::vector<int64_t> {2, 3}},
false,
1e-9,
ngraph::op::MVNEpsMode::OUTSIDE_SQRT,
Tensor {{1, 3, 3, 3}, ngraph::element::f32, std::vector<float> {-4, -3, -2, -1, 0, 1, 2, 3, 4, -4, -3, -2, -1, 0,
1, 2, 3, 4, -4, -3, -2, -1, 0, 1, 2, 3, 4}}),
// variance=true, OUTSIDE_SQRT
MVN6Params(Tensor {{1, 3, 3, 3}, ngraph::element::f32, std::vector<float> {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5,
6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9}},
Tensor {Shape {2}, ngraph::element::i64, std::vector<int64_t> {2, 3}},
true,
1e-9,
ngraph::op::MVNEpsMode::OUTSIDE_SQRT,
Tensor {{1, 3, 3, 3},
ngraph::element::f32,
std::vector<float> {-1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934,
-1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934,
-1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934}}),
// variance=true, INSIDE_SQRT
MVN6Params(Tensor {{1, 3, 3, 3}, ngraph::element::f32, std::vector<float> {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5,
6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9}},
Tensor {Shape {2}, ngraph::element::i64, std::vector<int64_t> {2, 3}},
true,
1e-9,
ngraph::op::MVNEpsMode::INSIDE_SQRT,
Tensor {{1, 3, 3, 3},
ngraph::element::f32,
std::vector<float> {-1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934,
-1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934,
-1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934}}),
// variance=true, another reductionAxes, OUTSIDE_SQRT
MVN6Params(Tensor {{1, 3, 3, 3}, ngraph::element::f32, std::vector<float>({1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5,
6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9})},
Tensor {Shape {3}, ngraph::element::i64, std::vector<int64_t>({1, 2, 3})},
true,
1e-9,
ngraph::op::MVNEpsMode::OUTSIDE_SQRT,
Tensor {{1, 3, 3, 3},
ngraph::element::f32,
std::vector<float> {-1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934,
-1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934,
-1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934}})),
ReferenceMVN6LayerTest::getTestCaseName);

View File

@@ -12,6 +12,7 @@
 #include "base_reference_test.hpp"

+using namespace reference_tests;
 using namespace ngraph;
 using namespace InferenceEngine;

@@ -33,12 +34,12 @@ struct SelectParams {
     element::Type data_type;
     op::AutoBroadcastSpec broadcast;
     PartialShape select_input_pshape;
-    InferenceEngine::Blob::Ptr select_input;
+    Blob::Ptr select_input;
     PartialShape if_input_pshape;
-    InferenceEngine::Blob::Ptr if_input;
+    Blob::Ptr if_input;
     PartialShape else_input_pshape;
-    InferenceEngine::Blob::Ptr else_input;
-    InferenceEngine::Blob::Ptr expected_output;
+    Blob::Ptr else_input;
+    Blob::Ptr expected_output;
 };

 class ReferenceSelectLayerTest : public testing::TestWithParam<SelectParams>, public CommonReferenceTest {

View File

@@ -12,19 +12,20 @@
 #include "base_reference_test.hpp"

+using namespace reference_tests;
 using namespace ngraph;
 using namespace InferenceEngine;

 struct SignParams {
     template <class IT, class OT>
-    SignParams(const ngraph::PartialShape& shape, const ngraph::element::Type& iType, const ngraph::element::Type& oType, const std::vector<IT>& iValues,
+    SignParams(const PartialShape& shape, const element::Type& iType, const element::Type& oType, const std::vector<IT>& iValues,
                const std::vector<OT>& oValues)
         : pshape(shape), inType(iType), outType(oType), inputData(CreateBlob(iType, iValues)), refData(CreateBlob(oType, oValues)) {}
-    ngraph::PartialShape pshape;
-    ngraph::element::Type inType;
-    ngraph::element::Type outType;
-    InferenceEngine::Blob::Ptr inputData;
-    InferenceEngine::Blob::Ptr refData;
+    PartialShape pshape;
+    element::Type inType;
+    element::Type outType;
+    Blob::Ptr inputData;
+    Blob::Ptr refData;
 };

 class ReferenceSignLayerTest : public testing::TestWithParam<SignParams>, public CommonReferenceTest {
@@ -59,22 +60,22 @@ TEST_P(ReferenceSignLayerTest, CompareWithHardcodedRefs) {
 INSTANTIATE_TEST_SUITE_P(
     smoke_Sign_With_Hardcoded_Refs, ReferenceSignLayerTest,
     ::testing::Values(
-        SignParams(ngraph::PartialShape {6}, ngraph::element::f32, ngraph::element::f32,
+        SignParams(PartialShape {6}, element::f32, element::f32,
                    std::vector<float> {1, -2, 0, -4.8f, 4.8f, -0.0f},
                    std::vector<float> {1, -1, 0, -1, 1, 0}),
-        SignParams(ngraph::PartialShape {6}, ngraph::element::f16, ngraph::element::f16,
+        SignParams(PartialShape {6}, element::f16, element::f16,
                    std::vector<float16> {1, -2, 0, -4.8f, 4.8f, -0.0f},
                    std::vector<float16> {1, -1, 0, -1, 1, 0}),
-        SignParams(ngraph::PartialShape {6}, ngraph::element::u64, ngraph::element::u64,
+        SignParams(PartialShape {6}, element::u64, element::u64,
                    std::vector<uint64_t> {1, 2, 0, 4, 4, 0},
                    std::vector<uint64_t> {1, 1, 0, 1, 1, 0}),
-        SignParams(ngraph::PartialShape {6}, ngraph::element::u32, ngraph::element::u32,
+        SignParams(PartialShape {6}, element::u32, element::u32,
                    std::vector<uint32_t> {1, 2, 0, 4, 4, 0},
                    std::vector<uint32_t> {1, 1, 0, 1, 1, 0}),
-        SignParams(ngraph::PartialShape {6}, ngraph::element::i32, ngraph::element::i32,
+        SignParams(PartialShape {6}, element::i32, element::i32,
                    std::vector<int32_t> {1, -2, 0, -4, 4, -0},
                    std::vector<int32_t> {1, -1, 0, -1, 1, 0}),
-        SignParams(ngraph::PartialShape {6}, ngraph::element::i64, ngraph::element::i64,
+        SignParams(PartialShape {6}, element::i64, element::i64,
                    std::vector<int64_t> {1, -2, 0, -4, 4, -0},
                    std::vector<int64_t> {1, -1, 0, -1, 1, 0})),
     ReferenceSignLayerTest::getTestCaseName);

View File

@@ -6,14 +6,14 @@ include_guard(GLOBAL)

 set(VPU_SUPPORTED_FIRMWARES usb-ma2x8x pcie-ma2x8x)
 set(VPU_SUPPORTED_FIRMWARES_HASH
-    "d55a824838accec31733e4d4c45e8774bdd5690da8beefe41360f1983476e3d0"
-    "61797a77b38fc677be4cc63d730e8871bbf169686b88eabb7066b01f9d156129")
+    "54a732b5fb17a0124652bc5113fa628c718a5af40621bca309471cb5ffd9271b"
+    "5750b2831c77ef54b8e243d3840c5ed1c9509681d55aee7e369d558cef628735")

 #
 # Default packages
 #

-set(FIRMWARE_PACKAGE_VERSION 1714)
+set(FIRMWARE_PACKAGE_VERSION 1717)
 set(VPU_CLC_MA2X8X_VERSION "movi-cltools-20.09.2")

 #

View File

@@ -95,6 +95,7 @@ Options:
     -layout                  Optional. Prompts how network layouts should be treated by application. For example, "input1[NCHW],input2[NC]" or "[NCHW]" in case of one input size.
     -cache_dir "<path>"      Optional. Enables caching of loaded models to specified directory.
     -load_from_file          Optional. Loads model from file directly without ReadNetwork.
+    -latency_percentile      Optional. Defines the percentile to be reported in latency metric. The valid range is [1, 100]. The default value is 50 (median).

   CPU-specific performance options:
     -nstreams "<integer>"    Optional. Number of streams to use for inference on the CPU, GPU or MYRIAD devices

View File

@@ -56,6 +56,10 @@ static const char infer_num_streams_message[] = "Optional. Number of streams to
                                                 "Also, using nstreams>1 is inherently throughput-oriented option, "
                                                 "while for the best-latency estimations the number of streams should be set to 1.";

+/// @brief message for latency percentile settings
+static const char infer_latency_percentile_message[] =
+    "Optional. Defines the percentile to be reported in latency metric. The valid range is [1, 100]. The default value is 50 (median).";
+
 /// @brief message for enforcing of BF16 execution where it is possible
 static const char enforce_bf16_message[] = "Optional. By default floating point operations execution in bfloat16 precision are enforced "
                                            "if supported by platform.\n"
@@ -189,6 +193,9 @@ DEFINE_uint32(nthreads, 0, infer_num_threads_message);
 /// @brief Number of streams to use for inference on the CPU (also affects Hetero cases)
 DEFINE_string(nstreams, "", infer_num_streams_message);

+/// @brief The percentile which will be reported in latency metric
+DEFINE_uint32(latency_percentile, 50, infer_latency_percentile_message);
+
 /// @brief Enforces bf16 execution with bfloat16 precision on systems having this capability
 DEFINE_bool(enforcebf16, false, enforce_bf16_message);

@@ -278,6 +285,7 @@ static void showUsage() {
     std::cout << "    -layout                   " << layout_message << std::endl;
     std::cout << "    -cache_dir \"<path>\"       " << cache_dir_message << std::endl;
     std::cout << "    -load_from_file           " << load_from_file_message << std::endl;
+    std::cout << "    -latency_percentile       " << infer_latency_percentile_message << std::endl;
     std::cout << std::endl << "  device-specific performance options:" << std::endl;
     std::cout << "    -nstreams \"<integer>\"     " << infer_num_streams_message << std::endl;
     std::cout << "    -nthreads \"<integer>\"     " << infer_num_threads_message << std::endl;

View File

@@ -52,6 +52,10 @@ bool ParseAndCheckCommandLine(int argc, char* argv[]) {
         throw std::logic_error("Model is required but not set. Please set -m option.");
     }

+    if (FLAGS_latency_percentile > 100 || FLAGS_latency_percentile < 1) {
+        showUsage();
+        throw std::logic_error("The percentile value is incorrect. The applicable values range is [1, 100].");
+    }
     if (FLAGS_api != "async" && FLAGS_api != "sync") {
         throw std::logic_error("Incorrect API. Please set -api option to `sync` or `async` value.");
     }
@@ -100,11 +104,10 @@ static void next_step(const std::string additional_info = "") {
 }

 template <typename T>
-T getMedianValue(const std::vector<T>& vec) {
+T getMedianValue(const std::vector<T>& vec, std::size_t percentile) {
     std::vector<T> sortedVec(vec);
     std::sort(sortedVec.begin(), sortedVec.end());
-    return (sortedVec.size() % 2 != 0) ? sortedVec[sortedVec.size() / 2ULL]
-                                       : (sortedVec[sortedVec.size() / 2ULL] + sortedVec[sortedVec.size() / 2ULL - 1ULL]) / static_cast<T>(2.0);
+    return sortedVec[(sortedVec.size() / 100) * percentile];
 }

 /**
@@ -624,7 +627,7 @@ int main(int argc, char* argv[]) {
         // wait the latest inference executions
         inferRequestsQueue.waitAll();

-        double latency = getMedianValue<double>(inferRequestsQueue.getLatencies());
+        double latency = getMedianValue<double>(inferRequestsQueue.getLatencies(), FLAGS_latency_percentile);
         double totalDuration = inferRequestsQueue.getDurationInMilliseconds();
         double fps = (FLAGS_api == "sync") ? batchSize * 1000.0 / latency : batchSize * 1000.0 * iteration / totalDuration;
@@ -634,8 +637,14 @@ int main(int argc, char* argv[]) {
                 {"total number of iterations", std::to_string(iteration)},
             });
             if (device_name.find("MULTI") == std::string::npos) {
+                std::string latency_label;
+                if (FLAGS_latency_percentile == 50) {
+                    latency_label = "latency (ms)";
+                } else {
+                    latency_label = "latency (" + std::to_string(FLAGS_latency_percentile) + " percentile) (ms)";
+                }
                 statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {
-                    {"latency (ms)", double_to_string(latency)},
+                    {latency_label, double_to_string(latency)},
                 });
             }
             statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {{"throughput", double_to_string(fps)}});
@@ -684,8 +693,15 @@ int main(int argc, char* argv[]) {
         std::cout << "Count: " << iteration << " iterations" << std::endl;
         std::cout << "Duration: " << double_to_string(totalDuration) << " ms" << std::endl;
-        if (device_name.find("MULTI") == std::string::npos)
-            std::cout << "Latency: " << double_to_string(latency) << " ms" << std::endl;
+        if (device_name.find("MULTI") == std::string::npos) {
+            std::cout << "Latency";
+            if (FLAGS_latency_percentile == 50) {
+                std::cout << ": ";
+            } else {
+                std::cout << " (" << FLAGS_latency_percentile << " percentile): ";
+            }
+            std::cout << double_to_string(latency) << " ms" << std::endl;
+        }
         std::cout << "Throughput: " << double_to_string(fps) << " FPS" << std::endl;
     } catch (const std::exception& ex) {
         slog::err << ex.what() << slog::endl;
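Aside: the patched `getMedianValue` above reduces the latency metric to a single index into the sorted latency vector. A standalone sketch of the same lookup (names ours, for illustration):

```cpp
#include <algorithm>
#include <cstddef>
#include <vector>

// With 200 samples and percentile = 90 this reads sortedVec[(200 / 100) * 90],
// i.e. element 180 of the ascending-sorted latencies.
double percentile_value(std::vector<double> samples, std::size_t percentile) {
    std::sort(samples.begin(), samples.end());
    return samples[(samples.size() / 100) * percentile];
}
```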

View File

@@ -12,7 +12,7 @@ if(CMAKE_COMPILER_IS_GNUCC)
     endif()
 endif()

-if(GPU_DEBUG_CONFIG)
+if(ENABLE_GPU_DEBUG_CAPS)
     add_definitions(-DGPU_DEBUG_CONFIG=1)
 endif()

View File

@@ -194,11 +194,11 @@ REGISTER_FACTORY(v5, LSTMSequence);
 //REGISTER_FACTORY(v5, NonMaxSuppression); Supported via v5 -> v5 internal conversion
 REGISTER_FACTORY(v5, Round);
 REGISTER_FACTORY(v5, GatherND);
+REGISTER_FACTORY(v5, Loop);

 // ----------------------------- Unsupported v5 ops ----------------------------- //
 // REGISTER_FACTORY(v5, BatchNormInference);
 // REGISTER_FACTORY(v5, GRUSequence);
-// REGISTER_FACTORY(v5, Loop);
 // REGISTER_FACTORY(v5, RNNSequence);

 // ------------------------------ Supported v6 ops ------------------------------ //

View File

@ -0,0 +1,227 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
///////////////////////////////////////////////////////////////////////////////////////////////////
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "cldnn_engine.h"
#include <cpp/ie_cnn_network.h>
#include "ngraph/op/loop.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/util/sub_graph_base.hpp"
#include "transformations/utils/utils.hpp"
#include "ie_ngraph_utils.hpp"
#include "cldnn/primitives/loop.hpp"
#include "cldnn/primitives/mutable_data.hpp"
#include "cldnn/primitives/data.hpp"
#include "cldnn/primitives/reorder.hpp"
#include "cldnn/graph/topology.hpp"
#include <vector>
#include <algorithm>
using Loop = ngraph::op::v5::Loop;
namespace CLDNNPlugin {
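// Allocates a 1x1x1x1 i64 buffer, writes a single scalar into it, and wraps it
// in the requested primitive type (cldnn::data or cldnn::mutable_data).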
template<class DATA_TYPE>
static DATA_TYPE CreateScalarData(Program &p, const cldnn::primitive_id& id, int64_t num) {
auto mem = p.GetEngine().allocate_memory({ cldnn::data_types::i64, cldnn::format::bfyx, { 1, 1, 1, 1 } });
cldnn::mem_lock<int64_t> ptr{mem, p.GetEngine().get_program_stream()};
*ptr.begin() = num;
return {id, mem};
}
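// Creates a mutable_data primitive with the layout of the op's output_idx output;
// used to expose extra Loop outputs, since cldnn::data cannot carry a dependency.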
static cldnn::mutable_data CreateAdditionalOutputData(Program &p, const std::shared_ptr<ngraph::Node>& op,
const cldnn::primitive_id& id, const cldnn::primitive_id& input,
const int32_t output_idx) {
const auto precision = DataTypeFromPrecision(op->get_output_element_type(output_idx));
const auto format = DefaultFormatForDims(op->get_output_shape(output_idx).size());
const auto tensor = CldnnTensorFromIEDims(op->get_output_shape(output_idx));
cldnn::layout output_layout = cldnn::layout(precision, format, tensor);
auto mem = p.GetEngine().allocate_memory(output_layout);
auto md = cldnn::mutable_data(id, {input}, mem); // cldnn::data cannot set dependency
return md;
}
static void UpdateBackedge(std::vector<cldnn::loop::backedge_mapping>& back_edges,
const cldnn::primitive_id& old_primitive_id, const cldnn::primitive_id& new_primitive_id) {
for (auto& back_edge : back_edges) {
if (back_edge.from == old_primitive_id) {
back_edge.from = new_primitive_id;
}
}
}
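// Maps a body Parameter index back to the primitive id of the outer-network node
// that feeds it, by scanning the Loop's input descriptions.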
static std::string GetExternalInputName(const int64_t body_parameter_index,
const std::shared_ptr<Loop>& op) {
const auto& loop_input_descs = op->get_input_descriptions();
for (const auto& loop_input_desc : loop_input_descs) {
if (loop_input_desc->m_body_parameter_index == body_parameter_index) {
auto external_node = op->get_input_node_shared_ptr(loop_input_desc->m_input_index);
return layer_type_name_ID(external_node);
}
}
return "";
}
void CreateLoopOp(Program& p, const std::shared_ptr<Loop>& op) {
const std::string layerName = layer_type_name_ID(op);
auto inputPrimitives = p.GetInputPrimitiveIDs(op);
const auto& loop_input_descs = op->get_input_descriptions();
const auto& loop_output_descs = op->get_output_descriptions();
const auto& body_inputs = op->get_function()->get_parameters();
const auto& body_outputs = op->get_function()->get_results();
InferenceEngine::CNNNetwork body_network(op->get_function());
auto networkInputs = body_network.getInputsInfo();
auto networkOutputs = body_network.getOutputsInfo();
// Set special body ports: current_iteration input, execution condition output
auto special_body_ports = op->get_special_body_ports();
std::string body_current_iteration_id;
if (special_body_ports.current_iteration_input_idx >= 0) {
auto current_iteration_input = body_inputs.at(special_body_ports.current_iteration_input_idx);
body_current_iteration_id = layer_type_name_ID(current_iteration_input);
std::string input_name = ngraph::op::util::create_ie_output_name(current_iteration_input);
const auto networkInput = networkInputs.at(input_name);
auto precision = InferenceEngine::details::convertPrecision(current_iteration_input->get_element_type());
networkInput->setPrecision(precision);
}
cldnn::primitive_id body_execution_condition_id;
if (special_body_ports.body_condition_output_idx >= 0) {
auto body_condition_output = body_outputs.at(special_body_ports.body_condition_output_idx)->get_input_node_shared_ptr(0);
body_execution_condition_id = layer_type_name_ID(body_condition_output);
std::string output_name = ngraph::op::util::create_ie_output_name(body_condition_output);
const auto networkOutput = networkOutputs.at(output_name);
networkOutput->setPrecision(InferenceEngine::Precision::I64);
}
// get body topology from ngraph function
Program body_program(body_network, p.GetEnginePtr(), p.GetConfig(), true);
auto body_topology = *body_program.GetTopology();
// setup input_primitive_maps/ output_primitive_maps and back_edges
std::vector<cldnn::loop::io_primitive_map> input_primitive_maps;
std::vector<cldnn::loop::io_primitive_map> output_primitive_maps;
std::vector<cldnn::loop::backedge_mapping> back_edges;
// set input mapping & back edges
for (const auto& loop_input_desc : loop_input_descs) {
const cldnn::primitive_id& external_id = inputPrimitives.at(loop_input_desc->m_input_index);
auto& body_input = body_inputs.at(loop_input_desc->m_body_parameter_index);
cldnn::primitive_id internal_id = layer_type_name_ID(body_input);
// set input mapping
if (const auto& sliceInfo =
std::dynamic_pointer_cast<Loop::SliceInputDescription>(loop_input_desc)) {
// sliced input
input_primitive_maps.emplace_back(external_id, internal_id, sliceInfo->m_axis,
sliceInfo->m_start, sliceInfo->m_end, sliceInfo->m_stride);
} else {
// input without slicing
input_primitive_maps.emplace_back(external_id, internal_id);
}
// set back edges
if (const auto& mergedInput =
std::dynamic_pointer_cast<Loop::MergedInputDescription>(loop_input_desc)) {
// backedge
const auto& to = body_inputs.at(mergedInput->m_body_parameter_index);
const auto& from = body_outputs.at(mergedInput->m_body_value_index);
cldnn::primitive_id to_id = layer_type_name_ID(to);
cldnn::primitive_id from_id = layer_type_name_ID(from);
// reset output data type because the data types of the outputs of the
// body topology are always FP32 regardless of ngraph data type
{
const auto from_prim = body_topology.at(from_id);
const auto& to_ngraph_type = to->get_element_type();
const auto to_cldnn_type = DataTypeFromPrecision(to_ngraph_type);
from_prim->output_data_type = to_cldnn_type;
}
back_edges.emplace_back(from_id, to_id);
}
}
// set trip count, initial execution condition, num iteration primitives
// they should be mutable_data to prevent them from being optimized out
const cldnn::primitive_id trip_count_id = layer_type_name_ID(op->get_input_node_shared_ptr(0));
const cldnn::primitive_id execution_condition_id = layer_type_name_ID(op->get_input_node_shared_ptr(1));
const int64_t num_iterations = op->get_num_iterations();
if (num_iterations < 0) {
IE_THROW() << "loop's num_iteration cannot be negative";
}
const cldnn::primitive_id num_iteration_id = layerName + "_numIteration";
{
cldnn::mutable_data num_iteration = CreateScalarData<cldnn::mutable_data>(p, num_iteration_id, 0);
p.primitivesToIRLayersMap[num_iteration_id] = { op->get_friendly_name() };
p.primitiveIDs[num_iteration_id] = num_iteration_id;
p.AddPrimitive(num_iteration);
p.AddInnerPrimitiveToProfiler(num_iteration_id, layerName, op);
}
// set output mapping
for (const auto& loop_output_desc : loop_output_descs) {
const uint64_t output_idx = loop_output_desc->m_output_index;
// Add additional mutable_data for multiple outputs
// primitive ID should be <TI primitive ID>.<output_idx> if output_idx > 0
// otherwise primitive ID should be equal to TI primitive ID
const std::string layerNameWithIndex = layerName + "." + std::to_string(output_idx);
std::string external_id;
if (output_idx > 0) {
cldnn::mutable_data output_data = CreateAdditionalOutputData(p, op, layerNameWithIndex, layerName, output_idx);
p.AddPrimitive(output_data);
p.AddInnerPrimitiveToProfiler(layerNameWithIndex, layerName, op);
p.primitiveIDs[layerNameWithIndex] = layerNameWithIndex;
external_id = layerNameWithIndex;
} else {
p.primitiveIDs[layerNameWithIndex] = layerName;
p.primitiveIDs[layerName] = layerName;
external_id = layerName;
}
const auto& body_output = body_outputs.at(loop_output_desc->m_body_value_index);
cldnn::primitive_id internal_id = layer_type_name_ID(body_output);
// update primitive_map
if (const auto& concatOutput =
std::dynamic_pointer_cast<Loop::ConcatOutputDescription>(loop_output_desc)) {
// output which requires concatenation
output_primitive_maps.emplace_back(external_id, internal_id, concatOutput->m_axis,
concatOutput->m_start, concatOutput->m_end, concatOutput->m_stride);
}
if (std::dynamic_pointer_cast<Loop::BodyOutputDescription>(loop_output_desc)) {
// output which requires no concatenation
output_primitive_maps.emplace_back(external_id, internal_id);
}
}
const cldnn::loop loopPrimitive(
layerName, /* layer name of this primitive (output id) */
inputPrimitives, /* inputs of this layer */
body_topology, /* body network */
trip_count_id, /* trip_count data in outer network, always same as num_iterations in TI */
execution_condition_id, /* initial_execution_condition data in outer network, always true in TI */
num_iteration_id, /* actual number of iteration data in body network */
input_primitive_maps, /* input mappings connecting outer network and inner network */
output_primitive_maps, /* output mappings connecting outer network and inner network */
back_edges, /* back edge mapping */
num_iterations, /* max iteration, i.e. length of iteration axis */
body_current_iteration_id,
body_execution_condition_id);
p.AddPrimitive(loopPrimitive);
p.AddPrimitiveToProfiler(op);
}
REGISTER_FACTORY_IMPL(v5, Loop);
} // namespace CLDNNPlugin
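CreateLoopOp consumes an ngraph::op::v5::Loop whose ports were configured on the nGraph side. A minimal sketch of building such an op with the public Loop/SubGraphOp calls, assuming a toy body that just increments its input (shapes and values are illustrative):

#include <memory>
#include "ngraph/ngraph.hpp"
#include "ngraph/opsets/opset5.hpp"

std::shared_ptr<ngraph::op::v5::Loop> make_example_loop() {
    using namespace ngraph;
    auto trip_count = opset5::Constant::create(element::i64, Shape{}, {10});
    auto exec_cond  = opset5::Constant::create(element::boolean, Shape{}, {true});
    auto init       = std::make_shared<opset5::Parameter>(element::f32, Shape{1});

    // body: out = in + 1; the condition output stays true
    auto body_in   = std::make_shared<opset5::Parameter>(element::f32, Shape{1});
    auto one       = opset5::Constant::create(element::f32, Shape{}, {1.f});
    auto body_out  = std::make_shared<opset5::Add>(body_in, one);
    auto body_cond = opset5::Constant::create(element::boolean, Shape{}, {true});
    auto body = std::make_shared<Function>(OutputVector{body_cond, body_out},
                                           ParameterVector{body_in});

    auto loop = std::make_shared<op::v5::Loop>(trip_count, exec_cond);
    loop->set_function(body);
    // no current_iteration input; result 0 of the body is the execution condition
    loop->set_special_body_ports({-1, 0});
    // back edge: body_in starts from init and is fed from body_out each iteration
    loop->set_merged_input(body_in, init, body_out);
    // expose the last iteration's body_out as a Loop output
    loop->get_iter_value(body_out, -1);
    return loop;
}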

View File

@ -25,6 +25,7 @@ public:
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override; bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
bool isQuantized(const std::shared_ptr<const Node>& layer) const noexcept override; bool isQuantized(const std::shared_ptr<const Node>& layer) const noexcept override;
static bool canBeTransformedToGroupConvolution(const std::shared_ptr<const Node>& layer) noexcept; static bool canBeTransformedToGroupConvolution(const std::shared_ptr<const Node>& layer) noexcept;
static bool isDynamicOrScalar(const std::shared_ptr<const Node>& node);
void setGroupSize(const size_t groupSize); void setGroupSize(const size_t groupSize);
size_t getGroupSize() const; size_t getGroupSize() const;

View File

@ -72,7 +72,15 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con
NetworkHelper::getDequantization(reshapeFromWeights); NetworkHelper::getDequantization(reshapeFromWeights);
if (dequantization.empty()) { if (dequantization.empty()) {
const auto fqOnWeights = getFakeQuantizeOnWeights(convolutionBackpropData); const auto fqOnWeights = getFakeQuantizeOnWeights(convolutionBackpropData);
std::shared_ptr<ngraph::Node> resultConstant = NetworkHelper::fold_fake_quantize(fqOnWeights); auto constantShape = fqOnWeights->input(1).get_partial_shape();
if (constantShape.is_dynamic() || constantShape.rank().is_dynamic()) {
return false;
}
std::shared_ptr<ngraph::Node> resultConstant = NetworkHelper::fold_fake_quantize(
fqOnWeights,
false,
(constantShape.rank().get_length() < 2) || constantShape[1] != 1ul ? 1ul : 0ul);
if (reshapeFromWeights != nullptr) { if (reshapeFromWeights != nullptr) {
resultConstant = fold_reshape<opset1::Reshape>( resultConstant = fold_reshape<opset1::Reshape>(
resultConstant, resultConstant,

View File

@ -42,7 +42,15 @@ bool FoldFakeQuantizeTransformation::transform(TransformationContext& context, n
return false; return false;
} }
const auto resultConstant = NetworkHelper::fold_fake_quantize(fakeQuantize, false); const auto constantShape = fakeQuantize->input(1).get_partial_shape();
if (constantShape.is_dynamic() || constantShape.rank().is_dynamic()) {
return false;
}
std::shared_ptr<ngraph::Node> resultConstant = NetworkHelper::fold_fake_quantize(
fakeQuantize,
false,
(constantShape.rank().get_length() < 2) || constantShape[1] != 1ul ? 1ul : 0ul);
if (is_type<opset1::Constant>(resultConstant)) { if (is_type<opset1::Constant>(resultConstant)) {
replace_node(fakeQuantize, resultConstant); replace_node(fakeQuantize, resultConstant);
return true; return true;
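This hunk and the ConvolutionBackpropData one above add the same guard: give up on dynamic constant shapes, then fold along axis 1 when the FakeQuantize constants are laid out per channel and along axis 0 otherwise. A small hypothetical helper capturing the shared selection (not part of the patch):

#include "ngraph/partial_shape.hpp"

// Returns false when the constant shape is dynamic (caller should skip the
// transformation); otherwise reports the axis fold_fake_quantize should use.
static bool pickFoldingAxis(const ngraph::PartialShape& constantShape, size_t& axis) {
    if (constantShape.is_dynamic() || constantShape.rank().is_dynamic()) {
        return false;
    }
    axis = (constantShape.rank().get_length() < 2) || constantShape[1] != 1ul ? 1ul : 0ul;
    return true;
}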

View File

@ -47,6 +47,9 @@ bool MultiplyToGroupConvolutionTransformation::transform(TransformationContext&
} }
auto dequantization = NetworkHelper::getDequantization(multiply, inputIndex); auto dequantization = NetworkHelper::getDequantization(multiply, inputIndex);
if (dequantization.data.get_node() == nullptr) {
return false;
}
if (dequantization.subtractConvert != nullptr) { if (dequantization.subtractConvert != nullptr) {
dequantization = NetworkHelper::foldDequantization(multiply, inputIndex); dequantization = NetworkHelper::foldDequantization(multiply, inputIndex);
} }
@ -176,12 +179,6 @@ bool MultiplyToGroupConvolutionTransformation::canBeTransformed(const Transforma
return false; return false;
} }
const auto dequantization = NetworkHelper::getDequantization(operation, inputIndex);
if (dequantization.empty()) {
return false;
}
for (size_t i = 2; i < constShape.size(); ++i) { for (size_t i = 2; i < constShape.size(); ++i) {
if (constShape[i] != 1) { if (constShape[i] != 1) {
return false; return false;
@ -189,9 +186,13 @@ bool MultiplyToGroupConvolutionTransformation::canBeTransformed(const Transforma
} }
if (updatePrecisions && restrictions.size() > 0) { if (updatePrecisions && restrictions.size() > 0) {
const element::Type parentPrecision = dequantization.data.get_element_type();
const auto& availablePreisions = restrictions[0].second; const auto& availablePreisions = restrictions[0].second;
if (availablePreisions.empty()) {
return false;
}
const auto dequantization = NetworkHelper::getDequantization(operation, inputIndex);
const element::Type parentPrecision = dequantization.data.get_element_type();
if (std::find(availablePreisions.begin(), availablePreisions.end(), parentPrecision) == availablePreisions.end()) { if (std::find(availablePreisions.begin(), availablePreisions.end(), parentPrecision) == availablePreisions.end()) {
return false; return false;
} }
@ -221,6 +222,35 @@ bool MultiplyToGroupConvolutionTransformation::canBeTransformedToGroupConvolutio
return (pShape.rank().get_length() == 4ul) || (pShape.rank().get_length() == 5ul); return (pShape.rank().get_length() == 4ul) || (pShape.rank().get_length() == 5ul);
} }
bool MultiplyToGroupConvolutionTransformation::isDynamicOrScalar(const std::shared_ptr<const Node>& node) {
auto getConstantIndex = [](const std::shared_ptr<const Node>& node) -> int {
if (is_type<opset1::Constant>(node->get_input_node_shared_ptr(1))) {
return 1;
}
if (is_type<opset1::Constant>(node->get_input_node_shared_ptr(0))) {
return 0;
}
return -1;
};
const int constantIndex = getConstantIndex(node);
if (constantIndex == -1) {
return false;
}
const Input<const Node> constantInput = node->input(constantIndex);
const auto shape = constantInput.get_partial_shape();
if (shape.is_dynamic() || shape.rank().is_dynamic()) {
return true;
}
if (std::all_of(shape.begin(), shape.end(), [](const Dimension& dimension) { return dimension == 1ul; })) {
return true;
}
return false;
}
void MultiplyToGroupConvolutionTransformation::setGroupSize(const size_t groupSize) { void MultiplyToGroupConvolutionTransformation::setGroupSize(const size_t groupSize) {
this->groupSize = groupSize; this->groupSize = groupSize;
} }

View File

@ -357,6 +357,9 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) {
lptManager.get_pass_config()->set_callback<ngraph::pass::low_precision::ConvolutionBackpropDataTransformation>([](const_node_ptr& node) -> bool { lptManager.get_pass_config()->set_callback<ngraph::pass::low_precision::ConvolutionBackpropDataTransformation>([](const_node_ptr& node) -> bool {
return LayerTransformation::isAsymmetricQuantization(node) || WeightableLayerTransformation::isAsymmetricOnWeights(node); return LayerTransformation::isAsymmetricQuantization(node) || WeightableLayerTransformation::isAsymmetricOnWeights(node);
}); });
lptManager.get_pass_config()->set_callback<ngraph::pass::low_precision::MultiplyToGroupConvolutionTransformation>([](const_node_ptr& node) -> bool {
return MultiplyToGroupConvolutionTransformation::isDynamicOrScalar(node);
});
lptManager.run_passes(nGraphFunc); lptManager.run_passes(nGraphFunc);
} }

View File

@ -20,6 +20,9 @@ if(ENABLE_MYRIAD)
install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/custom_kernels/ install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/custom_kernels/
DESTINATION ${IE_CPACK_LIBRARY_PATH}/vpu_custom_kernels DESTINATION ${IE_CPACK_LIBRARY_PATH}/vpu_custom_kernels
COMPONENT myriad) COMPONENT myriad)
install(DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/vpu_custom_kernels/
DESTINATION ${IE_CPACK_LIBRARY_PATH}/vpu_custom_kernels
COMPONENT myriad)
install(DIRECTORY ${VPU_CLC_MA2X8X_ROOT}/ install(DIRECTORY ${VPU_CLC_MA2X8X_ROOT}/
DESTINATION deployment_tools/tools/cl_compiler DESTINATION deployment_tools/tools/cl_compiler
COMPONENT myriad COMPONENT myriad

View File

@ -48,10 +48,12 @@ private:
void serializeParamsImpl(BlobSerializer& serializer) const override { void serializeParamsImpl(BlobSerializer& serializer) const override {
auto normalize = attrs().get<int>("normalize"); auto normalize = attrs().get<int>("normalize");
auto across_channels = attrs().get<int>("across_channels"); auto across_channels = attrs().get<int>("across_channels");
auto across_width = attrs().get<int>("across_width");
auto eps = attrs().get<float>("eps"); auto eps = attrs().get<float>("eps");
serializer.append(static_cast<int32_t>(normalize)); serializer.append(static_cast<int32_t>(normalize));
serializer.append(static_cast<int32_t>(across_channels)); serializer.append(static_cast<int32_t>(across_channels));
serializer.append(static_cast<int32_t>(across_width));
serializer.append(static_cast<float>(eps)); serializer.append(static_cast<float>(eps));
} }
@ -88,11 +90,13 @@ void FrontEnd::parseMVN(const Model& model, const ie::CNNLayerPtr& layer, const
for (int i = 0; i < indicesSize; i++) { for (int i = 0; i < indicesSize; i++) {
axes.insert(getDimFromAxis(ndims, indicesPtr[i])); axes.insert(getDimFromAxis(ndims, indicesPtr[i]));
} }
const auto width = axes.count(Dim::W);
VPU_THROW_UNLESS(!axes.count(Dim::N) && axes.count(Dim::H) && axes.count(Dim::W), VPU_THROW_UNLESS(!axes.count(Dim::N) && width,
"Unsupported combination of indices in layer \"%s\". " "Unsupported combination of indices in layer \"%s\". "
"Only across channel and full batch supported.", layer->name); "Only across channel, width and full batch supported.", layer->name);
const auto acrossChannels = axes.count(Dim::C) != 0; const auto acrossChannels = axes.count(Dim::C) != 0;
const auto acrossWidth = width == 1 && axes.count(Dim::H) == 0;
const auto normVariance = layer->GetParamAsBool("normalize_variance"); const auto normVariance = layer->GetParamAsBool("normalize_variance");
const auto eps = layer->GetParamAsFloat("eps"); const auto eps = layer->GetParamAsFloat("eps");
@ -104,6 +108,7 @@ void FrontEnd::parseMVN(const Model& model, const ie::CNNLayerPtr& layer, const
auto stage = model->addNewStage<MVNStage>(layer->name, StageType::MVN, layer, inputs, outputs); auto stage = model->addNewStage<MVNStage>(layer->name, StageType::MVN, layer, inputs, outputs);
stage->attrs().set<int>("normalize", normVariance); stage->attrs().set<int>("normalize", normVariance);
stage->attrs().set<int>("across_channels", acrossChannels); stage->attrs().set<int>("across_channels", acrossChannels);
stage->attrs().set<int>("across_width", acrossWidth);
stage->attrs().set<float>("eps", eps); stage->attrs().set<float>("eps", eps);
} }
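The accepted axis sets collapse into the two serialized flags; a self-contained sketch of the mapping, with Dim reduced to the four names used here:

#include <set>

enum class Dim { N, C, H, W };
struct MvnFlags { bool across_channels; bool across_width; };

// Mirrors parseMVN above (W must be present, N absent):
//   {W}     -> across_width only       {C,W}   -> across_channels + across_width
//   {H,W}   -> plain spatial MVN       {C,H,W} -> across_channels
static MvnFlags mvnFlags(const std::set<Dim>& axes) {
    MvnFlags f{};
    f.across_channels = axes.count(Dim::C) != 0;
    f.across_width    = axes.count(Dim::W) == 1 && axes.count(Dim::H) == 0;
    return f;
}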

View File

@ -24,6 +24,13 @@ using namespace testing;
using namespace ngraph; using namespace ngraph;
using namespace ngraph::pass; using namespace ngraph::pass;
using const_node_ptr = const std::shared_ptr<const ngraph::Node>;
using callback_function_type = std::function<bool(const_node_ptr&)>;
bool empty_callback(const std::shared_ptr<const ngraph::Node>& node) {
return false;
}
class ConvolutionBackpropDataTransformationTestValues { class ConvolutionBackpropDataTransformationTestValues {
public: public:
class Actual { class Actual {
@ -33,26 +40,31 @@ public:
builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights; builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights;
builder::subgraph::DequantizationOperations dequantizationOnWeights; builder::subgraph::DequantizationOperations dequantizationOnWeights;
std::shared_ptr<ngraph::opset1::Constant> weights; std::shared_ptr<ngraph::opset1::Constant> weights;
callback_function_type callback;
Actual() = default; Actual() = default;
Actual( Actual(
const ngraph::element::Type& precisionBeforeDequantization, const ngraph::element::Type& precisionBeforeDequantization,
const ngraph::builder::subgraph::DequantizationOperations& dequantizationOnActivations, const ngraph::builder::subgraph::DequantizationOperations& dequantizationOnActivations,
const builder::subgraph::FakeQuantizeOnWeights& fakeQuantizeOnWeights, const builder::subgraph::FakeQuantizeOnWeights& fakeQuantizeOnWeights,
const std::shared_ptr<ngraph::opset1::Constant>& weights) : const std::shared_ptr<ngraph::opset1::Constant>& weights,
const callback_function_type& callback = empty_callback) :
precisionBeforeDequantization(precisionBeforeDequantization), precisionBeforeDequantization(precisionBeforeDequantization),
dequantizationOnActivations(dequantizationOnActivations), dequantizationOnActivations(dequantizationOnActivations),
fakeQuantizeOnWeights(fakeQuantizeOnWeights), fakeQuantizeOnWeights(fakeQuantizeOnWeights),
weights(weights) {} weights(weights),
callback(callback) {}
Actual( Actual(
const ngraph::element::Type& precisionBeforeDequantization, const ngraph::element::Type& precisionBeforeDequantization,
const ngraph::builder::subgraph::DequantizationOperations& dequantizationOnActivations, const ngraph::builder::subgraph::DequantizationOperations& dequantizationOnActivations,
const builder::subgraph::DequantizationOperations& dequantizationOnWeights, const builder::subgraph::DequantizationOperations& dequantizationOnWeights,
const std::shared_ptr<ngraph::opset1::Constant>& weights) : const std::shared_ptr<ngraph::opset1::Constant>& weights,
const callback_function_type& callback = empty_callback) :
precisionBeforeDequantization(precisionBeforeDequantization), precisionBeforeDequantization(precisionBeforeDequantization),
dequantizationOnActivations(dequantizationOnActivations), dequantizationOnActivations(dequantizationOnActivations),
dequantizationOnWeights(dequantizationOnWeights), dequantizationOnWeights(dequantizationOnWeights),
weights(weights) {} weights(weights),
callback(callback) {}
}; };
class Expected { class Expected {
@ -124,10 +136,11 @@ public:
actualWeights); actualWeights);
SimpleLowPrecisionTransformer transform; SimpleLowPrecisionTransformer transform;
transform.add<ngraph::pass::low_precision::ConvolutionBackpropDataTransformation, ngraph::opset1::Convolution>(testValues.params); transform.add<low_precision::ConvolutionBackpropDataTransformation, opset1::ConvolutionBackpropData>(testValues.params);
transform.get_pass_config()->set_callback<low_precision::ConvolutionBackpropDataTransformation>(testValues.actual.callback);
transform.transform(actualFunction); transform.transform(actualFunction);
std::shared_ptr<Node> refWeights = pass::low_precision::fold<opset1::Broadcast>( std::shared_ptr<Node> refWeights = low_precision::fold<opset1::Broadcast>(
testValues.expected.weights, testValues.expected.weights,
opset1::Constant::create( opset1::Constant::create(
element::i64, element::i64,
@ -179,7 +192,7 @@ public:
TEST_P(ConvolutionBackpropDataTransformation, CompareFunctions) { TEST_P(ConvolutionBackpropDataTransformation, CompareFunctions) {
actualFunction->validate_nodes_and_infer_types(); actualFunction->validate_nodes_and_infer_types();
auto res = compare_functions(referenceFunction, actualFunction, true, true, true); auto res = compare_functions(referenceFunction, actualFunction, true, true, false);
ASSERT_TRUE(res.first) << res.second; ASSERT_TRUE(res.first) << res.second;
} }
@ -455,6 +468,27 @@ const std::vector<ConvolutionBackpropDataTransformationTestValues> testValues =
true true
} }
}, },
// issue #59593: subtract on activations, non-asymmetric
{
LayerTransformation::createParamsU8I8(),
// ActualValues
{
ngraph::element::u8,
{{ngraph::element::f32}, {128.f}, {0.01f}},
{ 255ul, Shape({ 1, 2, 1, 1 }), { 0.f }, { 254.f }, { 0.f }, { 25.4f } },
op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ 2.f }),
low_precision::LayerTransformation::isAsymmetricQuantization
},
// ExpectedValues
{
ngraph::element::u8,
{{ngraph::element::f32}, {128.f}, {0.01f}},
{},
{},
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ 2.f }),
false // weights are not folded because of callback returning true
}
},
}; };
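The new case relies on the callback convention used throughout these tests: a pass-config callback that returns true vetoes the matched node, so fold_fake_quantize never runs and the FakeQuantize on weights survives. In the fixture above this boils down to the following fragment (transform is the SimpleLowPrecisionTransformer instance):

// A callback returning true skips the transformation for the matched node.
transform.get_pass_config()->set_callback<low_precision::ConvolutionBackpropDataTransformation>(
    [](const std::shared_ptr<const ngraph::Node>& node) -> bool {
        return true;  // veto: leave the weights unfolded
    });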
INSTANTIATE_TEST_SUITE_P( INSTANTIATE_TEST_SUITE_P(

View File

@ -71,7 +71,7 @@ public:
SimpleLowPrecisionTransformer transform; SimpleLowPrecisionTransformer transform;
transform.add<ngraph::pass::low_precision::ConvolutionTransformation, ngraph::opset1::Convolution>(testValues.params); transform.add<ngraph::pass::low_precision::ConvolutionTransformation, ngraph::opset1::Convolution>(testValues.params);
if (testValues.params.supportAsymmetricQuantization == false) { if (testValues.params.supportAsymmetricQuantization == false) {
transform.set_callback<ngraph::pass::low_precision::ConvolutionTransformation>( transform.get_pass_config()->set_callback<ngraph::pass::low_precision::ConvolutionTransformation>(
[](const std::shared_ptr<const ngraph::Node>& node) -> bool { [](const std::shared_ptr<const ngraph::Node>& node) -> bool {
return ngraph::pass::low_precision::LayerTransformation::isAsymmetricQuantization(node); return ngraph::pass::low_precision::LayerTransformation::isAsymmetricQuantization(node);
}); });

View File

@ -84,7 +84,7 @@ public:
SimpleLowPrecisionTransformer transform; SimpleLowPrecisionTransformer transform;
transform.add<ngraph::pass::low_precision::GroupConvolutionTransformation, ngraph::opset1::GroupConvolution>(testValues.params); transform.add<ngraph::pass::low_precision::GroupConvolutionTransformation, ngraph::opset1::GroupConvolution>(testValues.params);
if (testValues.params.supportAsymmetricQuantization == false) { if (testValues.params.supportAsymmetricQuantization == false) {
transform.set_callback<ngraph::pass::low_precision::GroupConvolutionTransformation>( transform.get_pass_config()->set_callback<ngraph::pass::low_precision::GroupConvolutionTransformation>(
[](const std::shared_ptr<const ngraph::Node>& node) -> bool { [](const std::shared_ptr<const ngraph::Node>& node) -> bool {
return ngraph::pass::low_precision::LayerTransformation::isAsymmetricQuantization(node); return ngraph::pass::low_precision::LayerTransformation::isAsymmetricQuantization(node);
}); });

View File

@ -105,7 +105,7 @@ public:
SimpleLowPrecisionTransformer transformer; SimpleLowPrecisionTransformer transformer;
transformer.add<ngraph::pass::low_precision::MatMulTransformation, ngraph::opset1::MatMul>(testValues.params); transformer.add<ngraph::pass::low_precision::MatMulTransformation, ngraph::opset1::MatMul>(testValues.params);
if (testValues.params.support3DTensorOnActivations == false) { if (testValues.params.support3DTensorOnActivations == false) {
transformer.set_callback<ngraph::pass::low_precision::MatMulTransformation>( transformer.get_pass_config()->set_callback<ngraph::pass::low_precision::MatMulTransformation>(
[](const std::shared_ptr<const ngraph::Node>& node) -> bool { [](const std::shared_ptr<const ngraph::Node>& node) -> bool {
return ngraph::pass::low_precision::MatMulTransformation::is3DTensorOnActivations(node); return ngraph::pass::low_precision::MatMulTransformation::is3DTensorOnActivations(node);
}); });

View File

@ -21,9 +21,10 @@ using namespace ngraph::pass;
SimpleLowPrecisionTransformer::SimpleLowPrecisionTransformer( SimpleLowPrecisionTransformer::SimpleLowPrecisionTransformer(
const std::vector<ngraph::pass::low_precision::OperationPrecisionRestriction>& precisionRestrictions, const std::vector<ngraph::pass::low_precision::OperationPrecisionRestriction>& precisionRestrictions,
const std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>& quantizationRestrictions) { const std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>& quantizationRestrictions) {
auto passConfig = get_pass_config();
// TODO: use one pass manager // TODO: use one pass manager
markup = std::make_shared<ngraph::pass::Manager>(); markup = std::make_shared<ngraph::pass::Manager>(passConfig);
markup->register_pass<ngraph::pass::low_precision::MarkupCanBeQuantized>(); markup->register_pass<ngraph::pass::low_precision::MarkupCanBeQuantized>();
markup->register_pass<ngraph::pass::low_precision::MarkupPrecisions>(precisionRestrictions); markup->register_pass<ngraph::pass::low_precision::MarkupPrecisions>(precisionRestrictions);
markup->register_pass<ngraph::pass::low_precision::MarkupPerTensorQuantization>(quantizationRestrictions); markup->register_pass<ngraph::pass::low_precision::MarkupPerTensorQuantization>(quantizationRestrictions);
@ -32,15 +33,20 @@ SimpleLowPrecisionTransformer::SimpleLowPrecisionTransformer(
markup->register_pass<ngraph::pass::low_precision::AlignQuantizationIntervals>(); markup->register_pass<ngraph::pass::low_precision::AlignQuantizationIntervals>();
markup->register_pass<ngraph::pass::low_precision::AlignQuantizationParameters>(); markup->register_pass<ngraph::pass::low_precision::AlignQuantizationParameters>();
common = std::make_shared<ngraph::pass::Manager>(); common = std::make_shared<ngraph::pass::Manager>(passConfig);
commonGraphRewrite = common->register_pass<ngraph::pass::GraphRewrite>(); commonGraphRewrite = common->register_pass<ngraph::pass::GraphRewrite>();
cleanup = common->register_pass<ngraph::pass::GraphRewrite>(); cleanup = common->register_pass<ngraph::pass::GraphRewrite>();
} }
void SimpleLowPrecisionTransformer::transform(std::shared_ptr<ngraph::Function>& function) { void SimpleLowPrecisionTransformer::transform(std::shared_ptr<ngraph::Function>& function) {
run_on_function(function);
}
bool SimpleLowPrecisionTransformer::run_on_function(std::shared_ptr<ngraph::Function> function) {
ngraph::pass::low_precision::TypeRelaxedReplacer pass; ngraph::pass::low_precision::TypeRelaxedReplacer pass;
pass.run_on_function(function); pass.run_on_function(function);
markup->run_passes(function); markup->run_passes(function);
common->run_passes(function); common->run_passes(function);
return true;
} }
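Handing get_pass_config() to both managers is what makes a single set_callback call visible everywhere: ngraph::pass::Manager accepts a shared PassConfig at construction. The pattern in isolation (pass names assumed):

#include <memory>
#include "ngraph/pass/manager.hpp"
#include "ngraph/pass/pass_config.hpp"

void shared_pass_config_sketch() {
    auto config = std::make_shared<ngraph::pass::PassConfig>();
    ngraph::pass::Manager markup(config);   // both pipelines observe the same
    ngraph::pass::Manager common(config);   // callbacks and enable/disable state
    // config->set_callback<SomeTransformation>(...) would now affect either one.
}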

View File

@ -14,7 +14,7 @@
#include "low_precision/common/operation_precision_restriction.hpp" #include "low_precision/common/operation_precision_restriction.hpp"
#include "low_precision/common/operation_per_tensor_quantization_restriction.hpp" #include "low_precision/common/operation_per_tensor_quantization_restriction.hpp"
class SimpleLowPrecisionTransformer { class SimpleLowPrecisionTransformer : public ngraph::pass::FunctionPass{
public: public:
SimpleLowPrecisionTransformer( SimpleLowPrecisionTransformer(
const std::vector<ngraph::pass::low_precision::OperationPrecisionRestriction>& precisionRestrictions = {}, const std::vector<ngraph::pass::low_precision::OperationPrecisionRestriction>& precisionRestrictions = {},
@ -25,12 +25,8 @@ public:
commonGraphRewrite->add_matcher<T>(TestTransformationParams::toParams(params)); commonGraphRewrite->add_matcher<T>(TestTransformationParams::toParams(params));
} }
template <class T>
void set_callback(const std::function<bool(const std::shared_ptr<const ::ngraph::Node>)>& callback) {
common->get_pass_config()->set_callback<T>(callback);
}
void transform(std::shared_ptr<ngraph::Function>& function); void transform(std::shared_ptr<ngraph::Function>& function);
bool run_on_function(std::shared_ptr<ngraph::Function> f) override;
std::shared_ptr<ngraph::pass::Manager> markup; std::shared_ptr<ngraph::pass::Manager> markup;
std::shared_ptr<ngraph::pass::Manager> common; std::shared_ptr<ngraph::pass::Manager> common;

View File

@ -36,6 +36,7 @@ const std::map<ActivationTypes, std::vector<std::vector<float>>> activationTypes
{Clamp, {{-2.0f, 2.0f}}}, {Clamp, {{-2.0f, 2.0f}}},
{Negative, {}}, {Negative, {}},
{Acos, {}}, {Acos, {}},
{Acosh, {}},
{Asin, {}}, {Asin, {}},
{Asinh, {}}, {Asinh, {}},
{Atan, {}}, {Atan, {}},

View File

@ -17,22 +17,34 @@ const std::vector<bool> normalizeVariance = {true, false};
const std::vector<std::vector<size_t>> inputShapes = {{1, 10, 5, 7, 8}, const std::vector<std::vector<size_t>> inputShapes = {{1, 10, 5, 7, 8},
{1, 3, 8, 9, 49}}; {1, 3, 8, 9, 49}};
const std::vector<ngraph::AxisSet> axes = {{1, 2, 3}, {2, 3}};
const std::vector<bool> acrossChannels = {true, false}; const std::vector<bool> acrossChannels = {true, false};
const std::vector<ngraph::AxisSet> emptyReductionAxes = {{}};
const std::vector<bool> emptyAcrossChannels = {{}};
const std::vector<double> epsilon = {0.000000001}; const std::vector<double> epsilon = {0.000000001};
const auto MvnCases = ::testing::Combine( const auto MvnAcrossChannels = ::testing::Combine(
::testing::ValuesIn(inputShapes), ::testing::ValuesIn(dataPrecisions), ::testing::ValuesIn(inputShapes), ::testing::ValuesIn(dataPrecisions),
::testing::ValuesIn(acrossChannels), ::testing::ValuesIn(normalizeVariance), ::testing::ValuesIn(emptyReductionAxes), ::testing::ValuesIn(acrossChannels),
::testing::ValuesIn(epsilon), ::testing::ValuesIn(normalizeVariance), ::testing::ValuesIn(epsilon),
::testing::Values(CommonTestUtils::DEVICE_CPU)); ::testing::Values(CommonTestUtils::DEVICE_CPU));
TEST_P(MvnLayerTest, Serialize) { const auto MvnReductionAxes = ::testing::Combine(
::testing::ValuesIn(inputShapes), ::testing::ValuesIn(dataPrecisions),
::testing::ValuesIn(axes), ::testing::ValuesIn(emptyAcrossChannels),
::testing::ValuesIn(normalizeVariance), ::testing::ValuesIn(epsilon),
::testing::Values(CommonTestUtils::DEVICE_CPU));
TEST_P(Mvn1LayerTest, Serialize) {
Serialize(); Serialize();
} }
INSTANTIATE_TEST_SUITE_P(smoke_MKLDNN_TestsMVN, MvnLayerTest, MvnCases, INSTANTIATE_TEST_SUITE_P(smoke_MKLDNN_TestsMVN_across_channels, Mvn1LayerTest, MvnAcrossChannels,
MvnLayerTest::getTestCaseName); Mvn1LayerTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_MKLDNN_TestsMVN_reduction_axes, Mvn1LayerTest, MvnReductionAxes,
Mvn1LayerTest::getTestCaseName);
// ------------------- MVN-6 ------------------------------------------------- // ------------------- MVN-6 -------------------------------------------------

View File

@ -12,14 +12,47 @@ const std::vector<element::Type> precisions = {
element::f32 element::f32
}; };
const std::vector< ngraph::PartialShape > inputShapes = { namespace shape4d {
{ 1ul, 4ul, 16ul, 16ul }, { 1ul, 4ul, 16ul, 16ul, 16ul } const std::vector<ngraph::PartialShape> inputShapes = {
{ 1ul, 3ul, 16ul, 16ul },
{ 4ul, 3ul, 16ul, 16ul }
}; };
const std::vector<builder::subgraph::FakeQuantizeOnData> fqOnData = { const std::vector<MultiplyToGroupConvolutionTransformationParam> params = {
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } }, {
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 10.f }, { 25.5f } }, { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { -12.8f }, { 12.7f } } {{1.f, 2.f, 3.f}, element::f32, Shape{1, 3, 1, 1}},
"output/GroupConvolution",
"U8"
},
// Multiply with scalar is not transformed to GroupConvolution
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
{{4.f}, element::f32, Shape{1, 1, 1, 1}},
"output/GroupConvolution",
""
},
// Multiply with scalar is not transformed to GroupConvolution
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
{{4.f}, element::f32, Shape{}},
"output/GroupConvolution",
""
},
// Zero point
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f }, { -1.28f }, { 1.27f } },
{{1.f, 2.f, 3.f}, element::f32, Shape{1, 3, 1, 1}},
"output/GroupConvolution",
"U8"
},
// Zero point
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f / 2.f }, { -1.28f }, { 1.27f / 2.f} },
{{1.f, 2.f, 3.f}, element::f32, Shape{1, 3, 1, 1}},
"output/GroupConvolution",
"U8"
}
}; };
INSTANTIATE_TEST_SUITE_P(smoke_LPT, MultiplyToGroupConvolutionTransformation, INSTANTIATE_TEST_SUITE_P(smoke_LPT, MultiplyToGroupConvolutionTransformation,
@ -27,6 +60,59 @@ INSTANTIATE_TEST_SUITE_P(smoke_LPT, MultiplyToGroupConvolutionTransformation,
::testing::ValuesIn(precisions), ::testing::ValuesIn(precisions),
::testing::ValuesIn(inputShapes), ::testing::ValuesIn(inputShapes),
::testing::Values(CommonTestUtils::DEVICE_CPU), ::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::ValuesIn(fqOnData)), ::testing::ValuesIn(params)),
MultiplyToGroupConvolutionTransformation::getTestCaseName); MultiplyToGroupConvolutionTransformation::getTestCaseName);
} // namespace shape4d
namespace shape5d {
const std::vector<ngraph::PartialShape> inputShapes = {
{ 1ul, 3ul, 16ul, 16ul, 16ul },
{ 4ul, 3ul, 16ul, 16ul, 16ul }
};
const std::vector<MultiplyToGroupConvolutionTransformationParam> params = {
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
{{1.f, 2.f, 3.f}, element::f32, Shape{1, 3, 1, 1, 1}},
"output/GroupConvolution",
"U8"
},
// Multiply with scalar is not transformed to GroupConvolution
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
{{4.f}, element::f32, Shape{1, 1, 1, 1, 1}},
"output/GroupConvolution",
""
},
// Multiply with scalar is not transformed to GroupConvolution
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
{{4.f}, element::f32, Shape{}},
"output/GroupConvolution",
""
},
// Zero point
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f }, { -1.28f }, { 1.27f } },
{{1.f, 2.f, 3.f}, element::f32, Shape{1, 3, 1, 1, 1}},
"output/GroupConvolution",
"U8"
},
// Zero point
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f / 2.f }, { -1.28f }, { 1.27f / 2.f} },
{{1.f, 2.f, 3.f}, element::f32, Shape{1, 3, 1, 1, 1}},
"output/GroupConvolution",
"U8"
}
};
INSTANTIATE_TEST_SUITE_P(smoke_LPT, MultiplyToGroupConvolutionTransformation,
::testing::Combine(
::testing::ValuesIn(precisions),
::testing::ValuesIn(inputShapes),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::ValuesIn(params)),
MultiplyToGroupConvolutionTransformation::getTestCaseName);
} // namespace shape5d
} // namespace } // namespace

View File

@ -37,6 +37,7 @@ const std::map<ActivationTypes, std::vector<std::vector<float>>> activationTypes
{Clamp, {{-2.0f, 2.0f}}}, {Clamp, {{-2.0f, 2.0f}}},
{Negative, {}}, {Negative, {}},
{Acos, {}}, {Acos, {}},
{Acosh, {}},
{Asin, {}}, {Asin, {}},
{Asinh, {}}, {Asinh, {}},
{Atan, {}}, {Atan, {}},
@ -66,6 +67,7 @@ const std::map<ActivationTypes, std::vector<std::vector<float>>> activationTypes
// List of operations that should be tested also with integer precision // List of operations that should be tested also with integer precision
const std::map<ActivationTypes, std::vector<std::vector<float>>> intActivationTypes = { const std::map<ActivationTypes, std::vector<std::vector<float>>> intActivationTypes = {
{Acosh, {}},
{Asinh, {}}, {Asinh, {}},
{Atan, {}}, {Atan, {}},
{Negative, {}}, {Negative, {}},

View File

@ -56,7 +56,8 @@ namespace {
Values<int64_t>(7), Values<int64_t>(7),
Values<InferenceEngine::SizeVector>({2, 1, 4}), Values<InferenceEngine::SizeVector>({2, 1, 4}),
Values<InferenceEngine::Precision>(Precision::FP32, Precision::I32), Values<InferenceEngine::Precision>(Precision::FP32, Precision::I32),
Values(CommonTestUtils::DEVICE_CPU))); Values(CommonTestUtils::DEVICE_CPU),
Values<std::map<std::string, std::string>>({})));
using namespace testing; using namespace testing;
INSTANTIATE_TEST_SUITE_P(smoke_TrivialLoop, TrivialLoopTest, INSTANTIATE_TEST_SUITE_P(smoke_TrivialLoop, TrivialLoopTest,
Combine( Combine(

View File

@ -9,6 +9,9 @@
using namespace LayerTestsDefinitions; using namespace LayerTestsDefinitions;
const std::vector<bool> emptyAcrossChannels = {{}};
const std::vector<ngraph::AxisSet> emptyReductionAxes = {{}};
const std::vector<std::vector<size_t>> inputShapes = { const std::vector<std::vector<size_t>> inputShapes = {
{8}, {8},
{1, 16}, {1, 16},
@ -41,23 +44,35 @@ const std::vector<double> epsilon = {
0.000000001 0.000000001
}; };
const auto MvnCases = ::testing::Combine( std::vector<InferenceEngine::Precision> dataPrecisions = {
InferenceEngine::Precision::FP16,
InferenceEngine::Precision::FP32
};
const auto MvnAcrossChannels = ::testing::Combine(
::testing::ValuesIn(inputShapes), ::testing::ValuesIn(inputShapes),
::testing::Values(InferenceEngine::Precision::FP32), ::testing::ValuesIn(dataPrecisions),
::testing::ValuesIn(emptyReductionAxes),
::testing::ValuesIn(acrossChannels), ::testing::ValuesIn(acrossChannels),
::testing::ValuesIn(normalizeVariance), ::testing::ValuesIn(normalizeVariance),
::testing::ValuesIn(epsilon), ::testing::ValuesIn(epsilon),
::testing::Values(CommonTestUtils::DEVICE_CPU) ::testing::Values(CommonTestUtils::DEVICE_CPU)
); );
INSTANTIATE_TEST_SUITE_P(smoke_MKLDNN_TestsMVN, MvnLayerTest, MvnCases, MvnLayerTest::getTestCaseName); const auto MvnReductionAxes = ::testing::Combine(
::testing::ValuesIn(std::vector<std::vector<size_t>>{{1, 10, 5, 17}, {1, 3, 8, 9}}),
::testing::Values(InferenceEngine::Precision::FP32),
::testing::ValuesIn(std::vector<ngraph::AxisSet>{{1, 2, 3}, {2, 3}}),
::testing::ValuesIn(emptyAcrossChannels),
::testing::ValuesIn(normalizeVariance),
::testing::ValuesIn(epsilon),
::testing::Values(CommonTestUtils::DEVICE_CPU)
);
INSTANTIATE_TEST_SUITE_P(smoke_MKLDNN_TestsMVN_AcrossChannels, Mvn1LayerTest, MvnAcrossChannels, Mvn1LayerTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_MKLDNN_TestsMVN_ReductionAxes, Mvn1LayerTest, MvnReductionAxes, Mvn1LayerTest::getTestCaseName);
std::vector<InferenceEngine::Precision> dataPrecisions = {
InferenceEngine::Precision::FP32,
InferenceEngine::Precision::FP16
};
std::vector<InferenceEngine::Precision> idxPrecisions = { std::vector<InferenceEngine::Precision> idxPrecisions = {
InferenceEngine::Precision::I32, InferenceEngine::Precision::I32,

View File

@ -13,7 +13,7 @@ using namespace CPUTestUtils;
namespace CPULayerTestsDefinitions { namespace CPULayerTestsDefinitions {
typedef std::tuple< typedef std::tuple<
LayerTestsDefinitions::mvnParams, LayerTestsDefinitions::mvn1Params,
CPUSpecificParams, CPUSpecificParams,
fusingSpecificParams, fusingSpecificParams,
Precision, // CNNNetwork input precision Precision, // CNNNetwork input precision
@ -24,14 +24,14 @@ class MvnLayerCPUTest : public testing::WithParamInterface<MvnLayerCPUTestParamS
virtual public LayerTestsUtils::LayerTestsCommon, public CpuTestWithFusing { virtual public LayerTestsUtils::LayerTestsCommon, public CpuTestWithFusing {
public: public:
static std::string getTestCaseName(testing::TestParamInfo<MvnLayerCPUTestParamSet> obj) { static std::string getTestCaseName(testing::TestParamInfo<MvnLayerCPUTestParamSet> obj) {
LayerTestsDefinitions::mvnParams basicParamsSet; LayerTestsDefinitions::mvn1Params basicParamsSet;
CPUSpecificParams cpuParams; CPUSpecificParams cpuParams;
fusingSpecificParams fusingParams; fusingSpecificParams fusingParams;
Precision inputPrecision, outputPrecision; Precision inputPrecision, outputPrecision;
std::tie(basicParamsSet, cpuParams, fusingParams, inputPrecision, outputPrecision) = obj.param; std::tie(basicParamsSet, cpuParams, fusingParams, inputPrecision, outputPrecision) = obj.param;
std::ostringstream result; std::ostringstream result;
result << LayerTestsDefinitions::MvnLayerTest::getTestCaseName(testing::TestParamInfo<LayerTestsDefinitions::mvnParams>( result << LayerTestsDefinitions::Mvn1LayerTest::getTestCaseName(testing::TestParamInfo<LayerTestsDefinitions::mvn1Params>(
basicParamsSet, 0)); basicParamsSet, 0));
result << "_" << "CNNInpPrc=" << inputPrecision.name(); result << "_" << "CNNInpPrc=" << inputPrecision.name();
@ -45,7 +45,7 @@ public:
} }
protected: protected:
void SetUp() override { void SetUp() override {
LayerTestsDefinitions::mvnParams basicParamsSet; LayerTestsDefinitions::mvn1Params basicParamsSet;
CPUSpecificParams cpuParams; CPUSpecificParams cpuParams;
fusingSpecificParams fusingParams; fusingSpecificParams fusingParams;
std::tie(basicParamsSet, cpuParams, fusingParams, inPrc, outPrc) = this->GetParam(); std::tie(basicParamsSet, cpuParams, fusingParams, inPrc, outPrc) = this->GetParam();
@ -55,13 +55,17 @@ protected:
InferenceEngine::SizeVector inputShapes; InferenceEngine::SizeVector inputShapes;
InferenceEngine::Precision netPrecision; InferenceEngine::Precision netPrecision;
ngraph::AxisSet axes;
bool acrossChanels, normalizeVariance; bool acrossChanels, normalizeVariance;
double eps; double eps;
std::tie(inputShapes, netPrecision, acrossChanels, normalizeVariance, eps, targetDevice) = basicParamsSet; std::tie(inputShapes, netPrecision, axes, acrossChanels, normalizeVariance, eps, targetDevice) = basicParamsSet;
auto netPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); auto netPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
auto param = ngraph::builder::makeParams(netPrc, {inputShapes}); auto param = ngraph::builder::makeParams(netPrc, {inputShapes});
auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(param)); auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(param));
auto mvn = ngraph::builder::makeMVN(paramOuts[0], acrossChanels, normalizeVariance, eps); auto mvn = ngraph::builder::makeMVN(paramOuts[0], acrossChanels, normalizeVariance, eps);
if (!axes.empty()) {
mvn = ngraph::builder::makeMVN(paramOuts[0], axes, normalizeVariance, eps);
}
selectedType = getPrimitiveType() + "_" + inPrc.name(); selectedType = getPrimitiveType() + "_" + inPrc.name();
@ -128,6 +132,8 @@ const std::vector<double> epsilon = {
0.000000001 0.000000001
}; };
const std::vector<ngraph::AxisSet> emptyReductionAxes = {{}};
std::vector<Precision> inpPrc = {Precision::I8, Precision::BF16, Precision::FP32}; std::vector<Precision> inpPrc = {Precision::I8, Precision::BF16, Precision::FP32};
std::vector<Precision> outPrc = {Precision::BF16, Precision::FP32}; std::vector<Precision> outPrc = {Precision::BF16, Precision::FP32};
@ -162,6 +168,7 @@ const auto Mvn3D = ::testing::Combine(
::testing::Combine( ::testing::Combine(
::testing::ValuesIn(inputShapes_3D), ::testing::ValuesIn(inputShapes_3D),
::testing::Values(InferenceEngine::Precision::FP32), ::testing::Values(InferenceEngine::Precision::FP32),
::testing::ValuesIn(emptyReductionAxes),
::testing::ValuesIn(acrossChannels), ::testing::ValuesIn(acrossChannels),
::testing::ValuesIn(normalizeVariance), ::testing::ValuesIn(normalizeVariance),
::testing::ValuesIn(epsilon), ::testing::ValuesIn(epsilon),
@ -177,6 +184,7 @@ const auto Mvn4D = ::testing::Combine(
::testing::Combine( ::testing::Combine(
::testing::ValuesIn(inputShapes_4D), ::testing::ValuesIn(inputShapes_4D),
::testing::Values(InferenceEngine::Precision::FP32), ::testing::Values(InferenceEngine::Precision::FP32),
::testing::ValuesIn(emptyReductionAxes),
::testing::ValuesIn(acrossChannels), ::testing::ValuesIn(acrossChannels),
::testing::ValuesIn(normalizeVariance), ::testing::ValuesIn(normalizeVariance),
::testing::ValuesIn(epsilon), ::testing::ValuesIn(epsilon),
@ -192,6 +200,7 @@ const auto Mvn5D = ::testing::Combine(
::testing::Combine( ::testing::Combine(
::testing::ValuesIn(inputShapes_5D), ::testing::ValuesIn(inputShapes_5D),
::testing::Values(InferenceEngine::Precision::FP32), ::testing::Values(InferenceEngine::Precision::FP32),
::testing::ValuesIn(emptyReductionAxes),
::testing::ValuesIn(acrossChannels), ::testing::ValuesIn(acrossChannels),
::testing::ValuesIn(normalizeVariance), ::testing::ValuesIn(normalizeVariance),
::testing::ValuesIn(epsilon), ::testing::ValuesIn(epsilon),
@ -216,6 +225,7 @@ const auto Mvn1D = ::testing::Combine(
::testing::Combine( ::testing::Combine(
::testing::ValuesIn(inputShapes_1D), ::testing::ValuesIn(inputShapes_1D),
::testing::Values(InferenceEngine::Precision::FP32), ::testing::Values(InferenceEngine::Precision::FP32),
::testing::ValuesIn(emptyReductionAxes),
::testing::ValuesIn(acrossChannels), ::testing::ValuesIn(acrossChannels),
::testing::ValuesIn(normalizeVariance), ::testing::ValuesIn(normalizeVariance),
::testing::ValuesIn(epsilon), ::testing::ValuesIn(epsilon),
@ -232,6 +242,7 @@ const auto Mvn2D = ::testing::Combine(
::testing::Combine( ::testing::Combine(
::testing::ValuesIn(inputShapes_2D), ::testing::ValuesIn(inputShapes_2D),
::testing::Values(InferenceEngine::Precision::FP32), ::testing::Values(InferenceEngine::Precision::FP32),
::testing::ValuesIn(emptyReductionAxes),
::testing::Values(false), ::testing::Values(false),
::testing::ValuesIn(normalizeVariance), ::testing::ValuesIn(normalizeVariance),
::testing::ValuesIn(epsilon), ::testing::ValuesIn(epsilon),
@ -248,6 +259,7 @@ const auto Mvn2DTrans = ::testing::Combine(
::testing::Combine( ::testing::Combine(
::testing::ValuesIn(inputShapes_2D), ::testing::ValuesIn(inputShapes_2D),
::testing::Values(InferenceEngine::Precision::FP32), ::testing::Values(InferenceEngine::Precision::FP32),
::testing::ValuesIn(emptyReductionAxes),
::testing::Values(true), ::testing::Values(true),
::testing::ValuesIn(normalizeVariance), ::testing::ValuesIn(normalizeVariance),
::testing::ValuesIn(epsilon), ::testing::ValuesIn(epsilon),

View File

@ -9,18 +9,50 @@ using namespace InferenceEngine::details;
namespace { namespace {
const std::vector<element::Type> precisions = { const std::vector<element::Type> precisions = {
element::f32, element::f32
element::f16
}; };
const std::vector<ngraph::PartialShape>inputShapes = { namespace shape4d {
{ 1ul, 4ul, 16ul, 16ul }, { 1ul, 4ul, 16ul, 16ul, 16ul } const std::vector<ngraph::PartialShape> inputShapes = {
{ 1ul, 3ul, 16ul, 16ul },
{ 4ul, 3ul, 16ul, 16ul }
}; };
const std::vector<builder::subgraph::FakeQuantizeOnData> fqOnData = { const std::vector<MultiplyToGroupConvolutionTransformationParam> params = {
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } }, {
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 10.f }, { 25.5f } }, { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { -12.8f }, { 12.7f } } {{1.f, 2.f, 3.f}, element::f32, Shape{1, 3, 1, 1}},
"output/GroupConvolution",
"U8"
},
// Multiply with scalar is transformed to GroupConvolution
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
{{4.f}, element::f32, Shape{1, 1, 1, 1}},
"output/GroupConvolution",
"U8"
},
// Multiply with scalar is transformed to GroupConvolution
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
{{4.f}, element::f32, Shape{}},
"output/GroupConvolution",
"U8"
},
// Zero point
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f }, { -1.28f }, { 1.27f } },
{{1.f, 2.f, 3.f}, element::f32, Shape{1, 3, 1, 1}},
"output/GroupConvolution",
"I8"
},
// Zero point
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f / 2.f }, { -1.28f }, { 1.27f / 2.f} },
{{1.f, 2.f, 3.f}, element::f32, Shape{1, 3, 1, 1}},
"output/GroupConvolution",
"U8"
}
}; };
INSTANTIATE_TEST_SUITE_P(smoke_LPT, MultiplyToGroupConvolutionTransformation, INSTANTIATE_TEST_SUITE_P(smoke_LPT, MultiplyToGroupConvolutionTransformation,
@ -28,6 +60,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_LPT, MultiplyToGroupConvolutionTransformation,
::testing::ValuesIn(precisions), ::testing::ValuesIn(precisions),
::testing::ValuesIn(inputShapes), ::testing::ValuesIn(inputShapes),
::testing::Values(CommonTestUtils::DEVICE_GPU), ::testing::Values(CommonTestUtils::DEVICE_GPU),
::testing::ValuesIn(fqOnData)), ::testing::ValuesIn(params)),
MultiplyToGroupConvolutionTransformation::getTestCaseName); MultiplyToGroupConvolutionTransformation::getTestCaseName);
} // namespace shape4d
} // namespace } // namespace

View File

@ -34,6 +34,7 @@ const std::map<ActivationTypes, std::vector<std::vector<float>>> activationTypes
{Clamp, {{-2.0f, 2.0f}}}, {Clamp, {{-2.0f, 2.0f}}},
{Negative, {}}, {Negative, {}},
{Acos, {}}, {Acos, {}},
{Acosh, {}},
{Asin, {}}, {Asin, {}},
{Asinh, {}}, {Asinh, {}},
{Atan, {}}, {Atan, {}},

View File

@ -9,6 +9,8 @@
using namespace LayerTestsDefinitions; using namespace LayerTestsDefinitions;
const std::vector<ngraph::AxisSet> emptyReductionAxes = {{}};
const std::vector<std::vector<size_t>> inputShapes = { const std::vector<std::vector<size_t>> inputShapes = {
{1, 32, 17}, {1, 32, 17},
{1, 37, 9}, {1, 37, 9},
@ -41,13 +43,14 @@ const std::vector<double> epsilon = {
const auto MvnCases = ::testing::Combine( const auto MvnCases = ::testing::Combine(
::testing::ValuesIn(inputShapes), ::testing::ValuesIn(inputShapes),
::testing::Values(InferenceEngine::Precision::FP32), ::testing::Values(InferenceEngine::Precision::FP32),
::testing::ValuesIn(emptyReductionAxes),
::testing::ValuesIn(acrossChannels), ::testing::ValuesIn(acrossChannels),
::testing::ValuesIn(normalizeVariance), ::testing::ValuesIn(normalizeVariance),
::testing::ValuesIn(epsilon), ::testing::ValuesIn(epsilon),
::testing::Values(CommonTestUtils::DEVICE_GPU) ::testing::Values(CommonTestUtils::DEVICE_GPU)
); );
INSTANTIATE_TEST_SUITE_P(smoke_CLDNN_TestsMVN, MvnLayerTest, MvnCases, MvnLayerTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_CLDNN_TestsMVN, Mvn1LayerTest, MvnCases, Mvn1LayerTest::getTestCaseName);
std::vector<InferenceEngine::Precision> dataPrecisions = { std::vector<InferenceEngine::Precision> dataPrecisions = {
InferenceEngine::Precision::FP32, InferenceEngine::Precision::FP32,

View File

@ -58,6 +58,9 @@ std::vector<std::string> disabledTestPatterns() {
R"(.*IEClassImportExportTestP.*)", R"(.*IEClassImportExportTestP.*)",
// TODO: Issue: 59586, NormalizeL2 output mismatch for empty axes case // TODO: Issue: 59586, NormalizeL2 output mismatch for empty axes case
R"(.*NormalizeL2LayerTest.*axes=\(\).*)" R"(.*NormalizeL2LayerTest.*axes=\(\).*)",
// Not allowed dynamic loop tests on GPU
R"(.*smoke_StaticShapeLoop_dynamic_exit.*)"
}; };
} }

View File

@ -0,0 +1,140 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <tuple>
#include <string>
#include <vector>
#include <memory>
#include "single_layer_tests/loop.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
using namespace InferenceEngine;
namespace {
std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP32,
InferenceEngine::Precision::I32
};
std::map<std::string, std::string> netConfigurations = {
{GPUConfigParams::KEY_GPU_ENABLE_LOOP_UNROLLING, PluginConfigParams::NO}
};
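// Assumption: KEY_GPU_ENABLE_LOOP_UNROLLING=NO keeps the Loop as a single cldnn
// loop primitive rather than unrolled body copies, which is what these cases target.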
static const std::vector<std::tuple<bool, int64_t, int64_t, int64_t>> static_loop_types_axis_0 {
// GCC4.8 limitation: have to specify type of each element in list
// static_trip_count | max | dynamic_exit | axis
std::tuple<bool, int64_t, int64_t, int64_t>{ true , 10, -1, 0 }, // n_iter 10, no dynamic exit
};
std::vector<InferenceEngine::SizeVector> inputs_0 = {
{1, 4, 2}
};
INSTANTIATE_TEST_CASE_P(smoke_StaticShapeLoop_axis_0, StaticShapeLoopTest,
testing::Combine(
/* unrolling */ testing::ValuesIn(std::vector<bool>{false}),
/* static_continue_cond */ testing::Values(true),
/* args_pack */ testing::ValuesIn(static_loop_types_axis_0),
/* start_value */ testing::Values<int64_t>(0),
/* data_shape */ testing::ValuesIn(inputs_0),
/* data_prc */ testing::ValuesIn(netPrecisions),
/* device */ testing::Values<std::string>(CommonTestUtils::DEVICE_GPU),
/* configuration */ testing::Values<std::map<std::string, std::string>>(netConfigurations)),
StaticShapeLoopTest::getTestCaseName);
static const std::vector<std::tuple<bool, int64_t, int64_t, int64_t>> static_loop_types_1 {
// GCC4.8 limitation: have to specify type of each element in list
// static_trip_count | max | dynamic_exit | axis
std::tuple<bool, int64_t, int64_t, int64_t>{ true , 5, -1, 1 }, // n_iter 5, no dynamic exit
};
std::vector<InferenceEngine::SizeVector> inputs_1 = {
{2, 1, 4, 6}
};
INSTANTIATE_TEST_CASE_P(smoke_StaticShapeLoop_axis_1, StaticShapeLoopTest,
testing::Combine(
/* unrolling */ testing::ValuesIn(std::vector<bool>{false}),
/* static_continue_cond */ testing::Values(true),
/* args_pack */ testing::ValuesIn(static_loop_types_1),
/* start_value */ testing::Values<int64_t>(0),
/* data_shape */ testing::ValuesIn(inputs_1),
/* data_prc */ testing::ValuesIn(netPrecisions),
/* device */ testing::Values<std::string>(CommonTestUtils::DEVICE_GPU),
/* configuration */ testing::Values<std::map<std::string, std::string>>(netConfigurations)),
StaticShapeLoopTest::getTestCaseName);
static const std::vector<std::tuple<bool, int64_t, int64_t, int64_t>> static_loop_types_2 {
// GCC4.8 limitation: have to specify type of each element in list
// static_trip_count | max | dynamic_exit | axis
std::tuple<bool, int64_t, int64_t, int64_t>{ true , 10, -1, 2 }, // n_iter 10, no dynamic exit
};
std::vector<InferenceEngine::SizeVector> inputs_2 = {
{2, 4, 1, 6}
};
INSTANTIATE_TEST_CASE_P(smoke_StaticShapeLoop_axis_2, StaticShapeLoopTest,
testing::Combine(
/* unrolling */ testing::ValuesIn(std::vector<bool>{false}),
/* static_continue_cond */ testing::Values(true),
/* args_pack */ testing::ValuesIn(static_loop_types_2),
/* start_value */ testing::Values<int64_t>(0),
/* data_shape */ testing::ValuesIn(inputs_2),
/* data_prc */ testing::ValuesIn(netPrecisions),
/* device */ testing::Values<std::string>(CommonTestUtils::DEVICE_GPU),
/* configuration */ testing::Values<std::map<std::string, std::string>>(netConfigurations)),
StaticShapeLoopTest::getTestCaseName);
static const std::vector<std::tuple<bool, int64_t, int64_t, int64_t>> static_loop_types_no_auto_concat {
// GCC4.8 limitation: have to specify type of each element in list
// static_trip_count | max | dynamic_exit | axis
std::tuple<bool, int64_t, int64_t, int64_t>{ true , 10, -1, -1 }, // n_iter 10, no dynamic exit
};
std::vector<InferenceEngine::SizeVector> inputs_no_auto_concat = {
{4, 20, 12}
};
INSTANTIATE_TEST_CASE_P(smoke_StaticShapeLoop_no_auto_concat, StaticShapeLoopTest,
testing::Combine(
/* unrolling */ testing::ValuesIn(std::vector<bool>{false}),
/* static_continue_cond */ testing::Values(true),
/* args_pack */ testing::ValuesIn(static_loop_types_no_auto_concat),
/* start_value */ testing::Values<int64_t>(0),
/* data_shape */ testing::ValuesIn(inputs_no_auto_concat),
/* data_prc */ testing::ValuesIn(netPrecisions),
/* device */ testing::Values<std::string>(CommonTestUtils::DEVICE_GPU),
/* configuration */ testing::Values<std::map<std::string, std::string>>(netConfigurations)),
StaticShapeLoopTest::getTestCaseName);
static const std::vector<std::tuple<bool, int64_t, int64_t, int64_t>> static_loop_types_dynamic_exit {
// GCC4.8 limitation: have to specify type of each element in list
// static_trip_count | max | dynamic_exit | axis
std::tuple<bool, int64_t, int64_t, int64_t>{ true , 5, 3, -1 }, // n_iter 3, dynamic exit on 3
std::tuple<bool, int64_t, int64_t, int64_t>{ true , 5, 7, 1 }, // n_iter 5, dynamic exit not reached
std::tuple<bool, int64_t, int64_t, int64_t>{ true , -1, 5, -1 }, // n_iter 5, inf loop with dynamic exit on 5
std::tuple<bool, int64_t, int64_t, int64_t>{ false , 5, 3, -1 }, // | same with dynamic trip count
std::tuple<bool, int64_t, int64_t, int64_t>{ false , 5, 7, 1 }, // |
std::tuple<bool, int64_t, int64_t, int64_t>{ false , -1, 5, -1 } // |
};
std::vector<InferenceEngine::SizeVector> inputs_dynamic_exit = {
{4, 1, 2}
};
INSTANTIATE_TEST_CASE_P(smoke_StaticShapeLoop_dynamic_exit, StaticShapeLoopTest,
testing::Combine(
/* unrolling */ testing::ValuesIn(std::vector<bool>{false}),
/* static_continue_cond */ testing::Values(true),
/* args_pack */ testing::ValuesIn(static_loop_types_dynamic_exit),
/* start_value */ testing::Values<int64_t>(0),
/* data_shape */ testing::ValuesIn(inputs_dynamic_exit),
/* data_prc */ testing::ValuesIn(netPrecisions),
/* device */ testing::Values<std::string>(CommonTestUtils::DEVICE_GPU),
/* configuration */ testing::Values<std::map<std::string, std::string>>(netConfigurations)),
StaticShapeLoopTest::getTestCaseName);
} // namespace
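Reading the tuples above: each entry packs (static_trip_count, max, dynamic_exit, axis), and the number of iterations the loop actually executes is whichever of the trip count or the dynamic exit fires first, with a trip count of -1 standing for an infinite loop bounded only by the exit condition. A minimal standalone sketch of that rule (not part of the test framework, just the arithmetic the case comments encode):

#include <algorithm>
#include <cstdint>
#include <iostream>

// Derive the expected number of executed iterations from one test tuple.
// A max_iter of -1 models an "infinite" loop; dynamic_exit of -1 means no exit.
int64_t expected_iterations(int64_t max_iter, int64_t dynamic_exit) {
    if (max_iter < 0)
        return dynamic_exit;                  // only the dynamic exit stops the loop
    if (dynamic_exit < 0)
        return max_iter;                      // no dynamic exit: full trip count
    return std::min(max_iter, dynamic_exit);  // whichever fires first
}

int main() {
    std::cout << expected_iterations(5, 3) << '\n';   // 3: exit on iteration 3
    std::cout << expected_iterations(5, 7) << '\n';   // 5: exit never reached
    std::cout << expected_iterations(-1, 5) << '\n';  // 5: infinite loop, exit on 5
}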

View File

@ -15,6 +15,8 @@ const std::vector<std::vector<int>> indices_4D = {
}; };
const std::vector<std::vector<int>> indices_3D = { const std::vector<std::vector<int>> indices_3D = {
{2},
{0, 2},
{1, 2}, // equivalent MVN-1 across_channel=0 {1, 2}, // equivalent MVN-1 across_channel=0
{0, 1, 2} // equivalent MVN-1 across_channel=1 {0, 1, 2} // equivalent MVN-1 across_channel=1
}; };

View File

@ -8,6 +8,7 @@
#include <memory> #include <memory>
#include "shared_test_classes/base/low_precision_transformations/layer_transformation.hpp" #include "shared_test_classes/base/low_precision_transformations/layer_transformation.hpp"
#include "lpt_ngraph_functions/common/constant.hpp"
#include "lpt_ngraph_functions/common/dequantization_operations.hpp" #include "lpt_ngraph_functions/common/dequantization_operations.hpp"
#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp" #include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp"
@ -15,11 +16,19 @@ using namespace ngraph;
namespace LayerTestsDefinitions { namespace LayerTestsDefinitions {
class MultiplyToGroupConvolutionTransformationParam {
public:
builder::subgraph::FakeQuantizeOnData fqOnData;
builder::subgraph::Constant constant;
std::string layerName;
std::string expectedKernelType;
};
typedef std::tuple < typedef std::tuple <
element::Type, element::Type,
PartialShape, PartialShape,
std::string, std::string,
builder::subgraph::FakeQuantizeOnData> MultiplyToGroupConvolutionTransformationParams; MultiplyToGroupConvolutionTransformationParam> MultiplyToGroupConvolutionTransformationParams;
class MultiplyToGroupConvolutionTransformation : class MultiplyToGroupConvolutionTransformation :
public testing::WithParamInterface<MultiplyToGroupConvolutionTransformationParams>, public testing::WithParamInterface<MultiplyToGroupConvolutionTransformationParams>,
@ -29,6 +38,7 @@ public:
protected: protected:
void SetUp() override; void SetUp() override;
void Run() override;
}; };
} // namespace LayerTestsDefinitions } // namespace LayerTestsDefinitions

View File

@ -8,10 +8,15 @@
namespace LayerTestsDefinitions { namespace LayerTestsDefinitions {
// DEPRECATED, remove MvnLayerTest once the KMB and ARM plugins switch to Mvn1LayerTest (#60420)
TEST_P(MvnLayerTest, CompareWithRefs) { TEST_P(MvnLayerTest, CompareWithRefs) {
Run(); Run();
}; };
TEST_P(Mvn1LayerTest, CompareWithRefs) {
Run();
};
TEST_P(Mvn6LayerTest, CompareWithRefs) { TEST_P(Mvn6LayerTest, CompareWithRefs) {
Run(); Run();
}; };

View File

@ -26,24 +26,41 @@ std::string MultiplyToGroupConvolutionTransformation::getTestCaseName(testing::T
ngraph::element::Type precision; ngraph::element::Type precision;
ngraph::PartialShape shape; ngraph::PartialShape shape;
auto params = LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParamsU8I8(); auto params = LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParamsU8I8();
builder::subgraph::FakeQuantizeOnData fqOnData; MultiplyToGroupConvolutionTransformationParam param;
std::tie(precision, shape, targetDevice, fqOnData) = obj.param; std::tie(precision, shape, targetDevice, param) = obj.param;
std::ostringstream result; std::ostringstream result;
result << getTestCaseNameByParams(precision, shape, targetDevice, params) << "_" << fqOnData; result << getTestCaseNameByParams(precision, shape, targetDevice, params) << "_" <<
param.fqOnData << "_" <<
param.constant << "_" <<
param.layerName << "_" <<
param.expectedKernelType;
return result.str(); return result.str();
} }
void MultiplyToGroupConvolutionTransformation::SetUp() { void MultiplyToGroupConvolutionTransformation::SetUp() {
ngraph::PartialShape shape; ngraph::PartialShape shape;
ngraph::element::Type precision; ngraph::element::Type precision;
builder::subgraph::FakeQuantizeOnData fqOnData; MultiplyToGroupConvolutionTransformationParam param;
std::tie(precision, shape, targetDevice, fqOnData) = this->GetParam(); std::tie(precision, shape, targetDevice, param) = this->GetParam();
function = ngraph::builder::subgraph::MultiplyToGroupConvolutionFunction::getOriginal( function = ngraph::builder::subgraph::MultiplyToGroupConvolutionFunction::getOriginal(
precision, precision,
shape, shape,
fqOnData); param.fqOnData,
param.constant);
}
void MultiplyToGroupConvolutionTransformation::Run() {
LayerTestsCommon::Run();
const auto param = std::get<3>(GetParam());
const auto actualPrecision = getRuntimePrecision(param.layerName);
auto expectedPrecision = param.expectedKernelType;
if (expectedPrecision == "FP32" && std::get<0>(GetParam()) == ngraph::element::f16) {
expectedPrecision = "FP16";
}
EXPECT_EQ(actualPrecision, expectedPrecision);
} }
TEST_P(MultiplyToGroupConvolutionTransformation, CompareWithRefImpl) { TEST_P(MultiplyToGroupConvolutionTransformation, CompareWithRefImpl) {
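The Run() override above illustrates a common LPT test pattern: execute the network, then assert that the layer named in the test parameters really ran with the expected kernel precision, folding FP32 down to FP16 when the whole network is f16. A hypothetical helper (not the actual LayerTestsCommon API) capturing just the folding rule:

#include <cassert>
#include <string>

// Fold the expected kernel precision for f16 networks: FP32 kernels are
// expected to execute as FP16 when the network element type is f16.
std::string expected_runtime_precision(std::string expected, bool network_is_f16) {
    if (expected == "FP32" && network_is_f16)
        return "FP16";
    return expected;
}

int main() {
    assert(expected_runtime_precision("FP32", true) == "FP16");
    assert(expected_runtime_precision("FP32", false) == "FP32");
    assert(expected_runtime_precision("U8", true) == "U8");
}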

View File

@ -39,6 +39,7 @@ static std::map<ngraph::helpers::ActivationTypes, std::string> activationNames =
{ngraph::helpers::ActivationTypes::Clamp, "Clamp"}, {ngraph::helpers::ActivationTypes::Clamp, "Clamp"},
{ngraph::helpers::ActivationTypes::Negative, "Negative"}, {ngraph::helpers::ActivationTypes::Negative, "Negative"},
{ngraph::helpers::ActivationTypes::Acos, "Acos"}, {ngraph::helpers::ActivationTypes::Acos, "Acos"},
{ngraph::helpers::ActivationTypes::Acosh, "Acosh"},
{ngraph::helpers::ActivationTypes::Asin, "Asin"}, {ngraph::helpers::ActivationTypes::Asin, "Asin"},
{ngraph::helpers::ActivationTypes::Asinh, "Asinh"}, {ngraph::helpers::ActivationTypes::Asinh, "Asinh"},
{ngraph::helpers::ActivationTypes::Atan, "Atan"}, {ngraph::helpers::ActivationTypes::Atan, "Atan"},

View File

@ -50,7 +50,8 @@ using StaticShapeLoopParams = typename std::tuple<
int64_t, int64_t,
InferenceEngine::SizeVector, InferenceEngine::SizeVector,
InferenceEngine::Precision, InferenceEngine::Precision,
std::string std::string,
std::map<std::string, std::string>
>; >;
/** /**

View File

@ -11,6 +11,7 @@
namespace LayerTestsDefinitions { namespace LayerTestsDefinitions {
// DEPRECATED, remove MvnLayerTest once the KMB and ARM plugins switch to Mvn1LayerTest (#60420)
typedef std::tuple< typedef std::tuple<
InferenceEngine::SizeVector, // Input shapes InferenceEngine::SizeVector, // Input shapes
InferenceEngine::Precision, // Input precision InferenceEngine::Precision, // Input precision
@ -27,6 +28,24 @@ protected:
void SetUp() override; void SetUp() override;
}; };
typedef std::tuple<
InferenceEngine::SizeVector, // Input shapes
InferenceEngine::Precision, // Input precision
ngraph::AxisSet, // Reduction axes
bool, // Across channels
bool, // Normalize variance
double, // Epsilon
std::string // Device name
> mvn1Params;
class Mvn1LayerTest : public testing::WithParamInterface<mvn1Params>, virtual public LayerTestsUtils::LayerTestsCommon {
public:
static std::string getTestCaseName(const testing::TestParamInfo<mvn1Params>& obj);
protected:
void SetUp() override;
};
typedef std::tuple< typedef std::tuple<
InferenceEngine::SizeVector, // Input shapes InferenceEngine::SizeVector, // Input shapes
InferenceEngine::Precision, // Data precision InferenceEngine::Precision, // Data precision

View File

@ -84,6 +84,12 @@ InferenceEngine::Blob::Ptr ActivationLayerTest::GenerateInput(const InferenceEng
resolution = 32768; resolution = 32768;
break; break;
} }
case ngraph::helpers::ActivationTypes::Acosh: {
data_start_from = 1;
data_range = 200;
resolution = 32768;
break;
}
case ngraph::helpers::ActivationTypes::Ceiling: { case ngraph::helpers::ActivationTypes::Ceiling: {
data_start_from = -1000; data_start_from = -1000;
data_range = 2000; data_range = 2000;
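The Acosh case added above exists because acosh(x) is defined only for x >= 1, so its inputs must start at 1 rather than at the negative defaults used for other activations. A self-contained sketch of the same quantized uniform generation scheme (an assumed simplification of the framework's generator):

#include <cmath>
#include <iostream>
#include <random>

int main() {
    const double start = 1.0, range = 200.0, resolution = 32768.0;
    std::mt19937 gen(42);
    // Draw values quantized to 1/resolution steps inside [start, start + range).
    std::uniform_int_distribution<long long> dist(0, static_cast<long long>(range * resolution) - 1);
    for (int i = 0; i < 3; ++i) {
        const double x = start + dist(gen) / resolution;
        std::cout << "acosh(" << x << ") = " << std::acosh(x) << '\n';  // always well-defined
    }
}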

View File

@ -140,6 +140,47 @@ namespace LayerTestsDefinitions {
function = std::make_shared<ngraph::Function>(ngraph::ResultVector{result0, result1, result2}, params, "loop"); function = std::make_shared<ngraph::Function>(ngraph::ResultVector{result0, result1, result2}, params, "loop");
} }
std::string StaticShapeLoopTest::getTestCaseName(const testing::TestParamInfo<StaticShapeLoopParams> &obj) {
bool unrolling;
bool static_iter_num;
bool static_continue_cond;
int64_t max_iter_num;
int64_t dynamic_exit;
int64_t axis;
int64_t start_value;
InferenceEngine::SizeVector data_shape;
InferenceEngine::Precision data_prc;
std::string targetDevice;
auto args_pack = std::tie(static_iter_num, max_iter_num, dynamic_exit, axis);
std::map<std::string, std::string> configuration;
std::tie(
unrolling,
static_continue_cond,
args_pack,
start_value,
data_shape,
data_prc,
targetDevice,
configuration) = obj.param;
std::ostringstream result;
result << "unrolling=" << std::to_string(unrolling) << "_";
result << "static_iter_num=" << std::to_string(static_iter_num) << "_";
result << "static_continue_cond=" << std::to_string(static_continue_cond) << "_";
result << "max_iter_num=" << std::to_string(max_iter_num) << "_";
result << "dynamic_exit=" << std::to_string(dynamic_exit) << "_";
result << "axis=" << std::to_string(axis) << "_";
result << "start_value=" << std::to_string(start_value) << "_";
result << "max_iter_num=" << std::to_string(max_iter_num) << "_";
result << "IS=" << CommonTestUtils::vec2str(data_shape) << "_";
result << "netPRC=" << std::to_string(data_prc) << "_";
result << "targetDevice=" << targetDevice << "_";
auto res_str = result.str();
std::replace(res_str.begin(), res_str.end(), '-', '_');
return res_str;
}
void StaticShapeLoopTest::SetUp() { void StaticShapeLoopTest::SetUp() {
SKIP_IF_CURRENT_TEST_IS_DISABLED() SKIP_IF_CURRENT_TEST_IS_DISABLED()
auto args_pack = std::tie(static_iter_num, max_iter_num, dynamic_exit, axis); auto args_pack = std::tie(static_iter_num, max_iter_num, dynamic_exit, axis);
@ -150,7 +191,8 @@ namespace LayerTestsDefinitions {
start_value, start_value,
data_shape, data_shape,
data_prc, data_prc,
targetDevice) = GetParam(); targetDevice,
configuration) = GetParam();
const auto prc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(data_prc); const auto prc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(data_prc);
const auto ngShape = ngraph::Shape{data_shape}; const auto ngShape = ngraph::Shape{data_shape};
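Both functions above rely on the nested std::tie trick: args_pack is itself a tuple of references, so assigning the parameter tuple through the outer std::tie unpacks the inner (static_trip_count, max, dynamic_exit, axis) tuple into the member fields in one statement. A minimal reproduction with hypothetical stand-in values:

#include <cstdint>
#include <iostream>
#include <tuple>

int main() {
    bool static_iter_num = false, unrolling = false;
    int64_t max_iter_num = 0, dynamic_exit = 0, axis = 0, start_value = 0;

    // args_pack holds references to the four locals above.
    auto args_pack = std::tie(static_iter_num, max_iter_num, dynamic_exit, axis);

    // Stand-in for obj.param: (unrolling, (static, max, exit, axis), start_value).
    auto param = std::make_tuple(true,
        std::make_tuple(true, int64_t{5}, int64_t{3}, int64_t{-1}), int64_t{0});

    std::tie(unrolling, args_pack, start_value) = param;
    std::cout << max_iter_num << ' ' << dynamic_exit << ' ' << axis << '\n';  // 5 3 -1
}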

View File

@ -7,6 +7,7 @@
namespace LayerTestsDefinitions { namespace LayerTestsDefinitions {
// DEPRECATED, remove MvnLayerTest once the KMB and ARM plugins switch to Mvn1LayerTest (#60420)
std::string MvnLayerTest::getTestCaseName(const testing::TestParamInfo<mvnParams>& obj) { std::string MvnLayerTest::getTestCaseName(const testing::TestParamInfo<mvnParams>& obj) {
InferenceEngine::SizeVector inputShapes; InferenceEngine::SizeVector inputShapes;
InferenceEngine::Precision inputPrecision; InferenceEngine::Precision inputPrecision;
@ -38,6 +39,46 @@ void MvnLayerTest::SetUp() {
function = std::make_shared<ngraph::Function>(results, param, "mvn"); function = std::make_shared<ngraph::Function>(results, param, "mvn");
} }
std::string Mvn1LayerTest::getTestCaseName(const testing::TestParamInfo<mvn1Params>& obj) {
InferenceEngine::SizeVector inputShapes;
InferenceEngine::Precision inputPrecision;
ngraph::AxisSet axes;
bool acrossChannels, normalizeVariance;
double eps;
std::string targetDevice;
std::tie(inputShapes, inputPrecision, axes, acrossChannels, normalizeVariance, eps, targetDevice) = obj.param;
std::ostringstream result;
result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_";
result << "Precision=" << inputPrecision.name() << "_";
if (!axes.empty()) {
result << "ReductionAccess=" << CommonTestUtils::vec2str(axes.to_vector()) << "_";
} else {
result << "AcrossChannels=" << (acrossChannels ? "TRUE" : "FALSE") << "_";
}
result << "NormalizeVariance=" << (normalizeVariance ? "TRUE" : "FALSE") << "_";
result << "Epsilon=" << eps << "_";
result << "TargetDevice=" << targetDevice;
return result.str();
}
void Mvn1LayerTest::SetUp() {
InferenceEngine::SizeVector inputShapes;
InferenceEngine::Precision inputPrecision;
ngraph::AxisSet axes;
bool acrossChannels, normalizeVariance;
double eps;
std::tie(inputShapes, inputPrecision, axes, acrossChannels, normalizeVariance, eps, targetDevice) = this->GetParam();
auto inType = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inputPrecision);
auto param = ngraph::builder::makeParams(inType, {inputShapes});
auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(param));
auto mvn = std::dynamic_pointer_cast<ngraph::op::MVN>(ngraph::builder::makeMVN(paramOuts[0], acrossChannels, normalizeVariance, eps));
if (!axes.empty()) {
mvn = std::dynamic_pointer_cast<ngraph::op::MVN>(ngraph::builder::makeMVN(paramOuts[0], axes, normalizeVariance, eps));
}
ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(mvn)};
function = std::make_shared<ngraph::Function>(results, param, "MVN1");
}
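For reference, what both MVN-1 variants compute per reduction group is just mean subtraction followed by optional variance normalization. A minimal numeric sketch over one flat group (assuming a single reduction set, which is what the axes or across_channels parameter selects in the real op):

#include <cmath>
#include <iostream>
#include <vector>

std::vector<float> mvn_ref(std::vector<float> x, bool normalize_variance, double eps) {
    double mean = 0;
    for (float v : x) mean += v;
    mean /= x.size();
    double var = 0;
    for (float v : x) var += (v - mean) * (v - mean);
    var /= x.size();
    const double denom = normalize_variance ? std::sqrt(var + eps) : 1.0;
    for (float& v : x) v = static_cast<float>((v - mean) / denom);  // center (and scale)
    return x;
}

int main() {
    for (float v : mvn_ref({1, 2, 3, 4}, true, 1e-9))
        std::cout << v << ' ';  // zero-mean, unit-variance output
}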
std::string Mvn6LayerTest::getTestCaseName(const testing::TestParamInfo<mvn6Params>& obj) { std::string Mvn6LayerTest::getTestCaseName(const testing::TestParamInfo<mvn6Params>& obj) {
InferenceEngine::SizeVector inputShapes; InferenceEngine::SizeVector inputShapes;

View File

@ -4,6 +4,7 @@
VERIFIED_OP_REFERENCES = [ VERIFIED_OP_REFERENCES = [
'Abs-1', 'Abs-1',
'Acos-1', 'Acos-1',
'Acosh-3',
'Add-1', 'Add-1',
'Asin-1', 'Asin-1',
'Asinh-3', 'Asinh-3',
@ -56,6 +57,7 @@ VERIFIED_OP_REFERENCES = [
'LSTMSequence-5', 'LSTMSequence-5',
'LogSoftmax-5', 'LogSoftmax-5',
'Loop-5', 'Loop-5',
'MVN-1',
'MVN-6', 'MVN-6',
'Maximum-1', 'Maximum-1',
'MaxPool-1', 'MaxPool-1',

View File

@ -9,6 +9,8 @@
#include <ngraph/op/constant.hpp> #include <ngraph/op/constant.hpp>
#include <ngraph/opsets/opset1.hpp> #include <ngraph/opsets/opset1.hpp>
#include "lpt_ngraph_functions/common/constant.hpp"
#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp"
#include "lpt_ngraph_functions/common/dequantization_operations.hpp" #include "lpt_ngraph_functions/common/dequantization_operations.hpp"
namespace ngraph { namespace ngraph {
@ -26,7 +28,8 @@ public:
static std::shared_ptr<ngraph::Function> getOriginal( static std::shared_ptr<ngraph::Function> getOriginal(
const ngraph::element::Type precision, const ngraph::element::Type precision,
const ngraph::PartialShape& inputShape, const ngraph::PartialShape& inputShape,
const FakeQuantizeOnData& fqOnData); const FakeQuantizeOnData& fqOnData,
const Constant& constant);
static std::shared_ptr<ngraph::Function> getReference( static std::shared_ptr<ngraph::Function> getReference(
const ngraph::PartialShape& inputShape, const ngraph::PartialShape& inputShape,

View File

@ -38,18 +38,27 @@ std::shared_ptr<ngraph::Function> MultiplyToGroupConvolutionFunction::getOrigina
std::shared_ptr<ngraph::Function> MultiplyToGroupConvolutionFunction::getOriginal( std::shared_ptr<ngraph::Function> MultiplyToGroupConvolutionFunction::getOriginal(
const ngraph::element::Type precision, const ngraph::element::Type precision,
const ngraph::PartialShape& inputShape, const ngraph::PartialShape& inputShape,
const FakeQuantizeOnData& fqOnData) { const FakeQuantizeOnData& fqOnData,
const Constant& constant) {
const auto input = std::make_shared<ngraph::opset1::Parameter>(precision, inputShape); const auto input = std::make_shared<ngraph::opset1::Parameter>(precision, inputShape);
const auto fakeQuantizeOnActivations = makeFakeQuantize(input, precision, fqOnData); const auto fakeQuantize = makeFakeQuantize(input, precision, fqOnData);
const auto reshape = std::make_shared<ngraph::opset1::Reshape>(
fakeQuantizeOnActivations,
std::make_shared<ngraph::opset1::Constant>(element::i32, Shape{ static_cast<size_t>(inputShape.rank().get_length()) }, inputShape.to_shape()),
true);
reshape->set_friendly_name("output");
ngraph::ResultVector results{ const auto rank = inputShape.rank();
std::make_shared<ngraph::opset1::Result>(reshape) assert(rank.is_static());
}; const size_t size = rank.get_length() - 2;
const auto maxPool = std::make_shared<opset1::MaxPool>(
fakeQuantize,
Strides(size, 1),
Shape(size, 1),
Shape(size, 0),
Shape(size, 2));
const auto multiply = std::make_shared<ngraph::opset1::Multiply>(
maxPool,
std::make_shared<ngraph::opset1::Constant>(constant.outPrecision, constant.shape, constant.values));
multiply->set_friendly_name("output");
ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(multiply)};
return std::make_shared<ngraph::Function>(results, ngraph::ParameterVector{ input }, "MultiplyToGroupConvolutionFunction"); return std::make_shared<ngraph::Function>(results, ngraph::ParameterVector{ input }, "MultiplyToGroupConvolutionFunction");
} }
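The rewritten test subgraph above (FakeQuantize, then MaxPool, then Multiply by a constant) feeds the MultiplyToGroupConvolution transformation. The rewrite it exercises is sound because multiplying channel c by a scalar s_c is exactly a 1x1 depthwise (groups == channels) convolution whose single weight per group is s_c. A tiny numeric check of that equivalence:

#include <iostream>
#include <vector>

int main() {
    const int C = 2, HW = 3;
    const std::vector<float> x = {1, 2, 3, 4, 5, 6};  // [C][HW] layout
    const std::vector<float> scale = {0.5f, 2.0f};    // per-channel multiplier

    for (int c = 0; c < C; ++c)
        for (int i = 0; i < HW; ++i) {
            const float mul = x[c * HW + i] * scale[c];   // eltwise Multiply
            const float conv = scale[c] * x[c * HW + i];  // 1x1 depthwise conv, weight = scale[c]
            if (mul != conv) { std::cout << "mismatch\n"; return 1; }
        }
    std::cout << "Multiply == 1x1 GroupConvolution, channel-wise\n";
}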

View File

@ -291,6 +291,11 @@ std::shared_ptr<ngraph::Node> makeMVN(const ngraph::Output<Node> &in,
bool normalizeVariance, bool normalizeVariance,
double eps); double eps);
std::shared_ptr<ngraph::Node> makeMVN(const ngraph::Output<Node> &in,
const ngraph::AxisSet &axes,
bool normalizeVariance,
double eps);
std::shared_ptr<ngraph::Node> makeMVN6(const Output<Node>& in, std::shared_ptr<ngraph::Node> makeMVN6(const Output<Node>& in,
const Output<Node>& axesNode, const Output<Node>& axesNode,
bool normalizeVariance, bool normalizeVariance,

View File

@ -99,6 +99,7 @@ enum ActivationTypes {
Clamp, Clamp,
Negative, Negative,
Acos, Acos,
Acosh,
Asin, Asin,
Asinh, Asinh,
Atan, Atan,

View File

@ -46,6 +46,8 @@ std::shared_ptr<ngraph::Node> makeActivation(const ngraph::Output<Node> &in,
return std::make_shared<ngraph::op::Negative>(in); return std::make_shared<ngraph::op::Negative>(in);
case ngraph::helpers::ActivationTypes::Acos: case ngraph::helpers::ActivationTypes::Acos:
return std::make_shared<ngraph::op::Acos>(in); return std::make_shared<ngraph::op::Acos>(in);
case ngraph::helpers::ActivationTypes::Acosh:
return std::make_shared<ngraph::op::Acosh>(in);
case ngraph::helpers::ActivationTypes::Asin: case ngraph::helpers::ActivationTypes::Asin:
return std::make_shared<ngraph::op::Asin>(in); return std::make_shared<ngraph::op::Asin>(in);
case ngraph::helpers::ActivationTypes::Asinh: case ngraph::helpers::ActivationTypes::Asinh:

View File

@ -24,6 +24,15 @@ std::shared_ptr<ngraph::Node> makeMVN(const ngraph::Output<Node> &in,
return mvnNode; return mvnNode;
} }
std::shared_ptr<ngraph::Node> makeMVN(const ngraph::Output<Node> &in,
const ngraph::AxisSet &axes,
bool normalizeVariance,
double eps) {
auto mvnNode = std::make_shared<ngraph::op::MVN>(in, axes, normalizeVariance, eps);
return mvnNode;
}
std::shared_ptr<Node> makeMVN6(const Output<Node>& in, std::shared_ptr<Node> makeMVN6(const Output<Node>& in,
const Output<Node>& axesNode, const Output<Node>& axesNode,
bool normalizeVariance, bool normalizeVariance,

View File

@ -24,7 +24,6 @@ if (ENABLE_CLDNN)
set(CLDNN__INCLUDE_TESTS OFF CACHE BOOL "" FORCE) set(CLDNN__INCLUDE_TESTS OFF CACHE BOOL "" FORCE)
endif() endif()
set(CLDNN_THREADING "${THREADING}" CACHE STRING "" FORCE) set(CLDNN_THREADING "${THREADING}" CACHE STRING "" FORCE)
set(GPU_DEBUG_CONFIG OFF CACHE BOOL "Enable debug config feature")
add_subdirectory(clDNN) add_subdirectory(clDNN)
endif() endif()

View File

@ -20,7 +20,7 @@ else()
add_definitions(-DCLDNN_THREADING=CLDNN_THREADING_THREADPOOL) add_definitions(-DCLDNN_THREADING=CLDNN_THREADING_THREADPOOL)
endif() endif()
if(GPU_DEBUG_CONFIG) if(ENABLE_GPU_DEBUG_CAPS)
add_definitions(-DGPU_DEBUG_CONFIG=1) add_definitions(-DGPU_DEBUG_CONFIG=1)
endif() endif()

View File

@ -5,6 +5,8 @@
#include "cldnn/runtime/debug_configuration.hpp" #include "cldnn/runtime/debug_configuration.hpp"
#include <iostream> #include <iostream>
#include <memory> #include <memory>
#include <vector>
#include <sstream>
namespace cldnn { namespace cldnn {
@ -13,22 +15,81 @@ const char *debug_configuration::prefix = "GPU_Debug: ";
// Default policy is that dump_configuration will override other configuration from IE. // Default policy is that dump_configuration will override other configuration from IE.
#ifdef GPU_DEBUG_CONFIG #ifdef GPU_DEBUG_CONFIG
static void print_option(std::string option_name, std::string option_value) {
template<typename T>
void print_option(std::string option_name, T option_value) {
GPU_DEBUG_COUT << "Config " << option_name << " = " << option_value << std::endl; GPU_DEBUG_COUT << "Config " << option_name << " = " << option_value << std::endl;
} }
static void get_int_env(const std::string &var, int &val) { static std::string to_upper_case(const std::string& var) {
if (const auto env_var = std::getenv(var.c_str())) { std::stringstream s;
val = std::stoi(env_var);
print_option(var, std::to_string(val)); for (size_t i = 0; i < var.size(); i++) {
if (std::isupper(var[i])) {
if (i != 0) {
s << "_";
}
s << var[i];
} else {
s << static_cast<char>(std::toupper(var[i]));
}
}
return s.str();
}
static std::vector<std::string> get_possible_option_names(const std::string& var, std::vector<std::string> allowed_option_prefixes) {
std::vector<std::string> result;
for (auto& prefix : allowed_option_prefixes) {
result.push_back(prefix + var);
result.push_back(prefix + to_upper_case(var));
}
return result;
}
template <typename T>
T convert_to(const std::string &str) {
std::istringstream ss(str);
T res;
ss >> res;
return res;
}
template<typename T>
void get_debug_env_var(const std::string &var, T &val, std::vector<std::string> allowed_option_prefixes) {
bool found = false;
for (auto o : get_possible_option_names(var, allowed_option_prefixes)) {
if (const auto env_var = std::getenv(o.c_str())) {
val = convert_to<T>(env_var);
found = true;
}
}
if (found) {
print_option(var, val);
} }
} }
static void get_str_env(const std::string &var, std::string &val) { template<typename T>
if (const auto env_var = std::getenv(var.c_str())) { void get_gpu_debug_env_var(const std::string &var, T &val) {
val = env_var; return get_debug_env_var(var, val, {"OV_GPU_"});
print_option(var, val); }
}
template<typename T>
void get_common_debug_env_var(const std::string &var, T &val) {
// The list below is ordered from lowest to highest prefix priority.
// If an option is set several times with different prefixes, the version with the highest priority takes effect.
// This makes it possible to enable a global option with some value and then override that value for the GPU plugin.
// For example: OV_GPU_Verbose=2 OV_Verbose=1 ./my_app => this->verbose == 2
// In that case Verbose (with level = 1) is enabled for all OV components that support the option, while the GPU plugin raises the verbose level to 2
std::vector<std::string> allowed_option_prefixes = {
"OV_",
"OV_GPU_"
};
return get_debug_env_var(var, val, allowed_option_prefixes);
} }
#endif #endif
@ -42,13 +103,13 @@ debug_configuration::debug_configuration()
, dump_layers(std::string()) , dump_layers(std::string())
, dump_layers_dst_only(0) { , dump_layers_dst_only(0) {
#ifdef GPU_DEBUG_CONFIG #ifdef GPU_DEBUG_CONFIG
get_int_env("OV_GPU_Verbose", verbose); get_common_debug_env_var("Verbose", verbose);
get_int_env("OV_GPU_PrintMultiKernelPerf", print_multi_kernel_perf); get_gpu_debug_env_var("PrintMultiKernelPerf", print_multi_kernel_perf);
get_int_env("OV_GPU_DisableUsm", disable_usm); get_gpu_debug_env_var("DisableUsm", disable_usm);
get_str_env("OV_GPU_DumpGraphs", dump_graphs); get_gpu_debug_env_var("DumpGraphs", dump_graphs);
get_str_env("OV_GPU_DumpLayersPath", dump_layers_path); get_gpu_debug_env_var("DumpLayersPath", dump_layers_path);
get_str_env("OV_GPU_DumpLayers", dump_layers); get_gpu_debug_env_var("DumpLayers", dump_layers);
get_int_env("OV_GPU_DumpLayersDstOnly", dump_layers_dst_only); get_gpu_debug_env_var("DumpLayersDstOnly", dump_layers_dst_only);
if (dump_layers_path.length() > 0 && !disable_usm) { if (dump_layers_path.length() > 0 && !disable_usm) {
disable_usm = 1; disable_usm = 1;
GPU_DEBUG_COUT << "DisableUsm=1 because of DumpLayersPath" << std::endl; GPU_DEBUG_COUT << "DisableUsm=1 because of DumpLayersPath" << std::endl;
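The lookup scheme above accepts both the camel-case option name and an upper-snake-case form, and scans prefixes from lowest to highest priority so that a later (more specific) prefix wins. A standalone sketch of the name expansion and the override order; to_upper_snake mirrors the to_upper_case helper above, and the environment probing is illustrative only:

#include <cctype>
#include <cstdlib>
#include <iostream>
#include <sstream>
#include <string>

std::string to_upper_snake(const std::string& var) {
    std::stringstream s;
    for (size_t i = 0; i < var.size(); ++i) {
        if (std::isupper(static_cast<unsigned char>(var[i]))) {
            if (i != 0) s << '_';  // CamelCase boundary becomes '_'
            s << var[i];
        } else {
            s << static_cast<char>(std::toupper(static_cast<unsigned char>(var[i])));
        }
    }
    return s.str();
}

int main() {
    std::cout << to_upper_snake("DumpLayersDstOnly") << '\n';  // DUMP_LAYERS_DST_ONLY

    std::string value;
    for (const std::string prefix : {"OV_", "OV_GPU_"}) {  // lowest to highest priority
        const std::string names[] = {"Verbose", to_upper_snake("Verbose")};
        for (const auto& name : names)
            if (const char* v = std::getenv((prefix + name).c_str()))
                value = v;  // later (GPU-specific) match sticks
    }
    if (!value.empty())
        std::cout << "Verbose = " << value << '\n';
}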

View File

@ -25,73 +25,6 @@ struct loop_impl : typed_primitive_impl<loop> {
loop_impl(const loop_impl& other) : typed_primitive_impl<loop>(other), node(other.node) {} loop_impl(const loop_impl& other) : typed_primitive_impl<loop>(other), node(other.node) {}
explicit loop_impl(const loop_node& node) : node(node) {} explicit loop_impl(const loop_node& node) : node(node) {}
// read scalar value from data primitive
static int64_t read_scalar_value(memory::ptr mem, stream& stream) {
int64_t trip_count = 0;
const layout& prim_layout = mem->get_layout();
switch (prim_layout.data_type) {
case data_types::u8: {
mem_lock<uint8_t> lock_prim_output{mem, stream};
trip_count = *lock_prim_output.data();
break;
}
case data_types::i8: {
mem_lock<int8_t> lock_prim_output{mem, stream};
trip_count = *lock_prim_output.data();
break;
}
case data_types::i32: {
mem_lock<int32_t> lock_prim_output{mem, stream};
trip_count = *lock_prim_output.data();
break;
}
case data_types::i64: {
mem_lock<int64_t> lock_prim_output{mem, stream};
trip_count = *lock_prim_output.data();
break;
}
default:
assert(false);
}
return trip_count;
}
static void write_scalar_value(memory::ptr mem, stream& stream, int64_t input) {
const layout& prim_layout = mem->get_layout();
switch (prim_layout.data_type) {
case data_types::u8: {
assert(input >= std::numeric_limits<uint8_t>::min() &&
input <= std::numeric_limits<uint8_t>::max());
mem_lock<uint8_t> lock_prim_output{mem, stream};
*lock_prim_output.data() = static_cast<uint8_t>(input);
break;
}
case data_types::i8: {
assert(input >= std::numeric_limits<int8_t>::min() &&
input <= std::numeric_limits<int8_t>::max());
mem_lock<int8_t> lock_prim_output{mem, stream};
*lock_prim_output.data() = static_cast<int8_t>(input);
break;
}
case data_types::i32: {
assert(input >= std::numeric_limits<int32_t>::min() &&
input <= std::numeric_limits<int32_t>::max());
mem_lock<int32_t> lock_prim_output{mem, stream};
*lock_prim_output.data() = static_cast<int32_t>(input);
break;
}
case data_types::i64: {
mem_lock<int64_t> lock_prim_output{mem, stream};
*lock_prim_output.data() = input;
break;
}
default:
assert(false);
}
}
event::ptr execute_impl(const std::vector<event::ptr>& events, loop_inst& instance) override { event::ptr execute_impl(const std::vector<event::ptr>& events, loop_inst& instance) override {
auto& outer_network = instance.get_network(); auto& outer_network = instance.get_network();
auto& stream = outer_network.get_stream(); auto& stream = outer_network.get_stream();
@ -104,29 +37,37 @@ struct loop_impl : typed_primitive_impl<loop> {
instance.preprocess_output_memory(); instance.preprocess_output_memory();
instance.preprocess_input_memory(); instance.preprocess_input_memory();
instance.preprocess_backedge_memory(); instance.preprocess_backedge_memory();
// set input data for current_iteration primitive if current_iteration is used
if (node.is_current_iteration_used()) {
const primitive_id& current_iteration_id = node.get_current_iteration_id();
auto current_iteration_prim = body_network->get_primitive(current_iteration_id);
auto input_layout_prim = std::dynamic_pointer_cast<input_layout_inst>(current_iteration_prim);
if (input_layout_prim == nullptr) {
CLDNN_ERROR_MESSAGE(node.id(), "current_iteration primitive is not input_layout");
}
const auto& backedge_mapping = instance.get_current_iteration_backedge_mapping();
input_layout_prim->set_data(backedge_mapping.initial_mem);
}
instance.preproc_memories_done = true; instance.preproc_memories_done = true;
} }
// read trip_count from outer network // read trip_count from outer network
bool update_num_iterations = false;
const primitive_id& trip_count_id = node.get_trip_count_id(); const primitive_id& trip_count_id = node.get_trip_count_id();
memory::ptr trip_count_mem = outer_network.get_primitive(trip_count_id)->output_memory_ptr(); memory::ptr trip_count_mem = outer_network.get_primitive(trip_count_id)->output_memory_ptr();
int64_t trip_count = read_scalar_value(trip_count_mem, stream); int64_t trip_count = loop_node::read_scalar_value(trip_count_mem, stream);
if (trip_count < 0) { if (trip_count < 0) {
const int64_t max_iteration = node.get_max_iteration(); const int64_t max_iteration = node.get_max_iteration();
trip_count = max_iteration; trip_count = max_iteration;
update_num_iterations = true;
} }
// read initial execution condition from outer network // read initial execution condition from outer network
const primitive_id& initial_execution_id = node.get_initial_execution_id(); const primitive_id& initial_execution_id = node.get_initial_execution_id();
memory::ptr initial_execution_mem = outer_network.get_primitive(initial_execution_id)->output_memory_ptr(); memory::ptr initial_execution_mem = outer_network.get_primitive(initial_execution_id)->output_memory_ptr();
int64_t execution_condition = read_scalar_value(initial_execution_mem, stream); int64_t execution_condition = loop_node::read_scalar_value(initial_execution_mem, stream);
// shortcut of current_iteration memory in body network (slice of input)
memory::ptr current_iteration_mem = nullptr;
if (node.is_current_iteration_used()) {
const primitive_id& current_iteration_id = node.get_current_iteration_id();
current_iteration_mem = body_network->get_primitive(current_iteration_id)->output_memory_ptr();
}
// shortcut of execution_condition memory in body network // shortcut of execution_condition memory in body network
memory::ptr execution_condition_mem = nullptr; memory::ptr execution_condition_mem = nullptr;
@ -135,11 +76,6 @@ struct loop_impl : typed_primitive_impl<loop> {
execution_condition_mem = body_network->get_primitive(condition_id)->output_memory_ptr(); execution_condition_mem = body_network->get_primitive(condition_id)->output_memory_ptr();
} }
int64_t current_iteration = 0;
if (node.is_current_iteration_used()) {
write_scalar_value(current_iteration_mem, stream, current_iteration);
}
const auto& concatenated_input_mem_mappings = instance.concatenated_input_mem_mappings; const auto& concatenated_input_mem_mappings = instance.concatenated_input_mem_mappings;
const auto& concatenated_output_mem_mappings = instance.concatenated_output_mem_mappings; const auto& concatenated_output_mem_mappings = instance.concatenated_output_mem_mappings;
@ -155,12 +91,12 @@ struct loop_impl : typed_primitive_impl<loop> {
} }
std::vector<event::ptr> loop_carried_dep(events.begin(), events.end()); std::vector<event::ptr> loop_carried_dep(events.begin(), events.end());
int64_t current_iteration_idx = 0;
while (current_iteration < trip_count && execution_condition) { while (current_iteration_idx < trip_count && execution_condition) {
// Copy & Set sliced input memory // Copy & Set sliced input memory
for (size_t i = 0; i < concatenated_input_mem_mappings.size(); ++i) { for (size_t i = 0; i < concatenated_input_mem_mappings.size(); ++i) {
const auto& concatenated_input = concatenated_input_mem_mappings.at(i); const auto& concatenated_input = concatenated_input_mem_mappings.at(i);
memory::ptr mem = concatenated_input.get_sliced_mem(current_iteration); memory::ptr mem = concatenated_input.get_sliced_mem(current_iteration_idx);
if (mem) { if (mem) {
concatenated_input.sliced_data_prim->set_output_memory(mem); concatenated_input.sliced_data_prim->set_output_memory(mem);
} else { } else {
@ -170,12 +106,12 @@ struct loop_impl : typed_primitive_impl<loop> {
// Set backedges // Set backedges
for (const auto& backedge_memory_mapping : instance.backedge_memory_mappings) { for (const auto& backedge_memory_mapping : instance.backedge_memory_mappings) {
backedge_memory_mapping.setup_iteration(current_iteration); backedge_memory_mapping.setup_iteration(current_iteration_idx);
} }
// Set sliced output memory // Set sliced output memory
for (const auto& concat_output_mem_mapping : concatenated_output_mem_mappings) { for (const auto& concat_output_mem_mapping : concatenated_output_mem_mappings) {
concat_output_mem_mapping.setup_concatenated_output_memory(current_iteration); concat_output_mem_mapping.setup_concatenated_output_memory(current_iteration_idx);
} }
// execute body network // execute body network
@ -187,17 +123,16 @@ struct loop_impl : typed_primitive_impl<loop> {
loop_carried_dep.emplace_back(body_event); loop_carried_dep.emplace_back(body_event);
} }
//TODO: "curreint_iteration primitive and execution_condition is prepared //TODO: execution_condition is prepared as they are presented in the
//as they are presented in the ngraph opset document for loop operation. // ngraph opset document for loop operation.
//However they are not being used yet and only TensorIterator which has fixed sequence length is being validated. // However they are not being used yet and only TensorIterator which
if (node.is_current_iteration_used()) { // has fixed sequence length is being validated.
write_scalar_value(current_iteration_mem, stream, current_iteration);
}
if (node.is_execution_condition_used()) { if (node.is_execution_condition_used()) {
execution_condition = read_scalar_value(execution_condition_mem, stream); execution_condition = loop_node::read_scalar_value(execution_condition_mem, stream);
} }
// update index & execution condition for the next iteration // update index & execution condition for the next iteration
++current_iteration; ++current_iteration_idx;
} }
body_network->reset_execution(); body_network->reset_execution();
@ -208,9 +143,21 @@ struct loop_impl : typed_primitive_impl<loop> {
concat_output.restore_concatenated_mem(); concat_output.restore_concatenated_mem();
} }
const primitive_id& num_iteration_id = node.get_num_iteration_id(); if (update_num_iterations) {
memory::ptr num_actual_iterations_mem = outer_network.get_primitive(num_iteration_id)->output_memory_ptr(); // update num_iterations (actual number of iterations)
write_scalar_value(num_actual_iterations_mem, stream, current_iteration); int64_t actual_iterations = 0;
if (node.is_current_iteration_used()) {
const auto& backedge_mapping = instance.get_current_iteration_backedge_mapping();
auto current_iteration_mem = backedge_mapping.from_primitive->output_memory_ptr();
actual_iterations = loop_node::read_scalar_value(current_iteration_mem, stream);
} else {
actual_iterations = current_iteration_idx;
}
const primitive_id& num_iteration_id = node.get_num_iteration_id();
memory::ptr num_actual_iterations_mem = outer_network.get_primitive(num_iteration_id)->output_memory_ptr();
loop_node::write_scalar_value(num_actual_iterations_mem, stream, actual_iterations);
}
ev->set(); ev->set();
return ev; return ev;

View File

@ -7,7 +7,9 @@
#include "cldnn/primitives/loop.hpp" #include "cldnn/primitives/loop.hpp"
#include "cldnn/primitives/mutable_data.hpp" #include "cldnn/primitives/mutable_data.hpp"
#include "cldnn/primitives/data.hpp"
#include "cldnn/primitives/input_layout.hpp" #include "cldnn/primitives/input_layout.hpp"
#include "cldnn/primitives/eltwise.hpp"
#include "cldnn/runtime/memory.hpp" #include "cldnn/runtime/memory.hpp"
#include "cldnn/runtime/error_handler.hpp" #include "cldnn/runtime/error_handler.hpp"
@ -27,24 +29,10 @@ private:
std::vector<loop::io_primitive_map> input_primitive_maps; std::vector<loop::io_primitive_map> input_primitive_maps;
std::vector<loop::io_primitive_map> output_primitive_maps; std::vector<loop::io_primitive_map> output_primitive_maps;
std::vector<cldnn::loop::backedge_mapping> back_edges; mutable std::vector<loop::backedge_mapping> back_edges;
bool use_current_iteration; bool use_current_iteration;
bool use_execution_condition; bool use_execution_condition;
mutable program_impl::ptr body_program; mutable program_impl::ptr body_program;
mutable std::map<primitive_id, memory::ptr> backedge_mem_impls;
mutable std::map<primitive_id, std::shared_ptr<mutable_data>> backedge_layers;
mutable std::map<primitive_id, std::shared_ptr<memory>> backedge_mem;
mutable bool output_is_backedge;
void setup_internal_mutabledata_node(primitive_id md_id, layout md_layout, std::vector<primitive_id> md_inputs_id = {}, uint32_t net_id = 0) const {
if (body.get_primitives().count(md_id) == 0) {
backedge_mem_impls[md_id] = get_program().get_engine().allocate_memory(md_layout, net_id);
backedge_mem[md_id] = backedge_mem_impls[md_id];
backedge_layers[md_id] = std::make_shared<mutable_data>(md_id, md_inputs_id, backedge_mem[md_id]);
body.add(backedge_layers[md_id]);
}
}
public: public:
typed_program_node(std::shared_ptr<primitive> prim, program_impl& prog) : typed_program_node(std::shared_ptr<primitive> prim, program_impl& prog) :
@ -63,7 +51,6 @@ public:
int64_t get_max_iteration() const { return max_iteration; } int64_t get_max_iteration() const { return max_iteration; }
program_impl::ptr get_body_program() const { return body_program; } program_impl::ptr get_body_program() const { return body_program; }
bool is_output_working_as_backedge() const { return output_is_backedge; }
bool is_current_iteration_used() const { return use_current_iteration; } bool is_current_iteration_used() const { return use_current_iteration; }
bool is_execution_condition_used() const { return use_execution_condition; } bool is_execution_condition_used() const { return use_execution_condition; }
@ -99,19 +86,95 @@ public:
static size_t convert_to_raw_axis(size_t axis, size_t ndim) { static size_t convert_to_raw_axis(size_t axis, size_t ndim) {
// convert between bfyx, bfzyx, bfzyxw and tensor.size.raw // convert between bfyx, bfzyx, bfzyxw and tensor.size.raw
assert(axis < ndim); if (axis >= ndim) {
throw std::runtime_error("axis should be less than ndim");
}
if (axis < 2) { if (axis < 2) {
return axis; return axis;
} }
return (ndim - 1) - (axis - 2); return (ndim - 1) - (axis - 2);
} }
// read scalar value from data primitive
static int64_t read_scalar_value(memory::ptr mem, stream& stream) {
int64_t trip_count = 0;
const layout& prim_layout = mem->get_layout();
switch (prim_layout.data_type) {
case data_types::u8: {
mem_lock<uint8_t> lock_prim_output{mem, stream};
trip_count = *lock_prim_output.data();
break;
}
case data_types::i8: {
mem_lock<int8_t> lock_prim_output{mem, stream};
trip_count = *lock_prim_output.data();
break;
}
case data_types::i32: {
mem_lock<int32_t> lock_prim_output{mem, stream};
trip_count = *lock_prim_output.data();
break;
}
case data_types::i64: {
mem_lock<int64_t> lock_prim_output{mem, stream};
trip_count = *lock_prim_output.data();
break;
}
default:
throw std::runtime_error("Invalid data type : " + data_type_traits::name(prim_layout.data_type));
}
return trip_count;
}
template<typename T>
static inline void validate_input_value(int64_t input) {
if (input < std::numeric_limits<T>::min() || input > std::numeric_limits<T>::max()) {
throw std::runtime_error("Invalid data value : " + std::to_string(input));
}
}
static void write_scalar_value(memory::ptr mem, stream& stream, int64_t input) {
const layout& prim_layout = mem->get_layout();
switch (prim_layout.data_type) {
case data_types::u8: {
validate_input_value<uint8_t>(input);
mem_lock<uint8_t> lock_prim_output{mem, stream};
lock_prim_output[0] = static_cast<uint8_t>(input);
break;
}
case data_types::i8: {
validate_input_value<int8_t>(input);
mem_lock<int8_t> lock_prim_output{mem, stream};
lock_prim_output[0] = static_cast<int8_t>(input);
break;
}
case data_types::i32: {
validate_input_value<int32_t>(input);
mem_lock<int32_t> lock_prim_output{mem, stream};
lock_prim_output[0] = static_cast<int32_t>(input);
break;
}
case data_types::i64: {
mem_lock<int64_t> lock_prim_output{mem, stream};
lock_prim_output[0] = input;
break;
}
default:
throw std::runtime_error("Invalid data type : " + data_type_traits::name(prim_layout.data_type));
}
}
layout calc_body_input_layout(const loop::io_primitive_map& inputDesc) const { layout calc_body_input_layout(const loop::io_primitive_map& inputDesc) const {
const auto& dependency_list = this->get_dependencies(); const auto& dependency_list = this->get_dependencies();
auto input = std::find_if(dependency_list.begin(), dependency_list.end(), [&inputDesc](const program_node* p){ auto input = std::find_if(dependency_list.begin(), dependency_list.end(), [&inputDesc](const program_node* p){
return p->id() == inputDesc.external_id; return p->id() == inputDesc.external_id;
}); });
assert(input != dependency_list.end()); if (input == dependency_list.end()) {
throw std::runtime_error("Can't find input from dependency_list");
}
layout calculated_layout = (*input)->get_output_layout(); layout calculated_layout = (*input)->get_output_layout();
auto shape = calculated_layout.size.sizes(calculated_layout.format); auto shape = calculated_layout.size.sizes(calculated_layout.format);
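write_scalar_value above follows a validate-then-narrow pattern: reject out-of-range values up front instead of silently truncating them into the destination type. The same idea in isolation, with a hypothetical checked_narrow helper and no cldnn memory involved:

#include <cstdint>
#include <iostream>
#include <limits>
#include <stdexcept>
#include <string>

template <typename T>
T checked_narrow(int64_t v) {
    if (v < std::numeric_limits<T>::min() || v > std::numeric_limits<T>::max())
        throw std::runtime_error("Invalid data value : " + std::to_string(v));
    return static_cast<T>(v);
}

int main() {
    std::cout << +checked_narrow<uint8_t>(200) << '\n';  // fits: prints 200
    try {
        checked_narrow<int8_t>(200);                     // 200 > 127: throws
    } catch (const std::exception& e) {
        std::cout << e.what() << '\n';
    }
}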
@ -164,6 +227,7 @@ public:
static bool is_integer(const data_types& data_type) { static bool is_integer(const data_types& data_type) {
switch (data_type) { switch (data_type) {
case data_types::u8:
case data_types::i8: case data_types::i8:
case data_types::i32: case data_types::i32:
case data_types::i64: case data_types::i64:
@ -173,54 +237,73 @@ public:
} }
} }
void process_single_int_input(const primitive_id& id) const { void process_current_iteration() const {
const primitive_id& current_iteration_id = get_current_iteration_id();
if (current_iteration_id.empty()) {
return;
}
const topology_map& body_topology_map = body.get_primitives(); const topology_map& body_topology_map = body.get_primitives();
if (!id.empty()) { const layout body_input_layout(data_types::i64, format::bfyx, {1, 1, 1, 1});
// add input_layout if not exist
if (body_topology_map.count(id)) { // add current_iteration primitive if it does not exist in the body
layout body_input_layout(data_types::i32, format::bfyx, {1, 1, 1, 1}); if (body_topology_map.find(current_iteration_id) == body_topology_map.end()) {
body.add(std::make_shared<input_layout>(id, body_input_layout)); body.add(std::make_shared<input_layout>(current_iteration_id, body_input_layout));
} else {
const auto& body_input_prim = body.at(current_iteration_id);
const auto input_layout_prim = std::dynamic_pointer_cast<input_layout>(body_input_prim);
if (!input_layout_prim) {
CLDNN_ERROR_MESSAGE(this->id(), "current_iteration primitive should be cldnn::input_layout");
} else { } else {
const auto& body_input_prim = body.at(id); input_layout_prim->change_layout(body_input_layout);
CLDNN_ERROR_BOOL(this->id(), "Error while building body program", }
body_input_prim->type != input_layout::type_id(), }
id + " is not cldnn::input_layout");
const auto input_layout_prim = static_cast<const input_layout*>(body_input_prim.get()); // add incremental data: 1
CLDNN_ERROR_BOOL(this->id(), "Error while building body program", // it is used to update current_iteration in body network
!static_cast<bool>(input_layout_prim->output_data_type), const primitive_id increment_value_id = current_iteration_id + "_inc";
"data_type of " + id + " is not specified"); auto mem = get_program().get_engine().allocate_memory(body_input_layout);
CLDNN_ERROR_BOOL(this->id(), "Error while building body program", auto& stream = get_program().get_stream();
!is_integer(*input_layout_prim->output_data_type), write_scalar_value(mem, stream, 1);
id + " is not integer type"); body.add(std::make_shared<data>(increment_value_id, mem));
CLDNN_ERROR_BOOL(this->id(), "Error while building body program",
input_layout_prim->layout.count() != 1, // add eltwise sum updating current_iteration with incremental data
id + " should have 1 element"); const primitive_id updated_currnet_iteration_id = current_iteration_id + "_update";
body.add(std::make_shared<eltwise>(updated_currnet_iteration_id,
current_iteration_id, increment_value_id, eltwise_mode::sum));
// set backedge
back_edges.emplace_back(updated_currnet_iteration_id, current_iteration_id);
}
void process_single_int_output(const primitive_id& id) const {
// add mutable if not exist
const topology_map& body_topology_map = body.get_primitives();
layout body_output_layout(data_types::i64, format::bfyx, {1, 1, 1, 1});
if (!id.empty()) {
auto body_output = body_topology_map.find(id);
if (body_output == body_topology_map.end()) {
auto mem = get_program().get_engine().allocate_memory(body_output_layout);
auto md = std::make_shared<data>(id, mem);
body.add(md);
} else {
auto body_output_prim = body.at(body_output->first);
auto mem = get_program().get_engine().allocate_memory(body_output_layout);
body_output_prim.reset(new mutable_data(body_output->first, mem));
} }
} }
} }
void build_body_program() const { void build_body_program() const {
const std::vector<cldnn::program_node *>& deps = get_dependencies(); for (const auto& pm : input_primitive_maps) {
// setup internal inputs layout calculated_layout = calc_body_input_layout(pm);
const primitive_id& trip_count_id = get_trip_count_id(); const primitive_id& internal_input_id = pm.internal_id;
const primitive_id& initial_execution = get_initial_execution_id();
const primitive_id& num_iteration = get_num_iteration_id();
for (const cldnn::program_node * dep : deps) {
const primitive_id& id = dep->id();
if (id == trip_count_id || id == initial_execution || id == num_iteration) {
continue;
}
for (const auto& pm : input_primitive_maps) { // add inputs for body network if not exist
layout calculated_layout = calc_body_input_layout(pm); if (body.get_primitives().count(internal_input_id) == 0) {
const primitive_id& internal_input_id = pm.internal_id; body.add(std::make_shared<input_layout>(internal_input_id, calculated_layout));
} else {
// add inputs for body network if not exist body.change_input_layout(internal_input_id, calculated_layout);
if (body.get_primitives().count(internal_input_id) == 0) {
body.add(std::make_shared<input_layout>(internal_input_id, calculated_layout));
} else {
body.change_input_layout(internal_input_id, calculated_layout);
}
} }
} }
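In miniature, process_current_iteration wires up a counter inside the body network: an input holding current_iteration, a constant 1 (the "_inc" data primitive), an eltwise sum producing the "_update" value, and a backedge feeding that sum back into the input for the next iteration. A plain-C++ emulation of that dataflow:

#include <cstdint>
#include <iostream>

int main() {
    int64_t current_iteration = 0;  // input_layout, seeded with 0 via the backedge's initial memory
    const int64_t increment = 1;    // the "<id>_inc" data primitive
    for (int iter = 0; iter < 5; ++iter) {
        const int64_t updated = current_iteration + increment;  // the "<id>_update" eltwise sum
        current_iteration = updated;                            // backedge: next iteration's input
    }
    std::cout << current_iteration << '\n';  // 5, i.e. the number of executed iterations
}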
@ -230,39 +313,35 @@ public:
} }
std::set<primitive_id> output_names; std::set<primitive_id> output_names;
output_names.insert(output_primitive_maps.front().internal_id); output_names.insert(output_primitive_maps.front().internal_id);
const auto& back_edges_list = this->get_primitive()->back_edges;
// add current_iteration_id in body network, condition_id if exist // add current_iteration_id in body network, condition_id if exist
process_single_int_input(get_current_iteration_id()); process_current_iteration();
process_single_int_input(get_condition_id()); process_single_int_output(get_condition_id());
// setup outputs for backedges // setup outputs for backedges
for (auto& back_edge : back_edges_list) { for (auto& back_edge : back_edges) {
// check whether the back_edge.to has its corresponding io_primitive_map // check whether the back_edge.to has its corresponding io_primitive_map
const auto& input_map = std::find_if(input_primitive_maps.begin(), input_primitive_maps.end(), const auto& input_map = std::find_if(input_primitive_maps.begin(), input_primitive_maps.end(),
[&](const loop::io_primitive_map& pm) { [&](const loop::io_primitive_map& pm) {
return pm.internal_id == back_edge.to; return pm.internal_id == back_edge.to;
}); });
if (input_map == input_primitive_maps.end()) {
// backedge which is current_iteration does not have
// input primitive map because its initial value is always
// zero and the value will be set in execute_impl()
if (back_edge.to != get_current_iteration_id() && input_map == input_primitive_maps.end()) {
std::string msg = "No primitive mapping for backedge (internal_id: " + back_edge.to + ')'; std::string msg = "No primitive mapping for backedge (internal_id: " + back_edge.to + ')';
CLDNN_ERROR_MESSAGE(this->id(), msg.c_str()); CLDNN_ERROR_MESSAGE(this->id(), msg.c_str());
} }
for (const auto& prim : body.get_primitives()) {
if (prim.first != back_edge.from) {
continue;
}
const auto dependencies_ref = prim.second->dependencies();
std::vector<primitive_id> dep_pids(dependencies_ref.size());
for (const auto& dep : dependencies_ref) {
dep_pids.emplace_back(dep.get());
}
setup_internal_mutabledata_node(back_edge.from, calc_body_input_layout(*input_map), dep_pids);
}
output_names.insert(back_edge.from); output_names.insert(back_edge.from);
} }
// if execution_condition_id is specified, we need to add the id in build_option::outputs
if (!get_condition_id().empty()) {
output_names.insert(get_condition_id());
}
auto opts = get_program().get_options(); auto opts = get_program().get_options();
std::vector<primitive_id> output_names_vec(output_names.begin(), output_names.end()); std::vector<primitive_id> output_names_vec(output_names.begin(), output_names.end());
opts.set_option(build_option::outputs(output_names_vec)); opts.set_option(build_option::outputs(output_names_vec));
@ -310,6 +389,7 @@ public:
from_primitive(from_primitive), from_primitive(from_primitive),
to_primitive(to_primitive), to_primitive(to_primitive),
from_mems(from_mems), from_mems(from_mems),
initial_mem(initial_mem),
stream(stream), stream(stream),
type(type), type(type),
total_bytes(initial_mem->get_layout().bytes_count()) { total_bytes(initial_mem->get_layout().bytes_count()) {
@ -396,7 +476,10 @@ private:
bytes_iteration_initial_offset(initial_offset * bytes_iteration) {} bytes_iteration_initial_offset(initial_offset * bytes_iteration) {}
static int64_t get_batch_size(layout mem_layout, int64_t axis) { static int64_t get_batch_size(layout mem_layout, int64_t axis) {
assert(axis >= 0); if (axis < 0) {
throw std::runtime_error("axis should be positive integer or zero");
}
int64_t batch_size = 1; int64_t batch_size = 1;
for (int64_t i = 0; i < axis; ++i) { for (int64_t i = 0; i < axis; ++i) {
batch_size *= mem_layout.size.raw[i]; batch_size *= mem_layout.size.raw[i];
@ -472,6 +555,7 @@ private:
std::vector<concatenated_memory_mapping> concatenated_output_mem_mappings; std::vector<concatenated_memory_mapping> concatenated_output_mem_mappings;
static std::string to_string(const loop_node& node); static std::string to_string(const loop_node& node);
size_t current_iteration_backedge_mapping_idx = 0;
public: public:
typed_primitive_inst(network_impl& network, const loop_node& node); typed_primitive_inst(network_impl& network, const loop_node& node);
@ -479,6 +563,12 @@ public:
void preprocess_input_memory(); void preprocess_input_memory();
void preprocess_output_memory(); void preprocess_output_memory();
void preprocess_backedge_memory(); void preprocess_backedge_memory();
const backedge_memory_mapping& get_current_iteration_backedge_mapping() const {
if (!node.is_current_iteration_used()) {
CLDNN_ERROR_MESSAGE(node.id(), "no backedge mapping for current_iteration");
}
return backedge_memory_mappings.at(current_iteration_backedge_mapping_idx);
}
private: private:
network_impl::ptr body_network; network_impl::ptr body_network;

View File

@ -279,12 +279,24 @@ void loop_inst::preprocess_backedge_memory() {
for (const auto& back_edge : back_edges) { for (const auto& back_edge : back_edges) {
//find corresponding input of the backedge //find corresponding input of the backedge
const auto input_map_ptrs = node.find_io_primitive_maps(back_edge.to, false); const auto input_map_ptrs = node.find_io_primitive_maps(back_edge.to, false);
assert(input_map_ptrs.size() == 1);
const auto& input_map = input_map_ptrs.front();
auto backedged_sliced_output_mems = get_sliced_mem(back_edge.from);
const auto backedge_to_prim = body_network->get_primitive(back_edge.to); const auto backedge_to_prim = body_network->get_primitive(back_edge.to);
const auto backedge_from_prim = body_network->get_primitive(back_edge.from); const auto backedge_from_prim = body_network->get_primitive(back_edge.from);
memory::ptr initial_mem = get_external_memory(input_map->external_id);
memory::ptr initial_mem;
if (back_edge.to == node.get_current_iteration_id()) {
const layout current_iteration_layout = backedge_to_prim->output_memory().get_layout();
initial_mem = get_network().get_engine().allocate_memory(current_iteration_layout);
auto& stream = get_network().get_stream();
loop_node::write_scalar_value(initial_mem, stream, 0);
current_iteration_backedge_mapping_idx = backedge_memory_mappings.size();
} else {
if (input_map_ptrs.empty()) {
CLDNN_ERROR_MESSAGE(id(), "no input_mapping for backedged input");
}
initial_mem = get_external_memory(input_map_ptrs.front()->external_id);
}
auto backedged_sliced_output_mems = get_sliced_mem(back_edge.from);
if (backedged_sliced_output_mems.empty()) { if (backedged_sliced_output_mems.empty()) {
// backedge output which does not need concatenation // backedge output which does not need concatenation
// input memory = output memory = loop output memory // input memory = output memory = loop output memory

View File

@@ -945,14 +945,17 @@ bool program_impl::extract_and_remove(program_node& node) {
    // update primitive_map of loop primitive,
    // if extracted node is input of loop
-   for (const auto user : node.users) {
+   for (const auto& user : node.users) {
        if (user->is_type<loop>()) {
            loop_node& loop = *user;
            loop.update_primitive_map(node.id(), input.id());
        }
-       if (node.dependencies.front()->is_type<loop>()) {
-           loop_node& loop = *node.dependencies.front();
-           loop.update_primitive_map(node.id(), user->id());
+       for (auto& dep : node.dependencies) {
+           if (dep->is_type<loop>()) {
+               loop_node& loop = *dep;
+               loop.update_primitive_map(node.id(), user->id());
+           }
        }
    }
    input.users.remove(&node);


@@ -90,9 +90,6 @@ TEST(loop_gpu, basic_no_concat)
    EXPECT_EQ(output_layout.size.spatial[0], 4);
    EXPECT_EQ(output_layout.size.spatial[1], 5);

-   mem_lock<int32_t> ptr{num_iteration_mem, get_test_stream()};
-   EXPECT_EQ(ptr[0], trip_count);
    // value check
    mem_lock<float> output_ptr{output, get_test_stream()};
    EXPECT_EQ(output_ptr.size(), input_data.size());
@@ -164,10 +161,6 @@ TEST(loop_gpu, basic_concat)
    EXPECT_EQ(output_layout.size.spatial[0], 4);
    EXPECT_EQ(output_layout.size.spatial[1], 5);

-   mem_lock<int32_t> ptr{num_iteration_mem, get_test_stream()};
-   const int32_t actual_iterations = ptr[0];
-   EXPECT_EQ(actual_iterations, trip_count);
    // value check
    mem_lock<float> output_ptr{output, get_test_stream()};
    for (size_t i=0, iend = input_data.size(); i<iend; ++i) {
@@ -303,14 +296,6 @@ TEST(loop_gpu, basic_concat_nested)
    EXPECT_EQ(output_layout.size.spatial[0], 4);
    EXPECT_EQ(output_layout.size.spatial[1], 5);

-   // check trip count = actual iteration
-   mem_lock<int64_t> inner_num_iteration_ptr{inner_num_iteration_mem, get_test_stream()};
-   int64_t inner_actual_iterations = inner_num_iteration_ptr[0];
-   EXPECT_EQ(inner_actual_iterations, inner_trip_count);
-   mem_lock<int64_t> num_iteration_ptr{num_iteration_mem, get_test_stream()};
-   int64_t actual_iterations = num_iteration_ptr[0];
-   EXPECT_EQ(actual_iterations, outer_trip_count);
    // check output values
    EXPECT_EQ(output_layout.count(), expected.size());
    mem_lock<float> output_ptr{output, get_test_stream()};


@@ -49,3 +49,8 @@ install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/unit_tests
        DESTINATION deployment_tools/model_optimizer
        COMPONENT tests
        EXCLUDE_FROM_ALL)
+install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/automation
+       DESTINATION deployment_tools/model_optimizer
+       COMPONENT tests
+       EXCLUDE_FROM_ALL)


@@ -29,7 +29,6 @@ extensions/back/GroupedConvWeightsNormalize.py
extensions/back/insert_compatibility_l2normalization.py
extensions/back/InterpolateReshape.py
extensions/back/kaldi_remove_memory_output.py
-extensions/back/LayoutChangeForEinsum.py
extensions/back/LayoutChangeForGatherND.py
extensions/back/LeakyReLUMutation.py
extensions/back/LinearToLinearONNXReplacer.py
@@ -597,6 +596,7 @@ extensions/middle/InsertSelect.py
extensions/middle/InterpolateSequenceToInterpolate.py
extensions/middle/L2NormFusing.py
extensions/middle/LayoutChangeForConstantShapePaths.py
+extensions/middle/LayoutChangeForEinsum.py
extensions/middle/LeakyReluPattern.py
extensions/middle/LSTMRNNSequenceToTensorIterator.py
extensions/middle/MakeKaldiConstReshapable.py
@@ -1070,6 +1070,7 @@ mo/utils/ir_reader/extenders/topk_extender.py
mo/utils/ir_reader/extenders/variadic_split_extender.py
mo/utils/ir_reader/layer_to_class.py
mo/utils/ir_reader/restore_graph.py
+mo/utils/json_schema.py
mo/utils/logger.py
mo/utils/model_analysis.py
mo/utils/pipeline_config.py


@@ -1,12 +1,14 @@
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

+from extensions.middle.InsertLayoutPropagationTransposes import is_input_data_in_correct_layout, \
+    is_output_data_in_correct_layout
from extensions.ops.einsum import Einsum
-from mo.back.replacement import BackReplacementPattern
from mo.graph.graph import Graph
+from mo.middle.replacement import MiddleReplacementPattern


-class LayoutChangeForEinsum(BackReplacementPattern):
+class LayoutChangeForEinsum(MiddleReplacementPattern):
    """
    The transformation adjusts Einsum equation to NCHW layout.
    Subscripts for tensor of rank greater than three must be adjusted
@@ -19,7 +21,15 @@ class LayoutChangeForEinsum(BackReplacementPattern):
    """
    enabled = True
    force_shape_inference = True
-   graph_condition = [lambda graph: graph.graph['fw'] == 'tf']
+   graph_condition = [lambda graph: graph.graph['layout'] == 'NHWC']
+
+   def run_after(self):
+       from extensions.middle.MarkSubgraphsWithCorrectLayout import MarkSubGraphsWithCorrectLayout
+       return [MarkSubGraphsWithCorrectLayout]
+
+   def run_before(self):
+       from extensions.middle.InsertLayoutPropagationTransposes import InsertLayoutPropagationTranspose
+       return [InsertLayoutPropagationTranspose]

    def find_and_replace_pattern(self, graph: Graph):
        import extensions.middle.InsertLayoutPropagationTransposes as InsertTransposes
@@ -31,27 +41,35 @@ class LayoutChangeForEinsum(BackReplacementPattern):
            connected_in_ports = [port for port in einsum.in_ports().values() if not port.disconnected()]
            num_inputs = len(connected_in_ports)

-           # compute a mask of inputs of rank greater than 3 that are required original layout (NCHW)
-           # due to presence of ellipsis covering multiple tail dimensions in the corresponding input subscript
+           # check if the correct_data_layout attribute is set for inputs and output;
+           # this attribute can be set within the MarkSubgraphsWithCorrectLayout transformation,
+           # for example, when Einsum is located near a MatMul operation in a graph
+           input_correct_layout_mask = []
+           for input_ind in range(num_inputs):
+               input_correct_layout_mask.append(is_input_data_in_correct_layout(einsum, input_ind))
+           is_output_layout_correct = is_output_data_in_correct_layout(einsum, 0)

+           # compute a mask of which inputs/output are already adjusted to the required layout;
+           # the ones that are not adjusted require a transpose
            input_ranks = [len(einsum.in_port(port_idx).data.get_shape()) for port_idx in range(num_inputs)]
            output_rank = len(einsum.out_port(0).data.get_shape())
-           permuted_equation, is_inputs_permuted, is_output_permuted = Einsum.adjust_equation_with_NCHW_layout(
+           permuted_equation, are_inputs_adjusted, is_output_adjusted = Einsum.adjust_equation_with_NCHW_layout(
                einsum_name,
                equation,
                input_ranks,
-               output_rank)
-           assert len(is_inputs_permuted) == num_inputs
+               output_rank, input_correct_layout_mask, is_output_layout_correct)
+           assert len(are_inputs_adjusted) == num_inputs

            # setup adjusted equation
            einsum.equation = permuted_equation

            # insert Transpose node to get NHWC layout back (for inputs) that is required due to specifics of equation
            for input_ind in range(num_inputs):
-               if not is_inputs_permuted[input_ind]:
+               if not are_inputs_adjusted[input_ind]:
                    # that means Einsum can only accept input in NHWC layout
                    # so the inserted transpose before the Einsum will convert the layout to NHWC
                    InsertTransposes.insert_transpose(graph, einsum.in_port(input_ind), before_input=True)

-           if not is_output_permuted:
+           if not is_output_adjusted:
                # that means Einsum can only generate output in NHWC layout
                # so the inserted transpose followed after the output will convert the layout back into NCHW layout
                InsertTransposes.insert_transpose(graph, einsum.out_port(0), before_input=False)


@@ -137,7 +137,8 @@ class Einsum(Op):
        return labels

    @staticmethod
-   def adjust_equation_with_NCHW_layout(node_name: str, equation: str, input_ranks: list, output_rank: int) -> (
+   def adjust_equation_with_NCHW_layout(node_name: str, equation: str, input_ranks: list, output_rank: int,
+                                        input_correct_layout_mask: list, output_correct_layout_mask: bool) -> (
            str, list, bool):
        """
        In order to satisfy NCHW layout, subscripts for tensors with rank greater than three must be adjusted by moving labels
@@ -151,11 +152,13 @@ class Einsum(Op):
        :param output_rank: output rank
        :return: adjusted equation, boolean mask for inputs, and boolean flag if output subscript is adjusted
        """
-       is_inputs_permuted = []
+       is_inputs_adjusted = []
        input_subscripts, output_subscript = Einsum.parse_equation(node_name, equation)
        num_inputs = len(input_ranks)
        assert len(input_subscripts) == num_inputs, "The number of inputs must match a number " \
                                                    "of input subscripts"
+       assert len(input_correct_layout_mask) == num_inputs, "The number of inputs must match the number " \
+                                                            "of elements in the input_correct_layout_mask list"

        # permute labels in input subscripts and mark inputs for which inference in NCHW layout is acceptable
        # in case ellipsis covering multiple dimensions in the end, the permutation is impossible
@@ -166,31 +169,35 @@ class Einsum(Op):
            input_rank = input_ranks[input_ind]
            labels = Einsum.extract_subscript_labels(node_name, input_subscript)
            num_broadcasted_dims = input_rank - len(labels) + 1
-           if input_rank > 3 and (labels[-1] != "..." or labels[-1] == "..." and num_broadcasted_dims == 1):
-               is_inputs_permuted.append(True)
+           if input_correct_layout_mask[input_ind]:
+               is_inputs_adjusted.append(True)
+           elif input_rank > 3 and (labels[-1] != "..." or labels[-1] == "..." and num_broadcasted_dims == 1):
+               is_inputs_adjusted.append(True)
                labels.insert(1, labels[-1])
                del labels[-1]
            else:
-               is_inputs_permuted.append(False)
+               is_inputs_adjusted.append(False)
            permuted_input_subscript = ''.join(labels)
            permuted_input_subscripts.append(permuted_input_subscript)

        # perform the same procedure for the output subscript as for the inputs subscripts
        labels = Einsum.extract_subscript_labels(node_name, output_subscript)
        num_broadcasted_dims = output_rank - len(labels) + 1
-       if output_rank > 3 and (labels[-1] != "..." or labels[-1] == "..." and num_broadcasted_dims == 1):
-           is_output_permuted = True
+       if output_correct_layout_mask:
+           is_output_adjusted = True
+       elif output_rank > 3 and (labels[-1] != "..." or labels[-1] == "..." and num_broadcasted_dims == 1):
+           is_output_adjusted = True
            labels.insert(1, labels[-1])
            del labels[-1]
        else:
-           is_output_permuted = False
+           is_output_adjusted = False
        permuted_output_subscript = ''.join(labels)

        # concatenate the left and right hands of the resulted equation
        left_hand = ','.join(permuted_input_subscripts)
        right_hand = permuted_output_subscript
        permuted_equation = left_hand + "->" + right_hand
-       return permuted_equation, is_inputs_permuted, is_output_permuted
+       return permuted_equation, is_inputs_adjusted, is_output_adjusted

    @staticmethod
    def infer(node: Node):
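
A minimal sketch (plain Python, not the MO API) of the label rotation that adjust_equation_with_NCHW_layout applies to a rank-4 NHWC subscript; the labels are hypothetical:

    # Move the last (channel) label to position 1, turning an NHWC subscript into
    # its NCHW equivalent; mirrors labels.insert(1, labels[-1]); del labels[-1] above.
    labels = ['a', 'b', 'c', 'd']      # subscript "abcd" for a rank-4 NHWC tensor
    labels.insert(1, labels[-1])
    del labels[-1]
    print(''.join(labels))             # -> "adbc"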


@@ -1,6 +1,7 @@
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

+import fastjsonschema as json_validate
import json
import logging as log
import os
@@ -9,7 +10,8 @@ from re import compile, match
from mo.graph.graph import Node, Graph
from mo.utils.error import Error
from mo.utils.graph import nodes_matching_name_pattern, sub_graph_between_nodes
-from mo.utils.utils import refer_to_faq_msg
+from mo.utils.json_schema import schema_dict
+from mo.utils.utils import get_mo_root_dir, refer_to_faq_msg


class CustomReplacementDescriptor(object):
@@ -297,12 +299,12 @@ class CustomReplacementDescriptorScope(CustomReplacementDescriptor):
                log.debug("Node {} doesn't have output edges. Consider it output".format(node_name))
                output_tensors.add((generate_pattern_for_node(graph, pattern, node_name), 0))

-       if not self.has('inputs'):
+       if not self.has('inputs') or len(self._replacement_desc['inputs']) == 0:
            self._replacement_desc['inputs'] = [[{'node': desc[0], 'port': desc[1]} for desc in inp]
                                                for inp in sorted(input_nodes_mapping.values())]
            log.debug('Updated inputs of sub-graph for instance "{}"'.format(self.instances))

-       if not self.has('outputs'):
+       if not self.has('outputs') or len(self._replacement_desc['outputs']) == 0:
            self._replacement_desc['outputs'] = [{'node': node, 'port': port} for node, port in sorted(output_tensors)]
            log.debug('Updated outputs of sub-graph for instance "{}"'.format(self.instances))
@@ -342,13 +344,8 @@ def parse_custom_replacement_config_file(file_name: str):
    if not os.path.exists(file_name):
        raise Error("Custom replacements configuration file '{}' does not exist. ".format(file_name) +
                    refer_to_faq_msg(69))
-   try:
-       with open(file_name, 'r') as f:
-           data = json.load(f)
-   except Exception as exc:
-       raise Error("Failed to parse custom replacements configuration file '{}': {}. ".format(file_name, exc) +
-                   refer_to_faq_msg(70)) from exc
+   data = load_and_validate_json_config(file_name)

    result = list()
    validation_errors = list()
    for attrs in data:
@@ -394,3 +391,22 @@ def generate_pattern_for_node(graph: Graph, sub_graph_pattern: str, node_name: s
    raise RuntimeError('The pattern that uniquely identifies node "{}" using sub-graph pattern "{}" has not been found'.
                       format(node_name, sub_graph_pattern))

+def load_and_validate_json_config(config_file_name: str):
+   """
+   Reads and validates the custom replacement configuration file config_file_name.
+   :param config_file_name: name of the file to read from.
+   :return: A dictionary serialized from the json config file.
+   """
+   try:
+       with open(config_file_name, 'r') as f:
+           json_config = json.load(f)
+           validator = json_validate.compile(schema_dict)
+           validator(json_config)
+   except Exception as e:
+       raise Error("Failed to parse custom replacements configuration file '{}': {}. ".format(config_file_name, e) +
+                   refer_to_faq_msg(70)) from e
+
+   return json_config
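
A short usage sketch of the new validation path, assuming the MO package and fastjsonschema are installed; the config entry and file handling are hypothetical:

    import json
    import tempfile

    from mo.utils.custom_replacement_config import load_and_validate_json_config

    # A hypothetical minimal configuration: "id" and "match_kind" are required,
    # and "match_kind" must be one of "points", "scope", or "general".
    config = [{"id": "MyTransform", "match_kind": "general", "custom_attributes": {}}]
    with tempfile.NamedTemporaryFile('w', suffix='.json', delete=False) as f:
        json.dump(config, f)
    data = load_and_validate_json_config(f.name)  # raises mo.utils.error.Error if invalid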


@@ -0,0 +1,129 @@
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
schema_dict = {
"definitions": {},
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Root",
"type": "array",
"default": [],
"items": {
"$id": "#root/items",
"title": "Items",
"type": "object",
"required": [
"id",
"match_kind"
],
"properties": {
"custom_attributes": {
"$id": "#root/items/custom_attributes",
"title": "Custom_attributes",
"type": "object",
"properties": {
}
},
"id": {
"$id": "#root/items/id",
"title": "Id",
"type": "string",
"pattern": "^.*$",
"minLength": 1
},
"inputs": {
"$id": "#root/items/inputs",
"title": "Inputs",
"type": "array",
"default": [],
"items": {
"$id": "#root/items/inputs/items",
"title": "Items",
"type": "array",
"default": [],
"items": {
"$id": "#root/items/inputs/items/items",
"title": "Items",
"type": "object",
"properties": {
"node": {
"$id": "#root/items/inputs/items/items/node",
"title": "Node",
"type": "string",
"default": "",
"pattern": "^.*$"
},
"port": {
"$id": "#root/items/inputs/items/items/port",
"title": "Port",
"type": "integer",
"default": 0
}
},
"required": ["node", "port"]
}
}
},
"instances": {
"$id": "#root/items/instances",
"title": "Instances",
"type": ["array", "object"],
"items": {
"$id": "#root/items/instances/items",
"title": "Items",
"type": "string",
"default": "",
"pattern": "^.*$"
}
},
"match_kind": {
"$id": "#root/items/match_kind",
"title": "Match_kind",
"type": "string",
"enum": ["points", "scope", "general"],
"default": "points",
"pattern": "^.*$"
},
"outputs": {
"$id": "#root/items/outputs",
"title": "Outputs",
"type": "array",
"default": [],
"items": {
"$id": "#root/items/outputs/items",
"title": "Items",
"type": "object",
"properties": {
"node": {
"$id": "#root/items/outputs/items/node",
"title": "Node",
"type": "string",
"default": "",
"pattern": "^.*$"
},
"port": {
"$id": "#root/items/outputs/items/port",
"title": "Port",
"type": "integer",
"default": 0
}
},
"required": ["node", "port"]
}
},
"include_inputs_to_sub_graph": {
"$id": "#root/items/include_inputs_to_sub_graph",
"title": "Include_inputs_to_sub_graph",
"type": "boolean",
"default": False
},
"include_outputs_to_sub_graph": {
"$id": "#root/items/include_outputs_to_sub_graph",
"title": "Include_outputs_to_sub_graph",
"type": "boolean",
"default": False
}
}
}
}
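
A sketch of what this schema enforces, assuming fastjsonschema; both sample documents are hypothetical:

    import fastjsonschema

    from mo.utils.json_schema import schema_dict

    validate = fastjsonschema.compile(schema_dict)
    validate([{"id": "x", "match_kind": "scope"}])       # passes: required keys present, enum satisfied
    try:
        validate([{"id": "", "match_kind": "general"}])  # rejected: "id" violates minLength 1
    except fastjsonschema.JsonSchemaException as err:
        print(err)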


@@ -70,9 +70,10 @@ if __name__ == "__main__":  # pragma: no cover
    if argv.input_model and argv.saved_model_dir:
        print("[ ERROR ] Both keys were provided --input_model and --input_dir. Please, provide only one of them")
        sys.exit(1)
-   graph_def, _ = load_tf_graph_def(graph_file_name=argv.input_model, is_binary=not argv.text,
-                                    checkpoint=argv.input_checkpoint,
-                                    model_dir=argv.saved_model_dir, saved_model_tags=argv.saved_model_tags)
+   tags = argv.saved_model_tags.split(",")
+   graph_def, _, _ = load_tf_graph_def(graph_file_name=argv.input_model, is_binary=not argv.text,
+                                       checkpoint=argv.input_checkpoint,
+                                       model_dir=argv.saved_model_dir, saved_model_tags=tags)
    summary = summarize_graph(graph_def)
    print("{} input(s) detected:".format(len(summary['inputs'])))
    for input in summary['inputs']:
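
The behavioral change above in isolation: the saved-model tags string is now split into a list before being passed on (a plain-Python illustration with a hypothetical CLI value):

    saved_model_tags = "serve,gpu"       # hypothetical --saved_model_tags value
    tags = saved_model_tags.split(",")
    print(tags)                          # -> ['serve', 'gpu']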


@@ -8,3 +8,4 @@ onnx>=1.8.1
defusedxml>=0.7.1
urllib3>=1.26.4
requests>=2.25.1
+fastjsonschema~=2.15.1


@@ -3,3 +3,4 @@ numpy>=1.16.6,<1.20
protobuf>=3.15.6
defusedxml>=0.7.1
requests>=2.25.1
+fastjsonschema~=2.15.1

View File

@@ -6,3 +6,4 @@ test-generator==0.1.1
defusedxml>=0.5.0
requests>=2.20.0
pytest>=6.2.4
+fastjsonschema~=2.15.1

View File

@@ -2,3 +2,4 @@ networkx~=2.5
numpy>=1.16.6,<1.20
defusedxml>=0.7.1
requests>=2.25.1
+fastjsonschema~=2.15.1

View File

@@ -5,3 +5,4 @@ numpy>=1.16.6,<1.20
defusedxml>=0.7.1
urllib3>=1.26.4
requests>=2.25.1
+fastjsonschema~=2.15.1

View File

@@ -3,3 +3,4 @@ networkx~=2.5
numpy>=1.16.6,<1.20
defusedxml>=0.7.1
requests>=2.25.1
+fastjsonschema~=2.15.1

View File

@@ -4,3 +4,4 @@ networkx~=2.5
numpy>=1.16.6,<1.19
defusedxml>=0.7.1
requests>=2.25.1
+fastjsonschema~=2.15.1

View File

@@ -3,3 +3,4 @@ networkx~=2.5
numpy>=1.16.6,<1.20
defusedxml>=0.7.1
requests>=2.25.1
+fastjsonschema~=2.15.1


@@ -5,7 +5,7 @@ import unittest

import numpy as np

-from extensions.back.LayoutChangeForEinsum import LayoutChangeForEinsum
+from extensions.middle.LayoutChangeForEinsum import LayoutChangeForEinsum
from mo.front.common.partial_infer.utils import int64_array
from mo.utils.ir_engine.compare_graphs import compare_graphs
from unit_tests.utils.graph import build_graph, result, regular_op_with_shaped_data, valued_const_with_data, connect
@@ -47,7 +47,7 @@ class LayoutChangeForEinsumTests(unittest.TestCase):
                             # this input does not require additional transpose
                             # since the corresponding subscript can be adjusted
                             'placeholder_2_d': {'shape': np.array([3, 8, 5, 7])},
-                            # [3, 5, 10, 12] - NHWC, [3, 12, 5, 10] - NCHW
+                            # [3, 8, 10, 12] - NHWC, [3, 12, 8, 10] - NCHW
                             # the third input must be transposed to NHWC layout
                             # since ellipsis covers multiple dimensions in the end
                             # the corresponding subscript is not changed
@@ -60,7 +60,7 @@ class LayoutChangeForEinsumTests(unittest.TestCase):
                             # and additional transpose to NCHW will be inserted
                             'einsum_d': {'shape': np.array([2, 12, 7, 8, 10])},
                             }, nodes_with_edges_only=True)
-       graph.graph['fw'] = 'tf'
+       graph.graph['layout'] = 'NHWC'

        graph_ref = build_graph(nodes_attributes,
                                [*connect('placeholder_3', '0:transpose_1'),
@@ -80,3 +80,46 @@ class LayoutChangeForEinsumTests(unittest.TestCase):
        LayoutChangeForEinsum().find_and_replace_pattern(graph)
        (flag, resp) = compare_graphs(graph, graph_ref, 'output', check_op_attrs=True)
        self.assertTrue(flag, resp)
+
+   def test_no_adjustment_layout_einsum(self):
+       graph = build_graph(nodes_attributes,
+                           [*connect('placeholder_1', '0:einsum'),
+                            *connect('placeholder_2', '1:einsum'),
+                            *connect('placeholder_3', '2:einsum'),
+                            *connect('einsum', 'output')],
+                           {  # this input stays as is since it is of a rank equal to 3
+                            'placeholder_1_d': {'shape': np.array([2, 3, 5])},
+                            # [3, 5, 7, 8] - NHWC
+                            # this input does not require additional transpose
+                            # since the corresponding layout is correct
+                            'placeholder_2_d': {'shape': np.array([3, 5, 7, 8])},
+                            # [3, 8, 10, 12] - NHWC
+                            # this input does not require additional transpose
+                            # since the corresponding layout is correct
+                            'placeholder_3_d': {'shape': np.array([3, 8, 10, 12])},
+                            # equation is still for NHWC layout
+                            'einsum': {'equation': "abc,bcde,bc...->ade...",
+                                       'correct_in_data_layout': [0, 1, 2],
+                                       'correct_out_data_layout': [0]},
+                            # [2, 7, 8, 10, 12] - NHWC
+                            # this output does not require additional transpose
+                            # since the corresponding layout is correct
+                            'einsum_d': {'shape': np.array([2, 7, 8, 10, 12])},
+                            }, nodes_with_edges_only=True)
+       graph.graph['layout'] = 'NHWC'
+
+       graph_ref = build_graph(nodes_attributes,
+                               [*connect('placeholder_1', '0:einsum'),
+                                *connect('placeholder_2', '1:einsum'),
+                                *connect('placeholder_3', '2:einsum'),
+                                *connect('einsum', 'output')],
+                               {'placeholder_1_d': {'shape': np.array([2, 3, 5])},
+                                'placeholder_2_d': {'shape': np.array([3, 5, 7, 8])},
+                                'placeholder_3_d': {'shape': np.array([3, 8, 10, 12])},
+                                'einsum': {'equation': "abc,bcde,bc...->ade..."},
+                                'einsum_d': {'shape': np.array([2, 7, 8, 10, 12])}
+                                })
+
+       LayoutChangeForEinsum().find_and_replace_pattern(graph)
+       (flag, resp) = compare_graphs(graph, graph_ref, 'output', check_op_attrs=True)
+       self.assertTrue(flag, resp)


@@ -28,7 +28,7 @@ class TestNoInferenceEngine(unittest.TestCase):
def test_frontends():
    setup_env()
    args = [sys.executable, '-m', 'pytest',
-           'frontend_ngraph_test_actual.py', '-s']
+           os.path.join(os.path.dirname(__file__), 'frontend_ngraph_test_actual.py'), '-s']
    status = subprocess.run(args, env=os.environ)
    assert not status.returncode

@@ -37,7 +37,7 @@ def test_frontends():
def test_main_test():
    setup_env()
    args = [sys.executable, '-m', 'pytest',
-           'main_test_actual.py', '-s']
+           os.path.join(os.path.dirname(__file__), 'main_test_actual.py'), '-s']
    status = subprocess.run(args, env=os.environ)
    assert not status.returncode


@@ -0,0 +1,40 @@
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import os
import unittest
from fnmatch import fnmatch
from generator import generator, generate
from mo.utils.custom_replacement_config import load_and_validate_json_config
from mo.utils.error import Error
from mo.utils.utils import get_mo_root_dir
def get_json_configs(mo_root_dir):
config_path = os.path.join(mo_root_dir, 'extensions', 'front')
pattern = "*.json"
config_files_list = []
for path, subdirs, files in os.walk(config_path):
for name in files:
if fnmatch(name, pattern):
config_files_list.append((os.path.join(path, name),))
return config_files_list
@generator
class TestSchema(unittest.TestCase):
base_dir = get_mo_root_dir()
schema_file = os.path.join(base_dir, 'mo', 'utils', 'schema.json')
transformation_configs = get_json_configs(base_dir)
test_json1 = '[{"id": "", "match_kind": "general", "custom_attributes": {}}]'
test_json2 = '[{"id": "someid", "match_kind": "abc", "custom_attributes": {}}]'
@generate(*transformation_configs)
def test_schema_file(self, transformation_config):
self.assertTrue(load_and_validate_json_config(transformation_config))
def test_schema_id_empty(self):
self.assertRaises(Error, load_and_validate_json_config, self.test_json1)
def test_schema_match_kind_wrong(self):
self.assertRaises(Error, load_and_validate_json_config, self.test_json2)


@@ -15,7 +15,13 @@ add_library(${TARGET_FE_NAME} SHARED ${LIBRARY_SRC} ${LIBRARY_HEADERS})

target_include_directories(${TARGET_FE_NAME} PRIVATE ".")

-target_link_libraries(${TARGET_FE_NAME} PRIVATE frontend_manager)
+target_link_libraries(${TARGET_FE_NAME} PRIVATE ngraph::frontend_manager::static)
target_link_libraries(${TARGET_FE_NAME} PUBLIC ngraph PRIVATE ngraph::builder)

add_clang_format_target(${TARGET_FE_NAME}_clang FOR_TARGETS ${TARGET_FE_NAME})
+
+set(NGRAPH_INSTALL_LIB "deployment_tools/ngraph/lib")
+install(TARGETS ${TARGET_FE_NAME}
+        RUNTIME DESTINATION ${NGRAPH_INSTALL_LIB} COMPONENT tests EXCLUDE_FROM_ALL
+        LIBRARY DESTINATION ${NGRAPH_INSTALL_LIB} COMPONENT tests EXCLUDE_FROM_ALL)


@@ -41,3 +41,7 @@ set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY_OLD})
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY_OLD})
set(CMAKE_COMPILE_PDB_OUTPUT_DIRECTORY ${CMAKE_COMPILE_PDB_OUTPUT_DIRECTORY_OLD})
set(CMAKE_PDB_OUTPUT_DIRECTORY ${CMAKE_PDB_OUTPUT_DIRECTORY_OLD})
+
+install(TARGETS ${PYBIND_FE_NAME}
+        DESTINATION python/${PYTHON_VERSION}
+        COMPONENT tests EXCLUDE_FROM_ALL)


@@ -19,8 +19,8 @@ namespace ngraph
        class NGRAPH_API Acosh : public util::UnaryElementwiseArithmetic
        {
        public:
-           static constexpr NodeTypeInfo type_info{"Acosh", 3};
-           const NodeTypeInfo& get_type_info() const override { return type_info; }
+           NGRAPH_RTTI_DECLARATION;
            /// \brief Constructs an Acosh operation.
            Acosh() = default;
            /// \brief Constructs an Acosh operation.


@@ -69,7 +69,7 @@ namespace ngraph
            void set_reduction_axes(AxisSet axes) { m_reduction_axes = axes; }

        private:
-           double m_eps = 1e-9;
+           double m_eps;
            bool m_across_channels;
            bool m_normalize_variance;
            AxisSet m_reduction_axes;
@@ -128,9 +128,9 @@ namespace ngraph
            MVNEpsMode get_eps_mode() const { return m_eps_mode; }

        private:
-           bool m_normalize_variance = true;
-           float m_eps = (float)1e-6;
-           MVNEpsMode m_eps_mode = MVNEpsMode::INSIDE_SQRT;
+           bool m_normalize_variance;
+           float m_eps;
+           MVNEpsMode m_eps_mode;
        };
    } // namespace v6
} // namespace op


@@ -30,13 +30,11 @@ namespace ngraph
                std::shared_ptr<Node>
                    clone_with_new_inputs(const OutputVector& new_args) const override;
                /// \return the body of the iteration
-               std::shared_ptr<Function> get_body() const { return m_body; }
+               std::shared_ptr<Function> get_body() const { return m_bodies[0]; }
                /// \param body set the body of the iteration
-               void set_body(const std::shared_ptr<Function>& body) { m_body = body; }
+               void set_body(const std::shared_ptr<Function>& body) { set_function(body); }
                void validate_and_infer_types() override;
                void revalidate_and_infer_types_for_body_ops();
-               /// \return the body of the iteration
-               std::shared_ptr<Function> get_function() override;

            private:
                void try_to_set_num_iterations_if_no_slice_inputs();


@@ -0,0 +1,366 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ngraph/op/parameter.hpp>
#include "ngraph/op/op.hpp"
namespace ngraph
{
namespace op
{
namespace util
{
/// \brief Abstract base class for sub-graph based ops, i.e. ops that have some
/// sub-graphs
///
class NGRAPH_API MultiSubGraphOp : public Op
{
public:
NGRAPH_RTTI_DECLARATION;
/// \brief Abstract class describes a connection between a MultiSubGraphOp input and
/// the body.
class InputDescription
{
protected:
///
/// \brief Constructs a new instance.
///
/// \param input_index Position of the MultiSubGraphOp input
/// \param body_parameter_index Body parameter to receive input
///
InputDescription(uint64_t input_index, uint64_t body_parameter_index);
InputDescription() = default;
public:
using type_info_t = DiscreteTypeInfo;
virtual ~InputDescription() = default;
virtual std::shared_ptr<InputDescription> copy() const = 0;
virtual const type_info_t& get_type_info() const = 0;
uint64_t m_input_index{0};
uint64_t m_body_parameter_index{0};
};
/// \brief Abstract class describes how a MultiSubGraphOp output is produced from
/// the body.
class OutputDescription
{
protected:
///
/// \brief Constructs a new instance.
///
/// \param body_value_index A body value that produces the output
/// \param output_index The MultiSubGraphOp output index
///
OutputDescription(uint64_t body_value_index, uint64_t output_index);
OutputDescription() = default;
public:
using type_info_t = DiscreteTypeInfo;
virtual ~OutputDescription() = default;
virtual std::shared_ptr<OutputDescription> copy() const = 0;
virtual const type_info_t& get_type_info() const = 0;
uint64_t m_body_value_index{0};
uint64_t m_output_index{0};
};
///
/// \brief Describes a body input formed from slices of an input to
/// MultiSubGraphOp.
///
class NGRAPH_API SliceInputDescription : public InputDescription
{
public:
NGRAPH_RTTI_DECLARATION;
///
/// \brief Constructs a new instance.
///
/// \param input_index Position of the MultiSubGraphOp input
/// \param body_parameter_index Body parameter position to receive input
/// \param start First index for slices
/// \param stride Step amount for slices
/// \param part_size Width of slices
/// \param end Last index for slices
/// \param axis Axis being sliced
///
SliceInputDescription(uint64_t input_index,
uint64_t body_parameter_index,
int64_t start,
int64_t stride,
int64_t part_size,
int64_t end,
int64_t axis);
SliceInputDescription() = default;
std::shared_ptr<InputDescription> copy() const override;
int64_t m_start{0};
int64_t m_stride{0};
int64_t m_part_size{0};
int64_t m_end{0};
int64_t m_axis{0};
};
///
/// \brief Describes a body input initialized from a MultiSubGraphOp input
/// on the first iteration, and then a body output thereafter.
///
class NGRAPH_API MergedInputDescription : public InputDescription
{
public:
NGRAPH_RTTI_DECLARATION;
///
/// \brief Constructs a new instance.
///
/// \param input_index Position of the MultiSubGraphOp input
/// supplying a value to body_parameter for
/// the initial iteration.
/// \param body_parameter_index Body parameter position to receive input.
/// \param body_value_index Body value to supply body_parameter for
/// successive
/// iterations.
///
MergedInputDescription(uint64_t input_index,
uint64_t body_parameter_index,
uint64_t body_value_index);
MergedInputDescription() = default;
std::shared_ptr<InputDescription> copy() const override;
uint64_t m_body_value_index{0};
};
/// \brief Produces an output by concatenating an output from each iteration
class NGRAPH_API ConcatOutputDescription : public OutputDescription
{
public:
NGRAPH_RTTI_DECLARATION;
///
/// \brief Constructs a new instance.
///
/// \param body_value_index A body value that produces the output
/// \param output_index The MultiSubGraphOp output index
/// \param start First index for slices
/// \param stride Step amount for slices
/// \param part_size Width of slices
/// \param end Last index for slices
/// \param axis Axis being sliced
///
ConcatOutputDescription(uint64_t body_value_index,
uint64_t output_index,
int64_t start,
int64_t stride,
int64_t part_size,
int64_t end,
int64_t axis);
ConcatOutputDescription() = default;
std::shared_ptr<OutputDescription> copy() const override;
int64_t m_start{0};
int64_t m_stride{0};
int64_t m_part_size{0};
int64_t m_end{0};
int64_t m_axis{0};
};
/// \brief Describes a body input that is kept invariant between iterations
class NGRAPH_API InvariantInputDescription : public InputDescription
{
public:
NGRAPH_RTTI_DECLARATION;
///
/// \brief Constructs a new instance.
///
/// \param input_index Position of the MultiSubGraphOp input
/// \param body_parameter_index Body parameter to receive input
///
InvariantInputDescription(uint64_t input_index, uint64_t body_parameter_index);
InvariantInputDescription() = default;
std::shared_ptr<InputDescription> copy() const override;
};
/// \brief Produces an output from a specific iteration
class NGRAPH_API BodyOutputDescription : public MultiSubGraphOp::OutputDescription
{
public:
NGRAPH_RTTI_DECLARATION;
///
/// \brief Constructs a new instance.
///
/// \param body_value_index A body value that produces the output
/// \param output_index The SubGraphOp output index
/// \param iteration which iteration (typically -1, final) will
/// supply the value
///
BodyOutputDescription(uint64_t body_value_index,
uint64_t output_index,
int64_t iteration = -1);
BodyOutputDescription() = default;
std::shared_ptr<MultiSubGraphOp::OutputDescription> copy() const override;
int64_t m_iteration{0};
};
using MultiSubgraphInputDescriptionPtr =
std::shared_ptr<MultiSubGraphOp::InputDescription>;
using MultiSubgraphOutputDescriptionPtr =
std::shared_ptr<MultiSubGraphOp::OutputDescription>;
using MultiSubgraphInputDescriptionVector =
std::vector<MultiSubgraphInputDescriptionPtr>;
using MultiSubgraphOutputDescriptionVector =
std::vector<MultiSubgraphOutputDescriptionPtr>;
/// \brief Gets internal sub-graph by index in MultiSubGraphOp
///
/// \param index sub-graph's index in op
/// \return pointer to ngraph::Function with sub-graph
virtual const std::shared_ptr<Function>& get_function(int index) const
{
return m_bodies[index];
};
/// \brief Adds sub-graph to MultiSubGraphOp
///
/// \param index index of new sub-graph
/// \param func new sub-graph as ngraph::Function
virtual void set_function(int index, const std::shared_ptr<Function>& func)
{
m_bodies[index] = func;
}
/// \brief Gets vector with connections between operation inputs
/// and internal sub-graph parameters
///
/// \param index index of internal sub-graph
/// \return vector of input descriptions
const MultiSubgraphInputDescriptionVector& get_input_descriptions(int index) const
{
return m_input_descriptions[index];
}
/// \brief Gets vector with connections between operation inputs
/// and internal sub-graph parameters
///
/// \param index index of internal sub-graph
/// \return vector of input descriptions
MultiSubgraphInputDescriptionVector& get_input_descriptions(int index)
{
return m_input_descriptions[index];
}
/// \brief Gets vector with connections between operation outputs
/// and internal sub-graph results
///
/// \param index index of internal sub-graph
/// \return vector of output descriptions
const MultiSubgraphOutputDescriptionVector& get_output_descriptions(int index) const
{
return m_output_descriptions[index];
}
/// \brief Gets vector with connections between operation outputs
/// and internal sub-graph results
///
/// \param index index of internal sub-graph
/// \return vector of output descriptions
MultiSubgraphOutputDescriptionVector& get_output_descriptions(int index)
{
return m_output_descriptions[index];
}
/// \brief Sets vector with connections between operation inputs
/// and internal sub-graph parameters
///
/// \param index index of internal sub-graph
/// \param inputs vector of input descriptions
void set_input_descriptions(int index,
const MultiSubgraphInputDescriptionVector& inputs)
{
m_input_descriptions[index] = inputs;
}
/// \brief Sets vector with connections between operation outputs
/// and internal sub-graph results
///
/// \param index index of internal sub-graph
/// \param outputs vector of output descriptions
void set_output_descriptions(int index,
const MultiSubgraphOutputDescriptionVector& outputs)
{
m_output_descriptions[index] = outputs;
}
///
/// \brief Set input descriptions for MultiSubGraphOp input.
///
/// \param value The value supplied as an input to the block.
/// \param bodies_parameters vector of bodies parameters.
virtual void set_invariant_inputs(const Output<Node>& value,
const ParameterVector& bodies_parameters);
///
/// \brief Set output descriptions for MultiSubGraphOp output.
///
/// \param bodies_results vector of bodies results for one output.
/// \return value Output node for bodies_results.
virtual Output<Node> set_body_outputs(const ResultVector& bodies_results);
MultiSubGraphOp(const MultiSubGraphOp&) = delete;
MultiSubGraphOp(MultiSubGraphOp&&) = default;
MultiSubGraphOp& operator=(const MultiSubGraphOp&) = delete;
MultiSubGraphOp& operator=(MultiSubGraphOp&&) = default;
protected:
// Find an input corresponding to value, adding one if necessary.
Input<Node> input_for_value(const Output<Node>& value);
MultiSubGraphOp(size_t number_of_bodies);
MultiSubGraphOp() = default;
MultiSubGraphOp(const OutputVector& args, size_t number_of_bodies);
explicit MultiSubGraphOp(const OutputVector& args);
std::vector<std::shared_ptr<Function>> m_bodies;
std::vector<MultiSubgraphInputDescriptionVector> m_input_descriptions;
std::vector<MultiSubgraphOutputDescriptionVector> m_output_descriptions;
};
using MultiSubgraphInputDescriptionPtr =
util::MultiSubGraphOp::MultiSubgraphInputDescriptionPtr;
using MultiSubgraphOutputDescriptionPtr =
util::MultiSubGraphOp::MultiSubgraphOutputDescriptionPtr;
using MultiSubgraphInputDescriptionVector =
util::MultiSubGraphOp::MultiSubgraphInputDescriptionVector;
using MultiSubgraphOutputDescriptionVector =
util::MultiSubGraphOp::MultiSubgraphOutputDescriptionVector;
} // namespace util
} // namespace op
template <>
class NGRAPH_API AttributeAdapter<
std::vector<std::shared_ptr<ngraph::op::util::MultiSubGraphOp::InputDescription>>>
: public DirectValueAccessor<
std::vector<std::shared_ptr<ngraph::op::util::MultiSubGraphOp::InputDescription>>>
{
public:
AttributeAdapter(
std::vector<std::shared_ptr<ngraph::op::util::MultiSubGraphOp::InputDescription>>&
value)
: DirectValueAccessor<std::vector<
std::shared_ptr<ngraph::op::util::MultiSubGraphOp::InputDescription>>>(value)
{
}
NGRAPH_RTTI_DECLARATION;
};
template <>
class NGRAPH_API AttributeAdapter<
std::vector<std::shared_ptr<ngraph::op::util::MultiSubGraphOp::OutputDescription>>>
: public DirectValueAccessor<
std::vector<std::shared_ptr<ngraph::op::util::MultiSubGraphOp::OutputDescription>>>
{
public:
AttributeAdapter(
std::vector<std::shared_ptr<ngraph::op::util::MultiSubGraphOp::OutputDescription>>&
value)
: DirectValueAccessor<std::vector<
std::shared_ptr<ngraph::op::util::MultiSubGraphOp::OutputDescription>>>(value)
{
}
NGRAPH_RTTI_DECLARATION;
};
} // namespace ngraph
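
A toy Python model (not the nGraph API) of the bookkeeping this header introduces: every per-body attribute becomes a list indexed by sub-graph, and a single-body op degenerates to index 0. Class and attribute names here are hypothetical:

    class MultiSubGraphModel:
        """Toy stand-in for MultiSubGraphOp: one slot per sub-graph."""
        def __init__(self, number_of_bodies):
            self.bodies = [None] * number_of_bodies
            self.input_descriptions = [[] for _ in range(number_of_bodies)]
            self.output_descriptions = [[] for _ in range(number_of_bodies)]

        def set_function(self, index, func):
            self.bodies[index] = func


    class SubGraphModel(MultiSubGraphModel):
        """Toy stand-in for SubGraphOp: a MultiSubGraphModel with exactly one body."""
        def __init__(self):
            super().__init__(number_of_bodies=1)

        def get_function(self):
            return self.bodies[0]   # mirrors get_body() returning m_bodies[0]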


@@ -5,7 +5,7 @@
#pragma once

#include <ngraph/op/parameter.hpp>
-#include "ngraph/op/op.hpp"
+#include "ngraph/op/util/multi_subgraph_base.hpp"

namespace ngraph
{
@@ -13,226 +13,46 @@ namespace ngraph
    {
        namespace util
        {
-           /// \brief Abstract base class for sub-graph based ops, i.e ops that have sub-graph
+           /// \brief Abstract base class for sub-graph based ops, i.e. ops that have only one
+           /// sub-graph
            ///
-           class NGRAPH_API SubGraphOp : public Op
+           class NGRAPH_API SubGraphOp : public MultiSubGraphOp
            {
            public:
                NGRAPH_RTTI_DECLARATION;

-               /// \brief Describes a connection between a SubGraphOp input and the body.
-               class InputDescription
-               {
-               protected:
-                   ///
-                   /// \brief Constructs a new instance.
-                   ///
-                   /// \param input_index Position of the SubGraphOp input
-                   /// \param body_parameter_index Body parameter to receive input
-                   ///
-                   InputDescription(uint64_t input_index, uint64_t body_parameter_index);
-                   InputDescription() = default;
-
-               public:
-                   using type_info_t = DiscreteTypeInfo;
-                   virtual ~InputDescription() = default;
-                   virtual std::shared_ptr<InputDescription> copy() const = 0;
-                   virtual const type_info_t& get_type_info() const = 0;
-                   uint64_t m_input_index{0};
-                   uint64_t m_body_parameter_index{0};
-               };
+               virtual const std::shared_ptr<Function>& get_function() const
+               {
+                   return m_bodies[0];
+               };

-               ///
-               /// \brief Describes a body input formed from slices of an input to
-               ///        SubGraphOp.
-               ///
-               class NGRAPH_API SliceInputDescription : public InputDescription
-               {
-               public:
-                   static constexpr type_info_t type_info{"SliceInputDescription", 0};
-                   const type_info_t& get_type_info() const override { return type_info; }
-                   ///
-                   /// \brief Constructs a new instance.
-                   ///
-                   /// \param input_index Position of the SubGraphOp input
-                   /// \param body_parameter_index Body parameter position to receive input
-                   /// \param start First index for slices
-                   /// \param stride Step amount for slices
-                   /// \param part_size Width of slices
-                   /// \param end Last index for slices
-                   /// \param axis Axis being sliced
-                   ///
-                   SliceInputDescription(uint64_t input_index,
-                                         uint64_t body_parameter_index,
-                                         int64_t start,
-                                         int64_t stride,
-                                         int64_t part_size,
-                                         int64_t end,
-                                         int64_t axis);
-                   SliceInputDescription() = default;
-                   std::shared_ptr<InputDescription> copy() const override;
-                   int64_t m_start{0};
-                   int64_t m_stride{0};
-                   int64_t m_part_size{0};
-                   int64_t m_end{0};
-                   int64_t m_axis{0};
-               };
+               virtual void set_function(const std::shared_ptr<Function>& func)
+               {
+                   m_bodies[0] = func;
+               };

-               ///
-               /// \brief Describes a body input initialized from a SubGraphOp input on
-               ///        the first iteration, and then a body output thereafter.
-               ///
-               class NGRAPH_API MergedInputDescription : public InputDescription
-               {
-               public:
-                   static constexpr type_info_t type_info{"MergedInputDescription", 0};
-                   const type_info_t& get_type_info() const override { return type_info; }
-                   ///
-                   /// \brief Constructs a new instance.
-                   ///
-                   /// \param input_index Position of the SubGraphOp input
-                   ///                    supplying a value to body_parameter for
-                   ///                    the initial iteration.
-                   /// \param body_parameter_index Body parameter position to receive input.
-                   /// \param body_value_index Body value to supply body_parameter for
-                   ///                         successive iterations.
-                   ///
-                   MergedInputDescription(uint64_t input_index,
-                                          uint64_t body_parameter_index,
-                                          uint64_t body_value_index);
-                   MergedInputDescription() = default;
-                   std::shared_ptr<InputDescription> copy() const override;
-                   uint64_t m_body_value_index{0};
-               };
-
-               ///
-               /// \brief Describes a body input initialized from a SubGraphOp input on
-               ///        the first iteration, and invariant thereafter.
-               ///
-               class NGRAPH_API InvariantInputDescription : public InputDescription
-               {
-               public:
-                   static constexpr type_info_t type_info{"InvariantInputDescription", 0};
-                   const type_info_t& get_type_info() const override { return type_info; }
-                   ///
-                   /// \brief Constructs a new instance.
-                   ///
-                   /// \param input_index Position of the SubGraphOp input
-                   /// \param body_parameter_index Body parameter to receive input
-                   ///
-                   InvariantInputDescription(uint64_t input_index, uint64_t body_parameter_index);
-                   InvariantInputDescription() = default;
-                   std::shared_ptr<InputDescription> copy() const override;
-               };
-
-               /// \brief Describes how a SubGraphOp output is produced from the body.
-               class OutputDescription
-               {
-               protected:
-                   ///
-                   /// \brief Constructs a new instance.
-                   ///
-                   /// \param body_value_index A body value that produces the output
-                   /// \param output_index The SubGraphOp output index
-                   ///
-                   OutputDescription(uint64_t body_value_index, uint64_t output_index);
-                   OutputDescription() = default;
-
-               public:
-                   using type_info_t = DiscreteTypeInfo;
-                   virtual ~OutputDescription() = default;
-                   virtual std::shared_ptr<OutputDescription> copy() const = 0;
-                   virtual const type_info_t& get_type_info() const = 0;
-                   uint64_t m_body_value_index{0};
-                   uint64_t m_output_index{0};
-               };
-
-               /// \brief Produces an output by concatenating an output from each iteration
-               class NGRAPH_API ConcatOutputDescription : public OutputDescription
-               {
-               public:
-                   static constexpr type_info_t type_info{"ConcatOutputDescription", 0};
-                   const type_info_t& get_type_info() const override { return type_info; }
-                   ///
-                   /// \brief Constructs a new instance.
-                   ///
-                   /// \param body_value_index A body value that produces the output
-                   /// \param output_index The SubGraphOp output index
-                   /// \param start First index for slices
-                   /// \param stride Step amount for slices
-                   /// \param part_size Width of slices
-                   /// \param end Last index for slices
-                   /// \param axis Axis being sliced
-                   ///
-                   ConcatOutputDescription(uint64_t body_value_index,
-                                           uint64_t output_index,
-                                           int64_t start,
-                                           int64_t stride,
-                                           int64_t part_size,
-                                           int64_t end,
-                                           int64_t axis);
-                   ConcatOutputDescription() = default;
-                   std::shared_ptr<OutputDescription> copy() const override;
-                   int64_t m_start{0};
-                   int64_t m_stride{0};
-                   int64_t m_part_size{0};
-                   int64_t m_end{0};
-                   int64_t m_axis{0};
-               };
-
-               /// \brief Produces an output from a specific iteration
-               class NGRAPH_API BodyOutputDescription : public OutputDescription
-               {
-               public:
-                   static constexpr type_info_t type_info{"BodyOutputDescription", 0};
-                   const type_info_t& get_type_info() const override { return type_info; }
-                   ///
-                   /// \brief Constructs a new instance.
-                   ///
-                   /// \param body_value_index A body value that produces the output
-                   /// \param output_index The SubGraphOp output index
-                   /// \param iteration which iteration (typically -1, final) will
-                   ///                  supply the value
-                   ///
-                   BodyOutputDescription(uint64_t body_value_index,
-                                         uint64_t output_index,
-                                         int64_t iteration);
-                   BodyOutputDescription() = default;
-                   std::shared_ptr<OutputDescription> copy() const override;
-                   int64_t m_iteration{0};
-               };
-
-               virtual std::shared_ptr<Function> get_function() { return m_body; };
-               virtual std::shared_ptr<const Function> get_function() const { return m_body; };
-               virtual void set_function(const std::shared_ptr<Function>& func) { m_body = func; };
                /// \return a reference to the input descriptions.
                const std::vector<std::shared_ptr<InputDescription>>& get_input_descriptions() const
                {
-                   return m_input_descriptions;
+                   return m_input_descriptions[0];
                }
                /// \return a reference to the input descriptions. Can add input descriptions
                /// before
                /// validation.
                std::vector<std::shared_ptr<InputDescription>>& get_input_descriptions()
                {
-                   return m_input_descriptions;
+                   return m_input_descriptions[0];
                }
                /// \return a reference to the output descriptions.
                const std::vector<std::shared_ptr<OutputDescription>>&
                    get_output_descriptions() const
                {
-                   return m_output_descriptions;
+                   return m_output_descriptions[0];
                }
                /// \return a reference to the output descriptions. Can add output descriptions
                /// before
                /// validation.
                std::vector<std::shared_ptr<OutputDescription>>& get_output_descriptions()
                {
-                   return m_output_descriptions;
+                   return m_output_descriptions[0];
                }

                ///
@@ -324,15 +144,13 @@ namespace ngraph
                // Find an input corresponding to value, adding one if necessary.
                Input<Node> input_for_value(const Output<Node>& value);

-               SubGraphOp() = default;
+               SubGraphOp();
                explicit SubGraphOp(const OutputVector& args);

-               std::shared_ptr<Function> m_body;
-               std::vector<std::shared_ptr<op::util::SubGraphOp::InputDescription>>
-                   m_input_descriptions;
-               std::vector<std::shared_ptr<op::util::SubGraphOp::OutputDescription>>
-                   m_output_descriptions;
+           private:
+               using MultiSubGraphOp::get_function;
+               using MultiSubGraphOp::set_function;
            };

            using InputDescriptionPtr = std::shared_ptr<util::SubGraphOp::InputDescription>;
            using OutputDescriptionPtr = std::shared_ptr<util::SubGraphOp::OutputDescription>;
@@ -341,47 +159,4 @@ namespace ngraph
        } // namespace util
    } // namespace op

-   template <>
-   class NGRAPH_API AttributeAdapter<
-       std::vector<std::shared_ptr<ngraph::op::util::SubGraphOp::InputDescription>>>
-       : public DirectValueAccessor<
-             std::vector<std::shared_ptr<ngraph::op::util::SubGraphOp::InputDescription>>>
-   {
-   public:
-       AttributeAdapter(
-           std::vector<std::shared_ptr<ngraph::op::util::SubGraphOp::InputDescription>>& value)
-           : DirectValueAccessor<
-                 std::vector<std::shared_ptr<ngraph::op::util::SubGraphOp::InputDescription>>>(
-                 value)
-       {
-       }
-
-       static constexpr DiscreteTypeInfo type_info{
-           "AttributeAdapter<std::vector<std::shared_ptr<ngraph::op::util::SubGraphOp::"
-           "InputDescription>>>",
-           0};
-       const DiscreteTypeInfo& get_type_info() const override { return type_info; }
-   };
-
-   template <>
-   class NGRAPH_API AttributeAdapter<
-       std::vector<std::shared_ptr<ngraph::op::util::SubGraphOp::OutputDescription>>>
-       : public DirectValueAccessor<
-             std::vector<std::shared_ptr<ngraph::op::util::SubGraphOp::OutputDescription>>>
-   {
-   public:
-       AttributeAdapter(
-           std::vector<std::shared_ptr<ngraph::op::util::SubGraphOp::OutputDescription>>& value)
-           : DirectValueAccessor<
-                 std::vector<std::shared_ptr<ngraph::op::util::SubGraphOp::OutputDescription>>>(
-                 value)
-       {
-       }
-
-       static constexpr DiscreteTypeInfo type_info{
-           "AttributeAdapter<std::vector<std::shared_ptr<ngraph::op::util::SubGraphOp::"
-           "OutputDescription>>>",
-           0};
-       const DiscreteTypeInfo& get_type_info() const override { return type_info; }
-   };
} // namespace ngraph


@@ -13,7 +13,8 @@ namespace ngraph
    {
        namespace reference
        {
-           template <typename T>
+           template <typename T,
+                     typename std::enable_if<!std::is_integral<T>::value, bool>::type = true>
            void acosh(const T* arg, T* out, size_t count)
            {
                for (size_t i = 0; i < count; i++)
@@ -21,6 +22,16 @@ namespace ngraph
                    out[i] = std::acosh(arg[i]);
                }
            }
+
+           template <typename T,
+                     typename std::enable_if<std::is_integral<T>::value, bool>::type = true>
+           void acosh(const T* arg, T* out, size_t count)
+           {
+               for (size_t i = 0; i < count; i++)
+               {
+                   out[i] = std::roundl(std::acosh(arg[i]));
+               }
+           }
        } // namespace reference
    } // namespace runtime
} // namespace ngraph
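
A plain-Python illustration (not the nGraph API) of the integral overload's behavior: integer element types cannot hold fractional results, so the reference rounds acosh to the nearest integer, as std::roundl does above:

    import math

    def acosh_int(values):
        # Round acosh to the nearest integer for integral element types.
        return [round(math.acosh(v)) for v in values]

    print(acosh_int([1, 2, 10]))   # -> [0, 1, 3]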

Some files were not shown because too many files have changed in this diff.