[GNA] Insert identity using ngraph transformation (#13317)
* [GNA] Insert identity using ngraph transformation * Update src/plugins/intel_gna/src/ops/identity.hpp Co-authored-by: Szymon Irzabek <szymon.jakub.irzabek@intel.com> * Update src/plugins/intel_gna/src/transformations/rt_info/gna_precision_change_flag.hpp Co-authored-by: Szymon Irzabek <szymon.jakub.irzabek@intel.com> * Update src/plugins/intel_gna/src/transformations/rt_info/gna_precision_change_flag.cpp Co-authored-by: Szymon Irzabek <szymon.jakub.irzabek@intel.com> * Update src/plugins/intel_gna/src/transformations/insert_identity_layer.hpp Co-authored-by: Szymon Irzabek <szymon.jakub.irzabek@intel.com> * Rewrites pass with Identity insertion using recusrive function. Adds test for Split. Adds comments * Change namespace for element type Co-authored-by: Szymon Irzabek <szymon.jakub.irzabek@intel.com>
This commit is contained in:
@@ -93,6 +93,7 @@
|
||||
#include "transformations/insert_copy_layer.hpp"
|
||||
#include "transformations/split_eltwise.hpp"
|
||||
#include "transformations/markup_fusable_transpose.hpp"
|
||||
#include "transformations/insert_identity_layer.hpp"
|
||||
|
||||
#include <ngraph/opsets/opset7.hpp>
|
||||
|
||||
@@ -740,6 +741,16 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
|
||||
input is doing
|
||||
*/
|
||||
manager.register_pass<ov::intel_gna::pass::SplitEltwise>();
|
||||
/* The following transformations perform insertion of Identity layer in 3 steps:
|
||||
1. Mark inputs with rt_info attribute where precision change from i32 to i16/i8 is happened
|
||||
2. Insert Identity after operation which have consumers marked with precision change
|
||||
3. Cleanup appropriate attribute from rt_info
|
||||
*/
|
||||
manager.register_pass<ov::intel_gna::pass::MarkIdentityCandidates>(config.gnaFlags.input_low_precision);
|
||||
manager.register_pass<ov::intel_gna::pass::InsertIdentity>();
|
||||
manager.register_pass<ov::intel_gna::pass::IdentityCandidatesCleanup>();
|
||||
// Breaks fusing of layers before result
|
||||
manager.register_pass<ov::intel_gna::pass::BreakFusingOfOutputLayers>();
|
||||
if (!config.gnaFlags.sw_fp32 && !config.gnaFlags.uniformPwlDesign) {
|
||||
manager.register_pass<ov::intel_gna::pass::PWLApproximationWithFq>(config.gnaFlags.pwlMaxErrorPercent);
|
||||
manager.register_pass<ov::intel_gna::pass::PWLApproximation>(config.gnaFlags.pwlMaxErrorPercent);
|
||||
@@ -843,6 +854,8 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
|
||||
passes->registerPass<InsertConcatAligningFilterPass>();
|
||||
passes->registerPass<ReorderConcatInputsPass>();
|
||||
passes->registerPass<RemovePermutationsNHWCToNCHWPass>();
|
||||
// Keep legacy inserting of Identity layer here
|
||||
// because concat and split aliging passes are not moved to ngraph yet
|
||||
passes->registerPass<InsertIdentityLayerPass>();
|
||||
passes->registerPass<BreakFusingOfOutputLayersPass>();
|
||||
passes->registerPass<InsertDiagonalLayerPass>();
|
||||
@@ -999,7 +1012,6 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
|
||||
}
|
||||
portId++;
|
||||
}
|
||||
|
||||
// TODO: how active list will work in multioutput case
|
||||
// make room for active list
|
||||
gnamem->getQueue(REGION_OUTPUTS)
|
||||
|
||||
@@ -50,6 +50,7 @@ enum class LayerType {
|
||||
FakeQuantize,
|
||||
Gemm,
|
||||
Pwl,
|
||||
Identity,
|
||||
NO_TYPE
|
||||
};
|
||||
|
||||
@@ -89,6 +90,7 @@ static const InferenceEngine::details::caseless_map<std::string, GNAPluginNS::La
|
||||
{ "SoftSign", LayerType::SoftSign },
|
||||
{ "FakeQuantize", LayerType::FakeQuantize },
|
||||
{ "Pwl", LayerType::Pwl },
|
||||
{ "Identity", LayerType::Identity },
|
||||
{"Gemm", LayerType::Gemm},
|
||||
};
|
||||
|
||||
|
||||
34
src/plugins/intel_gna/src/ops/identity.cpp
Normal file
34
src/plugins/intel_gna/src/ops/identity.cpp
Normal file
@@ -0,0 +1,34 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "identity.hpp"
|
||||
|
||||
#include <ngraph/validation_util.hpp>
|
||||
|
||||
#include "ngraph/attribute_visitor.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
namespace op {
|
||||
|
||||
Identity::Identity(const ngraph::Output<ngraph::Node>& arg) : Op({arg}) {
|
||||
constructor_validate_and_infer_types();
|
||||
}
|
||||
|
||||
std::shared_ptr<ngraph::Node> Identity::clone_with_new_inputs(const ngraph::OutputVector& new_args) const {
|
||||
check_new_args_count(this, new_args);
|
||||
return std::make_shared<Identity>(new_args.at(0));
|
||||
}
|
||||
|
||||
void Identity::validate_and_infer_types() {
|
||||
set_output_type(0, get_input_element_type(0), get_input_partial_shape(0));
|
||||
}
|
||||
|
||||
bool Identity::visit_attributes(ngraph::AttributeVisitor& visitor) {
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace op
|
||||
} // namespace intel_gna
|
||||
} // namespace ov
|
||||
30
src/plugins/intel_gna/src/ops/identity.hpp
Normal file
30
src/plugins/intel_gna/src/ops/identity.hpp
Normal file
@@ -0,0 +1,30 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "ngraph/op/op.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
namespace op {
|
||||
/// \brief GNA specific Identity layer operation.
|
||||
///
|
||||
class Identity : public ngraph::op::Op {
|
||||
public:
|
||||
OPENVINO_OP("Identity", "intel_gna", ov::op::Op);
|
||||
|
||||
Identity() = default;
|
||||
/// \brief Constructs a Identity operation.
|
||||
///
|
||||
/// \param [in] arg Input tensor
|
||||
Identity(const ngraph::Output<ngraph::Node>& arg);
|
||||
|
||||
void validate_and_infer_types() override;
|
||||
std::shared_ptr<ngraph::Node> clone_with_new_inputs(const ngraph::OutputVector& new_args) const override;
|
||||
bool visit_attributes(ngraph::AttributeVisitor& visitor) override;
|
||||
};
|
||||
} // namespace op
|
||||
} // namespace intel_gna
|
||||
} // namespace ov
|
||||
@@ -3,16 +3,30 @@
|
||||
//
|
||||
|
||||
#pragma once
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
||||
#include <transformations/utils/utils.hpp>
|
||||
#include <ngraph/opsets/opset9.hpp>
|
||||
#include <ngraph/opsets/opset8.hpp>
|
||||
#include <ngraph/opsets/opset7.hpp>
|
||||
|
||||
#include <legacy/ngraph_ops/crop_ie.hpp>
|
||||
#include "gna_lib_ver_selector.hpp"
|
||||
#include <legacy/ngraph_ops/convolution_ie.hpp>
|
||||
#include <legacy/ngraph_ops/eltwise.hpp>
|
||||
#include <legacy/ngraph_ops/fully_connected.hpp>
|
||||
#include <legacy/ngraph_ops/scaleshift.hpp>
|
||||
#include <legacy/ngraph_ops/power.hpp>
|
||||
#include <legacy/ngraph_ops/relu_ie.hpp>
|
||||
|
||||
#include "backend/gna_limitations.hpp"
|
||||
#include "layers/gna_permute.hpp"
|
||||
#include <transformations/utils/utils.hpp>
|
||||
#include <transformations/rt_info/gna_transpose_fusable.hpp>
|
||||
#include <ngraph/opsets/opset8.hpp>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
||||
#include "ops/copy.hpp"
|
||||
#include "ops/identity.hpp"
|
||||
#include "ops/pwl.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
@@ -139,6 +153,127 @@ inline bool is_one_dim_shapes(const ov::Shape& in_dims, const ov::Shape& out_dim
|
||||
return is_one_dim_shape(in_dims) && is_one_dim_shape(out_dims);
|
||||
}
|
||||
|
||||
static bool is_power_activation(const ov::Node* node) noexcept {
|
||||
if (auto power_op = dynamic_cast<const ngraph::opset9::Power*>(node)) {
|
||||
auto const_node = std::dynamic_pointer_cast<ngraph::opset9::Constant>(power_op->get_input_node_shared_ptr(1));
|
||||
if (!const_node)
|
||||
return false;
|
||||
float value;
|
||||
if (!ngraph::op::util::get_single_value(const_node, value)) {
|
||||
return true;
|
||||
}
|
||||
return (1.0f != value);
|
||||
} else if (auto power_op = std::dynamic_pointer_cast<ngraph::op::PowerIE>(node)) {
|
||||
return (1.0f != power_op->power);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool is_power_activation(const std::shared_ptr<ngraph::Node>& node) noexcept {
|
||||
return is_power_activation(node.get());
|
||||
}
|
||||
|
||||
static bool is_eltwise_mul(const ngraph::Output<ngraph::Node>& node) {
|
||||
auto eltwise = std::dynamic_pointer_cast<ngraph::op::Eltwise>(node.get_node_shared_ptr());
|
||||
if (!eltwise) return false;
|
||||
return eltwise->eltwise_type == ELTWISE_TYPE::Prod;
|
||||
}
|
||||
|
||||
static bool is_eltwise_add(const ngraph::Output<ngraph::Node>& node) {
|
||||
auto eltwise = std::dynamic_pointer_cast<ngraph::op::Eltwise>(node.get_node_shared_ptr());
|
||||
if (!eltwise) return false;
|
||||
return eltwise->eltwise_type == ELTWISE_TYPE::Sum;
|
||||
}
|
||||
|
||||
static bool is_pooling(const ngraph::Output<ngraph::Node>& node) {
|
||||
return (std::dynamic_pointer_cast<ngraph::opset7::MaxPool>(node.get_node_shared_ptr()) != nullptr);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static bool is_Tbit_fq(const std::shared_ptr<ngraph::Node>& node) {
|
||||
auto fq_node = std::dynamic_pointer_cast<ngraph::opset9::FakeQuantize>(node);
|
||||
if (!fq_node)
|
||||
return false;
|
||||
auto levels = fq_node->get_levels();
|
||||
return std::numeric_limits<T>::max() == levels;
|
||||
}
|
||||
|
||||
static bool is_32bit_fq(const std::shared_ptr<ngraph::Node>& node) {
|
||||
return is_Tbit_fq<uint32_t>(node);
|
||||
}
|
||||
|
||||
static bool is_16bit_fq(const std::shared_ptr<ngraph::Node>& node) {
|
||||
return is_Tbit_fq<uint16_t>(node);
|
||||
}
|
||||
|
||||
static bool is_8bit_fq(const std::shared_ptr<ngraph::Node>& node) {
|
||||
return is_Tbit_fq<uint8_t>(node);
|
||||
}
|
||||
|
||||
static bool is_activation(const ov::Node* node) noexcept {
|
||||
return ((dynamic_cast<const ngraph::opset9::Clamp*>(node) != nullptr) ||
|
||||
(dynamic_cast<const ngraph::opset9::Sigmoid*>(node) != nullptr) ||
|
||||
(dynamic_cast<const ngraph::opset9::Relu*>(node) != nullptr) ||
|
||||
(dynamic_cast<const ngraph::op::ReLUIE*>(node) != nullptr) ||
|
||||
(dynamic_cast<const ngraph::opset9::Tanh*>(node) != nullptr) ||
|
||||
(dynamic_cast<const ngraph::opset9::PRelu*>(node) != nullptr) ||
|
||||
(dynamic_cast<const ngraph::opset9::Exp*>(node) != nullptr) ||
|
||||
(dynamic_cast<const ngraph::opset9::Log*>(node) != nullptr) ||
|
||||
(dynamic_cast<const ngraph::opset9::Sign*>(node) != nullptr) ||
|
||||
(dynamic_cast<const ngraph::opset9::Abs*>(node) != nullptr) ||
|
||||
(dynamic_cast<const ngraph::opset9::SoftSign*>(node) != nullptr) ||
|
||||
is_power_activation(node) ||
|
||||
(dynamic_cast<const ngraph::opset9::FakeQuantize*>(node) != nullptr) ||
|
||||
(dynamic_cast<const ov::intel_gna::op::Pwl*>(node) != nullptr) ||
|
||||
(dynamic_cast<const ov::intel_gna::op::Identity*>(node) != nullptr));
|
||||
}
|
||||
|
||||
static bool is_activation(const std::shared_ptr<ngraph::Node>& node) noexcept {
|
||||
return is_activation(node.get());
|
||||
}
|
||||
|
||||
static bool is_gna_precision_agnostic(std::shared_ptr<ngraph::Node> node) {
|
||||
return ((std::dynamic_pointer_cast<ngraph::opset9::VariadicSplit>(node) != nullptr) ||
|
||||
(std::dynamic_pointer_cast<ngraph::opset9::Split>(node) != nullptr) ||
|
||||
(std::dynamic_pointer_cast<ngraph::opset9::Slice>(node) != nullptr) ||
|
||||
(std::dynamic_pointer_cast<ngraph::opset9::Concat>(node) != nullptr) ||
|
||||
(std::dynamic_pointer_cast<ngraph::opset9::Reshape>(node) != nullptr) ||
|
||||
(std::dynamic_pointer_cast<ngraph::opset9::Squeeze>(node) != nullptr) ||
|
||||
(std::dynamic_pointer_cast<ngraph::opset9::Unsqueeze>(node) != nullptr) ||
|
||||
(std::dynamic_pointer_cast<ngraph::opset9::Transpose>(node) != nullptr) ||
|
||||
(std::dynamic_pointer_cast<ov::intel_gna::op::Copy>(node) != nullptr) ||
|
||||
((std::dynamic_pointer_cast<ngraph::op::CropIE>(node) != nullptr) && !is_crop_affined(node)));
|
||||
}
|
||||
|
||||
static bool has_8bit_or_16_bit_output(const std::shared_ptr<ngraph::Node>& node) noexcept {
|
||||
return ((ngraph::op::is_parameter(node)) ||
|
||||
(ngraph::op::is_constant(node)) ||
|
||||
(std::dynamic_pointer_cast<ngraph::opset9::ReadValue>(node) != nullptr) ||
|
||||
(std::dynamic_pointer_cast<ngraph::opset9::Assign>(node) != nullptr) ||
|
||||
(is_activation(node) && (!is_32bit_fq(node))) ||
|
||||
(is_8bit_fq(node) || (is_16bit_fq(node))) ||
|
||||
is_gna_precision_agnostic(node));
|
||||
}
|
||||
|
||||
static bool has_32bit_output(const std::shared_ptr<ngraph::Node>& node) {
|
||||
return ((std::dynamic_pointer_cast<ngraph::op::FullyConnected>(node) != nullptr) ||
|
||||
(std::dynamic_pointer_cast<ngraph::opset9::MatMul>(node) != nullptr) ||
|
||||
(std::dynamic_pointer_cast<ngraph::opset9::Convolution>(node) != nullptr) ||
|
||||
(std::dynamic_pointer_cast<ngraph::op::ConvolutionIE>(node) != nullptr) ||
|
||||
(std::dynamic_pointer_cast<ngraph::opset9::Add>(node) != nullptr) ||
|
||||
(std::dynamic_pointer_cast<ngraph::opset9::Multiply>(node) != nullptr) ||
|
||||
(std::dynamic_pointer_cast<ngraph::op::Eltwise>(node) != nullptr) ||
|
||||
(std::dynamic_pointer_cast<ngraph::op::ScaleShiftIE>(node) != nullptr) ||
|
||||
is_pooling(node) ||
|
||||
((std::dynamic_pointer_cast<ngraph::opset9::Power>(node) != nullptr) && !is_power_activation(node)) ||
|
||||
((std::dynamic_pointer_cast<ngraph::op::PowerIE>(node) != nullptr) && !is_power_activation(node)) ||
|
||||
is_crop_affined(node) ||
|
||||
is_32bit_fq(node));
|
||||
}
|
||||
|
||||
inline bool has_32bit_input(const std::shared_ptr<ngraph::Node>& node) {
|
||||
return is_activation(node) || is_pooling(node);
|
||||
}
|
||||
} // namespace ngraph_util
|
||||
} // namespace intel_gna
|
||||
} // namespace ov
|
||||
|
||||
@@ -0,0 +1,228 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
#include <openvino/cc/ngraph/itt.hpp>
|
||||
|
||||
#include "transformations/insert_identity_layer.hpp"
|
||||
#include "transformations/rt_info/gna_precision_change_flag.hpp"
|
||||
|
||||
#include <ngraph/opsets/opset9.hpp>
|
||||
#include <ngraph/pattern/op/or.hpp>
|
||||
#include <ngraph/pattern/op/wrap_type.hpp>
|
||||
#include <ngraph/rt_info.hpp>
|
||||
#include <ops/identity.hpp>
|
||||
#include <legacy/ngraph_ops/eltwise.hpp>
|
||||
#include "ops/util/util.hpp"
|
||||
|
||||
using namespace ov::intel_gna::pass;
|
||||
using namespace ov::intel_gna::rt_info;
|
||||
using namespace ov::intel_gna::ngraph_util;
|
||||
|
||||
namespace {
|
||||
void mark_for_identity_insertion(std::shared_ptr<ngraph::Node> node,
|
||||
size_t input_index) {
|
||||
gnalog() << "Mark input as candidate for identity insertion " << input_index << ":" << node->get_friendly_name() << std::endl;
|
||||
auto input = node->input(input_index);
|
||||
add_precision_change_flag(input, ov::element::i32, ov::element::i16);
|
||||
}
|
||||
|
||||
std::shared_ptr<ov::intel_gna::op::Identity> create_indentity(std::shared_ptr<ngraph::Node>& input_op) {
|
||||
auto identity_op = std::make_shared<ov::intel_gna::op::Identity>(input_op);
|
||||
// Keep name of previous operation
|
||||
identity_op->set_friendly_name(input_op->get_friendly_name());
|
||||
input_op->set_friendly_name(input_op->get_friendly_name() + "/previous");
|
||||
ngraph::copy_runtime_info(input_op, identity_op);
|
||||
return identity_op;
|
||||
}
|
||||
|
||||
void insert_identity_layer_after(std::shared_ptr<ngraph::Node>& input_op,
|
||||
size_t index) {
|
||||
NGRAPH_CHECK(input_op);
|
||||
|
||||
gnalog() << "Insert identity layer after " << input_op->get_friendly_name() <<
|
||||
" (" << input_op->get_type_name() << "):"<< index << std::endl;
|
||||
|
||||
auto consumers = input_op->output(index).get_target_inputs();
|
||||
auto identity_op = create_indentity(input_op);
|
||||
for (auto& consumer : consumers) {
|
||||
consumer.replace_source_output(identity_op);
|
||||
}
|
||||
}
|
||||
|
||||
void insert_identity_layer_between(std::shared_ptr<ngraph::Node>& input_op,
|
||||
std::shared_ptr<ngraph::Node>& output_op,
|
||||
size_t index) {
|
||||
NGRAPH_CHECK(input_op);
|
||||
NGRAPH_CHECK(output_op);
|
||||
|
||||
gnalog() << "Insert identity layer after " << input_op->get_friendly_name() <<
|
||||
" (" << input_op->get_type_name() << ") and before " << index << ":" <<
|
||||
output_op->get_friendly_name() << " (" << output_op->get_type_name() << ")" << std::endl;
|
||||
|
||||
auto identity_op = create_indentity(input_op);
|
||||
output_op->input(index).replace_source_output(identity_op);
|
||||
}
|
||||
|
||||
// forward declaration
|
||||
bool walk_through_the_outputs(std::shared_ptr<ov::Node>& prev_node,
|
||||
size_t& prev_node_output_index,
|
||||
const std::shared_ptr<ov::Node>& node,
|
||||
bool first_iteration = false);
|
||||
|
||||
bool process_next_node(std::shared_ptr<ov::Node>& prev_node,
|
||||
size_t& prev_node_output_index,
|
||||
const std::shared_ptr<ov::Node>& node,
|
||||
const size_t input_index) {
|
||||
// Check whether node is going to be skipped
|
||||
bool to_be_skipped = (is_gna_precision_agnostic(node) && !std::dynamic_pointer_cast<ngraph::opset9::Concat>(node)) ||
|
||||
is_pooling(node);
|
||||
if (to_be_skipped) {
|
||||
// if it is pooling, update previous node, since activation
|
||||
// should be inserted after the pooling
|
||||
if (is_pooling(node)) {
|
||||
prev_node = node;
|
||||
// supported pooling from opset7 has 1 output port
|
||||
prev_node_output_index = 0;
|
||||
}
|
||||
// walk over all outputs of this node
|
||||
return walk_through_the_outputs(prev_node, prev_node_output_index, node);
|
||||
}
|
||||
// Don't skip this node, check whether precision is changed
|
||||
if (is_precision_changed(node->input(input_index))) {
|
||||
// if at least one target input requires Identity insertion,
|
||||
// process the appropriate output port
|
||||
// if there are other comsumers with i32 input
|
||||
// diagonal layer will be insrted anyway before them
|
||||
insert_identity_layer_after(prev_node, prev_node_output_index);
|
||||
// graph modified
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool walk_through_the_outputs(std::shared_ptr<ov::Node>& prev_node,
|
||||
size_t& prev_node_output_index,
|
||||
const std::shared_ptr<ov::Node>& node,
|
||||
bool first_iteration) {
|
||||
bool is_identity_inserted = false;
|
||||
// walk over all outputs
|
||||
for (size_t i = 0; i < node->get_output_size(); i++) {
|
||||
// check all target inputs node of this output
|
||||
for (auto&& input : node->output(i).get_target_inputs()) {
|
||||
// if it is first iteration track output port id
|
||||
// because prev_node is functional
|
||||
if (first_iteration)
|
||||
prev_node_output_index = i;
|
||||
// recursively check next node, skipping precision agnostic
|
||||
if (process_next_node(prev_node, prev_node_output_index, input.get_node()->shared_from_this(), input.get_index())) {
|
||||
// graph is modified
|
||||
is_identity_inserted = true;
|
||||
// go to the next output, other target inputs are not interesting anymore
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return is_identity_inserted;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
bool MarkIdentityCandidates::run_on_model(const std::shared_ptr<ov::Model>& m) {
|
||||
RUN_ON_FUNCTION_SCOPE(MarkIdentityCandidates);
|
||||
for (auto& node : m->get_ordered_ops()) {
|
||||
auto check_previos_node_and_mark = [&node](){
|
||||
for (size_t i = 0; i < node->get_input_size(); i++) {
|
||||
auto prev_node = node->get_input_node_shared_ptr(i);
|
||||
prev_node = get_prev_node_skipping_certain(prev_node, is_gna_precision_agnostic);
|
||||
if (has_32bit_output(prev_node) || is_pooling(prev_node)) {
|
||||
mark_for_identity_insertion(node, i);
|
||||
}
|
||||
}
|
||||
};
|
||||
if (std::dynamic_pointer_cast<ngraph::op::Eltwise>(node)) {
|
||||
auto input0_node = node->get_input_node_shared_ptr(0);
|
||||
auto input1_node = node->get_input_node_shared_ptr(1);
|
||||
auto func_input0_node = get_prev_node_skipping_certain(input0_node, is_gna_precision_agnostic);
|
||||
auto func_input1_node = get_prev_node_skipping_certain(input1_node, is_gna_precision_agnostic);
|
||||
if (is_eltwise_add(node) && !is_low_precision_input) {
|
||||
if (!has_32bit_output(func_input0_node) || !has_32bit_output(func_input1_node))
|
||||
continue;
|
||||
|
||||
mark_for_identity_insertion(node, 0);
|
||||
} else if (is_eltwise_mul(node) || (is_eltwise_add(node) && is_low_precision_input)) {
|
||||
if (has_8bit_or_16_bit_output(func_input0_node) && has_8bit_or_16_bit_output(func_input1_node))
|
||||
continue;
|
||||
|
||||
if (has_32bit_output(func_input0_node)) {
|
||||
mark_for_identity_insertion(node, 0);
|
||||
}
|
||||
|
||||
if (has_32bit_output(func_input1_node)) {
|
||||
mark_for_identity_insertion(node, 1);
|
||||
}
|
||||
}
|
||||
} else if (std::dynamic_pointer_cast<ngraph::opset9::Concat>(node) != nullptr) {
|
||||
check_previos_node_and_mark();
|
||||
} else {
|
||||
if (is_gna_precision_agnostic(node) || has_32bit_input(node) ||
|
||||
ngraph::op::is_parameter(node) || ngraph::op::is_constant(node) ||
|
||||
ngraph::op::is_output(node) || ngraph::op::is_sink(node)) {
|
||||
continue;
|
||||
}
|
||||
check_previos_node_and_mark();
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
BreakFusingOfOutputLayers::BreakFusingOfOutputLayers() {
|
||||
MATCHER_SCOPE(BreakFusingOfOutputLayers);
|
||||
|
||||
auto result_op = ngraph::pattern::wrap_type<ngraph::opset9::Result>({ngraph::pattern::any_input()});
|
||||
|
||||
ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) {
|
||||
const auto& pattern_map = m.get_pattern_value_map();
|
||||
auto result_node = pattern_map.at(result_op).get_node_shared_ptr();
|
||||
auto input_node = result_node->get_input_node_shared_ptr(0);
|
||||
|
||||
for (size_t i = 0; i < input_node->get_output_size(); i++) {
|
||||
for (auto&& input : input_node->output(i).get_target_inputs()) {
|
||||
if (!is_activation(input.get_node())) {
|
||||
continue;
|
||||
}
|
||||
insert_identity_layer_between(input_node, result_node, 0);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
};
|
||||
|
||||
auto m = std::make_shared<ngraph::pattern::Matcher>(result_op, matcher_name);
|
||||
this->register_matcher(m, callback);
|
||||
}
|
||||
|
||||
bool InsertIdentity::run_on_model(const std::shared_ptr<ov::Model>& m) {
|
||||
RUN_ON_FUNCTION_SCOPE(InsertIdentity);
|
||||
bool is_graph_modifed = false;
|
||||
|
||||
for (auto& node : m->get_ordered_ops()) {
|
||||
// if node has 8 bit or 16 bit output already or Result or State, it is not our case, skip it
|
||||
if (has_8bit_or_16_bit_output(node) || ngraph::op::is_output(node) || ngraph::op::is_sink(node))
|
||||
continue;
|
||||
|
||||
// walk through the all outputs
|
||||
std::shared_ptr<ov::Node> prev_node = node;
|
||||
size_t prev_node_output_index = 0;
|
||||
is_graph_modifed |= walk_through_the_outputs(prev_node, prev_node_output_index, node, true);
|
||||
}
|
||||
return is_graph_modifed;
|
||||
}
|
||||
|
||||
bool IdentityCandidatesCleanup::run_on_model(const std::shared_ptr<ov::Model>& f) {
|
||||
RUN_ON_FUNCTION_SCOPE(IdentityCandidatesCleanup);
|
||||
for (auto& node : f->get_ordered_ops()) {
|
||||
for (auto& input : node->inputs()) {
|
||||
remove_precision_change_flag(input);
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
@@ -0,0 +1,71 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <openvino/pass/graph_rewrite.hpp>
|
||||
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
namespace pass {
|
||||
/**
|
||||
* Group of transformations which insert Identity layer in the following cases:
|
||||
* in case of eltwise sum in 16-bit input precision, one of inputs is 4 bytes, the other is 2 bytes
|
||||
* in case of eltwise mul in 16-bit input precision, both inputs are 2 bytes
|
||||
* in case of eltwise sum in low (8-bit) input precision, both inputs are 1 byte
|
||||
* in case of eltwise mul in low (8-bit) input precision, both inputs are 1 byte
|
||||
* for e sum if we have 4-4 inputs we will handle that by inserting identity activation -- handling here
|
||||
* for e sum if we have 4-2 - OK
|
||||
* for e sum if we have 2-2 inputs we need to insert diagonal
|
||||
* for e sum if we have 1-1 inputs in low precision mode - OK
|
||||
* for e mul if we have 2-2 - OK
|
||||
* for e mul if we have 1-1 in low precision mode - OK
|
||||
* for e mul if we have 2-4 - inputs we need to insert identity to put 4 bytes input into weights -- handling here
|
||||
* for e mul if we have 4-4 - inputs we need to insert 2 identities to put both 4 bytes input into weights -- handling here
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Transformation is looking for nodes before which Identity should be inserted and mark them with appropriate rt attribute
|
||||
*/
|
||||
class MarkIdentityCandidates : public ov::pass::ModelPass {
|
||||
public:
|
||||
OPENVINO_RTTI("MarkIdentityCandidates", "0");
|
||||
MarkIdentityCandidates(bool is_low_precision_input) : is_low_precision_input(is_low_precision_input) {}
|
||||
bool run_on_model(const std::shared_ptr<ov::Model>& f) override;
|
||||
private:
|
||||
bool is_low_precision_input;
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* @brief Transformation inserts Identity layer based on rt attribute
|
||||
*/
|
||||
class InsertIdentity : public ov::pass::ModelPass {
|
||||
public:
|
||||
OPENVINO_RTTI("InsertIdentity", "0");
|
||||
bool run_on_model(const std::shared_ptr<ov::Model>& f) override;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief In cases that network output layer is connected to only one layer which is activation additional identity is inserted
|
||||
* so the operation is not fused with the activation allowing to get the results from said layer
|
||||
*/
|
||||
class BreakFusingOfOutputLayers : public ov::pass::MatcherPass {
|
||||
public:
|
||||
OPENVINO_RTTI("BreakFusingOfOutputLayers", "0");
|
||||
BreakFusingOfOutputLayers();
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief IdentityCandidates removes attribute mark for identity insertion
|
||||
*/
|
||||
class IdentityCandidatesCleanup : public ov::pass::ModelPass {
|
||||
public:
|
||||
OPENVINO_RTTI("IdentityCandidatesCleanup", "0");
|
||||
bool run_on_model(const std::shared_ptr<ov::Model>& m) override;
|
||||
};
|
||||
|
||||
} // namespace pass
|
||||
} // namespace intel_gna
|
||||
} // namespace ov
|
||||
@@ -40,10 +40,6 @@ ReorderActivationAndPooling::ReorderActivationAndPooling() {
|
||||
auto pool = std::dynamic_pointer_cast<ngraph::opset7::MaxPool>(pool_node);
|
||||
IE_ASSERT(pool != nullptr);
|
||||
auto kernel_shape = pool->get_kernel();
|
||||
if (kernel_shape.size() > 1 && kernel_shape[0] > 1 && kernel_shape[1] > 1) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto act = pool_node->input_value(0).get_node_shared_ptr();
|
||||
IE_ASSERT(act != nullptr);
|
||||
|
||||
|
||||
@@ -0,0 +1,28 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "gna_precision_change_flag.hpp"
|
||||
|
||||
void ov::intel_gna::rt_info::add_precision_change_flag(ov::Input<Node>& node,
|
||||
const ov::element::Type& in, const ov::element::Type& out) {
|
||||
RTMap& rt_info = node.get_rt_info();
|
||||
rt_info[GNAPrecisionChangeFlag::get_type_info_static()] = GNAPrecisionChangeFlag{in, out};
|
||||
}
|
||||
|
||||
void ov::intel_gna::rt_info::remove_precision_change_flag(ov::Input<Node>& node) {
|
||||
RTMap& rt_info = node.get_rt_info();
|
||||
auto it = rt_info.find(GNAPrecisionChangeFlag::get_type_info_static());
|
||||
if (it != rt_info.end()) {
|
||||
rt_info.erase(it);
|
||||
}
|
||||
}
|
||||
|
||||
bool ov::intel_gna::rt_info::is_precision_changed(const ov::Input<Node>& node) {
|
||||
const RTMap& rt_info = node.get_rt_info();
|
||||
if (rt_info.count(GNAPrecisionChangeFlag::get_type_info_static()) > 0) {
|
||||
auto flag = rt_info.at(GNAPrecisionChangeFlag::get_type_info_static()).as<GNAPrecisionChangeFlag>();
|
||||
return flag.is_changed();
|
||||
}
|
||||
return false;
|
||||
}
|
||||
@@ -0,0 +1,45 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "openvino/core/node.hpp"
|
||||
#include "openvino/core/runtime_attribute.hpp"
|
||||
#include "transformations_visibility.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
namespace rt_info {
|
||||
|
||||
void add_precision_change_flag(ov::Input<Node>& node, const ov::element::Type& in, const ov::element::Type& out);
|
||||
|
||||
void remove_precision_change_flag(ov::Input<Node>& node);
|
||||
|
||||
bool is_precision_changed(const ov::Input<Node>& node);
|
||||
|
||||
/**
|
||||
* @ingroup ie_runtime_attr_api
|
||||
* @brief GNAPrecisionChangeFlag class represents runtime info attribute that marks that precision
|
||||
* is have to be changed before operation
|
||||
*/
|
||||
class GNAPrecisionChangeFlag : public RuntimeAttribute {
|
||||
public:
|
||||
OPENVINO_RTTI("gna_precision_change_flag", "0");
|
||||
|
||||
GNAPrecisionChangeFlag(const ov::element::Type& in, const ov::element::Type& out) : in(in), out(out) {}
|
||||
|
||||
bool is_copyable() const override {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool is_changed() {
|
||||
return in != out;
|
||||
}
|
||||
private:
|
||||
ov::element::Type in;
|
||||
ov::element::Type out;
|
||||
};
|
||||
} // namespace rt_info
|
||||
} // namespace intel_gna
|
||||
} // namespace ov
|
||||
@@ -0,0 +1,618 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <ngraph/function.hpp>
|
||||
#include <ngraph/opsets/opset9.hpp>
|
||||
#include <ngraph/opsets/opset7.hpp>
|
||||
#include <ngraph/pass/manager.hpp>
|
||||
#include <ngraph_functions/builders.hpp>
|
||||
#include <transformations/init_node_info.hpp>
|
||||
#include <common_test_utils/ngraph_test_utils.hpp>
|
||||
#include <transformations/utils/utils.hpp>
|
||||
#include <legacy/ngraph_ops/eltwise.hpp>
|
||||
|
||||
#include "ops/identity.hpp"
|
||||
#include "transformations/insert_identity_layer.hpp"
|
||||
#include "transformations/rt_info/gna_precision_change_flag.hpp"
|
||||
|
||||
namespace testing {
|
||||
|
||||
class InsertIdentityLayerTest: public CommonTestUtils::TestsCommon {
|
||||
public:
|
||||
virtual void Validate();
|
||||
virtual void Run();
|
||||
public:
|
||||
std::shared_ptr<ngraph::Function> m_func, m_ref_func;
|
||||
ngraph::Shape m_input_shape{10};
|
||||
bool m_low_precision = false;
|
||||
};
|
||||
|
||||
void InsertIdentityLayerTest::Validate() {
|
||||
ngraph::pass::Manager m;
|
||||
m.register_pass<ngraph::pass::InitNodeInfo>();
|
||||
m.register_pass<ov::intel_gna::pass::MarkIdentityCandidates>(m_low_precision);
|
||||
m.register_pass<ov::intel_gna::pass::InsertIdentity>();
|
||||
m.register_pass<ov::intel_gna::pass::BreakFusingOfOutputLayers>();
|
||||
m.run_passes(m_func);
|
||||
ASSERT_NO_THROW(check_rt_info(m_func));
|
||||
|
||||
auto result = compare_functions(m_func, m_ref_func);
|
||||
ASSERT_TRUE(result.first);
|
||||
|
||||
// Cleanup rt info and check
|
||||
m.register_pass<ov::intel_gna::pass::IdentityCandidatesCleanup>();
|
||||
m.run_passes(m_func);
|
||||
for (auto& node : m_func->get_ordered_ops()) {
|
||||
for (auto& input : node->inputs()) {
|
||||
const ov::RTMap& rt_info = input.get_rt_info();
|
||||
ASSERT_EQ(rt_info.count(ov::intel_gna::rt_info::GNAPrecisionChangeFlag::get_type_info_static()), 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Builds the test and reference functions (SetUp, overridden per test class)
// and then executes the pass pipeline with all checks (Validate).
void InsertIdentityLayerTest::Run() {
    SetUp();
    Validate();
}
|
||||
|
||||
/******************************************************* Concat layer tests *******************************************************/
|
||||
|
||||
// Parameters for the Concat identity-insertion tests.
using InsertIdentityConcatTestParams = std::tuple<size_t,   // concat axis
                                                  size_t>;  // number of concat inputs
|
||||
|
||||
// Checks that an Identity layer is inserted on every Concat input that is
// produced by a 32-bit operation (the Add and each Multiply here).
class InsertIdentityLayerConcatTest: public InsertIdentityLayerTest,
                                     public ::testing::WithParamInterface<InsertIdentityConcatTestParams> {
public:
    // Builds a readable test name from the (axis, inputs_num) tuple.
    static std::string getTestCaseName(const testing::TestParamInfo<InsertIdentityConcatTestParams>& obj) {
        size_t axis, inputs_num;
        std::tie(axis, inputs_num) = obj.param;

        std::ostringstream result;
        result << "inputsNum=" << inputs_num << "_";
        result << "axis=" << axis;

        return result.str();
    }
    void SetUp() override {
        size_t axis, inputs_num;
        std::tie(axis, inputs_num) = this->GetParam();

        InsertIdentityLayerTest::SetUp();
        // Function under test: Add feeds the Concat directly and (for
        // inputs_num > 1) also through extra Multiply branches.
        {
            auto params = std::make_shared<ngraph::opset9::Parameter>(ngraph::element::f32, m_input_shape);
            auto const_add = ngraph::opset9::Constant::create(ngraph::element::f32, m_input_shape, {1});
            auto add = std::make_shared<ngraph::opset9::Add>(params, const_add);
            ngraph::OutputVector concat_inputs = {add};
            for (size_t i = 1; i < inputs_num; ++i) {
                auto const_mul = ngraph::opset9::Constant::create(ngraph::element::f32, m_input_shape, {i});
                auto mul = std::make_shared<ngraph::opset9::Multiply>(add, const_mul);
                concat_inputs.push_back(mul);
            }
            auto concat = std::make_shared<ngraph::opset9::Concat>(concat_inputs, axis);
            auto result = std::make_shared<ngraph::opset9::Result>(concat);
            m_func = std::make_shared<ngraph::Function>(ngraph::ResultVector{result},
                                                        ngraph::ParameterVector{params});
        }

        // Reference: same graph with an Identity after the Add and after
        // every Multiply that feeds the Concat.
        {
            auto params = std::make_shared<ngraph::opset9::Parameter>(ngraph::element::f32, m_input_shape);
            auto const_add = ngraph::opset9::Constant::create(ngraph::element::f32, m_input_shape, {1});
            auto add = std::make_shared<ngraph::opset9::Add>(params, const_add);
            auto identity = std::make_shared<ov::intel_gna::op::Identity>(add);
            ngraph::OutputVector concat_inputs = {identity};
            for (size_t i = 1; i < inputs_num; ++i) {
                auto const_mul = ngraph::opset9::Constant::create(ngraph::element::f32, m_input_shape, {i});
                // The Multiply branches consume the shared Identity output.
                auto mul = std::make_shared<ngraph::opset9::Multiply>(identity, const_mul);
                auto identity_mul = std::make_shared<ov::intel_gna::op::Identity>(mul);
                concat_inputs.push_back(identity_mul);
            }
            auto concat = std::make_shared<ngraph::opset9::Concat>(concat_inputs, axis);
            auto result = std::make_shared<ngraph::opset9::Result>(concat);
            m_ref_func = std::make_shared<ngraph::Function>(ngraph::ResultVector{result},
                                                            ngraph::ParameterVector{params});
        }
    }
};
|
||||
|
||||
// Concat test configuration: a single axis, with 1 or 8 concat inputs.
const size_t axis = 0;
const std::vector<size_t> inputCounts = {1, 8};

TEST_P(InsertIdentityLayerConcatTest, CompareWithRefs) {
    Run();
}

INSTANTIATE_TEST_SUITE_P(TransformationTests, InsertIdentityLayerConcatTest,
                         ::testing::Combine(
                             ::testing::Values(axis),
                             ::testing::ValuesIn(inputCounts)),
                         InsertIdentityLayerConcatTest::getTestCaseName);
|
||||
|
||||
/******************************************************* Split layer tests *******************************************************/
|
||||
|
||||
class InsertIdentityLayerSplitTest: public InsertIdentityLayerTest {
|
||||
public:
|
||||
void SetUp() override {
|
||||
InsertIdentityLayerTest::SetUp();
|
||||
{
|
||||
auto params = std::make_shared<ngraph::opset9::Parameter>(ngraph::element::f32, m_input_shape);
|
||||
auto const_add = ngraph::opset9::Constant::create(ngraph::element::f32, m_input_shape, {1});
|
||||
auto add = std::make_shared<ngraph::opset9::Add>(params, const_add);
|
||||
auto axis_const = ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape{}, {0});
|
||||
auto split = std::make_shared<ngraph::opset9::Split>(add, axis_const, 2);
|
||||
auto result1 = std::make_shared<ngraph::opset9::Result>(split->output(0));
|
||||
auto const_reshape = ngraph::opset9::Constant::create(ngraph::element::i64, {2}, {1, 5});
|
||||
auto reshape = std::make_shared<ngraph::opset9::Reshape>(split->output(1), const_reshape, false);
|
||||
auto const_mul = ngraph::opset9::Constant::create(ngraph::element::f32, {1, 5}, {1});
|
||||
auto mul = std::make_shared<ngraph::opset9::Multiply>(reshape, const_mul);
|
||||
auto result2 = std::make_shared<ngraph::opset9::Result>(mul);
|
||||
m_func = std::make_shared<ngraph::Function>(ngraph::ResultVector{result1, result2},
|
||||
ngraph::ParameterVector{params});
|
||||
}
|
||||
|
||||
{
|
||||
auto params = std::make_shared<ngraph::opset9::Parameter>(ngraph::element::f32, m_input_shape);
|
||||
auto const_add = ngraph::opset9::Constant::create(ngraph::element::f32, m_input_shape, {1});
|
||||
auto add = std::make_shared<ngraph::opset9::Add>(params, const_add);
|
||||
auto identity = std::make_shared<ov::intel_gna::op::Identity>(add);
|
||||
auto axis_const = ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape{}, {0});
|
||||
auto split = std::make_shared<ngraph::opset9::Split>(identity, axis_const, 2);
|
||||
auto result1 = std::make_shared<ngraph::opset9::Result>(split->output(0));
|
||||
auto const_reshape = ngraph::opset9::Constant::create(ngraph::element::i64, {2}, {1, 5});
|
||||
auto reshape = std::make_shared<ngraph::opset9::Reshape>(split->output(1), const_reshape, false);
|
||||
auto const_mul = ngraph::opset9::Constant::create(ngraph::element::f32, {1, 5}, {1});
|
||||
auto mul = std::make_shared<ngraph::opset9::Multiply>(reshape, const_mul);
|
||||
auto result2 = std::make_shared<ngraph::opset9::Result>(mul);
|
||||
m_ref_func = std::make_shared<ngraph::Function>(ngraph::ResultVector{result1, result2},
|
||||
ngraph::ParameterVector{params});
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Builds the Split graphs and runs the identity-insertion pipeline.
TEST_F(InsertIdentityLayerSplitTest, CompareWithRefs) {
    Run();
}
|
||||
|
||||
/******************************************************* Eltwise layer tests *******************************************************/
|
||||
|
||||
typedef std::tuple<
|
||||
ELTWISE_TYPE, // eltwise type
|
||||
bool, // use low precision input
|
||||
bool // both 32bit inputs
|
||||
> InsertIdentityEltwiseTestParams;
|
||||
|
||||
class InsertIdentityLayerEltwiseTest: public InsertIdentityLayerTest,
|
||||
public ::testing::WithParamInterface<InsertIdentityEltwiseTestParams> {
|
||||
public:
|
||||
static std::string getTestCaseName(const testing::TestParamInfo<InsertIdentityEltwiseTestParams>& obj) {
|
||||
ELTWISE_TYPE type;
|
||||
bool low_precision, both_inputs_32bits;
|
||||
std::tie(type, low_precision, both_inputs_32bits) = obj.param;
|
||||
|
||||
std::ostringstream result;
|
||||
result << "type=";
|
||||
switch (type) {
|
||||
case ELTWISE_TYPE::Sum:
|
||||
result << "sum";
|
||||
break;
|
||||
case ELTWISE_TYPE::Prod:
|
||||
result << "prod";
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
result << "_low_precision=" << low_precision;
|
||||
result << "_both_inputs_32bits=" << both_inputs_32bits;
|
||||
|
||||
return result.str();
|
||||
}
|
||||
void SetUp() override {
|
||||
ELTWISE_TYPE type;
|
||||
bool both_inputs_32bits;
|
||||
std::tie(type, m_low_precision, both_inputs_32bits) = this->GetParam();
|
||||
|
||||
InsertIdentityLayerTest::SetUp();
|
||||
{
|
||||
ngraph::ParameterVector params;
|
||||
auto input1 = std::make_shared<ngraph::opset9::Parameter>(ngraph::element::f32, m_input_shape);
|
||||
params.push_back(input1);
|
||||
auto const_input1 = ngraph::opset9::Constant::create(ngraph::element::f32, m_input_shape, {1});
|
||||
auto eltwise1 = std::make_shared<ngraph::op::Eltwise>(input1, const_input1, type);
|
||||
std::shared_ptr<ov::Node> second_input;
|
||||
|
||||
if (both_inputs_32bits) {
|
||||
auto input2 = std::make_shared<ngraph::opset9::Parameter>(ngraph::element::f32, m_input_shape);
|
||||
params.push_back(input2);
|
||||
auto const_input2 = ngraph::opset9::Constant::create(ngraph::element::f32, m_input_shape, {1});
|
||||
auto eltwise2 = std::make_shared<ngraph::op::Eltwise>(input2, const_input2, type);
|
||||
second_input = eltwise2;
|
||||
} else {
|
||||
auto const_input2 = ngraph::opset9::Constant::create(ngraph::element::f32, m_input_shape, {1});
|
||||
second_input = const_input2;
|
||||
}
|
||||
|
||||
auto eltwise3 = std::make_shared<ngraph::op::Eltwise>(eltwise1, second_input, type);
|
||||
|
||||
auto result = std::make_shared<ngraph::opset9::Result>(eltwise3);
|
||||
m_func = std::make_shared<ngraph::Function>(ngraph::ResultVector{result},
|
||||
ngraph::ParameterVector{params});
|
||||
}
|
||||
|
||||
{
|
||||
ngraph::ParameterVector params;
|
||||
auto input1 = std::make_shared<ngraph::opset9::Parameter>(ngraph::element::f32, m_input_shape);
|
||||
params.push_back(input1);
|
||||
auto const_input1 = ngraph::opset9::Constant::create(ngraph::element::f32, m_input_shape, {1});
|
||||
auto eltwise1 = std::make_shared<ngraph::op::Eltwise>(input1, const_input1, type);
|
||||
std::shared_ptr<ov::Node> first_input, second_input;
|
||||
first_input = eltwise1;
|
||||
|
||||
if (both_inputs_32bits) {
|
||||
auto input2 = std::make_shared<ngraph::opset9::Parameter>(ngraph::element::f32, m_input_shape);
|
||||
params.push_back(input2);
|
||||
auto const_input2 = ngraph::opset9::Constant::create(ngraph::element::f32, m_input_shape, {1});
|
||||
auto eltwise2 = std::make_shared<ngraph::op::Eltwise>(input2, const_input2, type);
|
||||
second_input = eltwise2;
|
||||
} else {
|
||||
auto const_input2 = ngraph::opset9::Constant::create(ngraph::element::f32, m_input_shape, {1});
|
||||
second_input = const_input2;
|
||||
}
|
||||
|
||||
if (type == ELTWISE_TYPE::Sum && !m_low_precision && both_inputs_32bits) {
|
||||
auto identity = std::make_shared<ov::intel_gna::op::Identity>(eltwise1);
|
||||
first_input = identity;
|
||||
} else if (type == ELTWISE_TYPE::Prod || m_low_precision) {
|
||||
auto identity = std::make_shared<ov::intel_gna::op::Identity>(eltwise1);
|
||||
first_input = identity;
|
||||
if (both_inputs_32bits) {
|
||||
auto identity = std::make_shared<ov::intel_gna::op::Identity>(eltwise1);
|
||||
second_input = identity;
|
||||
}
|
||||
}
|
||||
|
||||
auto eltwise3 = std::make_shared<ngraph::op::Eltwise>(first_input, second_input, type);
|
||||
|
||||
auto result = std::make_shared<ngraph::opset9::Result>(eltwise3);
|
||||
m_ref_func = std::make_shared<ngraph::Function>(ngraph::ResultVector{result},
|
||||
ngraph::ParameterVector{params});
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Runs the eltwise identity-insertion test for every combination of
// eltwise type, low-precision flag, and 32-bit-inputs flag.
TEST_P(InsertIdentityLayerEltwiseTest, CompareWithRefs) {
    Run();
}

INSTANTIATE_TEST_SUITE_P(TransformationTests, InsertIdentityLayerEltwiseTest,
                         ::testing::Combine(
                             ::testing::ValuesIn({ELTWISE_TYPE::Sum, ELTWISE_TYPE::Prod}),
                             ::testing::ValuesIn({true, false}),
                             ::testing::ValuesIn({true, false})),
                         InsertIdentityLayerEltwiseTest::getTestCaseName);
|
||||
|
||||
/******************************************* Eltwise layer tests (Multiple outputs) *************************************************/
|
||||
|
||||
class InsertIdentityLayerEltwiseMultipleOutputTest: public InsertIdentityLayerEltwiseTest {
|
||||
public:
|
||||
void SetUp() override {
|
||||
ELTWISE_TYPE type;
|
||||
bool both_inputs_32bits;
|
||||
std::tie(type, m_low_precision, both_inputs_32bits) = this->GetParam();
|
||||
|
||||
InsertIdentityLayerTest::SetUp();
|
||||
{
|
||||
ngraph::ParameterVector params;
|
||||
auto input1 = std::make_shared<ngraph::opset9::Parameter>(ngraph::element::f32, m_input_shape);
|
||||
params.push_back(input1);
|
||||
auto const_input1 = ngraph::opset9::Constant::create(ngraph::element::f32, m_input_shape, {1});
|
||||
auto eltwise1 = std::make_shared<ngraph::op::Eltwise>(input1, const_input1, type);
|
||||
std::shared_ptr<ov::Node> second_input;
|
||||
|
||||
if (both_inputs_32bits) {
|
||||
auto input2 = std::make_shared<ngraph::opset9::Parameter>(ngraph::element::f32, m_input_shape);
|
||||
params.push_back(input2);
|
||||
auto const_input2 = ngraph::opset9::Constant::create(ngraph::element::f32, m_input_shape, {1});
|
||||
auto eltwise2 = std::make_shared<ngraph::op::Eltwise>(input2, const_input2, type);
|
||||
second_input = eltwise2;
|
||||
} else {
|
||||
auto const_input2 = ngraph::opset9::Constant::create(ngraph::element::f32, m_input_shape, {1});
|
||||
second_input = const_input2;
|
||||
}
|
||||
auto relu = std::make_shared<ngraph::opset9::Relu>(eltwise1);
|
||||
auto eltwise3 = std::make_shared<ngraph::op::Eltwise>(eltwise1, second_input, type);
|
||||
|
||||
auto result1 = std::make_shared<ngraph::opset9::Result>(relu);
|
||||
auto result2 = std::make_shared<ngraph::opset9::Result>(eltwise3);
|
||||
m_func = std::make_shared<ngraph::Function>(ngraph::ResultVector{result1, result2},
|
||||
ngraph::ParameterVector{params});
|
||||
}
|
||||
|
||||
{
|
||||
ngraph::ParameterVector params;
|
||||
auto input1 = std::make_shared<ngraph::opset9::Parameter>(ngraph::element::f32, m_input_shape);
|
||||
params.push_back(input1);
|
||||
auto const_input1 = ngraph::opset9::Constant::create(ngraph::element::f32, m_input_shape, {1});
|
||||
auto eltwise1 = std::make_shared<ngraph::op::Eltwise>(input1, const_input1, type);
|
||||
std::shared_ptr<ov::Node> first_input, second_input;
|
||||
first_input = eltwise1;
|
||||
|
||||
if (both_inputs_32bits) {
|
||||
auto input2 = std::make_shared<ngraph::opset9::Parameter>(ngraph::element::f32, m_input_shape);
|
||||
params.push_back(input2);
|
||||
auto const_input2 = ngraph::opset9::Constant::create(ngraph::element::f32, m_input_shape, {1});
|
||||
auto eltwise2 = std::make_shared<ngraph::op::Eltwise>(input2, const_input2, type);
|
||||
second_input = eltwise2;
|
||||
} else {
|
||||
auto const_input2 = ngraph::opset9::Constant::create(ngraph::element::f32, m_input_shape, {1});
|
||||
second_input = const_input2;
|
||||
}
|
||||
|
||||
if (type == ELTWISE_TYPE::Sum && !m_low_precision && both_inputs_32bits) {
|
||||
auto identity = std::make_shared<ov::intel_gna::op::Identity>(eltwise1);
|
||||
first_input = identity;
|
||||
} else if (type == ELTWISE_TYPE::Prod || m_low_precision) {
|
||||
auto identity = std::make_shared<ov::intel_gna::op::Identity>(eltwise1);
|
||||
first_input = identity;
|
||||
if (both_inputs_32bits) {
|
||||
auto identity = std::make_shared<ov::intel_gna::op::Identity>(eltwise1);
|
||||
second_input = identity;
|
||||
}
|
||||
}
|
||||
auto relu = std::make_shared<ngraph::opset9::Relu>(first_input);
|
||||
auto eltwise3 = std::make_shared<ngraph::op::Eltwise>(first_input, second_input, type);
|
||||
|
||||
auto result1 = std::make_shared<ngraph::opset9::Result>(relu);
|
||||
auto result2 = std::make_shared<ngraph::opset9::Result>(eltwise3);
|
||||
m_ref_func = std::make_shared<ngraph::Function>(ngraph::ResultVector{result1, result2},
|
||||
ngraph::ParameterVector{params});
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Runs the multiple-consumer eltwise test for every parameter combination.
TEST_P(InsertIdentityLayerEltwiseMultipleOutputTest, CompareWithRefs) {
    Run();
}

INSTANTIATE_TEST_SUITE_P(TransformationTests, InsertIdentityLayerEltwiseMultipleOutputTest,
                         ::testing::Combine(
                             ::testing::ValuesIn({ELTWISE_TYPE::Sum, ELTWISE_TYPE::Prod}),
                             ::testing::ValuesIn({true, false}),
                             ::testing::ValuesIn({true, false})),
                         InsertIdentityLayerEltwiseMultipleOutputTest::getTestCaseName);
|
||||
|
||||
|
||||
/*************************************************** Eltwise with FQ layer tests ****************************************************/
|
||||
|
||||
// Checks that NO Identity layers are inserted when every eltwise input and
// output already goes through a FakeQuantize: the reference function is a
// plain clone of the function under test.
class InsertIdentityLayerEltwiseFQTest: public InsertIdentityLayerEltwiseTest {
public:
    void SetUp() override {
        ELTWISE_TYPE type;
        bool both_inputs_32bits;
        std::tie(type, m_low_precision, both_inputs_32bits) = this->GetParam();

        InsertIdentityLayerTest::SetUp();

        // Wraps a node into a FakeQuantize whose level count matches the
        // configured precision (int8 max vs int16 max).
        // NOTE(review): the bound constants are created as i64 while the data
        // path is f32 — confirm FakeQuantize validation accepts mixed types.
        auto add_fake_quantize = [&](const std::shared_ptr<ngraph::Node>& node) {
            auto levels = (m_low_precision) ? std::numeric_limits<int8_t>::max() : std::numeric_limits<int16_t>::max();
            auto input_low = ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {1});
            auto input_high = ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {5});
            auto output_low = ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {0});
            auto output_high = ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {10});
            return std::make_shared<ngraph::opset9::FakeQuantize>(node, input_low, input_high, output_low, output_high, levels);
        };

        // Function under test: the same eltwise topology as the base test,
        // but with a FakeQuantize after every input, constant, and eltwise.
        {
            ngraph::ParameterVector params;
            auto input1 = std::make_shared<ngraph::opset9::Parameter>(ngraph::element::f32, m_input_shape);
            params.push_back(input1);
            auto input1_fq = add_fake_quantize(input1);
            auto const_input1 = ngraph::opset9::Constant::create(ngraph::element::f32, m_input_shape, {1});
            auto const_input1_fq = add_fake_quantize(const_input1);
            auto eltwise1 = std::make_shared<ngraph::op::Eltwise>(input1_fq, const_input1_fq, type);
            auto eltwise1_fq = add_fake_quantize(eltwise1);
            std::shared_ptr<ov::Node> second_input;

            if (both_inputs_32bits) {
                auto input2 = std::make_shared<ngraph::opset9::Parameter>(ngraph::element::f32, m_input_shape);
                params.push_back(input2);
                auto input2_fq = add_fake_quantize(input2);
                auto const_input2 = ngraph::opset9::Constant::create(ngraph::element::f32, m_input_shape, {1});
                auto const_input2_fq = add_fake_quantize(const_input2);
                auto eltwise2 = std::make_shared<ngraph::op::Eltwise>(input2_fq, const_input2_fq, type);
                auto eltwise2_fq = add_fake_quantize(eltwise2);
                second_input = eltwise2_fq;
            } else {
                auto const_input2 = ngraph::opset9::Constant::create(ngraph::element::f32, m_input_shape, {1});
                auto const_input2_fq = add_fake_quantize(const_input2);
                second_input = const_input2_fq;
            }

            auto eltwise3 = std::make_shared<ngraph::op::Eltwise>(eltwise1_fq, second_input, type);
            auto eltwise3_fq = add_fake_quantize(eltwise3);

            auto result = std::make_shared<ngraph::opset9::Result>(eltwise3_fq);
            m_func = std::make_shared<ngraph::Function>(ngraph::ResultVector{result},
                                                        ngraph::ParameterVector{params});
        }

        // Expected: the pass leaves the function unchanged.
        {
            m_ref_func = m_func->clone();
        }
    }
};
|
||||
|
||||
// Runs the FakeQuantize eltwise test for every parameter combination.
TEST_P(InsertIdentityLayerEltwiseFQTest, CompareWithRefs) {
    Run();
}

INSTANTIATE_TEST_SUITE_P(TransformationTests, InsertIdentityLayerEltwiseFQTest,
                         ::testing::Combine(
                             ::testing::ValuesIn({ELTWISE_TYPE::Sum, ELTWISE_TYPE::Prod}),
                             ::testing::ValuesIn({true, false}),
                             ::testing::ValuesIn({true, false})),
                         InsertIdentityLayerEltwiseFQTest::getTestCaseName);
|
||||
|
||||
/***************************************************** Convolution layer tests *****************************************************/
|
||||
|
||||
// Parameters for the Convolution/MatMul identity-insertion tests.
using InsertIdentityConvTestParams = std::tuple<bool,   // with pooling
                                                bool,   // with activation
                                                bool>;  // swap matmul inputs
|
||||
|
||||
// Checks Identity insertion between a Convolution (optionally followed by
// MaxPool) and a MatMul. In the reference graph an Identity is expected
// only when there is no activation (Relu) between them.
class InsertIdentityLayerConvMatMulTest: public InsertIdentityLayerTest,
                                         public ::testing::WithParamInterface<InsertIdentityConvTestParams> {
public:
    // Builds a readable test name from (with_pool, with_act, swap_matmul).
    static std::string getTestCaseName(const testing::TestParamInfo<InsertIdentityConvTestParams>& obj) {
        bool with_pool, with_act, swap_matmul;
        std::tie(with_pool, with_act, swap_matmul) = obj.param;

        std::ostringstream result;
        result << "with_pool=" << with_pool;
        result << "_with_act=" << with_act;
        result << "_swap_matmul=" << swap_matmul;

        return result.str();
    }
    void SetUp() override {
        bool with_pool, with_act, swap_matmul;
        std::tie(with_pool, with_act, swap_matmul) = this->GetParam();

        InsertIdentityLayerTest::SetUp();

        // 4-D input; the MatMul operates on a 2-D reshape of the conv output.
        m_input_shape = {1, 3, 1, 64};
        auto reshape_shape = ngraph::Shape{3, 64};

        // Function under test: Conv [-> MaxPool] [-> Relu] -> Reshape -> MatMul.
        {
            std::shared_ptr<ov::Node> last_node;
            auto input = std::make_shared<ngraph::opset9::Parameter>(ngraph::element::f32, m_input_shape);
            auto weights = ngraph::opset9::Constant::create(ngraph::element::f32,
                                                            ngraph::Shape{3, 3, 1, 2}, {1});
            auto conv = std::make_shared<ngraph::opset9::Convolution>(input, weights,
                                                                      ngraph::Strides{1, 1},
                                                                      ngraph::CoordinateDiff{0, 0},
                                                                      ngraph::CoordinateDiff{0, 1},
                                                                      ngraph::Strides{1, 1});
            last_node = conv;
            if (with_pool) {
                // NOTE: the opset7 MaxPool is used here, not the opset9 one.
                auto max_pool = std::make_shared<ngraph::opset7::MaxPool>(last_node,
                                                                          ngraph::Strides{1, 1},
                                                                          ngraph::Shape{0, 0},
                                                                          ngraph::Shape{0, 1},
                                                                          ngraph::Shape{1, 2});
                last_node = max_pool;
            }
            if (with_act) {
                auto relu = std::make_shared<ngraph::opset9::Relu>(last_node);
                last_node = relu;
            }
            auto reshape_const = ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape{reshape_shape.size()}, reshape_shape);
            auto reshape = std::make_shared<ngraph::opset9::Reshape>(last_node, reshape_const, false);
            auto matmul_const = ngraph::opset9::Constant::create(ngraph::element::f32, {64, 3}, {1.2});
            auto matmul = swap_matmul ? std::make_shared<ngraph::opset9::MatMul>(matmul_const, reshape) :
                                        std::make_shared<ngraph::opset9::MatMul>(reshape, matmul_const);

            auto result = std::make_shared<ngraph::opset9::Result>(matmul);
            m_func = std::make_shared<ngraph::Function>(ngraph::ResultVector{result},
                                                        ngraph::ParameterVector{input});
        }

        // Reference: same graph; when there is no Relu, an Identity is
        // expected between the Conv/MaxPool output and the Reshape.
        {
            std::shared_ptr<ov::Node> last_node;
            auto input = std::make_shared<ngraph::opset9::Parameter>(ngraph::element::f32, m_input_shape);
            auto weights = ngraph::opset9::Constant::create(ngraph::element::f32,
                                                            ngraph::Shape{3, 3, 1, 2}, {1});
            auto conv = std::make_shared<ngraph::opset9::Convolution>(input, weights,
                                                                      ngraph::Strides{1, 1},
                                                                      ngraph::CoordinateDiff{0, 0},
                                                                      ngraph::CoordinateDiff{0, 1},
                                                                      ngraph::Strides{1, 1});
            last_node = conv;
            if (with_pool) {
                auto max_pool = std::make_shared<ngraph::opset7::MaxPool>(last_node,
                                                                          ngraph::Strides{1, 1},
                                                                          ngraph::Shape{0, 0},
                                                                          ngraph::Shape{0, 1},
                                                                          ngraph::Shape{1, 2});
                last_node = max_pool;
            }
            if (with_act) {
                auto relu = std::make_shared<ngraph::opset9::Relu>(last_node);
                last_node = relu;
            } else {
                // No activation: an Identity takes its place before the MatMul.
                auto identity = std::make_shared<ov::intel_gna::op::Identity>(last_node);
                last_node = identity;
            }
            auto reshape_const = ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape{reshape_shape.size()}, reshape_shape);
            auto reshape = std::make_shared<ngraph::opset9::Reshape>(last_node, reshape_const, false);
            auto matmul_const = ngraph::opset9::Constant::create(ngraph::element::f32, {64, 3}, {1.2});
            auto matmul = swap_matmul ? std::make_shared<ngraph::opset9::MatMul>(matmul_const, reshape) :
                                        std::make_shared<ngraph::opset9::MatMul>(reshape, matmul_const);

            auto result = std::make_shared<ngraph::opset9::Result>(matmul);
            m_ref_func = std::make_shared<ngraph::Function>(ngraph::ResultVector{result},
                                                            ngraph::ParameterVector{input});
        }
    }
};
|
||||
|
||||
// Runs the Conv/MatMul test for every pooling/activation/swap combination.
TEST_P(InsertIdentityLayerConvMatMulTest, CompareWithRefs) {
    Run();
}

INSTANTIATE_TEST_SUITE_P(TransformationTests, InsertIdentityLayerConvMatMulTest,
                         ::testing::Combine(
                             ::testing::ValuesIn({true, false}),
                             ::testing::ValuesIn({true, false}),
                             ::testing::ValuesIn({true, false})),
                         InsertIdentityLayerConvMatMulTest::getTestCaseName);
|
||||
|
||||
/***************************************************** Result layer tests *****************************************************/
|
||||
|
||||
// Checks BreakFusingOfOutputLayers: when a node (Add) feeds both a Result
// and another consumer (Relu), the reference graph expects an Identity on
// the path to the Result only; the Relu branch stays attached to the Add.
class InsertIdentityLayerResultTest: public InsertIdentityLayerTest {
public:
    void SetUp() override {
        InsertIdentityLayerTest::SetUp();
        // Function under test: Add feeds Result directly and a Relu -> Result.
        {
            auto params = std::make_shared<ngraph::opset9::Parameter>(ngraph::element::f32, m_input_shape);
            auto const_add = ngraph::opset9::Constant::create(ngraph::element::f32, m_input_shape, {1});
            auto add = std::make_shared<ngraph::opset9::Add>(params, const_add);
            auto relu = std::make_shared<ngraph::opset9::Relu>(add);
            auto result1 = std::make_shared<ngraph::opset9::Result>(add);
            auto result2 = std::make_shared<ngraph::opset9::Result>(relu);
            m_func = std::make_shared<ngraph::Function>(ngraph::ResultVector{result1, result2},
                                                        ngraph::ParameterVector{params});
        }

        // Reference: the Identity feeds result1; the Relu still consumes add.
        {
            auto params = std::make_shared<ngraph::opset9::Parameter>(ngraph::element::f32, m_input_shape);
            auto const_add = ngraph::opset9::Constant::create(ngraph::element::f32, m_input_shape, {1});
            auto add = std::make_shared<ngraph::opset9::Add>(params, const_add);
            auto identity = std::make_shared<ov::intel_gna::op::Identity>(add);
            auto relu = std::make_shared<ngraph::opset9::Relu>(add);
            auto result1 = std::make_shared<ngraph::opset9::Result>(identity);
            auto result2 = std::make_shared<ngraph::opset9::Result>(relu);
            m_ref_func = std::make_shared<ngraph::Function>(ngraph::ResultVector{result1, result2},
                                                            ngraph::ParameterVector{params});
        }
    }
    // Runs only InitNodeInfo + BreakFusingOfOutputLayers; the full identity
    // pipeline (and rt_info cleanup check) of the base Validate() does not
    // apply to this test.
    void Validate() override {
        ngraph::pass::Manager m;
        m.register_pass<ngraph::pass::InitNodeInfo>();
        m.register_pass<ov::intel_gna::pass::BreakFusingOfOutputLayers>();
        m.run_passes(m_func);
        ASSERT_NO_THROW(check_rt_info(m_func));

        auto result = compare_functions(m_func, m_ref_func);
        ASSERT_TRUE(result.first);
    }
};
|
||||
|
||||
// Builds the multi-output graphs and runs BreakFusingOfOutputLayers.
TEST_F(InsertIdentityLayerResultTest, CompareWithRefs) {
    Run();
}
|
||||
} // namespace testing
|
||||