[GNA] Support new kaldi irs (#9474)
* Support new kaldi IRs (generated in NHWC layout)
* Update tests with activation and fq
* Cleanup
* Fix reordering FQ and MaxPool and problem with overflow
* Fix win
* Update src/plugins/intel_gna/transformations/unfuse_reshape_and_transpose.hpp
* Update src/plugins/intel_gna/transformations/unfuse_reshape_and_transpose.cpp
* Update inference-engine/tests/unit/gna/ngraph/transformations/gna_unfuse_reshape_and_transpose.cpp
* Code review

Co-authored-by: Elizaveta Lobanova <elizaveta.lobanova@intel.com>
parent 3c7589184d
commit 56581dbe2e
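The change in one picture: new Kaldi IRs come out in NHWC layout, so the flat 2D input is folded into the 4D convolution input (and back out of it) by a single Reshape on each side. The sketch below is editorial and not part of the commit; it uses the first shape combination from the unit test added by this commit to show the fused form that the new Unfuse passes match and split into a Reshape plus a Transpose.

#include <ngraph/function.hpp>
#include <ngraph/opsets/opset8.hpp>

// Fused form found in new kaldi IRs: [1, 168] -> Reshape -> [1, 1, 1, 168] -> Convolution -> Reshape -> [1, 1932].
// Shapes follow the first test case below; everything else here is illustrative only.
std::shared_ptr<ngraph::Function> make_fused_kaldi_like_graph() {
    using namespace ngraph;
    auto input = std::make_shared<opset8::Parameter>(element::f32, Shape{1, 168});
    auto reshape_in = std::make_shared<opset8::Reshape>(
        input, opset8::Constant::create(element::i64, Shape{4}, {1, 1, 1, 168}), false);
    auto weights = opset8::Constant::create(element::f32, Shape{12, 1, 1, 8}, {1});
    auto conv = std::make_shared<opset8::Convolution>(reshape_in, weights,
        Strides{1, 1}, CoordinateDiff{0, 0}, CoordinateDiff{0, 0}, Strides{1, 1});
    // 12 output channels x 161 output columns = 1932 values flattened back to 2D
    auto reshape_out = std::make_shared<opset8::Reshape>(
        conv, opset8::Constant::create(element::i64, Shape{2}, {1, 1932}), false);
    return std::make_shared<Function>(OutputVector{reshape_out}, ParameterVector{input});
}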
@@ -0,0 +1,225 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <gtest/gtest.h>

#include "transformations/unfuse_reshape_and_transpose.hpp"

#include "common_test_utils/ngraph_test_utils.hpp"
#include <ngraph/function.hpp>
#include <ngraph/opsets/opset8.hpp>
#include <ngraph/opsets/opset7.hpp>
#include <ngraph/pass/manager.hpp>
#include <transformations/init_node_info.hpp>

namespace testing {
namespace {

class IActivationFactory {
public:
    virtual ~IActivationFactory() = default;
    virtual std::shared_ptr<ngraph::Node> createNode(const ngraph::Output<ngraph::Node>& in) = 0;
};

template <typename T>
class ActivationFactory : public IActivationFactory {
public:
    ActivationFactory() = default;
    std::shared_ptr<ngraph::Node> createNode(const ngraph::Output<ngraph::Node>& operation_before) override {
        return std::make_shared<T>(operation_before);
    }
private:
    ActivationFactory(const ActivationFactory&) = delete;
    ActivationFactory& operator=(const ActivationFactory&) = delete;
};

template <>
class ActivationFactory<ngraph::opset8::Clamp> : public IActivationFactory {
public:
    ActivationFactory(const double min, const double max) : min_(min), max_(max) {}
    std::shared_ptr<ngraph::Node> createNode(const ngraph::Output<ngraph::Node>& operation_before) override {
        return std::make_shared<ngraph::opset8::Clamp>(operation_before, min_, max_);
    }
private:
    ActivationFactory(const ActivationFactory&) = delete;
    ActivationFactory& operator=(const ActivationFactory&) = delete;
private:
    const double min_;
    const double max_;
};

using ActivationFactoryPtr = std::shared_ptr<IActivationFactory>;

template <typename T, typename ... Args>
ActivationFactoryPtr createActivationFactory(Args&& ... args) {
    return std::make_shared<ActivationFactory<T>>(std::forward<Args>(args) ...);
}

static std::shared_ptr<ngraph::Function> createFunction(const ngraph::Shape& conv_input_shape,
                                                        const ngraph::Shape& conv_filter_shape,
                                                        bool with_bias,
                                                        bool with_pool,
                                                        ActivationFactoryPtr activation_factory,
                                                        bool with_fq,
                                                        bool single_reshape_before,
                                                        bool single_reshape_after) {
    size_t total_in = std::accumulate(std::begin(conv_input_shape), std::end(conv_input_shape), 1, std::multiplies<int>());
    auto input = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f32, ngraph::Shape{1, total_in});
    std::shared_ptr<ngraph::Node> last_node, last_const;
    auto add_fake_quantize = [&](const std::shared_ptr<ngraph::Node>& node) {
        auto input_low = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {1});
        auto input_high = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {5});
        auto output_low = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {0});
        auto output_high = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {10});
        return std::make_shared<ngraph::opset8::FakeQuantize>(node, input_low, input_high, output_low, output_high, 11);
    };
    if (single_reshape_before) {
        auto reshape_in_const = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{4}, conv_input_shape);
        auto reshape_in = std::make_shared<ngraph::opset8::Reshape>(input, reshape_in_const, false);
        last_node = reshape_in;
    } else {
        auto reshape_in_const = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{4},
            ngraph::Shape{conv_input_shape[0], conv_input_shape[2], conv_input_shape[3], conv_input_shape[1]});
        auto reshape_in = std::make_shared<ngraph::opset8::Reshape>(input, reshape_in_const, false);
        auto transpose_in_const = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{4}, ngraph::Shape{0, 3, 1, 2});
        auto transpose_in = std::make_shared<ngraph::opset8::Transpose>(reshape_in, transpose_in_const);
        last_node = transpose_in;
    }
    auto conv_weights = ngraph::opset8::Constant::create(ngraph::element::f32, conv_filter_shape, {1});
    last_const = conv_weights;
    if (with_fq) {
        auto conv_input_fq = add_fake_quantize(last_node);
        last_node = conv_input_fq;
        auto conv_weights_fq = add_fake_quantize(conv_weights);
        last_const = conv_weights_fq;
    }
    auto conv = std::make_shared<ngraph::opset8::Convolution>(last_node,
                                                              last_const,
                                                              ngraph::Strides{1, 1},
                                                              ngraph::CoordinateDiff{0, 0},
                                                              ngraph::CoordinateDiff{0, 0},
                                                              ngraph::Strides{1, 1});
    last_node = conv;
    auto conv_output_shape = conv->get_output_shape(0);
    size_t total_out = std::accumulate(std::begin(conv_output_shape), std::end(conv_output_shape), 1, std::multiplies<int>());
    if (with_bias) {
        auto add_const = ngraph::opset8::Constant::create(ngraph::element::f32, ngraph::Shape{1, conv_output_shape.at(1), 1, 1}, {1});
        auto add = std::make_shared<ngraph::opset8::Add>(conv, add_const);
        last_node = add;
    }
    if (with_fq) {
        auto conv_bias_fq = add_fake_quantize(last_node);
        last_node = conv_bias_fq;
    }
    if (with_pool) {
        auto pool = std::make_shared<ngraph::opset7::MaxPool>(last_node,
            ngraph::Strides{1, 1}, ngraph::Shape{0, 0}, ngraph::Shape{0, 0}, ngraph::Shape{1, 1});
        last_node = pool;
    }
    if (activation_factory) {
        if (with_fq) {
            auto act_fq_in = add_fake_quantize(last_node);
            last_node = act_fq_in;
        }
        auto act = activation_factory->createNode(last_node);
        last_node = act;
        if (with_fq) {
            auto act_fq_out = add_fake_quantize(last_node);
            last_node = act_fq_out;
        }
    }
    auto reshape_out_const = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{2}, ngraph::Shape{1, total_out});
    if (!single_reshape_after) {
        auto transpose_out_const = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{4}, ngraph::Shape{0, 2, 3, 1});
        auto transpose_out = std::make_shared<ngraph::opset8::Transpose>(last_node, transpose_out_const);
        last_node = transpose_out;
    }
    auto reshape_out = std::make_shared<ngraph::opset8::Reshape>(last_node, reshape_out_const, false);

    auto result = std::make_shared<ngraph::opset8::Result>(reshape_out);
    auto func = std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input});

    return func;
}

typedef std::tuple<
    std::tuple<ngraph::Shape, ngraph::Shape, bool, bool>,
    bool,                 // with bias
    bool,                 // with pooling
    ActivationFactoryPtr, // with activation
    bool                  // with fq
> UnfuseReshapeAndTransposeParams;

class UnfuseReshapeAndTransposeTestSuiteFixture: public CommonTestUtils::TestsCommon,
                                                 public ::testing::WithParamInterface<UnfuseReshapeAndTransposeParams> {
public:
    void SetUp() override;
public:
    std::shared_ptr<ngraph::Function> function, reference_function;
};

void UnfuseReshapeAndTransposeTestSuiteFixture::SetUp() {
    std::tuple<ngraph::Shape, ngraph::Shape, bool, bool> conv_data;
    bool with_bias;
    bool with_pool;
    bool with_fq;
    ActivationFactoryPtr af;
    std::tie(conv_data, with_bias, with_pool, af, with_fq) = this->GetParam();
    ngraph::Shape conv_input_shape;
    ngraph::Shape conv_filter_shape;
    bool replace_before;
    bool replace_after;
    std::tie(conv_input_shape, conv_filter_shape, replace_before, replace_after) = conv_data;
    function = createFunction(conv_input_shape, conv_filter_shape, with_bias, with_pool, af, with_fq, true, true);
    reference_function = createFunction(conv_input_shape, conv_filter_shape, with_bias, with_pool, af, with_fq, !replace_before, !replace_after);
}

void execute_test(std::shared_ptr<ngraph::Function> function,
                  std::shared_ptr<ngraph::Function> reference_function) {
    ngraph::pass::Manager manager;
    manager.register_pass<ngraph::pass::InitNodeInfo>();
    manager.register_pass<GNAPluginNS::Unfuse2dto4dReshapeAndTranspose>();
    manager.register_pass<GNAPluginNS::Unfuse4dto2dReshapeAndTranspose>();
    manager.run_passes(function);
    const FunctionsComparator func_comparator = FunctionsComparator::with_default().enable(FunctionsComparator::ATTRIBUTES);
    const FunctionsComparator::Result result = func_comparator(function, reference_function);
    ASSERT_TRUE(result.valid) << result.message;
}

TEST_P(UnfuseReshapeAndTransposeTestSuiteFixture, CompareFunctions) {
    execute_test(function, reference_function);
}

const std::vector<ActivationFactoryPtr> activationFactories = {
    nullptr,
    createActivationFactory<ngraph::opset8::Relu>(),
    createActivationFactory<ngraph::opset8::Sigmoid>(),
    createActivationFactory<ngraph::opset8::Tanh>(),
    createActivationFactory<ngraph::opset8::Abs>(),
    createActivationFactory<ngraph::opset8::Log>(),
    createActivationFactory<ngraph::opset8::Exp>(),
    createActivationFactory<ngraph::opset8::Sign>(),
    createActivationFactory<ngraph::opset8::Clamp>(0.1, 0.2)
};

INSTANTIATE_TEST_SUITE_P(UnfuseReshapeAndTransposeTestSuite, UnfuseReshapeAndTransposeTestSuiteFixture,
    ::testing::Combine(
        ::testing::ValuesIn(
            std::vector<std::tuple<ngraph::Shape, ngraph::Shape, bool, bool>>{
                {ngraph::Shape{1, 1, 1, 168}, ngraph::Shape{12, 1, 1, 8}, true, false},
                {ngraph::Shape{1, 1, 1, 640}, ngraph::Shape{256, 1, 1, 512}, true, false},
                {ngraph::Shape{1, 1, 1, 1024}, ngraph::Shape{256, 1, 1, 512}, true, false},
                {ngraph::Shape{1, 1, 33, 32}, ngraph::Shape{128, 1, 33, 9}, true, false},
                {ngraph::Shape{1, 1, 11, 13}, ngraph::Shape{128, 1, 11, 9}, true, false},
                {ngraph::Shape{1, 1, 33, 23}, ngraph::Shape{128, 1, 11, 5}, true, false},
                {ngraph::Shape{1, 1, 33, 32}, ngraph::Shape{1, 1, 33, 9}, true, true},
                {ngraph::Shape{1, 1, 1, 1024}, ngraph::Shape{256, 1, 1, 1024}, true, true},
                {ngraph::Shape{1, 1, 33, 32}, ngraph::Shape{1, 1, 33, 9}, true, true}}),
        ::testing::ValuesIn(std::vector<bool>{true, false}),    // with bias
        ::testing::ValuesIn(std::vector<bool>{true, false}),    // with max pool
        ::testing::ValuesIn(activationFactories),               // with activation
        ::testing::ValuesIn(std::vector<bool>{true, false}))); // with fq

} // namespace
} // namespace testing
@@ -266,13 +266,21 @@ void GNAGraphCompiler::ConvolutionPrimitive(InferenceEngine::CNNLayerPtr layer)
        std::swap(convolution._dilation_x, convolution._dilation_y);
    }

    auto in_kernel_w = convolution._kernel_x;
    auto in_kernel_h = convolution._kernel_y;
    bool transpose_h_w = false;

    // Map 2d convolution to 1d if it's possible
    if (GNAConvolutionLayer::isMappableFrom2DTo1D(in_height, in_width, convolution._kernel_x, convolution._stride_x)) {
    if (GNAConvolutionLayer::isMappableFrom2DTo1D(in_height, in_width, in_channels,
                                                  convolution._kernel_y, convolution._kernel_x,
                                                  convolution._stride_y, convolution._stride_x)) {
        transpose_h_w = (in_height == convolution._kernel_y);
        in_width *= in_height;
        in_height = 1;
        out_width *= out_height;
        out_height = 1;
        convolution._stride_x *= (convolution._stride_y * convolution._kernel_x);
        convolution._stride_x *= transpose_h_w ? (convolution._stride_y * convolution._kernel_y) :
                                                 (convolution._stride_y * convolution._kernel_x);
        convolution._kernel_x *= convolution._kernel_y;
        convolution._kernel_y = 1;
    }
@@ -304,19 +312,20 @@ void GNAGraphCompiler::ConvolutionPrimitive(InferenceEngine::CNNLayerPtr layer)
        in_height != 1) {
        // TensorFlow default layout is NHWC
        // OpenVino Default layout is NCHW
        // GNA Convolution input is NHCW
        // GNA Convolution input is NHCW (old) or NHWC (new)
        // When layer layout is in NHWC it means that is was created by PassManager
        return finalizeConvolution2DPrimitive(layer, in_batch, in_channels, in_height, in_width,
                                              out_batch, out_channels, out_height, out_width);
        THROW_GNA_LAYER_EXCEPTION(layer) << "Convolution 2D is not supported on GNA 1.0 library";
    }
    finalizeConvolution1DPrimitive(layer, in_batch, in_channels, in_width,
                                   out_batch, out_channels, out_width);
                                   out_batch, out_channels, out_width, in_kernel_w, in_kernel_h, transpose_h_w);
}

void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerPtr layer,
    uint32_t in_batch, uint32_t in_channels, uint32_t in_width,
    uint32_t out_batch, uint32_t out_channels, uint32_t out_width) {
    uint32_t out_batch, uint32_t out_channels, uint32_t out_width,
    uint32_t in_kernel_w, uint32_t in_kernel_h, bool transpose_h_w) {
    auto& convolution = dynamic_cast<ConvolutionLayer&>(*layer.get());
    printConvolutionLayer(convolution);

@@ -429,7 +438,10 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
        ptr_weights,
        ptr_biases);

    if (inputs->getLayout() == Layout::NHWC) {
    // Keep both variants of kaldi models working:
    // Old one has layout which is different from NHWC
    // New one has layout NHWC, but it is mapped from 2d by H
    if (inputs->getLayout() == Layout::NHWC && !transpose_h_w) {
        currentComponent.orientation_in = kDnnInterleavedOrientation;
        currentComponent.orientation_out = kDnnInterleavedOrientation;
    }
@@ -447,7 +459,7 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP

    // TODO: convolution might be not the first layer in sorted order but connected via split for example - dont know how kaldi will handle that
    if (!dnn->do_rotate_input) {
        if (inputs->getLayout() != Layout::NHWC && LayerInfo(connectedInputLayer).isInput()) {
        if ((inputs->getLayout() != Layout::NHWC || transpose_h_w) && LayerInfo(connectedInputLayer).isInput()) {
            // Kaldi features are opposite orientation
            dnn->do_rotate_input = true;
            dnn->num_rotate_rows = effectiveStride;
@@ -459,12 +471,16 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP

    connectOutput(layer, ptr_outputs, num_data_bytes_out);

    // Transpose H with W or C with HW
    auto A = transpose_h_w ? in_kernel_h : in_channels;
    auto B = transpose_h_w ? in_kernel_w : convolution._kernel[X_AXIS];

    std::vector<uint8_t> transposedWeights;
    for (uint32_t k = 0; k < convolution._out_depth; k++) {
        uint8_t * ptr_filt_current
            = convolution._weights->cbuffer().as<uint8_t*>() +
              k * in_channels * convolution._kernel[X_AXIS] * convolution.precision.size();
        auto transposedPart = transposeMatrix(ptr_filt_current, convolution.precision.size(), in_channels, convolution._kernel[X_AXIS]);
              k * A * B * convolution.precision.size();
        auto transposedPart = transposeMatrix(ptr_filt_current, convolution.precision.size(), A, B);
        transposedWeights.insert(transposedWeights.end(), transposedPart.begin(), transposedPart.end());
    }
    if (transposedWeights.size() != convolution._weights->byteSize()) {
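For reference, the weight rearrangement the 1D finalizer performs per output filter: each filter is treated as an A x B row-major matrix and rewritten column by column, where A/B are either channels x kernel-width (the old width-mapped case) or kernel-height x kernel-width (the new transpose_h_w case). The helper below is an editorial stand-in with the same effect, not the plugin's transposeMatrix.

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

// Editorial stand-in: rewrite an A x B row-major block of elem_size-byte values as B x A.
std::vector<uint8_t> transpose_block(const uint8_t* src, std::size_t elem_size, std::size_t rows, std::size_t cols) {
    std::vector<uint8_t> dst(rows * cols * elem_size);
    for (std::size_t r = 0; r < rows; ++r) {
        for (std::size_t c = 0; c < cols; ++c) {
            const uint8_t* from = src + (r * cols + c) * elem_size;
            std::copy(from, from + elem_size, dst.begin() + (c * rows + r) * elem_size);
        }
    }
    return dst;
}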
@@ -128,8 +128,8 @@ public:

    void finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerPtr,
                                        uint32_t in_batch, uint32_t in_channels, uint32_t in_width,
                                        uint32_t out_batch, uint32_t out_channels, uint32_t out_width);

                                        uint32_t out_batch, uint32_t out_channels, uint32_t out_width,
                                        uint32_t in_kernel_x, uint32_t in_kernel_y, bool transpose);
    void finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerPtr,
                                        uint32_t in_batch, uint32_t in_channels, uint32_t in_height, uint32_t in_width,
                                        uint32_t out_batch, uint32_t out_channels, uint32_t out_height, uint32_t out_width);
@@ -89,7 +89,8 @@ inline std::pair<InferenceEngine::CNNLayerPtr, InferenceEngine::CNNLayerPtr> Fin

    auto next = getInputTo(layer->outData.front()).begin()->second;
    // Permute is inserted before Reshape by MO in NHWC models, so we need to find either permute, or reshape, or output
    while (!LayerInfo(next).isPermute() && !LayerInfo(next).isOutput() && next->outData.size() == 1) {
    while (!LayerInfo(next).isPermute() && !LayerInfo(next).isPermuteViaReshape() &&
           !LayerInfo(next).isOutput() && next->outData.size() == 1) {
        if (LayerInfo(next).isNonFunctional() && !IsReshapeFrom4dTo3d(next) && !IsReshapeFrom3dTo4d(next)) {
            break;
        }
@@ -111,15 +112,28 @@ inline std::pair<InferenceEngine::CNNLayerPtr, InferenceEngine::CNNLayerPtr> Fin
        if (next->outData.size() != 1) {
            return std::make_pair(nullptr, nullptr);
        }
        auto input_dims = next->insData[0].lock()->getDims();
        auto in_dims_size = input_dims.size();
        auto output_dims = next->outData[0]->getDims();
        auto out_dims_size = output_dims.size();
        if (in_dims_size == 4 && out_dims_size == 4) {
            if (!LayerInfo(next).isPermuteViaReshape() ||
                (input_dims[0] != output_dims[0]) || // N
                (input_dims[1] != output_dims[3]) || // C
                (input_dims[2] != output_dims[1]) || // H
                (input_dims[3] != output_dims[2])) { // W
                return std::make_pair(nullptr, nullptr);
            }
        } else {
            // Check if reshape is expected for this pattern:
            // the next layer has the both, height and width dimensions > 1
            auto in_dim_size = next->insData[0].lock()->getDims().size();
            IE_ASSERT(in_dim_size == 3 || in_dim_size == 4);
            size_t height = in_dim_size == 3 ? 1 : GetDataDimSize(next->insData[0].lock(), InferenceEngine::DataDimName::H);
            IE_ASSERT(in_dims_size == 3 || in_dims_size == 4);
            size_t height = in_dims_size == 3 ? 1 : GetDataDimSize(next->insData[0].lock(), InferenceEngine::DataDimName::H);
            size_t width = GetDataDimSize(next->insData[0].lock(), InferenceEngine::DataDimName::W);
            if (next->outData[0]->getDims().size() < 3 || height != 1 || width != 1) {
            if (out_dims_size < 3 || height != 1 || width != 1) {
                return std::make_pair(nullptr, nullptr);
            }
        }
    } else {
        return std::make_pair(nullptr, nullptr);
    }
@@ -127,7 +141,8 @@ inline std::pair<InferenceEngine::CNNLayerPtr, InferenceEngine::CNNLayerPtr> Fin
    // Permute is inserted after Reshape by MO in NHWC models, so we need to find either permute, or reshape, or input
    auto parent = InferenceEngine::CNNNetPrevLayer(layer);
    auto prev = parent;
    while (!LayerInfo(prev).isPermute() && !LayerInfo(prev).isInput() && InferenceEngine::CNNNetHasPrevLayer(prev.get())) {
    while (!LayerInfo(prev).isPermute() && !LayerInfo(prev).isPermuteViaReshape() &&
           !LayerInfo(prev).isInput() && InferenceEngine::CNNNetHasPrevLayer(prev.get())) {
        if (LayerInfo(prev).isNonFunctional() && !IsReshapeFrom4dTo3d(prev) && !IsReshapeFrom3dTo4d(prev)) {
            break;
        }
@@ -143,19 +158,35 @@ inline std::pair<InferenceEngine::CNNLayerPtr, InferenceEngine::CNNLayerPtr> Fin
            return std::make_pair(nullptr, nullptr);
        }
    } else if (LayerInfo(prev).isReshape()) {
        auto input_dims = prev->insData[0].lock()->getDims();
        auto in_dims_size = input_dims.size();
        auto output_dims = prev->outData[0]->getDims();
        auto out_dims_size = output_dims.size();

        if (in_dims_size == 4 && out_dims_size == 4) {
            if (!LayerInfo(prev).isPermuteViaReshape() ||
                (input_dims[0] != output_dims[0]) || // N
                (input_dims[1] != output_dims[2]) || // H
                (input_dims[2] != output_dims[3]) || // W
                (input_dims[3] != output_dims[1])) { // C
                return std::make_pair(nullptr, nullptr);
            }
        } else {
            if (parent->outData.size() != 1 || InferenceEngine::getInputTo(parent->outData[0]).size() != 1) {
                return std::make_pair(nullptr, nullptr);
            }
            // Check if reshape is expected for this pattern:
            // the previous layer has number of channels > 1 and one of height/width dimensions is also > 1
            size_t out_dims_size = parent->outData[0]->getDims().size();
            in_dims_size = parent->insData[0].lock()->getDims().size();
            out_dims_size = parent->outData[0]->getDims().size();
            IE_ASSERT(out_dims_size == 3 || out_dims_size == 4);
            size_t channels = GetDataDimSize(parent->outData[0], out_dims_size - 1);
            size_t height = out_dims_size == 3 ? 1 : GetDataDimSize(parent->outData[0], InferenceEngine::DataDimName::H);
            size_t width = GetDataDimSize(parent->outData[0], InferenceEngine::DataDimName::W);
            if (parent->insData[0].lock()->getDims().size() < 3 || channels != 1 && (height != 1 || width != 1)) {
            if (in_dims_size < 3 || channels != 1 && (height != 1 || width != 1)) {
                return std::make_pair(nullptr, nullptr);
            }
        }
    } else {
        return std::make_pair(nullptr, nullptr);
    }
@@ -79,6 +79,7 @@
#include "transformations/decompose_mvn.hpp"
#include "transformations/substitute_softsign.hpp"
#include "transformations/convert_precision.hpp"
#include "transformations/unfuse_reshape_and_transpose.hpp"

#include <ngraph/opsets/opset7.hpp>

@@ -698,6 +699,9 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
        manager.register_pass<SwapInputMatMul>();
        manager.register_pass<HandleTransposesAroundMatMul>();
        manager.register_pass<InsertTransposeAfterConvOrPool>();
        manager.register_pass<Unfuse2dto4dReshapeAndTranspose>();
        manager.register_pass<Unfuse4dto2dReshapeAndTranspose>();
        manager.register_pass<RemoveExtraReshapes>();
        manager.register_pass<ReorderActivationAndPooling>();
        manager.register_pass<RemoveSingleInputConcat>();
        manager.register_pass<SubstituteSoftsign>();
@@ -792,7 +796,10 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {

    passes->registerPass<SubstitutePReluPass>();

    if (!isNgraphPassesUsed) {
        passes->registerPass<ReorderMaxPoolPass>();
    }

    passes->registerPass<EltwiseSplitOverChannelsPass>();
    passes->registerPass<InsertSplitAligningFilterPass>();
@@ -16,8 +16,13 @@

namespace GNAPluginNS {
namespace GNAConvolutionLayer {
bool isMappableFrom2DTo1D(const uint32_t inHeight, const uint32_t inWidth, const uint32_t kernelWidth, const uint32_t strideWidth) {
    return inHeight > 1 && inWidth > 1 && inWidth == kernelWidth && strideWidth == 1;

bool isMappableFrom2DTo1D(const uint32_t inHeight, const uint32_t inWidth, const uint32_t in_channels,
                          const uint32_t kernelHeight, const uint32_t kernelWidth,
                          const uint32_t strideHeight, const uint32_t strideWidth) {
    return ((inHeight > 1 && inWidth > 1) &&
            ((inWidth == kernelWidth && strideWidth == 1) ||
             (inHeight == kernelHeight && strideHeight == 1 && in_channels == 1)));
}

// 3D input or 2D kernel
@@ -39,7 +44,7 @@ double getWeightsReducer(InferenceEngine::ConvolutionLayer& conv) {
    const auto inHeight = GetDataDimSize(conv.insData.front().lock(), InferenceEngine::DataDimName::H);
    const auto inWidth = GetDataDimSize(conv.insData.front().lock(), InferenceEngine::DataDimName::W);
    if (isConv2D(inHeight, inWidth, inDepth, conv._kernel_y, conv._kernel_x) &&
        !isMappableFrom2DTo1D(inHeight, inWidth, conv._kernel_x, conv._stride_x)) {
        !isMappableFrom2DTo1D(inHeight, inWidth, inDepth, conv._kernel_y, conv._kernel_x, conv._stride_y, conv._stride_x)) {
        const auto kernelSize = conv._kernel_x * conv._kernel_y;
        auto r = std::lower_bound(reducers.begin(), reducers.end(), kernelSize,
                                  [](const KRT& l, const KRT::first_type& r) {return l.first > r; });
@@ -10,7 +10,9 @@

namespace GNAPluginNS {
namespace GNAConvolutionLayer {
bool isMappableFrom2DTo1D(const uint32_t inHeight, const uint32_t inWidth, const uint32_t kernelWidth, const uint32_t strideWidth);
bool isMappableFrom2DTo1D(const uint32_t inHeight, const uint32_t inWidth, const uint32_t inChannels,
                          const uint32_t kernelHeight, const uint32_t kernelWidth,
                          const uint32_t strideHeight, const uint32_t strideWidth);

// 3D input or 2D kernel
bool isConv2D(const uint32_t inHeight, const uint32_t inWidth, const uint32_t inDepth,
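A quick sanity check of the widened mapping rule, restated locally for illustration (this is not a call into the plugin): a 2D convolution can now be flattened to 1D either when the kernel spans the whole input width with horizontal stride 1, as before, or when it spans the whole input height with vertical stride 1 on a single-channel input. The shapes below come from the unit test at the top of this commit.

#include <cassert>
#include <cstdint>

// Local restatement of GNAConvolutionLayer::isMappableFrom2DTo1D, for illustration only.
static bool mappable_2d_to_1d(uint32_t in_h, uint32_t in_w, uint32_t in_c,
                              uint32_t k_h, uint32_t k_w, uint32_t s_h, uint32_t s_w) {
    return (in_h > 1 && in_w > 1) &&
           ((in_w == k_w && s_w == 1) ||              // old case: kernel covers the full width
            (in_h == k_h && s_h == 1 && in_c == 1));  // new case: kernel covers the full height
}

int main() {
    assert(!mappable_2d_to_1d(1, 168, 1, 1, 8, 1, 1));   // already 1D, nothing to map
    assert(mappable_2d_to_1d(33, 32, 1, 33, 9, 1, 1));   // height-mapped: ConvolutionPrimitive sets transpose_h_w
    assert(mappable_2d_to_1d(11, 13, 1, 11, 9, 1, 1));   // second height-mapped shape from the tests
    return 0;
}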
@@ -271,6 +271,24 @@ class LayerInfo {
    bool isPermute() const noexcept {
        return isOfType("permute");
    }
    bool isPermuteViaReshape() const {
        if (!isOfType("reshape")) return false;

        auto input_dims = layer->insData[0].lock()->getDims();
        auto output_dims = layer->outData[0]->getDims();

        if (input_dims.size() != output_dims.size()) {
            return false;
        }

        input_dims.erase(std::remove(input_dims.begin(), input_dims.end(), 1), input_dims.end());
        output_dims.erase(std::remove(output_dims.begin(), output_dims.end(), 1), output_dims.end());

        if (input_dims != output_dims) {
            return false;
        }
        return true;
    }
    // @brief this not only mathematically trivial, has some WA for kaldi case
    bool isTrivialPermute() const {
        if (!isPermute()) return false;
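The new LayerInfo::isPermuteViaReshape accepts a Reshape as a layout permutation only when the input and output ranks match and the dimensions left after dropping 1s are identical, i.e. the data is not actually reordered in memory. A stand-alone restatement with made-up dims, shown only to illustrate the rule:

#include <algorithm>
#include <cassert>
#include <vector>

static bool permute_via_reshape(std::vector<size_t> in, std::vector<size_t> out) {
    if (in.size() != out.size()) return false;
    in.erase(std::remove(in.begin(), in.end(), 1), in.end());
    out.erase(std::remove(out.begin(), out.end(), 1), out.end());
    return in == out;
}

int main() {
    assert(permute_via_reshape({1, 1, 1, 168}, {1, 168, 1, 1}));  // only unit axes move: behaves like a permute
    assert(!permute_via_reshape({1, 168}, {1, 1, 1, 168}));       // rank changes are handled by other checks
    assert(!permute_via_reshape({1, 2, 3, 1}, {1, 3, 2, 1}));     // real data reordering, rejected
    return 0;
}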
@@ -633,11 +633,11 @@ void RemovePermutationsNHWCToNCHWPass::run() {

        if (prev == nullptr || next == nullptr) continue;

        if (LayerInfo(prev).isPermute()) {
        if (LayerInfo(prev).isPermute() || LayerInfo(prev).isPermuteViaReshape()) {
            permutations_to_remove.insert(prev);
        }

        if (LayerInfo(next).isPermute()) {
        if (LayerInfo(next).isPermute() || LayerInfo(next).isPermuteViaReshape()) {
            permutations_to_remove.insert(next);
        }

@@ -699,7 +699,8 @@ void RemovePermutationsNHWCToNCHWPass::run() {
        };
        propogateNHWCOrderRecursive(current_layer);

        if (LayerInfo(pattern_start).isPermute() && !getInputTo(pattern_start->outData.front()).empty()) {
        if ((LayerInfo(pattern_start).isPermute() || LayerInfo(pattern_start).isPermuteViaReshape()) &&
            !getInputTo(pattern_start->outData.front()).empty()) {
            auto layer_before_permute = CNNNetPrevLayer(pattern_start);
            DataPtr output = nullptr;
            for (auto before_output : layer_before_permute->outData) {
@@ -2017,11 +2018,11 @@ void MoveFakeQuantizeLayerIntoQuantParamsPass :: run() {
    };

    auto allowFQFuse = [](CNNLayerPtr layer) -> bool {
        auto doNotSkup = [](CNNLayerPtr layer) {
        auto doNotSkip = [](CNNLayerPtr layer) {
            return false;
        };

        if (CNNNetGetAllNextLayersSkipCertain(layer, -1, doNotSkup).empty()) {
        if (CNNNetGetAllNextLayersSkipCertain(layer, -1, doNotSkip).empty()) {
            return false;
        }

@@ -2142,7 +2143,7 @@ void MoveFakeQuantizeLayerIntoQuantParamsPass :: run() {

    // Before FQ layer is removed, the previous functional layer has to be updated with its quantization data
    auto prevFuncLayer = CNNNetPrevLayerSkipCertain(*fqLayer, 0, [](CNNLayerPtr layer) {
        return LayerInfo(layer).isNonFunctional();
        return LayerInfo(layer).isNonFunctional() || LayerInfo(layer).isPooling();
    });
    auto quantParamsPrevLayer = InferenceEngine::getInjectedData<QuantizedLayerParams>(prevFuncLayer);
    quantParamsPrevLayer->_dst_quant.SetLevels(fqLevels);
@@ -128,7 +128,9 @@ static bool ShouldDecompose(GraphData& graph_data, const ConvData& conv_data) {
    // GNA supported features or handled otherwise - there is no need to decompose such convolution
    if (graph_data.conv_count == 1 && (((conv_data.input_height == 1 || conv_data.input_width == 1) &&
        conv_data.filter_dilation_width == 1 && conv_data.filter_dilation_height == 1) ||
        GNAConvolutionLayer::isMappableFrom2DTo1D(conv_data.input_height, conv_data.input_width, conv_data.filter_width, conv_data.filter_stride_width)))
        GNAConvolutionLayer::isMappableFrom2DTo1D(conv_data.input_height, conv_data.input_width, conv_data.input_channel_count,
                                                  conv_data.filter_height, conv_data.filter_width,
                                                  conv_data.filter_stride_height, conv_data.filter_stride_width)))
        return false;

    return true;
@@ -8,6 +8,7 @@

#include <ngraph/opsets/opset7.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include <ngraph/pattern/op/or.hpp>

using namespace GNAPluginNS;

@@ -15,16 +16,15 @@ NGRAPH_RTTI_DEFINITION(RemoveExtraReshapes, "RemoveExtraReshapes", 0);

RemoveExtraReshapes::RemoveExtraReshapes() {
    MATCHER_SCOPE(RemoveExtraReshapes);
    const auto reshape = ngraph::pattern::wrap_type<ngraph::opset7::Reshape>();
    const auto reshape = ngraph::pattern::wrap_type<ngraph::opset7::Reshape>(
        [](const ngraph::Output<ngraph::Node>& value) {
            return (value.get_node_shared_ptr()->get_input_shape(0) == value.get_node_shared_ptr()->get_output_shape(0));
        });
    const auto pooling = ngraph::pattern::wrap_type<ngraph::opset7::MaxPool>({reshape});

    ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) {
        const auto& pattern_map = m.get_pattern_value_map();
        const auto reshape_node = pattern_map.at(reshape).get_node_shared_ptr();
        if (reshape_node->get_input_shape(0) != reshape_node->get_output_shape(0)) {
            return false;
        }

        ngraph::replace_output_update_name(reshape_node->output(0), reshape_node->input_value(0));
        return true;
    };
@@ -33,9 +33,10 @@ static bool shouldSplitCnn(const ngraph::Output<ngraph::Node>& node) {
        uint32_t height = input.at(2);
        auto kH = filters.at(2);
        auto kW = filters.at(3);
        auto sH = convolution->get_strides().at(0);
        auto sW = convolution->get_strides().at(1);
        if (GNAConvolutionLayer::isConv2D(height, width, in_channels, kH, kW) &&
            !GNAConvolutionLayer::isMappableFrom2DTo1D(height, width, kW, sW)) {
            !GNAConvolutionLayer::isMappableFrom2DTo1D(height, width, in_channels, kH, kW, sH, sW)) {
            return false;
        }
    }
@@ -0,0 +1,189 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <openvino/cc/ngraph/itt.hpp>

#include "transformations/unfuse_reshape_and_transpose.hpp"
#include "transformations/utils/utils.hpp"
#include "transformations/utils/transformation_helper.hpp"
#include <ngraph/rt_info.hpp>
#include <ngraph/opsets/opset8.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include <ngraph/pattern/op/or.hpp>


using namespace GNAPluginNS;

NGRAPH_RTTI_DEFINITION(Unfuse2dto4dReshapeAndTranspose, "Unfuse2dto4dReshapeAndTranspose", 0);

Unfuse2dto4dReshapeAndTranspose::Unfuse2dto4dReshapeAndTranspose() {
    MATCHER_SCOPE(Unfuse2dto4dReshapeAndTranspose);
    auto is_required_reshape = [](const ngraph::Output<ngraph::Node>& value) {
        auto input_shape = value.get_node_shared_ptr()->get_input_shape(0);
        auto output_shape = value.get_node_shared_ptr()->get_output_shape(0);
        return ((input_shape.size() == 2) && (output_shape.size() == 4) &&
                ((output_shape.at(1) == 1) || (output_shape.at(2)*output_shape.at(3) == 1)));
    };
    const auto reshape = ngraph::pattern::wrap_type<ngraph::opset8::Reshape>(is_required_reshape);
    auto fq = ngraph::pattern::wrap_type<ngraph::opset8::FakeQuantize>({reshape,
        ngraph::pattern::any_input(), ngraph::pattern::any_input(), ngraph::pattern::any_input(), ngraph::pattern::any_input()},
        consumers_and_rank(1, 4));
    const auto conv = ngraph::pattern::wrap_type<ngraph::opset8::Convolution>({std::make_shared<ngraph::pattern::op::Or>(ngraph::OutputVector{reshape, fq}),
        ngraph::pattern::any_input()});
    ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) {
        const auto& pattern_map = m.get_pattern_value_map();
        const auto reshape_node = pattern_map.at(reshape).get_node_shared_ptr();
        auto consumers = reshape_node->output(0).get_target_inputs();

        auto N = reshape_node->get_output_shape(0)[0];
        auto C = reshape_node->get_output_shape(0)[1];
        auto H = reshape_node->get_output_shape(0)[2];
        auto W = reshape_node->get_output_shape(0)[3];

        // Create reshape NxW => NxHxWxC (C or HxW is equal to 1)
        auto data = reshape_node->input_value(0);
        auto reshape_nhwc_const = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{4}, ngraph::Shape{N, H, W, C});
        auto reshape_nhwc = register_new_node<ngraph::opset8::Reshape>(data, reshape_nhwc_const, false);
        reshape_nhwc->set_friendly_name(reshape_node->get_friendly_name() + "/Reshape");

        // Create transpose NxHxWxC => NxCxHxW
        auto transpose_const = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {0, 3, 1, 2});
        auto transpose = register_new_node<ngraph::opset8::Transpose>(reshape_nhwc, transpose_const);
        transpose->set_friendly_name(reshape_node->get_friendly_name());

        ngraph::copy_runtime_info(reshape, {reshape_nhwc, transpose});
        for (auto consumer : consumers) {
            consumer.replace_source_output(transpose);
        }

        return true;
    };

    auto m = std::make_shared<ngraph::pattern::Matcher>(conv, matcher_name);
    this->register_matcher(m, callback);
}

NGRAPH_RTTI_DEFINITION(Unfuse4dto2dReshapeAndTranspose, "Unfuse4dto2dReshapeAndTranspose", 0);

Unfuse4dto2dReshapeAndTranspose::Unfuse4dto2dReshapeAndTranspose() {
    MATCHER_SCOPE(Unfuse4dto2dReshapeAndTranspose);
    auto is_required_reshape = [](const ngraph::Output<ngraph::Node>& value) {
        auto input_shape = value.get_node_shared_ptr()->get_input_shape(0);
        auto output_shape = value.get_node_shared_ptr()->get_output_shape(0);
        return ((input_shape.size() == 4) && (output_shape.size() == 2) &&
                ((input_shape.at(1) == 1) || (input_shape.at(2)*input_shape.at(3) == 1)));
    };
    // Convolution
    auto conv = ngraph::pattern::wrap_type<ngraph::opset8::Convolution>({ngraph::pattern::any_input(), ngraph::pattern::any_input()},
        consumers_and_rank(1, 4));
    auto fq_conv = ngraph::pattern::wrap_type<ngraph::opset8::FakeQuantize>({conv,
        ngraph::pattern::any_input(), ngraph::pattern::any_input(), ngraph::pattern::any_input(), ngraph::pattern::any_input()},
        consumers_and_rank(1, 4));
    // Bias
    auto bias = ngraph::pattern::wrap_type<ngraph::opset8::Add>({conv, ngraph::pattern::any_input()},
        consumers_and_rank(1, 4));
    auto fq_bias = ngraph::pattern::wrap_type<ngraph::opset8::FakeQuantize>({bias,
        ngraph::pattern::any_input(), ngraph::pattern::any_input(), ngraph::pattern::any_input(), ngraph::pattern::any_input()},
        consumers_and_rank(1, 4));
    // Max Pooling
    auto max_pool_conv = ngraph::pattern::wrap_type<ngraph::opset7::MaxPool>({conv},
        consumers_and_rank(1, 4));
    auto max_pool_fq_conv = ngraph::pattern::wrap_type<ngraph::opset7::MaxPool>({fq_conv},
        consumers_and_rank(1, 4));
    auto max_pool_bias = ngraph::pattern::wrap_type<ngraph::opset7::MaxPool>({bias},
        consumers_and_rank(1, 4));
    auto max_pool_fq_bias = ngraph::pattern::wrap_type<ngraph::opset7::MaxPool>({fq_bias},
        consumers_and_rank(1, 4));
    // Activation
    auto fq_fq_conv = ngraph::pattern::wrap_type<ngraph::opset8::FakeQuantize>({fq_conv,
        ngraph::pattern::any_input(), ngraph::pattern::any_input(), ngraph::pattern::any_input(), ngraph::pattern::any_input()},
        consumers_and_rank(1, 4));
    auto fq_fq_bias = ngraph::pattern::wrap_type<ngraph::opset8::FakeQuantize>({fq_bias,
        ngraph::pattern::any_input(), ngraph::pattern::any_input(), ngraph::pattern::any_input(), ngraph::pattern::any_input()},
        consumers_and_rank(1, 4));
    auto act_conv = ngraph::pattern::wrap_type<ngraph::opset8::Relu, ngraph::opset8::Sigmoid,
        ngraph::opset8::Tanh, ngraph::opset8::Abs, ngraph::opset8::Log, ngraph::opset8::Exp,
        ngraph::opset8::Sign, ngraph::opset8::Clamp>({conv},
        consumers_and_rank(1, 4));
    auto act_bias = ngraph::pattern::wrap_type<ngraph::opset8::Relu, ngraph::opset8::Sigmoid,
        ngraph::opset8::Tanh, ngraph::opset8::Abs, ngraph::opset8::Log, ngraph::opset8::Exp,
        ngraph::opset8::Sign, ngraph::opset8::Clamp>({bias},
        consumers_and_rank(1, 4));
    auto act_max_pool_conv = ngraph::pattern::wrap_type<ngraph::opset8::Relu, ngraph::opset8::Sigmoid,
        ngraph::opset8::Tanh, ngraph::opset8::Abs, ngraph::opset8::Log, ngraph::opset8::Exp,
        ngraph::opset8::Sign, ngraph::opset8::Clamp>({max_pool_conv},
        consumers_and_rank(1, 4));
    auto act_max_pool_bias = ngraph::pattern::wrap_type<ngraph::opset8::Relu, ngraph::opset8::Sigmoid,
        ngraph::opset8::Tanh, ngraph::opset8::Abs, ngraph::opset8::Log, ngraph::opset8::Exp,
        ngraph::opset8::Sign, ngraph::opset8::Clamp>({max_pool_bias},
        consumers_and_rank(1, 4));
    auto act_fq_fq_conv = ngraph::pattern::wrap_type<ngraph::opset8::Relu, ngraph::opset8::Sigmoid,
        ngraph::opset8::Tanh, ngraph::opset8::Abs, ngraph::opset8::Log, ngraph::opset8::Exp,
        ngraph::opset8::Sign, ngraph::opset8::Clamp>({fq_fq_conv},
        consumers_and_rank(1, 4));
    auto act_fq_fq_bias = ngraph::pattern::wrap_type<ngraph::opset8::Relu, ngraph::opset8::Sigmoid,
        ngraph::opset8::Tanh, ngraph::opset8::Abs, ngraph::opset8::Log, ngraph::opset8::Exp,
        ngraph::opset8::Sign, ngraph::opset8::Clamp>({fq_fq_bias},
        consumers_and_rank(1, 4));
    auto fq_max_pool_fq_conv = ngraph::pattern::wrap_type<ngraph::opset8::FakeQuantize>({max_pool_fq_conv,
        ngraph::pattern::any_input(), ngraph::pattern::any_input(), ngraph::pattern::any_input(), ngraph::pattern::any_input()},
        consumers_and_rank(1, 4));
    auto act_fq_max_pool_fq_conv = ngraph::pattern::wrap_type<ngraph::opset8::Relu, ngraph::opset8::Sigmoid,
        ngraph::opset8::Tanh, ngraph::opset8::Abs, ngraph::opset8::Log, ngraph::opset8::Exp,
        ngraph::opset8::Sign, ngraph::opset8::Clamp>({fq_max_pool_fq_conv},
        consumers_and_rank(1, 4));
    auto fq_max_pool_fq_bias = ngraph::pattern::wrap_type<ngraph::opset8::FakeQuantize>({max_pool_fq_bias,
        ngraph::pattern::any_input(), ngraph::pattern::any_input(), ngraph::pattern::any_input(), ngraph::pattern::any_input()},
        consumers_and_rank(1, 4));
    auto act_fq_max_pool_fq_bias = ngraph::pattern::wrap_type<ngraph::opset8::Relu, ngraph::opset8::Sigmoid,
        ngraph::opset8::Tanh, ngraph::opset8::Abs, ngraph::opset8::Log, ngraph::opset8::Exp,
        ngraph::opset8::Sign, ngraph::opset8::Clamp>({fq_max_pool_fq_bias},
        consumers_and_rank(1, 4));
    auto fq_act_fq_fq_conv = ngraph::pattern::wrap_type<ngraph::opset8::FakeQuantize>({act_fq_fq_conv,
        ngraph::pattern::any_input(), ngraph::pattern::any_input(), ngraph::pattern::any_input(), ngraph::pattern::any_input()},
        consumers_and_rank(1, 4));
    auto fq_act_fq_fq_bias = ngraph::pattern::wrap_type<ngraph::opset8::FakeQuantize>({act_fq_fq_bias,
        ngraph::pattern::any_input(), ngraph::pattern::any_input(), ngraph::pattern::any_input(), ngraph::pattern::any_input()},
        consumers_and_rank(1, 4));
    auto fq_act_fq_max_pool_fq_conv = ngraph::pattern::wrap_type<ngraph::opset8::FakeQuantize>({act_fq_max_pool_fq_conv,
        ngraph::pattern::any_input(), ngraph::pattern::any_input(), ngraph::pattern::any_input(), ngraph::pattern::any_input()},
        consumers_and_rank(1, 4));
    auto fq_act_fq_max_pool_fq_bias = ngraph::pattern::wrap_type<ngraph::opset8::FakeQuantize>({act_fq_max_pool_fq_bias,
        ngraph::pattern::any_input(), ngraph::pattern::any_input(), ngraph::pattern::any_input(), ngraph::pattern::any_input()},
        consumers_and_rank(1, 4));
    auto root_reshape =
        std::make_shared<ngraph::pattern::op::Or>(ngraph::OutputVector{conv, bias, max_pool_conv, max_pool_fq_conv, max_pool_bias, max_pool_fq_bias,
            fq_conv, fq_bias, act_conv, act_bias, act_max_pool_conv, act_max_pool_bias,
            fq_act_fq_fq_conv, fq_act_fq_fq_bias, fq_act_fq_max_pool_fq_conv, fq_act_fq_max_pool_fq_bias});
    const auto reshape = ngraph::pattern::wrap_type<ngraph::opset8::Reshape>({root_reshape, ngraph::pattern::any_input()}, is_required_reshape);
    ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) {
        const auto& pattern_map = m.get_pattern_value_map();
        const auto reshape_node = pattern_map.at(reshape).get_node_shared_ptr();
        auto consumers = reshape_node->output(0).get_target_inputs();

        auto N = reshape_node->get_input_shape(0)[0];
        auto W = reshape_node->get_input_shape(0)[1]*reshape_node->get_input_shape(0)[2]*reshape_node->get_input_shape(0)[3];

        // Create transpose NxCxHxW => NxHxWxC
        auto data = reshape_node->input_value(0);
        auto transpose_const = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {0, 2, 3, 1});
        auto transpose = register_new_node<ngraph::opset8::Transpose>(data, transpose_const);
        transpose->set_friendly_name(reshape_node->get_friendly_name() + "/Transpose");

        // Create reshape NxHxWxC => NxW (C or HxW is equal to 1)
        auto reshape_nw_const = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{2}, ngraph::Shape{N, W});
        auto reshape_nw = register_new_node<ngraph::opset8::Reshape>(transpose, reshape_nw_const, false);
        reshape_nw->set_friendly_name(reshape_node->get_friendly_name());

        ngraph::copy_runtime_info(reshape_node, {transpose, reshape_nw});
        for (auto consumer : consumers) {
            consumer.replace_source_output(reshape_nw);
        }

        return true;
    };

    auto m = std::make_shared<ngraph::pattern::Matcher>(reshape, matcher_name);
    this->register_matcher(m, callback);
}
@@ -0,0 +1,72 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <ngraph/pass/graph_rewrite.hpp>

namespace GNAPluginNS {

/**
 * @brief Replace 2d->4d reshape to pair of 2 reshapes (before Convolution)
 * Before:
 *    [N, HW]
 *       |
 *    Reshape
 *       |
 *  [N, C, H, W]
 *       |
 *  Convolution
 *
 * After (TransposeSinking friendly):
 *    [N, HW]
 *       |
 *    Reshape
 *       |
 *  [N, H, W, C]
 *       |
 *    Reshape
 *       |
 *  [N, C, H, W]
 *       |
 *  Convolution
 */
class Unfuse2dto4dReshapeAndTranspose : public ngraph::pass::MatcherPass {
public:
  NGRAPH_RTTI_DECLARATION;
  Unfuse2dto4dReshapeAndTranspose();
};

/**
 * @brief Replace 4d->2d reshape to pair of 2 reshapes (after Convolution)
 * Before:
 *  Convolution (optionally + bias/pooling/activation)
 *       |
 *  [N, C, H, W]
 *       |
 *    Reshape
 *       |
 *    [N, HW]
 *
 * After (TransposeSinking friendly):
 *  Convolution
 *       |
 *  [N, C, H, W]
 *       |
 *    Reshape
 *       |
 *  [N, H, W, C]
 *       |
 *    Reshape
 *       |
 *    [N, HW]
 *
 */
class Unfuse4dto2dReshapeAndTranspose : public ngraph::pass::MatcherPass {
public:
  NGRAPH_RTTI_DECLARATION;
  Unfuse4dto2dReshapeAndTranspose();
};

} // namespace GNAPluginNS
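Typical registration of the two passes, as done in GNAPlugin::LoadNetwork and in the unit test from this commit; repeated here only as a compact usage reminder.

#include <ngraph/pass/manager.hpp>
#include <transformations/init_node_info.hpp>
#include "transformations/unfuse_reshape_and_transpose.hpp"

void run_unfuse_reshape_and_transpose(const std::shared_ptr<ngraph::Function>& model) {
    ngraph::pass::Manager manager;
    manager.register_pass<ngraph::pass::InitNodeInfo>();
    manager.register_pass<GNAPluginNS::Unfuse2dto4dReshapeAndTranspose>();
    manager.register_pass<GNAPluginNS::Unfuse4dto2dReshapeAndTranspose>();
    manager.run_passes(model);
}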
@@ -24,16 +24,20 @@ typedef std::tuple<
    std::string,                        // Target Device
    std::map<std::string, std::string>, // Configuration
    std::vector<size_t>,                // Input Shape
    std::pair<float, float>,            // Input Min and Max
    size_t                              // Levels
    std::pair<float, float>,            // Input Min and Max (before conv)
    std::pair<float, float>,            // Input Min and Max (after conv)
    size_t,                             // Levels
    bool                                // Reshape between FQ and Pooling
> fqMaxpoolReorderingParams;

namespace LayerTestsDefinitions {

class FQMaxpoolReordering : public testing::WithParamInterface<fqMaxpoolReorderingParams>,
    public LayerTestsUtils::LayerTestsCommon {
    float inputDataMin = 0.0f;
    float inputDataMax = 0.0f;
    float inputDataMin1 = 0.0f;
    float inputDataMax1 = 0.0f;
    float inputDataMin2 = 0.0f;
    float inputDataMax2 = 0.0f;
    float inputDataResolution = 1.0f;

public:
@@ -42,9 +46,11 @@ public:
        std::string targetDevice;
        std::map<std::string, std::string> configuration;
        std::vector<size_t> inputShape;
        std::pair<float, float> inputMinMax;
        std::pair<float, float> inputMinMax1;
        std::pair<float, float> inputMinMax2;
        size_t levels = 0;
        std::tie(netPrecision, targetDevice, configuration, inputShape, inputMinMax, levels) = obj.param;
        bool reshape = false;
        std::tie(netPrecision, targetDevice, configuration, inputShape, inputMinMax1, inputMinMax2, levels, reshape) = obj.param;

        std::ostringstream result;
        result << "netPRC=" << netPrecision.name() << "_";
@@ -53,14 +59,16 @@ public:
            result << "_configItem=" << configItem.first << "_" << configItem.second;
        }
        result << "_inputShape=" << CommonTestUtils::vec2str(inputShape);
        result << "_inputMinMax=(" << inputMinMax.first << ".." << inputMinMax.second << ")";
        result << "_inputMinMax1=(" << inputMinMax1.first << ".." << inputMinMax1.second << ")";
        result << "_inputMinMax2=(" << inputMinMax2.first << ".." << inputMinMax2.second << ")";
        result << "_levels=" << levels;
        result << "_reshape=" << reshape;

        return result.str();
    }

    InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo& info) const override {
        return FuncTestUtils::createAndFillBlob(info.getTensorDesc(), inputDataMax - inputDataMin, inputDataMin, 1 / inputDataResolution);
        return FuncTestUtils::createAndFillBlob(info.getTensorDesc(), inputDataMax1 - inputDataMin1, inputDataMin1, 1 / inputDataResolution);
    }

protected:
@@ -68,23 +76,28 @@ protected:
        InferenceEngine::Precision netPrecision;

        std::vector<size_t> inputShape;
        std::pair<float, float> inputMinMax;
        std::pair<float, float> inputMinMax1;
        std::pair<float, float> inputMinMax2;
        size_t levels = 0;
        std::tie(netPrecision, targetDevice, configuration, inputShape, inputMinMax, levels) = this->GetParam();
        bool reshape = false;
        std::tie(netPrecision, targetDevice, configuration, inputShape, inputMinMax1, inputMinMax2, levels, reshape) = this->GetParam();
        auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);

        std::tie(inputDataMin, inputDataMax) = inputMinMax;
        auto inputLowNode = ngraph::builder::makeConstant<float>(ngPrc, {1}, { inputDataMin });
        auto inputHighNode = ngraph::builder::makeConstant<float>(ngPrc, {1}, { inputDataMax });
        std::tie(inputDataMin1, inputDataMax1) = inputMinMax1;
        std::tie(inputDataMin2, inputDataMax2) = inputMinMax2;
        auto inputLowNode1 = ngraph::builder::makeConstant<float>(ngPrc, {1}, { inputDataMin1 });
        auto inputHighNode1 = ngraph::builder::makeConstant<float>(ngPrc, {1}, { inputDataMax1 });
        auto inputLowNode2 = ngraph::builder::makeConstant<float>(ngPrc, {1}, { inputDataMin2 });
        auto inputHighNode2 = ngraph::builder::makeConstant<float>(ngPrc, {1}, { inputDataMax2 });

        auto inputVector = ngraph::builder::makeParams(ngPrc, {inputShape});

        auto inputFQ = std::make_shared<ngraph::opset1::FakeQuantize>(inputVector[0],
            inputLowNode, inputHighNode, inputLowNode, inputHighNode, levels);
            inputLowNode1, inputHighNode1, inputLowNode1, inputHighNode1, levels);

        auto filterWeightsNode = ngraph::builder::makeConstant<float>(ngPrc, {8, inputShape[1], 1, 8}, { 1.0f });
        auto convLowNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{ 1 }, std::vector<float>{inputDataMin});
        auto convHighNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{ 1 }, std::vector<float>{inputDataMax});
        auto convLowNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{ 1 }, std::vector<float>{inputDataMin1});
        auto convHighNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{ 1 }, std::vector<float>{inputDataMax1});
        auto convWeightsFQNode = std::make_shared<ngraph::opset1::FakeQuantize>(filterWeightsNode,
            convLowNode, convHighNode, convLowNode, convHighNode, levels);
        auto convWeightsFQ = std::dynamic_pointer_cast<ngraph::opset1::FakeQuantize>(convWeightsFQNode);
@@ -97,9 +110,20 @@ protected:
        auto add = std::make_shared<ngraph::opset1::Add>(conv, biasesWeightsNode);

        auto convFQNode = std::make_shared<ngraph::opset1::FakeQuantize>(add,
            inputLowNode, inputHighNode, inputLowNode, inputHighNode, levels);
            inputLowNode2, inputHighNode2, inputLowNode2, inputHighNode2, levels);

        auto maxpool = ngraph::builder::makePooling(convFQNode, {1, 2}, {0, 0}, {0, 0}, {1, 2}, ngraph::op::RoundingType::FLOOR,
        std::shared_ptr<ngraph::Node> node_before_pooling = convFQNode;
        if (reshape) {
            const auto& shape = conv->get_output_shape(0);
            size_t total = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<size_t>());
            auto reshapeConst1 = ngraph::builder::makeConstant(ngraph::element::i64, std::vector<size_t>{ 2 }, ngraph::Shape{1, total});
            auto reshapeNode1 = std::make_shared<ngraph::opset1::Reshape>(convFQNode, reshapeConst1, false);
            auto reshapeConst2 = ngraph::builder::makeConstant(ngraph::element::i64, std::vector<size_t>{ 4 }, shape);
            auto reshapeNode2 = std::make_shared<ngraph::opset1::Reshape>(reshapeNode1, reshapeConst2, false);
            node_before_pooling = reshapeNode2;
        }

        auto maxpool = ngraph::builder::makePooling(node_before_pooling, {1, 2}, {0, 0}, {0, 0}, {1, 2}, ngraph::op::RoundingType::FLOOR,
            ngraph::op::PadType::VALID, false, ngraph::helpers::PoolingTypes::MAX);

        ngraph::ResultVector results{ std::make_shared<ngraph::opset1::Result>(maxpool)};
@@ -130,7 +154,9 @@ const std::vector<std::vector<size_t>> inputShape = {
const std::vector<std::pair<float, float>> inputMinMax = {
    {-0.5, 0.5},
    {-2, 2},
    {-8, 8}
    {-8, 8},
    {-5, 5},
    {-17.5, 17.5},
};

const std::vector<size_t> levels = {
@@ -144,6 +170,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_fq_maxpool_reordering, FQMaxpoolReordering,
        ::testing::ValuesIn(configs),
        ::testing::ValuesIn(inputShape),
        ::testing::ValuesIn(inputMinMax),
        ::testing::ValuesIn(levels)),
        ::testing::ValuesIn(inputMinMax),
        ::testing::ValuesIn(levels),
        ::testing::ValuesIn(std::vector<bool>{true, false})),
    FQMaxpoolReordering::getTestCaseName);
} // namespace LayerTestsDefinitions
@@ -18,6 +18,7 @@
#include "ngraph_functions/builders.hpp"

#include "ngraph_functions/pass/convert_prc.hpp"
#include "transformations/common_optimizations/transpose_to_reshape.hpp"

typedef std::tuple<
    InferenceEngine::Precision, // Network Precision
@@ -31,7 +32,8 @@ typedef std::tuple<
    std::string,                        // Target Device
    std::map<std::string, std::string>, // Configuration
    std::vector<size_t>,                // Input shape
    bool                                // additional bool parameter
    bool,                               // additional bool parameter
    bool                                // transpose to reshape
> removePermutationsAddParamPassParams;

namespace LayerTestsDefinitions {
@@ -106,7 +108,8 @@ class RemovePermutationsNHWCToNCHWPassTest : public testing::WithParamInterface<
        std::map<std::string, std::string> configuration;
        std::vector<size_t> inputShape;
        bool output1D;
        std::tie(netPrecision, targetDevice, configuration, inputShape, output1D) = obj.param;
        bool transpose_to_reshape;
        std::tie(netPrecision, targetDevice, configuration, inputShape, output1D, transpose_to_reshape) = obj.param;

        std::ostringstream result;
        result << "netPRC=" << netPrecision.name() << "_";
@@ -116,6 +119,7 @@ class RemovePermutationsNHWCToNCHWPassTest : public testing::WithParamInterface<
        }
        result << "_IS=" << CommonTestUtils::vec2str(inputShape);
        result << "_1d_out=" << output1D;
        result << "_transpose2reshape=" << transpose_to_reshape;
        return result.str();
    }

@@ -133,7 +137,8 @@ class RemovePermutationsNHWCToNCHWPassTest : public testing::WithParamInterface<
        InferenceEngine::Precision netPrecision;
        std::vector<size_t> inputShape;
        bool output1D;
        std::tie(netPrecision, targetDevice, configuration, inputShape, output1D) = this->GetParam();
        bool transpose_to_reshape;
        std::tie(netPrecision, targetDevice, configuration, inputShape, output1D, transpose_to_reshape) = this->GetParam();
        auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);

        size_t shape_size = inputShape.size();
@@ -158,6 +163,11 @@ class RemovePermutationsNHWCToNCHWPassTest : public testing::WithParamInterface<

        ngraph::ResultVector results{ std::make_shared<ngraph::opset1::Result>(reshape2) };
        function = std::make_shared<ngraph::Function>(results, params, "RemovePermutationsTest");
        if (transpose_to_reshape) {
            ngraph::pass::Manager manager;
            manager.register_pass<ngraph::pass::TransposeToReshape>();
            manager.run_passes(function);
        }
    }
};

@@ -212,7 +222,8 @@ class RemovePermutationsWithPoolAndActTest : public testing::WithParamInterface<
        std::map<std::string, std::string> configuration;
        std::vector<size_t> inputShape;
        bool withActivation;
        std::tie(netPrecision, targetDevice, configuration, inputShape, withActivation) = obj.param;
        bool transpose_to_reshape;
        std::tie(netPrecision, targetDevice, configuration, inputShape, withActivation, transpose_to_reshape) = obj.param;

        std::ostringstream result;
        result << "netPRC=" << netPrecision.name() << "_";
@@ -222,6 +233,7 @@ class RemovePermutationsWithPoolAndActTest : public testing::WithParamInterface<
        }
        result << "_IS=" << CommonTestUtils::vec2str(inputShape);
        result << "_withActivation=" << withActivation;
        result << "_transpose2reshape=" << transpose_to_reshape;
        return result.str();
    }

@@ -255,7 +267,8 @@ class RemovePermutationsWithPoolAndActTest : public testing::WithParamInterface<
        InferenceEngine::Precision netPrecision;
        std::vector<size_t> inputShape;
        bool withActivation;
        std::tie(netPrecision, targetDevice, configuration, inputShape, withActivation) = this->GetParam();
        bool transpose_to_reshape;
        std::tie(netPrecision, targetDevice, configuration, inputShape, withActivation, transpose_to_reshape) = this->GetParam();
        auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);

        size_t shape_size = inputShape.size();
@@ -280,6 +293,12 @@ class RemovePermutationsWithPoolAndActTest : public testing::WithParamInterface<

        ngraph::ResultVector results{ std::make_shared<ngraph::opset1::Result>(reshape2) };
        function = std::make_shared<ngraph::Function>(results, params, "RemovePermutationsWithPoolAndActTest");

        if (transpose_to_reshape) {
            ngraph::pass::Manager manager;
            manager.register_pass<ngraph::pass::TransposeToReshape>();
            manager.run_passes(function);
        }
    }
};

@@ -512,7 +531,8 @@ class RemovePermutationsWithEltwiseTest : public testing::WithParamInterface<rem
        ::testing::Values(CommonTestUtils::DEVICE_GNA),
        ::testing::ValuesIn(configs),
        ::testing::ValuesIn(inputShapes),
        ::testing::ValuesIn(std::vector<bool>{false, true})),  // with 1d output of convolution
        ::testing::ValuesIn(std::vector<bool>{false, true}),   // with 1d output of convolution
        ::testing::ValuesIn(std::vector<bool>{false, true})),  // transpose to reshape
    RemovePermutationsNHWCToNCHWPassTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_PermutationPass, RemovePermutationsNHWCToNCHWPassNoReshapesTest,
@@ -529,7 +549,8 @@ class RemovePermutationsWithEltwiseTest : public testing::WithParamInterface<rem
        ::testing::Values(CommonTestUtils::DEVICE_GNA),
        ::testing::ValuesIn(configs),
        ::testing::ValuesIn(inputShapes),
        ::testing::ValuesIn(std::vector<bool>{false, true})),  // with activation
        ::testing::ValuesIn(std::vector<bool>{false, true}),   // with activation
        ::testing::ValuesIn(std::vector<bool>{false, true})),  // transpose to reshape
    RemovePermutationsWithPoolAndActTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_PermutationPass, RemovePermutationsWithTwoConvTest,