[GNA] Convolution without --disable_nhwc_to_nchw option for TF models. (#3918)
parent 1902e14ba0
commit 65053df07c
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2018-2021 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -201,7 +201,8 @@ void GNAPluginNS::backend::AMIntelDNN::InitConvolutional1DComponentPrivate(intel
     }
 
     if (comp.num_rows_in * comp.num_columns_in % 8 != 0) {
-        THROW_GNA_EXCEPTION << "Number of inputs to Convolutional1DComponent is not multiply by 8";
+        THROW_GNA_EXCEPTION << "Number of inputs to Convolutional1DComponent (" << comp.num_rows_in * comp.num_columns_in <<
+            ") is not a multiply by 8";
     }
     if (comp.op.conv1D.num_filters < GNALimitations::convMinFiltersNum ||
         comp.op.conv1D.num_filters > GNALimitations::convMaxFiltersNum ||
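Note: GNA requires the flattened input of a 1D convolution (num_rows_in * num_columns_in) to be a multiple of 8; rather than rejecting such models, the graph compiler pads the input (see num_input_padding in the next hunk). A minimal sketch of that rounding, outside the plugin — the helper name align_up is hypothetical:

    #include <cassert>
    #include <cstdint>

    // Round `value` up to the next multiple of `multiple` (multiple > 0).
    static uint32_t align_up(uint32_t value, uint32_t multiple) {
        return ((value + multiple - 1) / multiple) * multiple;
    }

    int main() {
        uint32_t num_inputs = 42;                                           // rows * columns
        uint32_t num_input_padding = align_up(num_inputs, 8) - num_inputs;  // 6 zero elements appended
        assert((num_inputs + num_input_padding) % 8 == 0);
        return 0;
    }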
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2018-2021 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -356,7 +356,7 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
     uint32_t num_filters = convolution._out_depth;
     uint32_t num_filter_coefficients = single_conv_kernel_size + num_conv_kernel_padding;
     uint32_t num_filter_rows = num_filter_coefficients / num_feature_map_columns;
-    uint32_t num_columns_in = num_inputs;
+    uint32_t num_columns_in = num_inputs + num_input_padding;
 
     uint32_t num_columns_out = (((num_inputs - num_filter_coefficients) / num_feature_map_columns) + 1) * convolution._out_depth;
     uint32_t num_columns_out_unpadded = (((num_inputs - single_conv_kernel_size) / num_feature_map_columns) + 1) * convolution._out_depth;
@@ -841,15 +841,20 @@ void GNAGraphCompiler::PoolingPrimitive(InferenceEngine::CNNLayerPtr layer) {
 
     IE_ASSERT(!layer->insData.empty());
     IE_ASSERT(!layer->outData.empty());
+    printPoolingLayer(pooling);
 
     auto inputs = layer->insData.begin()->lock();
     auto outputs = *layer->outData.begin();
 
-    uint32_t w_dim_in = FROM_IR_DIM(inputs, 1);
-    uint32_t h_dim_in = FROM_IR_DIM(inputs, 2);
-    uint32_t c_dim_in = FROM_IR_DIM(inputs, 3);
-    uint32_t w_dim_out = FROM_IR_DIM(outputs, 1);
-    uint32_t h_dim_out = FROM_IR_DIM(outputs, 2);
-    uint32_t c_dim_out = FROM_IR_DIM(outputs, 3);
+    auto in_order = getFromIRDimsOrderNCHW(inputs->getLayout());
+    uint32_t w_dim_in = FROM_IR_DIM(inputs, in_order[3]);
+    uint32_t h_dim_in = FROM_IR_DIM(inputs, in_order[2]);
+    uint32_t c_dim_in = FROM_IR_DIM(inputs, in_order[1]);
+
+    auto out_order = getFromIRDimsOrderNCHW(outputs->getLayout());
+    uint32_t w_dim_out = FROM_IR_DIM(outputs, out_order[3]);
+    uint32_t h_dim_out = FROM_IR_DIM(outputs, out_order[2]);
+    uint32_t c_dim_out = FROM_IR_DIM(outputs, out_order[1]);
 
     if (w_dim_in == 1) {  // swap dimensions if needed to support swapped 1D case
         swap(h_dim_in, w_dim_in);
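The dimension lookup now goes through getFromIRDimsOrderNCHW, declared later in the header. A minimal sketch of what such a helper could return, assuming FROM_IR_DIM indexes IR dimensions from the innermost one (1 = last dim), so that order[3] always addresses width whatever the blob layout; the enum here is a stand-in for InferenceEngine::Layout:

    #include <cstddef>
    #include <vector>

    enum class IrLayout { NCHW, NHWC };

    // FROM_IR_DIM indices for the logical {N, C, H, W} axes.
    std::vector<std::size_t> getFromIRDimsOrderNCHW(IrLayout layout) {
        switch (layout) {
            case IrLayout::NHWC: return {4, 1, 3, 2};  // C innermost, then W, then H, then N
            case IrLayout::NCHW:
            default:             return {4, 3, 2, 1};  // W innermost, then H, then C, then N
        }
    }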
@@ -2410,6 +2415,24 @@ void GNAGraphCompiler::printConvolutionLayer(const InferenceEngine::ConvolutionL
     printTensorDesc("Output", layer.outData.front()->getTensorDesc());
 }
 
+void GNAGraphCompiler::printPoolingLayer(const InferenceEngine::PoolingLayer& layer) {
+    const char x = 'x';
+
+    gnalog() << "PoolingLayer '"
+             << layer.name
+             << "' Kernel: "
+             << layer._kernel_x << x << layer._kernel_y
+             << " Padding: "
+             << layer._padding_x << x << layer._padding_y
+             << " Stride: "
+             << layer._stride_x << x << layer._stride_y
+             << " Auto Padding: '"
+             << layer._auto_pad << "'";
+    gnalog() << "\n";
+    printTensorDesc("Input", layer.input()->getTensorDesc());
+    printTensorDesc("Output", layer.outData.front()->getTensorDesc());
+}
+
 std::vector<uint8_t>
 GNAGraphCompiler::transposeMatrix(uint8_t* ptr_matrix, size_t element_size, uint32_t num_rows, uint32_t num_cols) {
     std::vector<uint8_t> temp_buffer(num_rows * num_cols * element_size);
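The hunk shows only the first line of transposeMatrix as context. For reference, a byte-wise transpose matching that signature could look like the sketch below (an assumption, not the commit's actual body):

    #include <cstdint>
    #include <cstring>
    #include <vector>

    std::vector<uint8_t> transposeMatrix(const uint8_t* ptr_matrix, size_t element_size,
                                         uint32_t num_rows, uint32_t num_cols) {
        std::vector<uint8_t> temp_buffer(num_rows * num_cols * element_size);
        for (uint32_t r = 0; r < num_rows; ++r) {
            for (uint32_t c = 0; c < num_cols; ++c) {
                // Source element (r, c) lands at (c, r) in the transposed buffer.
                std::memcpy(temp_buffer.data() + (c * num_rows + r) * element_size,
                            ptr_matrix + (r * num_cols + c) * element_size,
                            element_size);
            }
        }
        return temp_buffer;
    }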
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2018-2021 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -48,6 +48,7 @@ private:
 
     static void printTensorDesc(const std::string& name, const InferenceEngine::TensorDesc& desc);
     static void printConvolutionLayer(const InferenceEngine::ConvolutionLayer& layer);
+    static void printPoolingLayer(const InferenceEngine::PoolingLayer& layer);
    static void assertConvolutionLayoutProper(const InferenceEngine::DataPtr&);
     std::vector<uint8_t> static transposeMatrix(uint8_t* ptr_matrix, size_t element_size, uint32_t num_rows, uint32_t num_cols);
     std::vector<std::size_t> static getFromIRDimsOrderNCHW(InferenceEngine::Layout layout);
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2018-2021 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -628,7 +628,8 @@ void ReversePermutationsPass::run() {
 }
 
 void RemovePermutationsNHWCToNCHWPass::run() {
-    std::list<CNNLayerPtr> permutationsToRemove;
+    std::set<CNNLayerPtr> permutations_to_remove;
+    std::list<std::pair<CNNLayerPtr, CNNLayerPtr>> nhwc_layout_patterns;
     for (auto& l : *pLayers) {
         if (!LayerInfo(l).isConvolution()) {
             continue;
@@ -641,8 +642,18 @@ void RemovePermutationsNHWCToNCHWPass::run() {
         if (getInputTo(l->outData.front()).empty()) {
             continue;
         }
+
         if (!CNNNetHasPrevLayer(l.get())) {
             continue;
         }
+
         auto next = getInputTo(l->outData.front()).begin()->second;
-        auto prev = CNNNetPrevLayer(l);
+        while (!LayerInfo(next).isPermute() && !LayerInfo(next).isNonFunctional() && !LayerInfo(next).isOutput() &&
+               next->outData.size() == 1) {
+            auto input_to = getInputTo(next->outData.front());
+            if (input_to.size() != 1) break;
+            next = input_to.begin()->second;
+        }
+
+        // The next layer must be NCHW to NHWC permute
         if (!LayerInfo(next).isPermute() || next->input()->getLayout() != Layout::NCHW ||
@@ -650,6 +661,12 @@ void RemovePermutationsNHWCToNCHWPass::run() {
             continue;
         }
 
+        auto parent = CNNNetPrevLayer(l);
+        auto prev = parent;
+        while (!LayerInfo(prev).isPermute() && !LayerInfo(prev).isNonFunctional() &&
+               !LayerInfo(prev).isInput() && CNNNetHasPrevLayer(prev.get())) {
+            prev = CNNNetPrevLayer(prev);
+        }
         // The previous layer must be NHWC to NCHW permute or have 1D data
         if (LayerInfo(prev).isPermute()) {
             if (prev->outData[0]->getLayout() != Layout::NCHW ||
@@ -658,62 +675,79 @@ void RemovePermutationsNHWCToNCHWPass::run() {
             }
 
             if (getPassManager()->getPolicy().NHWCToNCHWPolicy == Policy::NHWCToNCHW::REMOVE_ALL) {
-                permutationsToRemove.push_back(prev);
+                permutations_to_remove.insert(prev);
             }
         } else {
-            if (prev->outData.size() != 1 || getInputTo(prev->outData[0]).size() != 1) {
+            if (parent->outData.size() != 1 || getInputTo(parent->outData[0]).size() != 1) {
                 continue;
             }
-            auto prev_dims = prev->outData[0]->getDims();
+            auto parent_dims = parent->outData[0]->getDims();
             // Check if the previous layer has all dimensions except one to be equal to 1
-            if (std::count_if(std::begin(prev_dims), std::end(prev_dims), [](size_t dim) { return dim != 1; }) > 1) {
+            if (std::count_if(std::begin(parent_dims), std::end(parent_dims), [](size_t dim) { return dim != 1; }) > 1) {
                 continue;
             }
         }
-        permutationsToRemove.push_back(next);
+        permutations_to_remove.insert(next);
+        nhwc_layout_patterns.push_back({prev, next});
+
+        auto* convolution = dynamic_cast<ConvolutionLayer*>(l.get());
+        if (!convolution) {
+            THROW_GNA_EXCEPTION << "Invalid type of convolution layer";
+        }
+        if (convolution->_kernel_y != 1) {
+            THROW_GNA_LAYER_EXCEPTION(l) << "this case is not implemented yet";
+        }
+        auto in_channels = convolution->input()->getDims()[1];
+        convolution->_kernel_y = in_channels;
     }
 
-    for (auto&& toRemove : permutationsToRemove) {
-        gnalog() << toRemove->type << " layer '" << toRemove->name << "' will be removed" << '\n';
+    for (const auto& layers : nhwc_layout_patterns) {
+        auto pattern_start = layers.first;
+        auto pattern_end = layers.second;
 
-        if (!getInputTo(toRemove->outData.front()).empty()) {
-            auto next = getInputTo(toRemove->outData.front()).begin()->second;
-            IE_ASSERT(next != nullptr);
+        auto setNHWCOrder = [](InferenceEngine::DataPtr data) {
+            if (data->getLayout() == Layout::NHWC) return;
+            auto dims = data->getDims();
+            auto order = GetPermuteOrder(Layout::NCHW, Layout::NHWC);
+            InferenceEngine::SizeVector new_dims;
+            for (int i = 0; i < dims.size(); ++i) {
+                new_dims.push_back(dims[order[i]]);
+            }
+            data->setDims(new_dims);
+            data->setLayout(Layout::NHWC);
+        };
 
-            if (LayerInfo(next).isConvolution()) {
-                next->input()->setDims(toRemove->input()->getDims());
-                next->input()->setLayout(Layout::NHWC);
-                auto layerBeforePermute = CNNNetPrevLayer(toRemove);
-                DataPtr output = nullptr;
-                for (auto before_output : layerBeforePermute->outData) {
-                    if (areEqualDatas(toRemove->input(), before_output)) {
-                        output = before_output;
-                        output->setLayout(Layout::NHWC);
-                        break;
-                    }
-                }
-                if (output == nullptr) {
-                    THROW_GNA_EXCEPTION << "Could not find correct data link between " << toRemove->name << " and " << layerBeforePermute->name;
-                }
+        auto current_layer = getInputTo(pattern_start->outData[0]).begin()->second;
+        setNHWCOrder(current_layer->input());
+        while (current_layer != pattern_end) {
+            setNHWCOrder(current_layer->outData[0]);
+            current_layer = getInputTo(current_layer->outData[0]).begin()->second;
+        }
 
-                auto* convolution = dynamic_cast<ConvolutionLayer*>(next.get());
-                if (!convolution) {
-                    THROW_GNA_EXCEPTION << "There needs to be a convolution between permutations for RemovePermutationsNHWCToNCHWPass!";
-                }
-                if (convolution->_kernel_y != 1) {
-                    THROW_GNA_LAYER_EXCEPTION(next) << "this case is not implemented yet";
-                }
-                auto in_channels = next->input()->getDims()[3];
-                convolution->_kernel_y = in_channels;
-            }
+        if (LayerInfo(pattern_start).isPermute() && !getInputTo(pattern_start->outData.front()).empty()) {
+            auto layer_before_permute = CNNNetPrevLayer(pattern_start);
+            DataPtr output = nullptr;
+            for (auto before_output : layer_before_permute->outData) {
+                if (areEqualDatas(pattern_start->input(), before_output)) {
+                    output = before_output;
+                    output->setLayout(Layout::NHWC);
+                    break;
+                }
+            }
+            if (output == nullptr) {
+                THROW_GNA_EXCEPTION << "Could not find correct data link between " << pattern_start->name << " and " << layer_before_permute->name;
+            }
+        }
 
-            auto prev = CNNNetPrevLayer(toRemove);
-            if (LayerInfo(prev).isConvolution()) {
-                prev->outData[0]->setDims(toRemove->outData[0]->getDims());
-                prev->outData[0]->setLayout(Layout::NHWC);
-            }
-            CNNNetworkRemoveLayer(toRemove, false);
-        }
+        if (!pattern_end->outData.empty() && !getInputTo(pattern_end->outData.front()).empty()) {
+            auto layer_after_permute = getInputTo(pattern_end->outData.front()).begin()->second;
+            layer_after_permute->input()->setLayout(Layout::NHWC);
+        }
     }
+
+    for (auto&& to_remove : permutations_to_remove) {
+        gnalog() << to_remove->type << " layer '" << to_remove->name << "' will be removed" << '\n';
+        CNNNetworkRemoveLayer(to_remove, false);
+    }
 }
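The rewritten pass walks every layer between the two permutes and relabels its data blobs from NCHW to NHWC; GetPermuteOrder(Layout::NCHW, Layout::NHWC) is expected to yield the axis order {0, 2, 3, 1}. A standalone sketch of that reordering, with example dims chosen for illustration:

    #include <cstddef>
    #include <iostream>
    #include <vector>

    int main() {
        std::vector<std::size_t> nchw_dims = {1, 12, 1, 57};  // N, C, H, W (hypothetical)
        std::vector<std::size_t> order = {0, 2, 3, 1};        // NCHW -> NHWC axis order
        std::vector<std::size_t> nhwc_dims;
        for (std::size_t i = 0; i < order.size(); ++i) {
            nhwc_dims.push_back(nchw_dims[order[i]]);
        }
        for (auto d : nhwc_dims) std::cout << d << ' ';       // prints: 1 1 57 12
        std::cout << '\n';
        return 0;
    }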
@@ -1,4 +1,4 @@
-// Copyright (C) 2020 Intel Corporation
+// Copyright (C) 2020-2021 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -133,6 +133,275 @@ protected:
     }
 };
 
+class RemovePermutationsWithPoolAndActTest : public testing::WithParamInterface<removePermutationsPassParams>,
+                                             public LayerTestsUtils::LayerTestsCommon {
+public:
+    static std::string getTestCaseName(testing::TestParamInfo<removePermutationsPassParams> obj) {
+        InferenceEngine::Precision netPrecision;
+        std::string targetDevice;
+        std::map<std::string, std::string> configuration;
+        std::vector<size_t> inputShape;
+        std::tie(netPrecision, targetDevice, configuration, inputShape) = obj.param;
+
+        std::ostringstream result;
+        result << "netPRC=" << netPrecision.name() << "_";
+        result << "targetDevice=" << targetDevice << "_";
+        for (auto const& configItem : configuration) {
+            result << "_configItem=" << configItem.first << "_" << configItem.second;
+        }
+        result << "_IS=" << CommonTestUtils::vec2str(inputShape);
+        return result.str();
+    }
+
+protected:
+    InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo& info) const {
+        InferenceEngine::Blob::Ptr blob = make_blob_with_precision(info.getTensorDesc());
+        blob->allocate();
+        auto precision = info.getPrecision();
+
+        auto* rawBlobDataPtr = blob->buffer().as<float*>();
+        std::vector<float> values = CommonTestUtils::generate_float_numbers(blob->size(), -0.2f, 0.2f);
+        for (size_t i = 0; i < blob->size(); i++) {
+            rawBlobDataPtr[i] = values[i];
+        }
+        return blob;
+    }
+
+    void SetUp() override {
+        //      Reshape
+        //         |
+        //      Permute (order: [0, 3, 1, 2])
+        //         |
+        //       Relu
+        //         |
+        //     Convolution
+        //         |
+        //      Pooling
+        //         |
+        //       Relu
+        //         |
+        //      Permute (order: [0, 2, 3, 1])
+        //         |
+        //      Reshape
+        InferenceEngine::Precision netPrecision;
+        std::vector<size_t> inputShape;
+        std::tie(netPrecision, targetDevice, configuration, inputShape) = this->GetParam();
+        auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
+
+        size_t in_total_dims_size = std::accumulate(std::begin(inputShape), std::end(inputShape), 1, std::multiplies<double>());
+        auto params = ngraph::builder::makeParams(ngPrc, { {1, in_total_dims_size} });
+
+        auto pattern1 = std::make_shared<ngraph::opset1::Constant>(ngraph::element::Type_t::i64, ngraph::Shape{ 4 }, inputShape);
+        auto reshape1 = std::make_shared<ngraph::opset1::Reshape>(params[0], pattern1, false);
+        auto permute1 = std::make_shared<ngraph::opset1::Transpose>(reshape1,
+            ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{ 4 }, { 0, 3, 1, 2 }));
+
+        auto relu1 = std::make_shared<ngraph::opset3::Relu>(permute1);
+
+        size_t num_out_channels = 12;
+        size_t kernal_size = 8;
+        std::vector<float> filter_weights = CommonTestUtils::generate_float_numbers(num_out_channels * inputShape[3] * kernal_size,
+                                                                                    -0.2f, 0.2f);
+        auto conv1 = ngraph::builder::makeConvolution(relu1, ngPrc, { 1, kernal_size }, { 1, 1 }, { 0, 0 }, { 0, 0 }, { 1, 1 },
+                                                      ngraph::op::PadType::VALID, num_out_channels, false, filter_weights);
+        auto pool = ngraph::builder::makePooling(conv1, {1, 2}, {0, 0}, {0, 0}, {1, 2}, ngraph::op::RoundingType::FLOOR,
+                                                 ngraph::op::PadType::VALID, false, ngraph::helpers::PoolingTypes::MAX);
+
+        size_t out_width = ((inputShape[2] - kernal_size) + 1) / 2;
+        auto relu2 = std::make_shared<ngraph::opset3::Relu>(pool);
+
+        auto permute2 = std::make_shared<ngraph::opset1::Transpose>(relu2,
+            ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{ 4 }, { 0, 2, 3, 1 }));
+
+        std::vector<size_t> outFormShapes = { 1, out_width * num_out_channels };
+        auto pattern2 = std::make_shared<ngraph::opset1::Constant>(ngraph::element::Type_t::i64, ngraph::Shape{ 2 }, outFormShapes);
+        auto reshape2 = std::make_shared<ngraph::opset1::Reshape>(permute2, pattern2, false);
+
+        ngraph::ResultVector results{ std::make_shared<ngraph::opset1::Result>(reshape2) };
+        function = std::make_shared<ngraph::Function>(results, params, "RemovePermutationPass");
+    }
+};
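The out_width formula in SetUp above combines a VALID convolution with stride-2 floor pooling. A hedged arithmetic check, assuming an input width of 160 (the actual inputShapes used by the instantiations are not shown in this diff):

    #include <cassert>

    int main() {
        const int in_width = 160, kernel = 8, pool_stride = 2;
        const int conv_out = in_width - kernel + 1;         // VALID conv: 153
        const int pool_out = conv_out / pool_stride;        // FLOOR pooling: 76
        assert(pool_out == ((in_width - kernel) + 1) / 2);  // matches out_width in SetUp
        return 0;
    }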
+
+class RemovePermutationsWithTwoConvTest : public testing::WithParamInterface<removePermutationsPassParams>,
+                                          public LayerTestsUtils::LayerTestsCommon {
+public:
+    static std::string getTestCaseName(testing::TestParamInfo<removePermutationsPassParams> obj) {
+        InferenceEngine::Precision netPrecision;
+        std::string targetDevice;
+        std::map<std::string, std::string> configuration;
+        std::vector<size_t> inputShape;
+        std::tie(netPrecision, targetDevice, configuration, inputShape) = obj.param;
+
+        std::ostringstream result;
+        result << "netPRC=" << netPrecision.name() << "_";
+        result << "targetDevice=" << targetDevice << "_";
+        for (auto const& configItem : configuration) {
+            result << "_configItem=" << configItem.first << "_" << configItem.second;
+        }
+        result << "_IS=" << CommonTestUtils::vec2str(inputShape);
+        return result.str();
+    }
+
+protected:
+    InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo& info) const {
+        InferenceEngine::Blob::Ptr blob = make_blob_with_precision(info.getTensorDesc());
+        blob->allocate();
+        auto precision = info.getPrecision();
+
+        auto* rawBlobDataPtr = blob->buffer().as<float*>();
+        std::vector<float> values = CommonTestUtils::generate_float_numbers(blob->size(), 0.0f, 0.5f);
+        for (size_t i = 0; i < blob->size(); i++) {
+            rawBlobDataPtr[i] = values[i];
+        }
+        return blob;
+    }
+
+    void SetUp() override {
+        //      Reshape
+        //         |
+        //      Permute (order: [0, 3, 1, 2])
+        //         |
+        //     Convolution
+        //         |
+        //     Convolution
+        //         |
+        //      Permute (order: [0, 2, 3, 1])
+        //         |
+        //      Reshape
+        InferenceEngine::Precision netPrecision;
+        std::vector<size_t> inputShape;
+        std::tie(netPrecision, targetDevice, configuration, inputShape) = this->GetParam();
+        auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
+
+        size_t in_total_dims_size = std::accumulate(std::begin(inputShape), std::end(inputShape), 1, std::multiplies<double>());
+        auto params = ngraph::builder::makeParams(ngPrc, { {1, in_total_dims_size} });
+
+        auto pattern1 = std::make_shared<ngraph::opset1::Constant>(ngraph::element::Type_t::i64, ngraph::Shape{ 4 }, inputShape);
+        auto reshape1 = std::make_shared<ngraph::opset1::Reshape>(params[0], pattern1, false);
+        auto permute1 = std::make_shared<ngraph::opset1::Transpose>(reshape1,
+            ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{ 4 }, { 0, 3, 1, 2 }));
+
+        size_t num_out_channels = 12;
+        size_t kernal_size = 8;
+        std::vector<float> filter_weights_1 = CommonTestUtils::generate_float_numbers(num_out_channels * inputShape[3] * kernal_size,
+                                                                                      0.0f, 0.5f);
+        auto conv1 = ngraph::builder::makeConvolution(permute1, ngPrc, { 1, kernal_size }, { 1, 1 }, { 0, 0 }, { 0, 0 }, { 1, 1 },
+                                                      ngraph::op::PadType::VALID, num_out_channels, false, filter_weights_1);
+        size_t out_width = ((inputShape[2] - kernal_size) + 1);
+
+        std::vector<float> filter_weights_2 = CommonTestUtils::generate_float_numbers(num_out_channels * num_out_channels * kernal_size,
+                                                                                      -0.2f, 0.2f);
+        auto conv2 = ngraph::builder::makeConvolution(conv1, ngPrc, { 1, kernal_size }, { 1, 1 }, { 0, 0 }, { 0, 0 }, { 1, 1 },
+                                                      ngraph::op::PadType::VALID, num_out_channels, false, filter_weights_2);
+        out_width = ((out_width - kernal_size) + 1);
+
+        auto permute2 = std::make_shared<ngraph::opset1::Transpose>(conv2,
+            ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{ 4 }, { 0, 2, 3, 1 }));
+
+        std::vector<size_t> outFormShapes = { 1, out_width * num_out_channels };
+        auto pattern2 = std::make_shared<ngraph::opset1::Constant>(ngraph::element::Type_t::i64, ngraph::Shape{ 2 }, outFormShapes);
+        auto reshape2 = std::make_shared<ngraph::opset1::Reshape>(permute2, pattern2, false);
+
+        ngraph::ResultVector results{ std::make_shared<ngraph::opset1::Result>(reshape2) };
+        function = std::make_shared<ngraph::Function>(results, params, "RemovePermutationPass");
+    }
+};
+
+class RemovePermutationsWithEltwiseTest : public testing::WithParamInterface<removePermutationsPassParams>,
+                                          public LayerTestsUtils::LayerTestsCommon {
+public:
+    static std::string getTestCaseName(testing::TestParamInfo<removePermutationsPassParams> obj) {
+        InferenceEngine::Precision netPrecision;
+        std::string targetDevice;
+        std::map<std::string, std::string> configuration;
+        std::vector<size_t> inputShape;
+        std::tie(netPrecision, targetDevice, configuration, inputShape) = obj.param;
+
+        std::ostringstream result;
+        result << "netPRC=" << netPrecision.name() << "_";
+        result << "targetDevice=" << targetDevice << "_";
+        for (auto const& configItem : configuration) {
+            result << "_configItem=" << configItem.first << "_" << configItem.second;
+        }
+        result << "_IS=" << CommonTestUtils::vec2str(inputShape);
+        return result.str();
+    }
+
+protected:
+    InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo& info) const {
+        InferenceEngine::Blob::Ptr blob = make_blob_with_precision(info.getTensorDesc());
+        blob->allocate();
+        auto precision = info.getPrecision();
+
+        auto* rawBlobDataPtr = blob->buffer().as<float*>();
+        std::vector<float> values = CommonTestUtils::generate_float_numbers(blob->size(), -0.2f, 0.2f);
+        for (size_t i = 0; i < blob->size(); i++) {
+            rawBlobDataPtr[i] = values[i];
+        }
+        return blob;
+    }
+
+    void SetUp() override {
+        //      Reshape                              Reshape
+        //         |                                    |
+        //      Permute (order: [0, 3, 1, 2])        Permute (order: [0, 3, 1, 2])
+        //         |                                    |
+        //     Convolution                          Convolution
+        //         |____________________________________|
+        //                            |
+        //                           Add
+        //                            |
+        //                         Permute (order: [0, 2, 3, 1])
+        //                            |
+        //                         Reshape
+        InferenceEngine::Precision netPrecision;
+        std::vector<size_t> inputShape;
+        std::tie(netPrecision, targetDevice, configuration, inputShape) = this->GetParam();
+        auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
+
+        size_t in_total_dims_size = std::accumulate(std::begin(inputShape), std::end(inputShape), 1, std::multiplies<double>());
+        auto params = ngraph::builder::makeParams(ngPrc, { {1, 2 * in_total_dims_size} });
+        auto split = ngraph::builder::makeSplit(params[0], ngPrc, 2, 1);
+        auto in_width = inputShape[2];
+        auto in_channels = inputShape[3];
+
+        auto pattern1 = std::make_shared<ngraph::opset1::Constant>(ngraph::element::Type_t::i64, ngraph::Shape{ 4 }, inputShape);
+        auto reshape1 = std::make_shared<ngraph::opset1::Reshape>(split->output(0), pattern1, false);
+        auto permute1 = std::make_shared<ngraph::opset1::Transpose>(reshape1,
+            ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{ 4 }, { 0, 3, 1, 2 }));
+
+        size_t num_out_channels = 12;
+        size_t kernal_size = 8;
+        std::vector<float> filter_weights_1 = CommonTestUtils::generate_float_numbers(num_out_channels * in_channels * kernal_size,
+                                                                                      -0.2f, 0.2f);
+        auto conv1 = ngraph::builder::makeConvolution(permute1, ngPrc, { 1, kernal_size }, { 1, 1 }, { 0, 0 }, { 0, 0 }, { 1, 1 },
+                                                      ngraph::op::PadType::VALID, num_out_channels, false, filter_weights_1);
+
+        auto pattern2 = std::make_shared<ngraph::opset1::Constant>(ngraph::element::Type_t::i64, ngraph::Shape{ 4 }, inputShape);
+        auto reshape2 = std::make_shared<ngraph::opset1::Reshape>(split->output(1), pattern2, false);
+        auto permute2 = std::make_shared<ngraph::opset1::Transpose>(reshape2,
+            ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{ 4 }, { 0, 3, 1, 2 }));
+
+        std::vector<float> filter_weights_2 = CommonTestUtils::generate_float_numbers(num_out_channels * in_channels * kernal_size,
+                                                                                      -0.2f, 0.2f);
+        auto conv2 = ngraph::builder::makeConvolution(permute2, ngPrc, { 1, kernal_size }, { 1, 1 }, { 0, 0 }, { 0, 0 }, { 1, 1 },
+                                                      ngraph::op::PadType::VALID, num_out_channels, false, filter_weights_2);
+
+        auto add = std::make_shared<ngraph::opset1::Add>(conv1, conv2);
+
+        auto permute3 = std::make_shared<ngraph::opset1::Transpose>(add,
+            ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{ 4 }, { 0, 2, 3, 1 }));
+
+        size_t out_width = ((in_width - kernal_size) + 1);
+        std::vector<size_t> outFormShapes = { 1, out_width * num_out_channels };
+        auto pattern3 = std::make_shared<ngraph::opset1::Constant>(ngraph::element::Type_t::i64, ngraph::Shape{ 2 }, outFormShapes);
+        auto reshape3 = std::make_shared<ngraph::opset1::Reshape>(permute3, pattern3, false);
+
+        ngraph::ResultVector results{ std::make_shared<ngraph::opset1::Result>(reshape3) };
+        function = std::make_shared<ngraph::Function>(results, params, "RemovePermutationPass");
+    }
+};
+
 TEST_P(RemovePermutationsNHWCToNCHWPassTest, CompareWithRefImpl) {
     Run();
 };
@@ -141,6 +410,18 @@ protected:
     Run();
 };
 
+TEST_P(RemovePermutationsWithPoolAndActTest, CompareWithRefImpl) {
+    Run();
+};
+
+TEST_P(RemovePermutationsWithTwoConvTest, CompareWithRefImpl) {
+    Run();
+};
+
+TEST_P(RemovePermutationsWithEltwiseTest, CompareWithRefImpl) {
+    Run();
+};
+
 const std::vector<InferenceEngine::Precision> netPrecisions = {
     InferenceEngine::Precision::FP32,
     InferenceEngine::Precision::FP16
@@ -178,5 +459,29 @@ protected:
         ::testing::ValuesIn(inputShapes)),
     RemovePermutationsNHWCToNCHWPass4DOutputTest::getTestCaseName);
 
+INSTANTIATE_TEST_CASE_P(smoke_PermutationPass, RemovePermutationsWithPoolAndActTest,
+    ::testing::Combine(
+        ::testing::ValuesIn(netPrecisions),
+        ::testing::Values(CommonTestUtils::DEVICE_GNA),
+        ::testing::ValuesIn(configs),
+        ::testing::ValuesIn(inputShapes)),
+    RemovePermutationsNHWCToNCHWPassTest::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(smoke_PermutationPass, RemovePermutationsWithTwoConvTest,
+    ::testing::Combine(
+        ::testing::ValuesIn(netPrecisions),
+        ::testing::Values(CommonTestUtils::DEVICE_GNA),
+        ::testing::ValuesIn(configs),
+        ::testing::ValuesIn(inputShapes)),
+    RemovePermutationsNHWCToNCHWPassTest::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(smoke_PermutationPass, RemovePermutationsWithEltwiseTest,
+    ::testing::Combine(
+        ::testing::ValuesIn(netPrecisions),
+        ::testing::Values(CommonTestUtils::DEVICE_GNA),
+        ::testing::ValuesIn(configs),
+        ::testing::ValuesIn(inputShapes)),
+    RemovePermutationsNHWCToNCHWPassTest::getTestCaseName);
+
 } // namespace LayerTestsDefinitions