[GNA] Fix compilation of topologies with only 2 functional layers: convolution and pooling (#5501)

Elizaveta Lobanova 2021-05-07 12:01:44 +03:00 committed by GitHub
parent 349e2910fe
commit 696aa37c37
3 changed files with 71 additions and 34 deletions
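
For context, the failing case is a model whose only functional layers are a convolution followed by a pooling, with nothing functional after the pooling. Below is a minimal sketch of such a model using the same ngraph test builders the updated test in this commit uses; the shapes, channel counts, and helper arguments are illustrative, not taken from this commit:

// Minimal conv -> pool model with no trailing activation (illustrative
// shapes). Pooling is the last functional layer, which is exactly the
// case this commit fixes for the GNA plugin.
#include <memory>
#include "ngraph_functions/builders.hpp"

std::shared_ptr<ngraph::Function> makeConvPoolOnlyModel() {
    auto ngPrc = ngraph::element::f32;
    auto params = ngraph::builder::makeParams(ngPrc, { {1, 1, 1, 64} });
    // VALID convolution: 12 output channels, 1x8 kernel, stride 1
    auto conv = ngraph::builder::makeConvolution(params[0], ngPrc, { 1, 8 },
        { 1, 1 }, { 0, 0 }, { 0, 0 }, { 1, 1 }, ngraph::op::PadType::VALID, 12);
    // Max pooling with window == stride == 1x2 and FLOOR rounding
    auto pool = ngraph::builder::makePooling(conv, { 1, 2 }, { 0, 0 }, { 0, 0 },
        { 1, 2 }, ngraph::op::RoundingType::FLOOR, ngraph::op::PadType::VALID,
        false, ngraph::helpers::PoolingTypes::MAX);
    return std::make_shared<ngraph::Function>(pool, params);
}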


@@ -1736,8 +1736,8 @@ void GNAPluginNS::backend::AMIntelDNN::InitGNAStruct(intel_nnet_type_t *ptr_nnet
outputTensor.Shape.Dimensions[beginOfHInNHWC + dimHW] =
outputFromPooling(outFromConv, poolWindow->Dimensions[beginOfHInHW + dimHW], poolStride->Dimensions[beginOfHInHW + dimHW]);
}
AdvanceOperationIfAllApplied(component, i, gnaOperation);
}
AdvanceOperationIfAllApplied(component, i, gnaOperation);
}
#else
} else if (pLayer->nLayerKind == INTEL_CONVOLUTIONAL) {
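
The only change in this hunk is scoping: the AdvanceOperationIfAllApplied call moves out of an inner block so it runs once per fused component. A minimal sketch of the pattern with hypothetical stand-ins (the enclosing condition and the helper below are illustrative; the real helper's semantics are internal to the GNA plugin):

#include <cstddef>

// Hypothetical stand-in for AdvanceOperationIfAllApplied: move the GNA
// operation cursor forward once the current fused component is fully mapped.
static void advanceIfAllApplied(std::size_t& opIdx) { ++opIdx; }

void mapPoolingDims(bool innerConditionHolds, std::size_t& opIdx) {
    if (innerConditionHolds) {   // illustrative inner block
        for (std::size_t dimHW = 0; dimHW < 2; ++dimHW) {
            // ... fill outputTensor.Shape.Dimensions for this axis ...
        }
        // before the fix: advanceIfAllApplied(opIdx) sat here, so the
        // cursor advanced only when this inner block was entered
    }
    // after the fix: advance exactly once per component, unconditionally
    advanceIfAllApplied(opIdx);
}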


@@ -777,7 +777,39 @@ void RemovePermutationsNHWCToNCHWPass::run() {
void InsertIdentityLayerPass::run() {
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(pLayers->front());
auto createIdentityLayer = [quantized, this](const TensorDesc& tensorDesc) {
int numOfIdentityLayers = this->getPassManager()->getIntVar(identityLayersCounterName)++;
auto activationName = std::string("identity_") + std::to_string(numOfIdentityLayers);
CNNLayerPtr activationLayer =
std::make_shared<GenericLayer>(LayerParams({activationName, "identity", Precision::FP32}));
CNNLayerPtr activationLayerWithQuant = quantized ?
InferenceEngine::injectData<QuantizedLayerParams>(activationLayer) :
activationLayer;
auto dataPtr = std::make_shared<Data>("identity_data_" + std::to_string(numOfIdentityLayers), tensorDesc);
getCreatorLayer(dataPtr) = activationLayerWithQuant;
activationLayerWithQuant->outData.push_back(dataPtr);
return activationLayerWithQuant;
};
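
The factored-out lambda is called from both insertion sites later in this pass, as the rest of this diff shows:

// Call sites of createIdentityLayer in this pass (both visible below):
//   after a trailing 1D pooling:  createIdentityLayer(l->outData[0]->getTensorDesc());
//   between prev and true_layer:  createIdentityLayer(inputData->getTensorDesc());
// Each call mints a uniquely numbered "identity_<N>" activation layer with
// its own output Data blob attached, and with quantization data injected
// when the network is quantized.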
for (auto & l : *pLayers) {
if (LayerInfo(l).isPooling()) {
// Identity should be inserted after 1D pooling if it's the last functional layer.
auto pooling = LayerInfo(l).as<PoolingLayer*>();
IE_ASSERT(pooling != nullptr);
if (is2D(pooling->_kernel)) continue;
auto hasNextFuncLayer = CNNNetHasNextLayerSkipCertain(l, 0, 0, [](CNNLayerPtr layer) {
return LayerInfo(layer).isNonFunctional();
});
if (hasNextFuncLayer) continue;
auto identityLayer = createIdentityLayer(l->outData[0]->getTensorDesc());
gnalog() << "Inserted "<< identityLayer->name << " after " << l->name << std::endl;
auto nextLayer = CNNNetCheckNextLayerSkipCertain(l, 0, 0, true, [](CNNLayerPtr layer) { return false; }).first;
CNNNetworkInsertLayer(l, nextLayer, identityLayer);
}
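
A minimal sketch of the check this block performs, under the assumption that a pooling is "last functional" when nothing functional is reachable downstream; the graph types here are illustrative stand-ins, not the plugin's CNNLayer API:

#include <memory>
#include <vector>

struct Layer;                           // illustrative stand-in
using LayerPtr = std::shared_ptr<Layer>;
struct Layer {
    bool functional = true;             // false for reshape/permute-style glue
    std::vector<LayerPtr> children;     // downstream layers
};

// True if some functional layer is reachable below `l`, skipping
// non-functional ones - the shape of the CNNNetHasNextLayerSkipCertain
// check above.
bool hasNextFunctionalLayer(const LayerPtr& l) {
    for (const auto& child : l->children) {
        if (child->functional || hasNextFunctionalLayer(child))
            return true;
    }
    return false;
}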
for (auto && prev : getCandidatesForIdentityInsertion(l, getPassManager())) {
// Do an upstream search until a functional layer is found
auto original_prev_layer = prev;
@@ -817,15 +849,6 @@ void InsertIdentityLayerPass::run() {
if (reconnected)
continue;
int numOfIdentityLayers = this->getPassManager()->getIntVar(identityLayersCounterName)++;
// actual insertion
auto activationName = std::string("identity_") + std::to_string(numOfIdentityLayers);
gnalog() << "Inserted "<< activationName << " between: " << prev->name << " and " << true_layer->name << "\n" << std::flush;
CNNLayerPtr activationLayer =
std::make_shared<GenericLayer>(LayerParams({activationName, "identity", Precision::FP32}));
// TODO: why is the index 0? - better to use direct indexing in getCandidateFunction
// detecting ins-data-idx
size_t insDataIdx = std::numeric_limits<size_t>::max();
@@ -840,13 +863,10 @@ void InsertIdentityLayerPass::run() {
}
auto inputData = true_layer->insData[insDataIdx].lock();
auto identityLayer = createIdentityLayer(inputData->getTensorDesc());
gnalog() << "Inserted "<< identityLayer->name << " between: " << prev->name << " and " << true_layer->name << "\n" << std::flush;
auto dataPtr = std::make_shared<Data>("identity_data_" + std::to_string(numOfIdentityLayers), inputData->getTensorDesc());
auto activationLayerWithQuant = quantized ?
InferenceEngine::injectData<QuantizedLayerParams>(activationLayer) :
activationLayer;
getCreatorLayer(dataPtr) = activationLayerWithQuant;
activationLayerWithQuant->outData.push_back(dataPtr);
// whether 1 identity or all outputs. TODO: possible grouping here, need to implement a special grouped inserter
bool notAll = false;
for (auto && nextData : prev->outData) {
@@ -860,14 +880,14 @@ void InsertIdentityLayerPass::run() {
}
// copy offset - to be used while connecting outputs
if (prev->params.find("output_offset") != prev->params.end()) {
activationLayerWithQuant->params["output_offset"] = prev->params["output_offset"];
identityLayer->params["output_offset"] = prev->params["output_offset"];
}
// copy original_num_rows - to be used while connecting outputs
if (prev->params.find("original_num_rows") != prev->params.end()) {
activationLayerWithQuant->params["original_num_rows"] = prev->params["original_num_rows"];
identityLayer->params["original_num_rows"] = prev->params["original_num_rows"];
}
CNNNetworkInsertLayer(prev, notAll ? true_layer : CNNLayerPtr(nullptr), activationLayerWithQuant);
CNNNetworkInsertLayer(prev, notAll ? true_layer : CNNLayerPtr(nullptr), identityLayer);
}
}
}
@@ -1662,6 +1682,10 @@ void BreakFusingOfOutputLayersPass::run() {
#endif
OutputsDataMap outputsMap = this->getPassManager()->getNetwork().getOutputsInfo();
for (auto layer : *pLayers) {
/* Insertion of a second activation after pooling would break the Conv - Pooling - Activation component,
 * since ScaleShift layers would be inserted between the pooling and the activations
*/
if (LayerInfo(layer).isPooling()) continue;
for (int output_idx = 0; output_idx < layer->outData.size(); output_idx++) {
auto& output = layer->outData[output_idx];
auto& input_to = getInputTo(output);
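
In other words, the guard keeps the fused GNA component intact; illustrating the ordering the comment above describes:

// Fused component the guard preserves:
//   Convolution -> Pooling -> Activation
// Without the guard, a second activation would be added after Pooling and a
// ScaleShift would later be inserted before it, splitting the component:
//   Convolution -> Pooling -> ScaleShift -> Activation(s)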


@@ -26,6 +26,14 @@ typedef std::tuple<
std::vector<size_t> // Input shape
> removePermutationsPassParams;
typedef std::tuple<
InferenceEngine::Precision, // Network Precision
std::string, // Target Device
std::map<std::string, std::string>, // Configuration
std::vector<size_t>, // Input shape
bool // with activation
> removePermutationsWithPoolPassParams;
namespace LayerTestsDefinitions {
class RemovePermutationsNHWCToNCHWPassTest : public testing::WithParamInterface<removePermutationsPassParams>,
@@ -137,15 +145,16 @@ protected:
}
};
class RemovePermutationsWithPoolAndActTest : public testing::WithParamInterface<removePermutationsPassParams>,
class RemovePermutationsWithPoolAndActTest : public testing::WithParamInterface<removePermutationsWithPoolPassParams>,
public LayerTestsUtils::LayerTestsCommon {
public:
static std::string getTestCaseName(testing::TestParamInfo<removePermutationsPassParams> obj) {
static std::string getTestCaseName(testing::TestParamInfo<removePermutationsWithPoolPassParams> obj) {
InferenceEngine::Precision netPrecision;
std::string targetDevice;
std::map<std::string, std::string> configuration;
std::vector<size_t> inputShape;
std::tie(netPrecision, targetDevice, configuration, inputShape) = obj.param;
bool withActivation;
std::tie(netPrecision, targetDevice, configuration, inputShape, withActivation) = obj.param;
std::ostringstream result;
result << "netPRC=" << netPrecision.name() << "_";
@@ -154,6 +163,7 @@ class RemovePermutationsWithPoolAndActTest : public testing::WithParamInterface<
result << "_configItem=" << configItem.first << "_" << configItem.second;
}
result << "_IS=" << CommonTestUtils::vec2str(inputShape);
result << "_withActivation=" << withActivation;
return result.str();
}
@@ -175,8 +185,6 @@ class RemovePermutationsWithPoolAndActTest : public testing::WithParamInterface<
// |
// Permute (order: [0, 3, 1, 2])
// |
// Relu
// |
// Convolution
// |
// Pooling
@@ -188,7 +196,8 @@ class RemovePermutationsWithPoolAndActTest : public testing::WithParamInterface<
// Reshape
InferenceEngine::Precision netPrecision;
std::vector<size_t> inputShape;
std::tie(netPrecision, targetDevice, configuration, inputShape) = this->GetParam();
bool withActivation;
std::tie(netPrecision, targetDevice, configuration, inputShape, withActivation) = this->GetParam();
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
size_t in_total_dims_size = std::accumulate(std::begin(inputShape), std::end(inputShape), 1, std::multiplies<double>());
@@ -199,14 +208,12 @@ class RemovePermutationsWithPoolAndActTest : public testing::WithParamInterface<
auto permute1 = std::make_shared<ngraph::opset1::Transpose>(reshape1,
ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{ 4 }, { 0, 3, 1, 2 }));
auto relu1 = std::make_shared<ngraph::opset3::Relu>(permute1);
size_t num_out_channels = 12;
size_t kernal_size = 8;
auto kernal_shape = (inputShape[1] == 1 ? std::vector<size_t>{1, kernal_size} : std::vector<size_t>{kernal_size, 1});
std::vector<float> filter_weights = CommonTestUtils::generate_float_numbers(num_out_channels * inputShape[3] * kernal_size,
-0.2f, 0.2f);
auto conv1 = ngraph::builder::makeConvolution(relu1, ngPrc, kernal_shape, { 1, 1 }, { 0, 0 }, { 0, 0 }, { 1, 1 },
auto conv1 = ngraph::builder::makeConvolution(permute1, ngPrc, kernal_shape, { 1, 1 }, { 0, 0 }, { 0, 0 }, { 1, 1 },
ngraph::op::PadType::VALID, num_out_channels, false, filter_weights);
auto pool_kernal_shape = (inputShape[1] == 1 ? std::vector<size_t>{1, 2} : std::vector<size_t>{2, 1});
auto pool = ngraph::builder::makePooling(conv1, pool_kernal_shape, {0, 0}, {0, 0}, pool_kernal_shape, ngraph::op::RoundingType::FLOOR,
@@ -214,9 +221,14 @@ class RemovePermutationsWithPoolAndActTest : public testing::WithParamInterface<
size_t out_width = ((inputShape[2] - kernal_shape[1]) + 1) / pool_kernal_shape[1];
size_t out_height = ((inputShape[1] - kernal_shape[0]) + 1) / pool_kernal_shape[0];
auto relu2 = std::make_shared<ngraph::opset3::Relu>(pool);
auto permute2 = std::make_shared<ngraph::opset1::Transpose>(relu2,
auto pool_output = pool;
if (withActivation) {
auto relu2 = std::make_shared<ngraph::opset3::Relu>(pool);
pool_output = relu2;
}
auto permute2 = std::make_shared<ngraph::opset1::Transpose>(pool_output,
ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{ 4 }, { 0, 2, 3, 1 }));
std::vector<size_t> outFormShapes = { 1, out_width * out_height * num_out_channels };
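
A quick numeric check of the out_width/out_height formulas above, with hypothetical dimensions (the real inputShape values come from the test instantiation, which is not shown in this hunk); it assumes a VALID convolution with stride 1 and pooling with window equal to stride under FLOOR rounding, as the test constructs:

#include <cassert>
#include <cstddef>

int main() {
    // hypothetical H x W and the corresponding kernel/pool shapes for H == 1
    const std::size_t in_h = 1, in_w = 64;
    const std::size_t kernel_h = 1, kernel_w = 8;   // kernal_shape
    const std::size_t pool_h = 1, pool_w = 2;       // pool_kernal_shape
    // VALID conv, stride 1: out = in - kernel + 1; pooling with
    // window == stride and FLOOR rounding then divides by the stride
    const std::size_t out_width  = ((in_w - kernel_w) + 1) / pool_w;  // 57 / 2 = 28
    const std::size_t out_height = ((in_h - kernel_h) + 1) / pool_h;  //  1 / 1 = 1
    assert(out_width == 28 && out_height == 1);
    return 0;
}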
@@ -480,8 +492,9 @@ class RemovePermutationsWithEltwiseTest : public testing::WithParamInterface<rem
::testing::ValuesIn(netPrecisions),
::testing::Values(CommonTestUtils::DEVICE_GNA),
::testing::ValuesIn(configs),
::testing::ValuesIn(inputShapes)),
RemovePermutationsNHWCToNCHWPassTest::getTestCaseName);
::testing::ValuesIn(inputShapes),
::testing::ValuesIn(std::vector<bool>{false, true})), // with activation
RemovePermutationsWithPoolAndActTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_PermutationPass, RemovePermutationsWithTwoConvTest,
::testing::Combine(
@@ -489,7 +502,7 @@ class RemovePermutationsWithEltwiseTest : public testing::WithParamInterface<rem
::testing::Values(CommonTestUtils::DEVICE_GNA),
::testing::ValuesIn(configs),
::testing::ValuesIn(inputShapes)),
RemovePermutationsNHWCToNCHWPassTest::getTestCaseName);
RemovePermutationsWithTwoConvTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_PermutationPass, RemovePermutationsWithEltwiseTest,
::testing::Combine(
@@ -497,7 +510,7 @@ class RemovePermutationsWithEltwiseTest : public testing::WithParamInterface<rem
::testing::Values(CommonTestUtils::DEVICE_GNA),
::testing::ValuesIn(configs),
::testing::ValuesIn(inputShapes)),
RemovePermutationsNHWCToNCHWPassTest::getTestCaseName);
RemovePermutationsWithEltwiseTest::getTestCaseName);
} // namespace LayerTestsDefinitions