From 6b2ac800aa030c9cc983ac9186e5c16ad2b934b8 Mon Sep 17 00:00:00 2001
From: Andrey Dmitriev <andrey.dmitriev@intel.com>
Date: Thu, 3 Sep 2020 13:23:58 +0300
Subject: [PATCH] [GNA] Fixed case of unconnected output of split layer (#1344)

[GNA] Fixed case of unconnected output of split layer

[GNA] Fixed case of unconnected output of split layer

test

[GNA] Fixed case of unconnected output of split layer

fixed
---
 .../src/gna_plugin/gna_graph_compiler.cpp     |   9 +-
 .../src/gna_plugin/gna_graph_tools.hpp        |  22 +++-
 .../gna_plugin/optimizer/gna_pass_manager.cpp | 111 +++++++++---------
 .../subgraph_tests/split_relu.cpp             |  56 +++++++++
 .../include/subgraph_tests/split_relu.hpp     |  33 ++++++
 .../shared/src/subgraph_tests/split_relu.cpp  |  53 +++++++++
 6 files changed, 227 insertions(+), 57 deletions(-)
 create mode 100644 inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/split_relu.cpp
 create mode 100644 inference-engine/tests/functional/plugin/shared/include/subgraph_tests/split_relu.hpp
 create mode 100644 inference-engine/tests/functional/plugin/shared/src/subgraph_tests/split_relu.cpp

diff --git a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp
index 65fe47b1f4d..5b745b009dc 100644
--- a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp
+++ b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp
@@ -140,7 +140,6 @@ void GNAGraphCompiler::fillSplitConnections(InferenceEngine::CNNLayerPtr layer)
         size_t padding = 0;
         size_t output_layer_size = 0;
 
-
         for (int j = 0; j != getInputTo(layer->outData[i]).size(); j++) {
             auto outFunctionalLayer = CNNNetGetNextLayerSkipCertain(layer, i, j,  [](CNNLayerPtr l) {
                 return LayerInfo(l).isNonFunctional();
@@ -171,6 +170,13 @@ void GNAGraphCompiler::fillSplitConnections(InferenceEngine::CNNLayerPtr layer)
             }
         }
 
+        // in case of an unconnected split output we still need to properly increment the size
+        if (getInputTo(layer->outData[i]).empty()) {
+            output_layer_size =
+                    InferenceEngine::details::product(begin(layer->outData[i]->getDims()),
+                                                      end(layer->outData[i]->getDims())) * layer->outData[i]->getPrecision().size();
+        }
+
         split_size += padding + output_layer_size;
     }
     layerInfoItem.reserved_size = split_size;
@@ -1932,7 +1938,6 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
         return LayerInfo(l).isNonFunctional();
     });
 
-
     gnalog() << "Connecting input " << layer->name << " to " << prevLayer->name << " ...\n";
 
     // real input not a memory input
diff --git a/inference-engine/src/gna_plugin/gna_graph_tools.hpp b/inference-engine/src/gna_plugin/gna_graph_tools.hpp
index a593feb265e..a1851741e45 100644
--- a/inference-engine/src/gna_plugin/gna_graph_tools.hpp
+++ b/inference-engine/src/gna_plugin/gna_graph_tools.hpp
@@ -459,6 +459,7 @@ inline void CNNNetworkInsertLayer(CNNLayerPtr after,
     bool bLocated = false;
     bool hasOutputIndex = outDataIndex != invalid_data_idx;
     if (after != nullptr) {
+        int nUnconnectedOData = 0;
         for (auto && data : after->outData) {
             if (hasOutputIndex && outDataIndex) {
                 --outDataIndex;
@@ -485,8 +486,8 @@ inline void CNNNetworkInsertLayer(CNNLayerPtr after,
                     break;
                 }
             }
-            if (getInputTo(data).empty()) {
-                bLocated = true;
+            if (inputTo.empty()) {
+                nUnconnectedOData++;
             }
             if (bLocated) {
                 // erasing all connection
@@ -503,6 +504,23 @@ inline void CNNNetworkInsertLayer(CNNLayerPtr after,
             }
         }
 
+        // separately handle the case of a single unconnected output of the given layer
+        if (!bLocated && !before && !hasOutputIndex) {
+            if (nUnconnectedOData != 1) {
+                THROW_GNA_EXCEPTION << "Cannot insert layer: " << LAYER_NAME(layerToInsert) <<" after: " << LAYER_NAME(after);
+            }
+
+            for (auto && data : after->outData) {
+                if (!getInputTo(data).empty()) continue;
+
+                bLocated = true;
+                getInputTo(data)[layerToInsert->outData.front()->getName()]  = layerToInsert;
+                layerToInsert->insData.push_back(data);
+
+                break;
+            }
+        }
+
         // if given outputDataIndex is not correct, lets find index that matches *before* layer
         if (!bLocated) {
             if (before != nullptr) {
diff --git a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp
index 402b7c2357d..708ad70bcda 100644
--- a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp
+++ b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp
@@ -87,7 +87,6 @@ static void insertDiagonalLayerBetween(InferenceEngine::CNNLayerPtr prevLayer,
 
     getCreatorLayer(dataPtr) = diagonalWithQuant;
     diagonalWithQuant->outData.push_back(dataPtr);
-
     // actual insertion
     CNNNetworkInsertLayer(prevLayer, nextLayer, diagonalWithQuant);
 }
@@ -944,68 +943,74 @@ void InsertSplitAligningFilterPass::run() {
             auto outputSize = product(++begin(splitOutput->getDims()), end(splitOutput->getDims()));
 
             if (currentOffset != ALIGN64(currentOffset)) {
-                // this split output not beginning from 64 bytes aligned boundary - need to correct by aligning filter layer
+                // check that this split output is actually connected to further layers
+                if (getInputTo(splitOutput).empty()) {
+                    gnalog() << "Output port: " << splitOutIndex << " of " << l->name << " unconnected, skipping\n";
+                } else {
+                    // this split output does not begin at a 64-byte aligned boundary - correct it with an aligning filter layer
+                    // insert the filter
+                    auto filterName = std::string("AlignFilter_") + std::to_string(numOfFilterLayers++);
+
 #ifdef PLOT
-                // getting list of layers attached to current split output
-                gnalog() << "Inserted Affine Filter Layer between: " << l->name << " and ";
-                for (auto &&followingLayers : getInputTo(splitOutput)) {
-                    if (getInputTo(splitOutput).size() != 1) {
-                        gnalog() << "\n    ";
+                    // getting list of layers attached to current split output
+                    gnalog() << "Inserted Affine Filter: " << filterName << " between: " << l->name << " and ";
+                    for (auto &&followingLayers : getInputTo(splitOutput)) {
+                        if (getInputTo(splitOutput).size() != 1) {
+                            gnalog() << "\n    ";
+                        }
+                        gnalog() << followingLayers.second->name;
                     }
-                    gnalog() << followingLayers.second->name;
-                }
-                gnalog() << std::endl;
+                    gnalog() << std::endl;
 #endif
-                // insert the filter
-                auto filterName = std::string("AlignFilter_") + std::to_string(numOfFilterLayers++);
-                auto filterLayer =
-                        std::make_shared<WeightableLayer>(LayerParams({filterName, "AffineFilter", Precision::FP32}));
+                    auto filterLayer =
+                            std::make_shared<WeightableLayer>(LayerParams({filterName, "AffineFilter", Precision::FP32}));
 
+                    auto inputData = splitOutput;
 
-                auto inputData = splitOutput;
+                    size_t aligned64_offset = std::max(0, static_cast<int>(ALIGN64(currentOffset) - 64));
+                    size_t
+                            newOutputSize = (currentOffset + ALIGN(outputSize, 8) * bytesPerSplitElement - aligned64_offset)
+                                            / bytesPerSplitElement;
 
-                size_t aligned64_offset = std::max(0, static_cast<int>(ALIGN64(currentOffset) - 64));
-                size_t newOutputSize = (currentOffset + ALIGN(outputSize, 8) * bytesPerSplitElement - aligned64_offset)
-                                       / bytesPerSplitElement;
+                    IE_ASSERT(filterLayer != nullptr);
 
-                IE_ASSERT(filterLayer != nullptr);
+                    // encodes offset to beginning of split layer input
+                    filterLayer->params["offset"] = std::to_string(aligned64_offset / bytesPerSplitElement);
 
-                // encodes offset to beginning of split layer input
-                filterLayer->params["offset"] = std::to_string(aligned64_offset / bytesPerSplitElement);
+                    auto dims = splitOutput->getTensorDesc().getDims();
+                    if (dims.size() > 3) {
+                        THROW_GNA_EXCEPTION << "unsupported split layer dims size: " << dims.size();
+                    }
 
-                auto dims = splitOutput->getTensorDesc().getDims();
-                if (dims.size() > 3) {
-                    THROW_GNA_EXCEPTION << "unsupported split layer dims size: " << dims.size();
+                    auto num_rows_out = dims[1] * (dims.size() != 2 ? dims[2] : 1);
+                    std::vector<float> filterWeights(newOutputSize * num_rows_out, 0.f);
+
+                    auto offset = (currentOffset - aligned64_offset) / bytesPerSplitElement;
+
+                    for (int i = 0; i != outputSize; i++) {
+                        filterWeights[offset] = 1.0f;
+                        offset += newOutputSize + 1;
+                    }
+
+                    filterLayer->_weights = make_shared_blob<float>(TensorDesc(
+                            inputData->getTensorDesc().getPrecision(),
+                            SizeVector({filterWeights.size()}),
+                            Layout::C));
+                    filterLayer->_weights->allocate();
+                    CopyVectorToBlob(filterLayer->_weights, filterWeights);
+
+                    auto outData = std::make_shared<Data>(filterName,
+                                                          TensorDesc(splitOutput->getTensorDesc().getPrecision(),
+                                                                     splitOutput->getTensorDesc().getDims(),
+                                                                     inputData->getTensorDesc().getLayout()));
+
+                    auto filterWithQuant = quantized ?
+                                           InferenceEngine::injectData<QuantizedLayerParams>(filterLayer) :
+                                           filterLayer;
+                    getCreatorLayer(outData) = filterWithQuant;
+                    filterWithQuant->outData.push_back(outData);
+                    CNNNetworkInsertLayer(l, nullptr, filterWithQuant, splitOutIndex);
                 }
-
-                auto num_rows_out = dims[1]  * (dims.size() != 2 ? dims[2] : 1);
-                std::vector<float> filterWeights(newOutputSize * num_rows_out, 0.f);
-
-                auto offset = (currentOffset - aligned64_offset) / bytesPerSplitElement;
-
-                for (int i = 0; i != outputSize; i++) {
-                    filterWeights[offset] = 1.0f;
-                    offset += newOutputSize + 1;
-                }
-
-                filterLayer->_weights = make_shared_blob<float>(TensorDesc(
-                        inputData->getTensorDesc().getPrecision(),
-                        SizeVector({filterWeights.size()}),
-                        Layout::C));
-                filterLayer->_weights->allocate();
-                CopyVectorToBlob(filterLayer->_weights, filterWeights);
-
-                auto outData = std::make_shared<Data>(filterName,
-                                                      TensorDesc(splitOutput->getTensorDesc().getPrecision(),
-                                                                 splitOutput->getTensorDesc().getDims(),
-                                                                 inputData->getTensorDesc().getLayout()));
-
-                auto filterWithQuant = quantized ?
-                                       InferenceEngine::injectData<QuantizedLayerParams>(filterLayer) :
-                                       filterLayer;
-                getCreatorLayer(outData) = filterWithQuant;
-                filterWithQuant->outData.push_back(outData);
-                CNNNetworkInsertLayer(l, nullptr, filterWithQuant, splitOutIndex);
             }
 
             // search data that starts from unaligned location
diff --git a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/split_relu.cpp b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/split_relu.cpp
new file mode 100644
index 00000000000..55001c6ba4b
--- /dev/null
+++ b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/split_relu.cpp
@@ -0,0 +1,56 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+#include <vector>
+#include "subgraph_tests/split_relu.hpp"
+#include "common_test_utils/test_constants.hpp"
+#include "gna/gna_config.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+    const std::vector<std::vector<std::vector<size_t>>> inputs = {  // one input shape per case
+            {{1, 64}},
+            {{1, 128}},
+            {{1, 96}},
+            {{1, 16}}
+    };
+    // Every non-empty subset of the split output indices 0..3 (the outputs that get a consumer).
+    const std::vector<std::vector<size_t>> connect_index = {
+            {0, 1, 2, 3},
+            {0, 1, 2},
+            {0, 1, 3},
+            {0, 2, 3},
+            {1, 2, 3},
+            {0, 1},
+            {0, 2},
+            {0, 3},
+            {1, 2},
+            {1, 3},
+            {2, 3},
+            {0},
+            {1},
+            {2},
+            {3},
+    };
+
+    const std::vector<InferenceEngine::Precision> netPrecisions = {
+            InferenceEngine::Precision::FP32, InferenceEngine::Precision::FP16
+    };
+
+    const std::map<std::string, std::string> additional_config = {
+            {"GNA_SCALE_FACTOR_1", "1"},
+            {"GNA_SCALE_FACTOR_2", "1"},
+            {"GNA_SCALE_FACTOR_3", "1"},
+            {"GNA_SCALE_FACTOR_4", "1"},
+            {std::string(GNA_CONFIG_KEY(COMPACT_MODE)), "NO"}
+    };
+
+    INSTANTIATE_TEST_CASE_P(split_connected, SplitRelu,
+                            ::testing::Combine(
+                                    ::testing::ValuesIn(inputs),
+                                    ::testing::ValuesIn(connect_index),
+                                    ::testing::ValuesIn(netPrecisions),
+                                    ::testing::Values(CommonTestUtils::DEVICE_GNA),
+                                    ::testing::Values(additional_config)),
+                            SplitRelu::getTestCaseName);
+}  // namespace
diff --git a/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/split_relu.hpp b/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/split_relu.hpp
new file mode 100644
index 00000000000..de6a254ecde
--- /dev/null
+++ b/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/split_relu.hpp
@@ -0,0 +1,33 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+#pragma once
+
+#include <tuple>
+#include <string>
+#include <vector>
+#include <memory>
+#include "functional_test_utils/layer_test_utils.hpp"
+#include "../../../../../ngraph_functions/include/ngraph_functions/builders.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+namespace LayerTestsDefinitions {
+
+/// Parameter set of the SplitRelu subgraph test.
+using SplitReluTuple = std::tuple<
+        std::vector<std::vector<size_t>>,    // input shapes
+        std::vector<size_t>,                 // indices of the split outputs to connect
+        InferenceEngine::Precision,          // network precision
+        std::string,                         // device name
+        std::map<std::string, std::string>   // configuration
+>;
+
+/// Parameterized fixture over SplitReluTuple; SetUp() is overridden to prepare the test.
+class SplitRelu : public testing::WithParamInterface<SplitReluTuple>,
+                  public LayerTestsUtils::LayerTestsCommon {
+public:
+    static std::string getTestCaseName(const testing::TestParamInfo<SplitReluTuple> &obj);
+protected:
+    void SetUp() override;
+};
+}  // namespace LayerTestsDefinitions
diff --git a/inference-engine/tests/functional/plugin/shared/src/subgraph_tests/split_relu.cpp b/inference-engine/tests/functional/plugin/shared/src/subgraph_tests/split_relu.cpp
new file mode 100644
index 00000000000..914df8931b9
--- /dev/null
+++ b/inference-engine/tests/functional/plugin/shared/src/subgraph_tests/split_relu.cpp
@@ -0,0 +1,53 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+#include <tuple>
+#include <string>
+#include <vector>
+#include <memory>
+#include <debug.h>
+#include "common_test_utils/common_utils.hpp"
+#include "functional_test_utils/precision_utils.hpp"
+#include "functional_test_utils/skip_tests_config.hpp"
+#include "subgraph_tests/split_relu.hpp"
+
+namespace LayerTestsDefinitions {
+    std::string SplitRelu::getTestCaseName(const testing::TestParamInfo<SplitReluTuple> &obj) {
+        // Read the tuple fields in place rather than copying them into locals;
+        // the configuration map (element 4) does not take part in the name.
+        const auto &shapes = std::get<0>(obj.param);
+        const auto &connectedOutputs = std::get<1>(obj.param);
+        const auto &netPrecision = std::get<2>(obj.param);
+        const auto &deviceName = std::get<3>(obj.param);
+
+        std::ostringstream name;
+        name << "IS=" << CommonTestUtils::vec2str(shapes[0]) << "_"
+             << "ConnectInput=" << CommonTestUtils::vec2str(connectedOutputs) << "_"
+             << "netPRC=" << netPrecision.name() << "_"
+             << "targetDevice=" << deviceName << "_";
+        return name.str();
+    }
+
+    void SplitRelu::SetUp() {
+        const auto &params = this->GetParam();
+        const auto &shapes = std::get<0>(params);
+        const auto &connectedOutputs = std::get<1>(params);  // split outputs that get a consumer
+        const auto &extraConfig = std::get<4>(params);
+        targetDevice = std::get<3>(params);
+        configuration.insert(extraConfig.begin(), extraConfig.end());
+        const auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(std::get<2>(params));
+        auto input = ngraph::builder::makeParams(ngPrc, {shapes});
+        auto split = ngraph::builder::makeSplit(input[0], ngPrc, 4, 1);
+        // Attach a ReLU followed by a Result only to the requested split outputs.
+        ngraph::ResultVector results;
+        for (const size_t outIdx : connectedOutputs) {
+            auto relu = std::make_shared<ngraph::opset1::Relu>(split->output(outIdx));
+            results.push_back(std::make_shared<ngraph::opset1::Result>(relu));
+        }
+        function = std::make_shared<ngraph::Function>(results, input, "split_relu");
+    }
+
+    TEST_P(SplitRelu, CompareWithRefs) {
+        Run();  // not defined in this file; presumably inherited from LayerTestsUtils::LayerTestsCommon
+    }
+} // namespace LayerTestsDefinitions