[GNA] fix InsertCopyLayerPass (#3058)

* [GNA] fix InsertCopyLayerPass * style * [GNA] fixed Concat -> Memory case for InserCopyLayer * Style * [GNA] fix some FWL size mismatch cases * [GNA] fix crop|split -> memory cases * [GNA] One layer -> multiple concats fix tests * style * [GNA] move copy insertion after trivial non-functional layers
2020-11-24 18:08:09 +01:00 · 2020-11-24 18:08:09 +01:00 · 860fae2f27
commit 860fae2f27
parent 42478ff0ce
6 changed files with 233 additions and 37 deletions
--- a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp
+++ b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp
@ -1563,8 +1563,12 @@ void GNAGraphCompiler::PWLPrimitive(InferenceEngine::CNNLayerPtr layer) {
    }

    if (dnn->new_num_conv_columns) {
-        num_rows = dnn->new_num_conv_columns;
-        if (inputs->getDims().size() == 4) num_rows /= num_columns;
+        if (dnn->new_num_conv_columns % num_columns == 0) {
+            num_rows = dnn->new_num_conv_columns / num_columns;
+        } else {
+            num_columns = dnn->new_num_conv_columns;
+            num_rows = 1;
+        }
        dnn->new_num_conv_columns = 0;
    }

@ -2031,7 +2035,7 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
            // if request for allocation less that realTensorInput - we need to extend request
            auto minInput = inputDesc->minBytesRequiredForStoreInput(prevLayer);
            if (num_data_bytes_in < minInput) {
-                gnalog() << "[INPUT] : requested bytes: " << num_data_bytes_in << ", extended to" << ALIGN(minInput, 8) << "\n";
+                gnalog() << "[INPUT] : requested bytes: " << num_data_bytes_in << ", extended to" << ALIGN(minInput, 8);
                num_data_bytes_in = ALIGN(minInput, 8);
            }

--- a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp
+++ b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp
@ -779,51 +779,99 @@ void InsertIdentityLayerPass::run() {
 }

 void InsertCopyLayerPass::run() {
+    // Copy layer insertion happens in few cases:
+    // Crop output goes to concat layer -> copy layer insertion
+    // Concat|Split|Crop layer goes to memory layer -> delayed copy layer insertion
+    // One output goes to multiple concat and/or memory layers -> delayed copies before memory layers
+    // and copies before concay layers (one less copy than outputs)
    for (auto & l : *pLayers) {
-        if (l->insData.empty()) continue;
-        auto prevLayers = CNNNetGetPrevLayersSkip(l, [](CNNLayerPtr origin){
-            return !LayerInfo(origin).isNonFunctional();
-        });
+        if (LayerInfo(l).isNonFunctional()) continue;
+        // Crop -> Concat and Concat -> Memory cases
+        if ((LayerInfo(l).isCrop() && !LayerInfo(l).isCropAffined()) || LayerInfo(l).isConcat()) {
+            std::vector<std::tuple<CNNLayerPtr, CNNLayerPtr, size_t>> copy_insertion_tuples;
+            std::vector<std::tuple<CNNLayerPtr, CNNLayerPtr, size_t>> delayed_copy_insertion_tuples;

-        for (int i=0; i != prevLayers.size(); i++) {
-            auto & prevIndirectLayer = prevLayers[i].first;
-            bool bInsert = false;
-            /// Delayed copy layers need to be moved to the very end of processing
-            bool bInsertDelayed = false;
+            for (auto output : l->outData) {
+                auto& inputTo = getInputTo(output);
+                for (auto& childLayer : inputTo) {
+                    auto original_child = childLayer.second;
+                    auto original_parent = l;
+                    auto current_layer = original_child;
+                    size_t input_idx = CNNLayerFindInsDataIdxes(output, original_child)[0];

-            auto isInserted = [&bInsertDelayed, &bInsert]() {
-                return bInsert || bInsertDelayed;
-            };
-
-            if (LayerInfo(l).isMemory()) {
-                if (LayerInfo(prevIndirectLayer).isConcat() || LayerInfo(prevIndirectLayer).isCrop()
-                    || LayerInfo(prevIndirectLayer).isSplit()) { bInsertDelayed = true;}
-                // memory usualy preceded by either activation or split, or other layers in order to have 2b precision
-                for (auto && inputto : getInputTo(prevLayers[i].first->outData[prevLayers[i].second])) {
-                    auto current_layer = inputto.second;
-                    while (LayerInfo(current_layer).isNonFunctional() || LayerInfo(current_layer).isSplit()) {
+                    while (LayerInfo(current_layer).isNonFunctional()) {
                        if (current_layer->outData.size() == 0) break;
                        if (getInputTo(current_layer->outData[0]).size() == 0) break;
-                        auto new_layer = CNNNetGetNextLayerSkipCertain(current_layer, 0, 0, [](CNNLayerPtr origin){return false;}).first;
-                        current_layer = new_layer;
+
+                        auto next_layer = CNNNetGetNextLayerSkipCertain(current_layer, 0, 0, [](CNNLayerPtr origin){return false;}).first;
+                        if (current_layer->outData.size() == 1 && getInputTo(current_layer->outData[0]).size() == 1 && original_child == current_layer) {
+                            original_child = next_layer;
+                            original_parent = current_layer;
+                            input_idx = CNNLayerFindInsDataIdxes(original_parent->outData[0], original_child)[0];
+                        }
+                        current_layer = next_layer;
                    }
-                    // if preceding layer is common for memory and concat
-                    if (LayerInfo(current_layer).isConcat()) {
-                        bInsertDelayed = true;
-                        break;
+
+                    if ((LayerInfo(l).isConcat() || LayerInfo(l).isCrop() || LayerInfo(l).isSplit()) && LayerInfo(current_layer).isMemory()) {
+                        // Concat|Split|Crop -> Memory case
+                        delayed_copy_insertion_tuples.push_back(std::make_tuple(original_parent, original_child, input_idx));
+                    } else if (LayerInfo(l).isCrop() && LayerInfo(current_layer).isConcat()) {
+                        // Crop -> Concat case
+                        copy_insertion_tuples.push_back(std::make_tuple(original_parent, original_child, input_idx));
                    }
                }
            }
-            if (!isInserted() && LayerInfo(l).isConcat() && LayerInfo(prevIndirectLayer).isCrop()) { bInsert = true; }

-            if (isInserted()) {
-                if (LayerInfo(prevIndirectLayer).isCropAffined()) {
-                    // The crop will be replaced by affine.
-                    // Copy layer insertion is not required
-                    continue;
+            for (auto& tuple : delayed_copy_insertion_tuples) {
+                // Concat -> Memory case
+                InsertCopyLayer(std::get<0>(tuple), std::get<1>(tuple), std::get<2>(tuple), this->getPassManager(), DelayedCopyLayerName);
+            }
+            for (auto& tuple : copy_insertion_tuples) {
+                // Crop -> Concat case
+                InsertCopyLayer(std::get<0>(tuple), std::get<1>(tuple), std::get<2>(tuple), this->getPassManager(), CopyLayerName);
+            }
+        }
+
+        // Layer -> multiple concat/memory case
+        for (auto output : l->outData) {
+            std::vector<std::pair<CNNLayerPtr, size_t>> MemoryLayers;
+            std::vector<std::pair<CNNLayerPtr, size_t>> ConcatLayers;
+            auto& inputTo = getInputTo(output);
+            if (inputTo.size() < 2) continue;
+            for (auto& childLayer : inputTo) {
+                auto layer_to_insert = childLayer.second;
+                auto current_layer = childLayer.second;
+                auto previous_layer = l;
+                size_t input_idx = CNNLayerFindInsDataIdxes(output, current_layer)[0];
+
+                while (LayerInfo(current_layer).isNonFunctional()) {
+                    if (current_layer->outData.size() == 0) break;
+                    if (getInputTo(current_layer->outData[0]).size() == 0) break;
+                    previous_layer = current_layer;
+                    current_layer = CNNNetGetNextLayerSkipCertain(current_layer, 0, 0, [](CNNLayerPtr origin){return false;}).first;
                }
-                auto prevLayer = CNNNetPrevLayer(l, i);
-                InsertCopyLayer(prevLayer, l, i, getPassManager(), bInsertDelayed ? DelayedCopyLayerName : CopyLayerName);
+                if (LayerInfo(current_layer).isConcat()) {
+                    ConcatLayers.push_back(make_pair(layer_to_insert, input_idx));
+                } else if (LayerInfo(current_layer).isMemory()) {
+                    MemoryLayers.push_back(make_pair(layer_to_insert, input_idx));
+                }
+            }
+            if (MemoryLayers.empty() && ConcatLayers.empty()) continue;
+            auto toCopyCount = MemoryLayers.size() + ConcatLayers.size() - 1;
+            size_t currentCopyIdx = 0;
+            while (currentCopyIdx < toCopyCount) {
+                if (currentCopyIdx < MemoryLayers.size()) {
+                    size_t memoryIdx = currentCopyIdx;
+                    auto memoryLayer = MemoryLayers[memoryIdx].first;
+                    auto inputIdx = MemoryLayers[memoryIdx].second;
+                    InsertCopyLayer(l, memoryLayer, inputIdx, this->getPassManager(), DelayedCopyLayerName);
+                } else {
+                    size_t concatIdx = currentCopyIdx - MemoryLayers.size();
+                    auto concatLayer = ConcatLayers[concatIdx].first;
+                    auto inputIdx = ConcatLayers[concatIdx].second;
+                    InsertCopyLayer(l, concatLayer, inputIdx, this->getPassManager(), CopyLayerName);
+                }
+                currentCopyIdx++;
            }
        }
    }
--- a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_config.cpp
+++ b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_config.cpp
@ -42,6 +42,7 @@ std::vector<std::string> disabledTestPatterns() {
        R"(.*IEClassHeteroExecutableNetworkGetMetricTest_TARGET_FALLBACK.*)",
        // TODO: Issue 39358
        R"(.*unaligned.*MultipleConcatTest.*)",
+        R"(.*ActivationConcatsEltwise.*CS=35.*)",
        // TODO: Issue: 40960
        R"(.*(ConstantResultSubgraphTest).*)",
    };
--- a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/activation_concats_eltwise.cpp
+++ b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/activation_concats_eltwise.cpp
@ -0,0 +1,43 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "subgraph_tests/activation_concats_eltwise.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+namespace {
+std::vector<size_t> input_sizes = {
+    7,
+    16,
+    35,
+    64
+};
+
+std::vector<size_t> concat_const_sizes = {
+    7,
+    16,
+    35,
+    64
+};
+
+
+const std::vector<InferenceEngine::Precision> netPrecisions = {
+        InferenceEngine::Precision::FP32,
+        InferenceEngine::Precision::FP16
+};
+
+std::map<std::string, std::string>  additional_config = {};
+
+INSTANTIATE_TEST_CASE_P(smoke_CompareRefs, ActivationConcatsEltwise,
+                        ::testing::Combine(
+                                ::testing::ValuesIn(input_sizes),
+                                ::testing::ValuesIn(concat_const_sizes),
+                                ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(CommonTestUtils::DEVICE_GNA),
+                                ::testing::Values(additional_config)),
+                        ActivationConcatsEltwise::getTestCaseName);
+
+} // namespace
--- a/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/activation_concats_eltwise.hpp
+++ b/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/activation_concats_eltwise.hpp
@ -0,0 +1,31 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <string>
+
+#include "functional_test_utils/layer_test_utils.hpp"
+
+namespace LayerTestsDefinitions {
+
+using ActivationConcatsEltwiseParamsTuple = typename std::tuple<
+    size_t,                             // input size
+    size_t,                             // concat const size
+    InferenceEngine::Precision,         // precision
+    std::string,                        // device name
+    std::map<std::string, std::string>  // configuration
+>;
+
+
+class ActivationConcatsEltwise : public testing::WithParamInterface<ActivationConcatsEltwiseParamsTuple>,
+                          public LayerTestsUtils::LayerTestsCommon {
+public:
+    static std::string getTestCaseName(testing::TestParamInfo<ParamType> obj);
+
+protected:
+    void SetUp() override;
+};
+
+}  // namespace LayerTestsDefinitions
--- a/inference-engine/tests/functional/plugin/shared/src/subgraph_tests/activation_concats_eltwise.cpp
+++ b/inference-engine/tests/functional/plugin/shared/src/subgraph_tests/activation_concats_eltwise.cpp
@ -0,0 +1,69 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+#include <tuple>
+#include <string>
+#include <vector>
+#include <memory>
+#include <debug.h>
+#include "common_test_utils/common_utils.hpp"
+#include "common_test_utils/data_utils.hpp"
+#include "functional_test_utils/precision_utils.hpp"
+#include "functional_test_utils/skip_tests_config.hpp"
+#include "subgraph_tests/activation_concats_eltwise.hpp"
+#include "ngraph_functions/utils/ngraph_helpers.hpp"
+#include "ngraph_functions/builders.hpp"
+
+namespace LayerTestsDefinitions {
+
+using namespace CommonTestUtils;
+using namespace InferenceEngine;
+
+std::string ActivationConcatsEltwise::getTestCaseName(testing::TestParamInfo<ParamType> obj) {
+    InferenceEngine::Precision netPrecision;
+    size_t inputSize;
+    size_t concatSize;
+    std::string targetDevice;
+    std::map<std::string, std::string> configuration;
+    std::tie(inputSize, concatSize, netPrecision, targetDevice, configuration) = obj.param;
+
+    std::ostringstream result;
+    result << "IS=" << inputSize << "_";
+    result << "CS=" << concatSize << "_";
+    result << "PRC=" << netPrecision.name() << "_";
+    result << "dev=" << targetDevice;
+    return result.str();
+}
+
+void ActivationConcatsEltwise::SetUp() {
+    InferenceEngine::Precision netPrecision;
+    size_t inputSize;
+    size_t concatSize;
+    std::map<std::string, std::string> config;
+    std::tie(inputSize, concatSize, netPrecision, targetDevice, config) = this->GetParam();
+    configuration.insert(config.begin(), config.end());
+    auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
+
+    auto input = ngraph::builder::makeParams(ngPrc, { {1, inputSize} });
+
+    auto relu = ngraph::builder::makeActivation(input[0], ngPrc, ngraph::helpers::ActivationTypes::Relu);
+
+    auto concat_vals_1 = CommonTestUtils::generate_float_numbers(concatSize, 14, 14);
+    auto concat_vals_2 = CommonTestUtils::generate_float_numbers(concatSize, 14, 14);
+    auto concat_const_1 = ngraph::builder::makeConstant(ngPrc, {1, concatSize}, concat_vals_1);
+    auto concat_const_2 = ngraph::builder::makeConstant(ngPrc, {1, concatSize}, concat_vals_2);
+
+    auto concat_1 = ngraph::builder::makeConcat({concat_const_1, relu}, 1);
+    auto concat_2 = ngraph::builder::makeConcat({concat_const_2, relu}, 1);
+
+    auto eltw = ngraph::builder::makeEltwise(concat_1, concat_2, ngraph::helpers::EltwiseTypes::ADD);
+
+    auto reshape_pattern = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2}, std::vector<size_t>({1, inputSize + concatSize}));
+    auto final_reshape = std::make_shared<ngraph::op::v1::Reshape>(eltw, reshape_pattern, false);
+    function = std::make_shared<ngraph::Function>(final_reshape, input, "ActivationConcatsEltwise");
+}
+
+TEST_P(ActivationConcatsEltwise, CompareWithRefs) {
+    Run();
+}
+}  // namespace LayerTestsDefinitions