[GNA] Flatten trivial concatenations (#3396)

* [GNA] Flatten trivial concatenations

* [GNA] move test to subgraph tests

* Style

* style

* review

* fix

* fixes
Kamil Magierski 2020-12-01 16:33:36 +01:00 committed by GitHub
parent 4a91f914e2
commit 86e5461d4b
9 changed files with 161 additions and 141 deletions


@@ -423,7 +423,12 @@ class ScaleFactorPerLayer<InferenceEngine::ConcatLayer*> {
auto quantData = InferenceEngine::getInjectedData<QuantizedLayerParams>(*concatLayer);
std::vector<InferenceEngine::CNNLayerPtr> inputLayers;
for (auto input_idx = 0; input_idx != concatLayer->insData.size(); input_idx++) {
inputLayers.push_back(InferenceEngine::CNNNetPrevLayer(concatLayer, input_idx));
auto prev_layer = InferenceEngine::CNNNetPrevLayer(concatLayer, input_idx);
// FlattenTrivialConcat inserts a reshape between a concat and its inputs, which results in taking the wrong layers as inputs for scale factor calculation
if (prev_layer->type == "reshape" && prev_layer->insData.size() == 1 && prev_layer->outData.size() == 1) {
prev_layer = InferenceEngine::CNNNetPrevLayer(prev_layer, 0);
}
inputLayers.push_back(prev_layer);
}
// if all inputs have same quant value - trivial propagation


@@ -498,7 +498,7 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
passes->registerPass<EltwiseSplitOverChannelsPass>();
passes->registerPass<InsertSplitAligningFilterPass>();
passes->registerPass<Concat4Dto2DPass>();
passes->registerPass<FlattenTrivialConcatPass>();
passes->registerPass<InsertConcatAligningFilterPass>();
passes->registerPass<ReorderConcatInputsPass>();
if (policy.PermutePolicy != Policy::Permute::DISABLED) {


@@ -34,10 +34,10 @@ class Policy {
AUTO_PERMUTE
} PermutePolicy = Permute::DISABLED;
enum class Concat4Dto2DConversion {
enum class FlattenTrivialConcatConversion {
DISABLED,
ENABLED
} ConcatConversionPolicy = Concat4Dto2DConversion::ENABLED;
} ConcatConversionPolicy = FlattenTrivialConcatConversion::ENABLED;
enum class ConcatAlignment {
DISABLED,


@@ -881,18 +881,27 @@ void InsertCopyLayerPass::run() {
}
}
void Concat4Dto2DPass::run() {
// Find 4D concat layers that will have to use ConcatAlignFilters and can be substituted by 2D concat
void FlattenTrivialConcatPass::run() {
// change all trivial concatenations (concatenations where the output buffer is a buffer made by appending the input buffers)
// by reshaping their inputs to 1 x total_input_size and their output to 1 x total_concat_size, and changing the axis to 1.
// For example, if a 4D concat has unaligned inputs then ConcatAlignFilters need to be used; if the sizes before the
// axis are all ones then the concat can be changed to 2D. For instance, let's say all inputs have the same shape
// 1, 1, 5, 3: then for axis 0, 1, or 2 the change will be made and the inputs will be reshaped to 1, 15;
// but for shape 2, 1, 5, 3 only axis 0 is valid and the inputs will be reshaped to 1, 30
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(pLayers->front());
if (getPassManager()->getPolicy().ConcatConversionPolicy == Policy::Concat4Dto2DConversion::DISABLED) return;
if (getPassManager()->getPolicy().ConcatConversionPolicy == Policy::FlattenTrivialConcatConversion::DISABLED) return;
if (getPassManager()->getPolicy().ConcatAlignmentPolicy == Policy::ConcatAlignment::DISABLED) return;
if (getPassManager()->getPolicy().ConcatAlignmentPolicy == Policy::ConcatAlignment::DISABLED_FOR_FP32 && !quantized) return;
auto getLayerByIndex = [](int idx, ConcatLayer* concatLayer) {
auto input = concatLayer->insData[idx];
auto lockedInput = input.lock();
if (!lockedInput) {
THROW_GNA_EXCEPTION << "cannot get insdata : "<< idx << " for layer: " << concatLayer->name;
}
return lockedInput;
};
for (auto & l : *pLayers) {
LayerInfo info(l);
auto concatLayer = info.as<ConcatLayer*>();
@@ -900,63 +909,58 @@ void Concat4Dto2DPass::run() {
if (concatLayer->insData.size() < 1) continue;
auto dims_size = concatLayer->insData[0].lock()->getDims().size();
if (dims_size > 2) {
auto axis = concatLayer->_axis;
bool skip_layer = false;
for (int i = 0; i < axis; i++) {
if (concatLayer->insData[0].lock()->getDims()[i] != 1) skip_layer = true;
}
if (skip_layer) continue;
skip_layer = true;
std::vector<size_t> total_sizes;
for (auto& input : concatLayer->insData) {
auto input_dims = input.lock()->getDims();
total_sizes.push_back(std::accumulate(input_dims.begin(), input_dims.end(), size_t(1), std::multiplies<size_t>()));
if (total_sizes.back() % 64 != 0) skip_layer = false;
}
if (skip_layer) continue;
if (dims_size < 2 || concatLayer->_axis == dims_size - 1) continue;
for (size_t input_idx = 0; input_idx != concatLayer->insData.size(); input_idx++) {
auto getLayerByIndex = [&concatLayer](int idx) {
auto input = concatLayer->insData[idx];
auto lockedInput = input.lock();
if (!lockedInput) {
THROW_GNA_EXCEPTION << "cannot get insdata : "<< idx << " for layer: " << concatLayer->name;
}
return lockedInput;
};
auto concatInput = getLayerByIndex(input_idx);
auto tensor = InferenceEngine::TensorDesc(concatInput->getTensorDesc());
tensor.reshape(SizeVector({1, total_sizes[input_idx]}), Layout::NC);
auto reshapeName = l->name + "_input_"+ std::to_string(input_idx) +"_reshape";
auto reshape = CNNNetworkCreateReshape(tensor, reshapeName, quantized);
CNNNetworkInsertLayer(getCreatorLayer(concatInput).lock(), l, reshape);
gnalog() << "\tInserted " << reshapeName << " between " << getCreatorLayer(concatInput).lock()->name << " and " << l->name << std::endl;
}
for (auto output_idx = 0; output_idx != concatLayer->outData.size(); output_idx++) {
auto output = concatLayer->outData[output_idx];
auto output_tensor_copy = TensorDesc(output->getTensorDesc());
auto dims = output_tensor_copy.getDims();
auto total_size = std::accumulate(dims.begin(), dims.end(), size_t(1), std::multiplies<size_t>());
auto new_tensor = output->getTensorDesc();
new_tensor.reshape(SizeVector({1, total_size}), Layout::NC);
auto new_output = CNNReplaceDataWithChangedTensorDescription(output, new_tensor);
gnalog() << "\tChanged " << output->getName() << " dims to 2D" << std::endl;
auto reshapeName = l->name + "_output_"+ std::to_string(output_idx) +"_reshape";
auto reshape = CNNNetworkCreateReshape(output_tensor_copy, reshapeName, quantized);
CNNNetworkInsertLayer(l, nullptr, reshape, output_idx);
gnalog() << "\tInserted " << reshapeName << " after " << l->name << std::endl;
}
auto axis = concatLayer->_axis;
bool skip_layer = false;
for (int i = 0; i < axis; i++) {
if (concatLayer->insData[0].lock()->getDims()[i] != 1) skip_layer = true;
}
if (skip_layer) continue;
std::vector<size_t> total_sizes;
for (auto& input : concatLayer->insData) {
auto input_dims = input.lock()->getDims();
total_sizes.push_back(std::accumulate(input_dims.begin(), input_dims.end(), size_t(1), std::multiplies<size_t>()));
}
for (size_t input_idx = 0; input_idx != concatLayer->insData.size(); input_idx++) {
auto concatInput = getLayerByIndex(input_idx, concatLayer);
auto tensor = InferenceEngine::TensorDesc(concatInput->getTensorDesc());
tensor.reshape(SizeVector({1, total_sizes[input_idx]}), Layout::NC);
auto reshapeName = l->name + "_input_"+ std::to_string(input_idx) +"_reshape";
auto reshape = CNNNetworkCreateReshape(tensor, reshapeName, quantized);
CNNNetworkInsertLayer(getCreatorLayer(concatInput).lock(), l, reshape);
gnalog() << "\tInserted " << reshapeName << " between " << getCreatorLayer(concatInput).lock()->name << " and " << l->name << std::endl;
}
for (auto output_idx = 0; output_idx != concatLayer->outData.size(); output_idx++) {
auto output = concatLayer->outData[output_idx];
auto output_tensor_copy = TensorDesc(output->getTensorDesc());
auto dims = output_tensor_copy.getDims();
auto total_size = std::accumulate(dims.begin(), dims.end(), size_t(1), std::multiplies<size_t>());
auto new_tensor = output->getTensorDesc();
new_tensor.reshape(SizeVector({1, total_size}), Layout::NC);
auto new_output = CNNReplaceDataWithChangedTensorDescription(output, new_tensor);
gnalog() << "\tChanged " << output->getName() << " dims to 2D" << std::endl;
auto reshapeName = l->name + "_output_"+ std::to_string(output_idx) +"_reshape";
auto reshape = CNNNetworkCreateReshape(output_tensor_copy, reshapeName, quantized);
if (getInputTo(new_output).empty()) {
reshape->insData.push_back(new_output);
getInputTo(new_output)[reshape->name] = reshape;
} else {
CNNNetworkInsertLayer(l, nullptr, reshape, output_idx);
}
gnalog() << "\tInserted " << reshapeName << " after " << l->name << std::endl;
}
concatLayer->_axis = 1;
}
}
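To make the eligibility rule and the reshape math from the comment above concrete, here is a minimal self-contained sketch (not part of this commit; isTrivialConcat and flattenedSize are illustrative names, and shapes are plain std::vector<size_t>):

#include <cstddef>
#include <functional>
#include <numeric>
#include <vector>

// A concat is "trivial" when every dimension before the concat axis equals 1,
// so the output buffer is just the input buffers appended back to back.
bool isTrivialConcat(const std::vector<std::vector<size_t>>& inputShapes, size_t axis) {
    for (const auto& shape : inputShapes) {
        if (axis >= shape.size()) return false;
        for (size_t i = 0; i < axis; ++i) {
            if (shape[i] != 1) return false;
        }
    }
    return true;
}

// Size of the 1 x N tensor each input (or the output) is reshaped to.
size_t flattenedSize(const std::vector<size_t>& shape) {
    return std::accumulate(shape.begin(), shape.end(), size_t{1}, std::multiplies<size_t>());
}

For two inputs of shape 1, 1, 5, 3 concatenated on axis 2, isTrivialConcat returns true and flattenedSize gives 15 for each input, so the pass reshapes each input to 1 x 15, the output to 1 x 30, and sets the concat axis to 1.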


@@ -142,9 +142,9 @@ DECL_PASS(InsertCopyLayer);
DECL_PASS(InsertSplitAligningFilter);
/**
* @brief Pass that changes 4D concat to 2D concat in cases that would have to use ConcatAlignFilter
* @brief Pass that flattens a trivial concatenation's inputs and output and changes its axis to 1
*/
DECL_PASS(Concat4Dto2D);
DECL_PASS(FlattenTrivialConcat);
/**
* @brief concat-aligning filter layer insertion required in cases when concat input sizes are not 64-aligned


@@ -4,7 +4,7 @@
#include <vector>
#include "single_layer_tests/concat_4D.hpp"
#include "subgraph_tests/trivial_concat.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
@@ -13,6 +13,9 @@ namespace {
std::vector<std::vector<size_t>> inShapes = {
{1, 1, 33, 16},
{1, 1, 65, 16},
{10, 16},
{10, 64},
{15, 15},
};
std::vector<InferenceEngine::Precision> netPrecisions = {InferenceEngine::Precision::FP32,
@@ -24,11 +27,11 @@ std::map<std::string, std::string> additional_config = {
{"GNA_SCALE_FACTOR_0", "2000.0"},
};
INSTANTIATE_TEST_CASE_P(smoke_Concat4D_Basic, Concat4DLayerTest,
INSTANTIATE_TEST_CASE_P(smoke_trivial_concat_Basic, TrivialConcatLayerTest,
::testing::Combine(
::testing::ValuesIn(inShapes),
::testing::ValuesIn(netPrecisions),
::testing::Values(CommonTestUtils::DEVICE_GNA),
::testing::Values(additional_config)),
Concat4DLayerTest::getTestCaseName);
TrivialConcatLayerTest::getTestCaseName);
} // namespace


@@ -14,17 +14,17 @@
#include "ngraph_functions/utils/ngraph_helpers.hpp"
namespace LayerTestsDefinitions {
using concat4DParamsTuple = typename std::tuple<
using trivialConcatParamsTuple = typename std::tuple<
std::vector<size_t>, // Inputs shape
InferenceEngine::Precision, // Network precision
std::string, // Device name
std::map<std::string, std::string> // Configuration
>;
class Concat4DLayerTest : public testing::WithParamInterface<concat4DParamsTuple>,
class TrivialConcatLayerTest : public testing::WithParamInterface<trivialConcatParamsTuple>,
virtual public LayerTestsUtils::LayerTestsCommon {
public:
static std::string getTestCaseName(const testing::TestParamInfo<concat4DParamsTuple> &obj);
static std::string getTestCaseName(const testing::TestParamInfo<trivialConcatParamsTuple> &obj);
protected:
void SetUp() override;
};


@@ -1,70 +0,0 @@
// Copyright (C) 2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <tuple>
#include <string>
#include <vector>
#include <memory>
#include <functional>
#include "ie_core.hpp"
#include "common_test_utils/common_utils.hpp"
#include "functional_test_utils/blob_utils.hpp"
#include "common_test_utils/data_utils.hpp"
#include "functional_test_utils/precision_utils.hpp"
#include "functional_test_utils/plugin_cache.hpp"
#include "functional_test_utils/skip_tests_config.hpp"
#include "single_layer_tests/concat_4D.hpp"
namespace LayerTestsDefinitions {
std::string Concat4DLayerTest::getTestCaseName(const testing::TestParamInfo<concat4DParamsTuple> &obj) {
int axis;
std::vector<size_t> inputShapes;
InferenceEngine::Precision netPrecision;
InferenceEngine::Precision inPrc, outPrc;
InferenceEngine::Layout inLayout, outLayout;
std::string targetName;
std::map<std::string, std::string> config;
std::tie(inputShapes, netPrecision, targetName, config) = obj.param;
std::ostringstream result;
result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_";
result << "netPRC=" << netPrecision.name() << "_";
result << "trgDev=" << targetName << "_";
return result.str();
}
void Concat4DLayerTest::SetUp() {
int axis = 1;
InferenceEngine::SizeVector inputShape;
InferenceEngine::Precision netPrecision;
std::map<std::string, std::string> additional_config;
std::tie(inputShape, netPrecision, targetDevice, additional_config) = this->GetParam();
configuration.insert(additional_config.begin(), additional_config.end());
auto total_size = std::accumulate(inputShape.begin(), inputShape.end(), static_cast<size_t>(1), std::multiplies<size_t>());
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
auto params = ngraph::builder::makeParams(ngPrc, {inputShape});
auto input = params[0];
auto constant_values = CommonTestUtils::generate_float_numbers(total_size, 11.0f, 12.0f);
auto constant = ngraph::builder::makeConstant(ngPrc, std::vector<size_t>({1, total_size}), constant_values);
auto first_reshape_pattern = std::make_shared<ngraph::op::Constant>(ngraph::element::i64,
ngraph::Shape{4}, std::vector<size_t>(inputShape));
auto first_reshape = std::make_shared<ngraph::op::v1::Reshape>(constant, first_reshape_pattern, false);
auto constant_2 = ngraph::builder::makeConstant(ngPrc, inputShape, constant_values);
auto concat = std::make_shared<ngraph::opset1::Concat>(ngraph::OutputVector({first_reshape, input, constant_2}), axis);
auto act = ngraph::builder::makeActivation(concat, ngPrc, ngraph::helpers::ActivationTypes::Relu);
ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(act)};
function = std::make_shared<ngraph::Function>(results, params, "concat");
}
TEST_P(Concat4DLayerTest, CompareWithRefs) {
Run();
};
} // namespace LayerTestsDefinitions


@@ -0,0 +1,78 @@
// Copyright (C) 2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <tuple>
#include <string>
#include <vector>
#include <memory>
#include <functional>
#include "ie_core.hpp"
#include "common_test_utils/common_utils.hpp"
#include "functional_test_utils/blob_utils.hpp"
#include "common_test_utils/data_utils.hpp"
#include "functional_test_utils/precision_utils.hpp"
#include "functional_test_utils/plugin_cache.hpp"
#include "functional_test_utils/skip_tests_config.hpp"
#include "subgraph_tests/trivial_concat.hpp"
namespace LayerTestsDefinitions {
std::string TrivialConcatLayerTest::getTestCaseName(const testing::TestParamInfo<trivialConcatParamsTuple> &obj) {
int axis;
std::vector<size_t> inputShapes;
InferenceEngine::Precision netPrecision;
InferenceEngine::Precision inPrc, outPrc;
InferenceEngine::Layout inLayout, outLayout;
std::string targetName;
std::map<std::string, std::string> config;
std::tie(inputShapes, netPrecision, targetName, config) = obj.param;
std::ostringstream result;
result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_";
result << "netPRC=" << netPrecision.name() << "_";
result << "trgDev=" << targetName << "_";
return result.str();
}
void TrivialConcatLayerTest::SetUp() {
InferenceEngine::SizeVector inputShape;
InferenceEngine::Precision netPrecision;
std::map<std::string, std::string> additional_config;
std::tie(inputShape, netPrecision, targetDevice, additional_config) = this->GetParam();
configuration.insert(additional_config.begin(), additional_config.end());
int axis = inputShape.size() - 2;
size_t total_size = std::accumulate(inputShape.begin(), inputShape.end(), static_cast<size_t>(1), std::multiplies<size_t>());
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
auto params = ngraph::builder::makeParams(ngPrc, {{1, total_size}});
auto input_relu = ngraph::builder::makeActivation(params[0], ngPrc, ngraph::helpers::ActivationTypes::Relu);
auto input_reshape_pattern = std::make_shared<ngraph::op::Constant>(ngraph::element::i64,
ngraph::Shape{inputShape.size()}, std::vector<size_t>(inputShape));
auto input = std::make_shared<ngraph::op::v1::Reshape>(input_relu, input_reshape_pattern, false);
auto constant_values = CommonTestUtils::generate_float_numbers(total_size, 15.5f, 16.1f);
auto constant = ngraph::builder::makeConstant(ngPrc, std::vector<size_t>({1, total_size}), constant_values);
auto first_reshape = std::make_shared<ngraph::op::v1::Reshape>(constant, input_reshape_pattern, false);
auto concat = std::make_shared<ngraph::opset1::Concat>(ngraph::OutputVector({first_reshape, input}), axis);
auto final_reshape_pattern = std::make_shared<ngraph::op::Constant>(ngraph::element::i64,
ngraph::Shape{2}, std::vector<size_t>({1, 2 * total_size}));
auto final_reshape = std::make_shared<ngraph::op::v1::Reshape>(concat, final_reshape_pattern, false);
auto act = ngraph::builder::makeActivation(final_reshape, ngPrc, ngraph::helpers::ActivationTypes::Relu);
ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(act)};
function = std::make_shared<ngraph::Function>(results, params, "trivial_concat");
}
TEST_P(TrivialConcatLayerTest, CompareWithRefs) {
Run();
};
} // namespace LayerTestsDefinitions