[GNA] Fixed error in delayed copy insertion (#7944)

* [GNA] Fixed error with delayed copy insertion
* [GNA] Added test
2021-10-14 19:04:15 +03:00 · 2021-10-14 19:04:15 +03:00 · e9bde06f7b
commit e9bde06f7b
parent d21572d7cb
5 changed files with 201 additions and 61 deletions
--- a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp
+++ b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp
@ -824,6 +824,30 @@ void InsertIdentityLayerPass::run() {
 void InsertCopyLayerPass::run() {
    OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InsertCopyLayerPass");
    using FuncChildrenInfo = std::tuple<
        CNNLayerPtr,   // parent layer
        CNNLayerPtr,   // child layer
        int32_t        // input index
    >;
    // Recursively collects the functional layers reachable from `currentLayer`, walking through
    // non-functional layers instead of stopping at them.
    //   currentLayer - layer being examined
    //   parentLayer  - layer whose output feeds `currentLayer` on this path
    //   input_idx    - index of the `currentLayer` input that `parentLayer` is connected to
    // Returns one (parent, child, input index) tuple per terminal layer found.
    std::function<std::vector<FuncChildrenInfo>(CNNLayerPtr, CNNLayerPtr, int32_t)> find_func_layers =
        [&find_func_layers](CNNLayerPtr currentLayer, CNNLayerPtr parentLayer, int32_t input_idx) {
        // Terminal case: the layer is functional, or it is a non-functional layer with nothing
        // downstream (no outputs, or no consumers on its first output) - report it as is.
        if (!LayerInfo(currentLayer).isNonFunctional() ||
            currentLayer->outData.size() == 0 ||
            getInputTo(currentLayer->outData[0]).size() == 0) {
            return std::vector<FuncChildrenInfo>{std::make_tuple(parentLayer, currentLayer, input_idx)};
        }
        std::vector<FuncChildrenInfo> results;
        // Only consumers of the first output (outData[0]) are followed.
        for (size_t i = 0; i < getInputTo(currentLayer->outData[0]).size(); ++i) {
            // The predicate always returns false, so no layer is skipped here: this just fetches
            // the i-th direct consumer of output 0.
            auto next_layer = CNNNetGetNextLayerSkipCertain(currentLayer, 0, i,
                [](CNNLayerPtr origin) {return false; }).first;
            // The non-functional layer becomes the parent for the next step; only the first
            // matching input index of `next_layer` is passed on.
            auto result = find_func_layers(next_layer, currentLayer,
                CNNLayerFindInsDataIdxes(currentLayer->outData[0], next_layer)[0]);
            results.insert(std::end(results), std::begin(result), std::end(result));
        }
        return results;
    };
    // Copy layer insertion happens in a few cases:
    // Crop output goes to concat layer -> copy layer insertion
    // Split part of input goes to concat layer -> copy layer insertion
@ -854,37 +878,24 @@ void InsertCopyLayerPass::run() {
        // Crop -> Concat, Input -> Split -> Concat and Concat -> Memory cases
        if ((LayerInfo(l).isCrop() && !LayerInfo(l).isCropAffined()) || LayerInfo(l).isConcat() || LayerInfo(l).isSplit()) {
-            std::vector<std::tuple<CNNLayerPtr, CNNLayerPtr, size_t>> copy_insertion_tuples;
+            std::vector<FuncChildrenInfo> copy_insertion_tuples;
-            std::vector<std::tuple<CNNLayerPtr, CNNLayerPtr, size_t>> delayed_copy_insertion_tuples;
+            std::vector<FuncChildrenInfo> delayed_copy_insertion_tuples;
            for (auto output : l->outData) {
                auto& inputTo = getInputTo(output);
                for (auto& childLayer : inputTo) {
-                    auto original_child = childLayer.second;
+                    std::vector<int> connections = CNNLayerFindInsDataIdxes(output, childLayer.second);
                    auto original_parent = l;
                    auto current_layer = original_child;
                    std::vector<int> connections = CNNLayerFindInsDataIdxes(output, original_child);
                    for (auto input_idx : connections) {
-                        while (LayerInfo(current_layer).isNonFunctional()) {
+                        auto children_info = find_func_layers(childLayer.second, l, input_idx);
-                            if (current_layer->outData.size() == 0) break;
+                        for (const auto &child_info : children_info) {
-                            if (getInputTo(current_layer->outData[0]).size() == 0) break;
+                            CNNLayerPtr child = std::get<1>(child_info);
-
+                            if ((LayerInfo(l).isConcat() || LayerInfo(l).isCrop() || LayerInfo(l).isSplit()) && LayerInfo(child).isMemory()) {
-                            auto next_layer = CNNNetGetNextLayerSkipCertain(current_layer, 0, 0, [](CNNLayerPtr origin) {return false; }).first;
+                                // Concat|Split|Crop -> Memory case
-                            if (current_layer->outData.size() == 1 && getInputTo(current_layer->outData[0]).size() == 1 && original_child == current_layer) {
+                                delayed_copy_insertion_tuples.push_back(child_info);
-                                original_child = next_layer;
+                            } else if ((LayerInfo(l).isSplit() || LayerInfo(l).isCrop()) && LayerInfo(child).isConcat()) {
-                                original_parent = current_layer;
+                                // Split|Crop -> Concat case
-                                input_idx = CNNLayerFindInsDataIdxes(original_parent->outData[0], original_child)[0];
+                                // concat may be connected to previous layer with multiple connections
                                copy_insertion_tuples.push_back(child_info);
                            }
                            current_layer = next_layer;
                        }
                        if ((LayerInfo(l).isConcat() || LayerInfo(l).isCrop() || LayerInfo(l).isSplit()) && LayerInfo(current_layer).isMemory()) {
                            // Concat|Split|Crop -> Memory case
                            delayed_copy_insertion_tuples.push_back(std::make_tuple(original_parent, original_child, input_idx));
                        } else if ((LayerInfo(l).isSplit() || LayerInfo(l).isCrop()) && LayerInfo(current_layer).isConcat()) {
                            // Split|Crop -> Concat case
                            // concat may be connected to previous layer with multiple connections
                            copy_insertion_tuples.push_back(std::make_tuple(original_parent, original_child, input_idx));
                        }
                    }
                }
--- a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/delayed_copy_layer.cpp
+++ b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/delayed_copy_layer.cpp
@ -13,14 +13,28 @@ namespace {
    std::vector<InferenceEngine::Precision> netPrecisions = {InferenceEngine::Precision::FP32,
    };
-    std::map<std::string, std::string> additional_config = {
+    std::vector<std::map<std::string, std::string>> additional_config = {
-            {"GNA_COMPACT_MODE", "NO"}
+        {{"GNA_DEVICE_MODE", "GNA_SW_FP32"}},
        {{"GNA_DEVICE_MODE", "GNA_SW_EXACT"}}
    };
    std::vector<size_t> memory_sizes = {
        128, 256, 32
    };
    INSTANTIATE_TEST_SUITE_P(smoke_delayed_copy_layer, DelayedCopyTest,
                            ::testing::Combine(
            ::testing::ValuesIn(netPrecisions),
            ::testing::Values(CommonTestUtils::DEVICE_GNA),
-            ::testing::Values(additional_config)),
+            ::testing::ValuesIn(additional_config),
-                            DelayedCopyTest::getTestCaseName);
+            ::testing::ValuesIn(memory_sizes)),
                            DelayedCopyTestBase::getTestCaseName);
    // Instantiates DelayedCopyAfterReshapeWithMultipleConnTest for every combination of network
    // precision, the GNA device, each configuration map in additional_config and each size in
    // memory_sizes. Test names are produced by DelayedCopyTestBase::getTestCaseName.
    INSTANTIATE_TEST_SUITE_P(smoke_delayed_copy_layer, DelayedCopyAfterReshapeWithMultipleConnTest,
                            ::testing::Combine(
            ::testing::ValuesIn(netPrecisions),
            ::testing::Values(CommonTestUtils::DEVICE_GNA),
            ::testing::ValuesIn(additional_config),
            ::testing::ValuesIn(memory_sizes)),
                            DelayedCopyTestBase::getTestCaseName);
 }  // namespace
--- a/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/delayed_copy_layer.hpp
+++ b/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/delayed_copy_layer.hpp
@ -12,4 +12,8 @@ TEST_P(DelayedCopyTest, CompareWithRefs) {
    Run();
 };
 // Parameterized test body: delegates entirely to the fixture's Run().
 TEST_P(DelayedCopyAfterReshapeWithMultipleConnTest, CompareWithRefs) {
    Run();
 };
 } // namespace SubgraphTestsDefinitions
--- a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/delayed_copy_layer.hpp
+++ b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/delayed_copy_layer.hpp
@ -16,20 +16,37 @@
 namespace SubgraphTestsDefinitions {
 typedef std::tuple<
-        InferenceEngine::Precision,        //Network precision
+        InferenceEngine::Precision,         // Network precision
-        std::string,                       //Device name
+        std::string,                        // Device name
-        std::map<std::string, std::string> //Configuration
+        std::map<std::string, std::string>, // Configuration
-> ConcatSplitReluTuple;
+        size_t                              // Memory layer size
 > DelayedCopyTuple;
-class DelayedCopyTest
+class DelayedCopyTestBase
-        : public testing::WithParamInterface<ConcatSplitReluTuple>,
+       : public testing::WithParamInterface<DelayedCopyTuple>,
-          public LayerTestsUtils::LayerTestsCommon {
+         public LayerTestsUtils::LayerTestsCommon {
 private:
-    void switchToNgraphFriendlyModel();
+    void InitMemory();
    virtual void switchToNgraphFriendlyModel() = 0;
 protected:
    void Run() override;
    std::vector<float> memory_init;
 public:
-    static std::string getTestCaseName(const testing::TestParamInfo<ConcatSplitReluTuple> &obj);
+    static std::string getTestCaseName(const testing::TestParamInfo<DelayedCopyTuple> &obj);
 };
 // Concrete delayed-copy test fixture derived from DelayedCopyTestBase.
 class DelayedCopyTest : public DelayedCopyTestBase {
 private:
    // Implements the pure-virtual hook declared in DelayedCopyTestBase.
    void switchToNgraphFriendlyModel() override;
 protected:
    // Builds the test model from the test parameters.
    void SetUp() override;
    // NOTE(review): DelayedCopyTestBase already declares Run(); confirm a matching
    // DelayedCopyTest::Run definition still exists, otherwise this declaration will not link.
    void Run() override;
 };
 // Delayed-copy test fixture for the reshape / multiple-connection model; derived from
 // DelayedCopyTestBase.
 class DelayedCopyAfterReshapeWithMultipleConnTest : public DelayedCopyTestBase {
 private:
    // Implements the pure-virtual hook declared in DelayedCopyTestBase.
    void switchToNgraphFriendlyModel() override;
 protected:
    // Builds the test model from the test parameters.
    void SetUp() override;
 };
 } // namespace SubgraphTestsDefinitions
--- a/inference-engine/tests/functional/shared_test_classes/src/subgraph/delayed_copy_layer.cpp
+++ b/inference-engine/tests/functional/shared_test_classes/src/subgraph/delayed_copy_layer.cpp
@ -5,35 +5,73 @@
 #include "shared_test_classes/subgraph/delayed_copy_layer.hpp"
 namespace SubgraphTestsDefinitions {
-    std::string DelayedCopyTest::getTestCaseName(const testing::TestParamInfo<ConcatSplitReluTuple> &obj) {
+    void DelayedCopyTestBase::InitMemory() {
        IE_SUPPRESS_DEPRECATED_START
        auto states = executableNetwork.QueryState();
        for (auto& state : states) {
            auto name = state.GetName();
            if (name.find("id") != std::string::npos) {
                auto blob = FuncTestUtils::createAndFillBlobWithFloatArray(state.GetState()->getTensorDesc(),
                                                                           memory_init.data(), memory_init.size());
                state.SetState(blob);
            } else {
                GTEST_FAIL() << "unknown memory state";
            }
        }
        IE_SUPPRESS_DEPRECATED_END
    }
    // Test driver shared by the delayed-copy fixtures: loads the network, initialises its memory
    // states, runs inference, then switches to the model supplied by the subclass and validates.
    void DelayedCopyTestBase::Run() {
        SKIP_IF_CURRENT_TEST_IS_DISABLED()
        LoadNetwork();
        // States can only be set once the executable network exists, and must be set before Infer().
        InitMemory();
        GenerateInputs();
        Infer();
        // Subclass hook that replaces `function`; presumably so Validate() computes its reference
        // results on a model without memory layers - confirm against LayerTestsCommon::Validate.
        switchToNgraphFriendlyModel();
        Validate();
    }
    std::string DelayedCopyTestBase::getTestCaseName(const testing::TestParamInfo<DelayedCopyTuple> &obj) {
        InferenceEngine::Precision netPrecision;
        std::string targetName;
        std::map<std::string, std::string> additional_config;
-        std::tie(netPrecision, targetName, additional_config) = obj.param;
+        size_t memory_size;
        std::tie(netPrecision, targetName, additional_config, memory_size) = obj.param;
        std::ostringstream results;
        results << "netPRC=" << netPrecision.name() << "_";
        results << "targetDevice=" << targetName << "_";
        results << "memorySize=" << memory_size;
        for (auto const& configItem : additional_config) {
            results << "_configItem=" << configItem.first << "_" << configItem.second;
        }
        return results.str();
    }
    void DelayedCopyTest::SetUp() {
        InferenceEngine::Precision netPrecision;
        std::map<std::string, std::string> additional_config;
-        std::tie(netPrecision, targetDevice, additional_config) = this->GetParam();
+        size_t memory_size;
        std::tie(netPrecision, targetDevice, additional_config, memory_size) = this->GetParam();
        configuration.insert(additional_config.begin(), additional_config.end());
        auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
        auto input = ngraph::builder::makeParams(ngPrc, {{1, 384}});
-        auto mem_c = std::make_shared<ngraph::op::Constant>(ngPrc, ngraph::Shape{1, 128}, std::vector<float>{0});
+        ASSERT_EQ(memory_size % 2, 0);
        auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
        auto input = ngraph::builder::makeParams(ngPrc, {{1, 3 * memory_size}});
        memory_init = CommonTestUtils::generate_float_numbers(memory_size, -0.2f, 0.2f);
        auto mem_c = std::make_shared<ngraph::op::Constant>(ngPrc, ngraph::Shape{1, memory_size}, memory_init);
        auto mem_r = std::make_shared<ngraph::opset3::ReadValue>(mem_c, "id");
        auto concat = std::make_shared<ngraph::opset1::Concat>(ngraph::OutputVector{mem_r, input[0]}, 1);
-        auto split = ngraph::builder::makeVariadicSplit(concat, {384, 128}, 1);
+        auto split = ngraph::builder::makeVariadicSplit(concat, {3 * memory_size, memory_size}, 1);
        auto mem_w = std::make_shared<ngraph::opset3::Assign>(split->output(1), "id");
-        auto VariadicSplit = ngraph::builder::makeVariadicSplit(concat, {64, 448}, 1);
+        auto VariadicSplit = ngraph::builder::makeVariadicSplit(concat, {memory_size / 2, 3 * memory_size + memory_size / 2}, 1);
        auto relu2 = std::make_shared<ngraph::opset1::Sigmoid>(VariadicSplit->output(1));
        mem_w->add_control_dependency(mem_r);
@ -46,29 +84,85 @@ namespace SubgraphTestsDefinitions {
        InferenceEngine::Precision netPrecision;
        std::map<std::string, std::string> config;
        std::map<std::string, std::string> additional_config;
-        std::tie(netPrecision, targetDevice, additional_config) = this->GetParam();
+        size_t memory_size;
        std::tie(netPrecision, targetDevice, additional_config, memory_size) = this->GetParam();
        ASSERT_EQ(memory_size % 2, 0);
        auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
-        auto input = ngraph::builder::makeParams(ngPrc, {{1, 384}});
+        auto input = ngraph::builder::makeParams(ngPrc, {{1, 3 * memory_size}});
-        auto mem_c = std::make_shared<ngraph::op::Constant>(ngPrc, ngraph::Shape{1, 128}, std::vector<float>{0});
+        auto mem_c = std::make_shared<ngraph::op::Constant>(ngPrc, ngraph::Shape{1, memory_size}, memory_init);
        auto concat = std::make_shared<ngraph::opset1::Concat>(ngraph::OutputVector{mem_c, input[0]}, 1);
-        auto split = ngraph::builder::makeVariadicSplit(concat, {384, 128}, 1);
+        auto split = ngraph::builder::makeVariadicSplit(concat, {3 * memory_size, memory_size}, 1);
-        auto VariadicSplit = ngraph::builder::makeVariadicSplit(concat, {64, 448}, 1);
+        auto VariadicSplit = ngraph::builder::makeVariadicSplit(concat, {memory_size / 2, 3 * memory_size + memory_size / 2}, 1);
        auto relu2 = std::make_shared<ngraph::opset1::Sigmoid>(VariadicSplit->output(1));
        function = std::make_shared<ngraph::Function>(relu2, input, "delayed_copy_layer_nonmemory");
        functionRefs = ngraph::clone_function(*function);
    }
-        void DelayedCopyTest::Run() {
+    void DelayedCopyAfterReshapeWithMultipleConnTest::SetUp() {
-        SKIP_IF_CURRENT_TEST_IS_DISABLED()
+        InferenceEngine::Precision netPrecision;
        std::map<std::string, std::string> additional_config;
        size_t memory_size;
        std::tie(netPrecision, targetDevice, additional_config, memory_size) = this->GetParam();
        configuration.insert(additional_config.begin(), additional_config.end());
-        LoadNetwork();
+        ASSERT_EQ(memory_size % 8, 0);
-        GenerateInputs();
+
-        Infer();
+        auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
-        switchToNgraphFriendlyModel();
+        auto input = ngraph::builder::makeParams(ngPrc, {{1, memory_size / 2}});
-        Validate();
+
        memory_init = CommonTestUtils::generate_float_numbers(memory_size, -0.2f, 0.2f);
        auto mem_c = ngraph::builder::makeConstant(ngPrc, ngraph::Shape{8, memory_size / 8}, memory_init);
        auto mem_r = std::make_shared<ngraph::opset3::ReadValue>(mem_c, "id");
        auto reshape_pattern1 = ngraph::builder::makeConstant(ngraph::element::i64, ngraph::Shape{2}, ngraph::Shape{1, memory_size});
        auto reshape1 = std::make_shared<ngraph::opset1::Reshape>(mem_r, reshape_pattern1, false);
        auto split = ngraph::builder::makeSplit(reshape1, ngPrc, 2, 1);
        auto concat = std::make_shared<ngraph::opset1::Concat>(ngraph::OutputVector{split->output(0), input[0]}, 1);
        auto reshape_pattern2 = ngraph::builder::makeConstant(ngraph::element::i64, ngraph::Shape{2}, ngraph::Shape{8, memory_size / 8});
        auto reshape2 = std::make_shared<ngraph::opset1::Reshape>(concat, reshape_pattern2, false);
        auto mem_w = std::make_shared<ngraph::opset3::Assign>(reshape2, "id");
        auto relu = std::make_shared<ngraph::opset1::Sigmoid>(reshape2);
        auto reshape_pattern3 = ngraph::builder::makeConstant(ngraph::element::i64, ngraph::Shape{2}, ngraph::Shape{1, memory_size});
        auto reshape3 = std::make_shared<ngraph::opset1::Reshape>(relu, reshape_pattern3, false);
        mem_w->add_control_dependency(mem_r);
        reshape3->add_control_dependency(mem_w);
        function = std::make_shared<ngraph::Function>(reshape3, input, "delayed_copy_layer_reshape_memory");
    }
    // Rebuilds `function` as a variant of the reshape / multiple-connection model that has no
    // memory layers: the memory input is a plain Constant filled with memory_init and no
    // ReadValue/Assign nodes are created.
    // Graph: Constant -> Reshape{1, N} -> Split(2 parts, axis 1) -> Concat(first part, input)
    //        -> Reshape{8, N/8} -> Sigmoid -> Reshape{1, N}, where N is memory_size.
    void DelayedCopyAfterReshapeWithMultipleConnTest::switchToNgraphFriendlyModel() {
        InferenceEngine::Precision netPrecision;
        std::map<std::string, std::string> additional_config;
        size_t memory_size;
        std::tie(netPrecision, targetDevice, additional_config, memory_size) = this->GetParam();
        // The {8, memory_size / 8} reshape below needs memory_size to be a multiple of 8.
        ASSERT_EQ(memory_size % 8, 0);
        auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
        auto input = ngraph::builder::makeParams(ngPrc, {{1, memory_size / 2}});
        // Constant stand-in for the memory state, filled with the values stored in memory_init.
        auto mem_c = ngraph::builder::makeConstant(ngPrc, ngraph::Shape{1, memory_size}, memory_init);
        auto reshape_pattern1 = ngraph::builder::makeConstant(ngraph::element::i64, ngraph::Shape{2}, ngraph::Shape{1, memory_size});
        auto reshape1 = std::make_shared<ngraph::opset1::Reshape>(mem_c, reshape_pattern1, false);
        auto split = ngraph::builder::makeSplit(reshape1, ngPrc, 2, 1);
        // Only the first half of the split is concatenated with the network input.
        auto concat = std::make_shared<ngraph::opset1::Concat>(ngraph::OutputVector{split->output(0), input[0]}, 1);
        auto reshape_pattern2 = ngraph::builder::makeConstant(ngraph::element::i64, ngraph::Shape{2}, ngraph::Shape{8, memory_size / 8});
        auto reshape2 = std::make_shared<ngraph::opset1::Reshape>(concat, reshape_pattern2, false);
        auto relu = std::make_shared<ngraph::opset1::Sigmoid>(reshape2);
        auto reshape_pattern3 = ngraph::builder::makeConstant(ngraph::element::i64, ngraph::Shape{2}, ngraph::Shape{1, memory_size});
        auto reshape3 = std::make_shared<ngraph::opset1::Reshape>(relu, reshape_pattern3, false);
        function = std::make_shared<ngraph::Function>(reshape3, input, "delayed_copy_layer_reshape_nonmemory");
    }
 } // namespace SubgraphTestsDefinitions