diff --git a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp index af1a6442c1b..732d53dcfb9 100644 --- a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp +++ b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp @@ -824,6 +824,30 @@ void InsertIdentityLayerPass::run() { void InsertCopyLayerPass::run() { OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InsertCopyLayerPass"); + using FuncChildrenInfo = std::tuple< + CNNLayerPtr, // parent layer + CNNLayerPtr, // child layer + int32_t // input index + >; + // recursively searches for children functional layers skipping non-functional ones + std::function(CNNLayerPtr, CNNLayerPtr, int32_t)> find_func_layers = + [&find_func_layers](CNNLayerPtr currentLayer, CNNLayerPtr parentLayer, int32_t input_idx) { + if (!LayerInfo(currentLayer).isNonFunctional() || + currentLayer->outData.size() == 0 || + getInputTo(currentLayer->outData[0]).size() == 0) { + return std::vector{std::make_tuple(parentLayer, currentLayer, input_idx)}; + } + std::vector results; + for (size_t i = 0; i < getInputTo(currentLayer->outData[0]).size(); ++i) { + auto next_layer = CNNNetGetNextLayerSkipCertain(currentLayer, 0, i, + [](CNNLayerPtr origin) {return false; }).first; + auto result = find_func_layers(next_layer, currentLayer, + CNNLayerFindInsDataIdxes(currentLayer->outData[0], next_layer)[0]); + results.insert(std::end(results), std::begin(result), std::end(result)); + } + return results; + }; + // Copy layer insertion happens in few cases: // Crop output goes to concat layer -> copy layer insertion // Splitted part of input goes to concat layer -> copy layer insertion @@ -854,37 +878,24 @@ void InsertCopyLayerPass::run() { // Crop -> Concat, Input -> Split -> Concat and Concat -> Memory cases if ((LayerInfo(l).isCrop() && !LayerInfo(l).isCropAffined()) || LayerInfo(l).isConcat() || LayerInfo(l).isSplit()) { - std::vector> copy_insertion_tuples; - std::vector> delayed_copy_insertion_tuples; + std::vector copy_insertion_tuples; + std::vector delayed_copy_insertion_tuples; for (auto output : l->outData) { auto& inputTo = getInputTo(output); for (auto& childLayer : inputTo) { - auto original_child = childLayer.second; - auto original_parent = l; - auto current_layer = original_child; - std::vector connections = CNNLayerFindInsDataIdxes(output, original_child); - + std::vector connections = CNNLayerFindInsDataIdxes(output, childLayer.second); for (auto input_idx : connections) { - while (LayerInfo(current_layer).isNonFunctional()) { - if (current_layer->outData.size() == 0) break; - if (getInputTo(current_layer->outData[0]).size() == 0) break; - - auto next_layer = CNNNetGetNextLayerSkipCertain(current_layer, 0, 0, [](CNNLayerPtr origin) {return false; }).first; - if (current_layer->outData.size() == 1 && getInputTo(current_layer->outData[0]).size() == 1 && original_child == current_layer) { - original_child = next_layer; - original_parent = current_layer; - input_idx = CNNLayerFindInsDataIdxes(original_parent->outData[0], original_child)[0]; + auto children_info = find_func_layers(childLayer.second, l, input_idx); + for (const auto &child_info : children_info) { + CNNLayerPtr child = std::get<1>(child_info); + if ((LayerInfo(l).isConcat() || LayerInfo(l).isCrop() || LayerInfo(l).isSplit()) && LayerInfo(child).isMemory()) { + // Concat|Split|Crop -> Memory case + delayed_copy_insertion_tuples.push_back(child_info); + } else if ((LayerInfo(l).isSplit() || LayerInfo(l).isCrop()) && LayerInfo(child).isConcat()) { + // Split|Crop -> Concat case + // concat may be connected to previous layer with multiple connections + copy_insertion_tuples.push_back(child_info); } - current_layer = next_layer; - } - - if ((LayerInfo(l).isConcat() || LayerInfo(l).isCrop() || LayerInfo(l).isSplit()) && LayerInfo(current_layer).isMemory()) { - // Concat|Split|Crop -> Memory case - delayed_copy_insertion_tuples.push_back(std::make_tuple(original_parent, original_child, input_idx)); - } else if ((LayerInfo(l).isSplit() || LayerInfo(l).isCrop()) && LayerInfo(current_layer).isConcat()) { - // Split|Crop -> Concat case - // concat may be connected to previous layer with multiple connections - copy_insertion_tuples.push_back(std::make_tuple(original_parent, original_child, input_idx)); } } } diff --git a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/delayed_copy_layer.cpp b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/delayed_copy_layer.cpp index 3e5ee5c2e9b..872f7c02837 100644 --- a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/delayed_copy_layer.cpp +++ b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/delayed_copy_layer.cpp @@ -13,14 +13,28 @@ namespace { std::vector netPrecisions = {InferenceEngine::Precision::FP32, }; - std::map additional_config = { - {"GNA_COMPACT_MODE", "NO"} + std::vector> additional_config = { + {{"GNA_DEVICE_MODE", "GNA_SW_FP32"}}, + {{"GNA_DEVICE_MODE", "GNA_SW_EXACT"}} + }; + + std::vector memory_sizes = { + 128, 256, 32 }; INSTANTIATE_TEST_SUITE_P(smoke_delayed_copy_layer, DelayedCopyTest, ::testing::Combine( ::testing::ValuesIn(netPrecisions), ::testing::Values(CommonTestUtils::DEVICE_GNA), - ::testing::Values(additional_config)), - DelayedCopyTest::getTestCaseName); + ::testing::ValuesIn(additional_config), + ::testing::ValuesIn(memory_sizes)), + DelayedCopyTestBase::getTestCaseName); + + INSTANTIATE_TEST_SUITE_P(smoke_delayed_copy_layer, DelayedCopyAfterReshapeWithMultipleConnTest, + ::testing::Combine( + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_GNA), + ::testing::ValuesIn(additional_config), + ::testing::ValuesIn(memory_sizes)), + DelayedCopyTestBase::getTestCaseName); } // namespace diff --git a/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/delayed_copy_layer.hpp b/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/delayed_copy_layer.hpp index 261ca06df16..00e9df817a3 100644 --- a/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/delayed_copy_layer.hpp +++ b/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/delayed_copy_layer.hpp @@ -12,4 +12,8 @@ TEST_P(DelayedCopyTest, CompareWithRefs) { Run(); }; +TEST_P(DelayedCopyAfterReshapeWithMultipleConnTest, CompareWithRefs) { + Run(); +}; + } // namespace SubgraphTestsDefinitions diff --git a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/delayed_copy_layer.hpp b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/delayed_copy_layer.hpp index b590ecc4dc1..22941b0622e 100644 --- a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/delayed_copy_layer.hpp +++ b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/delayed_copy_layer.hpp @@ -16,20 +16,37 @@ namespace SubgraphTestsDefinitions { typedef std::tuple< - InferenceEngine::Precision, //Network precision - std::string, //Device name - std::map //Configuration -> ConcatSplitReluTuple; + InferenceEngine::Precision, // Network precision + std::string, // Device name + std::map, // Configuration + size_t // Memory layer size +> DelayedCopyTuple; -class DelayedCopyTest - : public testing::WithParamInterface, - public LayerTestsUtils::LayerTestsCommon { +class DelayedCopyTestBase + : public testing::WithParamInterface, + public LayerTestsUtils::LayerTestsCommon { private: - void switchToNgraphFriendlyModel(); + void InitMemory(); + virtual void switchToNgraphFriendlyModel() = 0; +protected: + void Run() override; + std::vector memory_init; public: - static std::string getTestCaseName(const testing::TestParamInfo &obj); + static std::string getTestCaseName(const testing::TestParamInfo &obj); +}; + +class DelayedCopyTest : public DelayedCopyTestBase { +private: + void switchToNgraphFriendlyModel() override; protected: void SetUp() override; - void Run() override; }; + +class DelayedCopyAfterReshapeWithMultipleConnTest : public DelayedCopyTestBase { +private: + void switchToNgraphFriendlyModel() override; +protected: + void SetUp() override; +}; + } // namespace SubgraphTestsDefinitions diff --git a/inference-engine/tests/functional/shared_test_classes/src/subgraph/delayed_copy_layer.cpp b/inference-engine/tests/functional/shared_test_classes/src/subgraph/delayed_copy_layer.cpp index aba00a1f338..f8b094bc278 100644 --- a/inference-engine/tests/functional/shared_test_classes/src/subgraph/delayed_copy_layer.cpp +++ b/inference-engine/tests/functional/shared_test_classes/src/subgraph/delayed_copy_layer.cpp @@ -5,35 +5,73 @@ #include "shared_test_classes/subgraph/delayed_copy_layer.hpp" namespace SubgraphTestsDefinitions { - std::string DelayedCopyTest::getTestCaseName(const testing::TestParamInfo &obj) { + void DelayedCopyTestBase::InitMemory() { + IE_SUPPRESS_DEPRECATED_START + auto states = executableNetwork.QueryState(); + for (auto& state : states) { + auto name = state.GetName(); + if (name.find("id") != std::string::npos) { + auto blob = FuncTestUtils::createAndFillBlobWithFloatArray(state.GetState()->getTensorDesc(), + memory_init.data(), memory_init.size()); + state.SetState(blob); + } else { + GTEST_FAIL() << "unknown memory state"; + } + } + IE_SUPPRESS_DEPRECATED_END + } + + void DelayedCopyTestBase::Run() { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + LoadNetwork(); + InitMemory(); + GenerateInputs(); + Infer(); + switchToNgraphFriendlyModel(); + Validate(); + } + + std::string DelayedCopyTestBase::getTestCaseName(const testing::TestParamInfo &obj) { InferenceEngine::Precision netPrecision; std::string targetName; std::map additional_config; - std::tie(netPrecision, targetName, additional_config) = obj.param; + size_t memory_size; + std::tie(netPrecision, targetName, additional_config, memory_size) = obj.param; std::ostringstream results; results << "netPRC=" << netPrecision.name() << "_"; results << "targetDevice=" << targetName << "_"; + results << "memorySize=" << memory_size; + for (auto const& configItem : additional_config) { + results << "_configItem=" << configItem.first << "_" << configItem.second; + } return results.str(); } void DelayedCopyTest::SetUp() { InferenceEngine::Precision netPrecision; std::map additional_config; - std::tie(netPrecision, targetDevice, additional_config) = this->GetParam(); + size_t memory_size; + std::tie(netPrecision, targetDevice, additional_config, memory_size) = this->GetParam(); configuration.insert(additional_config.begin(), additional_config.end()); - auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); - auto input = ngraph::builder::makeParams(ngPrc, {{1, 384}}); - auto mem_c = std::make_shared(ngPrc, ngraph::Shape{1, 128}, std::vector{0}); + ASSERT_EQ(memory_size % 2, 0); + + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); + auto input = ngraph::builder::makeParams(ngPrc, {{1, 3 * memory_size}}); + + memory_init = CommonTestUtils::generate_float_numbers(memory_size, -0.2f, 0.2f); + + auto mem_c = std::make_shared(ngPrc, ngraph::Shape{1, memory_size}, memory_init); auto mem_r = std::make_shared(mem_c, "id"); auto concat = std::make_shared(ngraph::OutputVector{mem_r, input[0]}, 1); - auto split = ngraph::builder::makeVariadicSplit(concat, {384, 128}, 1); + auto split = ngraph::builder::makeVariadicSplit(concat, {3 * memory_size, memory_size}, 1); auto mem_w = std::make_shared(split->output(1), "id"); - auto VariadicSplit = ngraph::builder::makeVariadicSplit(concat, {64, 448}, 1); + auto VariadicSplit = ngraph::builder::makeVariadicSplit(concat, {memory_size / 2, 3 * memory_size + memory_size / 2}, 1); auto relu2 = std::make_shared(VariadicSplit->output(1)); mem_w->add_control_dependency(mem_r); @@ -46,29 +84,85 @@ namespace SubgraphTestsDefinitions { InferenceEngine::Precision netPrecision; std::map config; std::map additional_config; - std::tie(netPrecision, targetDevice, additional_config) = this->GetParam(); + size_t memory_size; + std::tie(netPrecision, targetDevice, additional_config, memory_size) = this->GetParam(); + + ASSERT_EQ(memory_size % 2, 0); + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); - auto input = ngraph::builder::makeParams(ngPrc, {{1, 384}}); + auto input = ngraph::builder::makeParams(ngPrc, {{1, 3 * memory_size}}); - auto mem_c = std::make_shared(ngPrc, ngraph::Shape{1, 128}, std::vector{0}); + auto mem_c = std::make_shared(ngPrc, ngraph::Shape{1, memory_size}, memory_init); auto concat = std::make_shared(ngraph::OutputVector{mem_c, input[0]}, 1); - auto split = ngraph::builder::makeVariadicSplit(concat, {384, 128}, 1); + auto split = ngraph::builder::makeVariadicSplit(concat, {3 * memory_size, memory_size}, 1); - auto VariadicSplit = ngraph::builder::makeVariadicSplit(concat, {64, 448}, 1); + auto VariadicSplit = ngraph::builder::makeVariadicSplit(concat, {memory_size / 2, 3 * memory_size + memory_size / 2}, 1); auto relu2 = std::make_shared(VariadicSplit->output(1)); function = std::make_shared(relu2, input, "delayed_copy_layer_nonmemory"); - functionRefs = ngraph::clone_function(*function); } - void DelayedCopyTest::Run() { - SKIP_IF_CURRENT_TEST_IS_DISABLED() + void DelayedCopyAfterReshapeWithMultipleConnTest::SetUp() { + InferenceEngine::Precision netPrecision; + std::map additional_config; + size_t memory_size; + std::tie(netPrecision, targetDevice, additional_config, memory_size) = this->GetParam(); + configuration.insert(additional_config.begin(), additional_config.end()); - LoadNetwork(); - GenerateInputs(); - Infer(); - switchToNgraphFriendlyModel(); - Validate(); + ASSERT_EQ(memory_size % 8, 0); + + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); + auto input = ngraph::builder::makeParams(ngPrc, {{1, memory_size / 2}}); + + memory_init = CommonTestUtils::generate_float_numbers(memory_size, -0.2f, 0.2f); + + auto mem_c = ngraph::builder::makeConstant(ngPrc, ngraph::Shape{8, memory_size / 8}, memory_init); + auto mem_r = std::make_shared(mem_c, "id"); + auto reshape_pattern1 = ngraph::builder::makeConstant(ngraph::element::i64, ngraph::Shape{2}, ngraph::Shape{1, memory_size}); + auto reshape1 = std::make_shared(mem_r, reshape_pattern1, false); + auto split = ngraph::builder::makeSplit(reshape1, ngPrc, 2, 1); + + auto concat = std::make_shared(ngraph::OutputVector{split->output(0), input[0]}, 1); + auto reshape_pattern2 = ngraph::builder::makeConstant(ngraph::element::i64, ngraph::Shape{2}, ngraph::Shape{8, memory_size / 8}); + auto reshape2 = std::make_shared(concat, reshape_pattern2, false); + + auto mem_w = std::make_shared(reshape2, "id"); + + auto relu = std::make_shared(reshape2); + auto reshape_pattern3 = ngraph::builder::makeConstant(ngraph::element::i64, ngraph::Shape{2}, ngraph::Shape{1, memory_size}); + auto reshape3 = std::make_shared(relu, reshape_pattern3, false); + + mem_w->add_control_dependency(mem_r); + reshape3->add_control_dependency(mem_w); + + function = std::make_shared(reshape3, input, "delayed_copy_layer_reshape_memory"); } + void DelayedCopyAfterReshapeWithMultipleConnTest::switchToNgraphFriendlyModel() { + InferenceEngine::Precision netPrecision; + std::map config; + std::map additional_config; + size_t memory_size; + std::tie(netPrecision, targetDevice, additional_config, memory_size) = this->GetParam(); + + ASSERT_EQ(memory_size % 8, 0); + + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); + auto input = ngraph::builder::makeParams(ngPrc, {{1, memory_size / 2}}); + + auto mem_c = ngraph::builder::makeConstant(ngPrc, ngraph::Shape{1, memory_size}, memory_init); + auto reshape_pattern1 = ngraph::builder::makeConstant(ngraph::element::i64, ngraph::Shape{2}, ngraph::Shape{1, memory_size}); + auto reshape1 = std::make_shared(mem_c, reshape_pattern1, false); + auto split = ngraph::builder::makeSplit(reshape1, ngPrc, 2, 1); + + auto concat = std::make_shared(ngraph::OutputVector{split->output(0), input[0]}, 1); + auto reshape_pattern2 = ngraph::builder::makeConstant(ngraph::element::i64, ngraph::Shape{2}, ngraph::Shape{8, memory_size / 8}); + auto reshape2 = std::make_shared(concat, reshape_pattern2, false); + + auto relu = std::make_shared(reshape2); + auto reshape_pattern3 = ngraph::builder::makeConstant(ngraph::element::i64, ngraph::Shape{2}, ngraph::Shape{1, memory_size}); + auto reshape3 = std::make_shared(relu, reshape_pattern3, false); + + function = std::make_shared(reshape3, input, "delayed_copy_layer_reshape_nonmemory"); + } } // namespace SubgraphTestsDefinitions