From 6b2ac800aa030c9cc983ac9186e5c16ad2b934b8 Mon Sep 17 00:00:00 2001 From: Andrey Dmitriev Date: Thu, 3 Sep 2020 13:23:58 +0300 Subject: [PATCH] [GNA] Fixed case of unconnected output of split layer (#1344) [GNA] Fixed case of unconnected output of split layer [GNA] Fixed case of unconnected output of split layer test [GNA] Fixed case of unconnected output of split layer fixed --- .../src/gna_plugin/gna_graph_compiler.cpp | 9 +- .../src/gna_plugin/gna_graph_tools.hpp | 22 +++- .../gna_plugin/optimizer/gna_pass_manager.cpp | 111 +++++++++--------- .../subgraph_tests/split_relu.cpp | 56 +++++++++ .../include/subgraph_tests/split_relu.hpp | 33 ++++++ .../shared/src/subgraph_tests/split_relu.cpp | 53 +++++++++ 6 files changed, 227 insertions(+), 57 deletions(-) create mode 100644 inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/split_relu.cpp create mode 100644 inference-engine/tests/functional/plugin/shared/include/subgraph_tests/split_relu.hpp create mode 100644 inference-engine/tests/functional/plugin/shared/src/subgraph_tests/split_relu.cpp diff --git a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp index 65fe47b1f4d..5b745b009dc 100644 --- a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp +++ b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp @@ -140,7 +140,6 @@ void GNAGraphCompiler::fillSplitConnections(InferenceEngine::CNNLayerPtr layer) size_t padding = 0; size_t output_layer_size = 0; - for (int j = 0; j != getInputTo(layer->outData[i]).size(); j++) { auto outFunctionalLayer = CNNNetGetNextLayerSkipCertain(layer, i, j, [](CNNLayerPtr l) { return LayerInfo(l).isNonFunctional(); @@ -171,6 +170,13 @@ void GNAGraphCompiler::fillSplitConnections(InferenceEngine::CNNLayerPtr layer) } } + // in case of unconnected split - we need properly increment size + if (getInputTo(layer->outData[i]).empty()) { + output_layer_size = + InferenceEngine::details::product(begin(layer->outData[i]->getDims()), + end(layer->outData[i]->getDims())) * layer->outData[i]->getPrecision().size(); + } + split_size += padding + output_layer_size; } layerInfoItem.reserved_size = split_size; @@ -1932,7 +1938,6 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer, return LayerInfo(l).isNonFunctional(); }); - gnalog() << "Connecting input " << layer->name << " to " << prevLayer->name << " ...\n"; // real input not a memory input diff --git a/inference-engine/src/gna_plugin/gna_graph_tools.hpp b/inference-engine/src/gna_plugin/gna_graph_tools.hpp index a593feb265e..a1851741e45 100644 --- a/inference-engine/src/gna_plugin/gna_graph_tools.hpp +++ b/inference-engine/src/gna_plugin/gna_graph_tools.hpp @@ -459,6 +459,7 @@ inline void CNNNetworkInsertLayer(CNNLayerPtr after, bool bLocated = false; bool hasOutputIndex = outDataIndex != invalid_data_idx; if (after != nullptr) { + int nUnconnectedOData = 0; for (auto && data : after->outData) { if (hasOutputIndex && outDataIndex) { --outDataIndex; @@ -485,8 +486,8 @@ inline void CNNNetworkInsertLayer(CNNLayerPtr after, break; } } - if (getInputTo(data).empty()) { - bLocated = true; + if (inputTo.empty()) { + nUnconnectedOData++; } if (bLocated) { // erasing all connection @@ -503,6 +504,23 @@ inline void CNNNetworkInsertLayer(CNNLayerPtr after, } } + // separately checking case of possible single unconnected output of given layer + if (!bLocated && !before && !hasOutputIndex) { + if (nUnconnectedOData != 1) { + THROW_GNA_EXCEPTION << "Cannot insert layer: " << LAYER_NAME(layerToInsert) <<" after: " << LAYER_NAME(after); + } + + for (auto && data : after->outData) { + if (!getInputTo(data).empty()) continue; + + bLocated = true; + getInputTo(data)[layerToInsert->outData.front()->getName()] = layerToInsert; + layerToInsert->insData.push_back(data); + + break; + } + } + // if given outputDataIndex is not correct, lets find index that matches *before* layer if (!bLocated) { if (before != nullptr) { diff --git a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp index 402b7c2357d..708ad70bcda 100644 --- a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp +++ b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp @@ -87,7 +87,6 @@ static void insertDiagonalLayerBetween(InferenceEngine::CNNLayerPtr prevLayer, getCreatorLayer(dataPtr) = diagonalWithQuant; diagonalWithQuant->outData.push_back(dataPtr); - // actual insertion CNNNetworkInsertLayer(prevLayer, nextLayer, diagonalWithQuant); } @@ -944,68 +943,74 @@ void InsertSplitAligningFilterPass::run() { auto outputSize = product(++begin(splitOutput->getDims()), end(splitOutput->getDims())); if (currentOffset != ALIGN64(currentOffset)) { - // this split output not beginning from 64 bytes aligned boundary - need to correct by aligning filter layer + // check that this split output actually connected to further layers + if (getInputTo(splitOutput).empty()) { + gnalog() << "Output port: " << splitOutIndex << " of " << l->name << " unconnected, skipping\n"; + } else { + // this split output not beginning from 64 bytes aligned boundary - need to correct by aligning filter layer + // insert the filter + auto filterName = std::string("AlignFilter_") + std::to_string(numOfFilterLayers++); + #ifdef PLOT - // getting list of layers attached to current split output - gnalog() << "Inserted Affine Filter Layer between: " << l->name << " and "; - for (auto &&followingLayers : getInputTo(splitOutput)) { - if (getInputTo(splitOutput).size() != 1) { - gnalog() << "\n "; + // getting list of layers attached to current split output + gnalog() << "Inserted Affine Filter: " << filterName << " between: " << l->name << " and "; + for (auto &&followingLayers : getInputTo(splitOutput)) { + if (getInputTo(splitOutput).size() != 1) { + gnalog() << "\n "; + } + gnalog() << followingLayers.second->name; } - gnalog() << followingLayers.second->name; - } - gnalog() << std::endl; + gnalog() << std::endl; #endif - // insert the filter - auto filterName = std::string("AlignFilter_") + std::to_string(numOfFilterLayers++); - auto filterLayer = - std::make_shared(LayerParams({filterName, "AffineFilter", Precision::FP32})); + auto filterLayer = + std::make_shared(LayerParams({filterName, "AffineFilter", Precision::FP32})); + auto inputData = splitOutput; - auto inputData = splitOutput; + size_t aligned64_offset = std::max(0, static_cast(ALIGN64(currentOffset) - 64)); + size_t + newOutputSize = (currentOffset + ALIGN(outputSize, 8) * bytesPerSplitElement - aligned64_offset) + / bytesPerSplitElement; - size_t aligned64_offset = std::max(0, static_cast(ALIGN64(currentOffset) - 64)); - size_t newOutputSize = (currentOffset + ALIGN(outputSize, 8) * bytesPerSplitElement - aligned64_offset) - / bytesPerSplitElement; + IE_ASSERT(filterLayer != nullptr); - IE_ASSERT(filterLayer != nullptr); + // encodes offset to beginning of split layer input + filterLayer->params["offset"] = std::to_string(aligned64_offset / bytesPerSplitElement); - // encodes offset to beginning of split layer input - filterLayer->params["offset"] = std::to_string(aligned64_offset / bytesPerSplitElement); + auto dims = splitOutput->getTensorDesc().getDims(); + if (dims.size() > 3) { + THROW_GNA_EXCEPTION << "unsupported split layer dims size: " << dims.size(); + } - auto dims = splitOutput->getTensorDesc().getDims(); - if (dims.size() > 3) { - THROW_GNA_EXCEPTION << "unsupported split layer dims size: " << dims.size(); + auto num_rows_out = dims[1] * (dims.size() != 2 ? dims[2] : 1); + std::vector filterWeights(newOutputSize * num_rows_out, 0.f); + + auto offset = (currentOffset - aligned64_offset) / bytesPerSplitElement; + + for (int i = 0; i != outputSize; i++) { + filterWeights[offset] = 1.0f; + offset += newOutputSize + 1; + } + + filterLayer->_weights = make_shared_blob(TensorDesc( + inputData->getTensorDesc().getPrecision(), + SizeVector({filterWeights.size()}), + Layout::C)); + filterLayer->_weights->allocate(); + CopyVectorToBlob(filterLayer->_weights, filterWeights); + + auto outData = std::make_shared(filterName, + TensorDesc(splitOutput->getTensorDesc().getPrecision(), + splitOutput->getTensorDesc().getDims(), + inputData->getTensorDesc().getLayout())); + + auto filterWithQuant = quantized ? + InferenceEngine::injectData(filterLayer) : + filterLayer; + getCreatorLayer(outData) = filterWithQuant; + filterWithQuant->outData.push_back(outData); + CNNNetworkInsertLayer(l, nullptr, filterWithQuant, splitOutIndex); } - - auto num_rows_out = dims[1] * (dims.size() != 2 ? dims[2] : 1); - std::vector filterWeights(newOutputSize * num_rows_out, 0.f); - - auto offset = (currentOffset - aligned64_offset) / bytesPerSplitElement; - - for (int i = 0; i != outputSize; i++) { - filterWeights[offset] = 1.0f; - offset += newOutputSize + 1; - } - - filterLayer->_weights = make_shared_blob(TensorDesc( - inputData->getTensorDesc().getPrecision(), - SizeVector({filterWeights.size()}), - Layout::C)); - filterLayer->_weights->allocate(); - CopyVectorToBlob(filterLayer->_weights, filterWeights); - - auto outData = std::make_shared(filterName, - TensorDesc(splitOutput->getTensorDesc().getPrecision(), - splitOutput->getTensorDesc().getDims(), - inputData->getTensorDesc().getLayout())); - - auto filterWithQuant = quantized ? - InferenceEngine::injectData(filterLayer) : - filterLayer; - getCreatorLayer(outData) = filterWithQuant; - filterWithQuant->outData.push_back(outData); - CNNNetworkInsertLayer(l, nullptr, filterWithQuant, splitOutIndex); } // search data that starts from unaligned location diff --git a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/split_relu.cpp b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/split_relu.cpp new file mode 100644 index 00000000000..55001c6ba4b --- /dev/null +++ b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/split_relu.cpp @@ -0,0 +1,56 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +#include +#include "subgraph_tests/split_relu.hpp" +#include "common_test_utils/test_constants.hpp" +#include "gna/gna_config.hpp" + +using namespace LayerTestsDefinitions; + +namespace { + std::vector>> inputs{ + {{1, 64}}, + {{1, 128}}, + {{1, 96}}, + {{1, 16}} + }; + + std::vector> connect_index{ + {0, 1, 2, 3}, + {0, 1, 2}, + {0, 1, 3}, + {0, 2, 3}, + {1, 2, 3}, + {0, 1}, + {0, 2}, + {0, 3}, + {1, 2}, + {1, 3}, + {2, 3}, + {0}, + {1}, + {2}, + {3}, + }; + + std::vector netPrecisions = {InferenceEngine::Precision::FP32, + InferenceEngine::Precision::FP16, + }; + + std::map additional_config = { + {"GNA_SCALE_FACTOR_1", "1"}, + {"GNA_SCALE_FACTOR_2", "1"}, + {"GNA_SCALE_FACTOR_3", "1"}, + {"GNA_SCALE_FACTOR_4", "1"}, + {std::string(GNA_CONFIG_KEY(COMPACT_MODE)), "NO"} + }; + + INSTANTIATE_TEST_CASE_P(split_connected, SplitRelu, + ::testing::Combine( + ::testing::ValuesIn(inputs), + ::testing::ValuesIn(connect_index), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_GNA), + ::testing::Values(additional_config)), + SplitRelu::getTestCaseName); +} // namespace diff --git a/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/split_relu.hpp b/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/split_relu.hpp new file mode 100644 index 00000000000..de6a254ecde --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/split_relu.hpp @@ -0,0 +1,33 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// +#pragma once + +#include +#include +#include +#include +#include "functional_test_utils/layer_test_utils.hpp" +#include "../../../../../ngraph_functions/include/ngraph_functions/builders.hpp" +#include "common_test_utils/test_constants.hpp" + +namespace LayerTestsDefinitions { + +typedef std::tuple< + std::vector>, //input shapes + std::vector, //index connected layer + InferenceEngine::Precision, //Network precision + std::string, //Device name + std::map //Configuration +> SplitReluTuple; + + +class SplitRelu: + public testing::WithParamInterface, + public LayerTestsUtils::LayerTestsCommon{ +public: + static std::string getTestCaseName(const testing::TestParamInfo &obj); +protected: + void SetUp() override; +}; +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/shared/src/subgraph_tests/split_relu.cpp b/inference-engine/tests/functional/plugin/shared/src/subgraph_tests/split_relu.cpp new file mode 100644 index 00000000000..914df8931b9 --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/src/subgraph_tests/split_relu.cpp @@ -0,0 +1,53 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// +#include +#include +#include +#include +#include +#include "common_test_utils/common_utils.hpp" +#include "functional_test_utils/precision_utils.hpp" +#include "functional_test_utils/skip_tests_config.hpp" +#include "subgraph_tests/split_relu.hpp" + +namespace LayerTestsDefinitions { + std::string SplitRelu::getTestCaseName(const testing::TestParamInfo &obj) { + std::vector> input; + std::vector connect_input; + InferenceEngine::Precision netPrecision; + std::string targetName; + std::map additional_config; + std::tie(input, connect_input, netPrecision, targetName, additional_config) = obj.param; + std::ostringstream results; + + results << "IS=" << CommonTestUtils::vec2str(input[0]) << "_"; + results << "ConnectInput=" << CommonTestUtils::vec2str(connect_input) << "_"; + results << "netPRC=" << netPrecision.name() << "_"; + results << "targetDevice=" << targetName << "_"; + return results.str(); + } + + void SplitRelu::SetUp() { + std::vector> inputs; + std::vector connect_index; + InferenceEngine::Precision netPrecision; + std::map additional_config; + std::tie(inputs, connect_index, netPrecision, targetDevice, additional_config) = this->GetParam(); + configuration.insert(additional_config.begin(), additional_config.end()); + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); + auto input = ngraph::builder::makeParams(ngPrc, {inputs}); + auto split = ngraph::builder::makeSplit(input[0], ngPrc, 4, 1); + ngraph::ResultVector results; + + for (size_t i : connect_index) { + auto relu = std::make_shared(split->output(i)); + results.push_back(std::make_shared(relu)); + } + function = std::make_shared(results, input, "split_relu"); + } + + TEST_P(SplitRelu, CompareWithRefs){ + Run(); + }; +} // namespace LayerTestsDefinitions