[GNA] Fix LSTM Cell channel C being 0 on output (#1174)

* [GNA] Add "get output before activation" test
  [GNA] Fix SubstituteScaleShiftBroadCastPass for cases when there are multiple ScaleShift layers as outputs from a layer
  [GNA] Generalize the fix where LSTMCell output was zero due to being fused into an activation
  [GNA] Fix LSTMCell being zero on channel C when it is an output layer
* Linux build fix
2020-10-06 09:59:03 +02:00 · 2020-10-06 09:59:03 +02:00 · 8abdc32676
commit 8abdc32676
parent de1cc8af2e
9 changed files with 296 additions and 8 deletions
--- a/inference-engine/src/gna_plugin/gna_plugin.cpp
+++ b/inference-engine/src/gna_plugin/gna_plugin.cpp
@ -383,6 +383,7 @@ void GNAPlugin::LoadNetwork(ICNNNetwork & _network) {
            passes->registerPass<RemovePermutationsNHWCToNCHWPass>();
        }
        passes->registerPass<InsertIdentityLayerPass>();
+        passes->registerPass<BreakFusingOfOutputLayersPass>();
        passes->registerPass<InsertCopyLayerPass>();
        passes->registerPass<InsertDiagonalLayerPass>();
        passes->registerPass<HandleMultipleActivationsForTheLayerPass>();
--- a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp
+++ b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp
@ -1158,6 +1158,7 @@ void EltwiseSplitOverChannelsPass::run() {
 }

 void SubstituteScaleShiftBroadCastPass::run() {
+    std::map<std::string, InferenceEngine::SizeVector> reshaped_data;
    for (auto & l : *pLayers) {
        LayerInfo layerInfo(l);

@ -1172,12 +1173,20 @@ void SubstituteScaleShiftBroadCastPass::run() {
            THROW_GNA_EXCEPTION << "Cannot get inputs data for layer: " << l->name;
        }

-        if (insData->getDims().size() <= 2) {
+        bool was_reshaped = reshaped_data.count(insData->getName()) != 0;
+        InferenceEngine::SizeVector dataDims;
+        if (was_reshaped) {
+            dataDims = reshaped_data[insData->getName()];
+        } else {
+            dataDims = insData->getDims();
+        }
+
+        if (dataDims.size() <= 2) {
            // NC or C cannot do broadcast
            continue;
        }
-        auto batchSize = insData->getDims()[0];
-        auto nElements = product(begin(insData->getDims()), end(insData->getDims())) / batchSize;
+        auto batchSize = dataDims[0];
+        auto nElements = product(begin(dataDims), end(dataDims)) / batchSize;
        auto weightsElements = scaleShift->_weights->size();
        auto weightsBytes = scaleShift->_weights->byteSize();

@ -1186,12 +1195,12 @@ void SubstituteScaleShiftBroadCastPass::run() {
        }

        // only 3d scaleshift supported where number of c is arbitrary
-        auto lastD = insData->getDims()[insData->getDims().size() - 1];
+        auto lastD = dataDims[dataDims.size() - 1];
        if (lastD != weightsElements) {
            THROW_GNA_EXCEPTION << "Unsupported layer: " << l->name
                                << " should have last dim(" << lastD << ") equal to weights(" << weightsElements << ") length";
        }
-        if (insData->getDims().size() == 2) {
+        if (dataDims.size() == 2) {
            THROW_GNA_EXCEPTION << "For layer: " << l->name
                                << " weights size(" << weightsElements<< ") invalid: should match input size of(" << lastD << ")";
        }
@ -1212,7 +1221,10 @@ void SubstituteScaleShiftBroadCastPass::run() {

            // currently data type no providing reshape method of tensor desc
            scaleShift->outData.front()->reshape({batchSize, nElements}, Layout::NC);
-            insData->reshape({batchSize, nElements}, Layout::NC);
+            if (!was_reshaped) {
+                reshaped_data[insData->getName()] = insData->getDims();
+                insData->reshape({batchSize, nElements}, Layout::NC);
+            }
        } else {
            THROW_GNA_EXCEPTION << "Not implemented substitution of scaleshift broadcast policy of "
                                << getPassManager()->getPolicy().ScaleShiftPolicy <<  "using layers tiling, layer: " << l->name;
@ -1307,6 +1319,46 @@ void InsertIdentityToLSTMCellPass::run() {
    }
 }

+// Keeps a network output readable when its only consumer is an activation: a side "identity"
+// layer is attached to that output so the producer is no longer fused into the activation.
+void BreakFusingOfOutputLayersPass::run() {
+#if GNA_LIB_VER == 1
+    return;
+#endif
+    OutputsDataMap outputsMap;
+    this->getPassManager()->getNetwork()->getOutputsInfo(outputsMap);
+    for (const auto& layer : *pLayers) {
+        for (auto& output : layer->outData) {
+            auto& input_to = getInputTo(output);
+            const auto output_name = output->getName();
+
+            // only data registered as a network output is affected
+            if (outputsMap.find(output_name) == outputsMap.end()) continue;
+            // ... and only when its single consumer is an activation it could be fused into
+            if (input_to.size() != 1) continue;
+            if (!LayerInfo(input_to.begin()->second).isActivation()) continue;
+
+            CNNLayerPtr additional_output =
+                std::make_shared<GenericLayer>(LayerParams({output_name + "_side_identity", "identity", InferenceEngine::Precision::FP32}));
+
+            // mirror the producer: inject quantization data into the new layer only if the producer has it
+            auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
+            auto additional_output_quant = quantized ? InferenceEngine::injectData<QuantizedLayerParams>(additional_output) : additional_output;
+
+            additional_output_quant->insData.resize(1);
+            additional_output_quant->outData.resize(1);
+
+            auto out_data = DataPtr(new Data(output_name + "_side_identity_data", output->getTensorDesc()));
+            getCreatorLayer(out_data) = additional_output_quant;
+            additional_output_quant->outData[0] = out_data;
+
+            // register the identity as a second consumer of the output and wire its input
+            input_to[additional_output_quant->name] = additional_output_quant;
+            additional_output_quant->insData[0] = output;
+        }
+    }
+}
+
 void UnrollLSTMCellPass::run() {
    InferenceEngine::NetPass::UnrollRNN_if(*getPassManager()->getNetwork(), [] (const RNNCellBase& rnn) -> bool {
        if (rnn.clip != 0.0f)
--- a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.hpp
+++ b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.hpp
@ -144,6 +144,12 @@ DECL_PASS(InsertConcatAligningFilter);
 */
 DECL_PASS(ReorderConcatInputs);

+/**
+* @brief in cases where a network output layer is connected to only one layer, which is an activation, an additional identity is inserted
+* so the operation is not fused with the activation, allowing the results to be read from said layer
+*/
+DECL_PASS(BreakFusingOfOutputLayers);
+
 /**
 * @brief insert identity at the output of LSTMCell which fixes cases where data is not propagated correctly through network
 * and LSTMCell returns all zeroes
--- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/get_output_before_activation.cpp
+++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/get_output_before_activation.cpp
@ -0,0 +1,32 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+#include <subgraph_tests/get_output_before_activation.hpp>
+#include "common_test_utils/test_constants.hpp"
+
+namespace SubgraphTestsDefinitions {
+namespace {
+// Input sizes the test is instantiated with.
+const std::vector<size_t> input_sizes = {
+    80,
+    32,
+    64,
+    100
+};
+
+// Kinds of mid layer the test is instantiated with.
+const std::vector<midOutputType> midLayerTypes {
+    midOutputType::Mul,
+    midOutputType::Sub,
+    midOutputType::Sum
+};
+
+// No additional plugin configuration is passed for this device.
+const std::map<std::string, std::string> additional_config = {};
+} // namespace
+
+INSTANTIATE_TEST_CASE_P(OutputBeforeActivation, OutputBeforeActivation,
+    ::testing::Combine(
+        ::testing::Values(CommonTestUtils::DEVICE_CPU),
+        ::testing::Values(InferenceEngine::Precision::FP32),
+        ::testing::ValuesIn(input_sizes),
+        ::testing::ValuesIn(midLayerTypes),
+        ::testing::Values(additional_config)),
+    OutputBeforeActivation::getTestCaseName);
+} // namespace SubgraphTestsDefinitions
--- a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/get_output_before_activation.cpp
+++ b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/get_output_before_activation.cpp
@ -0,0 +1,37 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+#include <subgraph_tests/get_output_before_activation.hpp>
+#include "common_test_utils/test_constants.hpp"
+
+namespace SubgraphTestsDefinitions {
+namespace {
+// Input sizes the test is instantiated with.
+const std::vector<size_t> input_sizes = {
+    80,
+    32,
+    64,
+    100
+};
+
+// Kinds of mid layer the test is instantiated with.
+const std::vector<midOutputType> midLayerTypes {
+    midOutputType::Mul,
+    midOutputType::Sub,
+    midOutputType::Sum
+};
+
+// Plugin configuration passed to every instance of the test.
+const std::map<std::string, std::string> additional_config = {
+    {"GNA_COMPACT_MODE", "NO"},
+    {"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
+    {"GNA_SCALE_FACTOR_0", "1638.4"},
+    {"GNA_SCALE_FACTOR_1", "1638.4"},
+};
+} // namespace
+
+INSTANTIATE_TEST_CASE_P(OutputBeforeActivation, OutputBeforeActivation,
+    ::testing::Combine(
+        ::testing::Values(CommonTestUtils::DEVICE_GNA),
+        ::testing::Values(InferenceEngine::Precision::FP32),
+        ::testing::ValuesIn(input_sizes),
+        ::testing::ValuesIn(midLayerTypes),
+        ::testing::Values(additional_config)),
+    OutputBeforeActivation::getTestCaseName);
+} // namespace SubgraphTestsDefinitions
--- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/subgraph_tests/get_output_before_activation.cpp
+++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/subgraph_tests/get_output_before_activation.cpp
@ -0,0 +1,32 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+#include <subgraph_tests/get_output_before_activation.hpp>
+#include "common_test_utils/test_constants.hpp"
+
+namespace SubgraphTestsDefinitions {
+namespace {
+// Input sizes the test is instantiated with.
+const std::vector<size_t> input_sizes = {
+    80,
+    32,
+    64,
+    100
+};
+
+// Kinds of mid layer the test is instantiated with.
+const std::vector<midOutputType> midLayerTypes {
+    midOutputType::Mul,
+    midOutputType::Sub,
+    midOutputType::Sum
+};
+
+// No additional plugin configuration is passed for this device.
+const std::map<std::string, std::string> additional_config = {};
+} // namespace
+
+INSTANTIATE_TEST_CASE_P(OutputBeforeActivation, OutputBeforeActivation,
+    ::testing::Combine(
+        ::testing::Values(CommonTestUtils::DEVICE_GPU),
+        ::testing::Values(InferenceEngine::Precision::FP32),
+        ::testing::ValuesIn(input_sizes),
+        ::testing::ValuesIn(midLayerTypes),
+        ::testing::Values(additional_config)),
+    OutputBeforeActivation::getTestCaseName);
+} // namespace SubgraphTestsDefinitions
--- a/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/get_output_before_activation.hpp
+++ b/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/get_output_before_activation.hpp
@ -0,0 +1,34 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+#pragma once
+
+#include "common_test_utils/test_common.hpp"
+#include "functional_test_utils/layer_test_utils.hpp"
+#include <ie_core.hpp>
+
+namespace SubgraphTestsDefinitions {
+// Kind of layer that the test exposes as an additional network output.
+enum class midOutputType {
+    Sum,
+    Sub,
+    Mul,
+};
+
+// Parameter set of a single OutputBeforeActivation test instance.
+using outputBeforeActivationParams = std::tuple<
+    std::string,                        // Target device name
+    InferenceEngine::Precision,         // Network precision
+    size_t,                             // Input size
+    midOutputType,                      // Type of layer that will be an output
+    std::map<std::string, std::string>  // Configuration
+>;
+
+// Streams a midOutputType value into os.
+std::ostream& operator<< (std::ostream& os, const midOutputType& oType);
+
+// Value-parameterized test fixture driven by outputBeforeActivationParams.
+class OutputBeforeActivation : public LayerTestsUtils::LayerTestsCommon,
+    public testing::WithParamInterface<outputBeforeActivationParams> {
+public:
+    // Produces the name of a test instance from its parameters.
+    static std::string getTestCaseName(const testing::TestParamInfo<outputBeforeActivationParams> &obj);
+    // Creates the input blob for the given network input.
+    InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo &info) const override;
+
+protected:
+    // Reads the test parameters and prepares the fixture.
+    void SetUp() override;
+};
+} // namespace SubgraphTestsDefinitions
--- a/inference-engine/tests/functional/plugin/shared/src/subgraph_tests/get_output_before_activation.cpp
+++ b/inference-engine/tests/functional/plugin/shared/src/subgraph_tests/get_output_before_activation.cpp
@ -0,0 +1,96 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+#include <ie_core.hpp>
+#include <memory>
+#include <string>
+#include <tuple>
+#include <vector>
+#include <ie_plugin_config.hpp>
+
+#include "common_test_utils/common_utils.hpp"
+#include "functional_test_utils/blob_utils.hpp"
+#include "functional_test_utils/layer_test_utils.hpp"
+#include "functional_test_utils/plugin_cache.hpp"
+#include "ngraph_functions/pass/convert_prc.hpp"
+
+#include "ngraph_functions/utils/ngraph_helpers.hpp"
+#include "ngraph_functions/builders.hpp"
+#include "subgraph_tests/get_output_before_activation.hpp"
+
+namespace SubgraphTestsDefinitions {
+// Writes the textual name of oType ("Sub", "Sum", "Mul", or "Unknown" for any other value) to os.
+std::ostream& operator<<(std::ostream& os, const midOutputType& oType) {
+    const char* label = "Unknown";
+    switch (oType) {
+    case midOutputType::Sub:
+        label = "Sub";
+        break;
+    case midOutputType::Sum:
+        label = "Sum";
+        break;
+    case midOutputType::Mul:
+        label = "Mul";
+        break;
+    default:
+        break;
+    }
+    return os << label;
+}
+
+// Composes the test-case name from the network precision, input size, mid-layer type and
+// target device of the given parameter set; the configuration map is not part of the name.
+std::string OutputBeforeActivation::getTestCaseName(const testing::TestParamInfo<outputBeforeActivationParams>& obj) {
+    // Tuple layout: <device, precision, input size, mid-layer type, configuration>.
+    const auto& device = std::get<0>(obj.param);
+    const auto& precision = std::get<1>(obj.param);
+    const auto inputLength = std::get<2>(obj.param);
+    const auto midType = std::get<3>(obj.param);
+
+    std::ostringstream name;
+    name << "netPrecision=" << precision.name() << "_"
+         << "IS=" << inputLength << "_"
+         << "OutputType=" << midType << "_"
+         << "targetDevice=" << device;
+    return name.str();
+}
+
+// Builds the tested function: two parameters of shape {1, inputSize} feed an eltwise layer of the
+// requested kind, a Tanh activation consumes it, and both the eltwise layer and the activation
+// are registered as outputs of the function.
+void OutputBeforeActivation::SetUp() {
+    InferenceEngine::Precision netPrecision;
+    size_t inputSize;
+    midOutputType outputType;
+    std::map<std::string, std::string> config;
+    std::tie(targetDevice, netPrecision, inputSize, outputType, config) = this->GetParam();
+    configuration.insert(config.begin(), config.end());
+    auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
+
+    std::vector<size_t> input_dims { 1, inputSize };
+    auto input_parameter = ngraph::builder::makeParams(ngPrc, {input_dims, input_dims});
+
+    // Translate the requested mid-layer kind into the eltwise operation to build;
+    // an unrecognised kind fails the test before any layer is created.
+    ngraph::helpers::EltwiseTypes eltwiseKind{};
+    switch (outputType) {
+    case SubgraphTestsDefinitions::midOutputType::Sum:
+        eltwiseKind = ngraph::helpers::EltwiseTypes::ADD;
+        break;
+    case SubgraphTestsDefinitions::midOutputType::Sub:
+        eltwiseKind = ngraph::helpers::EltwiseTypes::SUBTRACT;
+        break;
+    case SubgraphTestsDefinitions::midOutputType::Mul:
+        eltwiseKind = ngraph::helpers::EltwiseTypes::MULTIPLY;
+        break;
+    default:
+        GTEST_FAIL() << "Unknown midOutputType";
+    }
+    std::shared_ptr<ngraph::Node> midLayer =
+        ngraph::builder::makeEltwise(input_parameter[0], input_parameter[1], eltwiseKind);
+
+    auto act = ngraph::builder::makeActivation(midLayer, ngPrc, ngraph::helpers::ActivationTypes::Tanh);
+    ngraph::OutputVector outputs{midLayer, act};
+    function = std::make_shared<ngraph::Function>(outputs, input_parameter, "output_before_activation");
+}
+
+// Creates the input blob for the given network input via FuncTestUtils::createAndFillBlob,
+// using the input's tensor descriptor and the fill arguments 2, -1 and 100.
+// NOTE(review): the meaning of the three numeric arguments is defined by createAndFillBlob - confirm there.
+InferenceEngine::Blob::Ptr OutputBeforeActivation::GenerateInput(const InferenceEngine::InputInfo &info) const {
+    const auto& desc = info.getTensorDesc();
+    return FuncTestUtils::createAndFillBlob(desc, 2, -1, 100);
+}
+
+// Invokes the fixture's Run() for every instantiated parameter set.
+TEST_P(OutputBeforeActivation, CompareWithRefs) {
+    Run();
+}
+} // namespace SubgraphTestsDefinitions
--- a/inference-engine/tests_deprecated/functional/gna/shared_tests_instance/lstm/lstm_cell_test.cpp
+++ b/inference-engine/tests_deprecated/functional/gna/shared_tests_instance/lstm/lstm_cell_test.cpp
@ -16,7 +16,6 @@ TEST_P(LSTMCellTestBase, GNA_sw_fp32_single_lstm_test) {
 }

 TEST_P(LSTMCellTestBase, GNA_I16_single_lstm_test) {
-    DISABLE_TEST_ON_GNA2
    runSingleLSTMTest( {
        {"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
        {"GNA_COMPACT_MODE", "NO"},
@ -28,7 +27,6 @@ TEST_P(LSTMCellTestBase, GNA_I16_single_lstm_test) {
 }

 TEST_P(LSTMCellTestBase, GNA_I8_single_lstm_test) {
-    DISABLE_TEST_ON_GNA2
    runSingleLSTMTest({
        {"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
        {"GNA_COMPACT_MODE", "NO"},