[GNA] Fix LSTM Cell channel C being 0 on output (#1174)

* [GNA] get output before activation test

[GNA] SubstituteScaleShiftBroadCastPass fix for cases when there are multiple scaleshifts as an output from the layer

[GNA] Generalize Fix where LSTMCell output was zero due to being fused into activation

[GNA] Fix LSTMCell being zero on channel C if being output layer

* linux build fix
This commit is contained in:
Kamil Magierski 2020-10-06 09:59:03 +02:00 committed by GitHub
parent de1cc8af2e
commit 8abdc32676
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 296 additions and 8 deletions

View File

@ -383,6 +383,7 @@ void GNAPlugin::LoadNetwork(ICNNNetwork & _network) {
passes->registerPass<RemovePermutationsNHWCToNCHWPass>();
}
passes->registerPass<InsertIdentityLayerPass>();
passes->registerPass<BreakFusingOfOutputLayersPass>();
passes->registerPass<InsertCopyLayerPass>();
passes->registerPass<InsertDiagonalLayerPass>();
passes->registerPass<HandleMultipleActivationsForTheLayerPass>();

View File

@ -1158,6 +1158,7 @@ void EltwiseSplitOverChannelsPass::run() {
}
void SubstituteScaleShiftBroadCastPass::run() {
std::map<std::string, InferenceEngine::SizeVector> reshaped_data;
for (auto & l : *pLayers) {
LayerInfo layerInfo(l);
@ -1172,12 +1173,20 @@ void SubstituteScaleShiftBroadCastPass::run() {
THROW_GNA_EXCEPTION << "Cannot get inputs data for layer: " << l->name;
}
if (insData->getDims().size() <= 2) {
bool was_reshaped = reshaped_data.count(insData->getName()) != 0;
InferenceEngine::SizeVector dataDims;
if (was_reshaped) {
dataDims = reshaped_data[insData->getName()];
} else {
dataDims = insData->getDims();
}
if (dataDims.size() <= 2) {
// NC or C cannot do broadcast
continue;
}
auto batchSize = insData->getDims()[0];
auto nElements = product(begin(insData->getDims()), end(insData->getDims())) / batchSize;
auto batchSize = dataDims[0];
auto nElements = product(begin(dataDims), end(dataDims)) / batchSize;
auto weightsElements = scaleShift->_weights->size();
auto weightsBytes = scaleShift->_weights->byteSize();
@ -1186,12 +1195,12 @@ void SubstituteScaleShiftBroadCastPass::run() {
}
// only 3d scaleshift supported where number of c is arbitrary
auto lastD = insData->getDims()[insData->getDims().size() - 1];
auto lastD = dataDims[dataDims.size() - 1];
if (lastD != weightsElements) {
THROW_GNA_EXCEPTION << "Unsupported layer: " << l->name
<< " should have last dim(" << lastD << ") equal to weights(" << weightsElements << ") length";
}
if (insData->getDims().size() == 2) {
if (dataDims.size() == 2) {
THROW_GNA_EXCEPTION << "For layer: " << l->name
<< " weights size(" << weightsElements<< ") invalid: should match input size of(" << lastD << ")";
}
@ -1212,7 +1221,10 @@ void SubstituteScaleShiftBroadCastPass::run() {
// currently data type no providing reshape method of tensor desc
scaleShift->outData.front()->reshape({batchSize, nElements}, Layout::NC);
insData->reshape({batchSize, nElements}, Layout::NC);
if (!was_reshaped) {
reshaped_data[insData->getName()] = insData->getDims();
insData->reshape({batchSize, nElements}, Layout::NC);
}
} else {
THROW_GNA_EXCEPTION << "Not implemented substitution of scaleshift broadcast policy of "
<< getPassManager()->getPolicy().ScaleShiftPolicy << "using layers tiling, layer: " << l->name;
@ -1307,6 +1319,46 @@ void InsertIdentityToLSTMCellPass::run() {
}
}
void BreakFusingOfOutputLayersPass::run() {
#if GNA_LIB_VER == 1
return;
#endif
OutputsDataMap outputsMap;
this->getPassManager()->getNetwork()->getOutputsInfo(outputsMap);
for (auto layer : *pLayers) {
for (int output_idx = 0; output_idx < layer->outData.size(); output_idx++) {
auto& output = layer->outData[output_idx];
auto& input_to = getInputTo(output);
auto output_name = output->getName();
auto is_network_output = outputsMap.find(output_name) != outputsMap.end();
// In cases that this layer is network output you cannot use identity as sole output on
// it since it will possibly be fused and layer outputs will be unavailable
if (is_network_output) {
if (input_to.size() != 1) continue;
if (!LayerInfo(input_to.begin()->second).isActivation()) continue;
CNNLayerPtr additional_output =
std::make_shared<GenericLayer>(LayerParams({output_name + "_side_identity", "identity", InferenceEngine::Precision::FP32}));
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
auto additional_output_quant = quantized ? InferenceEngine::injectData<QuantizedLayerParams>(additional_output) : additional_output;
additional_output_quant->insData.resize(1);
additional_output_quant->outData.resize(1);
auto out_data = DataPtr(new Data(output_name + "_side_identity_data", output->getTensorDesc()));
getCreatorLayer(out_data) = additional_output_quant;
additional_output_quant->outData[0] = out_data;
input_to[additional_output_quant->name] = additional_output_quant;
additional_output_quant->insData[0] = output;
}
}
}
}
void UnrollLSTMCellPass::run() {
InferenceEngine::NetPass::UnrollRNN_if(*getPassManager()->getNetwork(), [] (const RNNCellBase& rnn) -> bool {
if (rnn.clip != 0.0f)

View File

@ -144,6 +144,12 @@ DECL_PASS(InsertConcatAligningFilter);
*/
DECL_PASS(ReorderConcatInputs);
/**
* @brief in cases where a network output layer is connected to only one layer, which is an activation,
* an additional identity layer is inserted so the operation is not fused with the activation,
* allowing the results to be retrieved from said layer
*/
DECL_PASS(BreakFusingOfOutputLayers);
/**
* @brief insert identity at the output of LSTMCell which fixes cases where data is not propagated correctly through network
* and LSTMCell returns all zeroes

View File

@ -0,0 +1,32 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include <subgraph_tests/get_output_before_activation.hpp>
#include "common_test_utils/test_constants.hpp"
namespace SubgraphTestsDefinitions {
namespace {
std::vector<size_t> input_sizes = {
80,
32,
64,
100
};
std::vector<midOutputType> midLayerTypes {
midOutputType::Mul,
midOutputType::Sub,
midOutputType::Sum
};
std::map<std::string, std::string> additional_config = {};
} // namespace
INSTANTIATE_TEST_CASE_P(OutputBeforeActivation, OutputBeforeActivation,
::testing::Combine(
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(InferenceEngine::Precision::FP32),
::testing::ValuesIn(input_sizes),
::testing::ValuesIn(midLayerTypes),
::testing::Values(additional_config)),
OutputBeforeActivation::getTestCaseName);
} // namespace SubgraphTestsDefinitions

View File

@ -0,0 +1,37 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include <subgraph_tests/get_output_before_activation.hpp>
#include "common_test_utils/test_constants.hpp"
namespace SubgraphTestsDefinitions {
namespace {
std::vector<size_t> input_sizes = {
80,
32,
64,
100
};
std::vector<midOutputType> midLayerTypes {
midOutputType::Mul,
midOutputType::Sub,
midOutputType::Sum
};
std::map<std::string, std::string> additional_config = {
{"GNA_COMPACT_MODE", "NO"},
{"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
{"GNA_SCALE_FACTOR_0", "1638.4"},
{"GNA_SCALE_FACTOR_1", "1638.4"},
};
} // namespace
INSTANTIATE_TEST_CASE_P(OutputBeforeActivation, OutputBeforeActivation,
::testing::Combine(
::testing::Values(CommonTestUtils::DEVICE_GNA),
::testing::Values(InferenceEngine::Precision::FP32),
::testing::ValuesIn(input_sizes),
::testing::ValuesIn(midLayerTypes),
::testing::Values(additional_config)),
OutputBeforeActivation::getTestCaseName);
} // namespace SubgraphTestsDefinitions

View File

@ -0,0 +1,32 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include <subgraph_tests/get_output_before_activation.hpp>
#include "common_test_utils/test_constants.hpp"
namespace SubgraphTestsDefinitions {
namespace {
std::vector<size_t> input_sizes = {
80,
32,
64,
100
};
std::vector<midOutputType> midLayerTypes {
midOutputType::Mul,
midOutputType::Sub,
midOutputType::Sum
};
std::map<std::string, std::string> additional_config = {};
} // namespace
INSTANTIATE_TEST_CASE_P(OutputBeforeActivation, OutputBeforeActivation,
::testing::Combine(
::testing::Values(CommonTestUtils::DEVICE_GPU),
::testing::Values(InferenceEngine::Precision::FP32),
::testing::ValuesIn(input_sizes),
::testing::ValuesIn(midLayerTypes),
::testing::Values(additional_config)),
OutputBeforeActivation::getTestCaseName);
} // namespace SubgraphTestsDefinitions

View File

@ -0,0 +1,34 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "common_test_utils/test_common.hpp"
#include "functional_test_utils/layer_test_utils.hpp"
#include <ie_core.hpp>
namespace SubgraphTestsDefinitions {
// Kind of eltwise operation whose (pre-activation) result is requested as a
// network output in the test subgraph.
enum class midOutputType {
    Sum,
    Sub,
    Mul,
};

// Test parameters: <device, precision, input size, mid-layer kind, plugin config>.
// NOTE: `using` alias instead of typedef (modern idiom; same type, same name).
using outputBeforeActivationParams = std::tuple<
    std::string,                        // Target device name
    InferenceEngine::Precision,         // Network precision
    size_t,                             // Input size
    midOutputType,                      // Type of layer that will be an output
    std::map<std::string, std::string>  // Configuration
>;

// Streams a human-readable label for midOutputType (used in test case names).
std::ostream& operator<< (std::ostream& os, const midOutputType& oType);

// Subgraph test: two inputs -> eltwise mid layer -> Tanh activation, with BOTH
// the mid layer and the activation exposed as network outputs.
class OutputBeforeActivation : public LayerTestsUtils::LayerTestsCommon,
                               public testing::WithParamInterface<outputBeforeActivationParams> {
protected:
    void SetUp() override;
public:
    static std::string getTestCaseName(const testing::TestParamInfo<outputBeforeActivationParams> &obj);
    InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo &info) const override;
};
} // namespace SubgraphTestsDefinitions

View File

@ -0,0 +1,96 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include <ie_core.hpp>
#include <memory>
#include <string>
#include <tuple>
#include <vector>
#include <ie_plugin_config.hpp>
#include "common_test_utils/common_utils.hpp"
#include "functional_test_utils/blob_utils.hpp"
#include "functional_test_utils/layer_test_utils.hpp"
#include "functional_test_utils/plugin_cache.hpp"
#include "ngraph_functions/pass/convert_prc.hpp"
#include "ngraph_functions/utils/ngraph_helpers.hpp"
#include "ngraph_functions/builders.hpp"
#include "subgraph_tests/get_output_before_activation.hpp"
namespace SubgraphTestsDefinitions {
std::ostream& operator<<(std::ostream& os, const midOutputType& oType) {
    // Resolve the enum to its printable label, then stream it once.
    const char* label = "Unknown";
    switch (oType) {
    case midOutputType::Sub:
        label = "Sub";
        break;
    case midOutputType::Sum:
        label = "Sum";
        break;
    case midOutputType::Mul:
        label = "Mul";
        break;
    default:
        break;
    }
    return os << label;
}
std::string OutputBeforeActivation::getTestCaseName(const testing::TestParamInfo<outputBeforeActivationParams>& obj) {
    // Compose a readable test name from the parameter tuple.
    std::string device;
    InferenceEngine::Precision precision;
    size_t inputSize;
    midOutputType outType;
    std::map<std::string, std::string> config;
    std::tie(device, precision, inputSize, outType, config) = obj.param;

    std::ostringstream name;
    name << "netPrecision=" << precision.name() << "_"
         << "IS=" << inputSize << "_"
         << "OutputType=" << outType << "_"
         << "targetDevice=" << device;
    return name.str();
}
void OutputBeforeActivation::SetUp() {
    InferenceEngine::Precision netPrecision;
    std::map<std::string, std::string> config;
    size_t inputSize;
    midOutputType outputType;
    std::tie(targetDevice, netPrecision, inputSize, outputType, config) = this->GetParam();
    configuration.insert(config.begin(), config.end());
    auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);

    // Two identical [1, inputSize] parameters feed the eltwise mid layer.
    std::vector<size_t> input_dims { 1, inputSize };
    auto input_parameter = ngraph::builder::makeParams(ngPrc, {input_dims, input_dims});
    auto input0 = input_parameter[0];
    auto input1 = input_parameter[1];

    // Map the requested mid-layer kind onto the eltwise operation to build.
    auto eltwise_kind = ngraph::helpers::EltwiseTypes::ADD;
    switch (outputType) {
    case SubgraphTestsDefinitions::midOutputType::Sum:
        eltwise_kind = ngraph::helpers::EltwiseTypes::ADD;
        break;
    case SubgraphTestsDefinitions::midOutputType::Sub:
        eltwise_kind = ngraph::helpers::EltwiseTypes::SUBTRACT;
        break;
    case SubgraphTestsDefinitions::midOutputType::Mul:
        eltwise_kind = ngraph::helpers::EltwiseTypes::MULTIPLY;
        break;
    default:
        GTEST_FAIL() << "Unknown midOutputType";
    }
    std::shared_ptr<ngraph::Node> midLayer = ngraph::builder::makeEltwise(input0, input1, eltwise_kind);

    // Expose BOTH the eltwise result (pre-activation) and the activation itself
    // as network outputs.
    auto act = ngraph::builder::makeActivation(midLayer, ngPrc, ngraph::helpers::ActivationTypes::Tanh);
    ngraph::OutputVector outputs;
    outputs.insert(outputs.end(), {midLayer, act});
    function = std::make_shared<ngraph::Function>(outputs, input_parameter, "output_before_activation");
}
InferenceEngine::Blob::Ptr OutputBeforeActivation::GenerateInput(const InferenceEngine::InputInfo &info) const {
    // Fill the blob with values of range 2 starting at -1, resolution 100
    // (see createAndFillBlob) — same distribution for every input.
    const auto& desc = info.getTensorDesc();
    return FuncTestUtils::createAndFillBlob(desc, 2, -1, 100);
}
// Build, compile and infer the subgraph, comparing plugin results against the
// reference implementation. (Removed stray ';' after the macro body — it was
// an empty declaration.)
TEST_P(OutputBeforeActivation, CompareWithRefs) {
    Run();
}
} // namespace SubgraphTestsDefinitions

View File

@ -16,7 +16,6 @@ TEST_P(LSTMCellTestBase, GNA_sw_fp32_single_lstm_test) {
}
TEST_P(LSTMCellTestBase, GNA_I16_single_lstm_test) {
DISABLE_TEST_ON_GNA2
runSingleLSTMTest( {
{"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
{"GNA_COMPACT_MODE", "NO"},
@ -28,7 +27,6 @@ TEST_P(LSTMCellTestBase, GNA_I16_single_lstm_test) {
}
TEST_P(LSTMCellTestBase, GNA_I8_single_lstm_test) {
DISABLE_TEST_ON_GNA2
runSingleLSTMTest({
{"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
{"GNA_COMPACT_MODE", "NO"},