[GNA] Fix LSTM Cell channel C being 0 on output (#1174)
* [GNA] get output before activation test [GNA] SubstituteScaleShiftBroadCastPass fix for cases when there are multiple scaleshifts as an output from the layer [GNA] Generalize Fix where LSTMCell output was zero due to being fused into activation [GNA] Fix LSTMCell being zero on channel C if being output layer * linux build fix
This commit is contained in:
parent
de1cc8af2e
commit
8abdc32676
@ -383,6 +383,7 @@ void GNAPlugin::LoadNetwork(ICNNNetwork & _network) {
|
||||
passes->registerPass<RemovePermutationsNHWCToNCHWPass>();
|
||||
}
|
||||
passes->registerPass<InsertIdentityLayerPass>();
|
||||
passes->registerPass<BreakFusingOfOutputLayersPass>();
|
||||
passes->registerPass<InsertCopyLayerPass>();
|
||||
passes->registerPass<InsertDiagonalLayerPass>();
|
||||
passes->registerPass<HandleMultipleActivationsForTheLayerPass>();
|
||||
|
@ -1158,6 +1158,7 @@ void EltwiseSplitOverChannelsPass::run() {
|
||||
}
|
||||
|
||||
void SubstituteScaleShiftBroadCastPass::run() {
|
||||
std::map<std::string, InferenceEngine::SizeVector> reshaped_data;
|
||||
for (auto & l : *pLayers) {
|
||||
LayerInfo layerInfo(l);
|
||||
|
||||
@ -1172,12 +1173,20 @@ void SubstituteScaleShiftBroadCastPass::run() {
|
||||
THROW_GNA_EXCEPTION << "Cannot get inputs data for layer: " << l->name;
|
||||
}
|
||||
|
||||
if (insData->getDims().size() <= 2) {
|
||||
bool was_reshaped = reshaped_data.count(insData->getName()) != 0;
|
||||
InferenceEngine::SizeVector dataDims;
|
||||
if (was_reshaped) {
|
||||
dataDims = reshaped_data[insData->getName()];
|
||||
} else {
|
||||
dataDims = insData->getDims();
|
||||
}
|
||||
|
||||
if (dataDims.size() <= 2) {
|
||||
// NC or C cannot do broadcast
|
||||
continue;
|
||||
}
|
||||
auto batchSize = insData->getDims()[0];
|
||||
auto nElements = product(begin(insData->getDims()), end(insData->getDims())) / batchSize;
|
||||
auto batchSize = dataDims[0];
|
||||
auto nElements = product(begin(dataDims), end(dataDims)) / batchSize;
|
||||
auto weightsElements = scaleShift->_weights->size();
|
||||
auto weightsBytes = scaleShift->_weights->byteSize();
|
||||
|
||||
@ -1186,12 +1195,12 @@ void SubstituteScaleShiftBroadCastPass::run() {
|
||||
}
|
||||
|
||||
// only 3d scaleshift supported where number of c is arbitrary
|
||||
auto lastD = insData->getDims()[insData->getDims().size() - 1];
|
||||
auto lastD = dataDims[dataDims.size() - 1];
|
||||
if (lastD != weightsElements) {
|
||||
THROW_GNA_EXCEPTION << "Unsupported layer: " << l->name
|
||||
<< " should have last dim(" << lastD << ") equal to weights(" << weightsElements << ") length";
|
||||
}
|
||||
if (insData->getDims().size() == 2) {
|
||||
if (dataDims.size() == 2) {
|
||||
THROW_GNA_EXCEPTION << "For layer: " << l->name
|
||||
<< " weights size(" << weightsElements<< ") invalid: should match input size of(" << lastD << ")";
|
||||
}
|
||||
@ -1212,7 +1221,10 @@ void SubstituteScaleShiftBroadCastPass::run() {
|
||||
|
||||
// currently data type no providing reshape method of tensor desc
|
||||
scaleShift->outData.front()->reshape({batchSize, nElements}, Layout::NC);
|
||||
insData->reshape({batchSize, nElements}, Layout::NC);
|
||||
if (!was_reshaped) {
|
||||
reshaped_data[insData->getName()] = insData->getDims();
|
||||
insData->reshape({batchSize, nElements}, Layout::NC);
|
||||
}
|
||||
} else {
|
||||
THROW_GNA_EXCEPTION << "Not implemented substitution of scaleshift broadcast policy of "
|
||||
<< getPassManager()->getPolicy().ScaleShiftPolicy << "using layers tiling, layer: " << l->name;
|
||||
@ -1307,6 +1319,46 @@ void InsertIdentityToLSTMCellPass::run() {
|
||||
}
|
||||
}
|
||||
|
||||
void BreakFusingOfOutputLayersPass::run() {
|
||||
#if GNA_LIB_VER == 1
|
||||
return;
|
||||
#endif
|
||||
OutputsDataMap outputsMap;
|
||||
this->getPassManager()->getNetwork()->getOutputsInfo(outputsMap);
|
||||
for (auto layer : *pLayers) {
|
||||
for (int output_idx = 0; output_idx < layer->outData.size(); output_idx++) {
|
||||
auto& output = layer->outData[output_idx];
|
||||
auto& input_to = getInputTo(output);
|
||||
|
||||
auto output_name = output->getName();
|
||||
auto is_network_output = outputsMap.find(output_name) != outputsMap.end();
|
||||
// In cases that this layer is network output you cannot use identity as sole output on
|
||||
// it since it will possibly be fused and layer outputs will be unavailable
|
||||
if (is_network_output) {
|
||||
if (input_to.size() != 1) continue;
|
||||
if (!LayerInfo(input_to.begin()->second).isActivation()) continue;
|
||||
|
||||
CNNLayerPtr additional_output =
|
||||
std::make_shared<GenericLayer>(LayerParams({output_name + "_side_identity", "identity", InferenceEngine::Precision::FP32}));
|
||||
|
||||
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
|
||||
auto additional_output_quant = quantized ? InferenceEngine::injectData<QuantizedLayerParams>(additional_output) : additional_output;
|
||||
|
||||
additional_output_quant->insData.resize(1);
|
||||
additional_output_quant->outData.resize(1);
|
||||
|
||||
auto out_data = DataPtr(new Data(output_name + "_side_identity_data", output->getTensorDesc()));
|
||||
getCreatorLayer(out_data) = additional_output_quant;
|
||||
|
||||
additional_output_quant->outData[0] = out_data;
|
||||
|
||||
input_to[additional_output_quant->name] = additional_output_quant;
|
||||
additional_output_quant->insData[0] = output;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void UnrollLSTMCellPass::run() {
|
||||
InferenceEngine::NetPass::UnrollRNN_if(*getPassManager()->getNetwork(), [] (const RNNCellBase& rnn) -> bool {
|
||||
if (rnn.clip != 0.0f)
|
||||
|
@ -144,6 +144,12 @@ DECL_PASS(InsertConcatAligningFilter);
|
||||
*/
|
||||
DECL_PASS(ReorderConcatInputs);
|
||||
|
||||
/**
|
||||
* @brief when a network output is consumed by exactly one layer and that layer is an activation, an additional identity is inserted
|
||||
* so the operation is not fused with the activation allowing to get the results from said layer
|
||||
*/
|
||||
DECL_PASS(BreakFusingOfOutputLayers);
|
||||
|
||||
/**
|
||||
* @brief insert identity at the output of LSTMCell which fixes cases where data is not propagated correctly through network
|
||||
* and LSTMCell returns all zeroes
|
||||
|
@ -0,0 +1,32 @@
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
#include <subgraph_tests/get_output_before_activation.hpp>
|
||||
#include "common_test_utils/test_constants.hpp"
|
||||
|
||||
namespace SubgraphTestsDefinitions {
|
||||
namespace {
|
||||
std::vector<size_t> input_sizes = {
|
||||
80,
|
||||
32,
|
||||
64,
|
||||
100
|
||||
};
|
||||
|
||||
std::vector<midOutputType> midLayerTypes {
|
||||
midOutputType::Mul,
|
||||
midOutputType::Sub,
|
||||
midOutputType::Sum
|
||||
};
|
||||
|
||||
std::map<std::string, std::string> additional_config = {};
|
||||
} // namespace
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(OutputBeforeActivation, OutputBeforeActivation,
|
||||
::testing::Combine(
|
||||
::testing::Values(CommonTestUtils::DEVICE_CPU),
|
||||
::testing::Values(InferenceEngine::Precision::FP32),
|
||||
::testing::ValuesIn(input_sizes),
|
||||
::testing::ValuesIn(midLayerTypes),
|
||||
::testing::Values(additional_config)),
|
||||
OutputBeforeActivation::getTestCaseName);
|
||||
} // namespace SubgraphTestsDefinitions
|
@ -0,0 +1,37 @@
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
#include <subgraph_tests/get_output_before_activation.hpp>
|
||||
#include "common_test_utils/test_constants.hpp"
|
||||
|
||||
namespace SubgraphTestsDefinitions {
|
||||
namespace {
|
||||
std::vector<size_t> input_sizes = {
|
||||
80,
|
||||
32,
|
||||
64,
|
||||
100
|
||||
};
|
||||
|
||||
std::vector<midOutputType> midLayerTypes {
|
||||
midOutputType::Mul,
|
||||
midOutputType::Sub,
|
||||
midOutputType::Sum
|
||||
};
|
||||
|
||||
std::map<std::string, std::string> additional_config = {
|
||||
{"GNA_COMPACT_MODE", "NO"},
|
||||
{"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
|
||||
{"GNA_SCALE_FACTOR_0", "1638.4"},
|
||||
{"GNA_SCALE_FACTOR_1", "1638.4"},
|
||||
};
|
||||
} // namespace
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(OutputBeforeActivation, OutputBeforeActivation,
|
||||
::testing::Combine(
|
||||
::testing::Values(CommonTestUtils::DEVICE_GNA),
|
||||
::testing::Values(InferenceEngine::Precision::FP32),
|
||||
::testing::ValuesIn(input_sizes),
|
||||
::testing::ValuesIn(midLayerTypes),
|
||||
::testing::Values(additional_config)),
|
||||
OutputBeforeActivation::getTestCaseName);
|
||||
} // namespace SubgraphTestsDefinitions
|
@ -0,0 +1,32 @@
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
#include <subgraph_tests/get_output_before_activation.hpp>
|
||||
#include "common_test_utils/test_constants.hpp"
|
||||
|
||||
namespace SubgraphTestsDefinitions {
|
||||
namespace {
|
||||
std::vector<size_t> input_sizes = {
|
||||
80,
|
||||
32,
|
||||
64,
|
||||
100
|
||||
};
|
||||
|
||||
std::vector<midOutputType> midLayerTypes {
|
||||
midOutputType::Mul,
|
||||
midOutputType::Sub,
|
||||
midOutputType::Sum
|
||||
};
|
||||
|
||||
std::map<std::string, std::string> additional_config = {};
|
||||
} // namespace
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(OutputBeforeActivation, OutputBeforeActivation,
|
||||
::testing::Combine(
|
||||
::testing::Values(CommonTestUtils::DEVICE_GPU),
|
||||
::testing::Values(InferenceEngine::Precision::FP32),
|
||||
::testing::ValuesIn(input_sizes),
|
||||
::testing::ValuesIn(midLayerTypes),
|
||||
::testing::Values(additional_config)),
|
||||
OutputBeforeActivation::getTestCaseName);
|
||||
} // namespace SubgraphTestsDefinitions
|
@ -0,0 +1,34 @@
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
#pragma once
|
||||
|
||||
#include "common_test_utils/test_common.hpp"
|
||||
#include "functional_test_utils/layer_test_utils.hpp"
|
||||
#include <ie_core.hpp>
|
||||
|
||||
namespace SubgraphTestsDefinitions {
|
||||
enum class midOutputType {
|
||||
Sum,
|
||||
Sub,
|
||||
Mul,
|
||||
};
|
||||
|
||||
typedef std::tuple<
|
||||
std::string, // Target device name
|
||||
InferenceEngine::Precision, // Network precision
|
||||
size_t, // Input size
|
||||
midOutputType, // Type of layer that will be an output
|
||||
std::map<std::string, std::string> // Configuration
|
||||
> outputBeforeActivationParams;
|
||||
|
||||
std::ostream& operator<< (std::ostream& os, const midOutputType& oType);
|
||||
|
||||
class OutputBeforeActivation : public LayerTestsUtils::LayerTestsCommon,
|
||||
public testing::WithParamInterface<outputBeforeActivationParams> {
|
||||
protected:
|
||||
void SetUp() override;
|
||||
public:
|
||||
static std::string getTestCaseName(const testing::TestParamInfo<outputBeforeActivationParams> &obj);
|
||||
InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo &info) const override;
|
||||
};
|
||||
} // namespace SubgraphTestsDefinitions
|
@ -0,0 +1,96 @@
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
#include <ie_core.hpp>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
#include <ie_plugin_config.hpp>
|
||||
|
||||
#include "common_test_utils/common_utils.hpp"
|
||||
#include "functional_test_utils/blob_utils.hpp"
|
||||
#include "functional_test_utils/layer_test_utils.hpp"
|
||||
#include "functional_test_utils/plugin_cache.hpp"
|
||||
#include "ngraph_functions/pass/convert_prc.hpp"
|
||||
|
||||
#include "ngraph_functions/utils/ngraph_helpers.hpp"
|
||||
#include "ngraph_functions/builders.hpp"
|
||||
#include "subgraph_tests/get_output_before_activation.hpp"
|
||||
|
||||
namespace SubgraphTestsDefinitions {
|
||||
std::ostream& operator<<(std::ostream& os, const midOutputType& oType) {
|
||||
switch (oType) {
|
||||
case midOutputType::Sub:
|
||||
return (os << "Sub");
|
||||
case midOutputType::Sum:
|
||||
return (os << "Sum");
|
||||
case midOutputType::Mul:
|
||||
return (os << "Mul");
|
||||
default:
|
||||
return (os << "Unknown");
|
||||
}
|
||||
}
|
||||
|
||||
/// Composes the test-case name from precision, input size, output type and target device.
/// The configuration map (last tuple element) does not contribute to the name.
std::string OutputBeforeActivation::getTestCaseName(const testing::TestParamInfo<outputBeforeActivationParams>& obj) {
    const auto& params = obj.param;
    const std::string& device = std::get<0>(params);
    const InferenceEngine::Precision& precision = std::get<1>(params);
    const size_t width = std::get<2>(params);
    const midOutputType midType = std::get<3>(params);

    std::ostringstream name;
    name << "netPrecision=" << precision.name() << "_"
         << "IS=" << width << "_"
         << "OutputType=" << midType << "_"
         << "targetDevice=" << device;
    return name.str();
}
|
||||
|
||||
void OutputBeforeActivation::SetUp() {
|
||||
InferenceEngine::Precision netPrecision;
|
||||
std::map<std::string, std::string> config;
|
||||
size_t inputSize;
|
||||
midOutputType outputType;
|
||||
std::tie(targetDevice, netPrecision, inputSize, outputType, config) = this->GetParam();
|
||||
configuration.insert(config.begin(), config.end());
|
||||
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
|
||||
|
||||
std::vector<size_t> input_dims { 1, inputSize };
|
||||
|
||||
auto input_parameter = ngraph::builder::makeParams(ngPrc, {input_dims, input_dims});
|
||||
auto input0 = input_parameter[0];
|
||||
auto input1 = input_parameter[1];
|
||||
|
||||
ngraph::OutputVector outputs;
|
||||
std::shared_ptr<ngraph::Node> midLayer;
|
||||
switch (outputType) {
|
||||
case SubgraphTestsDefinitions::midOutputType::Sum: {
|
||||
midLayer = ngraph::builder::makeEltwise(input0, input1, ngraph::helpers::EltwiseTypes::ADD);
|
||||
break;
|
||||
}
|
||||
case SubgraphTestsDefinitions::midOutputType::Sub: {
|
||||
midLayer = ngraph::builder::makeEltwise(input0, input1, ngraph::helpers::EltwiseTypes::SUBTRACT);
|
||||
break;
|
||||
}
|
||||
case SubgraphTestsDefinitions::midOutputType::Mul: {
|
||||
midLayer = ngraph::builder::makeEltwise(input0, input1, ngraph::helpers::EltwiseTypes::MULTIPLY);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
GTEST_FAIL() << "Unknown midOutputType";
|
||||
}
|
||||
|
||||
auto act = ngraph::builder::makeActivation(midLayer, ngPrc, ngraph::helpers::ActivationTypes::Tanh);
|
||||
outputs.insert(outputs.end(), {midLayer, act});
|
||||
function = std::make_shared<ngraph::Function>(outputs, input_parameter, "output_before_activation");
|
||||
}
|
||||
|
||||
/// Creates and fills the blob for one network input using the tensor descriptor of @p info.
InferenceEngine::Blob::Ptr OutputBeforeActivation::GenerateInput(const InferenceEngine::InputInfo &info) const {
    // NOTE(review): presumably these are the range, start value and resolution of the generated
    // data - confirm against FuncTestUtils::createAndFillBlob.
    const int range = 2;
    const int startFrom = -1;
    const int resolution = 100;
    return FuncTestUtils::createAndFillBlob(info.getTensorDesc(), range, startFrom, resolution);
}
|
||||
|
||||
// Runs the common layer-test flow (Run()) for every instantiated parameter set.
TEST_P(OutputBeforeActivation, CompareWithRefs) {
    Run();
}
|
||||
} // namespace SubgraphTestsDefinitions
|
@ -16,7 +16,6 @@ TEST_P(LSTMCellTestBase, GNA_sw_fp32_single_lstm_test) {
|
||||
}
|
||||
|
||||
TEST_P(LSTMCellTestBase, GNA_I16_single_lstm_test) {
|
||||
DISABLE_TEST_ON_GNA2
|
||||
runSingleLSTMTest( {
|
||||
{"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
|
||||
{"GNA_COMPACT_MODE", "NO"},
|
||||
@ -28,7 +27,6 @@ TEST_P(LSTMCellTestBase, GNA_I16_single_lstm_test) {
|
||||
}
|
||||
|
||||
TEST_P(LSTMCellTestBase, GNA_I8_single_lstm_test) {
|
||||
DISABLE_TEST_ON_GNA2
|
||||
runSingleLSTMTest({
|
||||
{"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
|
||||
{"GNA_COMPACT_MODE", "NO"},
|
||||
|
Loading…
Reference in New Issue
Block a user