[GNA] LSTMCell fix for GNA (#2216)

This commit is contained in:
Kamil Magierski 2020-09-14 16:29:45 +02:00 committed by GitHub
parent 6d90eedbd2
commit db5aa551af
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 539 additions and 84 deletions

View File

@ -430,9 +430,8 @@ void GNAPluginNS::backend::AMIntelDNN::Propagate() {
break;
case kDnnCopyOp:ApplyCopy(comp);
break;
default:fprintf(stderr, "Bad operation in Propagate!\n");
throw -1;
break;
default:
THROW_GNA_EXCEPTION << "Bad operation in Propagate : " << comp->operation;
}
// PrintOutputs(i); fflush(stdout);
}

View File

@ -200,22 +200,6 @@ void GNAPluginNS::backend::ApplyCopy(intel_dnn_component_t *component) {
}
}
// Checks whether two DNNs have the same topology: equal component counts and, per
// component, matching input/output row/column dimensions and operation type.
// Returns true when every check passes, false at the first mismatch.
// Note: returning early on a count mismatch also prevents the per-component loop from
// indexing past the end of the smaller dnn's component array.
bool GNAPluginNS::backend::isCompatibleDnn(GNAPluginNS::backend::AMIntelDNN dnn1, GNAPluginNS::backend::AMIntelDNN dnn2) {
    // compare basic structures to see if they are compatible
    if (dnn1.num_components() != dnn2.num_components()) return false;
    for (int i = 0; i < dnn1.num_components(); i++) {
        if (dnn1.component[i].num_rows_in != dnn2.component[i].num_rows_in) return false;
        if (dnn1.component[i].num_columns_in != dnn2.component[i].num_columns_in) return false;
        if (dnn1.component[i].num_rows_out != dnn2.component[i].num_rows_out) return false;
        if (dnn1.component[i].num_columns_out != dnn2.component[i].num_columns_out) return false;
        if (dnn1.component[i].operation != dnn2.component[i].operation) return false;
    }
    return true;
}
void GNAPluginNS::backend::ClearScoreError(intel_score_error_t *error) {
error->num_scores = 0;
error->num_errors = 0;

View File

@ -65,7 +65,6 @@ void ApplyTranspose(intel_dnn_component_t *component);
void ApplyCopy(intel_dnn_component_t *component);
void PlotFloatIntDnn(GNAPluginNS::backend::AMIntelDNN *dnn, GNAPluginNS::backend::AMIntelDNN *dnn_int);
bool isCompatibleDnn(GNAPluginNS::backend::AMIntelDNN dnn1, GNAPluginNS::backend::AMIntelDNN dnn2);
void ClearScoreError(intel_score_error_t *error);
void UpdateScoreError(intel_score_error_t *error, intel_score_error_t *total_error);
void SoftmaxGoogle(float *ptr_output, float *ptr_input, const uint32_t num_outputs, const uint32_t num_inputs);

View File

@ -8,32 +8,62 @@
#include <ie_common.h>
#include <legacy/ie_layers.h>
#include <iomanip>
#include <details/caseless.hpp>
#include <layers/gna_copy_layer.hpp>
#include "backend/dnn_types.h"
#include "dnn_components.hpp"
using namespace GNAPluginNS;
using namespace GNAPluginNS::backend;
// Registers a new (empty) dnn component for the IR layer `layerName` and returns a
// reference to it for the caller to initialize.
// Delayed-copy components (type == DelayedCopyLayerName, case-insensitive) are counted
// separately because they are scheduled after all regular components — see
// getExecutionOrder(). The logged execution index reflects that: non-delayed components
// get their final position, delayed ones get a negative placeholder.
intel_dnn_component_t & DnnComponents::addComponent(const std::string layerName, const std::string layerMetaType) {
    auto isDelayed = InferenceEngine::details::CaselessEq<std::string>()(layerMetaType, DelayedCopyLayerName);
    delayedOperations += isDelayed ? 1 : 0;
    components.emplace_back(DnnComponentExtra{layerName, {}, isDelayed});
    auto &currentComponent = components.back().dnnComponent;
#ifdef PLOT
    currentComponent.original_layer_name = components.back().name.c_str();
    std::cout << "IR layer : " << std::left << std::setw(20) << layerName << " " << layerMetaType << "_" << components.size() - 1 << std::endl;
#endif
    int execOrder = 0;
    if (!isDelayed) {
        execOrder = static_cast<int>(components.size() - 1 - delayedOperations);
    } else {
        // todo: not perfect - propose to create mapping table that will be printed out by extra request
        execOrder = - static_cast<int>(delayedOperations);
    }
    gnalog() << "IR layer : " << std::left << std::setw(20) << layerName << " " << layerMetaType << "_" << execOrder << std::endl;
    return currentComponent;
}
// Looks up the dnn component that was registered for the given IR layer, matching by
// layer name. Returns a pointer into the components storage, or nullptr when the layer
// has no registered component.
intel_dnn_component_t * DnnComponents::findComponent(InferenceEngine::CNNLayerPtr __layer) {
    auto component = std::find_if(begin(components),
                                  end(components),
                                  [&](storage_type ::value_type &comp) {
                                      return comp.name == __layer->name;
                                  });
    // check for generic prev layer
    if (component != components.end()) {
        return &component->dnnComponent;
    }
    return nullptr;
}
// Flattens the stored components into execution order: all non-delayed components keep
// their relative (topological) positions at the front, and every delayed-copy component
// is pushed to the tail of the sequence.
std::vector<intel_dnn_component_t> DnnComponents::getExecutionOrder() {
    std::vector<intel_dnn_component_t> ordered(components.size());
    uint32_t nextRegularSlot = 0;
    uint32_t nextDelayedSlot = static_cast<uint32_t>(components.size() - delayedOperations);
    for (const auto &extra : components) {
        uint32_t &slot = extra.isDelayed ? nextDelayedSlot : nextRegularSlot;
        ordered[slot++] = extra.dnnComponent;
    }
    return ordered;
}

View File

@ -12,11 +12,21 @@
namespace GNAPluginNS {
namespace backend {
// Bookkeeping record pairing a dnn component with the IR layer it was created for,
// plus a flag marking delayed-copy components that must be scheduled last.
struct DnnComponentExtra {
    std::string name;                    // IR layer name the component belongs to
    intel_dnn_component_t dnnComponent;  // the GNA primitive descriptor itself
    bool isDelayed;                      // true for delayed-copy components (executed after all others)
    // Takes `name` by value and moves it into the member to avoid an extra copy.
    DnnComponentExtra(std::string name,
                      intel_dnn_component_t dnnComponent,
                      bool isDelayed) :
        name(std::move(name)), dnnComponent(dnnComponent), isDelayed(isDelayed) {}
};
/**
* maps layer name to dnn.component, in topological sort prev nodes will be initialized
* maps layer name to dnn.component, in topological order, or execution order
*/
struct DnnComponents {
using storage_type = std::list<std::pair<std::string, intel_dnn_component_t>>;
using storage_type = std::list<DnnComponentExtra>;
storage_type components;
/**
* @brief initializes new empty intel_dnn_component_t object
@ -30,6 +40,14 @@ struct DnnComponents {
* @return
*/
intel_dnn_component_t * findComponent(InferenceEngine::CNNLayerPtr layer);
/**
* @brief extract components in execution order
*/
std::vector<intel_dnn_component_t> getExecutionOrder();
private:
uint32_t delayedOperations = 0;
};
} // namespace backend
} // namespace GNAPluginNS

View File

@ -706,7 +706,7 @@ void GNAGraphCompiler::CopyPrimitive(InferenceEngine::CNNLayerPtr layer) {
void* ptr_outputs = nullptr;
auto orientation = kDnnInterleavedOrientation;
auto& currentComponent = dnnComponents.addComponent(layer->name, "copy");
auto &currentComponent = dnnComponents.addComponent(layer->name, layer->type);
dnn->InitCopyComponent(currentComponent,
orientation,
@ -1295,7 +1295,7 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
auto orientation = kDnnInterleavedOrientation;
auto& copyComponent = dnnComponents.addComponent(layer->name + "_synthetic_copy", "copy");
auto& copyComponent = dnnComponents.addComponent(layer->name + "_synthetic_copy", CopyLayerName);
dnn->InitCopyComponent(copyComponent,
orientation,
@ -1774,7 +1774,8 @@ void GNAGraphCompiler::CreateLayerPrimitive(CNNLayerPtr layer) {
{{"Reshape"}, SKIP}, // TODO: handled not in GNA but rather in GNA plugin
{{"Squeeze"}, SKIP}, // TODO: handled not in GNA but rather in GNA plugin
{{"Crop"}, CREATE(CropPrimitive)},
{{"Copy"}, CREATE(CopyPrimitive)},
{{CopyLayerName}, CREATE(CopyPrimitive)},
{{DelayedCopyLayerName}, CREATE(CopyPrimitive)},
{{"TensorIterator"}, SKIP},
{{"LSTMCell"}, SKIP}
};
@ -1786,7 +1787,17 @@ void GNAGraphCompiler::CreateLayerPrimitive(CNNLayerPtr layer) {
}
}
void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void *ptr, size_t num_data_bytes_out) {
void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void *ptr,
size_t num_data_bytes_out) {
auto getOffsetForBinding = [](InferenceEngine::CNNLayerPtr layer) {
int32_t output_offset = 0;
if (layer->params.find("output_offset") != layer->params.end()) {
output_offset = layer->GetParamAsInt("output_offset");
}
return output_offset;
};
gnalog() << "Connecting output " << layer->name << " ...\n";
// in case of Memory Layer it's input allocated in meminput layer
if (layer->outData.size() == 1) {
@ -1816,12 +1827,12 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void *p
auto memorySize = InferenceEngine::details::product(nextMemoryLayer.getDims()) * nextMemoryLayer.elementSizeBytes();
gnamem->reserve_ptr(&nextMemoryLayer.gna_ptr, ALIGN64(memorySize), 64);
gnamem->bind_ptr(ptr, &nextMemoryLayer.gna_ptr, 0);
gnamem->bind_ptr(ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer));
nextMemoryLayer.reserved_size = ALIGN64(memorySize);
} else {
IE_ASSERT(nextMemoryLayer.reserved_size >= ALIGN64(num_data_bytes_out));
gnamem->bind_ptr(ptr, &nextMemoryLayer.gna_ptr, 0);
gnamem->bind_ptr(ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer));
}
return;
}
@ -2073,7 +2084,7 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
gnamem->reserve_ptr(&memoryLayer.gna_ptr, ALIGN64(memorySize), 64);
gnamem->bind_ptr(ptr, &memoryLayer.gna_ptr, offset);
} else {
if (num_data_bytes_in > memorySize) {
if (num_data_bytes_in > memorySize - offset) {
THROW_GNA_LAYER_EXCEPTION(layer) <<" invalid allocation request of "
<< num_data_bytes_in << " is more then state tensor size of: " << memorySize;
}

View File

@ -362,7 +362,9 @@ void GNAPlugin::LoadNetwork(ICNNNetwork & _network) {
passes->registerPass<RemoveConstPass>();
passes->registerPass<UnrollTIPass>();
passes->registerPass<RemoveConstPass>();
passes->registerPass<InsertIdentityToLSTMCellPass>();
passes->registerPass<UnrollLSTMCellPass>();
passes->registerPass<RemoveSingleInputConcatPass>();
passes->registerPass<SubstitutePReluPass>();
passes->registerPass<SubstituteSoftSignPass>();
@ -556,15 +558,15 @@ void GNAPlugin::LoadNetwork(ICNNNetwork & _network) {
auto irLayerAvatar = std::find_if(
graphCompiler.dnnComponents.components.begin(),
graphCompiler.dnnComponents.components.end(),
[&layer](std::pair<std::string, intel_dnn_component_t> & value) {
return value.first == layer->name;
[&layer](const backend::DnnComponents::storage_type::value_type & value) {
return value.name == layer->name;
});
gnalog() << "[UFS] from : "<< outPort.first <<" reached: " << layer->name << "\n";
// probing gna_primitives
if (irLayerAvatar != graphCompiler.dnnComponents.components.end()) {
initOutput(portId, irLayerAvatar->second, layer);
initOutput(portId, irLayerAvatar->dnnComponent, layer);
stopSearching = true;
}
@ -620,9 +622,8 @@ void GNAPlugin::LoadNetwork(ICNNNetwork & _network) {
1);
// TODO: this copy is unneeded; in fact, we can directly create gna structs from list
for (auto &element : graphCompiler.dnnComponents.components) {
dnn->component.push_back(element.second);
}
auto execOrder = graphCompiler.dnnComponents.getExecutionOrder();
dnn->component.insert(dnn->component.begin(), execOrder.begin(), execOrder.end());
// in fp32 mode last PWL cannot be computed without that
dnn->InitActiveList(NULL);

View File

@ -0,0 +1,17 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
namespace GNAPluginNS {
/**
 * Layer type for a copy primitive whose GNA primitive is created in sorting
 * (topological) order, i.e. in its natural position in the execution sequence.
 */
static constexpr auto CopyLayerName = "Copy";
/**
 * Layer type for a copy primitive whose GNA primitive is created at the end of the
 * primitives sequence (its execution is deferred past all non-delayed operations).
 */
static constexpr auto DelayedCopyLayerName = "DelayedCopy";
} // namespace GNAPluginNS

View File

@ -13,6 +13,7 @@
#include "backend/gna_types.h"
#include "gna_permute.hpp"
#include "gna_lib_ver_selector.hpp"
#include "gna_copy_layer.hpp"
namespace GNAPluginNS {
@ -201,13 +202,13 @@ class LayerInfo {
return isOfType("concat");
}
bool isNonFunctional() const noexcept {
return isOfType("reshape") || isOfType("squeeze") || isOfType("unsqueeze");
return isOfType("reshape") || isOfType("squeeze") || isOfType("unsqueeze") || isTrivialPermute();
}
bool isPermute() const noexcept {
return isOfType("permute");
}
// @brief this not only mathematically trivial, has some WA for kaldi case
bool isTrivialPermute() {
bool isTrivialPermute() const {
if (!isPermute()) return false;
auto layerOrder = layer->GetParamAsInts("order");
@ -269,8 +270,13 @@ class LayerInfo {
return false;
}
bool isCopy() const noexcept {
return isOfType("copy");
return isOfType(CopyLayerName) || isOfType(DelayedCopyLayerName);
}
bool isCopyDelayed() const noexcept {
return isOfType(DelayedCopyLayerName);
}
size_t paddingSize() const {
static InferenceEngine::details::caseless_set<std::string> layersWithPossiblePadding = {"FullyConnected",
"InnerProduct",

View File

@ -23,6 +23,7 @@
#include <legacy/ie_util_internal.hpp>
#include <legacy/graph_tools.hpp>
#include <legacy/net_pass.h>
#include <layers/gna_copy_layer.hpp>
#include "gna_plugin_log.hpp"
#include "frontend/quantized_layer_params.hpp"
@ -47,6 +48,7 @@ std::shared_ptr<IPassManager> BasePass::getPassManager() {
}
// indexes stored in pass manager
static const char identityLayersCounterName[] = "identityLayerCounter";
static const char diagonalLayersCounterName[] = "diagonalLayerCounter";
static const char copyLayersCounter[] = "numCopyLayers";
static const char softSignLayersCounter[] = "numSoftSignLayers";
@ -94,12 +96,13 @@ static void insertDiagonalLayerBetween(InferenceEngine::CNNLayerPtr prevLayer,
* @brief copy layer inserted by several passes
* @returns pointer to newly created COPYLayer
*/
static CNNLayerPtr InsertCopyLayer(CNNLayerPtr prevLayer, CNNLayerPtr nextLayer, int beforeIdx, std::shared_ptr<IPassManager> passmanager) {
static CNNLayerPtr InsertCopyLayer(CNNLayerPtr prevLayer, CNNLayerPtr nextLayer, int beforeIdx,
std::shared_ptr<IPassManager> passmanager, std::string copyLayerType) {
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(prevLayer);
std::string copyName = std::string("copy_") + std::to_string(passmanager->getIntVar(copyLayersCounter)++);
std::string copyName = copyLayerType + std::string("_") + std::to_string(passmanager->getIntVar(copyLayersCounter)++);
gnalog() << "Inserted " << copyName << " between: " << prevLayer->name << " and " << nextLayer->name << std::endl;
CNNLayerPtr copyLayer = std::make_shared<GenericLayer>(LayerParams({copyName, "Copy", Precision::FP32}));
CNNLayerPtr copyLayer = std::make_shared<GenericLayer>(LayerParams({copyName, copyLayerType, Precision::FP32}));
auto inputData = nextLayer->insData[beforeIdx].lock();
auto dataPtr = std::make_shared<Data>(copyName, inputData->getTensorDesc());
@ -124,7 +127,7 @@ static std::vector<CNNLayerPtr> getCandidatesForIdentityInsertion(const CNNLayer
auto PrevFunctionalLayer = [](CNNLayerPtr l, int idx = 0) {
auto prevLayer = CNNNetPrevLayerSkipCertain(l, idx, [](CNNLayerPtr ptr) {
return LayerInfo(ptr).isNonFunctional();
});
});
gnalog() << "CNNNetPrevLayerSkipCertain for :: " << l->name << "returned: " << prevLayer->name << std::endl;
return prevLayer;
};
@ -148,35 +151,35 @@ static std::vector<CNNLayerPtr> getCandidatesForIdentityInsertion(const CNNLayer
auto prev1 = PrevFunctionalLayer(l, 1);
switch (eltwise->_operation) {
case EltwiseLayer::Sub:
case EltwiseLayer::Sum:
if (!LayerInfo(prev0).has32BOutput() || !LayerInfo(prev1).has32BOutput()) {
return prevLayers;
}
// TODO: whether there are possibility to select after what layer identity gets inserted
prevLayers.push_back(CNNNetPrevLayer(l, 0));
break;
case EltwiseLayer::Prod: {
if (LayerInfo(prev0).has16BOutput() && LayerInfo(prev1).has16BOutput()) {
return prevLayers;
}
if (LayerInfo(prev0).has32BOutput()) {
prevLayers.push_back(CNNNetPrevLayer(l, 0));
}
// if layers of outdata are different
auto prevData0 = l->insData[0].lock();
auto prevData1 = l->insData[1].lock();
if ((prev0 != prev1 || prevData0 != prevData1) && LayerInfo(prev1).has32BOutput()) {
prevLayers.push_back(CNNNetPrevLayer(l, 1));
}
break;
case EltwiseLayer::Sub:
case EltwiseLayer::Sum:
if (!LayerInfo(prev0).has32BOutput() || !LayerInfo(prev1).has32BOutput()) {
return prevLayers;
}
default :
THROW_GNA_EXCEPTION << "Eltwise Layer of type: " << eltwise->_operation << " not supported";
// TODO: whether there are possibility to select after what layer identity gets inserted
prevLayers.push_back(CNNNetPrevLayer(l, 0));
break;
case EltwiseLayer::Prod: {
if (LayerInfo(prev0).has16BOutput() && LayerInfo(prev1).has16BOutput()) {
return prevLayers;
}
if (LayerInfo(prev0).has32BOutput()) {
prevLayers.push_back(CNNNetPrevLayer(l, 0));
}
// if layers of outdata are different
auto prevData0 = l->insData[0].lock();
auto prevData1 = l->insData[1].lock();
if ((prev0 != prev1 || prevData0 != prevData1) && LayerInfo(prev1).has32BOutput()) {
prevLayers.push_back(CNNNetPrevLayer(l, 1));
}
break;
}
default :
THROW_GNA_EXCEPTION << "Eltwise Layer of type: " << eltwise->_operation << " not supported";
}
} else if (concat != nullptr) {
for (int i = 0; CNNNetHasPrevLayer(l.get(), i); ++i) {
@ -624,12 +627,12 @@ void RemovePermutationsNHWCToNCHWPass::run() {
}
void InsertIdentityLayerPass::run() {
int numOfIdentityLayers = 0;
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(pLayers->front());
for (auto & l : *pLayers) {
for (auto && prev : getCandidatesForIdentityInsertion(l)) {
int numOfIdentityLayers = this->getPassManager()->getIntVar(identityLayersCounterName)++;
// actual insertion
auto activationName = std::string("identity_") + std::to_string(++numOfIdentityLayers);
auto activationName = std::string("identity_") + std::to_string(numOfIdentityLayers);
gnalog() << "Inserted "<< activationName << " between: " << prev->name << " and " << l->name << "\n" << std::flush;
@ -692,27 +695,34 @@ void InsertCopyLayerPass::run() {
for (int i=0; i != prevLayers.size(); i++) {
auto & prevIndirectLayer = prevLayers[i].first;
bool bInsert = false;
/// Delayed copy layers need to be moved to the very end of processing
bool bInsertDelayed = false;
auto isInserted = [&bInsertDelayed, &bInsert]() {
return bInsert || bInsertDelayed;
};
if (LayerInfo(l).isMemory()) {
if (LayerInfo(prevIndirectLayer).isConcat()) { bInsert = true;}
if (LayerInfo(prevIndirectLayer).isConcat() || LayerInfo(prevIndirectLayer).isCrop()) { bInsertDelayed = true;}
// memory usualy preceded by either activation or split, or other layers in order to have 2b precision
for (auto && inputto : getInputTo(prevLayers[i].first->outData[prevLayers[i].second])) {
// if preceding layer is common for memory and concat
if (LayerInfo(inputto.second).isConcat()) {
bInsert = true;
bInsertDelayed = true;
break;
}
}
}
if (LayerInfo(l).isConcat() && LayerInfo(prevIndirectLayer).isCrop()) { bInsert = true; }
if (!isInserted() && LayerInfo(l).isConcat() && LayerInfo(prevIndirectLayer).isCrop()) { bInsert = true; }
if (bInsert) {
if (isInserted()) {
if (LayerInfo(prevIndirectLayer).isCropAffined()) {
// The crop will be replaced by affine.
// Copy layer insertion is not required
continue;
}
auto prevLayer = CNNNetPrevLayer(l, i);
InsertCopyLayer(prevLayer, l, i, getPassManager());
InsertCopyLayer(prevLayer, l, i, getPassManager(), bInsertDelayed ? DelayedCopyLayerName : CopyLayerName);
}
}
}
@ -1253,6 +1263,48 @@ void BroadcastConstPass::run() {
}
}
// Inserts an "identity" activation after every output of each LSTMCell layer and
// reroutes all former consumers of those outputs through the new activation.
void InsertIdentityToLSTMCellPass::run() {
    for (auto layer : *pLayers) {
        if (layer->type == "LSTMCell") {
            // This fixes the cases when both functional and non-functional outputs are mixed (or no outputs are used),
            // which results in a scratch buffer being used, so outputs cannot be used in form of blob or by non-functional layers.
            // The downside is scaling down from i32 to i16, which may cost precision.
            for (int output_idx = 0; output_idx < layer->outData.size(); output_idx++) {
                // Counter lives in the pass manager so identity names stay unique across passes.
                int numOfIdentityLayers = ((this->getPassManager())->getIntVar(identityLayersCounterName))++;
                auto activationName = std::string("lstm_identity_") + std::to_string(numOfIdentityLayers);
                auto& output = layer->outData[output_idx];
                auto& input_to = getInputTo(output);
                CNNLayerPtr activationLayer =
                    std::make_shared<GenericLayer>(LayerParams({activationName, "identity", InferenceEngine::Precision::FP32}));
                // New data node carries the same tensor descriptor as the LSTMCell output it shadows.
                auto dataPtr = std::make_shared<Data>("lstm_identity_data_" + std::to_string(numOfIdentityLayers), output->getTensorDesc());
                // Propagate quantization metadata onto the inserted layer when the network is quantized.
                auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
                auto activationLayerWithQuant = quantized ? InferenceEngine::injectData<QuantizedLayerParams>(activationLayer) : activationLayer;
                getCreatorLayer(dataPtr) = activationLayerWithQuant;
                activationLayerWithQuant->outData.push_back(dataPtr);
                activationLayerWithQuant->insData.push_back(output);
                auto& activationInputTo = getInputTo(dataPtr);
                // Move each consumer of the original output over to the identity's output.
                for (auto& input : input_to) {
                    auto& next_layer = input.second;
                    activationInputTo[input.first] = next_layer;
                    // Detach the consumer from the LSTMCell output; iterate backwards because
                    // erase() shifts the remaining elements.
                    for (int i = next_layer->insData.size() -1; i>= 0; i--) {
                        auto ins = next_layer->insData[i].lock();
                        if (ins == output) {
                            next_layer->insData.erase(next_layer->insData.begin() + i);
                        }
                    }
                    next_layer->insData.push_back(dataPtr);
                }
                // The identity becomes the sole consumer of the LSTMCell output.
                input_to.clear();
                input_to[activationName] = activationLayerWithQuant;
            }
        }
    }
}
void UnrollLSTMCellPass::run() {
InferenceEngine::NetPass::UnrollRNN_if(*getPassManager()->getNetwork(), [] (const RNNCellBase& rnn) -> bool {
if (rnn.clip != 0.0f)
@ -1286,6 +1338,33 @@ void RemoveConstPass::run() {
transformer.fullTrim();
}
void RemoveSingleInputConcatPass::run() {
for (auto &l : *pLayers) {
if (l->type == "Concat") {
auto concat = dynamic_cast<ConcatLayer*>(l.get());
if (concat->insData.size() == 1 && concat->outData.size() > 0) {
auto in = concat->insData[0];
auto in_layer = getCreatorLayer(in.lock());
auto out = concat->outData[0];
for (auto out_layer : getInputTo(out)) {
for (int i = 0; i < out_layer.second->insData.size(); i++) {
if (out_layer.second->insData[i].lock() == out) {
out_layer.second->insData[i] = in;
getInputTo(in.lock())[out_layer.second->name] = out_layer.second;
}
}
}
getInputTo(in.lock()).erase(concat->name);
getInputTo(out).clear();
concat->insData.clear();
concat->outData.clear();
}
}
}
}
void FuseMultipleIdentitiesPass::run() {
for (auto &l : *pLayers) {
if (l->insData.empty()) continue;

View File

@ -144,6 +144,8 @@ DECL_PASS(InsertConcatAligningFilter);
*/
DECL_PASS(ReorderConcatInputs);
DECL_PASS_BEFORE_COPY(InsertIdentityToLSTMCell);
/**
* @brief unrolled LSTM cell layer in supported GNA primitives
*/
@ -159,6 +161,10 @@ DECL_PASS_BEFORE_COPY(UnrollTI);
*/
DECL_PASS_BEFORE_COPY(RemoveConst);
/**
*/
DECL_PASS_BEFORE_COPY(RemoveSingleInputConcat);
/**
* @brief removed extra identity layer for multi-output
*/

View File

@ -581,6 +581,12 @@ bool unrollTI(CNNLayerPtr cur, ICNNNetwork& net) {
auto& rule = first_class[i];
auto out_data = ti->outData[rule.from];
if (num == 1) {
getInputTo(body_list[0].outputs[rule.to]) = getInputTo(out_data);
getInputTo(body_list[0].outputs[rule.to]).begin()->second->insData[0] = body_list[0].outputs[rule.to];
continue;
}
std::string name = ti->name + ":out_concat_" + std::to_string(i);
auto concat = std::make_shared<ConcatLayer>(LayerParams {name, "Concat", cur->precision});
concat->_axis = rule.axis;

View File

@ -0,0 +1,37 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include <subgraph_tests/memory_LSTMCell.hpp>
#include "common_test_utils/test_constants.hpp"
namespace SubgraphTestsDefinitions {
// Input vector sizes to sweep.
std::vector<size_t> input_sizes = {
    80,
    32,
    64,
    100,
    25
};

// LSTM hidden-state sizes to sweep.
std::vector<size_t> hidden_sizes = {
    128,
    200,
    300,
    24,
    32,
};

// GNA plugin configuration used for every test instance.
std::map<std::string, std::string> additional_config = {
    {"GNA_COMPACT_MODE", "NO"},
    {"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
    {"GNA_SCALE_FACTOR_0", "1638.4"},
};

// Cartesian product of input/hidden sizes on the GNA device in FP32.
INSTANTIATE_TEST_CASE_P(MemoryLSTMCellTest, MemoryLSTMCellTest,
    ::testing::Combine(
        ::testing::Values(CommonTestUtils::DEVICE_GNA),
        ::testing::Values(InferenceEngine::Precision::FP32),
        ::testing::ValuesIn(input_sizes),
        ::testing::ValuesIn(hidden_sizes),
        ::testing::Values(additional_config)),
    MemoryLSTMCellTest::getTestCaseName);
} // namespace SubgraphTestsDefinitions

View File

@ -0,0 +1,37 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "common_test_utils/test_common.hpp"
#include "functional_test_utils/layer_test_utils.hpp"
#include <ie_core.hpp>
namespace SubgraphTestsDefinitions {

typedef std::tuple<
    std::string,                        // Target device name
    InferenceEngine::Precision,         // Network precision
    size_t,                             // Input size
    size_t,                             // Hidden size
    std::map<std::string, std::string>  // Configuration
> memoryLSTMCellParams;

// Subgraph test that runs an LSTMCell wrapped in a TensorIterator with
// ReadValue/Assign memory state, then validates against an unrolled, memory-free
// reference model built from the same cached constants.
class MemoryLSTMCellTest : public LayerTestsUtils::LayerTestsCommon,
                           public testing::WithParamInterface<memoryLSTMCellParams> {
private:
    // You have to unroll the TI manually and remove memory until ngraph supports it.
    void switchToNgraphFriendlyModel();
    // Since we switch between models, we need to generate and save weights, biases and inputs in SetUp.
    std::vector<float> input_bias;
    std::vector<float> input_weights;
    std::vector<float> hidden_memory_init;
    std::vector<float> cell_memory_init;
    std::vector<float> weights_vals;
    std::vector<float> reccurrenceWeights_vals;
    std::vector<float> bias_vals;
protected:
    void SetUp() override;
    void Run() override;
public:
    static std::string getTestCaseName(const testing::TestParamInfo<memoryLSTMCellParams> &obj);
};
} // namespace SubgraphTestsDefinitions

View File

@ -0,0 +1,225 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <tuple>
#include <string>
#include <vector>
#include <memory>
#include <functional>
#include "ie_core.hpp"
#include "common_test_utils/common_utils.hpp"
#include "functional_test_utils/blob_utils.hpp"
#include "functional_test_utils/precision_utils.hpp"
#include "functional_test_utils/plugin_cache.hpp"
#include "functional_test_utils/skip_tests_config.hpp"
#include "ngraph_functions/utils/ngraph_helpers.hpp"
#include "ngraph_functions/builders.hpp"
#include <transformations/lstm_cell_decomposition.hpp>
#include "subgraph_tests/memory_LSTMCell.hpp"
namespace SubgraphTestsDefinitions {
// Builds a human-readable test-case name from the parameter tuple: precision,
// input size, hidden size, and target device (the configuration map is not encoded).
std::string MemoryLSTMCellTest::getTestCaseName(const testing::TestParamInfo<memoryLSTMCellParams> &obj) {
    std::string device;
    InferenceEngine::Precision precision;
    size_t inSize;
    size_t hidSize;
    std::map<std::string, std::string> cfg;
    std::tie(device, precision, inSize, hidSize, cfg) = obj.param;

    std::ostringstream name;
    name << "netPrecision=" << precision.name() << "_"
         << "IS=" << inSize << "_"
         << "HS=" << hidSize << "_"
         << "targetDevice=" << device;
    return name.str();
}
// NOTE(review): file-scope variable written by SetUp() and read by
// switchToNgraphFriendlyModel(); consider making it a member of MemoryLSTMCellTest.
size_t hiddenSize;
// Builds the primary model under test: eltwise pre-processing feeding a TensorIterator
// that wraps one LSTMCell step, with hidden/cell state held in ReadValue/Assign memory.
// All random constants are cached in members so switchToNgraphFriendlyModel() can
// rebuild an equivalent network with identical values.
void MemoryLSTMCellTest::SetUp() {
    InferenceEngine::Precision netPrecision;
    std::map<std::string, std::string> config;
    size_t inputSize;
    // hiddenSize is the file-scope variable shared with switchToNgraphFriendlyModel().
    std::tie(targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam();
    configuration.insert(config.begin(), config.end());
    auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);

    std::vector<size_t> input_dims { 1, inputSize };
    std::vector<size_t> squeeze_axes {0};
    std::vector<size_t> hidden_memory_dims {1, hiddenSize};
    std::vector<size_t> cell_memory_dims {1, hiddenSize};

    const int seed = 0;
    // NOTE(review): mt19937 takes an integer seed — the float cast is suspicious; confirm intent.
    std::mt19937 gen(static_cast<float>(seed));
    // Deterministic pseudo-random data; the lambda owns its own copy of the engine
    // (mutable by-value capture), so successive calls continue one sequence.
    auto generateFloatNumbers = [gen](std::size_t vec_len, float min, float max) mutable {
        std::vector<float> res;
        std::uniform_real_distribution<float> dist(min, max);
        for (int i = 0; i < vec_len; i++)
            res.emplace_back(static_cast<float>(dist(gen)));
        return res;
    };

    // Cache every generated constant for reuse by the reference model.
    input_bias = generateFloatNumbers(inputSize, -0.25f, 0.0f);
    input_weights = generateFloatNumbers(inputSize, 0.0f, 0.15f);
    hidden_memory_init = generateFloatNumbers(hiddenSize, -0.2f, 0.2f);
    cell_memory_init = generateFloatNumbers(hiddenSize, -0.2f, 0.2f);
    weights_vals = generateFloatNumbers(4 * hiddenSize * inputSize, -0.1f, 0.1f);
    reccurrenceWeights_vals = generateFloatNumbers(4 * hiddenSize * hiddenSize, -0.1f, 0.1f);
    bias_vals = generateFloatNumbers(4 * hiddenSize, -0.25f, 0.15f);

    // Pre-processing: (input + bias) * weights, then unsqueeze + transpose to the
    // [seq, batch, size] layout the TensorIterator input expects.
    auto input_parameter = ngraph::builder::makeParams(ngPrc, {input_dims});
    auto input_add_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_bias);
    auto add = ngraph::builder::makeEltwise(input_parameter[0], input_add_const, ngraph::helpers::EltwiseTypes::ADD);
    auto input_mul_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_weights);
    auto mul = ngraph::builder::makeEltwise(add, input_mul_const, ngraph::helpers::EltwiseTypes::MULTIPLY);
    auto unsqueeze_input_const = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
    auto unsqueeze_input = std::make_shared<ngraph::op::Unsqueeze>(mul, unsqueeze_input_const);
    auto permute_in_params = std::make_shared<ngraph::opset1::Constant>(ngraph::element::i64, ngraph::Shape{3}, ngraph::Shape{{1, 0, 2}});
    auto permute_in = std::make_shared<ngraph::opset1::Transpose>(unsqueeze_input, permute_in_params);

    // Memory state: ReadValue nodes initialized from constants, keyed by variable id.
    auto cell_memory_constant = ngraph::builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
    auto cell_memory_read = std::make_shared<ngraph::op::ReadValue>(cell_memory_constant, "cell_memory");
    auto hidden_memory_constant = ngraph::builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
    auto hidden_memory_read = std::make_shared<ngraph::op::ReadValue>(hidden_memory_constant, "hidden_memory");

    // Body - inputs
    auto X = std::make_shared<ngraph::op::Parameter>(ngPrc, ngraph::Shape{1, 1, inputSize});
    auto H_t = std::make_shared<ngraph::op::Parameter>(ngPrc, ngraph::Shape{1, hiddenSize});
    auto C_t = std::make_shared<ngraph::op::Parameter>(ngPrc, ngraph::Shape{1, hiddenSize});
    // Body - layers
    auto squeeze_const = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
    auto squeeze = std::make_shared<ngraph::op::Squeeze>(X, squeeze_const);
    auto weightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals);
    auto reccurrenceWeightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
    auto biasNode = ngraph::builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
    auto lstm = std::make_shared<ngraph::opset4::LSTMCell>(squeeze, H_t, C_t, weightsNode, reccurrenceWeightsNode, biasNode, hiddenSize);
    auto unsqueeze_const = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
    auto unsqueeze = std::make_shared<ngraph::op::Unsqueeze>(lstm->output(0), unsqueeze_const);
    // body - outputs
    auto H_o = lstm->output(0);
    auto C_o = lstm->output(1);
    auto unsqueeze_o = unsqueeze->output(0);
    auto body = std::make_shared<ngraph::Function>(ngraph::OutputVector{unsqueeze_o, H_o, C_o}, ngraph::ParameterVector {X, H_t, C_t});

    // TI construction: hidden and cell states are merged inputs, fed back each iteration.
    auto tensor_iterator = std::make_shared<ngraph::op::TensorIterator>();
    tensor_iterator->set_body(body);
    tensor_iterator->set_invariant_input(X, permute_in);
    tensor_iterator->set_merged_input(H_t, hidden_memory_read, H_o);
    tensor_iterator->set_merged_input(C_t, cell_memory_read, C_o);

    auto out_unsqueeze = tensor_iterator->get_iter_value(unsqueeze_o, -1);
    auto out_hidden = tensor_iterator->get_iter_value(H_o, -1);
    auto out_cell = tensor_iterator->get_iter_value(C_o, -1);

    out_hidden.get_tensor().set_element_type(ngPrc);
    out_cell.get_tensor().set_element_type(ngPrc);

    // Assign nodes write the final states back into the named memory variables.
    auto cell_memory_write = std::make_shared<ngraph::op::Assign>(out_cell, "cell_memory");
    auto hidden_memory_write = std::make_shared<ngraph::op::Assign>(out_hidden, "hidden_memory");

    auto final_reshape_pattern = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{4}, std::vector<size_t>({1, 1, 1, hiddenSize}));
    auto final_reshape = std::make_shared<ngraph::op::v1::Reshape>(out_unsqueeze, final_reshape_pattern, false);

    // Control dependencies keep the Assign nodes alive and ordered relative to the result.
    cell_memory_write->add_control_dependency(cell_memory_read);
    final_reshape->add_control_dependency(cell_memory_write);
    hidden_memory_write->add_control_dependency(hidden_memory_read);
    final_reshape->add_control_dependency(hidden_memory_write);

    function = std::make_shared<ngraph::Function>(final_reshape, input_parameter, "TI_with_memory");
}
// Rebuilds the model without TensorIterator or memory layers: a single LSTMCell step
// fed directly from constants holding the same initial states and weights cached by
// SetUp(). Used as the ngraph-computable reference for Validate().
void MemoryLSTMCellTest::switchToNgraphFriendlyModel() {
    InferenceEngine::Precision netPrecision;
    std::map<std::string, std::string> config;
    size_t inputSize;
    std::tie(targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam();
    auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);

    std::vector<size_t> input_dims { 1, inputSize };
    std::vector<size_t> squeeze_axes {0};
    std::vector<size_t> hidden_memory_dims {1, hiddenSize};
    std::vector<size_t> cell_memory_dims {1, hiddenSize};

    // Same pre-processing chain as SetUp(), reusing the cached constants.
    auto input_parameter = ngraph::builder::makeParams(ngPrc, {input_dims});
    auto input_add_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_bias);
    auto add = ngraph::builder::makeEltwise(input_parameter[0], input_add_const, ngraph::helpers::EltwiseTypes::ADD);
    auto input_mul_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_weights);
    auto mul = ngraph::builder::makeEltwise(add, input_mul_const, ngraph::helpers::EltwiseTypes::MULTIPLY);
    auto unsqueeze_input_const = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
    auto unsqueeze_input = std::make_shared<ngraph::op::Unsqueeze>(mul, unsqueeze_input_const);

    // Initial states become plain constants instead of ReadValue memory nodes.
    auto cell_memory_constant = ngraph::builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
    auto hidden_memory_constant = ngraph::builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);

    // Body - layers
    auto squeeze_const = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
    auto squeeze = std::make_shared<ngraph::op::Squeeze>(unsqueeze_input, squeeze_const);
    auto weightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals);
    auto reccurrenceWeightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
    auto biasNode = ngraph::builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
    auto lstm = std::make_shared<ngraph::opset4::LSTMCell>(squeeze, hidden_memory_constant, cell_memory_constant, weightsNode,
                                                           reccurrenceWeightsNode, biasNode, hiddenSize);
    auto unsqueeze_const = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
    auto unsqueeze = std::make_shared<ngraph::op::Unsqueeze>(lstm->output(0), unsqueeze_const);

    // Same final reshape as the memory-based model so outputs are comparable.
    auto final_reshape_pattern = std::make_shared<ngraph::op::Constant>(ngraph::element::i64,
                                                                        ngraph::Shape{4}, std::vector<size_t>({1, 1, 1, hiddenSize}));
    auto final_reshape = std::make_shared<ngraph::op::v1::Reshape>(unsqueeze, final_reshape_pattern, false);

    function = std::make_shared<ngraph::Function>(final_reshape, input_parameter, "TI_unrolled_without_memory");
}
// Runs inference on the memory-based model, then swaps in the unrolled reference
// model so Validate() compares against ngraph-computed outputs.
void MemoryLSTMCellTest::Run() {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()
    ConfigurePlugin();
    LoadNetwork();
    // Seed both memory states with the same initial values the reference model bakes
    // in as constants; fail loudly on any unexpected state name.
    auto states = executableNetwork.QueryState();
    for (auto& state : states) {
        auto name = state.GetName();
        if (name == "cell_memory") {
            auto blob = FuncTestUtils::createAndFillBlobWithFloatArray(state.GetLastState()->getTensorDesc(),
                                                                       cell_memory_init.data(), cell_memory_init.size());
            state.SetState(blob);
        } else if (name == "hidden_memory") {
            auto blob = FuncTestUtils::createAndFillBlobWithFloatArray(state.GetLastState()->getTensorDesc(),
                                                                      hidden_memory_init.data(), hidden_memory_init.size());
            state.SetState(blob);
        } else {
            GTEST_FAIL() << "unknown memory state";
        }
    }
    Infer();
    switchToNgraphFriendlyModel();
    Validate();
}
// Compares the memory-based model's inference results against the unrolled reference.
TEST_P(MemoryLSTMCellTest, CompareWithRefs) {
    Run();
}
} // namespace SubgraphTestsDefinitions