[GNA] LSTMCell fix for GNA (#2216)
parent 6d90eedbd2
commit db5aa551af
@@ -430,9 +430,8 @@ void GNAPluginNS::backend::AMIntelDNN::Propagate() {
             break;
         case kDnnCopyOp:ApplyCopy(comp);
             break;
-        default:fprintf(stderr, "Bad operation in Propagate!\n");
-            throw -1;
-            break;
+        default:
+            THROW_GNA_EXCEPTION << "Bad operation in Propagate : " << comp->operation;
         }
         // PrintOutputs(i); fflush(stdout);
     }
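The old default branch printed to stderr and executed `throw -1`, which discards the failing operation and throws an untyped integer; THROW_GNA_EXCEPTION (defined in the plugin's logging header, not shown in this diff) streams a message into a typed exception instead. A minimal sketch of how such a stream-to-throw helper can work, assuming nothing about the real macro beyond its usage syntax:

    #include <iostream>
    #include <sstream>
    #include <stdexcept>

    // Sketch only: a temporary collects streamed parts and throws a typed
    // exception carrying the full message when the statement ends.
    class ThrowStream {
    public:
        template <typename T>
        ThrowStream& operator<<(const T& value) {
            os_ << value;
            return *this;
        }
        ~ThrowStream() noexcept(false) {
            throw std::runtime_error(os_.str());
        }
    private:
        std::ostringstream os_;
    };

    int main() {
        try {
            ThrowStream() << "Bad operation in Propagate : " << 42;
        } catch (const std::exception& e) {
            std::cout << e.what() << "\n";  // message includes the operation id
        }
        return 0;
    }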
@@ -200,22 +200,6 @@ void GNAPluginNS::backend::ApplyCopy(intel_dnn_component_t *component) {
     }
 }
 
-bool GNAPluginNS::backend::isCompatibleDnn(GNAPluginNS::backend::AMIntelDNN dnn1, GNAPluginNS::backend::AMIntelDNN dnn2) {
-    bool isCompatible = true;
-
-    // compare basic structures to see if they are compatible
-    if (dnn1.num_components() != dnn2.num_components()) isCompatible = false;
-    for (int i = 0; i < dnn1.num_components(); i++) {
-        if (dnn1.component[i].num_rows_in != dnn2.component[i].num_rows_in) isCompatible = false;
-        if (dnn1.component[i].num_columns_in != dnn2.component[i].num_columns_in) isCompatible = false;
-        if (dnn1.component[i].num_rows_out != dnn2.component[i].num_rows_out) isCompatible = false;
-        if (dnn1.component[i].num_columns_out != dnn2.component[i].num_columns_out) isCompatible = false;
-        if (dnn1.component[i].operation != dnn2.component[i].operation) isCompatible = false;
-    }
-
-    return (isCompatible);
-}
-
 void GNAPluginNS::backend::ClearScoreError(intel_score_error_t *error) {
     error->num_scores = 0;
     error->num_errors = 0;
@@ -65,7 +65,6 @@ void ApplyTranspose(intel_dnn_component_t *component);
 void ApplyCopy(intel_dnn_component_t *component);
 
 void PlotFloatIntDnn(GNAPluginNS::backend::AMIntelDNN *dnn, GNAPluginNS::backend::AMIntelDNN *dnn_int);
-bool isCompatibleDnn(GNAPluginNS::backend::AMIntelDNN dnn1, GNAPluginNS::backend::AMIntelDNN dnn2);
 void ClearScoreError(intel_score_error_t *error);
 void UpdateScoreError(intel_score_error_t *error, intel_score_error_t *total_error);
 void SoftmaxGoogle(float *ptr_output, float *ptr_input, const uint32_t num_outputs, const uint32_t num_inputs);
@@ -8,32 +8,62 @@
 #include <ie_common.h>
 #include <legacy/ie_layers.h>
+#include <iomanip>
+#include <details/caseless.hpp>
+#include <layers/gna_copy_layer.hpp>
 #include "backend/dnn_types.h"
 
 #include "dnn_components.hpp"
 
 using namespace GNAPluginNS;
 using namespace GNAPluginNS::backend;
 
-intel_dnn_component_t & backend::DnnComponents::addComponent(const std::string layerName, const std::string layerMetaType) {
-    components.emplace_back(layerName, intel_dnn_component_t());
-    auto &currentComponent = components.back().second;
+intel_dnn_component_t & DnnComponents::addComponent(const std::string layerName, const std::string layerMetaType) {
+    auto isDelayed = InferenceEngine::details::CaselessEq<std::string>()(layerMetaType, DelayedCopyLayerName);
+    delayedOperations += isDelayed ? 1 : 0;
+    components.emplace_back(DnnComponentExtra{layerName, {}, isDelayed});
+    auto &currentComponent = components.back().dnnComponent;
 #ifdef PLOT
-    currentComponent.original_layer_name = components.back().first.c_str();
-    std::cout << "IR layer : " << std::left << std::setw(20) << layerName << " " << layerMetaType << "_" << components.size() - 1 << std::endl;
+    currentComponent.original_layer_name = components.back().name.c_str();
 #endif
+    int execOrder = 0;
+    if (!isDelayed) {
+        execOrder = static_cast<int>(components.size() - 1 - delayedOperations);
+    } else {
+        // todo: not perfect - propose to create mapping table that will be printed out by extra request
+        execOrder = - static_cast<int>(delayedOperations);
+    }
+
+    gnalog() << "IR layer : " << std::left << std::setw(20) << layerName << " " << layerMetaType << "_" << execOrder << std::endl;
     return currentComponent;
 }
 
-intel_dnn_component_t * backend::DnnComponents::findComponent(InferenceEngine::CNNLayerPtr __layer) {
+intel_dnn_component_t * DnnComponents::findComponent(InferenceEngine::CNNLayerPtr __layer) {
     auto component = std::find_if(begin(components),
                                   end(components),
                                   [&](storage_type ::value_type &comp) {
-                                      return comp.first == __layer->name;
+                                      return comp.name == __layer->name;
                                   });
     // check for generic prev layer
     if (component != components.end()) {
-        return &component->second;
+        return &component->dnnComponent;
     }
 
     return nullptr;
 }
+
+std::vector<intel_dnn_component_t> DnnComponents::getExecutionOrder() {
+    std::vector<intel_dnn_component_t> result(components.size());
+
+    uint32_t direct_id = 0;
+    uint32_t delayed_id = static_cast<uint32_t>(components.size() - delayedOperations);
+
+    for (auto &&c : components) {
+        uint32_t &id = c.isDelayed ? delayed_id : direct_id;
+        result[id] = c.dnnComponent;
+        id++;
+    }
+    return result;
+}
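The new getExecutionOrder places all regular components first, in insertion order, and appends the delayed copy components at the tail, which is what lets a DelayedCopy primitive execute after every producer has written its output. A self-contained sketch of that two-cursor partition (simplified types standing in for DnnComponentExtra, not the plugin's own):

    #include <cassert>
    #include <cstdint>
    #include <list>
    #include <string>
    #include <vector>

    struct Extra {
        std::string name;
        bool isDelayed;
    };

    // Direct components keep their relative order at the front; delayed
    // ones are written after them, also keeping their relative order.
    std::vector<std::string> executionOrder(const std::list<Extra>& components,
                                            uint32_t delayedOperations) {
        std::vector<std::string> result(components.size());
        uint32_t direct_id = 0;
        uint32_t delayed_id = static_cast<uint32_t>(components.size() - delayedOperations);
        for (const auto& c : components) {
            uint32_t& id = c.isDelayed ? delayed_id : direct_id;
            result[id] = c.name;
            id++;
        }
        return result;
    }

    int main() {
        std::list<Extra> comps = {{"a", false}, {"dc0", true}, {"b", false}};
        auto order = executionOrder(comps, 1);
        // the delayed component "dc0" is moved behind both direct ones
        assert((order == std::vector<std::string>{"a", "b", "dc0"}));
        return 0;
    }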
@@ -12,11 +12,21 @@
 
 namespace GNAPluginNS {
 namespace backend {
+struct DnnComponentExtra {
+    std::string name;
+    intel_dnn_component_t dnnComponent;
+    bool isDelayed;
+    DnnComponentExtra(std::string name,
+                      intel_dnn_component_t dnnComponent,
+                      bool isDelayed) :
+        name(name), dnnComponent(dnnComponent), isDelayed(isDelayed) {}
+};
+
 /**
- * maps layer name to dnn.component, in topological sort prev nodes will be initialized
+ * maps layer name to dnn.component, in topological order, or execution order
 */
 struct DnnComponents {
-    using storage_type = std::list<std::pair<std::string, intel_dnn_component_t>>;
+    using storage_type = std::list<DnnComponentExtra>;
     storage_type components;
     /**
      * @brief initializes new empty intel_dnn_component_t object
@@ -30,6 +40,14 @@ struct DnnComponents {
      * @return
      */
     intel_dnn_component_t * findComponent(InferenceEngine::CNNLayerPtr layer);
 
+    /**
+     * @brief extract components in execution order
+     */
+    std::vector<intel_dnn_component_t> getExecutionOrder();
+
+private:
+    uint32_t delayedOperations = 0;
 };
 }  // namespace backend
 }  // namespace GNAPluginNS
@@ -706,7 +706,7 @@ void GNAGraphCompiler::CopyPrimitive(InferenceEngine::CNNLayerPtr layer) {
     void* ptr_outputs = nullptr;
     auto orientation = kDnnInterleavedOrientation;
 
-    auto& currentComponent = dnnComponents.addComponent(layer->name, "copy");
+    auto& currentComponent = dnnComponents.addComponent(layer->name, layer->type);
 
     dnn->InitCopyComponent(currentComponent,
                            orientation,
@@ -1295,7 +1295,7 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
 
     auto orientation = kDnnInterleavedOrientation;
 
-    auto& copyComponent = dnnComponents.addComponent(layer->name + "_synthetic_copy", "copy");
+    auto& copyComponent = dnnComponents.addComponent(layer->name + "_synthetic_copy", CopyLayerName);
 
     dnn->InitCopyComponent(copyComponent,
                            orientation,
@@ -1774,7 +1774,8 @@ void GNAGraphCompiler::CreateLayerPrimitive(CNNLayerPtr layer) {
         {{"Reshape"}, SKIP},  // TODO: handled not in GNA but rather in GNA plugin
         {{"Squeeze"}, SKIP},  // TODO: handled not in GNA but rather in GNA plugin
         {{"Crop"}, CREATE(CropPrimitive)},
-        {{"Copy"}, CREATE(CopyPrimitive)},
+        {{CopyLayerName}, CREATE(CopyPrimitive)},
+        {{DelayedCopyLayerName}, CREATE(CopyPrimitive)},
         {{"TensorIterator"}, SKIP},
         {{"LSTMCell"}, SKIP}
     };
@@ -1786,7 +1787,17 @@ void GNAGraphCompiler::CreateLayerPrimitive(CNNLayerPtr layer) {
     }
 }
 
-void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void *ptr, size_t num_data_bytes_out) {
+void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void *ptr,
+    size_t num_data_bytes_out) {
+    auto getOffsetForBinding = [](InferenceEngine::CNNLayerPtr layer) {
+        int32_t output_offset = 0;
+        if (layer->params.find("output_offset") != layer->params.end()) {
+            output_offset = layer->GetParamAsInt("output_offset");
+        }
+        return output_offset;
+    };
+
+
     gnalog() << "Connecting output " << layer->name << " ...\n";
     // in case of Memory Layer it's input allocated in meminput layer
     if (layer->outData.size() == 1) {
@@ -1816,12 +1827,12 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void *p
             auto memorySize = InferenceEngine::details::product(nextMemoryLayer.getDims()) * nextMemoryLayer.elementSizeBytes();
 
             gnamem->reserve_ptr(&nextMemoryLayer.gna_ptr, ALIGN64(memorySize), 64);
-            gnamem->bind_ptr(ptr, &nextMemoryLayer.gna_ptr, 0);
+            gnamem->bind_ptr(ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer));
 
             nextMemoryLayer.reserved_size = ALIGN64(memorySize);
         } else {
             IE_ASSERT(nextMemoryLayer.reserved_size >= ALIGN64(num_data_bytes_out));
-            gnamem->bind_ptr(ptr, &nextMemoryLayer.gna_ptr, 0);
+            gnamem->bind_ptr(ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer));
         }
         return;
     }
@@ -2073,7 +2084,7 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
             gnamem->reserve_ptr(&memoryLayer.gna_ptr, ALIGN64(memorySize), 64);
             gnamem->bind_ptr(ptr, &memoryLayer.gna_ptr, offset);
         } else {
-            if (num_data_bytes_in > memorySize) {
+            if (num_data_bytes_in > memorySize - offset) {
                 THROW_GNA_LAYER_EXCEPTION(layer) << " invalid allocation request of "
                     << num_data_bytes_in << " is more then state tensor size of: " << memorySize;
             }
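The new getOffsetForBinding lambda makes the memory-layer binding honor an optional output_offset layer attribute instead of always binding at offset 0. The lookup-with-default pattern it relies on can be sketched independently of the plugin (a plain string map stands in for the layer's params attribute map):

    #include <cstdint>
    #include <iostream>
    #include <map>
    #include <string>

    // Read an optional integer attribute, defaulting to 0, mirroring the
    // getOffsetForBinding lambda; std::stoi stands in for GetParamAsInt.
    int32_t offsetForBinding(const std::map<std::string, std::string>& params) {
        int32_t output_offset = 0;
        auto it = params.find("output_offset");
        if (it != params.end()) {
            output_offset = std::stoi(it->second);
        }
        return output_offset;
    }

    int main() {
        std::map<std::string, std::string> withOffset{{"output_offset", "64"}};
        std::map<std::string, std::string> withoutOffset;
        std::cout << offsetForBinding(withOffset) << "\n";     // 64
        std::cout << offsetForBinding(withoutOffset) << "\n";  // 0
        return 0;
    }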
@@ -362,7 +362,9 @@ void GNAPlugin::LoadNetwork(ICNNNetwork & _network) {
     passes->registerPass<RemoveConstPass>();
     passes->registerPass<UnrollTIPass>();
     passes->registerPass<RemoveConstPass>();
+    passes->registerPass<InsertIdentityToLSTMCellPass>();
     passes->registerPass<UnrollLSTMCellPass>();
+    passes->registerPass<RemoveSingleInputConcatPass>();
 
     passes->registerPass<SubstitutePReluPass>();
     passes->registerPass<SubstituteSoftSignPass>();
@@ -556,15 +558,15 @@ void GNAPlugin::LoadNetwork(ICNNNetwork & _network) {
                 auto irLayerAvatar = std::find_if(
                     graphCompiler.dnnComponents.components.begin(),
                     graphCompiler.dnnComponents.components.end(),
-                    [&layer](std::pair<std::string, intel_dnn_component_t> & value) {
-                        return value.first == layer->name;
+                    [&layer](const backend::DnnComponents::storage_type::value_type & value) {
+                        return value.name == layer->name;
                     });
 
                 gnalog() << "[UFS] from : "<< outPort.first <<" reached: " << layer->name << "\n";
 
                 // probing gna_primitives
                 if (irLayerAvatar != graphCompiler.dnnComponents.components.end()) {
-                    initOutput(portId, irLayerAvatar->second, layer);
+                    initOutput(portId, irLayerAvatar->dnnComponent, layer);
                     stopSearching = true;
                 }
@@ -620,9 +622,8 @@ void GNAPlugin::LoadNetwork(ICNNNetwork & _network) {
                     1);
 
-    // TODO: this copy is unneeded; in fact, we can directly create gna structs from list
-    for (auto &element : graphCompiler.dnnComponents.components) {
-        dnn->component.push_back(element.second);
-    }
+    auto execOrder = graphCompiler.dnnComponents.getExecutionOrder();
+    dnn->component.insert(dnn->component.begin(), execOrder.begin(), execOrder.end());
 
     // in fp32 mode last PWL cannot be computed without that
     dnn->InitActiveList(NULL);
inference-engine/src/gna_plugin/layers/gna_copy_layer.hpp (new file, +17)
@@ -0,0 +1,17 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+namespace GNAPluginNS {
+/**
+ * GNA primitive created in sorting order for this copy layer
+ */
+static constexpr auto CopyLayerName = "Copy";
+/**
+ * GNA primitive created at the end of primitives sequence
+ */
+static constexpr auto DelayedCopyLayerName = "DelayedCopy";
+
+}  // namespace GNAPluginNS
@@ -13,6 +13,7 @@
 #include "backend/gna_types.h"
 #include "gna_permute.hpp"
 #include "gna_lib_ver_selector.hpp"
+#include "gna_copy_layer.hpp"
 
 
 namespace GNAPluginNS {
@@ -201,13 +202,13 @@ class LayerInfo {
         return isOfType("concat");
     }
     bool isNonFunctional() const noexcept {
-        return isOfType("reshape") || isOfType("squeeze") || isOfType("unsqueeze");
+        return isOfType("reshape") || isOfType("squeeze") || isOfType("unsqueeze") || isTrivialPermute();
     }
     bool isPermute() const noexcept {
         return isOfType("permute");
     }
     // @brief this not only mathematically trivial, has some WA for kaldi case
-    bool isTrivialPermute() {
+    bool isTrivialPermute() const {
         if (!isPermute()) return false;
 
         auto layerOrder = layer->GetParamAsInts("order");
@@ -269,8 +270,13 @@ class LayerInfo {
         return false;
     }
     bool isCopy() const noexcept {
-        return isOfType("copy");
+        return isOfType(CopyLayerName) || isOfType(DelayedCopyLayerName);
     }
+
+    bool isCopyDelayed() const noexcept {
+        return isOfType(DelayedCopyLayerName);
+    }
+
     size_t paddingSize() const {
         static InferenceEngine::details::caseless_set<std::string> layersWithPossiblePadding = {"FullyConnected",
                                                                                                 "InnerProduct",
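Layer-type checks in the GNA plugin are case-insensitive (the same CaselessEq used in addComponent above), which is why isCopy can match the CopyLayerName and DelayedCopyLayerName constants directly against IR layer types. A standalone sketch of that caseless comparison, simplified from InferenceEngine::details::CaselessEq:

    #include <algorithm>
    #include <cassert>
    #include <cctype>
    #include <string>

    // Simplified caseless equality, standing in for
    // InferenceEngine::details::CaselessEq<std::string>.
    bool caselessEq(const std::string& a, const std::string& b) {
        return a.size() == b.size() &&
               std::equal(a.begin(), a.end(), b.begin(), [](char x, char y) {
                   return std::tolower(static_cast<unsigned char>(x)) ==
                          std::tolower(static_cast<unsigned char>(y));
               });
    }

    int main() {
        // "Copy" / "DelayedCopy" as defined in gna_copy_layer.hpp
        assert(caselessEq("copy", "Copy"));
        assert(caselessEq("DELAYEDCOPY", "DelayedCopy"));
        assert(!caselessEq("copy", "DelayedCopy"));
        return 0;
    }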
@@ -23,6 +23,7 @@
 #include <legacy/ie_util_internal.hpp>
 #include <legacy/graph_tools.hpp>
 #include <legacy/net_pass.h>
+#include <layers/gna_copy_layer.hpp>
 
 #include "gna_plugin_log.hpp"
 #include "frontend/quantized_layer_params.hpp"
@@ -47,6 +48,7 @@ std::shared_ptr<IPassManager> BasePass::getPassManager() {
 }
 
 // indexes stored in pass manager
+static const char identityLayersCounterName[] = "identityLayerCounter";
 static const char diagonalLayersCounterName[] = "diagonalLayerCounter";
 static const char copyLayersCounter[] = "numCopyLayers";
 static const char softSignLayersCounter[] = "numSoftSignLayers";
@@ -94,12 +96,13 @@ static void insertDiagonalLayerBetween(InferenceEngine::CNNLayerPtr prevLayer,
  * @brief copy layer inserted by several passes
  * @returns pointer to newly created COPYLayer
  */
-static CNNLayerPtr InsertCopyLayer(CNNLayerPtr prevLayer, CNNLayerPtr nextLayer, int beforeIdx, std::shared_ptr<IPassManager> passmanager) {
+static CNNLayerPtr InsertCopyLayer(CNNLayerPtr prevLayer, CNNLayerPtr nextLayer, int beforeIdx,
+                                   std::shared_ptr<IPassManager> passmanager, std::string copyLayerType) {
     auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(prevLayer);
-    std::string copyName = std::string("copy_") + std::to_string(passmanager->getIntVar(copyLayersCounter)++);
+    std::string copyName = copyLayerType + std::string("_") + std::to_string(passmanager->getIntVar(copyLayersCounter)++);
     gnalog() << "Inserted " << copyName << " between: " << prevLayer->name << " and " << nextLayer->name << std::endl;
 
-    CNNLayerPtr copyLayer = std::make_shared<GenericLayer>(LayerParams({copyName, "Copy", Precision::FP32}));
+    CNNLayerPtr copyLayer = std::make_shared<GenericLayer>(LayerParams({copyName, copyLayerType, Precision::FP32}));
 
     auto inputData = nextLayer->insData[beforeIdx].lock();
     auto dataPtr = std::make_shared<Data>(copyName, inputData->getTensorDesc());
@@ -124,7 +127,7 @@ static std::vector<CNNLayerPtr> getCandidatesForIdentityInsertion(const CNNLayer
     auto PrevFunctionalLayer = [](CNNLayerPtr l, int idx = 0) {
         auto prevLayer = CNNNetPrevLayerSkipCertain(l, idx, [](CNNLayerPtr ptr) {
             return LayerInfo(ptr).isNonFunctional();
-            });
+        });
         gnalog() << "CNNNetPrevLayerSkipCertain for :: " << l->name << "returned: " << prevLayer->name << std::endl;
         return prevLayer;
     };
@@ -148,35 +151,35 @@ static std::vector<CNNLayerPtr> getCandidatesForIdentityInsertion(const CNNLayer
         auto prev1 = PrevFunctionalLayer(l, 1);
 
         switch (eltwise->_operation) {
-            case EltwiseLayer::Sub:
-            case EltwiseLayer::Sum:
-                if (!LayerInfo(prev0).has32BOutput() || !LayerInfo(prev1).has32BOutput()) {
-                    return prevLayers;
-                }
-                // TODO: whether there are possibility to select after what layer identity gets inserted
-                prevLayers.push_back(CNNNetPrevLayer(l, 0));
-                break;
-            case EltwiseLayer::Prod: {
-                if (LayerInfo(prev0).has16BOutput() && LayerInfo(prev1).has16BOutput()) {
-                    return prevLayers;
-                }
-
-                if (LayerInfo(prev0).has32BOutput()) {
-                    prevLayers.push_back(CNNNetPrevLayer(l, 0));
-                }
-
-                // if layers of outdata are different
-                auto prevData0 = l->insData[0].lock();
-                auto prevData1 = l->insData[1].lock();
-
-                if ((prev0 != prev1 || prevData0 != prevData1) && LayerInfo(prev1).has32BOutput()) {
-                    prevLayers.push_back(CNNNetPrevLayer(l, 1));
-                }
-
-                break;
-            default :
-                THROW_GNA_EXCEPTION << "Eltwise Layer of type: " << eltwise->_operation << " not supported";
+            case EltwiseLayer::Sub:
+            case EltwiseLayer::Sum:
+                if (!LayerInfo(prev0).has32BOutput() || !LayerInfo(prev1).has32BOutput()) {
+                    return prevLayers;
+                }
+                // TODO: whether there are possibility to select after what layer identity gets inserted
+                prevLayers.push_back(CNNNetPrevLayer(l, 0));
+                break;
+            case EltwiseLayer::Prod: {
+                if (LayerInfo(prev0).has16BOutput() && LayerInfo(prev1).has16BOutput()) {
+                    return prevLayers;
+                }
+
+                if (LayerInfo(prev0).has32BOutput()) {
+                    prevLayers.push_back(CNNNetPrevLayer(l, 0));
+                }
+
+                // if layers of outdata are different
+                auto prevData0 = l->insData[0].lock();
+                auto prevData1 = l->insData[1].lock();
+
+                if ((prev0 != prev1 || prevData0 != prevData1) && LayerInfo(prev1).has32BOutput()) {
+                    prevLayers.push_back(CNNNetPrevLayer(l, 1));
+                }
+
+                break;
+            }
+            default :
+                THROW_GNA_EXCEPTION << "Eltwise Layer of type: " << eltwise->_operation << " not supported";
         }
     } else if (concat != nullptr) {
         for (int i = 0; CNNNetHasPrevLayer(l.get(), i); ++i) {
@@ -624,12 +627,12 @@ void RemovePermutationsNHWCToNCHWPass::run() {
 }
 
 void InsertIdentityLayerPass::run() {
-    int numOfIdentityLayers = 0;
     auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(pLayers->front());
     for (auto & l : *pLayers) {
         for (auto && prev : getCandidatesForIdentityInsertion(l)) {
+            int numOfIdentityLayers = this->getPassManager()->getIntVar(identityLayersCounterName)++;
             // actual insertion
-            auto activationName = std::string("identity_") + std::to_string(++numOfIdentityLayers);
+            auto activationName = std::string("identity_") + std::to_string(numOfIdentityLayers);
 
             gnalog() << "Inserted "<< activationName << " between: " << prev->name << " and " << l->name << "\n" << std::flush;
 
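The identity counter now lives in the pass manager (getIntVar(identityLayersCounterName)) rather than in a pass-local variable, so every pass that inserts identity activations draws from one sequence and the generated names stay unique across passes. The shared-counter idea in isolation, with a toy registry standing in for the real IPassManager:

    #include <cassert>
    #include <map>
    #include <string>

    // Toy stand-in for the pass manager's named integer variables.
    struct CounterRegistry {
        std::map<std::string, int> vars;
        int& getIntVar(const std::string& name) { return vars[name]; }  // defaults to 0
    };

    int main() {
        CounterRegistry pm;
        // Two different passes pulling from the same counter never collide.
        std::string a = "identity_" + std::to_string(pm.getIntVar("identityLayerCounter")++);
        std::string b = "identity_" + std::to_string(pm.getIntVar("identityLayerCounter")++);
        assert(a == "identity_0" && b == "identity_1");
        return 0;
    }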
@@ -692,27 +695,34 @@ void InsertCopyLayerPass::run() {
         for (int i=0; i != prevLayers.size(); i++) {
             auto & prevIndirectLayer = prevLayers[i].first;
             bool bInsert = false;
+            /// Delayed copy layers need to be moved to the very end of processing
+            bool bInsertDelayed = false;
+
+            auto isInserted = [&bInsertDelayed, &bInsert]() {
+                return bInsert || bInsertDelayed;
+            };
 
             if (LayerInfo(l).isMemory()) {
-                if (LayerInfo(prevIndirectLayer).isConcat()) { bInsert = true;}
+                if (LayerInfo(prevIndirectLayer).isConcat() || LayerInfo(prevIndirectLayer).isCrop()) { bInsertDelayed = true;}
                 // memory usually preceded by either activation or split, or other layers in order to have 2b precision
                 for (auto && inputto : getInputTo(prevLayers[i].first->outData[prevLayers[i].second])) {
                     // if preceding layer is common for memory and concat
                     if (LayerInfo(inputto.second).isConcat()) {
-                        bInsert = true;
+                        bInsertDelayed = true;
                         break;
                     }
                 }
             }
-            if (LayerInfo(l).isConcat() && LayerInfo(prevIndirectLayer).isCrop()) { bInsert = true; }
+            if (!isInserted() && LayerInfo(l).isConcat() && LayerInfo(prevIndirectLayer).isCrop()) { bInsert = true; }
 
-            if (bInsert) {
+            if (isInserted()) {
                 if (LayerInfo(prevIndirectLayer).isCropAffined()) {
                     // The crop will be replaced by affine.
                     // Copy layer insertion is not required
                     continue;
                 }
                 auto prevLayer = CNNNetPrevLayer(l, i);
-                InsertCopyLayer(prevLayer, l, i, getPassManager());
+                InsertCopyLayer(prevLayer, l, i, getPassManager(), bInsertDelayed ? DelayedCopyLayerName : CopyLayerName);
             }
         }
     }
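The pass now distinguishes two insertion flavors: a plain Copy when a concat consumes a crop, and a DelayedCopy whenever the copy feeds a memory layer, since that write must happen after all regular primitives have run. Reduced to the decision alone (plain booleans stand in for the LayerInfo predicates, and the consumer-of-previous-layer check is collapsed into one flag):

    #include <cassert>
    #include <string>

    // Toy reduction of InsertCopyLayerPass's choice between "Copy"
    // and "DelayedCopy"; returns the chosen type and whether to insert.
    std::string chooseCopyType(bool layerIsMemory, bool prevIsConcatOrCrop,
                               bool layerIsConcat, bool prevIsCrop, bool* insert) {
        bool bInsert = false;
        bool bInsertDelayed = false;
        if (layerIsMemory && prevIsConcatOrCrop) bInsertDelayed = true;
        if (!bInsertDelayed && layerIsConcat && prevIsCrop) bInsert = true;
        *insert = bInsert || bInsertDelayed;
        return bInsertDelayed ? "DelayedCopy" : "Copy";
    }

    int main() {
        bool ins = false;
        assert(chooseCopyType(true, true, false, false, &ins) == "DelayedCopy" && ins);
        assert(chooseCopyType(false, false, true, true, &ins) == "Copy" && ins);
        chooseCopyType(false, false, false, false, &ins);
        assert(!ins);  // nothing to insert
        return 0;
    }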
@@ -1253,6 +1263,48 @@ void BroadcastConstPass::run() {
     }
 }
 
+void InsertIdentityToLSTMCellPass::run() {
+    for (auto layer : *pLayers) {
+        if (layer->type == "LSTMCell") {
+            // This fixes the cases when both functional and non-functional outputs are mixed (or no outputs are used)
+            // which results in scratch buffer being used so outputs cannot be used in form of blob or by non-functional layers
+            // downside is scaling down from i32 to i16 which may
+            for (int output_idx = 0; output_idx < layer->outData.size(); output_idx++) {
+                int numOfIdentityLayers = ((this->getPassManager())->getIntVar(identityLayersCounterName))++;
+                auto activationName = std::string("lstm_identity_") + std::to_string(numOfIdentityLayers);
+                auto& output = layer->outData[output_idx];
+                auto& input_to = getInputTo(output);
+
+                CNNLayerPtr activationLayer =
+                    std::make_shared<GenericLayer>(LayerParams({activationName, "identity", InferenceEngine::Precision::FP32}));
+
+                auto dataPtr = std::make_shared<Data>("lstm_identity_data_" + std::to_string(numOfIdentityLayers), output->getTensorDesc());
+
+                auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
+                auto activationLayerWithQuant = quantized ? InferenceEngine::injectData<QuantizedLayerParams>(activationLayer) : activationLayer;
+                getCreatorLayer(dataPtr) = activationLayerWithQuant;
+                activationLayerWithQuant->outData.push_back(dataPtr);
+                activationLayerWithQuant->insData.push_back(output);
+                auto& activationInputTo = getInputTo(dataPtr);
+
+                for (auto& input : input_to) {
+                    auto& next_layer = input.second;
+                    activationInputTo[input.first] = next_layer;
+                    for (int i = next_layer->insData.size() - 1; i >= 0; i--) {
+                        auto ins = next_layer->insData[i].lock();
+                        if (ins == output) {
+                            next_layer->insData.erase(next_layer->insData.begin() + i);
+                        }
+                    }
+                    next_layer->insData.push_back(dataPtr);
+                }
+                input_to.clear();
+                input_to[activationName] = activationLayerWithQuant;
+            }
+        }
+    }
+}
+
 void UnrollLSTMCellPass::run() {
     InferenceEngine::NetPass::UnrollRNN_if(*getPassManager()->getNetwork(), [] (const RNNCellBase& rnn) -> bool {
         if (rnn.clip != 0.0f)
@@ -1286,6 +1338,33 @@ void RemoveConstPass::run() {
     transformer.fullTrim();
 }
 
+void RemoveSingleInputConcatPass::run() {
+    for (auto &l : *pLayers) {
+        if (l->type == "Concat") {
+            auto concat = dynamic_cast<ConcatLayer*>(l.get());
+            if (concat->insData.size() == 1 && concat->outData.size() > 0) {
+                auto in = concat->insData[0];
+                auto in_layer = getCreatorLayer(in.lock());
+
+                auto out = concat->outData[0];
+
+                for (auto out_layer : getInputTo(out)) {
+                    for (int i = 0; i < out_layer.second->insData.size(); i++) {
+                        if (out_layer.second->insData[i].lock() == out) {
+                            out_layer.second->insData[i] = in;
+                            getInputTo(in.lock())[out_layer.second->name] = out_layer.second;
+                        }
+                    }
+                }
+                getInputTo(in.lock()).erase(concat->name);
+                getInputTo(out).clear();
+                concat->insData.clear();
+                concat->outData.clear();
+            }
+        }
+    }
+}
+
 void FuseMultipleIdentitiesPass::run() {
     for (auto &l : *pLayers) {
         if (l->insData.empty()) continue;
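RemoveSingleInputConcatPass bypasses a degenerate one-input Concat by rewiring every consumer of its output to read the producer's data directly, then detaching the Concat from both sides. The rewiring, reduced to a toy graph of name-keyed edges (no Inference Engine types):

    #include <cassert>
    #include <map>
    #include <string>
    #include <vector>

    // Toy graph: node -> list of consumer names (stands in for getInputTo).
    using Graph = std::map<std::string, std::vector<std::string>>;

    // Remove a pass-through node: its consumers become consumers of its
    // single producer, and the node itself is left with no edges.
    void bypass(Graph& g, const std::string& producer, const std::string& node) {
        auto& prodOut = g[producer];
        for (auto it = prodOut.begin(); it != prodOut.end();) {
            it = (*it == node) ? prodOut.erase(it) : it + 1;  // drop producer -> node
        }
        for (const auto& consumer : g[node]) {
            prodOut.push_back(consumer);  // reattach consumers to the producer
        }
        g[node].clear();
    }

    int main() {
        Graph g = {{"in", {"concat"}}, {"concat", {"next"}}};
        bypass(g, "in", "concat");
        assert(g["in"] == std::vector<std::string>{"next"});
        assert(g["concat"].empty());
        return 0;
    }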
@@ -144,6 +144,8 @@ DECL_PASS(InsertConcatAligningFilter);
 */
 DECL_PASS(ReorderConcatInputs);
 
+DECL_PASS_BEFORE_COPY(InsertIdentityToLSTMCell);
+
 /**
 * @brief unrolled LSTM cell layer in supported GNA primitives
 */
@@ -159,6 +161,10 @@ DECL_PASS_BEFORE_COPY(UnrollTI);
 */
 DECL_PASS_BEFORE_COPY(RemoveConst);
 
+/**
+*/
+DECL_PASS_BEFORE_COPY(RemoveSingleInputConcat);
+
 /**
 * @brief removed extra identity layer for multi-output
 */
@@ -581,6 +581,12 @@ bool unrollTI(CNNLayerPtr cur, ICNNNetwork& net) {
         auto& rule = first_class[i];
         auto out_data = ti->outData[rule.from];
 
+        if (num == 1) {
+            getInputTo(body_list[0].outputs[rule.to]) = getInputTo(out_data);
+            getInputTo(body_list[0].outputs[rule.to]).begin()->second->insData[0] = body_list[0].outputs[rule.to];
+            continue;
+        }
+
         std::string name = ti->name + ":out_concat_" + std::to_string(i);
         auto concat = std::make_shared<ConcatLayer>(LayerParams {name, "Concat", cur->precision});
         concat->_axis = rule.axis;
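The new num == 1 branch recognizes that a TensorIterator unrolled into a single body iteration needs no output Concat at all: the body's sole output can be wired straight to the original consumers. The shortcut in miniature:

    #include <cassert>
    #include <string>
    #include <vector>

    // If the unrolled TI produced one body copy, reuse its output directly;
    // otherwise a Concat over all per-iteration outputs is required.
    std::string wireOutput(const std::vector<std::string>& iterationOutputs) {
        if (iterationOutputs.size() == 1) {
            return iterationOutputs[0];  // direct rewiring, no Concat
        }
        return "Concat(" + std::to_string(iterationOutputs.size()) + " inputs)";
    }

    int main() {
        assert(wireOutput({"body0.out"}) == "body0.out");
        assert(wireOutput({"body0.out", "body1.out"}) == "Concat(2 inputs)");
        return 0;
    }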
@@ -0,0 +1,37 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+#include <subgraph_tests/memory_LSTMCell.hpp>
+#include "common_test_utils/test_constants.hpp"
+
+namespace SubgraphTestsDefinitions {
+std::vector<size_t> input_sizes = {
+    80,
+    32,
+    64,
+    100,
+    25
+};
+
+std::vector<size_t> hidden_sizes = {
+    128,
+    200,
+    300,
+    24,
+    32,
+};
+
+std::map<std::string, std::string> additional_config = {
+    {"GNA_COMPACT_MODE", "NO"},
+    {"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
+    {"GNA_SCALE_FACTOR_0", "1638.4"},
+};
+
+INSTANTIATE_TEST_CASE_P(MemoryLSTMCellTest, MemoryLSTMCellTest,
+    ::testing::Combine(
+        ::testing::Values(CommonTestUtils::DEVICE_GNA),
+        ::testing::Values(InferenceEngine::Precision::FP32),
+        ::testing::ValuesIn(input_sizes),
+        ::testing::ValuesIn(hidden_sizes),
+        ::testing::Values(additional_config)),
+    MemoryLSTMCellTest::getTestCaseName);
+}  // namespace SubgraphTestsDefinitions
@@ -0,0 +1,37 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+#pragma once
+
+#include "common_test_utils/test_common.hpp"
+#include "functional_test_utils/layer_test_utils.hpp"
+#include <ie_core.hpp>
+
+namespace SubgraphTestsDefinitions {
+typedef std::tuple<
+    std::string,                        // Target device name
+    InferenceEngine::Precision,         // Network precision
+    size_t,                             // Input size
+    size_t,                             // Hidden size
+    std::map<std::string, std::string>  // Configuration
+> memoryLSTMCellParams;
+
+class MemoryLSTMCellTest : public LayerTestsUtils::LayerTestsCommon,
+                           public testing::WithParamInterface<memoryLSTMCellParams> {
+private:
+    // you have to Unroll TI manually and remove memory until ngraph supports it
+    void switchToNgraphFriendlyModel();
+    // since we are switching models we need to generate and save weights, biases and inputs in SetUp
+    std::vector<float> input_bias;
+    std::vector<float> input_weights;
+    std::vector<float> hidden_memory_init;
+    std::vector<float> cell_memory_init;
+    std::vector<float> weights_vals;
+    std::vector<float> reccurrenceWeights_vals;
+    std::vector<float> bias_vals;
+protected:
+    void SetUp() override;
+    void Run() override;
+public:
+    static std::string getTestCaseName(const testing::TestParamInfo<memoryLSTMCellParams> &obj);
+};
+}  // namespace SubgraphTestsDefinitions
@@ -0,0 +1,225 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <tuple>
+#include <string>
+#include <vector>
+#include <memory>
+#include <functional>
+
+#include "ie_core.hpp"
+
+#include "common_test_utils/common_utils.hpp"
+#include "functional_test_utils/blob_utils.hpp"
+#include "functional_test_utils/precision_utils.hpp"
+#include "functional_test_utils/plugin_cache.hpp"
+#include "functional_test_utils/skip_tests_config.hpp"
+#include "ngraph_functions/utils/ngraph_helpers.hpp"
+#include "ngraph_functions/builders.hpp"
+
+#include <transformations/lstm_cell_decomposition.hpp>
+#include "subgraph_tests/memory_LSTMCell.hpp"
+
+namespace SubgraphTestsDefinitions {
+
+std::string MemoryLSTMCellTest::getTestCaseName(const testing::TestParamInfo<memoryLSTMCellParams> &obj) {
+    std::string targetDevice;
+    InferenceEngine::Precision netPrecision;
+    size_t inputSize;
+    size_t hiddenSize;
+    std::map<std::string, std::string> config;
+    std::tie(targetDevice, netPrecision, inputSize, hiddenSize, config) = obj.param;
+    std::ostringstream result;
+
+    result << "netPrecision=" << netPrecision.name() << "_";
+    result << "IS=" << inputSize << "_";
+    result << "HS=" << hiddenSize << "_";
+    result << "targetDevice=" << targetDevice;
+    return result.str();
+}
+
+size_t hiddenSize;
+
+
+void MemoryLSTMCellTest::SetUp() {
+    InferenceEngine::Precision netPrecision;
+    std::map<std::string, std::string> config;
+    size_t inputSize;
+    std::tie(targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam();
+    configuration.insert(config.begin(), config.end());
+    auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
+
+    std::vector<size_t> input_dims { 1, inputSize };
+    std::vector<size_t> squeeze_axes {0};
+    std::vector<size_t> hidden_memory_dims {1, hiddenSize};
+    std::vector<size_t> cell_memory_dims {1, hiddenSize};
+
+    const int seed = 0;
+    std::mt19937 gen(static_cast<float>(seed));
+
+    auto generateFloatNumbers = [gen](std::size_t vec_len, float min, float max) mutable {
+        std::vector<float> res;
+
+        std::uniform_real_distribution<float> dist(min, max);
+        for (int i = 0; i < vec_len; i++)
+            res.emplace_back(static_cast<float>(dist(gen)));
+
+        return res;
+    };
+
+    input_bias = generateFloatNumbers(inputSize, -0.25f, 0.0f);
+    input_weights = generateFloatNumbers(inputSize, 0.0f, 0.15f);
+    hidden_memory_init = generateFloatNumbers(hiddenSize, -0.2f, 0.2f);
+    cell_memory_init = generateFloatNumbers(hiddenSize, -0.2f, 0.2f);
+    weights_vals = generateFloatNumbers(4 * hiddenSize * inputSize, -0.1f, 0.1f);
+    reccurrenceWeights_vals = generateFloatNumbers(4 * hiddenSize * hiddenSize, -0.1f, 0.1f);
+    bias_vals = generateFloatNumbers(4 * hiddenSize, -0.25f, 0.15f);
+
+    auto input_parameter = ngraph::builder::makeParams(ngPrc, {input_dims});
+
+    auto input_add_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_bias);
+    auto add = ngraph::builder::makeEltwise(input_parameter[0], input_add_const, ngraph::helpers::EltwiseTypes::ADD);
+
+    auto input_mul_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_weights);
+    auto mul = ngraph::builder::makeEltwise(add, input_mul_const, ngraph::helpers::EltwiseTypes::MULTIPLY);
+
+    auto unsqueeze_input_const = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
+    auto unsqueeze_input = std::make_shared<ngraph::op::Unsqueeze>(mul, unsqueeze_input_const);
+
+    auto permute_in_params = std::make_shared<ngraph::opset1::Constant>(ngraph::element::i64, ngraph::Shape{3}, ngraph::Shape{{1, 0, 2}});
+    auto permute_in = std::make_shared<ngraph::opset1::Transpose>(unsqueeze_input, permute_in_params);
+
+    auto cell_memory_constant = ngraph::builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
+    auto cell_memory_read = std::make_shared<ngraph::op::ReadValue>(cell_memory_constant, "cell_memory");
+
+    auto hidden_memory_constant = ngraph::builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
+    auto hidden_memory_read = std::make_shared<ngraph::op::ReadValue>(hidden_memory_constant, "hidden_memory");
+
+    // Body - inputs
+    auto X = std::make_shared<ngraph::op::Parameter>(ngPrc, ngraph::Shape{1, 1, inputSize});
+    auto H_t = std::make_shared<ngraph::op::Parameter>(ngPrc, ngraph::Shape{1, hiddenSize});
+    auto C_t = std::make_shared<ngraph::op::Parameter>(ngPrc, ngraph::Shape{1, hiddenSize});
+    // Body - layers
+    auto squeeze_const = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
+    auto squeeze = std::make_shared<ngraph::op::Squeeze>(X, squeeze_const);
+
+    auto weightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals);
+    auto reccurrenceWeightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
+    auto biasNode = ngraph::builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
+    auto lstm = std::make_shared<ngraph::opset4::LSTMCell>(squeeze, H_t, C_t, weightsNode, reccurrenceWeightsNode, biasNode, hiddenSize);
+
+    auto unsqueeze_const = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
+    auto unsqueeze = std::make_shared<ngraph::op::Unsqueeze>(lstm->output(0), unsqueeze_const);
+    // body - outputs
+    auto H_o = lstm->output(0);
+    auto C_o = lstm->output(1);
+    auto unsqueeze_o = unsqueeze->output(0);
+
+    auto body = std::make_shared<ngraph::Function>(ngraph::OutputVector{unsqueeze_o, H_o, C_o}, ngraph::ParameterVector {X, H_t, C_t});
+    // TI construction
+    auto tensor_iterator = std::make_shared<ngraph::op::TensorIterator>();
+    tensor_iterator->set_body(body);
+    tensor_iterator->set_invariant_input(X, permute_in);
+    tensor_iterator->set_merged_input(H_t, hidden_memory_read, H_o);
+    tensor_iterator->set_merged_input(C_t, cell_memory_read, C_o);
+
+    auto out_unsqueeze = tensor_iterator->get_iter_value(unsqueeze_o, -1);
+    auto out_hidden = tensor_iterator->get_iter_value(H_o, -1);
+    auto out_cell = tensor_iterator->get_iter_value(C_o, -1);
+
+
+    out_hidden.get_tensor().set_element_type(ngPrc);
+    out_cell.get_tensor().set_element_type(ngPrc);
+
+    auto cell_memory_write = std::make_shared<ngraph::op::Assign>(out_cell, "cell_memory");
+    auto hidden_memory_write = std::make_shared<ngraph::op::Assign>(out_hidden, "hidden_memory");
+
+    auto final_reshape_pattern = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{4}, std::vector<size_t>({1, 1, 1, hiddenSize}));
+    auto final_reshape = std::make_shared<ngraph::op::v1::Reshape>(out_unsqueeze, final_reshape_pattern, false);
+
+    cell_memory_write->add_control_dependency(cell_memory_read);
+    final_reshape->add_control_dependency(cell_memory_write);
+
+    hidden_memory_write->add_control_dependency(hidden_memory_read);
+    final_reshape->add_control_dependency(hidden_memory_write);
+
+    function = std::make_shared<ngraph::Function>(final_reshape, input_parameter, "TI_with_memory");
+}
+
+void MemoryLSTMCellTest::switchToNgraphFriendlyModel() {
+    InferenceEngine::Precision netPrecision;
+    std::map<std::string, std::string> config;
+    size_t inputSize;
+    std::tie(targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam();
+    auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
+
+    std::vector<size_t> input_dims { 1, inputSize };
+    std::vector<size_t> squeeze_axes {0};
+    std::vector<size_t> hidden_memory_dims {1, hiddenSize};
+    std::vector<size_t> cell_memory_dims {1, hiddenSize};
+
+    auto input_parameter = ngraph::builder::makeParams(ngPrc, {input_dims});
+
+    auto input_add_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_bias);
+    auto add = ngraph::builder::makeEltwise(input_parameter[0], input_add_const, ngraph::helpers::EltwiseTypes::ADD);
+
+    auto input_mul_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_weights);
+    auto mul = ngraph::builder::makeEltwise(add, input_mul_const, ngraph::helpers::EltwiseTypes::MULTIPLY);
+
+    auto unsqueeze_input_const = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
+    auto unsqueeze_input = std::make_shared<ngraph::op::Unsqueeze>(mul, unsqueeze_input_const);
+
+    auto cell_memory_constant = ngraph::builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
+
+    auto hidden_memory_constant = ngraph::builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
+
+    // Body - layers
+    auto squeeze_const = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
+    auto squeeze = std::make_shared<ngraph::op::Squeeze>(unsqueeze_input, squeeze_const);
+
+    auto weightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals);
+    auto reccurrenceWeightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
+    auto biasNode = ngraph::builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
+    auto lstm = std::make_shared<ngraph::opset4::LSTMCell>(squeeze, hidden_memory_constant, cell_memory_constant, weightsNode,
+                                                           reccurrenceWeightsNode, biasNode, hiddenSize);
+
+    auto unsqueeze_const = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
+    auto unsqueeze = std::make_shared<ngraph::op::Unsqueeze>(lstm->output(0), unsqueeze_const);
+
+    auto final_reshape_pattern = std::make_shared<ngraph::op::Constant>(ngraph::element::i64,
+                                                                        ngraph::Shape{4}, std::vector<size_t>({1, 1, 1, hiddenSize}));
+    auto final_reshape = std::make_shared<ngraph::op::v1::Reshape>(unsqueeze, final_reshape_pattern, false);
+
+    function = std::make_shared<ngraph::Function>(final_reshape, input_parameter, "TI_unrolled_without_memory");
+}
+
+void MemoryLSTMCellTest::Run() {
+    SKIP_IF_CURRENT_TEST_IS_DISABLED()
+
+    ConfigurePlugin();
+    LoadNetwork();
+    auto states = executableNetwork.QueryState();
+    for (auto& state : states) {
+        auto name = state.GetName();
+        if (name == "cell_memory") {
+            auto blob = FuncTestUtils::createAndFillBlobWithFloatArray(state.GetLastState()->getTensorDesc(),
+                                                                       cell_memory_init.data(), cell_memory_init.size());
+            state.SetState(blob);
+        } else if (name == "hidden_memory") {
+            auto blob = FuncTestUtils::createAndFillBlobWithFloatArray(state.GetLastState()->getTensorDesc(),
+                                                                       hidden_memory_init.data(), hidden_memory_init.size());
+            state.SetState(blob);
+        } else {
+            GTEST_FAIL() << "unknown memory state";
+        }
+    }
+    Infer();
+    switchToNgraphFriendlyModel();
+    Validate();
+}
+
+TEST_P(MemoryLSTMCellTest, CompareWithRefs) {
+    Run();
+};
+}  // namespace SubgraphTestsDefinitions