[GNA] Support of constant trivial networks (#4169)

This commit is contained in:
Elizaveta Lobanova
2021-02-08 15:25:46 +03:00
committed by GitHub
parent 2db879207f
commit 42c8d1c45f
6 changed files with 106 additions and 90 deletions

View File

@@ -42,7 +42,6 @@ private:
SplitConnection split_connection;
CropConnection crop_connection;
ConstConnections const_connections;
intel_dnn_component_t * find_first_unused_input(InferenceEngine::CNNLayerPtr current);
@@ -57,6 +56,7 @@ public:
GNAPluginNS::backend::DnnComponents dnnComponents;
MemoryConnection memory_connection;
ConcatConnection concat_connection;
ConstConnections const_connections;
void setGNAMemoryPtr(std::shared_ptr<GNAPluginNS::gna_memory_type> gnaMemPtr);
void setDNNPtr(std::shared_ptr<GNAPluginNS::backend::AMIntelDNN> dnnPtr);

View File

@@ -1,4 +1,4 @@
// Copyright (C) 2018-2020 Intel Corporation
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
@@ -28,9 +28,6 @@ class GNAInferRequest : public InferenceEngine::AsyncInferRequestInternal {
if (networkOutputs.empty()) {
THROW_GNA_EXCEPTION << "GNAInferRequest :: network has zero outputs";
}
if (networkInputs.empty()) {
THROW_GNA_EXCEPTION << "GNAInferRequest :: network has zero inputs";
}
// copy inputs blobs since we need to have them in separate address space to allow simultaneous infer requests
for (auto output : _networkOutputs) {

View File

@@ -432,6 +432,60 @@ void GNAPlugin::UpdateInputScaleFromNetwork(InferenceEngine::CNNNetwork & networ
}
}
bool GNAPlugin::TryToInitOutput(int portId, InferenceEngine::CNNLayerPtr layer) {
auto initOutput = [this, portId, layer]
(intel_dnn_orientation_t orientation, size_t numBytesPerElem, size_t numElem, void* outputPtr) {
auto & desc = outputsDesc[portId];
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
desc.ptrs.resize(gnaFlags->gna_lib_async_threads_num);
desc.orientation = orientation;
desc.num_bytes_per_element = numBytesPerElem;
desc.scale_factor = quantized != nullptr ? quantized->_dst_quant.GetScale() : 1.0f;
desc.num_elements = numElem;
// binding ptr for first infer request - then others will be setup during relocation
gnamem->bind_ptr(&desc.ptrs.front(), outputPtr);
};
// probing gna_primitives
auto irLayerAvatar = std::find_if(
graphCompiler.dnnComponents.components.begin(),
graphCompiler.dnnComponents.components.end(),
[&layer](const backend::DnnComponents::storage_type::value_type & value) {
return value.name == layer->name;
});
if (irLayerAvatar != graphCompiler.dnnComponents.components.end()) {
initOutput(irLayerAvatar->dnnComponent.orientation_out, irLayerAvatar->dnnComponent.num_bytes_per_output,
irLayerAvatar->dnnComponent.num_rows_out, &irLayerAvatar->dnnComponent.ptr_outputs);
return true;
}
// probing concatInfo
if (LayerInfo(layer).isConcat()) {
auto concatConnection = graphCompiler.concat_connection.find(layer->name);
if (concatConnection != graphCompiler.concat_connection.end()) {
auto precision = layer->outData.front()->getPrecision().size();
initOutput(kDnnInterleavedOrientation, precision, concatConnection->second.reserved_size / precision,
&concatConnection->second.gna_ptr);
return true;
}
}
// probing a constant info, for constant trivial networks support
if (LayerInfo(layer).isConst()) {
auto const_blob = layer->blobs["custom"];
auto constConnection = graphCompiler.const_connections.find(layer->name);
if (constConnection != graphCompiler.const_connections.end()) {
initOutput(kDnnInterleavedOrientation, layer->outData.front()->getPrecision().size(),
const_blob->size(), &constConnection->second);
return true;
}
}
return false;
}
static void TransposeTensorFromNCHWToNHWC(size_t precision, size_t rows, size_t columns, uint8_t* buffer, bool transpose_rows,
const std::vector<TranspositionInfo> &transpositionInfo) {
size_t weightsTotalSize = rows * columns * precision;
@@ -821,7 +875,7 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
// keep inputs information and create input primitives
inputsDataMap = newNet.getInputsInfo();
if (inputsDataMap.empty()) {
THROW_GNA_EXCEPTION << " No inputs for the topology";
gnawarn() << "No inputs for the topology\n";
}
// keep output dims
@@ -838,37 +892,22 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
for (auto & layer : sortedNoMem) {
graphCompiler.CreateLayerPrimitive(layer);
}
for (auto& inputLayer : inputLayers) {
auto layerInfo = LayerInfo(inputLayer);
if (layerInfo.isInput() && 0 == inputsDesc->bytes_allocated_for_input[inputLayer->name]) {
graphCompiler.connectOutput(inputLayer, &inputsDesc->getPtrInputsGlobal(inputLayer->name).front(), 0);
}
}
// TODO: graph might be static - should we support that
if (graphCompiler.dnnComponents.components.empty()) {
THROW_GNA_EXCEPTION << "No GNA primitives created based on topology. This might indicate trivial topology";
gnawarn() << "No GNA primitives created based on topology. This might indicate trivial topology\n";
trivialTopology = true;
}
/// setting-up output layers information
outputsDesc.resize(outputsDataMap.size());
auto initOutput = [this]
(int idx, const intel_dnn_component_t & component, CNNLayerPtr layer) {
// auto idx = std::distance(outputsDataMap.begin(), outputPort);
auto & desc = outputsDesc[idx];
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
desc.ptrs.resize(gnaFlags->gna_lib_async_threads_num);
desc.orientation = component.orientation_out;
desc.num_bytes_per_element = component.num_bytes_per_output;
desc.scale_factor = quantized != nullptr ? quantized->_dst_quant.GetScale() : 1.0f;
// TODO: this need to be fixed
desc.num_elements = component.num_rows_out;
// binding ptr for first infer request - then others will be setup during relocation
gnamem->bind_ptr(&desc.ptrs.front(), &component.ptr_outputs);
};
int portId = 0;
for (auto && outPort : outputsDataMap) {
// gets output layer pointer in original topology not in cloned
@@ -891,43 +930,9 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
gnalog() << "[UFS] searching for : "<< outPort.first << " representation in GNA\n";
bool stopSearching = false;
CNNNetDFS(outLayer, [this, &outPort, portId, &stopSearching, &initOutput](CNNLayerPtr layer) {
auto irLayerAvatar = std::find_if(
graphCompiler.dnnComponents.components.begin(),
graphCompiler.dnnComponents.components.end(),
[&layer](const backend::DnnComponents::storage_type::value_type & value) {
return value.name == layer->name;
});
CNNNetDFS(outLayer, [this, &outPort, portId, &stopSearching](CNNLayerPtr layer) {
gnalog() << "[UFS] from : "<< outPort.first <<" reached: " << layer->name << "\n";
// probing gna_primitives
if (irLayerAvatar != graphCompiler.dnnComponents.components.end()) {
initOutput(portId, irLayerAvatar->dnnComponent, layer);
stopSearching = true;
}
// probing concatInfo
if (!stopSearching && LayerInfo(layer).isConcat()) {
auto concatConnection = graphCompiler.concat_connection.find(layer->name);
if (concatConnection != graphCompiler.concat_connection.end()) {
//initOutput(portId, irLayerAvatar->second, layer);
auto &desc = outputsDesc[portId];
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
desc.ptrs.resize(gnaFlags->gna_lib_async_threads_num);
// TODO: what is orientation for concat
desc.orientation = kDnnInterleavedOrientation;
desc.num_bytes_per_element = layer->outData.front()->getPrecision().size();
desc.scale_factor = quantized != nullptr ? quantized->_dst_quant.GetScale() : 1.0f;
desc.num_elements = concatConnection->second.reserved_size / desc.num_bytes_per_element;
// binding ptr for first infer request - then others will be setup during relocation
gnamem->bind_ptr(&desc.ptrs.front(), &concatConnection->second.gna_ptr);
stopSearching = true;
}
}
stopSearching = TryToInitOutput(portId, layer);
}, true, [&stopSearching](InferenceEngine::CNNLayer* from) {
return make_upstream_order(!stopSearching ? from : nullptr);
});
@@ -963,14 +968,16 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
dnn->component.insert(dnn->component.begin(), execOrder.begin(), execOrder.end());
// in fp32 mode last PWL cannot be computed without that
dnn->InitActiveList(NULL);
if (!graphCompiler.dnnComponents.components.empty()) {
dnn->InitActiveList(NULL);
}
#if GNA_LIB_VER == 2
gnaModels.push_back(std::make_tuple(make_shared<CPPWrapper<Gna2Model>>()));
#else
nnets.emplace_back(make_shared<CPPWrapper<intel_nnet_type_t>>(), -1, InferenceEngine::BlobMap());
#endif
if (!gnaFlags->sw_fp32) {
if (!gnaFlags->sw_fp32 && !graphCompiler.dnnComponents.components.empty()) {
// number of layer gets calculated inside that InitGNAStruct function
#if GNA_LIB_VER == 2
dnn->InitGNAStruct(&std::get<0>(gnaModels.front())->obj);
@@ -1089,7 +1096,7 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
#if GNA_LIB_VER == 2
void GNAPlugin::createRequestConfigsForGnaModels() {
if (!gnadevice) {
if (!gnadevice || trivialTopology) {
gnaRequestConfigToRequestIdMap.push_back(std::make_tuple(FAKE_REQUEST_CONFIG_ID, -1, InferenceEngine::BlobMap()));
return;
}
@@ -1266,7 +1273,7 @@ uint32_t GNAPlugin::QueueInference(const InferenceEngine::BlobMap &inputs, Infer
++inputNum;
}
// If there is no gnadevice infer using reference FP32 transforamtions
if (!gnadevice) {
if (!gnadevice || trivialTopology) {
auto runtime = runtime::FP(dnn);
runtime.infer();
if (freeNnet != nnets.end()) {
@@ -1311,7 +1318,7 @@ GnaWaitStatus GNAPlugin::WaitFor(uint32_t request_idx, int64_t millisTimeout) {
// already synced TODO: might be copy required ???
if (std::get<1>(nnets[request_idx]) == -1) return GNA_REQUEST_COMPLETED;
if (gnadevice) {
if (gnadevice && !trivialTopology) {
const auto waitStatus = gnadevice->wait(std::get<1>(nnets[request_idx]), millisTimeout);
if (waitStatus == GNA_REQUEST_ABORTED) {
std::get<1>(nnets[request_idx]) = -1;
@@ -1567,7 +1574,10 @@ InferenceEngine::ExecutableNetwork GNAPlugin::ImportNetwork(std::istream& networ
dnn->WriteGraphWizModel("gna-blob-imported.dot");
#endif
#if GNA_LIB_VER == 2
trivialTopology = (std::get<0>(gnaModels.back())->obj.NumberOfOperations == 0);
createRequestConfigsForGnaModels();
#else
trivialTopology = (std::get<0>(nnets.back())->obj.nLayers == 0);
#endif
return {};
}

View File

@@ -84,6 +84,7 @@ class GNAPlugin : public InferenceEngine::IInferencePlugin {
InferenceEngine::InputsDataMap inputsDataMap;
InferenceEngine::OutputsDataMap outputsDataMap;
std::vector<InferenceEngine::VariableStateInternal::Ptr> memoryStates;
bool trivialTopology = false;
public:
explicit GNAPlugin(const std::map<std::string, std::string>& configMap);
@@ -220,6 +221,13 @@ class GNAPlugin : public InferenceEngine::IInferencePlugin {
void UpdateFieldsFromConfig();
void UpdateGnaQuantModeFromNetwork(InferenceEngine::CNNNetwork &);
void UpdateInputScaleFromNetwork(InferenceEngine::CNNNetwork &);
/**
* @brief Tries to init an output on the base of a layer data
* @param portId output port identificator
* @param layer layer pointer
* @return true if the output is initiated, false otherwise
*/
bool TryToInitOutput(int portId, InferenceEngine::CNNLayerPtr layer);
/**
* @brief Converts a model from NCHW to NHWC. It fills inputs and outputs transposition info and

View File

@@ -1,4 +1,4 @@
// Copyright (C) 2018-2020 Intel Corporation
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
@@ -19,35 +19,38 @@ GNAPluginNS::LayerType GNAPluginNS::LayerTypeFromStr(const std::string &str) {
bool GNAPluginNS::AreLayersSupported(InferenceEngine::CNNNetwork& network, std::string& errMessage) {
IE_SUPPRESS_DEPRECATED_START
InferenceEngine::CNNLayerSet inputLayers;
InferenceEngine::InputsDataMap inputs = network.getInputsInfo();
std::unordered_set<InferenceEngine::CNNLayer *> allLayers;
IE_ASSERT(!inputs.empty());
auto network_input_precision = inputs.begin()->second->getPrecision();
auto batch_size = network.getBatchSize();
if (network_input_precision != InferenceEngine::Precision::FP32 &&
network_input_precision != InferenceEngine::Precision::I16 &&
network_input_precision != InferenceEngine::Precision::U8) {
errMessage = "The plugin does not support input precision with " + std::string(network_input_precision.name()) + " format. Supported input precisions "
"FP32, I16, U8\n";
return false;
}
InferenceEngine::CNNLayerPtr startLayer;
if (inputs.empty()) {
errMessage = "Network is empty (GNA)\n";
return false;
}
auto outputs = network.getOutputsInfo();
IE_ASSERT(!outputs.empty());
// If there are no inputs start search from an output
startLayer = getCreatorLayer(outputs.begin()->second).lock();
} else {
auto network_input_precision = inputs.begin()->second->getPrecision();
auto & secondLayers = getInputTo(inputs.begin()->second->getInputData());
if (secondLayers.empty()) {
errMessage = "Network consists of input layer only (GNA)\n";
return false;
if (network_input_precision != InferenceEngine::Precision::FP32 &&
network_input_precision != InferenceEngine::Precision::I16 &&
network_input_precision != InferenceEngine::Precision::U8) {
errMessage = "The plugin does not support input precision with " +
std::string(network_input_precision.name()) +
" format. Supported input precisions FP32, I16, U8\n";
return false;
}
auto & secondLayers = getInputTo(inputs.begin()->second->getInputData());
if (secondLayers.empty()) {
errMessage = "Network consists of input layer only (GNA)\n";
return false;
}
startLayer = secondLayers.begin()->second;
}
auto batch_size = network.getBatchSize();
bool check_result = true;
InferenceEngine::details::UnorderedDFS(allLayers,
secondLayers.begin()->second,
startLayer,
[&](const InferenceEngine::CNNLayerPtr layer) {
if (LayerTypeFromStr(layer->type) == LayerType::NO_TYPE) {
errMessage = "The plugin does not support layer: " + layer->name + ":" + layer->type + "\n";

View File

@@ -48,8 +48,6 @@ std::vector<std::string> disabledTestPatterns() {
R"(.*ConcatMultiInput.CompareWithRefConstOnly.*IS=\(1.8\).*)",
R"(.*ConcatMultiInput.CompareWithRefConstOnly.*IS=\(1.16\).*)",
R"(.*ConcatMultiInput.CompareWithRefConstOnly.*IS=\(1.32\).*)",
// TODO: Issue: 40960
R"(.*(ConstantResultSubgraphTest).*)",
// TODO: Issue: 29577
R"(.*CoreThreadingTests.smoke_QueryNetwork.*)",
//TODO: Issue: 46416