[GNA] Support of constant trivial networks (#4169)

2021-02-08 15:25:46 +03:00
parent 2db879207f
commit 42c8d1c45f
6 changed files with 106 additions and 90 deletions
--- a/inference-engine/src/gna_plugin/gna_graph_compiler.hpp
+++ b/inference-engine/src/gna_plugin/gna_graph_compiler.hpp
@@ -42,7 +42,6 @@ private:

    SplitConnection  split_connection;
    CropConnection   crop_connection;
-    ConstConnections const_connections;

    intel_dnn_component_t * find_first_unused_input(InferenceEngine::CNNLayerPtr current);

@@ -57,6 +56,7 @@ public:
    GNAPluginNS::backend::DnnComponents dnnComponents;
    MemoryConnection memory_connection;
    ConcatConnection concat_connection;
+    ConstConnections const_connections;

    void setGNAMemoryPtr(std::shared_ptr<GNAPluginNS::gna_memory_type> gnaMemPtr);
    void setDNNPtr(std::shared_ptr<GNAPluginNS::backend::AMIntelDNN> dnnPtr);
--- a/inference-engine/src/gna_plugin/gna_infer_request.hpp
+++ b/inference-engine/src/gna_plugin/gna_infer_request.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2018-2021 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //

@@ -28,9 +28,6 @@ class GNAInferRequest : public InferenceEngine::AsyncInferRequestInternal {
        if (networkOutputs.empty()) {
            THROW_GNA_EXCEPTION << "GNAInferRequest :: network has zero outputs";
        }
-        if (networkInputs.empty()) {
-            THROW_GNA_EXCEPTION << "GNAInferRequest :: network has zero inputs";
-        }

        // copy inputs blobs since we need to have them in separate address space to allow simultaneous infer requests
        for (auto output : _networkOutputs) {
--- a/inference-engine/src/gna_plugin/gna_plugin.cpp
+++ b/inference-engine/src/gna_plugin/gna_plugin.cpp
@@ -432,6 +432,60 @@ void GNAPlugin::UpdateInputScaleFromNetwork(InferenceEngine::CNNNetwork & networ
    }
 }

+// Fills outputsDesc[portId] from layer's compiled primitive, concat or const connection; false if none matches.
+bool GNAPlugin::TryToInitOutput(int portId, InferenceEngine::CNNLayerPtr layer) {
+    auto initOutput = [this, portId, layer]
+            (intel_dnn_orientation_t orientation, size_t numBytesPerElem, size_t numElem, void* outputPtr) {
+        auto & desc = outputsDesc[portId];
+        auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
+        desc.ptrs.resize(gnaFlags->gna_lib_async_threads_num);
+        desc.orientation = orientation;
+        desc.num_bytes_per_element = numBytesPerElem;
+        desc.scale_factor = quantized != nullptr ? quantized->_dst_quant.GetScale() : 1.0f;
+        desc.num_elements = numElem;
+        // binding ptr for first infer request - then others will be setup during relocation
+        gnamem->bind_ptr(&desc.ptrs.front(), outputPtr);
+    };
+
+    // probing gna_primitives
+    auto irLayerAvatar = std::find_if(
+        graphCompiler.dnnComponents.components.begin(),
+        graphCompiler.dnnComponents.components.end(),
+        [&layer](const backend::DnnComponents::storage_type::value_type & value) {
+            return value.name == layer->name;
+    });
+    if (irLayerAvatar != graphCompiler.dnnComponents.components.end()) {
+        initOutput(irLayerAvatar->dnnComponent.orientation_out, irLayerAvatar->dnnComponent.num_bytes_per_output,
+                   irLayerAvatar->dnnComponent.num_rows_out, &irLayerAvatar->dnnComponent.ptr_outputs);
+        return true;
+    }
+
+    // probing concatInfo
+    if (LayerInfo(layer).isConcat()) {
+        auto concatConnection  = graphCompiler.concat_connection.find(layer->name);
+        if (concatConnection != graphCompiler.concat_connection.end()) {
+            auto precision = layer->outData.front()->getPrecision().size();
+            initOutput(kDnnInterleavedOrientation, precision, concatConnection->second.reserved_size / precision,
+                       &concatConnection->second.gna_ptr);
+            return true;
+        }
+    }
+
+    // probing a constant info, for constant trivial networks support
+    if (LayerInfo(layer).isConst()) {
+        auto constConnection  = graphCompiler.const_connections.find(layer->name);
+        auto constBlob = layer->blobs.find("custom");  // find(): operator[] would insert a null entry
+        if (constConnection != graphCompiler.const_connections.end() &&
+            constBlob != layer->blobs.end() && constBlob->second != nullptr) {
+            initOutput(kDnnInterleavedOrientation, layer->outData.front()->getPrecision().size(),
+                       constBlob->second->size(), &constConnection->second);
+            return true;
+        }
+    }
+
+    return false;
+}
+
 static void TransposeTensorFromNCHWToNHWC(size_t precision, size_t rows, size_t columns, uint8_t* buffer, bool transpose_rows,
                                          const std::vector<TranspositionInfo> &transpositionInfo) {
    size_t weightsTotalSize = rows * columns * precision;
@@ -821,7 +875,7 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
    // keep inputs information and create input primitives
    inputsDataMap = newNet.getInputsInfo();
    if (inputsDataMap.empty()) {
-        THROW_GNA_EXCEPTION << " No inputs for the topology";
+        gnawarn() << "No inputs for the topology\n";
    }

    // keep output dims
@@ -838,37 +892,22 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
    for (auto & layer : sortedNoMem) {
        graphCompiler.CreateLayerPrimitive(layer);
    }
+
    for (auto& inputLayer : inputLayers) {
        auto layerInfo = LayerInfo(inputLayer);
        if (layerInfo.isInput() && 0 == inputsDesc->bytes_allocated_for_input[inputLayer->name]) {
            graphCompiler.connectOutput(inputLayer, &inputsDesc->getPtrInputsGlobal(inputLayer->name).front(), 0);
        }
    }
-    // TODO: graph might be static - should we support that
+
    if (graphCompiler.dnnComponents.components.empty()) {
-        THROW_GNA_EXCEPTION << "No GNA primitives created based on topology. This might indicate trivial topology";
+        gnawarn() << "No GNA primitives created based on topology. This might indicate trivial topology\n";
+        trivialTopology = true;
    }

    /// setting-up output layers information
    outputsDesc.resize(outputsDataMap.size());

-    auto initOutput = [this]
-            (int idx, const intel_dnn_component_t & component, CNNLayerPtr layer) {
-        // auto idx = std::distance(outputsDataMap.begin(), outputPort);
-        auto & desc = outputsDesc[idx];
-        auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
-
-        desc.ptrs.resize(gnaFlags->gna_lib_async_threads_num);
-        desc.orientation = component.orientation_out;
-        desc.num_bytes_per_element = component.num_bytes_per_output;
-        desc.scale_factor = quantized != nullptr ? quantized->_dst_quant.GetScale() : 1.0f;
-        // TODO: this need to be fixed
-        desc.num_elements = component.num_rows_out;
-
-        // binding ptr for first infer request - then others will be setup during relocation
-        gnamem->bind_ptr(&desc.ptrs.front(), &component.ptr_outputs);
-    };
-
    int portId = 0;
    for (auto && outPort : outputsDataMap) {
        // gets output layer pointer in original topology not in cloned
@@ -891,43 +930,9 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
        gnalog() << "[UFS] searching for : "<< outPort.first << " representation in GNA\n";
        bool stopSearching = false;

-        CNNNetDFS(outLayer, [this, &outPort, portId, &stopSearching, &initOutput](CNNLayerPtr layer) {
-            auto irLayerAvatar = std::find_if(
-                graphCompiler.dnnComponents.components.begin(),
-                graphCompiler.dnnComponents.components.end(),
-                [&layer](const backend::DnnComponents::storage_type::value_type & value) {
-                    return value.name == layer->name;
-            });
-
+        CNNNetDFS(outLayer, [this, &outPort, portId, &stopSearching](CNNLayerPtr layer) {
            gnalog() << "[UFS] from : "<< outPort.first <<" reached: " << layer->name << "\n";
-
-            // probing gna_primitives
-            if (irLayerAvatar != graphCompiler.dnnComponents.components.end()) {
-                initOutput(portId, irLayerAvatar->dnnComponent, layer);
-                stopSearching = true;
-            }
-
-            // probing concatInfo
-            if (!stopSearching && LayerInfo(layer).isConcat()) {
-                auto concatConnection  = graphCompiler.concat_connection.find(layer->name);
-                if (concatConnection != graphCompiler.concat_connection.end()) {
-                    //initOutput(portId, irLayerAvatar->second, layer);
-
-                    auto &desc = outputsDesc[portId];
-                    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
-
-                    desc.ptrs.resize(gnaFlags->gna_lib_async_threads_num);
-                    // TODO: what is orientation for concat
-                    desc.orientation = kDnnInterleavedOrientation;
-                    desc.num_bytes_per_element = layer->outData.front()->getPrecision().size();
-                    desc.scale_factor = quantized != nullptr ? quantized->_dst_quant.GetScale() : 1.0f;
-                    desc.num_elements = concatConnection->second.reserved_size / desc.num_bytes_per_element;
-
-                    // binding ptr for first infer request - then others will be setup during relocation
-                    gnamem->bind_ptr(&desc.ptrs.front(), &concatConnection->second.gna_ptr);
-                    stopSearching = true;
-                }
-            }
+            stopSearching = TryToInitOutput(portId, layer);
        }, true, [&stopSearching](InferenceEngine::CNNLayer* from) {
            return make_upstream_order(!stopSearching ? from : nullptr);
        });
@@ -963,14 +968,16 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
    dnn->component.insert(dnn->component.begin(), execOrder.begin(), execOrder.end());

    // in fp32 mode last PWL cannot be computed without that
-    dnn->InitActiveList(NULL);
+    if (!graphCompiler.dnnComponents.components.empty()) {
+        dnn->InitActiveList(NULL);
+    }

 #if GNA_LIB_VER == 2
    gnaModels.push_back(std::make_tuple(make_shared<CPPWrapper<Gna2Model>>()));
 #else
    nnets.emplace_back(make_shared<CPPWrapper<intel_nnet_type_t>>(), -1, InferenceEngine::BlobMap());
 #endif
-    if (!gnaFlags->sw_fp32) {
+    if (!gnaFlags->sw_fp32 && !graphCompiler.dnnComponents.components.empty()) {
        // number of layer gets calculated inside that InitGNAStruct function
 #if GNA_LIB_VER == 2
        dnn->InitGNAStruct(&std::get<0>(gnaModels.front())->obj);
@@ -1089,7 +1096,7 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {

 #if GNA_LIB_VER == 2
 void GNAPlugin::createRequestConfigsForGnaModels() {
-    if (!gnadevice) {
+    if (!gnadevice || trivialTopology) {
        gnaRequestConfigToRequestIdMap.push_back(std::make_tuple(FAKE_REQUEST_CONFIG_ID, -1, InferenceEngine::BlobMap()));
        return;
    }
@@ -1266,7 +1273,7 @@ uint32_t GNAPlugin::QueueInference(const InferenceEngine::BlobMap &inputs, Infer
        ++inputNum;
    }
    // If there is no gnadevice infer using reference FP32 transforamtions
-    if (!gnadevice) {
+    if (!gnadevice || trivialTopology) {
        auto runtime = runtime::FP(dnn);
        runtime.infer();
        if (freeNnet != nnets.end()) {
@@ -1311,7 +1318,7 @@ GnaWaitStatus GNAPlugin::WaitFor(uint32_t request_idx, int64_t millisTimeout) {
    // already synced TODO: might be copy required ???
    if (std::get<1>(nnets[request_idx]) == -1) return GNA_REQUEST_COMPLETED;

-    if (gnadevice) {
+    if (gnadevice && !trivialTopology) {
        const auto waitStatus = gnadevice->wait(std::get<1>(nnets[request_idx]), millisTimeout);
        if (waitStatus == GNA_REQUEST_ABORTED) {
            std::get<1>(nnets[request_idx]) = -1;
@@ -1567,7 +1574,10 @@ InferenceEngine::ExecutableNetwork GNAPlugin::ImportNetwork(std::istream& networ
    dnn->WriteGraphWizModel("gna-blob-imported.dot");
 #endif
 #if GNA_LIB_VER == 2
+    trivialTopology = (std::get<0>(gnaModels.back())->obj.NumberOfOperations == 0);
    createRequestConfigsForGnaModels();
+#else
+    trivialTopology = (std::get<0>(nnets.back())->obj.nLayers == 0);
 #endif
    return {};
 }
--- a/inference-engine/src/gna_plugin/gna_plugin.hpp
+++ b/inference-engine/src/gna_plugin/gna_plugin.hpp
@@ -84,6 +84,7 @@ class GNAPlugin : public InferenceEngine::IInferencePlugin {
    InferenceEngine::InputsDataMap inputsDataMap;
    InferenceEngine::OutputsDataMap outputsDataMap;
    std::vector<InferenceEngine::VariableStateInternal::Ptr> memoryStates;
+    bool trivialTopology = false;

 public:
    explicit GNAPlugin(const std::map<std::string, std::string>& configMap);
@@ -220,6 +221,13 @@ class GNAPlugin : public InferenceEngine::IInferencePlugin {
    void UpdateFieldsFromConfig();
    void UpdateGnaQuantModeFromNetwork(InferenceEngine::CNNNetwork &);
    void UpdateInputScaleFromNetwork(InferenceEngine::CNNNetwork &);
+    /**
+     * @brief Tries to initialize an output descriptor from the data of the given layer
+     * @param portId index of the output port
+     * @param layer layer pointer
+     * @return true if the output was initialized, false otherwise
+     */
+    bool TryToInitOutput(int portId, InferenceEngine::CNNLayerPtr layer);

    /**
     * @brief Converts a model from NCHW to NHWC. It fills inputs and outputs transposition info and
--- a/inference-engine/src/gna_plugin/layers/gna_layer_type.cpp
+++ b/inference-engine/src/gna_plugin/layers/gna_layer_type.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2018-2021 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //

@@ -19,35 +19,38 @@ GNAPluginNS::LayerType GNAPluginNS::LayerTypeFromStr(const std::string &str) {

 bool GNAPluginNS::AreLayersSupported(InferenceEngine::CNNNetwork& network, std::string& errMessage) {
    IE_SUPPRESS_DEPRECATED_START
-    InferenceEngine::CNNLayerSet inputLayers;
    InferenceEngine::InputsDataMap inputs = network.getInputsInfo();
    std::unordered_set<InferenceEngine::CNNLayer *> allLayers;
-    IE_ASSERT(!inputs.empty());
-    auto network_input_precision = inputs.begin()->second->getPrecision();
-    auto batch_size = network.getBatchSize();
-
-    if (network_input_precision != InferenceEngine::Precision::FP32 &&
-        network_input_precision != InferenceEngine::Precision::I16 &&
-        network_input_precision != InferenceEngine::Precision::U8) {
-        errMessage = "The plugin does not support input precision with " + std::string(network_input_precision.name()) + " format. Supported  input precisions "
-                                                                                                                         "FP32, I16, U8\n";
-        return false;
-    }
-
+    InferenceEngine::CNNLayerPtr startLayer;
    if (inputs.empty()) {
-        errMessage = "Network is empty (GNA)\n";
-        return false;
-    }
+        auto outputs = network.getOutputsInfo();
+        IE_ASSERT(!outputs.empty());
+        // If there are no inputs start search from an output
+        startLayer = getCreatorLayer(outputs.begin()->second).lock();
+    } else {
+        auto network_input_precision = inputs.begin()->second->getPrecision();

-    auto & secondLayers = getInputTo(inputs.begin()->second->getInputData());
-    if (secondLayers.empty()) {
-        errMessage = "Network consists of input layer only (GNA)\n";
-        return false;
+        if (network_input_precision != InferenceEngine::Precision::FP32 &&
+            network_input_precision != InferenceEngine::Precision::I16 &&
+            network_input_precision != InferenceEngine::Precision::U8) {
+            errMessage = "The plugin does not support input precision with " +
+                         std::string(network_input_precision.name()) +
+                         " format. Supported  input precisions FP32, I16, U8\n";
+            return false;
+        }
+
+        auto & secondLayers = getInputTo(inputs.begin()->second->getInputData());
+        if (secondLayers.empty()) {
+            errMessage = "Network consists of input layer only (GNA)\n";
+            return false;
+        }
+        startLayer = secondLayers.begin()->second;
    }
+    auto batch_size = network.getBatchSize();

    bool check_result = true;
    InferenceEngine::details::UnorderedDFS(allLayers,
-                                           secondLayers.begin()->second,
+                                           startLayer,
                                           [&](const InferenceEngine::CNNLayerPtr layer) {
                                               if (LayerTypeFromStr(layer->type) == LayerType::NO_TYPE) {
                                                   errMessage = "The plugin does not support layer: " + layer->name + ":" + layer->type + "\n";
--- a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_config.cpp
+++ b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_config.cpp
@@ -48,8 +48,6 @@ std::vector<std::string> disabledTestPatterns() {
        R"(.*ConcatMultiInput.CompareWithRefConstOnly.*IS=\(1.8\).*)",
        R"(.*ConcatMultiInput.CompareWithRefConstOnly.*IS=\(1.16\).*)",
        R"(.*ConcatMultiInput.CompareWithRefConstOnly.*IS=\(1.32\).*)",
-        // TODO: Issue: 40960
-        R"(.*(ConstantResultSubgraphTest).*)",
        // TODO: Issue: 29577
        R"(.*CoreThreadingTests.smoke_QueryNetwork.*)",
        //TODO: Issue: 46416