[VPU][Benchmark app] Support trivial networks (#4060)

* [BENCHMARK APP] Support I64 input filling
* [VPU] Support trivial networks
* [VPU] Add test cases
* [VPU][Samples] Review fixes
* [Benchmark] Support scalar generating in python benchmark app
* [VPU] Allocate shape for unused inputs robustly
* [VPU] Make processing trivial cases more efficient
* [VPU] Rename isUsed->isConsumed
* [VPU][Tests][Samples] Review fixes
* [VPU] Process trivial cases at the very beginning
* [VPU] Review fixes
2021-02-09 16:00:41 +03:00
parent 2c4c3a777a
commit 61c72fe1d6
17 changed files with 212 additions and 28 deletions
--- a/inference-engine/samples/benchmark_app/inputs_filling.cpp
+++ b/inference-engine/samples/benchmark_app/inputs_filling.cpp
@@ -275,6 +275,8 @@ void fillBlobs(const std::vector<std::string>& inputFiles,
                        fillBlobBinary<short>(inputBlob, binaryFiles, batchSize, requestId, binaryInputId++, binaryInputCount);
                    } else if (item.second->getPrecision() == InferenceEngine::Precision::I32) {
                        fillBlobBinary<int32_t>(inputBlob, binaryFiles, batchSize, requestId, binaryInputId++, binaryInputCount);
+                    } else if (item.second->getPrecision() == InferenceEngine::Precision::I64) {
+                        fillBlobBinary<int64_t>(inputBlob, binaryFiles, batchSize, requestId, binaryInputId++, binaryInputCount);
                    } else if (item.second->getPrecision() == InferenceEngine::Precision::U8) {
                        fillBlobBinary<uint8_t>(inputBlob, binaryFiles, batchSize, requestId, binaryInputId++, binaryInputCount);
                    } else {
@@ -294,6 +296,8 @@ void fillBlobs(const std::vector<std::string>& inputFiles,
                        fillBlobImInfo<short>(inputBlob, batchSize, image_size);
                    } else if (item.second->getPrecision() == InferenceEngine::Precision::I32) {
                        fillBlobImInfo<int32_t>(inputBlob, batchSize, image_size);
+                    } else if (item.second->getPrecision() == InferenceEngine::Precision::I64) {
+                        fillBlobImInfo<int64_t>(inputBlob, batchSize, image_size);
                    } else {
                        THROW_IE_EXCEPTION << "Input precision is not supported for image info!";
                    }
@@ -310,6 +314,8 @@ void fillBlobs(const std::vector<std::string>& inputFiles,
                fillBlobRandom<short>(inputBlob);
            } else if (item.second->getPrecision() == InferenceEngine::Precision::I32) {
                fillBlobRandom<int32_t>(inputBlob);
+            } else if (item.second->getPrecision() == InferenceEngine::Precision::I64) {
+                fillBlobRandom<int64_t>(inputBlob);
            } else if (item.second->getPrecision() == InferenceEngine::Precision::U8) {
                fillBlobRandom<uint8_t>(inputBlob);
            } else if (item.second->getPrecision() == InferenceEngine::Precision::I8) {
--- a/inference-engine/src/vpu/graph_transformer/include/vpu/frontend/frontend.hpp
+++ b/inference-engine/src/vpu/graph_transformer/include/vpu/frontend/frontend.hpp
@@ -209,6 +209,8 @@ private:
    void parseLayer(const Model& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs,
                    const UnsupportedLayerCallback& onUnsupported, const SupportedLayerCallback& onSupported = nullptr);

+    void processTrivialCases(const Model& model);
+
 private:
    StageBuilder::Ptr _stageBuilder;
    const ie::ICore* _core = nullptr;
--- a/inference-engine/src/vpu/graph_transformer/include/vpu/model/data.hpp
+++ b/inference-engine/src/vpu/graph_transformer/include/vpu/model/data.hpp
@@ -215,6 +215,8 @@ public:

    Data getTopParentData() const;

+    bool isConsumed() const;
+
    //
    // DataDesc
    //
--- a/inference-engine/src/vpu/graph_transformer/src/backend/serialize.cpp
+++ b/inference-engine/src/vpu/graph_transformer/src/backend/serialize.cpp
@@ -63,9 +63,6 @@ int BackEnd::serializeIOInfoSection(
            VPU_INTERNAL_CHECK(data->producerEdge() == nullptr,
                "serializeIOInfoSection failed on input data {}. Input must have no producer but actually it has: {} with type {}",
                data->name(), data->producerEdge()->producer()->name(), data->producerEdge()->producer()->type());
-            VPU_INTERNAL_CHECK(data->numConsumers() != 0,
-                "serializeIOInfoSection failed on input data {}. Input must have at least one consumer but it doesn't ",
-                data->usage());
        }

        if (dataUsage == DataUsage::Output) {
--- a/inference-engine/src/vpu/graph_transformer/src/frontend/frontend.cpp
+++ b/inference-engine/src/vpu/graph_transformer/src/frontend/frontend.cpp
@@ -378,6 +378,39 @@ void FrontEnd::parseLayer(const Model& model, const ie::CNNLayerPtr& layer, cons
    }
 }

+// Links data objects that share one original IE data object (`input->output`, `const->output`) with a Copy stage.
+void FrontEnd::processTrivialCases(const Model& model) {
+    std::unordered_map<ie::DataPtr, DataVector> ieToVpuDataVector;
+    for (const auto& data : model->datas()) {
+        const auto& origData = data->origData();
+        if (origData != nullptr) {
+            ieToVpuDataVector[origData].push_back(data);
+        }
+    }
+
+    std::vector<DataVector> trivialCases;
+    for (const auto& dataObjectsWithTheSameOrigData : ieToVpuDataVector) {
+        if (dataObjectsWithTheSameOrigData.second.size() > 1) {
+            VPU_THROW_UNLESS(dataObjectsWithTheSameOrigData.second.size() == 2,
+                             "There can't be more than two data objects associated with the same original IE data object with name {}",
+                             dataObjectsWithTheSameOrigData.first->getName());
+            trivialCases.push_back(dataObjectsWithTheSameOrigData.second);
+        }
+    }
+
+    for (const auto& trivialCase : trivialCases) {
+        const auto& unconnectedOutput = trivialCase.front()->usage() == DataUsage::Output ? trivialCase.front() : trivialCase.back();
+        const auto& unconnectedInput = unconnectedOutput == trivialCase.front() ? trivialCase.back() : trivialCase.front();
+        // Fail early if neither object is a network output: `back()` above is picked only by elimination.
+        VPU_THROW_UNLESS(unconnectedOutput->usage() == DataUsage::Output,
+                         "Data object {} paired with {} must be a network output, but its usage is {}",
+                         unconnectedOutput->name(), unconnectedInput->name(), unconnectedOutput->usage());
+
+        _stageBuilder->addCopyStage(model, unconnectedInput->name() + "@copy", nullptr,
+                                    {unconnectedInput}, {unconnectedOutput}, "processTrivialCase");
+    }
+}
+
 void FrontEnd::defaultOnUnsupportedLayerCallback(const Model& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs,
                                                 const std::string& extraMessage) {
    const auto& env = CompileEnv::get();
@@ -475,6 +508,12 @@ ModelPtr FrontEnd::runCommonPasses(ie::CNNNetwork network,

        parseInputAndOutputData(model);

+        //
+        // Process trivial cases like `input->output`, `const->output`
+        //
+
+        processTrivialCases(model);
+
        if (!CompileEnv::get().config.disableConvertStages) {
            addDataTypeConvertStages(model);
        }
--- a/inference-engine/src/vpu/graph_transformer/src/frontend/in_out_convert.cpp
+++ b/inference-engine/src/vpu/graph_transformer/src/frontend/in_out_convert.cpp
@@ -80,6 +80,10 @@ void FrontEnd::addDataTypeConvertStages(const Model& model) {

                bindData(inputFP16, input->origData());

+                for (const auto consumerEdge : input->consumerEdges()) {
+                    model->replaceStageInput(consumerEdge, inputFP16);
+                }
+
                _stageBuilder->createConvertStage(
                        model,
                        inputFP16->name(),
@@ -126,6 +130,10 @@ void FrontEnd::addDataTypeConvertStages(const Model& model) {

        bindData(outputFP16, output->origData());

+        if (const auto producerEdge = output->producerEdge()) {
+            model->replaceStageOutput(producerEdge, outputFP16);
+        }
+
        const auto stage = _stageBuilder->createConvertStage(
            model,
            outputFP16->name(),
--- a/inference-engine/src/vpu/graph_transformer/src/frontend/parse_data.cpp
+++ b/inference-engine/src/vpu/graph_transformer/src/frontend/parse_data.cpp
@@ -116,21 +116,6 @@ void FrontEnd::parseInputAndOutputData(const Model& model) {
            descriptor,
            ieBlobContent(ieBlob, descriptor.type()));

-        // User might ask to return the output from Const layer.
-        if (const auto vpuOutData = getVpuData(ieData)) {
-            env.log->trace("The constant %s is network output", vpuData);
-
-            IE_ASSERT(vpuOutData->usage() == DataUsage::Output);
-
-            _stageBuilder->addCopyStage(
-                model,
-                formatString("%s@return-const", vpuData->name()),
-                nullptr,
-                vpuData,
-                vpuOutData,
-                "parseInputAndOutputData::const");
-        }
-
        bindData(vpuData, ieData);
    }
 }
--- a/inference-engine/src/vpu/graph_transformer/src/middleend/passes/allocate_resources.cpp
+++ b/inference-engine/src/vpu/graph_transformer/src/middleend/passes/allocate_resources.cpp
@@ -206,14 +206,14 @@ AllocationResult runAllocator(const Model& model, EnableShapeAllocation enableSh
    //

    if (enableShapeAllocation == EnableShapeAllocation::YES) {
-        for (const auto& stage : model->getStages()) {
-            const auto& allocateShape = [&allocator](const Data& data) {
-                if (!data->isShapeAllocated()) {
-                    const auto shapeLocation = allocator.allocateShape(data);
-                    data->setShapeAllocationInfo(shapeLocation);
-                }
-            };
+        const auto& allocateShape = [&allocator](const Data& data) {
+            if (!data->isShapeAllocated()) {
+                const auto shapeLocation = allocator.allocateShape(data);
+                data->setShapeAllocationInfo(shapeLocation);
+            }
+        };

+        for (const auto& stage : model->getStages()) {
            for (const auto& input : stage->inputs()) {
                allocateShape(input);
            }
@@ -224,6 +224,22 @@ AllocationResult runAllocator(const Model& model, EnableShapeAllocation enableSh
                allocateShape(tempBuffer);
            }
        }
+
+        // Allocate shape for unused inputs
+        DataVector unusedInputs;
+        const auto& dataObjects = model->datas();
+        std::copy_if(dataObjects.begin(), dataObjects.end(), std::back_inserter(unusedInputs), [](const Data& data) {
+            return data->usage() == DataUsage::Input && !data->isConsumed();
+        });
+        // There is no guarantee that model->datas() always contains data objects in the same order from run to run,
+        // so to stabilize allocation, and as a result, the final blob, we need to sort them
+        std::sort(unusedInputs.begin(), unusedInputs.end(), [](const Data& lhs, const Data& rhs) { return lhs->name() < rhs->name(); });
+        std::for_each(unusedInputs.begin(), unusedInputs.end(), [&allocateShape](const Data& unusedInput) { allocateShape(unusedInput); });
+
+        for (const auto& data : model->datas()) {
+            VPU_THROW_UNLESS(data->isShapeAllocated(), "Shape for data {} with usage {} is not allocated",
+                data->name(), data->usage());
+        }
    }

    return AllocationResult();
--- a/inference-engine/src/vpu/graph_transformer/src/middleend/passes/eliminate_redundant_conversions.cpp
+++ b/inference-engine/src/vpu/graph_transformer/src/middleend/passes/eliminate_redundant_conversions.cpp
@@ -29,6 +29,17 @@ void PassImpl::runForStage(const Model& model, const Stage& convert) {
    const auto input = convert->input(0);
    const auto output = convert->output(0);

+    //
+    // Check and remove the convert that was added to unused input
+    // In this case we will have the converted intermediate data object which is not consumed
+    //
+
+    if (output->usage() == DataUsage::Intermediate && !output->isConsumed()) {
+        model->removeStage(convert);
+        model->removeUnusedData(output);
+        return;
+    }
+
    //
    // We remove Convert stage if input and output data types are equal.
    // It could happen if there was a non-IO FP16 <-> FP32 conversion in
--- a/inference-engine/src/vpu/graph_transformer/src/model/data.cpp
+++ b/inference-engine/src/vpu/graph_transformer/src/model/data.cpp
@@ -31,6 +31,10 @@ namespace vpu {
 // DataNode
 //

+bool DataNode::isConsumed() const {
+    return numConsumers() > 0 || !childDataToShapeEdges().empty();  // has a consumer or at least one child data-to-shape edge
+}
+
 Data DataNode::getTopParentData() const {
    Data topParent = this;
    while (auto nextParent = topParent->parentData()) {
--- a/inference-engine/src/vpu/graph_transformer/src/model/model.cpp
+++ b/inference-engine/src/vpu/graph_transformer/src/model/model.cpp
@@ -1952,8 +1952,6 @@ void ModelObj::removeStage(const Stage& stage) {
 void ModelObj::cleanUp() {
    for (const auto& data : datas()) {
        if (data->_usage == DataUsage::Input) {
-            VPU_THROW_UNLESS(!data->_consumerEdges.empty() || !data->childDataToShapeEdges().empty(),
-                    "Input data {} must either have at least one consumer (but got zero) or be a shape data.", data->name());
            IE_ASSERT(data->_parentDataToDataEdge == nullptr);
        } else if (data->_usage == DataUsage::Output) {
            IE_ASSERT(data->_producerEdge != nullptr);
--- a/inference-engine/tests/functional/plugin/myriad/subgraph_tests/parameter_result.cpp
+++ b/inference-engine/tests/functional/plugin/myriad/subgraph_tests/parameter_result.cpp
@@ -0,0 +1,15 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "subgraph_tests/parameter_result.hpp"
+
+using namespace SubgraphTestsDefinitions;
+
+namespace {
+
+INSTANTIATE_TEST_CASE_P(smoke_Check, ParameterResultSubgraphTest,
+                        ::testing::Values(CommonTestUtils::DEVICE_MYRIAD),  // single test parameter: the MYRIAD device name
+                        ParameterResultSubgraphTest::getTestCaseName);
+
+}  // namespace
--- a/inference-engine/tests/functional/plugin/myriad/subgraph_tests/parameter_shapeof_result.cpp
+++ b/inference-engine/tests/functional/plugin/myriad/subgraph_tests/parameter_shapeof_result.cpp
@@ -0,0 +1,19 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "subgraph_tests/parameter_shapeof_result.hpp"
+
+using namespace SubgraphTestsDefinitions;
+
+namespace {
+
+INSTANTIATE_TEST_CASE_P(smoke_Check, ParameterShapeOfResultSubgraphTest,
+                        ::testing::Combine(
+                            ::testing::Values(  // input element types (first tuple field)
+                                ngraph::element::f32,
+                                ngraph::element::f16),
+                            ::testing::Values(CommonTestUtils::DEVICE_MYRIAD)),  // device name (second tuple field)
+                        ParameterShapeOfResultSubgraphTest::getTestCaseName);
+
+}  // namespace
--- a/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/parameter_shapeof_result.hpp
+++ b/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/parameter_shapeof_result.hpp
@@ -0,0 +1,15 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "shared_test_classes/subgraph/parameter_shapeof_result.hpp"
+
+namespace SubgraphTestsDefinitions {
+
+TEST_P(ParameterShapeOfResultSubgraphTest, CompareWithRefs) {
+    Run();  // NOTE(review): not declared in this fixture; presumably inherited, infers and compares with references — confirm
+}
+
+}  // namespace SubgraphTestsDefinitions
--- a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/parameter_shapeof_result.hpp
+++ b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/parameter_shapeof_result.hpp
@@ -0,0 +1,30 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "shared_test_classes/base/layer_test_utils.hpp"
+#include "ngraph_functions/builders.hpp"
+
+#include <tuple>
+#include <string>
+#include <vector>
+#include <memory>
+
+namespace SubgraphTestsDefinitions {
+
+using parameterShapeOfResultParams = std::tuple<
+    ngraph::element::Type, // Input type
+    std::string            // Device name
+>;
+
+class ParameterShapeOfResultSubgraphTest : public testing::WithParamInterface<parameterShapeOfResultParams>,
+                                           virtual public LayerTestsUtils::LayerTestsCommon {
+public:
+    static std::string getTestCaseName(testing::TestParamInfo<parameterShapeOfResultParams> obj);  // formats one parameter set as a test name
+protected:
+    void SetUp() override;  // reads the test parameters and builds the ngraph function under test
+};
+
+}  // namespace SubgraphTestsDefinitions
--- a/inference-engine/tests/functional/shared_test_classes/src/subgraph/parameter_shapeof_result.cpp
+++ b/inference-engine/tests/functional/shared_test_classes/src/subgraph/parameter_shapeof_result.cpp
@@ -0,0 +1,34 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "shared_test_classes/subgraph/parameter_shapeof_result.hpp"
+
+#include <ie_ngraph_utils.hpp>
+#include <ngraph/opsets/opset6.hpp>
+
+namespace SubgraphTestsDefinitions {
+
+// Builds the test case name "InType=<type>_TargetDevice=<device>"; the '_' keeps the two fields from running together.
+std::string ParameterShapeOfResultSubgraphTest::getTestCaseName(testing::TestParamInfo<parameterShapeOfResultParams> obj) {
+    ngraph::element::Type inType;
+    std::string targetDevice;
+    std::tie(inType, targetDevice) = obj.param;
+    std::ostringstream result;
+    result << "InType=" << inType << "_TargetDevice=" << targetDevice;
+    return result.str();
+}
+
+void ParameterShapeOfResultSubgraphTest::SetUp() {
+    ngraph::element::Type inType;
+    std::tie(inType, targetDevice) = this->GetParam();
+    inPrc = InferenceEngine::details::convertPrecision(inType);
+    // Build the function under test: Parameter {1, 3, 10, 10} -> ShapeOf -> Result
+    const auto parameter = std::make_shared<ngraph::opset6::Parameter>(inType, ngraph::Shape{1, 3, 10, 10});
+    const auto shapeOf = std::make_shared<ngraph::opset6::ShapeOf>(parameter);
+    const ngraph::ResultVector results{std::make_shared<ngraph::opset6::Result>(shapeOf)};
+    ngraph::ParameterVector params = {parameter};
+    function = std::make_shared<ngraph::Function>(results, params, "ParameterShapeOfResult");
+}
+
+}  // namespace SubgraphTestsDefinitions
--- a/tools/benchmark/utils/inputs_filling.py
+++ b/tools/benchmark/utils/inputs_filling.py
@@ -178,6 +178,7 @@ def get_dtype(precision):
    format_map = {
      'FP32' : np.float32,
      'I32'  : np.int32,
+      'I64'  : np.int64,
      'FP16' : np.float16,
      'I16'  : np.int16,
      'U16'  : np.uint16,
@@ -219,4 +220,6 @@ def fill_blob_with_image_info(image_size, layer):
    return im_info

 def fill_blob_with_random(layer):
-    return np.random.rand(*layer.shape).astype(get_dtype(layer.precision))
+    if layer.shape:
+        return np.random.rand(*layer.shape).astype(get_dtype(layer.precision))
+    return (get_dtype(layer.precision))(np.random.rand())  # empty shape: build a single scalar of the layer's dtype