[IE][VPU]: Introduces annotations of stages memory types (#1991)

* Moves splitLargeKernelConv tests to unit tests
  Originally, the file with the tests was placed in the wrong location, so it was not integrated into any testing application. Now it is part of the unit tests on VPU. The test itself has been disabled due to an issue with NCE unit usage described in #-33366.
* Introduces a pass for I/O memory types annotation of stages
  It is useful to see where inputs and outputs are located in the performance report when analysing possible issues.
* Introduces endsWith and tuple2Vector utilities
  endsWith checks whether the source has a suffix equal to the second argument. tuple2Vector converts a tuple of arbitrary size whose elements all have the same type to a vector. It can be useful when working with gtest parameter generators that have std::tuple as their return type.
* Introduces unit tests on annotating stages memory types
* Introduces missing format placeholders
* Makes memory types annotation optional
  Enables the private option "enableMemoryTypesAnnotation", which is disabled by default. Disabling annotation by default avoids issues with tests that rely on stage names.

Signed-off-by: Gladilov, Gleb <gleb.gladilov@intel.com>
2020-09-04 14:33:10 +03:00 · 2020-09-04 14:33:10 +03:00 · 4ecdd19653
commit 4ecdd19653
parent 9df59284bc
12 changed files with 231 additions and 23 deletions
--- a/inference-engine/src/vpu/graph_transformer/include/vpu/graph_transformer.hpp
+++ b/inference-engine/src/vpu/graph_transformer/include/vpu/graph_transformer.hpp
@ -109,6 +109,7 @@ struct CompilationConfig final {
    bool enableReplaceWithReduceMean = true;
    bool enableTensorIteratorUnrolling = false;
    bool forcePureTensorIterator = false;
+    bool enableMemoryTypesAnnotation = false;

    //
    // Deprecated options
--- a/inference-engine/src/vpu/graph_transformer/include/vpu/middleend/pass_manager.hpp
+++ b/inference-engine/src/vpu/graph_transformer/include/vpu/middleend/pass_manager.hpp
@ -246,6 +246,8 @@ public:

    Pass::Ptr propagateDynamism();

+    Pass::Ptr annotateMemoryTypes();
+
 protected:
    StageBuilder::Ptr _stageBuilder;
    BackEnd::Ptr _backEnd;
--- a/inference-engine/src/vpu/graph_transformer/include/vpu/private_plugin_config.hpp
+++ b/inference-engine/src/vpu/graph_transformer/include/vpu/private_plugin_config.hpp
@ -35,6 +35,7 @@ DECLARE_VPU_CONFIG(MYRIAD_PERF_REPORT_MODE);
 DECLARE_VPU_CONFIG(MYRIAD_PER_LAYER);
 DECLARE_VPU_CONFIG(MYRIAD_PER_STAGE);

+DECLARE_VPU_CONFIG(MYRIAD_ENABLE_MEMORY_TYPES_ANNOTATION);
 //
 // Debug options
 //
--- a/inference-engine/src/vpu/graph_transformer/src/middleend/pass_manager.cpp
+++ b/inference-engine/src/vpu/graph_transformer/src/middleend/pass_manager.cpp
@ -344,6 +344,11 @@ PassSet::Ptr PassManager::buildMiddleEnd() {
    ADD_PASS(markFastStages);
    ADD_DUMP_PASS("markFastStages");

+    if (env.config.enableMemoryTypesAnnotation) {
+        ADD_PASS(annotateMemoryTypes);
+        ADD_DUMP_PASS("annotateMemoryTypes");
+    }
+
    //
    // Final check
    //
--- a/inference-engine/src/vpu/graph_transformer/src/middleend/passes/annotate_memory_types.cpp
+++ b/inference-engine/src/vpu/graph_transformer/src/middleend/passes/annotate_memory_types.cpp
@ -0,0 +1,39 @@
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "vpu/middleend/pass_manager.hpp"
+
+namespace vpu {
+
+namespace {
+
+// Collects the memory requirement (memReqs()) of every data object in
+// dataObjects, keeping the iteration order of the container.
+template<class DataObjects>
+std::vector<vpu::MemoryType> gather(const DataObjects& dataObjects) {
+    std::vector<vpu::MemoryType> memoryTypes;
+    memoryTypes.reserve(dataObjects.size());
+    for (const Data& data : dataObjects) {
+        memoryTypes.push_back(data->memReqs());
+    }
+    return memoryTypes;
+}
+
+// Pass that appends "@<inputs>-><outputs>" to the name of every stage, where
+// <inputs> and <outputs> are the memory requirements of the stage's input and
+// output data as formatted by printTo.
+class PassImpl final : public Pass {
+public:
+    void run(const Model& model) override {
+        for (const auto& stage : model->getStages()) {
+            const auto inputTypes = gather(stage->inputs());
+            const auto outputTypes = gather(stage->outputs());
+
+            std::stringstream annotation;
+            annotation << "@";
+            printTo(annotation, inputTypes);
+            annotation << "->";
+            printTo(annotation, outputTypes);
+
+            stage->appendNamePostfix(annotation.str());
+        }
+    }
+};
+
+}  // namespace
+
+// Creates the pass that annotates stage names with the memory types of the
+// stage inputs and outputs.
+Pass::Ptr PassManager::annotateMemoryTypes() {
+    Pass::Ptr pass = std::make_shared<PassImpl>();
+    return pass;
+}
+
+}  // namespace vpu
--- a/inference-engine/src/vpu/graph_transformer/src/parsed_config.cpp
+++ b/inference-engine/src/vpu/graph_transformer/src/parsed_config.cpp
@ -227,6 +227,7 @@ void ParsedConfig::parse(const std::map<std::string, std::string>& config) {
    setOption(_printReceiveTensorTime,                       switches, config, ie::MYRIAD_ENABLE_RECEIVING_TENSOR_TIME);
    setOption(_perfCount,                                    switches, config, CONFIG_KEY(PERF_COUNT));
    setOption(_perfReport,                                perfReports, config, ie::MYRIAD_PERF_REPORT_MODE);
+    setOption(_compileConfig.enableMemoryTypesAnnotation,    switches, config, ie::MYRIAD_ENABLE_MEMORY_TYPES_ANNOTATION);

 IE_SUPPRESS_DEPRECATED_START
    setOption(_compileConfig.hwOptimization,                 switches, config, VPU_CONFIG_KEY(HW_STAGES_OPTIMIZATION));
--- a/inference-engine/src/vpu/graph_transformer/src/stages/dynamic_shape_resolver.cpp
+++ b/inference-engine/src/vpu/graph_transformer/src/stages/dynamic_shape_resolver.cpp
@ -51,7 +51,7 @@ void FrontEnd::parseDSR(const Model& model, const ie::CNNLayerPtr& layer, const
            "data usage, actual: {}", layer->name, layer->type, 0, data->name(), data->usage());
        const auto& origData = dataOutput->origData();
        VPU_THROW_UNLESS(origData != nullptr,
-            "Parsing layer {} of type {} failed: output data {} must have original IE data",
+            "Parsing layer {} of type {} failed: output data with index {} (of name {}) must have original IE data",
            layer->name, layer->type, 0, dataOutput->name());

        bindData(data, origData);
@ -60,7 +60,7 @@ void FrontEnd::parseDSR(const Model& model, const ie::CNNLayerPtr& layer, const
    } else {
        VPU_THROW_UNLESS(data->usage() == DataUsage::Intermediate,
            "Parsing layer {} of type {} failed: if input with index {} (of name {}) has a producer, it must have Intermediate "
-            "data usage, actual: ", layer->name, layer->type, 0, data->name(), data->usage());
+            "data usage, actual: {}", layer->name, layer->type, 0, data->name(), data->usage());

        if (auto dataToShapeEdge = data->parentDataToShapeEdge()) {
            const auto& parent = dataToShapeEdge->parent();
--- a/inference-engine/tests/ie_test_utils/common_test_utils/common_utils.hpp
+++ b/inference-engine/tests/ie_test_utils/common_test_utils/common_utils.hpp
@ -82,4 +82,39 @@ std::vector<std::pair<std::vector<size_t>, std::vector<elementTypeVector>>>
    return resVec;
 }

+// Returns true when source ends with expectedSuffix. An empty suffix matches
+// any source; a suffix longer than source never matches.
+inline bool endsWith(const std::string& source, const std::string& expectedSuffix) {
+    if (source.size() < expectedSuffix.size()) {
+        return false;
+    }
+    // Compare the tail of source, starting where the suffix would begin.
+    const auto suffixOffset = source.size() - expectedSuffix.size();
+    return source.compare(suffixOffset, std::string::npos, expectedSuffix) == 0;
+}
+
+// Compile-time list of indices (C++11 stand-in for an index sequence).
+// `next` names the same list extended by one more index.
+template<std::size_t... Is>
+struct Indices {
+    typedef Indices<Is..., sizeof...(Is)> next;
+};
+
+// Builds Indices<0, 1, ..., Count - 1> by extending the list built for Count - 1.
+template<std::size_t Count>
+struct MakeIndices {
+    typedef typename MakeIndices<Count - 1>::value::next value;
+};
+
+// Recursion terminator: an empty index list.
+template<>
+struct MakeIndices<0> {
+    typedef Indices<> value;
+};
+
+// Returns the index list whose length equals the number of elements of TupleType.
+template<class TupleType>
+constexpr typename MakeIndices<std::tuple_size<typename std::decay<TupleType>::type>::value>::value makeIndices() {
+    return {};
+}
+
+// Implementation helper: expands the tuple elements selected by Is... into a
+// vector whose element type is the type of the tuple's first element.
+template<class TupleType, std::size_t... Is>
+std::vector<typename std::tuple_element<0, typename std::decay<TupleType>::type>::type> tuple2Vector(TupleType&& tuple, Indices<Is...>) {
+    using std::get;
+    return {{ get<Is>(std::forward<TupleType>(tuple))... }};
+}
+
+// Converts a tuple to a vector holding all of its elements in order.
+// The element type of the result is the type of the tuple's first element.
+template<class TupleType>
+inline auto tuple2Vector(TupleType&& tuple) -> decltype(tuple2Vector(std::declval<TupleType>(), makeIndices<TupleType>())) {
+    return tuple2Vector(std::forward<TupleType>(tuple), makeIndices<TupleType>());
+}
+
 }  // namespace CommonTestUtils
--- a/inference-engine/tests/unit/vpu/base/graph_transformer_tests.cpp
+++ b/inference-engine/tests/unit/vpu/base/graph_transformer_tests.cpp
@ -51,11 +51,11 @@ InputInfo InputInfo::fromNetwork(int ind) {
    return info;
 }

-InputInfo InputInfo::fromPrevStage(int ind) {
+InputInfo InputInfo::fromPrevStage(int ind, int outputInd) {
    InputInfo info;
    info.type = InputType::PrevStageOutput;
    info.prevStageInd = ind;
-    info.prevStageOutputInd = 0;
+    info.prevStageOutputInd = outputInd;
    return info;
 }

@ -79,6 +79,14 @@ OutputInfo OutputInfo::intermediate(const DataDesc& desc) {
    return info;
 }

+// Describes an intermediate stage output that uses a default-constructed data
+// descriptor and the given memory requirement.
+OutputInfo OutputInfo::intermediate(MemoryType memReq) {
+    OutputInfo outputInfo;
+    outputInfo.memReq = memReq;
+    outputInfo.desc = DataDesc{};
+    outputInfo.type = OutputType::Intermediate;
+    return outputInfo;
+}
+
 void TestStage::propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) {
    setInOutPortInfo(this, "DataOrder", orderInfo);
 }
@ -124,7 +132,8 @@ const StageVector& TestModel::getStages() const {
    return _stages;
 }

-void TestModel::createInputs(std::vector<DataDesc> inputDescs) {
+void TestModel::createInputs(std::vector<DataDesc> descriptors) {
+    const auto& inputDescs = descriptors.empty() ? std::vector<DataDesc>{DataDesc{}} : descriptors;
    const auto numInputs = inputDescs.size();

    _model->attrs().set<int>("numInputs", numInputs);
@ -135,7 +144,8 @@ void TestModel::createInputs(std::vector<DataDesc> inputDescs) {
    }
 }

-void TestModel::createOutputs(std::vector<DataDesc> outputDescs) {
+void TestModel::createOutputs(std::vector<DataDesc> descriptors) {
+    const auto& outputDescs = descriptors.empty() ? std::vector<DataDesc>{DataDesc{}} : descriptors;
    const auto numOutputs = outputDescs.size();

    _model->attrs().set<int>("numOutputs", numOutputs);
@ -146,9 +156,7 @@ void TestModel::createOutputs(std::vector<DataDesc> outputDescs) {
    }
 }

-Stage TestModel::addStage(
-        std::initializer_list<InputInfo> curInputInfos,
-        std::initializer_list<OutputInfo> curOutputInfos) {
+Stage TestModel::addStage(const std::vector<InputInfo>& curInputInfos, const std::vector<OutputInfo>& curOutputInfos) {
    DataVector curInputs;
    for (const auto& info : curInputInfos) {
        if (info.type == InputType::Original) {
@ -163,7 +171,9 @@ Stage TestModel::addStage(
        if (info.type == OutputType::Original) {
            curOutputs.push_back(_outputs.at(info.originalOutputInd));
        } else {
-            curOutputs.push_back(_model->addNewData(formatString("Data %d / %d", _stages.size(), curOutputs.size()), info.desc));
+            auto data = _model->addNewData(formatString("Data %d / %d", _stages.size(), curOutputs.size()), info.desc);
+            data->setMemReqs(info.memReq);
+            curOutputs.push_back(std::move(data));
        }
    }

--- a/inference-engine/tests/unit/vpu/base/graph_transformer_tests.hpp
+++ b/inference-engine/tests/unit/vpu/base/graph_transformer_tests.hpp
@ -57,7 +57,7 @@ struct InputInfo final {

    static InputInfo fromNetwork(int ind = 0);

-    static InputInfo fromPrevStage(int ind);
+    static InputInfo fromPrevStage(int ind, int outputInd = 0);

    InputInfo& output(int ind);
 };
@ -71,10 +71,12 @@ struct OutputInfo final {
    OutputType type = OutputType::Original;
    int originalOutputInd = -1;
    DataDesc desc = DataDesc();
+    MemoryType memReq = MemoryType::DDR;

    static OutputInfo fromNetwork(int ind = 0);

    static OutputInfo intermediate(const DataDesc& desc = DataDesc());
+    static OutputInfo intermediate(MemoryType memReq = MemoryType::DDR);
 };

 class TestModel final {
@ -87,12 +89,10 @@ public:
    const DataVector& getOutputs() const;
    const StageVector& getStages() const;

-    void createInputs(std::vector<DataDesc> inputDescs);
-    void createOutputs(std::vector<DataDesc> outputDescs);
+    void createInputs(std::vector<DataDesc> inputDescs = {});
+    void createOutputs(std::vector<DataDesc> outputDescs = {});

-    Stage addStage(
-            std::initializer_list<InputInfo> curInputInfos,
-            std::initializer_list<OutputInfo> curOutputInfos);
+    Stage addStage(const std::vector<InputInfo>& curInputInfos, const std::vector<OutputInfo>& curOutputInfos);

    void setStageDataOrderInfo(
            int stageInd,
--- a/inference-engine/tests/unit/vpu/middleend_tests/passes_tests/annotate_memory_types.cpp
+++ b/inference-engine/tests/unit/vpu/middleend_tests/passes_tests/annotate_memory_types.cpp
@ -0,0 +1,116 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "graph_transformer_tests.hpp"
+#include "common_test_utils/common_utils.hpp"
+
+namespace vpu {
+
+namespace ie = InferenceEngine;
+
+namespace {
+
+using TestParam = std::tuple<
+    // input MemoryType of first stage is always DDR
+    std::tuple<MemoryType, MemoryType, MemoryType>, // outputs MemoryTypes for first stage
+    std::tuple<MemoryType, MemoryType>              // outputs MemoryTypes for second stage
+    // output MemoryType of third stage is always DDR
+>;
+
+}
+
+// Fixture checking that the annotateMemoryTypes pass appends the
+// "@<inputs>-><outputs>" memory-type suffix to stage names.
+//
+// The model is a chain of three stages: the first consumes the network input
+// and produces three intermediate outputs, the second consumes those and
+// produces two intermediate outputs, the third consumes those and writes the
+// network output. The memory requirements of the intermediate outputs come
+// from the test parameter. The whole scenario (model creation, pass execution
+// and validation) runs inside SetUp().
+class AnnotateMemoryTypes : public GraphTransformerTest, public testing::WithParamInterface<TestParam> {
+protected:
+    void SetUp() override {
+        ASSERT_NO_FATAL_FAILURE(GraphTransformerTest::SetUp());
+        // The option must be set before the compile environment is initialized.
+        config.enableMemoryTypesAnnotation = true;
+
+        ASSERT_NO_FATAL_FAILURE(InitCompileEnv());
+        ASSERT_NO_FATAL_FAILURE(InitPipeline());
+
+        const auto& parameters = GetParam();
+        const auto& firstStageOutputs = CommonTestUtils::tuple2Vector(std::get<0>(parameters));
+        const auto& secondStageOutputs = CommonTestUtils::tuple2Vector(std::get<1>(parameters));
+        ASSERT_NO_FATAL_FAILURE(InitModel(firstStageOutputs, secondStageOutputs));
+        ASSERT_NO_FATAL_FAILURE(Compile());
+        ASSERT_NO_FATAL_FAILURE(Validate(firstStageOutputs, secondStageOutputs));
+    }
+
+    // Runs the pipeline (only the annotation pass) on the test model.
+    void Compile() {
+        m_pipeline.run(m_testModel.getBaseModel());
+    }
+
+    // Verifies that every stage name ends with the expected annotation.
+    void Validate(const std::vector<MemoryType>& firstStageOutputs, const std::vector<MemoryType>& secondStageOutputs) {
+        const auto& stages = m_testModel.getStages();
+        // Unsigned literal avoids a signed/unsigned comparison inside the gtest helper.
+        ASSERT_EQ(stages.size(), 3u);
+
+        const auto& stage0 = stages.front();
+        const auto& stage1 = stages[1];
+        const auto& stage2 = stages.back();
+
+        ASSERT_TRUE(CommonTestUtils::endsWith(stage0->name(), "@[DDR]->" + GenerateSuffix(firstStageOutputs)));
+        ASSERT_TRUE(CommonTestUtils::endsWith(stage1->name(), "@" + GenerateSuffix(firstStageOutputs) + "->" + GenerateSuffix(secondStageOutputs)));
+        ASSERT_TRUE(CommonTestUtils::endsWith(stage2->name(), "@" + GenerateSuffix(secondStageOutputs) + "->[DDR]"));
+    }
+
+protected:
+    TestModel m_testModel;
+
+private:
+    // Builds the three-stage chain described in the fixture comment.
+    void InitModel(const std::vector<MemoryType>& firstStageOutputs, const std::vector<MemoryType>& secondStageOutputs) {
+        m_testModel = CreateTestModel();
+
+        m_testModel.createInputs();
+        m_testModel.createOutputs();
+
+        // One input per output of the previous stage; only the number of
+        // memory types matters here, not their values.
+        const auto generateInputs = [](const std::vector<MemoryType>& inputsMemoryTypes, std::size_t prevStageIndex) {
+            std::vector<int> indices(inputsMemoryTypes.size());
+            std::iota(indices.begin(), indices.end(), 0);
+            std::vector<InputInfo> inputs;
+            inputs.reserve(indices.size());
+            std::transform(indices.cbegin(), indices.cend(), std::back_inserter(inputs),
+                [prevStageIndex](int index) { return InputInfo::fromPrevStage(static_cast<int>(prevStageIndex), index); });
+            return inputs;
+        };
+
+        // One intermediate output per requested memory type.
+        const auto generateOutputs = [](const std::vector<MemoryType>& outputsMemoryTypes) {
+            std::vector<OutputInfo> outputs;
+            outputs.reserve(outputsMemoryTypes.size());
+            std::transform(outputsMemoryTypes.cbegin(), outputsMemoryTypes.cend(), std::back_inserter(outputs),
+                [](MemoryType type) { return OutputInfo::intermediate(type); });
+            return outputs;
+        };
+
+        m_testModel.addStage({InputInfo::fromNetwork()}, generateOutputs(firstStageOutputs));
+        m_testModel.addStage(generateInputs(firstStageOutputs, 0), generateOutputs(secondStageOutputs));
+        m_testModel.addStage(generateInputs(secondStageOutputs, 1), {OutputInfo::fromNetwork()});
+    }
+
+    // The pipeline under test consists of the annotation pass only.
+    void InitPipeline() {
+        m_pipeline = PassSet();
+        m_pipeline.addPass(passManager->annotateMemoryTypes());
+    }
+
+    // Formats outputs with printTo, i.e. the same way the pass formats memory types.
+    template<class T>
+    static std::string GenerateSuffix(const T& outputs) {
+        std::stringstream suffix;
+        printTo(suffix, outputs);
+        return suffix.str();
+    }
+
+    PassSet m_pipeline;
+};
+
+TEST_P(AnnotateMemoryTypes, SubgraphOf3Stages) {  // intentionally empty: the fixture's SetUp() builds the model, runs the pass and validates the stage names
+}
+
+INSTANTIATE_TEST_CASE_P(unit, AnnotateMemoryTypes, testing::Combine(  // every DDR/CMX combination for the five intermediate outputs
+    testing::Combine(  // memory types of the three outputs of the first stage
+        testing::Values(MemoryType::DDR, MemoryType::CMX),
+        testing::Values(MemoryType::DDR, MemoryType::CMX),
+        testing::Values(MemoryType::DDR, MemoryType::CMX)),
+    testing::Combine(  // memory types of the two outputs of the second stage
+        testing::Values(MemoryType::DDR, MemoryType::CMX),
+        testing::Values(MemoryType::DDR, MemoryType::CMX))
+));
+
+} // namespace vpu
--- a/inference-engine/tests/unit/vpu/middleend_tests/passes_tests/split_large_kernel_conv_tests.cpp
+++ b/inference-engine/tests/unit/vpu/middleend_tests/passes_tests/split_large_kernel_conv_tests.cpp
@ -14,11 +14,7 @@ class VPU_SplitLargeKernelConvTest : public GraphTransformerTest {
    Model model;

 public:
-    void InitConvStage(
-        int inputX = 8960,
-        int inputY = 1,
-        bool isOutput4D = true) {
-
+    void InitConvStage(int inputX = 8960, int inputY = 1, bool isOutput4D = true) {
        int kernelx = 16;
        int kernely = 1;
        int kernelStrideX = 1;
@ -97,9 +93,11 @@ class VPU_SplitLargeKernelConvTest : public GraphTransformerTest {
    }
 };

-TEST_F(VPU_SplitLargeKernelConvTest, splitLargeKernelConvIfKernelSizeIs1x16) {
+// Test is going to fail if target convolution is not converted to HW stage
+// Conversion to HW stage fails due to #-33366
+TEST_F(VPU_SplitLargeKernelConvTest, DISABLED_splitLargeKernelConvIfKernelSizeIs1x16) {
    InitCompileEnv();
    InitConvStage();

    ASSERT_NO_THROW(pipeline.run(model));
-}
+}