[GNA] Remove extra reshape before maxpool. Fix activation and maxpool reordering. (#5404)
Fix convolution input transposition for Kaldi models with FakeQuantize layers. Fix floating point error in gnaFuncTests with debug logs.
This commit is contained in:
parent
bcb67bfb6a
commit
22e4566faa
@ -453,6 +453,12 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
|
||||
size_t num_data_bytes_in = (num_inputs + num_input_padding) * inputs->getPrecision().size();
|
||||
|
||||
auto connectedInputLayer = connectInput(layer, ptr_inputs, num_data_bytes_in).input;
|
||||
// Skip FakeQuantize and ScaleShift between Convolution and Input
|
||||
if (LayerInfo(connectedInputLayer).isFakeQuantize()) {
|
||||
connectedInputLayer = CNNNetPrevLayerSkipCertain(connectedInputLayer, 0, [](CNNLayerPtr l) {
|
||||
return LayerInfo(l).isScaleShift();
|
||||
});
|
||||
}
|
||||
|
||||
// TODO: convolution might not be the first layer in sorted order but connected via split for example - don't know how Kaldi will handle that
|
||||
if (!dnn->do_rotate_input) {
|
||||
@ -626,6 +632,7 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
|
||||
ptr_weights,
|
||||
ptr_biases);
|
||||
|
||||
currentComponent.num_bytes_per_input = inputs->getPrecision().size();
|
||||
currentComponent.num_bytes_per_output = outputs->getPrecision().size();
|
||||
|
||||
if (inputs->getLayout() == Layout::NHWC) {
|
||||
|
@ -56,6 +56,8 @@
|
||||
#include <transformations/common_optimizations/relu_fake_quantize_fusion.hpp>
|
||||
#include <transformations/common_optimizations/add_fake_quantize_fusion.hpp>
|
||||
|
||||
#include "transformations/remove_extra_reshapes.hpp"
|
||||
|
||||
#if GNA_LIB_VER == 2
|
||||
#include <gna2-model-api.h>
|
||||
|
||||
@ -663,6 +665,7 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
|
||||
manager.register_pass<ngraph::pass::ConvertOpSet3ToOpSet2>();
|
||||
manager.register_pass<ngraph::pass::ConvertOpSet2ToOpSet1>();
|
||||
manager.register_pass<ngraph::pass::ConvertOpSet1ToLegacy>();
|
||||
manager.register_pass<RemoveExtraReshapes>();
|
||||
// UnrollTI should be the last transformation in the transformation pipeline
|
||||
manager.register_pass<ngraph::pass::UnrollTensorIterator>();
|
||||
|
||||
|
@ -371,19 +371,21 @@ namespace {
|
||||
|
||||
void ReorderMaxPoolPass::run() {
|
||||
// detecting following pattern
|
||||
// conv->relu->maxpooling
|
||||
// changing it to conv->maxpooling->relu
|
||||
// conv->activation->maxpooling
|
||||
// changing it to conv->maxpooling->activation
|
||||
for (auto & l : *pLayers) {
|
||||
auto pool = LayerInfo(l);
|
||||
if (!pool.isMaxPooling()) continue;
|
||||
|
||||
// don't reorder if pooling is 2D for CNN2D
|
||||
auto pooling = dynamic_cast<PoolingLayer*>(l.get());
|
||||
if (pooling == nullptr || (is2D(pooling->_kernel) || is2D(pooling->_stride))) continue;
|
||||
// todo: return the check for stride after it'll be fixed in MO for Kaldi models
|
||||
if (pooling == nullptr || (is2D(pooling->_kernel))) continue;
|
||||
|
||||
// checking prev layer type
|
||||
auto activation = LayerInfo(CNNNetPrevLayer(l));
|
||||
if (!activation.isActivation()) continue;
|
||||
auto actLayer = CNNNetPrevLayer(l);
|
||||
auto activation = LayerInfo(actLayer);
|
||||
if (!activation.isActivation() || actLayer->insData.size() > 1) continue;
|
||||
|
||||
// if activation came from convolution
|
||||
auto convolution = LayerInfo(CNNNetPrevLayer(static_cast<InferenceEngine::CNNLayer*>(activation)));
|
||||
|
@ -0,0 +1,31 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "transformations/remove_extra_reshapes.hpp"
|
||||
|
||||
#include <ngraph/opsets/opset1.hpp>
|
||||
#include <ngraph/pattern/op/wrap_type.hpp>
|
||||
|
||||
using namespace GNAPluginNS;
|
||||
|
||||
NGRAPH_RTTI_DEFINITION(RemoveExtraReshapes, "RemoveExtraReshapes", 0);
|
||||
|
||||
// Matches a MaxPool fed directly by a Reshape and removes the Reshape when it
// is an identity (input shape equals output shape), reconnecting the MaxPool
// straight to the Reshape's producer.
RemoveExtraReshapes::RemoveExtraReshapes() {
    const auto reshape_pattern = ngraph::pattern::wrap_type<ngraph::opset1::Reshape>();
    const auto pooling_pattern = ngraph::pattern::wrap_type<ngraph::opset1::MaxPool>({reshape_pattern});

    ngraph::matcher_pass_callback handler = [=](ngraph::pattern::Matcher& matcher) {
        const auto& value_map = matcher.get_pattern_value_map();
        const auto matched_reshape = value_map.at(reshape_pattern).get_node_shared_ptr();

        // Only a no-op reshape may be dropped; anything that actually changes
        // the shape must stay in the graph.
        const bool is_identity =
            matched_reshape->get_input_shape(0) == matched_reshape->get_output_shape(0);
        if (!is_identity) {
            return false;
        }

        // Bypass the reshape: rewire its consumers to its producer, keeping
        // the friendly name on the surviving output.
        ngraph::replace_output_update_name(matched_reshape->output(0), matched_reshape->input_value(0));
        return true;
    };

    auto matcher = std::make_shared<ngraph::pattern::Matcher>(pooling_pattern, "RemoveExtraReshapes");
    this->register_matcher(matcher, handler);
}
|
@ -0,0 +1,20 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <ngraph/pass/graph_rewrite.hpp>
|
||||
|
||||
namespace GNAPluginNS {
|
||||
|
||||
/**
 * @brief Removes Reshape layers placed directly before MaxPool which do
 * nothing, i.e. whose input and output shapes are identical. Such reshapes
 * can be a result of conversion from IR10 to IR7.
 */
class RemoveExtraReshapes : public ngraph::pass::MatcherPass {
public:
  NGRAPH_RTTI_DECLARATION;
  RemoveExtraReshapes();
};
|
||||
|
||||
} // namespace GNAPluginNS
|
@ -0,0 +1,148 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
#include <ie_core.hpp>
|
||||
|
||||
#include "common_test_utils/common_utils.hpp"
|
||||
#include "functional_test_utils/plugin_cache.hpp"
|
||||
#include "shared_test_classes/base/layer_test_utils.hpp"
|
||||
#include "functional_test_utils/blob_utils.hpp"
|
||||
#include "ngraph_functions/utils/ngraph_helpers.hpp"
|
||||
#include "ngraph_functions/builders.hpp"
|
||||
|
||||
#include "ngraph_functions/pass/convert_prc.hpp"
|
||||
|
||||
// Parameter pack for the FakeQuantize -> MaxPool reordering tests.
typedef std::tuple<
    InferenceEngine::Precision,         // Network Precision
    std::string,                        // Target Device
    std::map<std::string, std::string>, // Plugin Configuration
    std::vector<size_t>,                // Input Shape (NCHW)
    std::pair<float, float>,            // Input Min and Max (FQ range)
    size_t                              // FakeQuantize Levels
> fqMaxpoolReorderingParams;
|
||||
|
||||
namespace LayerTestsDefinitions {
|
||||
|
||||
// Checks that the GNA plugin correctly reorders a FakeQuantize (activation)
// followed by MaxPool. Graph: Param -> FQ -> Conv(FQ weights) -> Add -> FQ -> MaxPool.
class FQMaxpoolReordering : public testing::WithParamInterface<fqMaxpoolReorderingParams>,
    public LayerTestsUtils::LayerTestsCommon {
    // Data range shared by GenerateInput() and the convolution FQ ranges;
    // filled in from the test parameters in SetUp().
    float inputDataMin = 0.0f;
    float inputDataMax = 0.0f;
    float inputDataResolution = 1.0f;

public:
    // Builds a unique, human-readable name for each parameter combination.
    static std::string getTestCaseName(testing::TestParamInfo<fqMaxpoolReorderingParams> obj) {
        InferenceEngine::Precision netPrecision;
        std::string targetDevice;
        std::map<std::string, std::string> configuration;
        std::vector<size_t> inputShape;
        std::pair<float, float> inputMinMax;
        size_t levels = 0;
        std::tie(netPrecision, targetDevice, configuration, inputShape, inputMinMax, levels) = obj.param;

        std::ostringstream result;
        result << "netPRC=" << netPrecision.name() << "_";
        result << "targetDevice=" << targetDevice << "_";
        for (auto const& configItem : configuration) {
            result << "_configItem=" << configItem.first << "_" << configItem.second;
        }
        result << "_inputShape=" << CommonTestUtils::vec2str(inputShape);
        result << "_inputMinMax=(" << inputMinMax.first << ".." << inputMinMax.second << ")";
        result << "_levels=" << levels;

        return result.str();
    }

    // Generates input blobs limited to [inputDataMin, inputDataMax] so the
    // data actually falls inside the FakeQuantize ranges built in SetUp().
    InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo& info) const override {
        return FuncTestUtils::createAndFillBlob(info.getTensorDesc(), inputDataMax - inputDataMin, inputDataMin, 1 / inputDataResolution);
    }

protected:
    void SetUp() override {
        InferenceEngine::Precision netPrecision;

        std::vector<size_t> inputShape;
        std::pair<float, float> inputMinMax;
        size_t levels = 0;
        std::tie(netPrecision, targetDevice, configuration, inputShape, inputMinMax, levels) = this->GetParam();
        auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);

        // Remember the data range: previously these members stayed at their
        // 0.0f defaults, which collapsed the convolution FQ range to [0, 0]
        // and made GenerateInput() produce a zero-width range.
        inputDataMin = inputMinMax.first;
        inputDataMax = inputMinMax.second;

        auto inputLowNode = ngraph::builder::makeConstant<float>(ngPrc, {1}, { inputMinMax.first });
        auto inputHighNode = ngraph::builder::makeConstant<float>(ngPrc, {1}, { inputMinMax.second });

        auto inputVector = ngraph::builder::makeParams(ngPrc, {inputShape});

        auto inputFQ = std::make_shared<ngraph::opset1::FakeQuantize>(inputVector[0],
            inputLowNode, inputHighNode, inputLowNode, inputHighNode, levels);

        // 8 output channels, 1x8 kernel, all-ones weights wrapped in FQ.
        auto filterWeightsNode = ngraph::builder::makeConstant<float>(ngPrc, {8, inputShape[1], 1, 8}, { 1.0f });
        auto convLowNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{ 1 }, std::vector<float>{inputDataMin});
        auto convHighNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{ 1 }, std::vector<float>{inputDataMax});
        // Construct the weights FQ directly; the former dynamic_pointer_cast
        // of a FakeQuantize to its own type was a no-op.
        auto convWeightsFQ = std::make_shared<ngraph::opset1::FakeQuantize>(filterWeightsNode,
            convLowNode, convHighNode, convLowNode, convHighNode, levels);

        auto conv = std::make_shared<ngraph::opset1::Convolution>(inputFQ, convWeightsFQ, std::vector<size_t>{ 1, 1 },
                                                                  std::vector<ptrdiff_t>{ 0, 0 }, std::vector<ptrdiff_t>{ 0, 0 },
                                                                  std::vector<size_t>{ 1, 1 },
                                                                  ngraph::op::PadType::VALID);
        auto biasesWeightsNode = ngraph::builder::makeConstant(ngPrc, {}, std::vector<float>{ 0.0f });
        auto add = std::make_shared<ngraph::opset1::Add>(conv, biasesWeightsNode);

        auto convFQNode = std::make_shared<ngraph::opset1::FakeQuantize>(add,
            inputLowNode, inputHighNode, inputLowNode, inputHighNode, levels);

        // MaxPool (1x2 kernel, 1x2 stride) fed by the FQ — the exact pattern
        // the GNA reordering pass has to handle.
        auto maxpool = ngraph::builder::makePooling(convFQNode, {1, 2}, {0, 0}, {0, 0}, {1, 2}, ngraph::op::RoundingType::FLOOR,
                                                    ngraph::op::PadType::VALID, false, ngraph::helpers::PoolingTypes::MAX);

        ngraph::ResultVector results{ std::make_shared<ngraph::opset1::Result>(maxpool)};
        function = std::make_shared<ngraph::Function>(results, inputVector, "FQMaxPoolReorder");
    }
};
|
||||
|
||||
// Runs inference on GNA and compares against the reference implementation.
TEST_P(FQMaxpoolReordering, CompareWithRefImpl) {
    Run();
};

const std::vector<InferenceEngine::Precision> netPrecisions = {
    InferenceEngine::Precision::FP32,
    InferenceEngine::Precision::FP16
};

// Software-exact mode: bit-exact emulation of the GNA device.
const std::vector<std::map<std::string, std::string>> configs = {
    {
        {"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
    }
};

// NCHW input shapes: a single-channel wide row and an 8-channel variant.
const std::vector<std::vector<size_t>> inputShape = {
    {1, 1, 1, 1024},
    {1, 8, 1, 168},
};

// FakeQuantize data ranges, symmetric around zero.
const std::vector<std::pair<float, float>> inputMinMax = {
    {-0.5, 0.5},
    {-2, 2},
    {-8, 8}
};

// 65535 quantization levels (16-bit).
const std::vector<size_t> levels = {
    65535,
};

INSTANTIATE_TEST_CASE_P(smoke_fq_maxpool_reordering, FQMaxpoolReordering,
    ::testing::Combine(
        ::testing::ValuesIn(netPrecisions),
        ::testing::Values(CommonTestUtils::DEVICE_GNA),
        ::testing::ValuesIn(configs),
        ::testing::ValuesIn(inputShape),
        ::testing::ValuesIn(inputMinMax),
        ::testing::ValuesIn(levels)),
    FQMaxpoolReordering::getTestCaseName);
} // namespace LayerTestsDefinitions
|
@ -46,10 +46,13 @@ const auto convParams = ::testing::Combine(
|
||||
::testing::ValuesIn(outputChannels)
|
||||
);
|
||||
|
||||
const std::vector<bool> permute = {false, true};
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(smoke_FqConvFqAffineTest, FqConvFqAffineTest,
|
||||
::testing::Combine(
|
||||
fqParams,
|
||||
convParams,
|
||||
::testing::ValuesIn(permute),
|
||||
::testing::ValuesIn(netPrecisions),
|
||||
::testing::ValuesIn(inputShapes),
|
||||
::testing::Values(CommonTestUtils::DEVICE_GNA),
|
||||
|
@ -30,6 +30,7 @@ typedef std::tuple<
|
||||
typedef std::tuple<
|
||||
FqSpecificParams,
|
||||
ConvParams,
|
||||
bool, // Permute after convolution
|
||||
InferenceEngine::Precision, // Net precision
|
||||
InferenceEngine::SizeVector, // Input shapes
|
||||
LayerTestsUtils::TargetDevice, // Device name
|
||||
|
@ -9,11 +9,12 @@ namespace SubgraphTestsDefinitions {
|
||||
std::string FqConvFqAffineTest::getTestCaseName(testing::TestParamInfo<FqConvFqAffineTestParamsSet> obj) {
|
||||
FqSpecificParams fqParams;
|
||||
ConvParams convParams;
|
||||
bool permute;
|
||||
InferenceEngine::Precision netPrecision;
|
||||
InferenceEngine::SizeVector inputShapes;
|
||||
std::string targetDevice;
|
||||
std::map<std::string, std::string> config;
|
||||
std::tie(fqParams, convParams, netPrecision, inputShapes, targetDevice, config) = obj.param;
|
||||
std::tie(fqParams, convParams, permute, netPrecision, inputShapes, targetDevice, config) = obj.param;
|
||||
|
||||
std::vector<size_t> levels;
|
||||
std::vector<float> inputArg;
|
||||
@ -39,17 +40,19 @@ std::string FqConvFqAffineTest::getTestCaseName(testing::TestParamInfo<FqConvFqA
|
||||
result << "_KERNEL=" << CommonTestUtils::vec2str(kernelShape) << "_";
|
||||
result << "STRIDES=" << CommonTestUtils::vec2str(strides) << "_";
|
||||
result << "IC=" << inputChannels << "_";
|
||||
result << "OC=" << outputChannels;
|
||||
result << "OC=" << outputChannels << "_";
|
||||
result << "permute=" << permute << "\n";
|
||||
return result.str();
|
||||
}
|
||||
|
||||
void FqConvFqAffineTest::SetUp() {
|
||||
FqSpecificParams fqParams;
|
||||
ConvParams convParams;
|
||||
bool permute;
|
||||
std::vector<size_t> inputShape;
|
||||
std::map<std::string, std::string> config;
|
||||
auto netPrecision = InferenceEngine::Precision::UNSPECIFIED;
|
||||
std::tie(fqParams, convParams, netPrecision, inputShape, targetDevice, config) = this->GetParam();
|
||||
std::tie(fqParams, convParams, permute, netPrecision, inputShape, targetDevice, config) = this->GetParam();
|
||||
configuration.insert(config.begin(), config.end());
|
||||
|
||||
std::vector<size_t> levels;
|
||||
@ -100,8 +103,19 @@ void FqConvFqAffineTest::SetUp() {
|
||||
auto heightAfterConv = (convInputShape[2] - kernelShape[0]) / strides[0] + 1;
|
||||
std::vector<size_t> outFormShapes = {1, outputChannels * widthAfterConv * heightAfterConv };
|
||||
|
||||
ngraph::Output<ngraph::Node> nodeBeforeReshape;
|
||||
if (permute) {
|
||||
auto permuteOrder = std::make_shared<ngraph::opset1::Constant>(ngraph::element::i64,
|
||||
ngraph::Shape{4},
|
||||
ngraph::Shape{{0, 3, 2, 1}});
|
||||
auto transpose = std::make_shared<ngraph::opset1::Transpose>(add, permuteOrder);
|
||||
nodeBeforeReshape = transpose;
|
||||
} else {
|
||||
nodeBeforeReshape = add;
|
||||
}
|
||||
|
||||
auto reshapePattern2 = std::make_shared<ngraph::opset1::Constant>(ngraph::element::Type_t::i64, ngraph::Shape{ 2 }, outFormShapes);
|
||||
auto reshape2 = std::make_shared<ngraph::opset1::Reshape>(add, reshapePattern2, false);
|
||||
auto reshape2 = std::make_shared<ngraph::opset1::Reshape>(nodeBeforeReshape, reshapePattern2, false);
|
||||
|
||||
auto matMulWeightsNode = ngraph::builder::makeConstant<float>(ngPrc, {outFormShapes[1], outFormShapes[1]}, { 1.0f });
|
||||
auto matMulLowNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{ 1 }, std::vector<float>{inputDataMin});
|
||||
|
Loading…
Reference in New Issue
Block a user