[GNA] Remove extra reshape before maxpool. Fix activation and maxpool reordering. (#5404)

Fix convolution input transposition for Kaldi models with FakeQuantize layers.
Fix a floating-point error in gnaFuncTests when debug logs are enabled.
Elizaveta Lobanova 2021-04-30 15:42:27 +03:00 committed by GitHub
parent bcb67bfb6a
commit 22e4566faa
9 changed files with 238 additions and 9 deletions


@@ -453,6 +453,12 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
     size_t num_data_bytes_in = (num_inputs + num_input_padding) * inputs->getPrecision().size();
     auto connectedInputLayer = connectInput(layer, ptr_inputs, num_data_bytes_in).input;
+    // Skip FakeQuantize and ScaleShift between Convolution and Input
+    if (LayerInfo(connectedInputLayer).isFakeQuantize()) {
+        connectedInputLayer = CNNNetPrevLayerSkipCertain(connectedInputLayer, 0, [](CNNLayerPtr l) {
+            return LayerInfo(l).isScaleShift();
+        });
+    }
     // TODO: convolution might be not the first layer in sorted order but connected via split for example - dont know how kaldi will handle that
     if (!dnn->do_rotate_input) {
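Editor's note (not part of the commit): Kaldi models quantized with FakeQuantize carry the producer chain

    Input -> [optional ScaleShift] -> FakeQuantize -> Convolution

so the guard above unwraps FakeQuantize (skipping any ScaleShift) to let the do_rotate_input transposition logic inspect the real input layer rather than the quantization wrappers.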
@@ -626,6 +632,7 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
         ptr_weights,
         ptr_biases);
+    currentComponent.num_bytes_per_input = inputs->getPrecision().size();
     currentComponent.num_bytes_per_output = outputs->getPrecision().size();
     if (inputs->getLayout() == Layout::NHWC) {


@@ -56,6 +56,8 @@
 #include <transformations/common_optimizations/relu_fake_quantize_fusion.hpp>
 #include <transformations/common_optimizations/add_fake_quantize_fusion.hpp>
+#include "transformations/remove_extra_reshapes.hpp"
 #if GNA_LIB_VER == 2
 #include <gna2-model-api.h>
@@ -663,6 +665,7 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
        manager.register_pass<ngraph::pass::ConvertOpSet3ToOpSet2>();
        manager.register_pass<ngraph::pass::ConvertOpSet2ToOpSet1>();
        manager.register_pass<ngraph::pass::ConvertOpSet1ToLegacy>();
+       manager.register_pass<RemoveExtraReshapes>();
        // UnrollTI should be the last transformation in the transformation pipeline
        manager.register_pass<ngraph::pass::UnrollTensorIterator>();


@@ -371,19 +371,21 @@ namespace {
 void ReorderMaxPoolPass::run() {
     // detecting following pattern
-    // conv->relu->maxpooling
-    // changing it to conv->maxpooling->relu
+    // conv->activation->maxpooling
+    // changing it to conv->maxpooling->activation
     for (auto & l : *pLayers) {
         auto pool = LayerInfo(l);
         if (!pool.isMaxPooling()) continue;

         // don't reorder if pooling is 2D for CNN2D
         auto pooling = dynamic_cast<PoolingLayer*>(l.get());
-        if (pooling == nullptr || (is2D(pooling->_kernel) || is2D(pooling->_stride))) continue;
+        // todo: return the check for stride after it'll be fixed in MO for Kaldi models
+        if (pooling == nullptr || (is2D(pooling->_kernel))) continue;

         // checking prev layer type
-        auto activation = LayerInfo(CNNNetPrevLayer(l));
-        if (!activation.isActivation()) continue;
+        auto actLayer = CNNNetPrevLayer(l);
+        auto activation = LayerInfo(actLayer);
+        if (!activation.isActivation() || actLayer->insData.size() > 1) continue;

         // if activation came from convolution
         auto convolution = LayerInfo(CNNNetPrevLayer(static_cast<InferenceEngine::CNNLayer*>(activation)));
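Editor's note (not part of the commit): the reorder is legal because max pooling commutes with any monotonically non-decreasing activation, which covers the activations fused here. A minimal self-contained check, using ReLU as the example activation:

#include <algorithm>
#include <cassert>

// For non-decreasing f: f(max(a, b)) == max(f(a), f(b)),
// so pooling first and activating afterwards yields the same values.
float relu(float x) { return std::max(x, 0.0f); }

int main() {
    const float a = -1.5f, b = 2.0f;
    assert(relu(std::max(a, b)) == std::max(relu(a), relu(b)));
    return 0;
}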


@@ -0,0 +1,31 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "transformations/remove_extra_reshapes.hpp"

#include <ngraph/opsets/opset1.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>

using namespace GNAPluginNS;

NGRAPH_RTTI_DEFINITION(RemoveExtraReshapes, "RemoveExtraReshapes", 0);

RemoveExtraReshapes::RemoveExtraReshapes() {
    const auto reshape = ngraph::pattern::wrap_type<ngraph::opset1::Reshape>();
    const auto pooling = ngraph::pattern::wrap_type<ngraph::opset1::MaxPool>({reshape});

    ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) {
        const auto& pattern_map = m.get_pattern_value_map();
        const auto reshape_node = pattern_map.at(reshape).get_node_shared_ptr();
        // Only fold reshapes that change nothing (input shape == output shape)
        if (reshape_node->get_input_shape(0) != reshape_node->get_output_shape(0)) {
            return false;
        }

        ngraph::replace_output_update_name(reshape_node->output(0), reshape_node->input_value(0));
        return true;
    };

    auto m = std::make_shared<ngraph::pattern::Matcher>(pooling, "RemoveExtraReshapes");
    this->register_matcher(m, callback);
}
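Editor's sketch (not part of the commit): a minimal standalone use of the pass, assuming the nGraph opset1 API shown above. A Reshape whose input and output shapes match is folded away, leaving the MaxPool connected directly to the Parameter:

#include <ngraph/function.hpp>
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/pass/manager.hpp>
#include "transformations/remove_extra_reshapes.hpp"

int main() {
    // Parameter -> no-op Reshape (same shape in and out) -> MaxPool
    auto param = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{1, 8, 1, 168});
    auto pattern = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {1, 8, 1, 168});
    auto reshape = std::make_shared<ngraph::opset1::Reshape>(param, pattern, false);
    auto pool = std::make_shared<ngraph::opset1::MaxPool>(reshape, ngraph::Strides{1, 2},
        ngraph::Shape{0, 0}, ngraph::Shape{0, 0}, ngraph::Shape{1, 2});
    auto func = std::make_shared<ngraph::Function>(ngraph::NodeVector{pool}, ngraph::ParameterVector{param});

    ngraph::pass::Manager manager;
    manager.register_pass<GNAPluginNS::RemoveExtraReshapes>();
    manager.run_passes(func);  // the no-op Reshape is gone afterwards
    return 0;
}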


@@ -0,0 +1,20 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <ngraph/pass/graph_rewrite.hpp>

namespace GNAPluginNS {

/**
 * @brief Removes reshapes before MaxPool which do nothing. Such reshapes can be a result of conversion from IR10 to IR7.
 */
class RemoveExtraReshapes : public ngraph::pass::MatcherPass {
public:
    NGRAPH_RTTI_DECLARATION;
    RemoveExtraReshapes();
};

}  // namespace GNAPluginNS


@@ -0,0 +1,148 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <map>
#include <memory>
#include <string>
#include <tuple>
#include <vector>

#include <ie_core.hpp>

#include "common_test_utils/common_utils.hpp"
#include "functional_test_utils/plugin_cache.hpp"
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "functional_test_utils/blob_utils.hpp"
#include "ngraph_functions/utils/ngraph_helpers.hpp"
#include "ngraph_functions/builders.hpp"
#include "ngraph_functions/pass/convert_prc.hpp"

typedef std::tuple<
    InferenceEngine::Precision,          // Network Precision
    std::string,                         // Target Device
    std::map<std::string, std::string>,  // Configuration
    std::vector<size_t>,                 // Input Shape
    std::pair<float, float>,             // Input Min and Max
    size_t                               // Levels
> fqMaxpoolReorderingParams;

namespace LayerTestsDefinitions {

class FQMaxpoolReordering : public testing::WithParamInterface<fqMaxpoolReorderingParams>,
                            public LayerTestsUtils::LayerTestsCommon {
    float inputDataMin = 0.0f;
    float inputDataMax = 0.0f;
    float inputDataResolution = 1.0f;

public:
    static std::string getTestCaseName(testing::TestParamInfo<fqMaxpoolReorderingParams> obj) {
        InferenceEngine::Precision netPrecision;
        std::string targetDevice;
        std::map<std::string, std::string> configuration;
        std::vector<size_t> inputShape;
        std::pair<float, float> inputMinMax;
        size_t levels = 0;
        std::tie(netPrecision, targetDevice, configuration, inputShape, inputMinMax, levels) = obj.param;

        std::ostringstream result;
        result << "netPRC=" << netPrecision.name() << "_";
        result << "targetDevice=" << targetDevice << "_";
        for (auto const& configItem : configuration) {
            result << "_configItem=" << configItem.first << "_" << configItem.second;
        }
        result << "_inputShape=" << CommonTestUtils::vec2str(inputShape);
        result << "_inputMinMax=(" << inputMinMax.first << ".." << inputMinMax.second << ")";
        result << "_levels=" << levels;
        return result.str();
    }

    InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo& info) const {
        return FuncTestUtils::createAndFillBlob(info.getTensorDesc(), inputDataMax - inputDataMin, inputDataMin, 1 / inputDataResolution);
    }

protected:
    void SetUp() override {
        InferenceEngine::Precision netPrecision;
        std::vector<size_t> inputShape;
        std::pair<float, float> inputMinMax;
        size_t levels = 0;
        std::tie(netPrecision, targetDevice, configuration, inputShape, inputMinMax, levels) = this->GetParam();
        auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);

        // Keep the FQ range for GenerateInput and the convolution FQ constants
        std::tie(inputDataMin, inputDataMax) = inputMinMax;

        auto inputLowNode = ngraph::builder::makeConstant<float>(ngPrc, {1}, { inputMinMax.first });
        auto inputHighNode = ngraph::builder::makeConstant<float>(ngPrc, {1}, { inputMinMax.second });

        auto inputVector = ngraph::builder::makeParams(ngPrc, {inputShape});
        auto inputFQ = std::make_shared<ngraph::opset1::FakeQuantize>(inputVector[0],
            inputLowNode, inputHighNode, inputLowNode, inputHighNode, levels);

        auto filterWeightsNode = ngraph::builder::makeConstant<float>(ngPrc, {8, inputShape[1], 1, 8}, { 1.0f });
        auto convLowNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{ 1 }, std::vector<float>{inputDataMin});
        auto convHighNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{ 1 }, std::vector<float>{inputDataMax});
        auto convWeightsFQNode = std::make_shared<ngraph::opset1::FakeQuantize>(filterWeightsNode,
            convLowNode, convHighNode, convLowNode, convHighNode, levels);
        auto convWeightsFQ = std::dynamic_pointer_cast<ngraph::opset1::FakeQuantize>(convWeightsFQNode);

        auto conv = std::make_shared<ngraph::opset1::Convolution>(inputFQ, convWeightsFQ, std::vector<size_t>{ 1, 1 },
                                                                  std::vector<ptrdiff_t>{ 0, 0 }, std::vector<ptrdiff_t>{ 0, 0 },
                                                                  std::vector<size_t>{ 1, 1 },
                                                                  ngraph::op::PadType::VALID);
        auto biasesWeightsNode = ngraph::builder::makeConstant(ngPrc, {}, std::vector<float>{ 0.0f });
        auto add = std::make_shared<ngraph::opset1::Add>(conv, biasesWeightsNode);

        auto convFQNode = std::make_shared<ngraph::opset1::FakeQuantize>(add,
            inputLowNode, inputHighNode, inputLowNode, inputHighNode, levels);

        auto maxpool = ngraph::builder::makePooling(convFQNode, {1, 2}, {0, 0}, {0, 0}, {1, 2}, ngraph::op::RoundingType::FLOOR,
                                                    ngraph::op::PadType::VALID, false, ngraph::helpers::PoolingTypes::MAX);

        ngraph::ResultVector results{ std::make_shared<ngraph::opset1::Result>(maxpool)};
        function = std::make_shared<ngraph::Function>(results, inputVector, "FQMaxPoolReorder");
    }
};

TEST_P(FQMaxpoolReordering, CompareWithRefImpl) {
    Run();
}

const std::vector<InferenceEngine::Precision> netPrecisions = {
    InferenceEngine::Precision::FP32,
    InferenceEngine::Precision::FP16
};

const std::vector<std::map<std::string, std::string>> configs = {
    {
        {"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
    }
};

const std::vector<std::vector<size_t>> inputShape = {
    {1, 1, 1, 1024},
    {1, 8, 1, 168},
};

const std::vector<std::pair<float, float>> inputMinMax = {
    {-0.5, 0.5},
    {-2, 2},
    {-8, 8}
};

const std::vector<size_t> levels = {
    65535,
};

INSTANTIATE_TEST_CASE_P(smoke_fq_maxpool_reordering, FQMaxpoolReordering,
    ::testing::Combine(
        ::testing::ValuesIn(netPrecisions),
        ::testing::Values(CommonTestUtils::DEVICE_GNA),
        ::testing::ValuesIn(configs),
        ::testing::ValuesIn(inputShape),
        ::testing::ValuesIn(inputMinMax),
        ::testing::ValuesIn(levels)),
    FQMaxpoolReordering::getTestCaseName);
} // namespace LayerTestsDefinitions
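Editor's suggestion (an assumption, not from the commit): with the usual gtest runner for the gnaFuncTests binary mentioned above, the new suite alone can be selected via a filter:

./gnaFuncTests --gtest_filter=smoke_fq_maxpool_reordering*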


@@ -46,10 +46,13 @@ const auto convParams = ::testing::Combine(
     ::testing::ValuesIn(outputChannels)
 );

+const std::vector<bool> permute = {false, true};
+
 INSTANTIATE_TEST_CASE_P(smoke_FqConvFqAffineTest, FqConvFqAffineTest,
     ::testing::Combine(
         fqParams,
         convParams,
+        ::testing::ValuesIn(permute),
         ::testing::ValuesIn(netPrecisions),
         ::testing::ValuesIn(inputShapes),
         ::testing::Values(CommonTestUtils::DEVICE_GNA),


@@ -30,6 +30,7 @@ typedef std::tuple<
 typedef std::tuple<
     FqSpecificParams,
     ConvParams,
+    bool,                                // Permute after convolution
     InferenceEngine::Precision,          // Net precision
     InferenceEngine::SizeVector,         // Input shapes
     LayerTestsUtils::TargetDevice,       // Device name


@@ -9,11 +9,12 @@ namespace SubgraphTestsDefinitions {
 std::string FqConvFqAffineTest::getTestCaseName(testing::TestParamInfo<FqConvFqAffineTestParamsSet> obj) {
     FqSpecificParams fqParams;
     ConvParams convParams;
+    bool permute;
     InferenceEngine::Precision netPrecision;
     InferenceEngine::SizeVector inputShapes;
     std::string targetDevice;
     std::map<std::string, std::string> config;
-    std::tie(fqParams, convParams, netPrecision, inputShapes, targetDevice, config) = obj.param;
+    std::tie(fqParams, convParams, permute, netPrecision, inputShapes, targetDevice, config) = obj.param;

     std::vector<size_t> levels;
     std::vector<float> inputArg;
@@ -39,17 +40,19 @@ std::string FqConvFqAffineTest::getTestCaseName(testing::TestParamInfo<FqConvFqA
     result << "_KERNEL=" << CommonTestUtils::vec2str(kernelShape) << "_";
     result << "STRIDES=" << CommonTestUtils::vec2str(strides) << "_";
     result << "IC=" << inputChannels << "_";
-    result << "OC=" << outputChannels;
+    result << "OC=" << outputChannels << "_";
+    result << "permute=" << permute << "\n";
     return result.str();
 }

 void FqConvFqAffineTest::SetUp() {
     FqSpecificParams fqParams;
     ConvParams convParams;
+    bool permute;
     std::vector<size_t> inputShape;
     std::map<std::string, std::string> config;
     auto netPrecision = InferenceEngine::Precision::UNSPECIFIED;
-    std::tie(fqParams, convParams, netPrecision, inputShape, targetDevice, config) = this->GetParam();
+    std::tie(fqParams, convParams, permute, netPrecision, inputShape, targetDevice, config) = this->GetParam();
     configuration.insert(config.begin(), config.end());

     std::vector<size_t> levels;
@@ -100,8 +103,19 @@ void FqConvFqAffineTest::SetUp() {
     auto heightAfterConv = (convInputShape[2] - kernelShape[0]) / strides[0] + 1;
     std::vector<size_t> outFormShapes = {1, outputChannels * widthAfterConv * heightAfterConv };

+    ngraph::Output<ngraph::Node> nodeBeforeReshape;
+    if (permute) {
+        auto permuteOrder = std::make_shared<ngraph::opset1::Constant>(ngraph::element::i64,
+                                                                       ngraph::Shape{4},
+                                                                       ngraph::Shape{{0, 3, 2, 1}});
+        auto transpose = std::make_shared<ngraph::opset1::Transpose>(add, permuteOrder);
+        nodeBeforeReshape = transpose;
+    } else {
+        nodeBeforeReshape = add;
+    }
+
     auto reshapePattern2 = std::make_shared<ngraph::opset1::Constant>(ngraph::element::Type_t::i64, ngraph::Shape{ 2 }, outFormShapes);
-    auto reshape2 = std::make_shared<ngraph::opset1::Reshape>(add, reshapePattern2, false);
+    auto reshape2 = std::make_shared<ngraph::opset1::Reshape>(nodeBeforeReshape, reshapePattern2, false);

     auto matMulWeightsNode = ngraph::builder::makeConstant<float>(ngPrc, {outFormShapes[1], outFormShapes[1]}, { 1.0f });
     auto matMulLowNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{ 1 }, std::vector<float>{inputDataMin});
auto matMulLowNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{ 1 }, std::vector<float>{inputDataMin}); auto matMulLowNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{ 1 }, std::vector<float>{inputDataMin});