[GNA] Fixed calculation of input scale factor and search of the next layer for FQ (#7246)

* [GNA] Fixed the search for the next layer after FQ
* [GNA] Fixed calculation of the input scale factor for POT-quantized models when the first layer after the input is an activation
2021-08-26 15:02:23 +03:00
parent 1a656f4e44
commit 2dcd09055f
4 changed files with 122 additions and 11 deletions
--- a/inference-engine/src/gna_plugin/gna_plugin.cpp
+++ b/inference-engine/src/gna_plugin/gna_plugin.cpp
@@ -483,7 +483,9 @@ void GNAPlugin::UpdateInputScaleFromNetwork(InferenceEngine::CNNNetwork & networ
            auto fp32eq = [](float p1, float p2) -> bool {
                return (std::abs(p1 - p2) <= 0.00001f * std::min(std::abs(p1), std::abs(p2)));
            };
-            float scaleInput = (fqLayer.getLevels() - 1) / (inputRange.second[0] - inputRange.first[0]);
+            // GNA input is always quantized to int16, so number of levels can't be greater than max uint16
+            size_t levels = std::min(fqLayer.getLevels(), static_cast<size_t>(std::numeric_limits<uint16_t>::max()));
+            float scaleInput = (levels - 1) / (inputRange.second[0] - inputRange.first[0]);
            auto minAbsVal = std::min(std::abs(inputRange.second[0]), std::abs(inputRange.first[0]));
            auto maxAbsVal = std::max(std::abs(inputRange.second[0]), std::abs(inputRange.first[0]));
            if (fp32eq(minAbsVal, 0.0f) && !fp32eq(maxAbsVal, 0.0f)) {
--- a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp
+++ b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp
@@ -1967,8 +1967,8 @@ void FuseFQIntoWeightsPass::run() {
            layers_connected_to_fq_count = inputTo.size();
        }
        for (int index = 0; index < layers_connected_to_fq_count; index++) {
-            auto weightableLayer = CNNNetGetNextLayerSkipCertain(layerBeforeWeightable, 0, index, isNonFunctional).first;
-            if (!LayerInfo(weightableLayer).isWeightable()) {
+            auto weightableLayer = CNNNetCheckNextLayerSkipCertain(layerBeforeWeightable, 0, index, true, isNonFunctional).first;
+            if (!weightableLayer || !LayerInfo(weightableLayer).isWeightable()) {
                continue;
            }
            if (weightableLayer->insData.size() < 2) {
--- a/inference-engine/tests/functional/plugin/gna/pass_tests/fq_activation.cpp
+++ b/inference-engine/tests/functional/plugin/gna/pass_tests/fq_activation.cpp
@@ -25,7 +25,7 @@ typedef std::tuple<
    std::map<std::string, std::string>, // Configuration
    std::vector<size_t>,                // Input Shape
    std::pair<float, float>,            // Input Min and Max
-    size_t                              // Levels
+    std::pair<size_t, size_t>           // Levels for input and output FQs
 > fqActivationParams;

 namespace LayerTestsDefinitions {
@@ -43,7 +43,7 @@ public:
        std::map<std::string, std::string> configuration;
        std::vector<size_t> inputShape;
        std::pair<float, float> inputMinMax;
-        size_t levels = 0;
+        std::pair<size_t, size_t> levels;
        std::tie(netPrecision, targetDevice, configuration, inputShape, inputMinMax, levels) = obj.param;

        std::ostringstream result;
@@ -54,7 +54,7 @@ public:
        }
        result << "_inputShape=" << CommonTestUtils::vec2str(inputShape);
        result << "_inputMinMax=(" << inputMinMax.first << ".." << inputMinMax.second << ")";
-        result << "_levels=" << levels;
+        result << "_levels=" << levels.first << "," << levels.second;

        return result.str();
    }
@@ -69,20 +69,21 @@ protected:

        std::vector<size_t> inputShape;
        std::pair<float, float> inputMinMax;
-        size_t levels = 0;
+        std::pair<size_t, size_t> levels;
        std::tie(netPrecision, targetDevice, configuration, inputShape, inputMinMax, levels) = this->GetParam();
        auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);

+        std::tie(inputDataMin, inputDataMax) = inputMinMax;
        auto inputLowNode = ngraph::builder::makeConstant<float>(ngPrc, { 1 }, { inputMinMax.first });
        auto inputHighNode = ngraph::builder::makeConstant<float>(ngPrc, { 1 }, { inputMinMax.second });

        auto inputVector = ngraph::builder::makeParams(ngPrc, { inputShape });
        auto inputFQNode = std::make_shared<ngraph::opset1::FakeQuantize>(inputVector[0],
-            inputLowNode, inputHighNode, inputLowNode, inputHighNode, levels);
+            inputLowNode, inputHighNode, inputLowNode, inputHighNode, levels.first);

        auto relu = ngraph::builder::makeActivation(inputFQNode, ngraph::element::f32, ngraph::helpers::ActivationTypes::Relu);
        auto reluFQNode = std::make_shared<ngraph::opset1::FakeQuantize>(relu,
-            inputLowNode, inputHighNode, inputLowNode, inputHighNode, levels);
+            inputLowNode, inputHighNode, inputLowNode, inputHighNode, levels.second);

        ngraph::ResultVector results{ std::make_shared<ngraph::opset1::Result>(reluFQNode) };
        function = std::make_shared<ngraph::Function>(results, inputVector, "FQActivation");
@@ -118,8 +119,9 @@ const std::vector<std::pair<float, float>> inputMinMax = {
    {-100, 100},
 };

-const std::vector<size_t> levels = {
-    65535,
+const std::vector<std::pair<size_t, size_t>> levels = {
+    {std::numeric_limits<uint16_t>::max(), std::numeric_limits<uint16_t>::max()},
+    {std::numeric_limits<uint32_t>::max(), std::numeric_limits<uint16_t>::max()}
 };

 INSTANTIATE_TEST_CASE_P(smoke_fq_activation, FQActivation,
--- a/inference-engine/tests/functional/plugin/gna/pass_tests/fq_with_multiple_out_connections.cpp
+++ b/inference-engine/tests/functional/plugin/gna/pass_tests/fq_with_multiple_out_connections.cpp
@@ -0,0 +1,107 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+#include <memory>
+#include <tuple>
+#include <vector>
+#include <string>
+
+#include <ie_core.hpp>
+
+#include "common_test_utils/common_utils.hpp"
+#include "functional_test_utils/plugin_cache.hpp"
+#include "shared_test_classes/base/layer_test_utils.hpp"
+#include "functional_test_utils/blob_utils.hpp"
+#include "ngraph_functions/utils/ngraph_helpers.hpp"
+#include "ngraph_functions/builders.hpp"
+
+#include "ngraph_functions/pass/convert_prc.hpp"
+
+typedef std::tuple<  // Parameter tuple unpacked by FQWithMultipleOutConnections (getTestCaseName and SetUp)
+    InferenceEngine::Precision,         // Network Precision
+    std::string,                        // Target Device
+    std::map<std::string, std::string>  // Configuration
+> fqWithMultipleOutConnectionsParams;
+
+namespace LayerTestsDefinitions {
+
+class FQWithMultipleOutConnections : public testing::WithParamInterface<fqWithMultipleOutConnectionsParams>,
+    public LayerTestsUtils::LayerTestsCommon {  // Fixture whose graph has one FakeQuantize output feeding two consumers (a Reshape and a Relu)
+public:
+    static std::string getTestCaseName(testing::TestParamInfo<fqWithMultipleOutConnectionsParams> obj) {  // Builds the test-case name from precision, device and config entries
+        InferenceEngine::Precision netPrecision;
+        std::string targetDevice;
+        std::map<std::string, std::string> configuration;
+        std::tie(netPrecision, targetDevice, configuration) = obj.param;
+
+        std::ostringstream result;
+        result << "netPRC=" << netPrecision.name() << "_";
+        result << "targetDevice=" << targetDevice << "_";
+        for (auto const& configItem : configuration) {
+            result << "_configItem=" << configItem.first << "_" << configItem.second;  // NOTE(review): previous field already ends with "_", so the name contains "__configItem="
+        }
+
+        return result.str();
+    }
+
+protected:
+    void SetUp() override {  // Builds the nGraph function under test and stores it in `function`
+        InferenceEngine::Precision netPrecision;
+
+        std::tie(netPrecision, targetDevice, configuration) = this->GetParam();  // targetDevice/configuration are not declared here; presumably members of the base class
+        auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
+
+        const ngraph::Shape shape = {1, 128};  // shape of the single network input
+        auto params = ngraph::builder::makeParams(ngPrc, {shape});
+
+        auto pattern1 = std::make_shared<ngraph::opset7::Constant>(ngraph::element::Type_t::i64, ngraph::Shape{3},  // 3-element i64 constant holding the target shape for reshape1
+            ngraph::Shape{1, 2, 64});
+        auto reshape1 = std::make_shared<ngraph::opset7::Reshape>(params[0], pattern1, false);  // {1, 128} -> {1, 2, 64}
+
+        auto relu1 = std::make_shared<ngraph::opset7::Relu>(reshape1);
+
+        auto lowNode = ngraph::builder::makeConstant<float>(ngPrc, {1}, { -10.0f });
+        auto highNode = ngraph::builder::makeConstant<float>(ngPrc, {1}, { 10.0f });
+        auto fq = std::make_shared<ngraph::opset7::FakeQuantize>(relu1, lowNode, highNode, lowNode, highNode,  // same [-10, 10] range is used for both input and output bounds
+            std::numeric_limits<uint16_t>::max());  // levels = 65535
+
+        auto pattern2 = std::make_shared<ngraph::opset7::Constant>(ngraph::element::Type_t::i64, ngraph::Shape{shape.size()},  // target shape constant: back to the original input shape
+            shape);
+        auto reshape2 = std::make_shared<ngraph::opset7::Reshape>(fq, pattern2, false);  // first consumer of fq: Reshape directly on the FQ output
+
+        auto relu2 = std::make_shared<ngraph::opset7::Relu>(fq);  // second consumer of fq: an activation
+        auto reshape3 = std::make_shared<ngraph::opset7::Reshape>(relu2, pattern2, false);
+
+        ngraph::ResultVector results{std::make_shared<ngraph::opset7::Result>(reshape2),  // both branches are network outputs
+                                     std::make_shared<ngraph::opset7::Result>(reshape3)};
+        function = std::make_shared<ngraph::Function>(results, params, "FQFusionWithMultipleWeights");  // NOTE(review): function name differs from the fixture name and the graph has no weights — confirm intended
+    }
+};
+
+TEST_P(FQWithMultipleOutConnections, CompareWithRefImpl) {  // Parameterized test body for the fixture above
+    Run();  // Run() is not defined in this file; presumably the base-class routine that infers and compares with a reference — confirm
+};
+
+const std::vector<InferenceEngine::Precision> netPrecisions = {  // Network precisions the test is instantiated with
+    InferenceEngine::Precision::FP32,
+    InferenceEngine::Precision::FP16
+};
+
+const std::vector<std::map<std::string, std::string>> configs = {  // Plugin configurations: one map per GNA_DEVICE_MODE value exercised
+    {
+        {"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
+    },
+    {
+        {"GNA_DEVICE_MODE", "GNA_SW_FP32"},
+    }
+};
+
+INSTANTIATE_TEST_CASE_P(smoke_fq_fusion, FQWithMultipleOutConnections,  // Instantiates the fixture under the "smoke_fq_fusion" prefix
+    ::testing::Combine(  // every combination of the value sets below
+        ::testing::ValuesIn(netPrecisions),
+        ::testing::Values(CommonTestUtils::DEVICE_GNA),
+        ::testing::ValuesIn(configs)),
+    FQWithMultipleOutConnections::getTestCaseName);  // custom name generator for readable test names
+} // namespace LayerTestsDefinitions