[GNA] Improve CNN2D kernels/weights quantization (#5370)

* Introduce heuristic for weight reducer
* Handle kernels 3x3 and 7x1 to avoid saturation
* Enable tests
* Issue 54140
2021-05-13 15:31:23 +02:00 · 2021-05-13 15:31:23 +02:00 · 05b62258c7
commit 05b62258c7
parent 9248a5887d
4 changed files with 58 additions and 8 deletions
--- a/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp
+++ b/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp
@ -14,6 +14,7 @@
 #include <legacy/ie_layers.h>
 #include "gna_upstream_iterator.hpp"
 #include "layers/gna_layer_info.hpp"
+#include "layers/gna_convolution_layer.hpp"
 #include "gna_plugin_log.hpp"
 #include "gna_slope_scale.h"
 #include "runtime/pwl.h"
@ -1107,8 +1108,9 @@ class ScaleFactorPerLayer<InferenceEngine::WeightableLayer*> {
            double weights_reducer = 1.0;
            auto conv = dynamic_cast<InferenceEngine::ConvolutionLayer *>(wl);
            if (conv) {
-                auto channels_num = GetDataDimSize(conv->insData.front().lock(), InferenceEngine::DataDimName::C);
-                weights_reducer = MAX_VAL_2B_FEAT * scaleRange * channels_num / std::numeric_limits<int32_t>::max();
+                const auto inDepth = GetDataDimSize(conv->insData.front().lock(), InferenceEngine::DataDimName::C);
+                weights_reducer = GNAConvolutionLayer::getWeightsReducer(*conv);
+                weights_reducer *= MAX_VAL_2B_FEAT * scaleRange * inDepth / std::numeric_limits<int32_t>::max();
                weights_reducer = std::max(1.0, weights_reducer);
            }
            quant->_weights_quant.SetScale(quant->_weights_quant.GetScale() / weights_reducer);
--- a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp
+++ b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp
@ -30,6 +30,7 @@
 #include "frontend/model_quantizer.hpp"
 #include "layers/layers_builder.hpp"
 #include "layers/gna_concat_layer.hpp"
+#include "layers/gna_convolution_layer.hpp"
 #include "layers/gna_crop_layer.hpp"
 #include "layers/gna_fake_quantize_layer.hpp"
 #include "round_float_define.hpp"
@ -265,7 +266,7 @@ void GNAGraphCompiler::ConvolutionPrimitive(InferenceEngine::CNNLayerPtr layer)
    }

    // Map 2d convolution to 1d if it's possible
-    if (in_height > 1 && in_width > 1 && in_width == convolution._kernel_x && convolution._stride_x == 1) {
+    if (GNAConvolutionLayer::isMappableFrom2DTo1D(in_height, in_width, convolution._kernel_x, convolution._stride_x)) {
        in_width *= in_height;
        in_height = 1;
        out_width *= out_height;
@ -298,9 +299,7 @@ void GNAGraphCompiler::ConvolutionPrimitive(InferenceEngine::CNNLayerPtr layer)
        dnn->new_num_conv_columns = 0;
    }

-    // TODO: refine following condition
-    if (((in_channels > 1) && (in_height > 1) && (in_width > 1)) || // 3D input
-        (convolution._kernel_x != 1 && convolution._kernel_y != 1) || // 2D kernel
+    if (GNAConvolutionLayer::isConv2D(in_height, in_width, in_channels, convolution._kernel_y, convolution._kernel_x) ||
        in_height != 1) {
        // TensorFlow default layout is NHWC
        // OpenVino Default layout is   NCHW
--- a/inference-engine/src/gna_plugin/layers/gna_convolution_layer.hpp
+++ b/inference-engine/src/gna_plugin/layers/gna_convolution_layer.hpp
@ -0,0 +1,49 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <algorithm>
+#include <cmath>
+#include <utility>
+#include <vector>
+
+#include <legacy/ie_layers.h>
+#include "../gna_graph_tools.hpp"
+
+namespace GNAPluginNS {
+// Stateless helpers that classify a convolution by its input/kernel geometry and
+// provide the extra weights reducer applied during quantization of 2D convolutions.
+struct GNAConvolutionLayer {
+    // Returns true when a 2D convolution can be mapped to a 1D one: the input has more than
+    // one row and more than one column, the kernel spans the whole input width and the
+    // stride along the width equals 1.
+    static bool isMappableFrom2DTo1D(const uint32_t inHeight, const uint32_t inWidth, const uint32_t kernelWidth, const uint32_t strideWidth) {
+        return inHeight > 1 && inWidth > 1 && inWidth == kernelWidth && strideWidth == 1;
+    }
+
+    // Returns true for a 2D kernel (both kernel dimensions > 1) or
+    // for a 3D input (height, width and depth all > 1).
+    static bool isConv2D(const uint32_t inHeight, const uint32_t inWidth, const uint32_t inDepth,
+                     const uint32_t kernelHeight, const uint32_t kernelWidth) {
+        return (kernelHeight > 1 && kernelWidth > 1) || (inHeight > 1 && inWidth > 1 && inDepth > 1);
+    }
+
+    // Returns the empirically determined factor for the given convolution layer.
+    // The result is 1.0 unless the layer is a 2D convolution (see isConv2D) that cannot be
+    // mapped to 1D (see isMappableFrom2DTo1D); in that case it depends on the kernel area
+    // (kernel_x * kernel_y) as described by the table below.
+    // Dimensions are read from the first input of the layer.
+    static double getWeightsReducer(const InferenceEngine::ConvolutionLayer& conv) {
+        using KRT = std::pair<uint32_t, double>;
+        // Empirically determined weights reducers for 2D Convolution
+        // i.e.:
+        // for kernelSize >= 9       -> 1.3
+        // for kernelSize in {7, 8}  -> 1.2
+        // Entries must stay sorted by descending kernel size: the lower_bound call below
+        // relies on that order to pick the first threshold not greater than the kernel size.
+        // A plain local array avoids a heap allocation on every call.
+        const KRT reducers[] = { {9, 1.3}, {7, 1.2} };
+
+        // Lock the input once and reuse it for all three dimension queries.
+        const auto input = conv.insData.front().lock();
+        const auto inDepth = GetDataDimSize(input, InferenceEngine::DataDimName::C);
+        const auto inHeight = GetDataDimSize(input, InferenceEngine::DataDimName::H);
+        const auto inWidth = GetDataDimSize(input, InferenceEngine::DataDimName::W);
+
+        // Only genuine 2D convolutions get an extra reducer.
+        if (!isConv2D(inHeight, inWidth, inDepth, conv._kernel_y, conv._kernel_x) ||
+            isMappableFrom2DTo1D(inHeight, inWidth, conv._kernel_x, conv._stride_x)) {
+            return 1.0;
+        }
+
+        const auto kernelSize = conv._kernel_x * conv._kernel_y;
+        const auto match = std::lower_bound(std::begin(reducers), std::end(reducers), kernelSize,
+            [](const KRT& entry, const KRT::first_type& size) { return entry.first > size; });
+        // Kernels smaller than every threshold keep the neutral factor.
+        return match != std::end(reducers) ? match->second : 1.0;
+    }
+};
+}  // namespace GNAPluginNS
--- a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/single_layer_tests/convolution.cpp
+++ b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/single_layer_tests/convolution.cpp
@ -71,8 +71,8 @@ const std::vector<std::vector<size_t >> kernels2D = {
                                                          {1, 3},
                                                          {1, 2},
                                                          {2, 2},
-                                                          // {7, 1}, TODO: fix accuracy failures, see issue 54140
-                                                          // {3, 3}, TODO: fix accuracy failures, see issue 54140
+                                                          {7, 1},
+                                                          {3, 3},
 };
 const std::vector<std::vector<size_t >> strides2D = {
                                                          {1, 1},