From 05b62258c78e55543f3864be4260745dc2013045 Mon Sep 17 00:00:00 2001
From: Krzysztof Bruniecki <krzysztof.bruniecki@intel.com>
Date: Thu, 13 May 2021 15:31:23 +0200
Subject: [PATCH] [GNA] Improve CNN2D kernels/weights quantization (#5370)

* Introduce a heuristic for the weights reducer
     * handle 3x3 and 7x1 kernels to avoid saturation
     * enable the corresponding tests
     * issue 54140
---
 .../gna_plugin/frontend/scale_factor_calc.hpp |  6 ++-
 .../src/gna_plugin/gna_graph_compiler.cpp     |  7 ++-
 .../layers/gna_convolution_layer.hpp          | 49 +++++++++++++++++++
 .../single_layer_tests/convolution.cpp        |  4 +-
 4 files changed, 58 insertions(+), 8 deletions(-)
 create mode 100644 inference-engine/src/gna_plugin/layers/gna_convolution_layer.hpp

diff --git a/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp b/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp
index 1a4dabd2db6..be31c9fabd1 100644
--- a/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp
+++ b/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp
@@ -14,6 +14,7 @@
 #include <legacy/ie_layers.h>
 #include "gna_upstream_iterator.hpp"
 #include "layers/gna_layer_info.hpp"
+#include "layers/gna_convolution_layer.hpp"
 #include "gna_plugin_log.hpp"
 #include "gna_slope_scale.h"
 #include "runtime/pwl.h"
@@ -1107,8 +1108,9 @@ class ScaleFactorPerLayer<InferenceEngine::WeightableLayer*> {
             double weights_reducer = 1.0;
             auto conv = dynamic_cast<InferenceEngine::ConvolutionLayer *>(wl);
             if (conv) {
-                auto channels_num = GetDataDimSize(conv->insData.front().lock(), InferenceEngine::DataDimName::C);
-                weights_reducer = MAX_VAL_2B_FEAT * scaleRange * channels_num / std::numeric_limits<int32_t>::max();
+                const auto inDepth = GetDataDimSize(conv->insData.front().lock(), InferenceEngine::DataDimName::C);
+                weights_reducer = GNAConvolutionLayer::getWeightsReducer(*conv);
+                weights_reducer *= MAX_VAL_2B_FEAT * scaleRange * inDepth / std::numeric_limits<int32_t>::max();
                 weights_reducer = std::max(1.0, weights_reducer);
             }
             quant->_weights_quant.SetScale(quant->_weights_quant.GetScale() / weights_reducer);
diff --git a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp
index 541bd142c3e..b085dcef7f4 100644
--- a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp
+++ b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp
@@ -30,6 +30,7 @@
 #include "frontend/model_quantizer.hpp"
 #include "layers/layers_builder.hpp"
 #include "layers/gna_concat_layer.hpp"
+#include "layers/gna_convolution_layer.hpp"
 #include "layers/gna_crop_layer.hpp"
 #include "layers/gna_fake_quantize_layer.hpp"
 #include "round_float_define.hpp"
@@ -265,7 +266,7 @@ void GNAGraphCompiler::ConvolutionPrimitive(InferenceEngine::CNNLayerPtr layer)
     }
 
     // Map 2d convolution to 1d if it's possible
-    if (in_height > 1 && in_width > 1 && in_width == convolution._kernel_x && convolution._stride_x == 1) {
+    if (GNAConvolutionLayer::isMappableFrom2DTo1D(in_height, in_width, convolution._kernel_x, convolution._stride_x)) {
         in_width *= in_height;
         in_height = 1;
         out_width *= out_height;
@@ -298,9 +299,7 @@ void GNAGraphCompiler::ConvolutionPrimitive(InferenceEngine::CNNLayerPtr layer)
         dnn->new_num_conv_columns = 0;
     }
 
-    // TODO: refine following condition
-    if (((in_channels > 1) && (in_height > 1) && (in_width > 1)) || // 3D input
-        (convolution._kernel_x != 1 && convolution._kernel_y != 1) || // 2D kernel
+    if (GNAConvolutionLayer::isConv2D(in_height, in_width, in_channels, convolution._kernel_y, convolution._kernel_x) ||
         in_height != 1) {
         // TensorFlow default layout is NHWC
         // OpenVino Default layout is   NCHW
diff --git a/inference-engine/src/gna_plugin/layers/gna_convolution_layer.hpp b/inference-engine/src/gna_plugin/layers/gna_convolution_layer.hpp
new file mode 100644
index 00000000000..e83d9b6c535
--- /dev/null
+++ b/inference-engine/src/gna_plugin/layers/gna_convolution_layer.hpp
@@ -0,0 +1,49 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <algorithm>
+#include <cmath>
+#include <utility>
+#include <vector>
+
+#include <legacy/ie_layers.h>
+#include "../gna_graph_tools.hpp"
+
+namespace GNAPluginNS {
+struct GNAConvolutionLayer {
+    // A 2D input whose width is fully covered by the kernel (at stride 1) can be processed as 1D
+    static bool isMappableFrom2DTo1D(const uint32_t inHeight, const uint32_t inWidth, const uint32_t kernelWidth, const uint32_t strideWidth) {
+        return strideWidth == 1 && kernelWidth == inWidth && inWidth > 1 && inHeight > 1;
+    }
+
+    // Convolution counts as 2D when the input is 3D or the kernel is 2D
+    static bool isConv2D(const uint32_t inHeight, const uint32_t inWidth, const uint32_t inDepth,
+                         const uint32_t kernelHeight, const uint32_t kernelWidth) {
+        const bool is3DInput = inHeight > 1 && inWidth > 1 && inDepth > 1;
+        const bool is2DKernel = kernelHeight > 1 && kernelWidth > 1;
+        return is2DKernel || is3DInput;
+    }
+
+    // Empirically determined weights reducer for conv; 1.0 unless it is a 2D convolution
+    // that is not mappable to 1D: kernelSize >= 9 -> 1.3, kernelSize in {7, 8} -> 1.2
+    static double getWeightsReducer(InferenceEngine::ConvolutionLayer& conv) {
+        using KRT = std::pair<uint32_t, double>;
+        // Thresholds in descending order, as required by the lower_bound lookup below
+        const std::vector< KRT > reducers{ {9, 1.3}, {7, 1.2} };
+        const auto& input = conv.insData.front();
+        const auto inDepth = GetDataDimSize(input.lock(), InferenceEngine::DataDimName::C);
+        const auto inHeight = GetDataDimSize(input.lock(), InferenceEngine::DataDimName::H);
+        const auto inWidth = GetDataDimSize(input.lock(), InferenceEngine::DataDimName::W);
+        if (!isConv2D(inHeight, inWidth, inDepth, conv._kernel_y, conv._kernel_x) ||
+            isMappableFrom2DTo1D(inHeight, inWidth, conv._kernel_x, conv._stride_x))
+            return 1.0;
+        const auto kernelSize = conv._kernel_x * conv._kernel_y;
+        const auto match = std::lower_bound(reducers.begin(), reducers.end(), kernelSize,
+            [](const KRT& entry, const KRT::first_type& size) { return entry.first > size; });
+        return match != reducers.end() ? match->second : 1.0;
+    }
+};
+}  // namespace GNAPluginNS
diff --git a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/single_layer_tests/convolution.cpp b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/single_layer_tests/convolution.cpp
index 79151f34c3b..6605857612a 100644
--- a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/single_layer_tests/convolution.cpp
+++ b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/single_layer_tests/convolution.cpp
@@ -71,8 +71,8 @@ const std::vector<std::vector<size_t >> kernels2D = {
                                                           {1, 3},
                                                           {1, 2},
                                                           {2, 2},
-                                                          // {7, 1}, TODO: fix accuracy failures, see issue 54140
-                                                          // {3, 3}, TODO: fix accuracy failures, see issue 54140
+                                                          {7, 1},
+                                                          {3, 3},
 };
 const std::vector<std::vector<size_t >> strides2D = {
                                                           {1, 1},