From 05b62258c78e55543f3864be4260745dc2013045 Mon Sep 17 00:00:00 2001 From: Krzysztof Bruniecki Date: Thu, 13 May 2021 15:31:23 +0200 Subject: [PATCH] [GNA] Improve CNN2D kernels/weights quantization (#5370) * Introduce heuristic for weight reducer * handle kernels 3x3 and 7x1 to avoid saturation * enable tests * issue 54140 --- .../gna_plugin/frontend/scale_factor_calc.hpp | 6 ++- .../src/gna_plugin/gna_graph_compiler.cpp | 7 ++- .../layers/gna_convolution_layer.hpp | 49 +++++++++++++++++++ .../single_layer_tests/convolution.cpp | 4 +- 4 files changed, 58 insertions(+), 8 deletions(-) create mode 100644 inference-engine/src/gna_plugin/layers/gna_convolution_layer.hpp diff --git a/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp b/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp index 1a4dabd2db6..be31c9fabd1 100644 --- a/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp +++ b/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp @@ -14,6 +14,7 @@ #include #include "gna_upstream_iterator.hpp" #include "layers/gna_layer_info.hpp" +#include "layers/gna_convolution_layer.hpp" #include "gna_plugin_log.hpp" #include "gna_slope_scale.h" #include "runtime/pwl.h" @@ -1107,8 +1108,9 @@ class ScaleFactorPerLayer { double weights_reducer = 1.0; auto conv = dynamic_cast(wl); if (conv) { - auto channels_num = GetDataDimSize(conv->insData.front().lock(), InferenceEngine::DataDimName::C); - weights_reducer = MAX_VAL_2B_FEAT * scaleRange * channels_num / std::numeric_limits::max(); + const auto inDepth = GetDataDimSize(conv->insData.front().lock(), InferenceEngine::DataDimName::C); + weights_reducer = GNAConvolutionLayer::getWeightsReducer(*conv); + weights_reducer *= MAX_VAL_2B_FEAT * scaleRange * inDepth / std::numeric_limits::max(); weights_reducer = std::max(1.0, weights_reducer); } quant->_weights_quant.SetScale(quant->_weights_quant.GetScale() / weights_reducer); diff --git 
a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp index 541bd142c3e..b085dcef7f4 100644 --- a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp +++ b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp @@ -30,6 +30,7 @@ #include "frontend/model_quantizer.hpp" #include "layers/layers_builder.hpp" #include "layers/gna_concat_layer.hpp" +#include "layers/gna_convolution_layer.hpp" #include "layers/gna_crop_layer.hpp" #include "layers/gna_fake_quantize_layer.hpp" #include "round_float_define.hpp" @@ -265,7 +266,7 @@ void GNAGraphCompiler::ConvolutionPrimitive(InferenceEngine::CNNLayerPtr layer) } // Map 2d convolution to 1d if it's possible - if (in_height > 1 && in_width > 1 && in_width == convolution._kernel_x && convolution._stride_x == 1) { + if (GNAConvolutionLayer::isMappableFrom2DTo1D(in_height, in_width, convolution._kernel_x, convolution._stride_x)) { in_width *= in_height; in_height = 1; out_width *= out_height; @@ -298,9 +299,7 @@ void GNAGraphCompiler::ConvolutionPrimitive(InferenceEngine::CNNLayerPtr layer) dnn->new_num_conv_columns = 0; } - // TODO: refine following condition - if (((in_channels > 1) && (in_height > 1) && (in_width > 1)) || // 3D input - (convolution._kernel_x != 1 && convolution._kernel_y != 1) || // 2D kernel + if (GNAConvolutionLayer::isConv2D(in_height, in_width, in_channels, convolution._kernel_y, convolution._kernel_x) || in_height != 1) { // TensorFlow default layout is NHWC // OpenVino Default layout is NCHW diff --git a/inference-engine/src/gna_plugin/layers/gna_convolution_layer.hpp b/inference-engine/src/gna_plugin/layers/gna_convolution_layer.hpp new file mode 100644 index 00000000000..e83d9b6c535 --- /dev/null +++ b/inference-engine/src/gna_plugin/layers/gna_convolution_layer.hpp @@ -0,0 +1,49 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include + +#include 
+#include "../gna_graph_tools.hpp"
+
+namespace GNAPluginNS {
+// Shape predicates and the kernel/weights quantization heuristic used for GNA convolutions.
+struct GNAConvolutionLayer {
+    // True when the input has height > 1 and width > 1, the kernel spans the whole input width and the width stride is 1.
+    static bool isMappableFrom2DTo1D(const uint32_t inHeight, const uint32_t inWidth, const uint32_t kernelWidth, const uint32_t strideWidth) {
+        return inHeight > 1 && inWidth > 1 && inWidth == kernelWidth && strideWidth == 1;
+    }
+
+    // True for a 2D kernel (both kernel dimensions > 1) or a 3D input (height, width and depth all > 1).
+    static bool isConv2D(const uint32_t inHeight, const uint32_t inWidth, const uint32_t inDepth,
+                         const uint32_t kernelHeight, const uint32_t kernelWidth) {
+        return (kernelHeight > 1 && kernelWidth > 1) || (inHeight > 1 && inWidth > 1 && inDepth > 1);
+    }
+
+    // Returns the empirically determined weights reducer of a 2D convolution that is not mappable to 1D:
+    // 1.3 for kernelSize >= 9, 1.2 for kernelSize in {7, 8}; 1.0 (no reduction) in every other case.
+    static double getWeightsReducer(const InferenceEngine::ConvolutionLayer& conv) {
+        using KRT = std::pair<uint32_t, double>;
+        // Ordered by descending kernel size so that lower_bound stops at the first threshold <= kernelSize.
+        static const std::vector< KRT > reducers{ {9, 1.3}, {7, 1.2} };
+        const auto input = conv.insData.front().lock();  // lock() once instead of once per dimension
+        const auto inDepth = GetDataDimSize(input, InferenceEngine::DataDimName::C);
+        const auto inHeight = GetDataDimSize(input, InferenceEngine::DataDimName::H);
+        const auto inWidth = GetDataDimSize(input, InferenceEngine::DataDimName::W);
+        if (!isConv2D(inHeight, inWidth, inDepth, conv._kernel_y, conv._kernel_x) ||
+            isMappableFrom2DTo1D(inHeight, inWidth, conv._kernel_x, conv._stride_x)) {
+            return 1.0;
+        }
+        const auto kernelSize = conv._kernel_x * conv._kernel_y;
+        const auto match = std::lower_bound(reducers.begin(), reducers.end(), kernelSize,
+            [](const KRT& entry, const KRT::first_type& size) { return entry.first > size; });
+        return match != reducers.end() ? match->second : 1.0;
+    }
+};
+} // namespace GNAPluginNS
diff --git a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/single_layer_tests/convolution.cpp b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/single_layer_tests/convolution.cpp
index 79151f34c3b..6605857612a 100644
---
a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/single_layer_tests/convolution.cpp +++ b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/single_layer_tests/convolution.cpp @@ -71,8 +71,8 @@ const std::vector> kernels2D = { {1, 3}, {1, 2}, {2, 2}, - // {7, 1}, TODO: fix accuracy failures, see issue 54140 - // {3, 3}, TODO: fix accuracy failures, see issue 54140 + {7, 1}, + {3, 3}, }; const std::vector> strides2D = { {1, 1},