[GNA] Improve CNN2D kernels/weights quantization (#5370)

* Introduce heuristic for weight reducer
     * handle kernels 3x3 and 7x1 to avoid saturation
     * enable tests
     * issue 54140
This commit is contained in:
Krzysztof Bruniecki 2021-05-13 15:31:23 +02:00 committed by GitHub
parent 9248a5887d
commit 05b62258c7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 58 additions and 8 deletions

View File

@ -14,6 +14,7 @@
#include <legacy/ie_layers.h>
#include "gna_upstream_iterator.hpp"
#include "layers/gna_layer_info.hpp"
#include "layers/gna_convolution_layer.hpp"
#include "gna_plugin_log.hpp"
#include "gna_slope_scale.h"
#include "runtime/pwl.h"
@ -1107,8 +1108,9 @@ class ScaleFactorPerLayer<InferenceEngine::WeightableLayer*> {
double weights_reducer = 1.0;
auto conv = dynamic_cast<InferenceEngine::ConvolutionLayer *>(wl);
if (conv) {
auto channels_num = GetDataDimSize(conv->insData.front().lock(), InferenceEngine::DataDimName::C);
weights_reducer = MAX_VAL_2B_FEAT * scaleRange * channels_num / std::numeric_limits<int32_t>::max();
const auto inDepth = GetDataDimSize(conv->insData.front().lock(), InferenceEngine::DataDimName::C);
weights_reducer = GNAConvolutionLayer::getWeightsReducer(*conv);
weights_reducer *= MAX_VAL_2B_FEAT * scaleRange * inDepth / std::numeric_limits<int32_t>::max();
weights_reducer = std::max(1.0, weights_reducer);
}
quant->_weights_quant.SetScale(quant->_weights_quant.GetScale() / weights_reducer);

View File

@ -30,6 +30,7 @@
#include "frontend/model_quantizer.hpp"
#include "layers/layers_builder.hpp"
#include "layers/gna_concat_layer.hpp"
#include "layers/gna_convolution_layer.hpp"
#include "layers/gna_crop_layer.hpp"
#include "layers/gna_fake_quantize_layer.hpp"
#include "round_float_define.hpp"
@ -265,7 +266,7 @@ void GNAGraphCompiler::ConvolutionPrimitive(InferenceEngine::CNNLayerPtr layer)
}
// Map 2d convolution to 1d if it's possible
if (in_height > 1 && in_width > 1 && in_width == convolution._kernel_x && convolution._stride_x == 1) {
if (GNAConvolutionLayer::isMappableFrom2DTo1D(in_height, in_width, convolution._kernel_x, convolution._stride_x)) {
in_width *= in_height;
in_height = 1;
out_width *= out_height;
@ -298,9 +299,7 @@ void GNAGraphCompiler::ConvolutionPrimitive(InferenceEngine::CNNLayerPtr layer)
dnn->new_num_conv_columns = 0;
}
// TODO: refine following condition
if (((in_channels > 1) && (in_height > 1) && (in_width > 1)) || // 3D input
(convolution._kernel_x != 1 && convolution._kernel_y != 1) || // 2D kernel
if (GNAConvolutionLayer::isConv2D(in_height, in_width, in_channels, convolution._kernel_y, convolution._kernel_x) ||
in_height != 1) {
// TensorFlow default layout is NHWC
// OpenVino Default layout is NCHW

View File

@ -0,0 +1,49 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <algorithm>
#include <cmath>
#include <utility>
#include <vector>
#include <legacy/ie_layers.h>
#include "../gna_graph_tools.hpp"
namespace GNAPluginNS {
/**
 * @brief Stateless helpers that classify a convolution by its input and kernel
 *        geometry and pick a weights reducer for quantization.
 */
struct GNAConvolutionLayer {
    /**
     * @brief Checks whether a convolution over a 2D input can be treated as a 1D one.
     *
     * True only when the input has both height and width greater than 1, the kernel
     * is exactly as wide as the input, and the stride along the width equals 1.
     */
    static bool isMappableFrom2DTo1D(const uint32_t inHeight, const uint32_t inWidth, const uint32_t kernelWidth, const uint32_t strideWidth) {
        const bool inputIs2D = inHeight > 1 && inWidth > 1;
        const bool kernelSpansWholeWidth = inWidth == kernelWidth;
        return inputIs2D && kernelSpansWholeWidth && strideWidth == 1;
    }

    /**
     * @brief Tells whether the convolution has to be handled as a 2D one.
     *
     * That is the case when the kernel is two-dimensional (height and width both
     * greater than 1) or when the input is three-dimensional (height, width and
     * depth all greater than 1).
     */
    static bool isConv2D(const uint32_t inHeight, const uint32_t inWidth, const uint32_t inDepth,
                         const uint32_t kernelHeight, const uint32_t kernelWidth) {
        const bool kernelIs2D = kernelHeight > 1 && kernelWidth > 1;
        if (kernelIs2D) {
            return true;
        }
        // Otherwise only a 3D input qualifies.
        return inHeight > 1 && inWidth > 1 && inDepth > 1;
    }

    /**
     * @brief Returns the empirically determined weights reducer for a convolution layer.
     *
     * The reducer depends on the kernel size (kernel width * kernel height):
     *   - kernel size >= 9     -> 1.3
     *   - kernel size in {7,8} -> 1.2
     *   - anything smaller     -> 1.0
     * Layers that are not 2D convolutions, or that can be mapped from 2D to 1D,
     * always get 1.0.
     *
     * @param conv convolution layer; its first input supplies the C, H and W sizes
     * @return the reducer value (1.0 when no reduction applies)
     */
    static double getWeightsReducer(InferenceEngine::ConvolutionLayer& conv) {
        using KRT = std::pair<uint32_t, double>;
        // {minimal kernel size, reducer} pairs, ordered from the largest threshold
        // to the smallest so that the first match is the tightest one.
        const std::vector< KRT > reducers{ {9, 1.3}, {7, 1.2} };
        const double noReduction = 1.0;

        const auto inputData = conv.insData.front().lock();
        const auto inDepth = GetDataDimSize(inputData, InferenceEngine::DataDimName::C);
        const auto inHeight = GetDataDimSize(inputData, InferenceEngine::DataDimName::H);
        const auto inWidth = GetDataDimSize(inputData, InferenceEngine::DataDimName::W);

        if (!isConv2D(inHeight, inWidth, inDepth, conv._kernel_y, conv._kernel_x)) {
            return noReduction;
        }
        if (isMappableFrom2DTo1D(inHeight, inWidth, conv._kernel_x, conv._stride_x)) {
            return noReduction;
        }

        const KRT::first_type kernelSize = conv._kernel_x * conv._kernel_y;
        for (const auto& entry : reducers) {
            if (entry.first <= kernelSize) {
                return entry.second;
            }
        }
        return noReduction;
    }
};
} // namespace GNAPluginNS

View File

@ -71,8 +71,8 @@ const std::vector<std::vector<size_t >> kernels2D = {
{1, 3},
{1, 2},
{2, 2},
// {7, 1}, TODO: fix accuracy failures, see issue 54140
// {3, 3}, TODO: fix accuracy failures, see issue 54140
{7, 1},
{3, 3},
};
const std::vector<std::vector<size_t >> strides2D = {
{1, 1},