[GNA] Improve CNN2D kernels/weights quantization (#5370)
* Introduce heuristic for weight reducer * handle kernels 3x3 and 7x1 to avoid saturation * enable tests * issue 54140
This commit is contained in:
parent
9248a5887d
commit
05b62258c7
@ -14,6 +14,7 @@
|
||||
#include <legacy/ie_layers.h>
|
||||
#include "gna_upstream_iterator.hpp"
|
||||
#include "layers/gna_layer_info.hpp"
|
||||
#include "layers/gna_convolution_layer.hpp"
|
||||
#include "gna_plugin_log.hpp"
|
||||
#include "gna_slope_scale.h"
|
||||
#include "runtime/pwl.h"
|
||||
@ -1107,8 +1108,9 @@ class ScaleFactorPerLayer<InferenceEngine::WeightableLayer*> {
|
||||
double weights_reducer = 1.0;
|
||||
auto conv = dynamic_cast<InferenceEngine::ConvolutionLayer *>(wl);
|
||||
if (conv) {
|
||||
auto channels_num = GetDataDimSize(conv->insData.front().lock(), InferenceEngine::DataDimName::C);
|
||||
weights_reducer = MAX_VAL_2B_FEAT * scaleRange * channels_num / std::numeric_limits<int32_t>::max();
|
||||
const auto inDepth = GetDataDimSize(conv->insData.front().lock(), InferenceEngine::DataDimName::C);
|
||||
weights_reducer = GNAConvolutionLayer::getWeightsReducer(*conv);
|
||||
weights_reducer *= MAX_VAL_2B_FEAT * scaleRange * inDepth / std::numeric_limits<int32_t>::max();
|
||||
weights_reducer = std::max(1.0, weights_reducer);
|
||||
}
|
||||
quant->_weights_quant.SetScale(quant->_weights_quant.GetScale() / weights_reducer);
|
||||
|
@ -30,6 +30,7 @@
|
||||
#include "frontend/model_quantizer.hpp"
|
||||
#include "layers/layers_builder.hpp"
|
||||
#include "layers/gna_concat_layer.hpp"
|
||||
#include "layers/gna_convolution_layer.hpp"
|
||||
#include "layers/gna_crop_layer.hpp"
|
||||
#include "layers/gna_fake_quantize_layer.hpp"
|
||||
#include "round_float_define.hpp"
|
||||
@ -265,7 +266,7 @@ void GNAGraphCompiler::ConvolutionPrimitive(InferenceEngine::CNNLayerPtr layer)
|
||||
}
|
||||
|
||||
// Map 2d convolution to 1d if it's possible
|
||||
if (in_height > 1 && in_width > 1 && in_width == convolution._kernel_x && convolution._stride_x == 1) {
|
||||
if (GNAConvolutionLayer::isMappableFrom2DTo1D(in_height, in_width, convolution._kernel_x, convolution._stride_x)) {
|
||||
in_width *= in_height;
|
||||
in_height = 1;
|
||||
out_width *= out_height;
|
||||
@ -298,9 +299,7 @@ void GNAGraphCompiler::ConvolutionPrimitive(InferenceEngine::CNNLayerPtr layer)
|
||||
dnn->new_num_conv_columns = 0;
|
||||
}
|
||||
|
||||
// TODO: refine following condition
|
||||
if (((in_channels > 1) && (in_height > 1) && (in_width > 1)) || // 3D input
|
||||
(convolution._kernel_x != 1 && convolution._kernel_y != 1) || // 2D kernel
|
||||
if (GNAConvolutionLayer::isConv2D(in_height, in_width, in_channels, convolution._kernel_y, convolution._kernel_x) ||
|
||||
in_height != 1) {
|
||||
// TensorFlow default layout is NHWC
|
||||
// OpenVino Default layout is NCHW
|
||||
|
@ -0,0 +1,49 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <legacy/ie_layers.h>
|
||||
#include "../gna_graph_tools.hpp"
|
||||
|
||||
namespace GNAPluginNS {
|
||||
struct GNAConvolutionLayer {
|
||||
static bool isMappableFrom2DTo1D(const uint32_t inHeight, const uint32_t inWidth, const uint32_t kernelWidth, const uint32_t strideWidth) {
|
||||
return inHeight > 1 && inWidth > 1 && inWidth == kernelWidth && strideWidth == 1;
|
||||
}
|
||||
|
||||
// 3D input or 2D kernel
|
||||
static bool isConv2D(const uint32_t inHeight, const uint32_t inWidth, const uint32_t inDepth,
|
||||
const uint32_t kernelHeight, const uint32_t kernelWidth) {
|
||||
return (kernelHeight > 1 && kernelWidth > 1) || (inHeight > 1 && inWidth > 1 && inDepth > 1);
|
||||
}
|
||||
|
||||
static double getWeightsReducer(InferenceEngine::ConvolutionLayer& conv) {
|
||||
using KRT = std::pair<uint32_t, double>;
|
||||
// Empirically determined weights reducers for 2D Convolution
|
||||
// i.e.:
|
||||
// for kernelSize >= 9 -> 1.3
|
||||
// for kernelSize in {7, 8} -> 1.2
|
||||
const std::vector< KRT > reducers{ {9, 1.3}, {7, 1.2} };
|
||||
auto reducer = 1.0;
|
||||
const auto inDepth = GetDataDimSize(conv.insData.front().lock(), InferenceEngine::DataDimName::C);
|
||||
const auto inHeight = GetDataDimSize(conv.insData.front().lock(), InferenceEngine::DataDimName::H);
|
||||
const auto inWidth = GetDataDimSize(conv.insData.front().lock(), InferenceEngine::DataDimName::W);
|
||||
if (isConv2D(inHeight, inWidth, inDepth, conv._kernel_y, conv._kernel_x) &&
|
||||
!isMappableFrom2DTo1D(inHeight, inWidth, conv._kernel_x, conv._stride_x)) {
|
||||
const auto kernelSize = conv._kernel_x * conv._kernel_y;
|
||||
auto r = std::lower_bound(reducers.begin(), reducers.end(), kernelSize,
|
||||
[](const KRT& l, const KRT::first_type& r) {return l.first > r; });
|
||||
if (r != reducers.end())
|
||||
reducer = r->second;
|
||||
}
|
||||
return reducer;
|
||||
}
|
||||
};
|
||||
} // namespace GNAPluginNS
|
@ -71,8 +71,8 @@ const std::vector<std::vector<size_t >> kernels2D = {
|
||||
{1, 3},
|
||||
{1, 2},
|
||||
{2, 2},
|
||||
// {7, 1}, TODO: fix accuracy failures, see issue 54140
|
||||
// {3, 3}, TODO: fix accuracy failures, see issue 54140
|
||||
{7, 1},
|
||||
{3, 3},
|
||||
};
|
||||
const std::vector<std::vector<size_t >> strides2D = {
|
||||
{1, 1},
|
||||
|
Loading…
Reference in New Issue
Block a user