[GNA] Remove internal overload correction algorithm (#14428)

Szymon Irzabek 2022-12-13 06:02:03 +01:00 committed by GitHub
parent 9cdea2aa73
commit 8ccabc546f
11 changed files with 105 additions and 88 deletions
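In short: the scale-factor calculator no longer corrects weight scale factors when output statistics predict int32 accumulator saturation. The weightable and GEMM paths drop the correction (dividing the weights scale by a reducer) in favor of a "Potential overload correction issue at layer" warning, while the eltwise path keeps input requantization but gates it on the reducer exceeding initial_weights_reducer_val instead of a float-equality check. The affected functional tests become negative tests that capture the plugin log and assert the warning, and the convolution FQ fixtures gain a parameter for the weights' FQ range so the tests can trigger saturation deliberately.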

View File

@@ -24,6 +24,7 @@ constexpr float k_identity = 6;
constexpr double pow_domain = 16;
constexpr float min_search_weights_val = 1.0f;
constexpr float max_search_weights_val = 1024.0f;
constexpr double initial_weights_reducer_val = 1.0;
float GetScaleFactor(InferenceEngine::CNNLayerPtr layer, QuantizedDataType data_type) {
IE_ASSERT(layer != nullptr);
@@ -207,10 +208,9 @@ std::vector<float> ScaleFactorCalculator::generateScaleFactors(float startRange,
double ScaleFactorCalculator::calculateWeightsReducerFromDstStats(QuantizationParams dst_quant) {
auto maxAbsVal = std::max(std::abs(dst_quant.GetMinValues().front()),
std::abs(dst_quant.GetMaxValues().front()));
auto maxIntVal = static_cast<int64_t>(maxAbsVal * dst_quant.GetScale() + 0.5f);
double weightsReducer = static_cast<double>(maxIntVal) / std::numeric_limits<int32_t>::max();
weightsReducer = std::max(1.0, weightsReducer);
weightsReducer = std::max(initial_weights_reducer_val, weightsReducer);
return weightsReducer;
}
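For reference, a minimal standalone sketch of the reducer computation above. DstStats is a hypothetical stand-in for the fields of QuantizationParams used here, and the numbers in main() are chosen only to illustrate the overflow case; this is an illustration, not the plugin's API.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>
#include <limits>

// Hypothetical stand-in for the QuantizationParams fields used above.
struct DstStats {
    float min_value;  // GetMinValues().front()
    float max_value;  // GetMaxValues().front()
    float scale;      // GetScale()
};

// Mirrors calculateWeightsReducerFromDstStats: the reducer exceeds 1.0 exactly
// when the estimated integer output would overflow the int32 accumulator.
double CalculateWeightsReducer(const DstStats& dst) {
    const double max_abs = std::max(std::abs(dst.min_value), std::abs(dst.max_value));
    const auto max_int = static_cast<int64_t>(max_abs * dst.scale + 0.5f);
    const double reducer = static_cast<double>(max_int) / std::numeric_limits<int32_t>::max();
    return std::max(1.0, reducer);  // clamp at initial_weights_reducer_val
}

int main() {
    // Quantized maximum of ~2x int32 max gives reducer ~2.0; after this commit
    // that is reported as a potential overload rather than corrected.
    const DstStats dst{-64.0f, 64.0f, 6.7108864e7f};
    std::cout << CalculateWeightsReducer(dst) << "\n";
}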
@@ -894,16 +894,15 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerEltwise(InferenceEngine::EltwiseL
if (requantizeInput(in1, newOutputScale, result, infiniteLoopCount)) {
return true;
}
// we unable to rescale the input - results might be bad
// Unable to rescale the input - results might be bad
log::warning() << "[INFO] weights saturated for " << eltwiseLayer->name << "\n";
}
if (!quantData->_dst_quant.IsStatsSet()) {
return true;
}
auto weightsReducer = calculateWeightsReducerFromDstStats(quantData->_dst_quant);
if (!common::fp32eq(weightsReducer, 1.0f)) {
if (weightsReducer > initial_weights_reducer_val) {
float newOutputScale = quantParams1->_dst_quant.GetScale() / weightsReducer;
if (requantizeInput(in1, newOutputScale, result, infiniteLoopCount)) {
return true;
@@ -912,8 +911,10 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerEltwise(InferenceEngine::EltwiseL
}
}
break;
default : THROW_GNA_EXCEPTION << "Unsupported Eltwise layer for quantisation: " << eltwiseLayer->_operation;
}
return true;
}
@@ -1277,68 +1278,34 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerWeightable(InferenceEngine::Weigh
}
}
auto weightsReducer = calculateWeightsReducerFromDstStats(quant->_dst_quant);
if (!common::fp32eq(weightsReducer, 1.0f)) {
quant->_weights_quant.SetScale(quant->_weights_quant.GetScale() / weightsReducer);
if (calculateWeightsReducerFromDstStats(quant->_dst_quant) > initial_weights_reducer_val) {
log::warning() << "Potential overload correction issue at layer " << wl->name;
}
if (common::fp32eq(quant->_weights_quant.GetScale(), 0.0f) || std::isinf(quant->_weights_quant.GetScale())) {
quant->_weights_quant.SetScale(1.0f);
}
quant->_dst_quant.SetScale(quant->_weights_quant.GetScale() * quant->_src_quant.GetScale());
}
return true;
}
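To make the behavioral change explicit, a hedged side-by-side sketch (the names are illustrative, not the plugin's API): the removed path divided the weights scale by the reducer and recomputed the destination scale, while the new path leaves the scales untouched and only logs the warning.

#include <cmath>
#include <iostream>
#include <string>

// Removed path: shrink the weights scale so the int32 accumulator cannot
// saturate, guarding against a zero or infinite result.
double CorrectWeightsScale(double weights_scale, double reducer) {
    double corrected = weights_scale / reducer;
    if (corrected == 0.0 || std::isinf(corrected)) {
        corrected = 1.0;
    }
    return corrected;  // dst scale then becomes corrected * src scale
}

// New path: report the condition and keep the scales as they are.
void ReportPotentialOverload(const std::string& layer_name, double reducer) {
    if (reducer > 1.0) {  // initial_weights_reducer_val
        std::cerr << "Potential overload correction issue at layer " << layer_name << "\n";
    }
}

int main() {
    std::cout << CorrectWeightsScale(512.0, 2.0) << "\n";  // 256: old behavior
    ReportPotentialOverload("fc_layer", 2.0);              // warning only: new behavior
}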
bool ScaleFactorCalculator::ScaleFactorPerLayerGemm(InferenceEngine::GemmLayer* gemmLayer,
bool ScaleFactorCalculator::ScaleFactorPerLayerGemm(InferenceEngine::GemmLayer* gl,
ScaleFactorUpdateResult& result,
int infiniteLoopCount,
const Config& gna_config) const {
if ( !gemmLayer ) {
if (!gl) {
THROW_GNA_EXCEPTION << "Incorrect Gemm Layer pointer \n";
}
auto in0 = InferenceEngine::CNNNetPrevLayer(gemmLayer, 0);
auto in1 = InferenceEngine::CNNNetPrevLayer(gemmLayer, 1);
auto in0 = InferenceEngine::CNNNetPrevLayer(gl, 0);
auto in1 = InferenceEngine::CNNNetPrevLayer(gl, 1);
auto quantData = InferenceEngine::getInjectedData<QuantizedLayerParams>(*gemmLayer);
auto quant = InferenceEngine::getInjectedData<QuantizedLayerParams>(*gl);
auto quantParams1 = InferenceEngine::getInjectedData<QuantizedLayerParams>(in1);
auto quantParams0 = InferenceEngine::getInjectedData<QuantizedLayerParams>(in0);
quantData->_src_quant.SetScale(quantParams0->_dst_quant.GetScale());
quantData->_weights_quant.SetScale(quantParams1->_dst_quant.GetScale());
quantData->_dst_quant.SetScale(
quantData->_src_quant.GetScale() * quantData->_weights_quant.GetScale());
quant->_src_quant.SetScale(quantParams0->_dst_quant.GetScale());
quant->_weights_quant.SetScale(quantParams1->_dst_quant.GetScale());
quant->_dst_quant.SetScale(quant->_src_quant.GetScale() * quant->_weights_quant.GetScale());
if (!quantData->_dst_quant.IsStatsSet()) {
return true;
}
// Adjust weights scale factor if output values exceed int32 maximum value
auto weightsReducer = calculateWeightsReducerFromDstStats(quantData->_dst_quant);
if (LayerInfo(in0).isConst()) {
if (!common::fp32eq(weightsReducer, 1.0f)) {
quantParams0->_dst_quant.SetScale(quantData->_src_quant.GetScale() / weightsReducer);
quantData->_src_quant.SetScale(quantData->_src_quant.GetScale() / weightsReducer);
}
if (common::fp32eq(quantData->_src_quant.GetScale(), 0.0f) || std::isinf(quantData->_src_quant.GetScale())) {
quantParams0->_dst_quant.SetScale(1.0f);
quantData->_src_quant.SetScale(1.0f);
}
quantData->_dst_quant.SetScale(quantData->_weights_quant.GetScale() * quantData->_src_quant.GetScale());
} else {
if (!common::fp32eq(weightsReducer, 1.0f)) {
for (int i = 0; i < 2; ++i) {
auto input = InferenceEngine::CNNNetPrevLayer(gemmLayer, i);
auto quantParams = InferenceEngine::getInjectedData<QuantizedLayerParams>(input);
float newOutputScale = quantParams->_dst_quant.GetScale() / weightsReducer;
if (requantizeInput(input, newOutputScale, result, infiniteLoopCount)) {
return true;
}
}
THROW_GNA_EXCEPTION << "Unable to quantize " << gemmLayer->name;
}
if (quant->_dst_quant.IsStatsSet() &&
calculateWeightsReducerFromDstStats(quant->_dst_quant) > initial_weights_reducer_val) {
log::warning() << "Potential overload correction issue at layer " << gl->name;
}
return true;

View File

@@ -8,6 +8,8 @@
#include <vector>
#include <string>
#include <gmock/gmock.h>
#include <ie_core.hpp>
#include "common_test_utils/common_utils.hpp"
@@ -32,7 +34,7 @@ typedef std::tuple<
std::map<std::string, std::string>, // Configuration
std::vector<size_t>, // Input Shape
std::pair<float, float> // Input Min and Max
> convertMatmulToPointwiseConvWithFqParams;
> ConvertMatmulToPointwiseConvWithFqNegParams;
namespace LayerTestsDefinitions {
@@ -96,14 +98,14 @@ protected:
}
};
class ConvertMatmulToPointwiseConvWithFq : public testing::WithParamInterface<convertMatmulToPointwiseConvWithFqParams>,
class ConvertMatmulToPointwiseConvWithFqNeg : public testing::WithParamInterface<ConvertMatmulToPointwiseConvWithFqNegParams>,
public LayerTestsUtils::LayerTestsCommon {
float inputDataMin = -10.0f;
float inputDataMax = 10.0f;
float inputDataResolution = 1.0f;
public:
static std::string getTestCaseName(testing::TestParamInfo<convertMatmulToPointwiseConvWithFqParams> obj) {
static std::string getTestCaseName(testing::TestParamInfo<ConvertMatmulToPointwiseConvWithFqNegParams> obj) {
InferenceEngine::Precision netPrecision;
std::string targetDevice;
std::map<std::string, std::string> configuration;
@@ -184,8 +186,17 @@ TEST_P(ConvertMatmulToPointwiseConv, CompareWithRefImpl) {
Run();
};
TEST_P(ConvertMatmulToPointwiseConvWithFq, CompareWithRefImpl) {
Run();
TEST_P(ConvertMatmulToPointwiseConvWithFqNeg, CompareWithRefImpl) {
std::stringstream what;
std::streambuf* sbuf = std::cout.rdbuf();
std::streambuf* ebuf = std::cerr.rdbuf();
std::cout.rdbuf(what.rdbuf());
std::cerr.rdbuf(what.rdbuf());
LoadNetwork();
const auto expected = "Potential overload correction issue at layer ";
EXPECT_THAT(what.str(), ::testing::HasSubstr(expected));
std::cout.rdbuf(sbuf);
std::cerr.rdbuf(ebuf);
};
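The test above swaps the std::cout and std::cerr buffers manually and restores them after the assertion. An equivalent RAII sketch of the same capture pattern (StreamCapture is a hypothetical helper, not part of the shared test utils) restores the buffers even if the guarded code exits the scope early:

#include <iostream>
#include <sstream>
#include <string>

// Captures everything written to std::cout and std::cerr for the lifetime of
// the object, restoring the original stream buffers in the destructor.
class StreamCapture {
public:
    StreamCapture()
        : cout_buf_(std::cout.rdbuf(captured_.rdbuf())),
          cerr_buf_(std::cerr.rdbuf(captured_.rdbuf())) {}
    ~StreamCapture() {
        std::cout.rdbuf(cout_buf_);
        std::cerr.rdbuf(cerr_buf_);
    }
    std::string str() const { return captured_.str(); }

private:
    std::stringstream captured_;
    std::streambuf* cout_buf_;
    std::streambuf* cerr_buf_;
};

int main() {
    std::string log;
    {
        StreamCapture capture;
        std::cerr << "Potential overload correction issue at layer matmul\n";
        log = capture.str();
    }
    std::cout << (log.find("Potential overload") != std::string::npos ? "warning captured\n"
                                                                      : "warning missing\n");
}

Inside the TEST_P this would replace the four rdbuf lines and guarantee the global streams are restored even when LoadNetwork throws.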
const std::vector<InferenceEngine::Precision> netPrecisions = {
@@ -194,8 +205,15 @@ const std::vector<InferenceEngine::Precision> netPrecisions = {
};
const std::vector<std::map<std::string, std::string>> configs = {
{
{"GNA_DEVICE_MODE", "GNA_SW_EXACT"}
}
};
const std::vector<std::map<std::string, std::string>> configs_neg = {
{
{"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
{"LOG_LEVEL", "LOG_WARNING"}
}
};
@@ -217,13 +235,13 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvertMatmulToPointwiseConvTest, ConvertMatmulTo
::testing::ValuesIn(inputShape)),
ConvertMatmulToPointwiseConv::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_ConvertMatmulToPointwiseConvTest, ConvertMatmulToPointwiseConvWithFq,
INSTANTIATE_TEST_SUITE_P(smoke_ConvertMatmulToPointwiseConvTest, ConvertMatmulToPointwiseConvWithFqNeg,
::testing::Combine(
::testing::ValuesIn(netPrecisions),
::testing::Values(CommonTestUtils::DEVICE_GNA),
::testing::ValuesIn(configs),
::testing::ValuesIn(configs_neg),
::testing::ValuesIn(inputShape),
::testing::ValuesIn(fqStats)),
ConvertMatmulToPointwiseConvWithFq::getTestCaseName);
ConvertMatmulToPointwiseConvWithFqNeg::getTestCaseName);
} // namespace LayerTestsDefinitions

View File

@@ -72,8 +72,8 @@ protected:
auto weights = ngraph::builder::makeConstant<float>(ngPrc, {outChannels, inputShape[1], 1, kernelSize},
CommonTestUtils::generate_float_numbers(outChannels * inputShape[1] * kernelSize,
weightsMinMax.first, weightsMinMax.second));
auto weightsLowNode = ngraph::builder::makeConstant<float>(ngPrc, {1}, { weightsMinMax.first });
auto weightsHighNode = ngraph::builder::makeConstant<float>(ngPrc, {1}, { weightsMinMax.second });
auto weightsLowNode = ngraph::builder::makeConstant<float>(ngPrc, {1}, { weightsMinMax.first * 2 });
auto weightsHighNode = ngraph::builder::makeConstant<float>(ngPrc, {1}, { weightsMinMax.second * 2 });
auto weightsFQ = std::make_shared<ngraph::opset7::FakeQuantize>(weights,
weightsLowNode, weightsHighNode, weightsLowNode, weightsHighNode, levels);

View File

@@ -96,8 +96,8 @@ protected:
inputLowNode1, inputHighNode1, inputLowNode1, inputHighNode1, levels);
auto filterWeightsNode = ngraph::builder::makeConstant<float>(ngPrc, {8, inputShape[1], 1, 8}, { 1.0f });
auto convLowNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{ 1 }, std::vector<float>{inputDataMin1});
auto convHighNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{ 1 }, std::vector<float>{inputDataMax1});
auto convLowNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{ 1 }, std::vector<float>{inputDataMin1 * 35});
auto convHighNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{ 1 }, std::vector<float>{inputDataMax1 * 35});
auto convWeightsFQNode = std::make_shared<ngraph::opset1::FakeQuantize>(filterWeightsNode,
convLowNode, convHighNode, convLowNode, convHighNode, levels);
auto convWeightsFQ = std::dynamic_pointer_cast<ngraph::opset1::FakeQuantize>(convWeightsFQNode);
@@ -148,7 +148,7 @@ const std::vector<std::map<std::string, std::string>> configs = {
const std::vector<std::vector<size_t>> inputShape = {
{1, 1, 1, 1024},
{1, 8, 1, 168},
{1, 8, 1, 168}
};
const std::vector<std::pair<float, float>> inputMinMax = {
@@ -156,11 +156,11 @@ const std::vector<std::pair<float, float>> inputMinMax = {
{-2, 2},
{-8, 8},
{-5, 5},
{-17.5, 17.5},
{-17.5, 17.5}
};
const std::vector<size_t> levels = {
65535,
65535
};
INSTANTIATE_TEST_SUITE_P(smoke_fq_maxpool_reordering, FQMaxpoolReordering,

View File

@@ -10,6 +10,8 @@
#include <ie_core.hpp>
#include <gmock/gmock.h>
#include "common_test_utils/common_utils.hpp"
#include "functional_test_utils/plugin_cache.hpp"
#include "shared_test_classes/base/layer_test_utils.hpp"
@@ -30,7 +32,7 @@ typedef std::tuple<
namespace LayerTestsDefinitions {
class MatMulOverloadCorrectionTest : public testing::WithParamInterface<matmulOverloadCorrectionParams>,
class MatMulOverloadCorrectionNegTest : public testing::WithParamInterface<matmulOverloadCorrectionParams>,
public LayerTestsUtils::LayerTestsCommon {
public:
static std::string getTestCaseName(testing::TestParamInfo<matmulOverloadCorrectionParams> obj) {
@@ -111,8 +113,17 @@ protected:
const size_t levels32 = std::numeric_limits<uint32_t>::max();
};
TEST_P(MatMulOverloadCorrectionTest, CompareWithRefImpl) {
Run();
TEST_P(MatMulOverloadCorrectionNegTest, CompareWithRefImpl) {
std::stringstream what;
std::streambuf* sbuf = std::cout.rdbuf();
std::streambuf* ebuf = std::cerr.rdbuf();
std::cout.rdbuf(what.rdbuf());
std::cerr.rdbuf(what.rdbuf());
LoadNetwork();
const auto expected = "Potential overload correction issue at layer ";
EXPECT_THAT(what.str(), ::testing::HasSubstr(expected));
std::cout.rdbuf(sbuf);
std::cerr.rdbuf(ebuf);
};
const std::vector<InferenceEngine::Precision> netPrecisions = {
@@ -122,7 +133,8 @@ const std::vector<InferenceEngine::Precision> netPrecisions = {
const std::vector<std::map<std::string, std::string>> configs = {
{
{"GNA_DEVICE_MODE", "GNA_SW_EXACT"}
{"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
{"LOG_LEVEL", "LOG_WARNING"}
}
};
@@ -131,13 +143,13 @@ const std::vector<std::vector<size_t>> inputShapes = {
{1, 256}
};
INSTANTIATE_TEST_SUITE_P(smoke_base, MatMulOverloadCorrectionTest,
INSTANTIATE_TEST_SUITE_P(smoke_base, MatMulOverloadCorrectionNegTest,
::testing::Combine(
::testing::ValuesIn(netPrecisions),
::testing::Values(CommonTestUtils::DEVICE_GNA),
::testing::ValuesIn(configs),
::testing::ValuesIn(inputShapes),
::testing::ValuesIn({true, false}),
::testing::ValuesIn({true}),
::testing::ValuesIn({true, false})),
MatMulOverloadCorrectionTest::getTestCaseName);
MatMulOverloadCorrectionNegTest::getTestCaseName);
} // namespace LayerTestsDefinitions

View File

@@ -13,7 +13,8 @@ using namespace SubgraphTestsDefinitions;
namespace {
const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP32, InferenceEngine::Precision::FP16,
InferenceEngine::Precision::FP32,
InferenceEngine::Precision::FP16
};
const std::vector<std::map<std::string, std::string>> configs = {
@@ -29,9 +30,12 @@ const size_t levels = 65535;
const std::vector<std::vector<float>> inputParams = {{-10, 10, 1}};
const float convFQValue = 2.0f;
const auto fqParams = ::testing::Combine(
::testing::Values(levels),
::testing::ValuesIn(inputParams)
::testing::ValuesIn(inputParams),
::testing::Values(convFQValue)
);
const std::vector<std::vector<size_t>> kernels = {{1, 3}};

View File

@@ -13,7 +13,8 @@ using namespace SubgraphTestsDefinitions;
namespace {
const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP32, InferenceEngine::Precision::FP16,
InferenceEngine::Precision::FP32,
InferenceEngine::Precision::FP16,
};
const std::vector<std::map<std::string, std::string>> configs = {
@@ -29,9 +30,12 @@ const size_t levels = 65535;
const std::vector<std::vector<float>> inputParams = {{-100, 100, 1}};
const float convFQValue = 2.0f;
const auto fqParams = ::testing::Combine(
::testing::Values(levels),
::testing::ValuesIn(inputParams)
::testing::ValuesIn(inputParams),
::testing::Values(convFQValue)
);
const std::vector<std::vector<size_t>> kernels = {{1, 3}};

View File

@@ -17,7 +17,8 @@ namespace SubgraphTestsDefinitions {
typedef std::tuple<
size_t, // levels
std::vector<float> // input generator data: low, high, resolution
std::vector<float>, // input generator data: low, high, resolution
float // convolution weights' FQ min and max value
> FqSpecificParams;
typedef std::tuple<

View File

@@ -17,7 +17,8 @@ namespace SubgraphTestsDefinitions {
typedef std::tuple<
size_t, // levels
std::vector<float> // input generator data: low, high, resolution
std::vector<float>, // input generator data: low, high, resolution
float // convolution weights' FQ min and max value
> FqSpecificParams;
typedef std::tuple<

View File

@@ -17,7 +17,8 @@ std::string ConvFqEltwiseTest::getTestCaseName(const testing::TestParamInfo<Conv
size_t levels;
std::vector<float> inputArg;
std::tie(levels, inputArg) = fqParams;
float convFQValue;
std::tie(levels, inputArg, convFQValue) = fqParams;
std::vector<size_t> kernelShape;
std::vector<size_t> strides;
@@ -36,6 +37,7 @@ std::string ConvFqEltwiseTest::getTestCaseName(const testing::TestParamInfo<Conv
if (inputArg.size() == 3) {
result << "_inputArg=" << inputArg[0] << "_" << inputArg[1] << "_" << inputArg[2];
}
result << "_convFQ=" << convFQValue;
result << "_KERNEL=" << CommonTestUtils::vec2str(kernelShape) << "_";
result << "STRIDES=" << CommonTestUtils::vec2str(strides) << "_";
result << "IC=" << inputChannels << "_";
@@ -54,7 +56,8 @@ void ConvFqEltwiseTest::SetUp() {
size_t levels;
std::vector<float> inputArg;
std::tie(levels, inputArg) = fqParams;
float convFQValue;
std::tie(levels, inputArg, convFQValue) = fqParams;
if (inputArg.size() == 3) {
inputDataMin = inputArg[0];
inputDataMax = inputArg[1];
@@ -80,8 +83,10 @@ void ConvFqEltwiseTest::SetUp() {
float weightVal = 0.2;
auto filterWeightsNode = ngraph::builder::makeConstant<float>(ngPrc, {outputChannels, inputChannels, kernelShape[0], kernelShape[1]},
{ weightVal });
auto convLowNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{ 1 }, std::vector<float>{-weightVal});
auto convHighNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{ 1 }, std::vector<float>{weightVal});
auto convLowNode =
ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{1}, std::vector<float>{-convFQValue});
auto convHighNode =
ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{1}, std::vector<float>{convFQValue});
auto convWeightsFQNode = std::make_shared<ngraph::opset1::FakeQuantize>(filterWeightsNode,
convLowNode, convHighNode, convLowNode, convHighNode, levels);
auto convWeightsFQ = std::dynamic_pointer_cast<ngraph::opset1::FakeQuantize>(convWeightsFQNode);

View File

@@ -17,7 +17,8 @@ std::string ConvFqReluTest::getTestCaseName(const testing::TestParamInfo<ConvFqR
size_t levels;
std::vector<float> inputArg;
std::tie(levels, inputArg) = fqParams;
float convFQValue;
std::tie(levels, inputArg, convFQValue) = fqParams;
std::vector<size_t> kernelShape;
std::vector<size_t> strides;
@@ -36,6 +37,7 @@ std::string ConvFqReluTest::getTestCaseName(const testing::TestParamInfo<ConvFqR
if (inputArg.size() == 3) {
result << "_inputArg=" << inputArg[0] << "_" << inputArg[1] << "_" << inputArg[2];
}
result << "_convFQ=" << convFQValue;
result << "_KERNEL=" << CommonTestUtils::vec2str(kernelShape) << "_";
result << "STRIDES=" << CommonTestUtils::vec2str(strides) << "_";
result << "IC=" << inputChannels << "_";
@@ -54,7 +56,8 @@ void ConvFqReluTest::SetUp() {
size_t levels;
std::vector<float> inputArg;
std::tie(levels, inputArg) = fqParams;
float convFQValue;
std::tie(levels, inputArg, convFQValue) = fqParams;
if (inputArg.size() == 3) {
inputDataMin = inputArg[0];
inputDataMax = inputArg[1];
@@ -80,8 +83,10 @@ void ConvFqReluTest::SetUp() {
float weightVal = 0.2;
auto filterWeightsNode = ngraph::builder::makeConstant<float>(ngPrc, {outputChannels, inputChannels, kernelShape[0], kernelShape[1]},
{ weightVal });
auto convLowNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{ 1 }, std::vector<float>{-weightVal});
auto convHighNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{ 1 }, std::vector<float>{weightVal});
auto convLowNode =
ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{1}, std::vector<float>{-convFQValue});
auto convHighNode =
ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{1}, std::vector<float>{convFQValue});
auto convWeightsFQNode = std::make_shared<ngraph::opset1::FakeQuantize>(filterWeightsNode,
convLowNode, convHighNode, convLowNode, convHighNode, levels);
auto convWeightsFQ = std::dynamic_pointer_cast<ngraph::opset1::FakeQuantize>(convWeightsFQNode);