[LPT] Zero point insertion in case of zero value on FQ output high (#5467)

* [LPT] Zero point insertion in case of zero value on FQ output high

* [LPT] Change the precision in the test to the real default precision[0]
This commit is contained in:
Vladimir Zinoviev 2021-05-06 21:48:36 +03:00 committed by GitHub
parent a8289b58c4
commit 8645c08396
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 45 additions and 35 deletions

View File

@ -221,18 +221,20 @@ LayerTransformation::PrecisionDetails LayerTransformation::getPrecisionDetails(c
bool hasZeroPoint = false; bool hasZeroPoint = false;
for (size_t i = 0; i < quantizationDetails.outputLowValues.size(); ++i) { for (size_t i = 0; i < quantizationDetails.outputLowValues.size(); ++i) {
const bool signedInterval = std::signbit(quantizationDetails.outputLowValues[i]) != std::signbit(quantizationDetails.outputHighValues[i]); const bool signedInterval = std::signbit(quantizationDetails.outputLowValues[i]) != std::signbit(quantizationDetails.outputHighValues[i]);
const bool boundaryValuesAreNotZero = const bool outputLowValueIsNotZero = std::fabs(quantizationDetails.outputLowValues[i]) >= zeroThreshold;
(std::fabs(quantizationDetails.outputLowValues[i]) >= zeroThreshold) && if (signedInterval && outputLowValueIsNotZero) {
(std::fabs(quantizationDetails.outputHighValues[i]) >= zeroThreshold);
if (signedInterval && boundaryValuesAreNotZero) {
// signed // signed
unsignedPrecision = false; unsignedPrecision = false;
hasNegative = true; hasNegative = true;
const float expectedRatio = quantizationDetails.levels == 256 ? asymmetricIntervalSideRatio256 : -1.f; if (quantizationDetails.outputHighValues[i] != 0.f) {
const float actualRatio = quantizationDetails.outputLowValues[i] / quantizationDetails.outputHighValues[i]; const float expectedRatio = quantizationDetails.levels == 256 ? asymmetricIntervalSideRatio256 : -1.f;
const float actual = std::fabs((actualRatio - expectedRatio) / std::min(actualRatio, expectedRatio)); const float actualRatio = quantizationDetails.outputLowValues[i] / quantizationDetails.outputHighValues[i];
if (actual > quantizationIntervalAsymmetryThreshold) { const float actual = std::fabs((actualRatio - expectedRatio) / std::min(actualRatio, expectedRatio));
if (actual > quantizationIntervalAsymmetryThreshold) {
hasZeroPoint = true;
}
} else {
hasZeroPoint = true; hasZeroPoint = true;
} }
#ifdef LPT_PRINT_DEQUANTIZATION_INFO #ifdef LPT_PRINT_DEQUANTIZATION_INFO
@ -244,8 +246,8 @@ LayerTransformation::PrecisionDetails LayerTransformation::getPrecisionDetails(c
} else { } else {
// unsigned // unsigned
signedPrecision = false; signedPrecision = false;
if (boundaryValuesAreNotZero) { if (outputLowValueIsNotZero) {
hasZeroPoint = boundaryValuesAreNotZero; hasZeroPoint = outputLowValueIsNotZero;
} }
#ifdef LPT_PRINT_DEQUANTIZATION_INFO #ifdef LPT_PRINT_DEQUANTIZATION_INFO

View File

@ -42,6 +42,10 @@ const std::vector<FakeQuantizeTransformationParam> fakeQuantizeOnDataValues = {
{ 256ul, {}, { 0.f }, { 2.55f }, { 2.55f }, { 2.55f } }, { 256ul, {}, { 0.f }, { 2.55f }, { 2.55f }, { 2.55f } },
"Pooling", "U8" "Pooling", "U8"
}, },
{
{ 256ul, {}, { -127.5f }, { 0.f }, { -127.5f }, { 0.f } },
"Pooling", "U8"
},
{ {
{ 16ul, {}, { 0.f }, { 1.5f }, { 0.f }, { 1.5f } }, { 16ul, {}, { 0.f }, { 1.5f }, { 0.f }, { 1.5f } },
"Pooling", "FP32" "Pooling", "FP32"
@ -50,7 +54,7 @@ const std::vector<FakeQuantizeTransformationParam> fakeQuantizeOnDataValues = {
{ 16ul, {}, { -8.f }, { 7.f }, { -0.8f }, { 0.7f } }, { 16ul, {}, { -8.f }, { 7.f }, { -0.8f }, { 0.7f } },
"Pooling", "FP32" "Pooling", "FP32"
}, },
// nGraph: I8->FP32 Convert is not supported // nGraph: I8->FP32 Convert is not supported
// { 256ul, {}, { -1.28f} , { 1.27f }, { -1.28f} , { 1.27f } }, // { 256ul, {}, { -1.28f} , { 1.27f }, { -1.28f} , { 1.27f } },
// { 256ul, { 1ul }, { -1.28f} , { 1.27f } } // { 256ul, { 1ul }, { -1.28f} , { 1.27f } }
}; };

View File

@ -26,30 +26,34 @@ const std::vector<LayerTransformation::Params> trasformationParamValues = {
}; };
const std::vector<FakeQuantizeTransformationParam> fakeQuantizeOnDataValues = { const std::vector<FakeQuantizeTransformationParam> fakeQuantizeOnDataValues = {
{ {
{256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}}, {256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}},
"Pooling", "U8" "Pooling", "U8"
}, },
{ {
{ 256ul, { 1ul }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } }, { 256ul, { 1ul }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
"Pooling", "U8" "Pooling", "U8"
}, },
{ {
{ 256ul, {}, { 0.f }, { 2.55f }, { -1.28f }, { 1.27f } }, { 256ul, {}, { 0.f }, { 2.55f }, { -1.28f }, { 1.27f } },
"Pooling", "I8" "Pooling", "I8"
}, },
{ {
{ 256ul, {}, { 0.f }, { 2.55f }, { 2.55f }, { 2.55f } }, { 256ul, {}, { 0.f }, { 2.55f }, { 2.55f }, { 2.55f } },
"Pooling", "U8" "Pooling", "U8"
}, },
{ {
{ 16ul, {}, { 0.f }, { 1.5f }, { 0.f }, { 1.5f } }, { 256ul, {}, { -127.5f }, { 0.f }, { -127.5f }, { 0.f } },
"Pooling", "FP32" "Pooling", "U8"
}, },
{ {
{ 16ul, {}, { -8.f }, { 7.f }, { -0.8f }, { 0.7f } }, { 16ul, {}, { 0.f }, { 1.5f }, { 0.f }, { 1.5f } },
"Pooling", "FP32" "Pooling", "FP32"
}, },
{
{ 16ul, {}, { -8.f }, { 7.f }, { -0.8f }, { 0.7f } },
"Pooling", "FP32"
},
// nGraph: I8->FP32 Convert is not supported // nGraph: I8->FP32 Convert is not supported
// { 256ul, {}, { -1.28f} , { 1.27f }, { -1.28f} , { 1.27f } }, // { 256ul, {}, { -1.28f} , { 1.27f }, { -1.28f} , { 1.27f } },
// { 256ul, { 1ul }, { -1.28f} , { 1.27f } } // { 256ul, { 1ul }, { -1.28f} , { 1.27f } }