[LPT] int4 inference via 16 levels int8 (#5249)
parent 5d86cce4eb
commit 5e0daae87e
@@ -80,28 +80,17 @@ public:
             return -8.f;
         case element::i8:
             switch (levels) {
+                case 16:
+                    return -8.f;
                 case 255:
                     return -127.f;
                 case 256:
+                default:
                     return -128.f;
             }
             break;
         case element::i16:
-            switch (levels) {
-                case 65536:
-                    return -32768.f;
-                case 65535:
-                    return -32767.f;
-            }
-            break;
+            return levels == 65535 ? -32767.f : -32768.f;
         case element::i32:
-            switch (levels) {
-                case static_cast<size_t>(4294967296):
-                    return -2147483648.f;
-                case 4294967295:
-                    return -2147483647.f;
-            }
-            break;
+            return -2147483647.f; // -2147483647.f == -2147483648.f
         case element::f16:
             return -1.0e15f;
         case element::f32:

@@ -117,19 +106,29 @@ public:
         case element::u4:
             return 15.f;
         case element::u8:
-            return 255.f;
+            switch (levels) {
+                case 16:
+                    return 15.f;
+                default:
+                    return 255.f;
+            }
         case element::u16:
             return 65535.f;
         case element::u32:
-            return 4294967296.f;
+            return 4294967296.f; // 4294967296.f == 4294967295.f
         case element::i4:
             return 7.f;
         case element::i8:
-            return 127.f;
+            switch (levels) {
+                case 16:
+                    return 7.f;
+                default:
+                    return 127.f;
+            }
         case element::i16:
             return 32767.f;
         case element::i32:
-            return 2147483647.f;
+            return 2147483648.f; // 2147483648.f == 2147483647.f
         case element::f16:
             return 1.0e15f;
         case element::f32:

@@ -145,6 +144,10 @@ public:
             return 254.f;
         } else if (maxLevelsForPrecision == 256ul) {
             return 255.f;
+        } else if (maxLevelsForPrecision == 16ul) {
+            return 15.f;
+        } else if (maxLevelsForPrecision == 15ul) {
+            return 14.f;
         } else {
             THROW_TRANSFORMATION_EXCEPTION << "unexpected quantization level " << maxLevelsForPrecision;
         }

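For orientation, the net effect of the getMinValue/getMaxValue changes above: a FakeQuantize with 16 levels is mapped onto the int4 value grid while the data itself stays in an 8-bit tensor. A minimal standalone sketch of that mapping (hypothetical helper, not the OpenVINO API):

    #include <cstddef>
    #include <cstdio>
    #include <utility>

    // Hypothetical helper mirroring the switches above: with 16 levels the
    // int4 grid (-8..7 signed, 0..15 unsigned) is emulated inside int8/uint8.
    static std::pair<float, float> int8Range(bool isSigned, std::size_t levels) {
        if (levels == 16)
            return isSigned ? std::make_pair(-8.f, 7.f) : std::make_pair(0.f, 15.f);
        // default: the full 256-level int8 range
        return isSigned ? std::make_pair(-128.f, 127.f) : std::make_pair(0.f, 255.f);
    }

    int main() {
        const auto i4 = int8Range(true, 16);   // [-8, 7]
        const auto u4 = int8Range(false, 16);  // [0, 15]
        std::printf("int4-in-int8: [%g, %g], uint4-in-uint8: [%g, %g]\n",
                    i4.first, i4.second, u4.first, u4.second);
        return 0;
    }
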
@@ -29,6 +29,7 @@ FakeQuantizeDecompositionTransformation::FakeQuantizeDecompositionTransformation
         if (transformation_callback(op)) {
             return false;
         }

         return transform(*context, m);
     };

@@ -226,8 +226,9 @@ LayerTransformation::PrecisionDetails LayerTransformation::getPrecisionDetails(
             hasNegative = true;

             if (outputHighValues[i] != 0.f) {
-                const float expectedRatio = (quantizationLevels == 256 || quantizationLevels == 65536 || quantizationLevels == 4294967296) ?
-                    asymmetricIntervalSideRatio : -1.f;
+                const float expectedRatio =
+                    (quantizationLevels == 16 || quantizationLevels == 256 ||
+                     quantizationLevels == 65536 || quantizationLevels == 4294967296) ? asymmetricIntervalSideRatio : -1.f;
                 const float actualRatio = outputLowValues[i] / outputHighValues[i];
                 const float actual = std::fabs((actualRatio - expectedRatio) / std::min(actualRatio, expectedRatio));
                 if (actual > quantizationIntervalAsymmetryThreshold) {

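The relative-difference test in the context lines above carries the actual logic; read in isolation it looks like this (a sketch with the expected ratio passed in explicitly, since asymmetricIntervalSideRatio is defined elsewhere in LayerTransformation):

    #include <algorithm>
    #include <cmath>

    // Sketch of the asymmetry check above: compare the measured
    // outputLow/outputHigh ratio to the expected one and flag the interval as
    // asymmetric when the relative difference exceeds the threshold.
    static bool isAsymmetric(float outputLow, float outputHigh,
                             float expectedRatio, float threshold) {
        const float actualRatio = outputLow / outputHigh;
        const float relativeDiff =
            std::fabs((actualRatio - expectedRatio) / std::min(actualRatio, expectedRatio));
        return relativeDiff > threshold;
    }
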
@@ -273,6 +274,7 @@ LayerTransformation::PrecisionDetails LayerTransformation::getPrecisionDetails(
         switch (quantizationLevels) {
             case 256:
             case 255:
+            case 16:
                 resultPrecision = element::i8;
                 break;
             case 65536:

@@ -290,6 +292,7 @@ LayerTransformation::PrecisionDetails LayerTransformation::getPrecisionDetails(
         switch (quantizationLevels) {
             case 256:
             case 255:
+            case 16:
                 resultPrecision = element::u8;
                 break;
             case 65536:

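Taken together, the two switches above route 16-level FakeQuantize ops down the same 8-bit path as 255/256 levels, with signedness chosen from the interval signs. A rough sketch of that selection (the 16-bit case for 65535 and the 32-bit fallback are assumptions; the hunks only show the cases up to 65536):

    #include <cstddef>
    #include <ngraph/type/element_type.hpp>

    // Rough sketch of the level-count -> target precision selection above.
    // 16 levels shares the 8-bit path because int4 is carried in int8/uint8.
    static ngraph::element::Type pickPrecision(std::size_t quantizationLevels, bool hasNegative) {
        switch (quantizationLevels) {
            case 16:      // int4 emulated in an 8-bit tensor
            case 255:
            case 256:
                return hasNegative ? ngraph::element::i8 : ngraph::element::u8;
            case 65535:   // assumption: grouped with 65536 (16-bit)
            case 65536:
                return hasNegative ? ngraph::element::i16 : ngraph::element::u16;
            default:      // assumption: larger level counts fall back to 32-bit
                return hasNegative ? ngraph::element::i32 : ngraph::element::u32;
        }
    }
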
@@ -162,7 +162,7 @@ bool QuantizationDetails::empty() const noexcept {
 }

 bool QuantizationDetails::isSupportedLevel(const size_t level) {
-    static const std::unordered_set<size_t> supported_levels = { 255, 256, 65536, 65535, static_cast<size_t>(4294967296), 4294967295 };
+    static const std::unordered_set<size_t> supported_levels = { 16, 255, 256, 65536, 65535, static_cast<size_t>(4294967296), 4294967295 };
     return supported_levels.find(level) != supported_levels.end();
 }

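With 16 added to the supported set, a 16-level FakeQuantize is no longer skipped by LPT as an unexpected level count. A quick usage sketch (assuming the usual low_precision header path and namespace):

    #include <cassert>
    #include <low_precision/quantization_details.hpp>

    int main() {
        using ngraph::pass::low_precision::QuantizationDetails;
        assert(QuantizationDetails::isSupportedLevel(16));   // int4 via int8: now handled
        assert(QuantizationDetails::isSupportedLevel(256));  // int8: handled as before
        assert(!QuantizationDetails::isSupportedLevel(14));  // arbitrary level counts stay rejected
        return 0;
    }
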
@@ -166,25 +166,26 @@ void VariantWrapper<IntervalsAlignmentAttributePtr>::merge(
         const auto size = std::abs(sharedValue->minInterval.high - sharedValue->minInterval.low);
         if (resultSize > size) {
             resultSharedValue->minInterval = sharedValue->minInterval;

+            if (resultAttribute->levels != 0ul) {
-            float dequantizationMul;
-            float dequantizationSub;
-            float updatedOutputLowValue;
-            float updatedOutputHighValue;

+                float dequantizationMul;
+                float dequantizationSub;
+                float updatedOutputLowValue;
+                float updatedOutputHighValue;

-            const size_t minLevels = NetworkHelper::calculateLevels(
-                0.f,
-                DataPrecision::getMaxValue(resultAttribute->levels),
-                resultSharedValue->combinedInterval.low,
-                resultSharedValue->combinedInterval.high,
-                resultSharedValue->minInterval.low,
-                resultSharedValue->minInterval.high,
-                dequantizationMul,
-                dequantizationSub,
-                updatedOutputLowValue,
-                updatedOutputHighValue);

+                const size_t minLevels = NetworkHelper::calculateLevels(
+                    0.f,
+                    DataPrecision::getMaxValue(resultAttribute->levels),
+                    resultSharedValue->combinedInterval.low,
+                    resultSharedValue->combinedInterval.high,
+                    resultSharedValue->minInterval.low,
+                    resultSharedValue->minInterval.high,
+                    dequantizationMul,
+                    dequantizationSub,
+                    updatedOutputLowValue,
+                    updatedOutputHighValue);

-            resultSharedValue->minLevels = minLevels;
+                resultSharedValue->minLevels = minLevels;
+            }

 #ifdef LPT_DEBUG
             resultSharedValue->minLevelsOperation = sharedValue->minLevelsOperation;

@@ -967,7 +967,7 @@ const std::vector<ConcatTransformationTestValues> testValues = {
             { {element::f32}, {}, { 0.01f } },
         }
     },
-    // unexpected quantization levels, concat
+    // INT4+INT8 quantization levels, concat
     {
         LayerTransformation::createParamsU8I8(),
         false,

@@ -990,16 +990,16 @@ const std::vector<ConcatTransformationTestValues> testValues = {
             ngraph::element::f32,
             {},
         },
         false,
         true,
         false,
     },
-    // unexpected quantization levels, concat multi channels
+    // INT4+INT8 quantization levels, concat multi channels
     {
         LayerTransformation::createParamsU8I8(),
         true,
         1,
         {
-            { 16ul, {}, {0.f}, {1.5f}, {0.f}, {15.f} },
+            { 16ul, {}, {0.f}, {1.5f}, {0.f}, {1.5f} },
             {},
             {},
             { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f} },

@@ -1007,16 +1007,16 @@ const std::vector<ConcatTransformationTestValues> testValues = {
             {}
         },
         {
-            { 16ul, {}, {0.f}, {1.5f}, {0.f}, {15.f} },
+            { 16ul, {}, {0.f}, {1.5f}, {0.f}, {15.f}, ngraph::element::u8 },
             {},
             {},
-            { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f} },
+            { 256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f}, ngraph::element::u8 },
             {},
             {},
             ngraph::element::f32,
             {},
             ngraph::element::u8,
             { ngraph::element::f32, {}, {{ 0.1f, 0.1f, 0.1f, 0.01f, 0.01f, 0.01f }} }
         },
         false,
         true,
         false
     }
 };

@@ -322,6 +322,28 @@ const std::vector<FakeQuantizeTransformationTestValues> fakeQuantizeTransformati
     //        { ngraph::element::f16, { {ngraph::element::f16}, {}, { {0.01f, 0.1f, 1.f} }} }
     //    }
     //},
+    // u4 through u8
+    {
+        LayerTransformation::createParamsU8I8(),
+        { 16ul, {}, { 0.f }, { 1.5f }, { 0.f }, { 1.5f } },
+        { 16ul, {}, { 0.f }, { 1.5f }, { 0.f }, { 15.f } },
+        ngraph::element::u8,
+        {
+            { ngraph::element::f32, { {ngraph::element::f32}, {}, { 0.1f } } },
+            { ngraph::element::f16, { {ngraph::element::f16}, {}, { 0.1f } } }
+        }
+    },
+    // i4 through i8
+    {
+        LayerTransformation::createParamsI8I8(),
+        { 16ul, {}, { -0.8f }, { 0.7f }, { -0.8f }, { 0.7f } },
+        { 16ul, {}, { -0.8f }, { 0.7f }, { -8.f }, { 7.f } },
+        ngraph::element::i8,
+        {
+            { ngraph::element::f32, { {ngraph::element::f32}, {}, { 0.1f } } },
+            { ngraph::element::f16, { {ngraph::element::f16}, {}, { 0.1f } } }
+        }
+    },
 };

 INSTANTIATE_TEST_SUITE_P(

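Each tuple above describes a FakeQuantize as (levels, constant shape, input low, input high, output low, output high). For reference, a roughly equivalent nGraph builder for the 16-level "u4 through u8" case (a sketch with an assumed input shape, not code from the test suite):

    #include <memory>
    #include <ngraph/ngraph.hpp>
    #include <ngraph/opsets/opset1.hpp>

    // A 16-level FakeQuantize with input range [0, 1.5] and output range [0, 15]:
    // the pattern LPT can now decompose into a U8 tensor plus a dequantization multiply.
    std::shared_ptr<ngraph::Function> makeInt4LikeFakeQuantize() {
        using namespace ngraph;
        const auto input = std::make_shared<opset1::Parameter>(element::f32, Shape{ 1, 3, 16, 16 });
        const auto inputLow   = opset1::Constant::create(element::f32, Shape{}, { 0.f });
        const auto inputHigh  = opset1::Constant::create(element::f32, Shape{}, { 1.5f });
        const auto outputLow  = opset1::Constant::create(element::f32, Shape{}, { 0.f });
        const auto outputHigh = opset1::Constant::create(element::f32, Shape{}, { 15.f });
        const auto fq = std::make_shared<opset1::FakeQuantize>(input, inputLow, inputHigh, outputLow, outputHigh, 16ul);
        return std::make_shared<Function>(NodeVector{ fq }, ParameterVector{ input });
    }
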
@@ -38,10 +38,20 @@ const std::vector<ConcatTransformationTestValues> testValues = {
         { 256ul, ngraph::Shape({}), {0.f}, {2.55f}, {0.f}, {2.55f} }
     },
     // FQ with unexpected quantizationLevels
     {
         { 14ul, ngraph::Shape({}), {0.f}, {15.f}, {0.f}, {1.5f} },
         { 14ul, ngraph::Shape({}), {0.f}, {15.f}, {0.f}, {1.5f} }
     },
+    // FQ with INT4 quantizationLevels
+    {
+        { 16ul, ngraph::Shape({}), {0.f}, {15.f}, {0.f}, {1.5f} },
+        { 16ul, ngraph::Shape({}), {0.f}, {15.f}, {0.f}, {1.5f} }
+    },
+    // FQ with INT4+INT8 quantizationLevels
+    {
+        { 16ul, ngraph::Shape({}), {0.f}, {15.f}, {0.f}, {1.5f} },
+        { 256ul, ngraph::Shape({}), {0.f}, {2.55f}, {0.f}, {2.55f} }
+    },
 };

 const std::vector<ngraph::PartialShape> shapes = {

@@ -54,15 +54,15 @@ const std::vector<LayerTestsDefinitions::ConvolutionTransformationParam> params
         "U8"
     },
     {
-        { 16ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { 0.f }, { 25.5f } },
+        { 14ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { 0.f }, { 25.5f } },
         false,
-        { 16ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 254.f }, { -12.7f }, { 12.7f } },
+        { 14ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 254.f }, { -12.7f }, { 12.7f } },
         false,
         "Convolution",
         "FP32"
     },
     {
-        { 16ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
+        { 14ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
         false,
         { 255ul, ngraph::Shape { 1, 1, 1, 1 }, { -12.7f }, { 12.7f }, { -12.7f }, { 12.7f } },
         false,

@@ -72,7 +72,7 @@ const std::vector<LayerTestsDefinitions::ConvolutionTransformationParam> params
     {
         { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { 0.f }, { 25.5f } },
         false,
-        { 16ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 254.f }, { -12.7f }, { 12.7f } },
+        { 14ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 254.f }, { -12.7f }, { 12.7f } },
         false,
         "Convolution",
         "FP32"

@@ -46,14 +46,13 @@ const std::vector<FakeQuantizeTransformationParam> fakeQuantizeOnDataValues = {
         { 256ul, {}, { -127.5f }, { 0.f }, { -127.5f }, { 0.f } },
         "Pooling", "U8"
     },
-    // INT4 FQ's are not transformed and inferred via FP32
     {
         { 16ul, {}, { 0.f }, { 1.5f }, { 0.f }, { 1.5f } },
-        "Pooling", "FP32"
+        "Pooling", "U8"
     },
     {
-        { 16ul, {}, { -8.f }, { 7.f }, { -0.8f }, { 0.7f } },
-        "Pooling", "FP32"
+        { 16ul, {}, { -0.8f }, { 0.7f }, { -0.8f }, { 0.7f } },
+        "Pooling", "I8"
     },
     // INT16, INT32 FQ's are transformed, but updatePrecision = false for inference on CPU Plugin and inferred via FP32
     {

@@ -48,11 +48,11 @@ const std::vector<FakeQuantizeTransformationParam> fakeQuantizeOnDataValues = {
     },
     {
         { 16ul, {}, { 0.f }, { 1.5f }, { 0.f }, { 1.5f } },
-        "Pooling", "FP32"
+        "Pooling", "U8"
     },
     {
         { 16ul, {}, { -8.f }, { 7.f }, { -0.8f }, { 0.7f } },
-        "Pooling", "FP32"
+        "Pooling", "I8"
     },
     // nGraph: I8->FP32 Convert is not supported
     // { 256ul, {}, { -1.28f} , { 1.27f }, { -1.28f} , { 1.27f } },