[LPT] Unexpected quantisation level fix (#18888)

* [LPT] Unexpected quantisation level support

* [LPT] Unexpected quantisation level support - extension for weights

* [LPT] Unexpected quantisation level support - extension for weights + tests

* refactoring: compilation fix
Edward Shogulin 2023-08-04 15:02:12 +01:00 committed by GitHub
parent 74c778e7ee
commit b76fc24824
11 changed files with 192 additions and 21 deletions
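The pattern throughout is the same guard: before a FakeQuantize is rewritten to a low precision, the new DataPrecision::check verifies that the operation's level count actually matches the candidate integer type, and every caller now bails out (leaving the layer in FP32) when nothing fits. For reference, a sketch of the low_precision::levels constants the check relies on; the exact values are an assumption, inferred from the 2^N / 2^N-1 naming convention and the 65535/65536 counts exercised in the tests below:

#include <cstddef>

// Assumed values of the level-count constants referenced in the hunks below:
// a full range has 2^N levels, a narrow range drops one level (2^N - 1).
namespace low_precision {
namespace levels {
constexpr size_t int4 = 16;
constexpr size_t int4_narrow_range = 15;
constexpr size_t int8 = 256;
constexpr size_t int8_narrow_range = 255;
constexpr size_t int16 = 65536;
constexpr size_t int16_narrow_range = 65535;
constexpr size_t int32 = 4294967296ULL;  // 2^32
constexpr size_t int32_narrow_range = 4294967295ULL;
}  // namespace levels
}  // namespace low_precision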

View File

@ -90,6 +90,25 @@ public:
return lowPrecision.find(precision) != lowPrecision.end();
}
static bool check(const element::Type precision, const size_t levels) {
switch (precision) {
case element::i4:
case element::u4:
return (levels == low_precision::levels::int4) || (levels == low_precision::levels::int4_narrow_range);
case element::i8:
case element::u8:
return (levels == low_precision::levels::int8) || (levels == low_precision::levels::int8_narrow_range);
case element::i16:
case element::u16:
return (levels == low_precision::levels::int16) || (levels == low_precision::levels::int16_narrow_range);
case element::i32:
case element::u32:
return (levels == low_precision::levels::int32) || (levels == low_precision::levels::int32_narrow_range);
default:
return false;
}
}
static float getMinValue(const element::Type precision, const size_t levels) {
switch (precision) {
case element::u4:
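A minimal usage sketch of the new predicate; the header path and namespaces are taken from the unit test added later in this commit:

#include <cassert>
#include <low_precision/layer_transformation.hpp>

using namespace ngraph;
using namespace ngraph::pass::low_precision;

int main() {
    // 256 levels is the full int8 range, so it fits u8 ...
    assert(DataPrecision::check(element::u8, levels::int8));
    // ... but 65536 levels needs 16 bits and is rejected for u8.
    assert(!DataPrecision::check(element::u8, levels::int16));
    // Narrow-range counts (2^N - 1) are accepted for the matching width too.
    assert(DataPrecision::check(element::i16, levels::int16_narrow_range));
    return 0;
}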

View File

@ -134,6 +134,9 @@ DataPrecision getDataPrecisionByOutputPort(std::shared_ptr<opset1::FakeQuantize>
precisionsForLevels = {element::u8, element::i8};
}
const auto resultPrecisions = NetworkHelper::precisionIntersection(precisions, precisionsForLevels);
if (resultPrecisions.empty()) {
return DataPrecision();
}
ngraph::element::Type precision;
bool hasZeroPoint;
@ -315,11 +318,16 @@ bool FakeQuantizeDecompositionTransformation::transform(TransformationContext& c
return rewritten;
}
// check if level is supported in LPT
if (!QuantizationDetails::isSupportedLevel(layer->get_levels())) {
return rewritten;
}
// check if level is supported in plugin
DataPrecision dataPrecision = fq_decomposition::getDataPrecisionByOutputPort(layer);
if (dataPrecision.empty()) {
return rewritten;
}
PrecisionsAttribute precisionsAttribute(defaultPrecisions);
{
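The second guard is where unexpected levels surface in practice. A sketch of how the empty result arises; the {u16, i16} candidate list for 65536 levels is an assumption mirroring the {u8, i8} branch shown in the first hunk above:

// For a 65536-level FakeQuantize the precisions implied by the level count
// would be {u16, i16}; a plugin that only registers {u8, i8} then yields an
// empty intersection, getDataPrecisionByOutputPort returns an empty
// DataPrecision, and transform() leaves the layer in FP32.
const std::vector<element::Type> precisions = {element::u8, element::i8};             // from the plugin
const std::vector<element::Type> precisionsForLevels = {element::u16, element::i16};  // assumed for 65536 levels
const auto resultPrecisions = NetworkHelper::precisionIntersection(precisions, precisionsForLevels);
assert(resultPrecisions.empty());  // -> DataPrecision() -> early return from transform()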

View File

@ -338,6 +338,10 @@ DataPrecision LayerTransformation::getDataPrecision(
precisionDetailsAtOutputIntervals.precision :
*requiredPrecisions.begin();
if (!DataPrecision::check(resultPrecision, quantizationDetails.levels)) {
return DataPrecision();
}
return DataPrecision(
resultPrecision,
DataPrecision::getMinValue(resultPrecision, quantizationDetails.levels),
@ -348,6 +352,11 @@ DataPrecision LayerTransformation::getDataPrecision(
// FakeQuantize optimal precision is not defined
if (!requiredPrecisions.empty()) {
const element::Type resultPrecision = *requiredPrecisions.begin();
if (!DataPrecision::check(resultPrecision, quantizationDetails.levels)) {
return DataPrecision();
}
return DataPrecision(
resultPrecision,
DataPrecision::getMinValue(resultPrecision, quantizationDetails.levels),
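The unit test getDataPrecision_reqU8_65535_to_U8 added later in this commit exercises exactly this path; condensed:

// A 65535-level FakeQuantize with a u8-only requirement must now yield an
// empty DataPrecision instead of a mislabelled u8 one.
const auto fakeQuantize = std::make_shared<ov::op::v0::FakeQuantize>(input, low, high, low, high, 65535);
const auto details = pass::low_precision::QuantizationDetails::getDetails(fakeQuantize);
const auto precisionDetails = LayerTransformation::getDataPrecision(fakeQuantize, details, {element::u8});
assert(precisionDetails.empty());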

View File

@ -108,6 +108,10 @@ bool RecurrentCellTransformation::transform(TransformationContext& context, ngra
? defaultPrecisions
: precisionsAttribute.as<PrecisionsAttribute>().value();
const DataPrecision dataPrecision = getDataPrecision(lstm_parent, quantizationDetails, precisions);
if (dataPrecision.empty()) {
return false;
}
auto QDQ = NetworkHelper::decomposeFakeQuantize(fq_node,
dataPrecision.precision,
dataPrecision.min,

View File

@ -70,7 +70,7 @@ bool WeightableLayerTransformation::canConvolutionBeTransformed(const Transforma
if (dequantization.empty()) {
const auto fqOnWeights = getFakeQuantizeOnWeights(layer);
const auto dataPrecision = getDataPrecisionOnWeights(layer, defaultPrecisions);
if ((dataPrecision.precision == ngraph::element::undefined) || (!NetworkHelper::checkZeroPoint(fqOnWeights, dataPrecision))) {
if ((dataPrecision.empty()) || (!NetworkHelper::checkZeroPoint(fqOnWeights, dataPrecision))) {
return false;
}
} else {
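This replaces an ad-hoc comparison with the DataPrecision::empty() sentinel used everywhere else in the commit. The body of empty() is not part of this diff; a plausible sketch, assuming a default-constructed DataPrecision carries element::undefined:

// Assumed shape of the predicate used above (not shown in this diff):
bool DataPrecision::empty() const noexcept {
    return precision == element::undefined;
}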

View File

@ -0,0 +1,59 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <memory>
#include <gtest/gtest.h>
#include <ie_blob.h>
#include <low_precision/layer_transformation.hpp>
#include <low_precision/network_helper.hpp>
#include "ngraph_functions/builders.hpp"
using namespace ngraph;
TEST(smoke_LPT_DataPrecision, check) {
using namespace ngraph::pass::low_precision;
const std::vector<element::Type> type_items = {
element::i4,
element::u4,
element::i8,
element::u8,
element::i16,
element::u16,
element::i32,
element::u32
};
const std::vector<levels> level_items = {
int4,
int4_narrow_range,
int8,
int8_narrow_range,
int16,
int16_narrow_range,
int32,
int32_narrow_range
};
const std::map<element::Type, std::set<levels>> items = {
{element::i4, {levels::int4, levels::int4_narrow_range}},
{element::u4, {levels::int4, levels::int4_narrow_range}},
{element::i8, {levels::int8, levels::int8_narrow_range}},
{element::u8, {levels::int8, levels::int8_narrow_range}},
{element::i16, {levels::int16, levels::int16_narrow_range}},
{element::u16, {levels::int16, levels::int16_narrow_range}},
{element::i32, {levels::int32, levels::int32_narrow_range}},
{element::u32, {levels::int32, levels::int32_narrow_range}},
};
for (const auto type_item : type_items) {
for (const auto level_item : level_items) {
const auto& levels = items.find(type_item)->second;
if (levels.find(level_item) == levels.end()) {
ASSERT_FALSE(DataPrecision::check(type_item, level_item));
} else {
ASSERT_TRUE(DataPrecision::check(type_item, level_item));
}
}
}
}
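The nested loops assert the full 8 x 8 type/level cross product: each of the eight level counts must be accepted only by the two integer types of matching bit width (signed and unsigned), so 16 combinations pass DataPrecision::check and the remaining 48 are rejected.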

View File

@ -11,7 +11,7 @@
using namespace ngraph;
TEST(LPT_GetDataPrecision, getDataPrecision_reqU8_U8_to_U8) {
TEST(smoke_LPT_LayerTransformation, getDataPrecision_reqU8_U8_to_U8) {
const auto input = std::make_shared<ov::op::v0::Parameter>(element::f32, Shape{1, 3, 299, 299});
const auto low = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{0.f});
const auto high = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{2.55f});
@ -27,7 +27,19 @@ TEST(LPT_GetDataPrecision, getDataPrecision_reqU8_U8_to_U8) {
ASSERT_EQ(false, precisionDetails.empty());
}
TEST(LPT_GetDataPrecision, getDataPrecision_reqI8_I8_to_I8) {
TEST(smoke_LPT_LayerTransformation, getDataPrecision_reqU8_65535_to_U8) {
const auto input = std::make_shared<ov::op::v0::Parameter>(element::f32, Shape{1, 3, 299, 299});
const auto low = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{0.f});
const auto high = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{2.55f});
const auto fakeQuantize = std::make_shared<ov::op::v0::FakeQuantize>(input, low, high, low, high, 65535);
const auto dequantization = pass::low_precision::QuantizationDetails::getDetails(fakeQuantize);
const auto precisionDetails = ngraph::pass::low_precision::LayerTransformation::getDataPrecision(fakeQuantize, dequantization, {element::u8});
ASSERT_TRUE(precisionDetails.empty());
}
TEST(smoke_LPT_LayerTransformation, getDataPrecision_reqI8_I8_to_I8) {
const auto input = std::make_shared<ov::op::v0::Parameter>(element::f32, Shape{1, 3, 299, 299});
const auto low = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{-1.28f});
const auto high = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{1.27f});
@ -44,7 +56,7 @@ TEST(LPT_GetDataPrecision, getDataPrecision_reqI8_I8_to_I8) {
ASSERT_EQ(false, precisionDetails.empty());
}
TEST(LPT_GetDataPrecision, getDataPrecision_reqU8_I8_to_U8zp) {
TEST(smoke_LPT_LayerTransformation, getDataPrecision_reqU8_I8_to_U8zp) {
const auto input = std::make_shared<ov::op::v0::Parameter>(element::f32, Shape{1, 3, 299, 299});
const auto low = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{-1.28f});
const auto high = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{1.27f});
@ -60,7 +72,7 @@ TEST(LPT_GetDataPrecision, getDataPrecision_reqU8_I8_to_U8zp) {
ASSERT_EQ(false, precisionDetails.empty());
}
TEST(LPT_GetDataPrecision, getDataPrecision_reqI8_U8_to_I8zp) {
TEST(smoke_LPT_LayerTransformation, getDataPrecision_reqI8_U8_to_I8zp) {
const auto input = std::make_shared<ov::op::v0::Parameter>(element::f32, Shape{1, 3, 299, 299});
const auto low = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{0.f});
const auto high = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{2.55f});
@ -76,7 +88,7 @@ TEST(LPT_GetDataPrecision, getDataPrecision_reqI8_U8_to_I8zp) {
ASSERT_EQ(false, precisionDetails.empty());
}
TEST(LPT_GetDataPrecision, getDataPrecision_reqU8_I8zp_to_U8zp) {
TEST(smoke_LPT_LayerTransformation, getDataPrecision_reqU8_I8zp_to_U8zp) {
const auto input = std::make_shared<ov::op::v0::Parameter>(element::f32, Shape{1, 3, 299, 299});
const auto low = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{-0.875227511f});
const auto high = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{0.882119000f});
@ -92,7 +104,7 @@ TEST(LPT_GetDataPrecision, getDataPrecision_reqU8_I8zp_to_U8zp) {
ASSERT_EQ(false, precisionDetails.empty());
}
TEST(LPT_GetDataPrecision, getDataPrecision_reqI8_U8zp_to_I8zp) {
TEST(smoke_LPT_LayerTransformation, getDataPrecision_reqI8_U8zp_to_I8zp) {
const auto input = std::make_shared<ov::op::v0::Parameter>(element::f32, Shape{1, 3, 299, 299});
const auto low = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{0.875227511f});
const auto high = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{0.882119000f});
@ -108,7 +120,7 @@ TEST(LPT_GetDataPrecision, getDataPrecision_reqI8_U8zp_to_I8zp) {
ASSERT_EQ(false, precisionDetails.empty());
}
TEST(LPT_GetDataPrecision, getDataPrecision_reqNone_I8zp_to_undefzp) {
TEST(smoke_LPT_LayerTransformation, getDataPrecision_reqNone_I8zp_to_undefzp) {
const auto input = std::make_shared<ov::op::v0::Parameter>(element::f32, Shape{1, 3, 299, 299});
const auto low = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{-0.875227511f});
const auto high = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{0.882119000f});
@ -124,7 +136,7 @@ TEST(LPT_GetDataPrecision, getDataPrecision_reqNone_I8zp_to_undefzp) {
ASSERT_EQ(true, precisionDetails.empty());
}
TEST(LPT_GetDataPrecision, getDataPrecision_reqNone_U8zp_to_undefzp) {
TEST(smoke_LPT_LayerTransformation, getDataPrecision_reqNone_U8zp_to_undefzp) {
const auto input = std::make_shared<ov::op::v0::Parameter>(element::f32, Shape{1, 3, 299, 299});
const auto low = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{0.875227511f});
const auto high = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{0.882119000f});

View File

@ -116,6 +116,33 @@ const std::vector<LayerTestsDefinitions::ConvolutionTransformationParam> params
"Convolution",
"U8"
},
// not supported quantization level on data
{
{ 65536ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
false,
{ 255ul, ngraph::Shape{1, 1, 1, 1}, {0.f}, {254.f}, {-12.7f}, {12.7f}},
false,
"Convolution",
"FP32"
},
// not supported quantization level on data & weights
{
{ 65536ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
false,
{ 65536ul, ngraph::Shape{1, 1, 1, 1}, {0.f}, {254.f}, {-12.7f}, {12.7f}},
false,
"Convolution",
"FP32"
},
// not supported quantization level on weights
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
false,
{ 65536ul, ngraph::Shape{1, 1, 1, 1}, {0.f}, {254.f}, {-12.7f}, {12.7f}},
false,
"Convolution",
"FP32"
}
};
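In all three new cases the expected precision for the Convolution is FP32 rather than U8: a 65536-level FakeQuantize on the activations, on the weights, or on both cannot be mapped to the 8-bit precisions this suite targets, so the layer must stay unquantized instead of being transformed with a wrong range.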
const std::vector<ngraph::Shape> shapes = {

View File

@ -84,6 +84,33 @@ const std::vector<LayerTestsDefinitions::ConvolutionTransformationParam> params
"Convolution",
"U8"
},
// not supported quantization level on data
{
{ 65536ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
false,
{ 255ul, ngraph::Shape{1, 1, 1, 1}, {0.f}, {254.f}, {-12.7f}, {12.7f}},
false,
"Convolution",
"FP32"
},
// not supported quantization level on data & weights
{
{ 65536ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { 0.f }, { 25.5f } },
false,
{ 65536ul, ngraph::Shape{1, 1, 1, 1}, {0.f}, {254.f}, {-12.7f}, {12.7f}},
false,
"Convolution",
"FP32"
},
// not supported quantization level on weights
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { 0.f }, { 25.5f } },
false,
{ 65536ul, ngraph::Shape{1, 1, 1, 1}, {0.f}, {254.f}, {-12.7f}, {12.7f}},
false,
"Convolution",
"FP32"
}
};
INSTANTIATE_TEST_SUITE_P(smoke_LPT, ConvolutionTransformation,

View File

@ -29,10 +29,10 @@ std::string ConvolutionTransformation::getTestCaseName(const testing::TestParamI
std::tie(netPrecision, inputShape, targetDevice, params, param) = obj.param;
std::ostringstream result;
result << getTestCaseNameByParams(netPrecision, inputShape, targetDevice, params) << "_" <<
inputShape.rank().get_length() << "D_" <<
param.fakeQuantizeOnData << "_" <<
param.fakeQuantizeOnWeights;
result << getTestCaseNameByParams(netPrecision, inputShape, targetDevice, params) <<
"_rank=" << inputShape.rank().get_length() <<
"D_fq_on_data={" << param.fakeQuantizeOnData <<
"}_fq_on_weights={" << param.fakeQuantizeOnWeights << "}";
return result.str();
}

View File

@ -58,9 +58,13 @@ inline std::ostream& operator<<(std::ostream& out, const FakeQuantizeOnData& dat
if (data.empty()) {
return out << "{}";
}
return out << "_" << data.quantizationLevel << data.constantShape << "_" << data.inputLowValues << "_" << data.inputHighValues <<
"_" << data.outputLowValues << "_" << data.outputHighValues << "_" <<
(data.outputPrecision == ngraph::element::undefined ? "" : data.outputPrecision.get_type_name());
return out << "level=" << data.quantizationLevel <<
"_shape=" << data.constantShape <<
"_input_low=" << data.inputLowValues <<
"_input_high=" << data.inputHighValues <<
"_output_low=" << data.outputLowValues <<
"_output_high" << data.outputHighValues <<
"_precision=" << (data.outputPrecision == ngraph::element::undefined ? "" : data.outputPrecision.get_type_name());
}
class FakeQuantizeOnDataWithConstant {
@ -96,11 +100,13 @@ inline std::ostream& operator<<(std::ostream& out, const FakeQuantizeOnDataWithC
if (data.empty()) {
return out << "{}";
}
return out << "_" << data.quantizationLevel <<
(data.constantShapes.empty() ? ngraph::Shape{} : data.constantShapes[0]) << "_" <<
data.inputLowValues << "_" << data.inputHighValues << "_" <<
data.outputLowValues << "_" << data.outputHighValues << "_" <<
(data.outputPrecision == ngraph::element::undefined ? "" : data.outputPrecision.get_type_name());
return out << "level=" << data.quantizationLevel <<
"_shape=" <<(data.constantShapes.empty() ? ngraph::Shape{} : data.constantShapes[0]) <<
"_input_low=" << data.inputLowValues <<
"_input_high=" << data.inputHighValues <<
"_output_low=" << data.outputLowValues <<
"_output_high=" << data.outputHighValues <<
"_precision=" << (data.outputPrecision == ngraph::element::undefined ? "" : data.outputPrecision.get_type_name());
}
} // namespace subgraph
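With the reworked operators a test-case name becomes self-describing. For the first new convolution case above, the data part would render roughly as follows (illustrative; the exact rendering of shapes and value lists depends on their own stream operators):

level=65536_shape={1,1,1,1}_input_low={0}_input_high={25.5}_output_low={0}_output_high={25.5}_precision=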