[LPT] Unexpected quantisation level fix (#18888)
* [LPT] Unexpected quantisation level support
* [LPT] Unexpected quantisation level support - extension for weights
* [LPT] Unexpected quantisation level support - extension for weights + tests
* refactoring: compilation fix
This commit is contained in:
parent
74c778e7ee
commit
b76fc24824
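
In short: LPT previously assumed every FakeQuantize level count matched one of its supported integer precisions. The patch adds an explicit DataPrecision::check guard so an unexpected count (say, 65536 levels against a u8 target) makes the transformation bail out instead of producing a wrong decomposition. A minimal standalone sketch of the rule being enforced; levels_match_bitwidth is a hypothetical helper, not part of the patch:

#include <cstddef>

// Hypothetical helper illustrating the rule the patch enforces: a level
// count fits an n-bit integer type only if it equals 2^n (full range)
// or 2^n - 1 (narrow range).
bool levels_match_bitwidth(std::size_t levels, unsigned bits) {
    const std::size_t full = std::size_t(1) << bits;  // 256 for 8 bits, 65536 for 16
    return (levels == full) || (levels == full - 1);
}
// levels_match_bitwidth(256, 8)   -> true  (u8/i8)
// levels_match_bitwidth(65536, 8) -> false (16-bit count, 8-bit type)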
@@ -90,6 +90,25 @@ public:
        return lowPrecision.find(precision) != lowPrecision.end();
    }

    static bool check(const element::Type precision, const size_t levels) {
        switch (precision) {
            case element::i4:
            case element::u4:
                return (levels == low_precision::levels::int4) || (levels == low_precision::levels::int4_narrow_range);
            case element::i8:
            case element::u8:
                return (levels == low_precision::levels::int8) || (levels == low_precision::levels::int8_narrow_range);
            case element::i16:
            case element::u16:
                return (levels == low_precision::levels::int16) || (levels == low_precision::levels::int16_narrow_range);
            case element::i32:
            case element::u32:
                return (levels == low_precision::levels::int32) || (levels == low_precision::levels::int32_narrow_range);
            default:
                return false;
        }
    }

    static float getMinValue(const element::Type precision, const size_t levels) {
        switch (precision) {
            case element::u4:
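
Typical intent of the new helper, restated against the usual LPT level constants (int8 = 256, int8_narrow_range = 255; the exact enum values are an assumption here, not shown in this diff):

// Hypothetical call sites for the DataPrecision::check added above:
// DataPrecision::check(element::u8, 256);    // true: 8-bit full range
// DataPrecision::check(element::u8, 255);    // true: 8-bit narrow range
// DataPrecision::check(element::u8, 65536);  // false: a 16-bit level count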
@@ -134,6 +134,9 @@ DataPrecision getDataPrecisionByOutputPort(std::shared_ptr<opset1::FakeQuantize>
        precisionsForLevels = {element::u8, element::i8};
    }
    const auto resultPrecisions = NetworkHelper::precisionIntersection(precisions, precisionsForLevels);
    if (resultPrecisions.empty()) {
        return DataPrecision();
    }

    ngraph::element::Type precision;
    bool hasZeroPoint;
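
If the plugin-requested precisions share nothing with the precisions valid for the observed level count, the function now returns an empty DataPrecision. A rough standalone stand-in for the intersection step (precision_intersection only mirrors the idea behind NetworkHelper::precisionIntersection; strings stand in for element::Type):

#include <algorithm>
#include <iterator>
#include <string>
#include <vector>

// Keep only the precisions that appear in both lists.
std::vector<std::string> precision_intersection(std::vector<std::string> a,
                                                std::vector<std::string> b) {
    std::sort(a.begin(), a.end());
    std::sort(b.begin(), b.end());
    std::vector<std::string> out;
    std::set_intersection(a.begin(), a.end(), b.begin(), b.end(),
                          std::back_inserter(out));
    return out;  // empty result -> the caller returns an empty DataPrecision
}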
@@ -315,11 +318,16 @@ bool FakeQuantizeDecompositionTransformation::transform(TransformationContext& c
        return rewritten;
    }

    // check if level is supported in LPT
    if (!QuantizationDetails::isSupportedLevel(layer->get_levels())) {
        return rewritten;
    }

    // check if level is supported in plugin
    DataPrecision dataPrecision = fq_decomposition::getDataPrecisionByOutputPort(layer);
    if (dataPrecision.empty()) {
        return rewritten;
    }

    PrecisionsAttribute precisionsAttribute(defaultPrecisions);
    {
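
A standalone sketch of what the first guard tests, assuming the supported set matches the int4/int8/int16 full and narrow-range counts used elsewhere in this patch (is_supported_level is a stand-in; the real logic is QuantizationDetails::isSupportedLevel):

#include <cstddef>
#include <set>

// Only the full-range and narrow-range counts of the supported bit widths pass.
bool is_supported_level(std::size_t levels) {
    static const std::set<std::size_t> supported = {
        15, 16,        // int4 narrow range / full range
        255, 256,      // int8
        65535, 65536   // int16
    };
    return supported.count(levels) != 0;
}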
@@ -338,6 +338,10 @@ DataPrecision LayerTransformation::getDataPrecision(
            precisionDetailsAtOutputIntervals.precision :
            *requiredPrecisions.begin();

        if (!DataPrecision::check(resultPrecision, quantizationDetails.levels)) {
            return DataPrecision();
        }

        return DataPrecision(
            resultPrecision,
            DataPrecision::getMinValue(resultPrecision, quantizationDetails.levels),
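
getMinValue takes the level count as well as the precision because full range and narrow range differ in their lower bound; by the standard LPT convention i8 full range is [-128, 127] while i8 narrow range drops one level to [-127, 127]. A hand-computed illustration:

#include <cstdio>

int main() {
    // i8 with 256 levels (full range) vs 255 levels (narrow range):
    const int full_min = -128, narrow_min = -127, shared_max = 127;
    std::printf("i8 full:   [%d, %d]\n", full_min, shared_max);
    std::printf("i8 narrow: [%d, %d]\n", narrow_min, shared_max);
}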
@@ -348,6 +352,11 @@ DataPrecision LayerTransformation::getDataPrecision(
    // FakeQuantize optimal precision is not defined
    if (!requiredPrecisions.empty()) {
        const element::Type resultPrecision = *requiredPrecisions.begin();

        if (!DataPrecision::check(resultPrecision, quantizationDetails.levels)) {
            return DataPrecision();
        }

        return DataPrecision(
            resultPrecision,
            DataPrecision::getMinValue(resultPrecision, quantizationDetails.levels),
@@ -108,6 +108,10 @@ bool RecurrentCellTransformation::transform(TransformationContext& context, ngra
        ? defaultPrecisions
        : precisionsAttribute.as<PrecisionsAttribute>().value();
    const DataPrecision dataPrecision = getDataPrecision(lstm_parent, quantizationDetails, precisions);
    if (dataPrecision.empty()) {
        return false;
    }

    auto QDQ = NetworkHelper::decomposeFakeQuantize(fq_node,
        dataPrecision.precision,
        dataPrecision.min,
@@ -70,7 +70,7 @@ bool WeightableLayerTransformation::canConvolutionBeTransformed(const Transforma
    if (dequantization.empty()) {
        const auto fqOnWeights = getFakeQuantizeOnWeights(layer);
        const auto dataPrecision = getDataPrecisionOnWeights(layer, defaultPrecisions);
-       if ((dataPrecision.precision == ngraph::element::undefined) || (!NetworkHelper::checkZeroPoint(fqOnWeights, dataPrecision))) {
+       if ((dataPrecision.empty()) || (!NetworkHelper::checkZeroPoint(fqOnWeights, dataPrecision))) {
            return false;
        }
    } else {
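
Switching from the explicit undefined-precision comparison to DataPrecision::empty() also covers the new failure mode: getDataPrecisionOnWeights returns an empty DataPrecision when the level check fails, not only when no precision was selected. The assumed shape of empty(), which this diff does not show:

// Assumed, for illustration only; the actual definition lives in
// low_precision/layer_transformation.hpp:
// bool DataPrecision::empty() const { return precision == element::undefined; }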
@@ -0,0 +1,59 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <memory>
#include <gtest/gtest.h>
#include <ie_blob.h>
#include <low_precision/layer_transformation.hpp>
#include <low_precision/network_helper.hpp>
#include "ngraph_functions/builders.hpp"

using namespace ngraph;

TEST(smoke_LPT_DataPrecision, check) {
    using namespace ngraph::pass::low_precision;

    const std::vector<element::Type> type_items = {
        element::i4,
        element::u4,
        element::i8,
        element::u8,
        element::i16,
        element::u16,
        element::i32,
        element::u32
    };

    const std::vector<levels> level_items = {
        int4,
        int4_narrow_range,
        int8,
        int8_narrow_range,
        int16,
        int16_narrow_range,
        int32,
        int32_narrow_range
    };

    const std::map<element::Type, std::set<levels>> items = {
        {element::i4, {levels::int4, levels::int4_narrow_range}},
        {element::u4, {levels::int4, levels::int4_narrow_range}},
        {element::i8, {levels::int8, levels::int8_narrow_range}},
        {element::u8, {levels::int8, levels::int8_narrow_range}},
        {element::i16, {levels::int16, levels::int16_narrow_range}},
        {element::u16, {levels::int16, levels::int16_narrow_range}},
        {element::i32, {levels::int32, levels::int32_narrow_range}},
        {element::u32, {levels::int32, levels::int32_narrow_range}},
    };
    for (const auto type_item : type_items) {
        for (const auto level_item : level_items) {
            const auto& levels = items.find(type_item)->second;
            if (levels.find(level_item) == levels.end()) {
                ASSERT_FALSE(DataPrecision::check(type_item, level_item));
            } else {
                ASSERT_TRUE(DataPrecision::check(type_item, level_item));
            }
        }
    }
}
@@ -11,7 +11,7 @@

using namespace ngraph;

-TEST(LPT_GetDataPrecision, getDataPrecision_reqU8_U8_to_U8) {
+TEST(smoke_LPT_LayerTransformation, getDataPrecision_reqU8_U8_to_U8) {
    const auto input = std::make_shared<ov::op::v0::Parameter>(element::f32, Shape{1, 3, 299, 299});
    const auto low = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{0.f});
    const auto high = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{2.55f});
@@ -27,7 +27,19 @@ TEST(LPT_GetDataPrecision, getDataPrecision_reqU8_U8_to_U8) {
    ASSERT_EQ(false, precisionDetails.empty());
}

-TEST(LPT_GetDataPrecision, getDataPrecision_reqI8_I8_to_I8) {
+TEST(smoke_LPT_LayerTransformation, getDataPrecision_reqU8_65535_to_U8) {
+    const auto input = std::make_shared<ov::op::v0::Parameter>(element::f32, Shape{1, 3, 299, 299});
+    const auto low = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{0.f});
+    const auto high = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{2.55f});
+    const auto fakeQuantize = std::make_shared<ov::op::v0::FakeQuantize>(input, low, high, low, high, 65535);
+
+    auto const dequantization = pass::low_precision::QuantizationDetails::getDetails(fakeQuantize);
+
+    auto const precisionDetails = ngraph::pass::low_precision::LayerTransformation::getDataPrecision(fakeQuantize, dequantization, {element::u8});
+    ASSERT_TRUE(precisionDetails.empty());
+}
+
+TEST(smoke_LPT_LayerTransformation, getDataPrecision_reqI8_I8_to_I8) {
    const auto input = std::make_shared<ov::op::v0::Parameter>(element::f32, Shape{1, 3, 299, 299});
    const auto low = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{-1.28f});
    const auto high = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{1.27f});
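
The new getDataPrecision_reqU8_65535_to_U8 test pins the negative path: 65535 is the 16-bit narrow-range level count, which never pairs with u8, so getDataPrecision must come back empty. The same pairing restated as a tiny self-contained check (u8_accepts is illustrative only):

#include <cassert>
#include <cstddef>

// u8 accepts only the 8-bit level counts.
bool u8_accepts(std::size_t levels) { return levels == 255 || levels == 256; }

int main() {
    assert(u8_accepts(256));
    assert(!u8_accepts(65535));  // mirrors getDataPrecision_reqU8_65535_to_U8
}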
@@ -44,7 +56,7 @@ TEST(LPT_GetDataPrecision, getDataPrecision_reqI8_I8_to_I8) {
    ASSERT_EQ(false, precisionDetails.empty());
}

-TEST(LPT_GetDataPrecision, getDataPrecision_reqU8_I8_to_U8zp) {
+TEST(smoke_LPT_LayerTransformation, getDataPrecision_reqU8_I8_to_U8zp) {
    const auto input = std::make_shared<ov::op::v0::Parameter>(element::f32, Shape{1, 3, 299, 299});
    const auto low = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{-1.28f});
    const auto high = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{1.27f});
@@ -60,7 +72,7 @@ TEST(LPT_GetDataPrecision, getDataPrecision_reqU8_I8_to_U8zp) {
    ASSERT_EQ(false, precisionDetails.empty());
}

-TEST(LPT_GetDataPrecision, getDataPrecision_reqI8_U8_to_I8zp) {
+TEST(smoke_LPT_LayerTransformation, getDataPrecision_reqI8_U8_to_I8zp) {
    const auto input = std::make_shared<ov::op::v0::Parameter>(element::f32, Shape{1, 3, 299, 299});
    const auto low = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{0.f});
    const auto high = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{2.55f});
@@ -76,7 +88,7 @@ TEST(LPT_GetDataPrecision, getDataPrecision_reqI8_U8_to_I8zp) {
    ASSERT_EQ(false, precisionDetails.empty());
}

-TEST(LPT_GetDataPrecision, getDataPrecision_reqU8_I8zp_to_U8zp) {
+TEST(smoke_LPT_LayerTransformation, getDataPrecision_reqU8_I8zp_to_U8zp) {
    const auto input = std::make_shared<ov::op::v0::Parameter>(element::f32, Shape{1, 3, 299, 299});
    const auto low = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{-0.875227511f});
    const auto high = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{0.882119000f});
@@ -92,7 +104,7 @@ TEST(LPT_GetDataPrecision, getDataPrecision_reqU8_I8zp_to_U8zp) {
    ASSERT_EQ(false, precisionDetails.empty());
}

-TEST(LPT_GetDataPrecision, getDataPrecision_reqI8_U8zp_to_I8zp) {
+TEST(smoke_LPT_LayerTransformation, getDataPrecision_reqI8_U8zp_to_I8zp) {
    const auto input = std::make_shared<ov::op::v0::Parameter>(element::f32, Shape{1, 3, 299, 299});
    const auto low = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{0.875227511f});
    const auto high = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{0.882119000f});
@@ -108,7 +120,7 @@ TEST(LPT_GetDataPrecision, getDataPrecision_reqI8_U8zp_to_I8zp) {
    ASSERT_EQ(false, precisionDetails.empty());
}

-TEST(LPT_GetDataPrecision, getDataPrecision_reqNone_I8zp_to_undefzp) {
+TEST(smoke_LPT_LayerTransformation, getDataPrecision_reqNone_I8zp_to_undefzp) {
    const auto input = std::make_shared<ov::op::v0::Parameter>(element::f32, Shape{1, 3, 299, 299});
    const auto low = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{-0.875227511f});
    const auto high = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{0.882119000f});
@@ -124,7 +136,7 @@ TEST(LPT_GetDataPrecision, getDataPrecision_reqNone_I8zp_to_undefzp) {
    ASSERT_EQ(true, precisionDetails.empty());
}

-TEST(LPT_GetDataPrecision, getDataPrecision_reqNone_U8zp_to_undefzp) {
+TEST(smoke_LPT_LayerTransformation, getDataPrecision_reqNone_U8zp_to_undefzp) {
    const auto input = std::make_shared<ov::op::v0::Parameter>(element::f32, Shape{1, 3, 299, 299});
    const auto low = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{0.875227511f});
    const auto high = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{0.882119000f});
@@ -116,6 +116,33 @@ const std::vector<LayerTestsDefinitions::ConvolutionTransformationParam> params
        "Convolution",
        "U8"
    },
    // not supported quantization level on data
    {
        { 65536ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
        false,
        { 255ul, ngraph::Shape{1, 1, 1, 1}, {0.f}, {254.f}, {-12.7f}, {12.7f}},
        false,
        "Convolution",
        "FP32"
    },
    // not supported quantization level on data & weights
    {
        { 65536ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
        false,
        { 65536ul, ngraph::Shape{1, 1, 1, 1}, {0.f}, {254.f}, {-12.7f}, {12.7f}},
        false,
        "Convolution",
        "FP32"
    },
    // not supported quantization level on weights
    {
        { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
        false,
        { 65536ul, ngraph::Shape{1, 1, 1, 1}, {0.f}, {254.f}, {-12.7f}, {12.7f}},
        false,
        "Convolution",
        "FP32"
    }
};

const std::vector<ngraph::Shape> shapes = {
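
In these plugin-level cases the last two fields are, by the fixture's convention, the expected layer type and its expected runtime precision: a 65536-level FakeQuantize on either input now leaves the Convolution executing in "FP32" instead of "U8" (field meaning inferred from the ConvolutionTransformationParam fixture; treat it as an assumption here).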
@@ -84,6 +84,33 @@ const std::vector<LayerTestsDefinitions::ConvolutionTransformationParam> params
        "Convolution",
        "U8"
    },
    // not supported quantization level on data
    {
        { 65536ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
        false,
        { 255ul, ngraph::Shape{1, 1, 1, 1}, {0.f}, {254.f}, {-12.7f}, {12.7f}},
        false,
        "Convolution",
        "FP32"
    },
    // not supported quantization level on data & weights
    {
        { 65536ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { 0.f }, { 25.5f } },
        false,
        { 65536ul, ngraph::Shape{1, 1, 1, 1}, {0.f}, {254.f}, {-12.7f}, {12.7f}},
        false,
        "Convolution",
        "FP32"
    },
    // not supported quantization level on weights
    {
        { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { 0.f }, { 25.5f } },
        false,
        { 65536ul, ngraph::Shape{1, 1, 1, 1}, {0.f}, {254.f}, {-12.7f}, {12.7f}},
        false,
        "Convolution",
        "FP32"
    }
};

INSTANTIATE_TEST_SUITE_P(smoke_LPT, ConvolutionTransformation,
@@ -29,10 +29,10 @@ std::string ConvolutionTransformation::getTestCaseName(const testing::TestParamI
    std::tie(netPrecision, inputShape, targetDevice, params, param) = obj.param;

    std::ostringstream result;
-   result << getTestCaseNameByParams(netPrecision, inputShape, targetDevice, params) << "_" <<
-       inputShape.rank().get_length() << "D_" <<
-       param.fakeQuantizeOnData << "_" <<
-       param.fakeQuantizeOnWeights;
+   result << getTestCaseNameByParams(netPrecision, inputShape, targetDevice, params) <<
+       "_rank=" << inputShape.rank().get_length() <<
+       "D_fq_on_data={" << param.fakeQuantizeOnData <<
+       "}_fq_on_weights={" << param.fakeQuantizeOnWeights << "}";
    return result.str();
}

@@ -58,9 +58,13 @@ inline std::ostream& operator<<(std::ostream& out, const FakeQuantizeOnData& dat
    if (data.empty()) {
        return out << "{}";
    }
-   return out << "_" << data.quantizationLevel << data.constantShape << "_" << data.inputLowValues << "_" << data.inputHighValues <<
-       "_" << data.outputLowValues << "_" << data.outputHighValues << "_" <<
-       (data.outputPrecision == ngraph::element::undefined ? "" : data.outputPrecision.get_type_name());
+   return out << "level=" << data.quantizationLevel <<
+       "_shape=" << data.constantShape <<
+       "_input_low=" << data.inputLowValues <<
+       "_input_high=" << data.inputHighValues <<
+       "_output_low=" << data.outputLowValues <<
+       "_output_high=" << data.outputHighValues <<
+       "_precision=" << (data.outputPrecision == ngraph::element::undefined ? "" : data.outputPrecision.get_type_name());
}

class FakeQuantizeOnDataWithConstant {
@@ -96,11 +100,13 @@ inline std::ostream& operator<<(std::ostream& out, const FakeQuantizeOnDataWithC
    if (data.empty()) {
        return out << "{}";
    }
-   return out << "_" << data.quantizationLevel <<
-       (data.constantShapes.empty() ? ngraph::Shape{} : data.constantShapes[0]) << "_" <<
-       data.inputLowValues << "_" << data.inputHighValues << "_" <<
-       data.outputLowValues << "_" << data.outputHighValues << "_" <<
-       (data.outputPrecision == ngraph::element::undefined ? "" : data.outputPrecision.get_type_name());
+   return out << "level=" << data.quantizationLevel <<
+       "_shape=" << (data.constantShapes.empty() ? ngraph::Shape{} : data.constantShapes[0]) <<
+       "_input_low=" << data.inputLowValues <<
+       "_input_high=" << data.inputHighValues <<
+       "_output_low=" << data.outputLowValues <<
+       "_output_high=" << data.outputHighValues <<
+       "_precision=" << (data.outputPrecision == ngraph::element::undefined ? "" : data.outputPrecision.get_type_name());
}

} // namespace subgraph