[LPT] Unexpected quantisation level fix (#18888)

* [LPT] Unexpected quantisation level support

* [LPT] Unexpected quantisation level support - extension for weights

* [LPT] Unexpected quantisation level support - extension for weights + tests

* refactoring: compilation fix
Edward Shogulin 2023-08-04 15:02:12 +01:00 committed by GitHub
parent 74c778e7ee
commit b76fc24824
11 changed files with 192 additions and 21 deletions
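The pattern throughout is the same guard: before a FakeQuantize is rewritten to a low precision, the new DataPrecision::check verifies that the operation's level count actually matches the candidate integer type, and every caller now bails out (leaving the layer in FP32) when nothing fits. For reference, a sketch of the low_precision::levels constants the check relies on; the exact values are an assumption, inferred from the 2^N / 2^N-1 naming convention and the 65535/65536 counts exercised in the tests below:

#include <cstddef>

// Assumed values of the level-count constants referenced in the hunks below:
// a full range has 2^N levels, a narrow range drops one level (2^N - 1).
namespace low_precision {
namespace levels {
constexpr size_t int4 = 16;
constexpr size_t int4_narrow_range = 15;
constexpr size_t int8 = 256;
constexpr size_t int8_narrow_range = 255;
constexpr size_t int16 = 65536;
constexpr size_t int16_narrow_range = 65535;
constexpr size_t int32 = 4294967296ULL;  // 2^32
constexpr size_t int32_narrow_range = 4294967295ULL;
}  // namespace levels
}  // namespace low_precision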

View File

@ -90,6 +90,25 @@ public:
return lowPrecision.find(precision) != lowPrecision.end();
}
static bool check(const element::Type precision, const size_t levels) {
switch (precision) {
case element::i4:
case element::u4:
return (levels == low_precision::levels::int4) || (levels == low_precision::levels::int4_narrow_range);
case element::i8:
case element::u8:
return (levels == low_precision::levels::int8) || (levels == low_precision::levels::int8_narrow_range);
case element::i16:
case element::u16:
return (levels == low_precision::levels::int16) || (levels == low_precision::levels::int16_narrow_range);
case element::i32:
case element::u32:
return (levels == low_precision::levels::int32) || (levels == low_precision::levels::int32_narrow_range);
default:
return false;
}
}
static float getMinValue(const element::Type precision, const size_t levels) {
switch (precision) {
case element::u4:
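A minimal usage sketch of the new predicate; the header path and namespaces are taken from the unit test added later in this commit:

#include <cassert>
#include <low_precision/layer_transformation.hpp>

using namespace ngraph;
using namespace ngraph::pass::low_precision;

int main() {
    // 256 levels is the full int8 range, so it fits u8 ...
    assert(DataPrecision::check(element::u8, levels::int8));
    // ... but 65536 levels needs 16 bits and is rejected for u8.
    assert(!DataPrecision::check(element::u8, levels::int16));
    // Narrow-range counts (2^N - 1) are accepted for the matching width too.
    assert(DataPrecision::check(element::i16, levels::int16_narrow_range));
    return 0;
}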

View File

@ -134,6 +134,9 @@ DataPrecision getDataPrecisionByOutputPort(std::shared_ptr<opset1::FakeQuantize>
precisionsForLevels = {element::u8, element::i8};
}
const auto resultPrecisions = NetworkHelper::precisionIntersection(precisions, precisionsForLevels);
if (resultPrecisions.empty()) {
return DataPrecision();
}
ngraph::element::Type precision;
bool hasZeroPoint;
@ -315,11 +318,16 @@ bool FakeQuantizeDecompositionTransformation::transform(TransformationContext& c
return rewritten;
}
// check if level is supported in LPT
if (!QuantizationDetails::isSupportedLevel(layer->get_levels())) {
return rewritten;
}
// check if level is supported in plugin
DataPrecision dataPrecision = fq_decomposition::getDataPrecisionByOutputPort(layer);
if (dataPrecision.empty()) {
return rewritten;
}
PrecisionsAttribute precisionsAttribute(defaultPrecisions);
{
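The second guard is where unexpected levels surface in practice. A sketch of how the empty result arises; the {u16, i16} candidate list for 65536 levels is an assumption mirroring the {u8, i8} branch shown in the first hunk above:

// For a 65536-level FakeQuantize the precisions implied by the level count
// would be {u16, i16}; a plugin that only registers {u8, i8} then yields an
// empty intersection, getDataPrecisionByOutputPort returns an empty
// DataPrecision, and transform() leaves the layer in FP32.
const std::vector<element::Type> precisions = {element::u8, element::i8};             // from the plugin
const std::vector<element::Type> precisionsForLevels = {element::u16, element::i16};  // assumed for 65536 levels
const auto resultPrecisions = NetworkHelper::precisionIntersection(precisions, precisionsForLevels);
assert(resultPrecisions.empty());  // -> DataPrecision() -> early return from transform()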

View File

@ -338,6 +338,10 @@ DataPrecision LayerTransformation::getDataPrecision(
precisionDetailsAtOutputIntervals.precision :
*requiredPrecisions.begin();
if (!DataPrecision::check(resultPrecision, quantizationDetails.levels)) {
return DataPrecision();
}
return DataPrecision(
resultPrecision,
DataPrecision::getMinValue(resultPrecision, quantizationDetails.levels),
@ -348,6 +352,11 @@ DataPrecision LayerTransformation::getDataPrecision(
// FakeQuantize optimal precision is not defined
if (!requiredPrecisions.empty()) {
const element::Type resultPrecision = *requiredPrecisions.begin();
if (!DataPrecision::check(resultPrecision, quantizationDetails.levels)) {
return DataPrecision();
}
return DataPrecision(
resultPrecision,
DataPrecision::getMinValue(resultPrecision, quantizationDetails.levels),
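The unit test getDataPrecision_reqU8_65535_to_U8 added later in this commit exercises exactly this path; condensed:

// A 65535-level FakeQuantize with a u8-only requirement must now yield an
// empty DataPrecision instead of a mislabelled u8 one.
const auto fakeQuantize = std::make_shared<ov::op::v0::FakeQuantize>(input, low, high, low, high, 65535);
const auto details = pass::low_precision::QuantizationDetails::getDetails(fakeQuantize);
const auto precisionDetails = LayerTransformation::getDataPrecision(fakeQuantize, details, {element::u8});
assert(precisionDetails.empty());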

View File

@ -108,6 +108,10 @@ bool RecurrentCellTransformation::transform(TransformationContext& context, ngra
? defaultPrecisions
: precisionsAttribute.as<PrecisionsAttribute>().value();
const DataPrecision dataPrecision = getDataPrecision(lstm_parent, quantizationDetails, precisions);
if (dataPrecision.empty()) {
return false;
}
auto QDQ = NetworkHelper::decomposeFakeQuantize(fq_node,
dataPrecision.precision,
dataPrecision.min,

View File

@ -70,7 +70,7 @@ bool WeightableLayerTransformation::canConvolutionBeTransformed(const Transforma
if (dequantization.empty()) {
const auto fqOnWeights = getFakeQuantizeOnWeights(layer);
const auto dataPrecision = getDataPrecisionOnWeights(layer, defaultPrecisions);
if ((dataPrecision.precision == ngraph::element::undefined) || (!NetworkHelper::checkZeroPoint(fqOnWeights, dataPrecision))) {
if ((dataPrecision.empty()) || (!NetworkHelper::checkZeroPoint(fqOnWeights, dataPrecision))) {
return false;
}
} else {
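This replaces an ad-hoc comparison with the DataPrecision::empty() sentinel used everywhere else in the commit. The body of empty() is not part of this diff; a plausible sketch, assuming a default-constructed DataPrecision carries element::undefined:

// Assumed shape of the predicate used above (not shown in this diff):
bool DataPrecision::empty() const noexcept {
    return precision == element::undefined;
}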

View File

@ -0,0 +1,59 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <memory>
#include <gtest/gtest.h>
#include <ie_blob.h>
#include <low_precision/layer_transformation.hpp>
#include <low_precision/network_helper.hpp>
#include "ngraph_functions/builders.hpp"
using namespace ngraph;
TEST(smoke_LPT_DataPrecision, check) {
using namespace ngraph::pass::low_precision;
const std::vector<element::Type> type_items = {
element::i4,
element::u4,
element::i8,
element::u8,
element::i16,
element::u16,
element::i32,
element::u32
};
const std::vector<levels> level_items = {
int4,
int4_narrow_range,
int8,
int8_narrow_range,
int16,
int16_narrow_range,
int32,
int32_narrow_range
};
const std::map<element::Type, std::set<levels>> items = {
{element::i4, {levels::int4, levels::int4_narrow_range}},
{element::u4, {levels::int4, levels::int4_narrow_range}},
{element::i8, {levels::int8, levels::int8_narrow_range}},
{element::u8, {levels::int8, levels::int8_narrow_range}},
{element::i16, {levels::int16, levels::int16_narrow_range}},
{element::u16, {levels::int16, levels::int16_narrow_range}},
{element::i32, {levels::int32, levels::int32_narrow_range}},
{element::u32, {levels::int32, levels::int32_narrow_range}},
};
for (const auto type_item : type_items) {
for (const auto level_item : level_items) {
const auto& levels = items.find(type_item)->second;
if (levels.find(level_item) == levels.end()) {
ASSERT_FALSE(DataPrecision::check(type_item, level_item));
} else {
ASSERT_TRUE(DataPrecision::check(type_item, level_item));
}
}
}
}
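The nested loops assert the full 8 x 8 type/level cross product: each of the eight level counts must be accepted only by the two integer types of matching bit width (signed and unsigned), so 16 combinations pass DataPrecision::check and the remaining 48 are rejected.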

View File

@ -11,7 +11,7 @@
using namespace ngraph;
TEST(LPT_GetDataPrecision, getDataPrecision_reqU8_U8_to_U8) {
TEST(smoke_LPT_LayerTransformation, getDataPrecision_reqU8_U8_to_U8) {
const auto input = std::make_shared<ov::op::v0::Parameter>(element::f32, Shape{1, 3, 299, 299});
const auto low = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{0.f});
const auto high = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{2.55f});
@ -27,7 +27,19 @@ TEST(LPT_GetDataPrecision, getDataPrecision_reqU8_U8_to_U8) {
ASSERT_EQ(false, precisionDetails.empty());
}
TEST(LPT_GetDataPrecision, getDataPrecision_reqI8_I8_to_I8) {
TEST(smoke_LPT_LayerTransformation, getDataPrecision_reqU8_65535_to_U8) {
const auto input = std::make_shared<ov::op::v0::Parameter>(element::f32, Shape{1, 3, 299, 299});
const auto low = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{0.f});
const auto high = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{2.55f});
const auto fakeQuantize = std::make_shared<ov::op::v0::FakeQuantize>(input, low, high, low, high, 65535);
const auto dequantization = pass::low_precision::QuantizationDetails::getDetails(fakeQuantize);
const auto precisionDetails = ngraph::pass::low_precision::LayerTransformation::getDataPrecision(fakeQuantize, dequantization, {element::u8});
ASSERT_TRUE(precisionDetails.empty());
}
TEST(smoke_LPT_LayerTransformation, getDataPrecision_reqI8_I8_to_I8) {
const auto input = std::make_shared<ov::op::v0::Parameter>(element::f32, Shape{1, 3, 299, 299});
const auto low = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{-1.28f});
const auto high = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{1.27f});
@ -44,7 +56,7 @@ TEST(LPT_GetDataPrecision, getDataPrecision_reqI8_I8_to_I8) {
ASSERT_EQ(false, precisionDetails.empty());
}
TEST(LPT_GetDataPrecision, getDataPrecision_reqU8_I8_to_U8zp) {
TEST(smoke_LPT_LayerTransformation, getDataPrecision_reqU8_I8_to_U8zp) {
const auto input = std::make_shared<ov::op::v0::Parameter>(element::f32, Shape{1, 3, 299, 299});
const auto low = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{-1.28f});
const auto high = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{1.27f});
@ -60,7 +72,7 @@ TEST(LPT_GetDataPrecision, getDataPrecision_reqU8_I8_to_U8zp) {
ASSERT_EQ(false, precisionDetails.empty());
}
TEST(LPT_GetDataPrecision, getDataPrecision_reqI8_U8_to_I8zp) {
TEST(smoke_LPT_LayerTransformation, getDataPrecision_reqI8_U8_to_I8zp) {
const auto input = std::make_shared<ov::op::v0::Parameter>(element::f32, Shape{1, 3, 299, 299});
const auto low = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{0.f});
const auto high = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{2.55f});
@ -76,7 +88,7 @@ TEST(LPT_GetDataPrecision, getDataPrecision_reqI8_U8_to_I8zp) {
ASSERT_EQ(false, precisionDetails.empty());
}
TEST(LPT_GetDataPrecision, getDataPrecision_reqU8_I8zp_to_U8zp) {
TEST(smoke_LPT_LayerTransformation, getDataPrecision_reqU8_I8zp_to_U8zp) {
const auto input = std::make_shared<ov::op::v0::Parameter>(element::f32, Shape{1, 3, 299, 299});
const auto low = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{-0.875227511f});
const auto high = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{0.882119000f});
@ -92,7 +104,7 @@ TEST(LPT_GetDataPrecision, getDataPrecision_reqU8_I8zp_to_U8zp) {
ASSERT_EQ(false, precisionDetails.empty());
}
TEST(LPT_GetDataPrecision, getDataPrecision_reqI8_U8zp_to_I8zp) {
TEST(smoke_LPT_LayerTransformation, getDataPrecision_reqI8_U8zp_to_I8zp) {
const auto input = std::make_shared<ov::op::v0::Parameter>(element::f32, Shape{1, 3, 299, 299});
const auto low = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{0.875227511f});
const auto high = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{0.882119000f});
@ -108,7 +120,7 @@ TEST(LPT_GetDataPrecision, getDataPrecision_reqI8_U8zp_to_I8zp) {
ASSERT_EQ(false, precisionDetails.empty());
}
TEST(LPT_GetDataPrecision, getDataPrecision_reqNone_I8zp_to_undefzp) {
TEST(smoke_LPT_LayerTransformation, getDataPrecision_reqNone_I8zp_to_undefzp) {
const auto input = std::make_shared<ov::op::v0::Parameter>(element::f32, Shape{1, 3, 299, 299});
const auto low = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{-0.875227511f});
const auto high = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{0.882119000f});
@ -124,7 +136,7 @@ TEST(LPT_GetDataPrecision, getDataPrecision_reqNone_I8zp_to_undefzp) {
ASSERT_EQ(true, precisionDetails.empty());
}
TEST(LPT_GetDataPrecision, getDataPrecision_reqNone_U8zp_to_undefzp) {
TEST(smoke_LPT_LayerTransformation, getDataPrecision_reqNone_U8zp_to_undefzp) {
const auto input = std::make_shared<ov::op::v0::Parameter>(element::f32, Shape{1, 3, 299, 299});
const auto low = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{0.875227511f});
const auto high = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{0.882119000f});

View File

@ -116,6 +116,33 @@ const std::vector<LayerTestsDefinitions::ConvolutionTransformationParam> params
"Convolution",
"U8"
},
// not supported quantization level on data
{
{ 65536ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
false,
{ 255ul, ngraph::Shape{1, 1, 1, 1}, {0.f}, {254.f}, {-12.7f}, {12.7f}},
false,
"Convolution",
"FP32"
},
// not supported quantization level on data & weights
{
{ 65536ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
false,
{ 65536ul, ngraph::Shape{1, 1, 1, 1}, {0.f}, {254.f}, {-12.7f}, {12.7f}},
false,
"Convolution",
"FP32"
},
// not supported quantization level on weights
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
false,
{ 65536ul, ngraph::Shape{1, 1, 1, 1}, {0.f}, {254.f}, {-12.7f}, {12.7f}},
false,
"Convolution",
"FP32"
}
};
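In all three new cases the expected precision for the Convolution is FP32 rather than U8: a 65536-level FakeQuantize on the activations, on the weights, or on both cannot be mapped to the 8-bit precisions this suite targets, so the layer must stay unquantized instead of being transformed with a wrong range.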
const std::vector<ngraph::Shape> shapes = {

View File

@ -84,6 +84,33 @@ const std::vector<LayerTestsDefinitions::ConvolutionTransformationParam> params
"Convolution",
"U8"
},
// not supported quantization level on data
{
{ 65536ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
false,
{ 255ul, ngraph::Shape{1, 1, 1, 1}, {0.f}, {254.f}, {-12.7f}, {12.7f}},
false,
"Convolution",
"FP32"
},
// not supported quantization level on data & weights
{
{ 65536ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { 0.f }, { 25.5f } },
false,
{ 65536ul, ngraph::Shape{1, 1, 1, 1}, {0.f}, {254.f}, {-12.7f}, {12.7f}},
false,
"Convolution",
"FP32"
},
// not supported quantization level on weights
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { 0.f }, { 25.5f } },
false,
{ 65536ul, ngraph::Shape{1, 1, 1, 1}, {0.f}, {254.f}, {-12.7f}, {12.7f}},
false,
"Convolution",
"FP32"
}
};
INSTANTIATE_TEST_SUITE_P(smoke_LPT, ConvolutionTransformation,

View File

@ -29,10 +29,10 @@ std::string ConvolutionTransformation::getTestCaseName(const testing::TestParamI
std::tie(netPrecision, inputShape, targetDevice, params, param) = obj.param;
std::ostringstream result;
result << getTestCaseNameByParams(netPrecision, inputShape, targetDevice, params) << "_" <<
inputShape.rank().get_length() << "D_" <<
param.fakeQuantizeOnData << "_" <<
param.fakeQuantizeOnWeights;
result << getTestCaseNameByParams(netPrecision, inputShape, targetDevice, params) <<
"_rank=" << inputShape.rank().get_length() <<
"D_fq_on_data={" << param.fakeQuantizeOnData <<
"}_fq_on_weights={" << param.fakeQuantizeOnWeights << "}";
return result.str();
}

View File

@ -58,9 +58,13 @@ inline std::ostream& operator<<(std::ostream& out, const FakeQuantizeOnData& dat
if (data.empty()) {
return out << "{}";
}
return out << "_" << data.quantizationLevel << data.constantShape << "_" << data.inputLowValues << "_" << data.inputHighValues <<
"_" << data.outputLowValues << "_" << data.outputHighValues << "_" <<
(data.outputPrecision == ngraph::element::undefined ? "" : data.outputPrecision.get_type_name());
return out << "level=" << data.quantizationLevel <<
"_shape=" << data.constantShape <<
"_input_low=" << data.inputLowValues <<
"_input_high=" << data.inputHighValues <<
"_output_low=" << data.outputLowValues <<
"_output_high" << data.outputHighValues <<
"_precision=" << (data.outputPrecision == ngraph::element::undefined ? "" : data.outputPrecision.get_type_name());
}
class FakeQuantizeOnDataWithConstant {
@ -96,11 +100,13 @@ inline std::ostream& operator<<(std::ostream& out, const FakeQuantizeOnDataWithC
if (data.empty()) {
return out << "{}";
}
return out << "_" << data.quantizationLevel <<
(data.constantShapes.empty() ? ngraph::Shape{} : data.constantShapes[0]) << "_" <<
data.inputLowValues << "_" << data.inputHighValues << "_" <<
data.outputLowValues << "_" << data.outputHighValues << "_" <<
(data.outputPrecision == ngraph::element::undefined ? "" : data.outputPrecision.get_type_name());
return out << "level=" << data.quantizationLevel <<
"_shape=" <<(data.constantShapes.empty() ? ngraph::Shape{} : data.constantShapes[0]) <<
"_input_low=" << data.inputLowValues <<
"_input_high=" << data.inputHighValues <<
"_output_low=" << data.outputLowValues <<
"_output_high=" << data.outputHighValues <<
"_precision=" << (data.outputPrecision == ngraph::element::undefined ? "" : data.outputPrecision.get_type_name());
}
} // namespace subgraph
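With the reworked operators a test-case name becomes self-describing. For the first new convolution case above, the data part would render roughly as follows (illustrative; the exact rendering of shapes and value lists depends on their own stream operators):

level=65536_shape={1,1,1,1}_input_low={0}_input_high={25.5}_output_low={0}_output_high={25.5}_precision=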