[LPT] Fixes for the cases with convert before subtraction constant (#12835)

2022-09-06 19:41:29 +02:00 · 2022-09-06 19:41:29 +02:00 · 88e4ac5e53
commit 88e4ac5e53
parent 9d55355daf
12 changed files with 224 additions and 5 deletions
--- a/src/common/low_precision_transformations/include/low_precision/network_helper.hpp
+++ b/src/common/low_precision_transformations/include/low_precision/network_helper.hpp
@ -128,6 +128,10 @@ public:
        const element::Type deqPrecision = element::f32,
        std::shared_ptr<ngraph::Node> input = nullptr);

+    static std::shared_ptr<ngraph::Node> makeDequantizationSubtract(
+        const ngraph::Output<ngraph::Node>& parent,
+        const ngraph::Output<ngraph::Node>& subtract_constant);
+
    static FakeQuantizeDequantization createDequantizationFromFakeQuantize(
        std::shared_ptr<opset1::FakeQuantize> fq,
        element::Type precision,
@ -156,7 +160,7 @@ public:

    static std::shared_ptr<opset1::Constant> normalizeDequantizationShape(
            const std::shared_ptr<Node>& eltwise,
-            const bool convertIsExpected = false);
+            const bool convertIsExpected = true);

    // 1. remove Convert if possible
    // 2. optimize Constant if possible
--- a/src/common/low_precision_transformations/src/network_helper.cpp
+++ b/src/common/low_precision_transformations/src/network_helper.cpp
@ -1187,6 +1187,16 @@ FakeQuantizeDequantization NetworkHelper::makeDequantization(
    return FakeQuantizeDequantization(input, convert, subtract, nullptr, subtractConstant, multiply, multiplyConstant);
 }

+std::shared_ptr<ov::Node> NetworkHelper::makeDequantizationSubtract(const ov::Output<ov::Node>& parent,
+                                                                    const ov::Output<ov::Node>& subtract_constant) {
+    // Builds Subtract(parent, constant); a Convert is put on the constant only when its element type differs
+    const auto& target_type = parent.get_element_type();
+    if (subtract_constant.get_element_type() == target_type) {
+        return std::make_shared<opset1::Subtract>(parent, subtract_constant);
+    }
+    return std::make_shared<opset1::Subtract>(parent, std::make_shared<opset1::Convert>(subtract_constant, target_type));
+}
+
 FakeQuantizeDequantization NetworkHelper::createDequantizationFromFakeQuantize(
    std::shared_ptr<opset1::FakeQuantize> fq,
    element::Type precision,
@ -1644,6 +1654,9 @@ NetworkHelper::InsertDequantizationResult NetworkHelper::moveDequantizationAfter
                op::TemporaryReplaceOutputType(foldConvert(dequantization.subtractConstant, parentPrecision), element::f32).get());
            ngraph::copy_runtime_info({ newOperation, parent }, parent);
        } else {
+            // Subtract constant could be changed (including a shape) before propagation in some cases
+            // so it's necessary to compute the shape for a subtractConvert before creating a new subtract
+            dequantization.subtractConvert->validate_and_infer_types();
            parent = std::make_shared<opset1::Subtract>(parent, dequantization.subtractConvert);
            ngraph::copy_runtime_info({ newOperation, parent }, parent);
        }
@ -1736,6 +1749,9 @@ NetworkHelper::InsertDequantizationResult NetworkHelper::moveDequantizationBefor
                        foldConvert(subtractConstant, parentPrecision), element::f32).get());
                parent->set_friendly_name(dequantization.subtract->get_friendly_name() + "_" + std::to_string(i + 1));
            } else {
+                // Subtract constant could be changed (including a shape) before propagation in some cases
+                // so it's necessary to compute the shape for a subtractConvert before creating a new subtract
+                dequantization.subtractConvert->validate_and_infer_types();
                parent = std::make_shared<opset1::Subtract>(parent, dequantization.subtractConvert);
            }
            ngraph::copy_runtime_info(dequantization.subtract, parent);
--- a/src/common/low_precision_transformations/src/shuffle_channels.cpp
+++ b/src/common/low_precision_transformations/src/shuffle_channels.cpp
@ -42,7 +42,7 @@ bool ShuffleChannelsTransformation::transform(TransformationContext& context, ng
    auto dequantization = NetworkHelper::getDequantization(shuffleChannels, defaultPrecisions);

    const auto shuffleDequantizationConstant = [&](const std::shared_ptr<Node>& eltwise) {
-        const auto normalizedConst = NetworkHelper::normalizeDequantizationShape(eltwise);
+        const auto normalizedConst = NetworkHelper::normalizeDequantizationShape(eltwise, true);
        const auto constShape = normalizedConst->get_shape();

        if (shape_size(constShape) == 1ul) {
--- a/src/common/low_precision_transformations/src/split.cpp
+++ b/src/common/low_precision_transformations/src/split.cpp
@ -90,7 +90,7 @@ bool SplitTransformation::transform(TransformationContext& context, ngraph::patt
        }

        if (dequantization.subtract) {
-            const auto subtract = std::make_shared<opset1::Subtract>(parent, splitedSub[i]);
+            const auto subtract = NetworkHelper::makeDequantizationSubtract(parent, splitedSub[i]);
            copy_runtime_info({ newSplit, subtract }, subtract);
            parent = subtract;
        }
--- a/src/common/low_precision_transformations/src/transpose.cpp
+++ b/src/common/low_precision_transformations/src/transpose.cpp
@ -125,8 +125,9 @@ bool TransposeTransformation::canBeTransformed(const TransformationContext& cont
        return true;
    }();

-    const auto values = constant->cast_vector<float>();
+    // TODO: remove legacy limitation
    if (!isPerTensor) {
+        const auto values = constant->cast_vector<float>();
        if ((values.size() < 2ul) || (values[0] != 0) || (values[1] != 1)) {
            return false;
        }
--- a/src/tests/functional/inference_engine/lp_transformations/pad_transformation.cpp
+++ b/src/tests/functional/inference_engine/lp_transformations/pad_transformation.cpp
@ -315,6 +315,28 @@ const std::vector<PadTransformationTestValues> deqWithSub = {
            ngraph::element::u8,
            {{ngraph::element::f32}, {{128.f, 64.f, 32.f}}, {{3.f, 1.f, 2.f}}}
        }
+    },
+    // int8 subtraction with Convert from u8 to fp32
+    {
+        LayerTransformation::createParamsU8I8(),
+        {
+            ngraph::element::u8,
+            {
+                {ngraph::element::f32},
+                {{128.f}, element::undefined, {1, 3, 1, 1}, false, 1ul, element::u8, true},
+                {3.f}
+            }
+        },
+        {
+            ngraph::element::u8,
+            {{}, {}, {}},
+            ngraph::element::u8,
+            {
+                {ngraph::element::f32},
+                {{128.f}, element::undefined, {1, 3, 1, 1}, false, 1ul, element::u8, true},
+                {3.f}
+            }
+        }
    }
 };

--- a/src/tests/functional/inference_engine/lp_transformations/reshape_transformation.cpp
+++ b/src/tests/functional/inference_engine/lp_transformations/reshape_transformation.cpp
@ -250,6 +250,31 @@ const std::vector<ReshapeTransformationTestValues> testValues = {
            }
        }
    },
+    // U8: with subtract 4D -> 5D: channel dimension is split (4 -> 2x2):
+    // per-channel subtraction with Convert from u8 to fp32 and identical values
+    {
+        { 1, 4, 10, 10 },
+        { 1, 2, 2, 10, 10},
+        LayerTransformation::createParamsU8I8(),
+        {
+            ngraph::element::u8,
+            {
+                {ngraph::element::f32},
+                {{128.f}, element::undefined, {1, 4, 1, 1}, false, 1ul, element::u8, true},
+                {3.f}
+            }
+        },
+        {
+            ngraph::element::u8,
+            {{}, {}, {}},
+            ngraph::element::u8,
+            {
+                {ngraph::element::f32},
+                {{128.f}, element::undefined, {}, false, 1ul, element::u8, true},
+                {3.f}
+            }
+        }
+    },
    // U8: with subtract 3D -> 4D: channels are not affected, dynamic batch
    {
        { -1, 3, 20 },
--- a/src/tests/functional/inference_engine/lp_transformations/shuffle_channels_transformation.cpp
+++ b/src/tests/functional/inference_engine/lp_transformations/shuffle_channels_transformation.cpp
@ -139,6 +139,30 @@ const std::vector<ShuffleChannelsTransformationTestValues> testValues = {
            {{ngraph::element::f32}, {{128.f, 64.f, 32.f}}, {{0.01f, 0.02f, 0.03f}}}
        }
    },
+    // subtraction with Convert from u8 to fp32
+    {
+        LayerTransformation::createParamsU8I8(),
+        1,
+        1,
+        {
+            ngraph::element::u8,
+            {
+                {ngraph::element::f32},
+                {{128.f}, element::undefined, {1, 3, 1, 1}, false, 1ul, element::u8, true},
+                {3.f}
+            }
+        },
+        {
+            ngraph::element::u8,
+            {},
+            ngraph::element::u8,
+            {
+                {ngraph::element::f32},
+                {{128.f}, element::undefined, {1, 3, 1, 1}, false, 1ul, element::u8, true},
+                {3.f}
+            }
+        }
+    },
    // U8 quantization by spatial dimension, shuffling by the same dimension
    {
        LayerTransformation::createParamsU8I8(),
--- a/src/tests/functional/inference_engine/lp_transformations/split_transformation.cpp
+++ b/src/tests/functional/inference_engine/lp_transformations/split_transformation.cpp
@ -136,6 +136,46 @@ const std::vector<SplitTransformationTestValues> testValues = {
            }
        }
    },
+    // U8 per tensor quantization / int8 subtraction with Convert from u8 to fp32
+    {
+        { 1, 3, 16, 16 }, std::int64_t{2}, size_t{2},
+        LayerTransformation::createParamsU8I8(),
+        // ActualValues
+        {
+            ngraph::element::u8,
+            {{ngraph::element::f32}, {{128.f}, element::undefined, {}, false, 1ul, element::u8, true}, {3.f}}
+        },
+        // ExpectedValues
+        {
+            ngraph::element::u8,
+            {},
+            ngraph::element::u8,
+            {
+                {{ngraph::element::f32}, {{128.f}, element::undefined, {}, false, 1ul, element::u8, true}, {3.f}},
+                {{ngraph::element::f32}, {{128.f}, element::undefined, {}, false, 1ul, element::u8, true}, {3.f}},
+            }
+        }
+    },
+    // U8 per tensor quantization / int8 subtraction with Convert from fp16 -> fp32
+    {
+        { 1, 3, 16, 16 }, std::int64_t{2}, size_t{2},
+        LayerTransformation::createParamsU8I8(),
+        // ActualValues
+        {
+            ngraph::element::u8,
+            {{ngraph::element::f32}, {{128.f}, element::undefined, {}, false, 1ul, element::f16, true}, {3.f}}
+        },
+        // ExpectedValues
+        {
+            ngraph::element::u8,
+            {},
+            ngraph::element::u8,
+            {
+                {{ngraph::element::f32}, {{128.f}, element::undefined, {}, false, 1ul, element::f16, true}, {3.f}},
+                {{ngraph::element::f32}, {{128.f}, element::undefined, {}, false, 1ul, element::f16, true}, {3.f}},
+            }
+        }
+    },
    {
        { -1, -1, -1, -1 }, std::int64_t{2}, size_t{2},
        LayerTransformation::createParamsU8I8(),
--- a/src/tests/functional/inference_engine/lp_transformations/strided_slice_transformation.cpp
+++ b/src/tests/functional/inference_engine/lp_transformations/strided_slice_transformation.cpp
@ -228,6 +228,29 @@ const std::vector<StridedSliceTransformationTestValues> stridedSliceTransformati
            {{ngraph::element::f32}, { 128.f }, { 0.1f }}
        }
    },
+    // U8: channel slice, per-channel quantization with the same values, subtraction with Convert from u8 to fp32
+    {
+        LayerTransformation::createParamsU8I8(),
+        channelSlice,
+        {
+            ngraph::element::u8,
+            {
+                {ngraph::element::f32},
+                {{128.f}, element::undefined, {1, 3, 1, 1}, false, 1ul, element::u8, true},
+                {3.f}
+            }
+        },
+        {
+            ngraph::element::u8,
+            {},
+            ngraph::element::u8,
+            {
+                {ngraph::element::f32},
+                {{128.f}, element::undefined, {}, false, 1ul, element::u8, true},
+                {3.f}
+            }
+        }
+    },
    // U8: channel slice, per-channel quantization with different values
    {
        LayerTransformation::createParamsU8I8(),
--- a/src/tests/functional/inference_engine/lp_transformations/transpose_transformation.cpp
+++ b/src/tests/functional/inference_engine/lp_transformations/transpose_transformation.cpp
@ -168,6 +168,68 @@ const std::vector<TransposeTransformationTestValues> testValues = {
            }
        }
    },
+    // U8: per-channel quantization with the same values,
+    // subtraction with Convert from u8 to fp32, transpose channel dimension
+    {
+        { 0, 3, 1, 2 },
+        LayerTransformation::createParamsU8I8(),
+        {
+            ngraph::element::u8,
+            {
+                { ngraph::element::f32 },
+                {{128.f}, element::undefined, {1, 3, 1, 1}, false, 1ul, element::u8, true},
+                {{0.1}, ngraph::element::f32, { 1, 3, 1, 1 }}
+            }
+        },
+        {
+            ngraph::element::u8,
+            {{}, {}, {}},
+            ngraph::element::u8,
+            {
+                { ngraph::element::f32 },
+                {{128.f}, element::undefined, {1, 1, 3, 1}, false, 1ul, element::u8, true},
+                {{0.1}, ngraph::element::f32, {1, 1, 3, 1}}
+            }
+        }
+    },
+    // U8: per-tensor quantization, transpose channel dimension
+    {
+        { 0, 3, 1, 2 },
+        LayerTransformation::createParamsU8I8(),
+        {
+            ngraph::element::u8,
+            {{ngraph::element::f32}, {128}, {0.1f}}
+        },
+        {
+            ngraph::element::u8,
+            {{}, {}, {}},
+            ngraph::element::u8,
+            {{ngraph::element::f32}, {128}, {0.1f}}
+        }
+    },
+    // U8: per-channel quantization, transpose channel dimension
+    {
+        { 0, 2, 1, 3 },
+        LayerTransformation::createParamsU8I8(),
+        {
+            ngraph::element::u8,
+            {
+                { ngraph::element::f32 },
+                {{ 128, 64, 32 }, ngraph::element::f32, { 1, 3, 1, 1 }},
+                {{ 0.3f, 0.2f, 0.1f }, ngraph::element::f32, { 1, 3, 1, 1 }}
+            }
+        },
+        {
+            ngraph::element::u8,
+            {
+                { ngraph::element::f32 },
+                {{ 128, 64, 32 }, ngraph::element::f32, { 1, 3, 1, 1 }},
+                {{ 0.3f, 0.2f, 0.1f }, ngraph::element::f32, { 1, 3, 1, 1 }}
+            },
+            ngraph::element::f32,
+            {{}, {}, {}},
+        }
+    },
    // empty
    {
        { 0, 1, 3, 2 },
--- a/src/tests/ngraph_helpers/lpt_ngraph_functions/src/common/builders.cpp
+++ b/src/tests/ngraph_helpers/lpt_ngraph_functions/src/common/builders.cpp
@ -60,7 +60,9 @@ std::shared_ptr<Node> makeDequantization(
        if (dequantizationOperations.subtract.addConvert) {
            std::shared_ptr<Node> subtractConstConvert = std::make_shared<ngraph::opset1::Convert>(
                subtractConst,
-                dequantizationOperations.subtract.outPrecision);
+                dequantizationOperations.subtract.outPrecision == element::undefined ?
+                    parent.get_element_type() :
+                    dequantizationOperations.subtract.outPrecision);

            auto& rt = subtractConstConvert->get_rt_info();
            for (const auto& attribute : dequantizationOperations.subtract.convertAttributes) {