[LPT] isAsymmetricQuantization & isAsymmetricOnWeights validation (#8316)

* [LPT] isAsymmetricQuantization & isAsymmetricOnWeights

* [LPT] isAsymmetricOnWeights tests

* [LPT] tests improvements: comments fixes
This commit is contained in:
Edward Shogulin 2021-11-05 13:24:18 +03:00 committed by GitHub
parent 2ed4e9c05f
commit 5f7e3cdfb9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 206 additions and 13 deletions

View File

@ -29,7 +29,7 @@ public:
const std::shared_ptr<ngraph::opset1::Multiply>& multiply,
const std::shared_ptr<ngraph::opset1::Constant>& multiplyConstant);
bool empty() const;
bool empty() const noexcept;
bool multiplyHasZeroOrDenormal() const;
bool isShared() const;
bool isLowPrecision() const;

View File

@ -56,6 +56,10 @@ public:
max(max),
hasZeroPoint(hasZeroPoint) {}
bool empty() const noexcept {
return (precision == element::undefined) && (min == 0.f) && (max == 0.f) && (!hasZeroPoint);
}
static bool isSupported(const element::Type& precision) {
static const std::set<element::Type_t> lowPrecision = {
element::i8, element::u8,

View File

@ -51,6 +51,8 @@ public:
float getOutputLowValue(const size_t channel) const;
float getOutputHighValue(const size_t channel) const;
bool empty() const noexcept;
static bool isSupportedLevel(const size_t level);
const size_t levels;

View File

@ -34,7 +34,7 @@ FakeQuantizeDequantization::FakeQuantizeDequantization(
multiplyConstant(multiplyConstant) {
}
bool FakeQuantizeDequantization::empty() const {
bool FakeQuantizeDequantization::empty() const noexcept {
    // Dequantization is absent when neither a Subtract (zero point)
    // nor a Multiply (scale) operation was matched.
    return !subtract && !multiply;
}

View File

@ -311,6 +311,9 @@ LayerTransformation::PrecisionDetails LayerTransformation::getPrecisionDetails(c
bool LayerTransformation::isAsymmetricQuantization(const std::shared_ptr<const Node>& layer) {
    // getDequantization needs a mutable node: drop the const qualifier while
    // keeping shared ownership of the same underlying object.
    const auto mutableLayer = std::const_pointer_cast<ngraph::Node>(layer);
    const auto dequantization = NetworkHelper::getDequantization(mutableLayer);
    // Quantization is asymmetric exactly when a zero-point (Subtract) exists
    // in a non-empty dequantization chain.
    return !dequantization.empty() && (dequantization.subtract != nullptr);
}

View File

@ -91,6 +91,10 @@ void QuantizationDetails::getOutputIntervals(
}
QuantizationDetails QuantizationDetails::getDetails(std::shared_ptr<opset1::FakeQuantize> quantize) {
if (!QuantizationDetails::outputLayoutIsSupported(quantize)) {
return QuantizationDetails();
}
const std::vector<float> inputLowValues = ov::as_type_ptr<opset1::Constant>(quantize->get_input_node_shared_ptr(1))->cast_vector<float>();
const std::vector<float> inputHighValues = ov::as_type_ptr<opset1::Constant>(quantize->get_input_node_shared_ptr(2))->cast_vector<float>();
@ -153,6 +157,10 @@ std::vector<float> QuantizationDetails::getBlobValue(std::shared_ptr<Node> const
return ov::as_type_ptr<opset1::Constant>(constantLayer)->cast_vector<float>();
}
bool QuantizationDetails::empty() const noexcept {
    // Matches the default-constructed state: zero levels and no interval data.
    const bool noIntervals = inputLowValues.empty() && inputHighValues.empty() &&
        outputLowValues.empty() && outputHighValues.empty();
    return (levels == 0ul) && noIntervals;
}
bool QuantizationDetails::isSupportedLevel(const size_t level) {
static const std::unordered_set<size_t> supported_levels = { 255, 256, 65536, 65535, static_cast<size_t>(4294967296), 4294967295 };
return supported_levels.find(level) != supported_levels.end();

View File

@ -361,6 +361,9 @@ std::shared_ptr<opset1::FakeQuantize> WeightableLayerTransformation::getFakeQuan
DataPrecision WeightableLayerTransformation::getDataPrecisionOnWeights(const std::shared_ptr<Node>& node) {
const auto fq = getFakeQuantizeOnWeights(node);
const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(fq);
if (quantizationDetails.empty()) {
return DataPrecision();
}
const auto precisionsAttribute = getAttributeFromOutput<PrecisionsAttributePtr>(fq);
const auto precisions = precisionsAttribute == nullptr ?
@ -380,6 +383,10 @@ bool WeightableLayerTransformation::isAsymmetricOnWeights(const std::shared_ptr<
if (dequantization.empty()) {
const auto dataPrecision = WeightableLayerTransformation::getDataPrecisionOnWeights(n);
if (dataPrecision.empty()) {
return false;
}
if (dataPrecision.hasZeroPoint) {
return true;
}

View File

@ -0,0 +1,142 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "layer_transformation.hpp"
#include <sstream>
#include <memory>
#include <utility>
#include <gtest/gtest.h>
#include <transformations/utils/utils.hpp>
#include <transformations/init_node_info.hpp>
#include <low_precision/weightable_layer_transformation.hpp>
#include "lpt_ngraph_functions/convolution_function.hpp"
using namespace testing;
using namespace ngraph;
using namespace ngraph::pass;
// One isAsymmetricOnWeights test case: transformation parameters plus the
// ingredients used to build the original (pre-transformation) function.
class IsAsymmetricOnWeightsTestValues {
public:
class Actual {
public:
// Precision of the input tensor before the dequantization subgraph.
ngraph::element::Type precisionBeforeDequantization;
// Dequantization operations (Convert/Subtract/Multiply) on activations.
ngraph::builder::subgraph::DequantizationOperations dequantizationOnActivations;
// Constant weights fed into the convolution.
std::shared_ptr<ngraph::opset1::Constant> weights;
// FakeQuantize description applied on the weights path.
builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights;
};
TestTransformationParams params;
Actual actual;
};
// Full parameter tuple for one test instance:
// <net precision, input shape, test values,
//  {five per-FQ-input transpose flags, expected isAsymmetricOnWeights result}>.
typedef std::tuple<
element::Type,
ngraph::PartialShape,
IsAsymmetricOnWeightsTestValues,
std::pair<std::vector<bool>, bool> > IsAsymmetricOnWeightsParams;
// Parameterized fixture: builds a convolution function with configurable
// transposes injected on the weights' FakeQuantize inputs, so that
// WeightableLayerTransformation::isAsymmetricOnWeights can be validated
// against both supported and unsupported FakeQuantize layouts.
class IsAsymmetricOnWeightsTransformation : public LayerTransformation, public testing::WithParamInterface<IsAsymmetricOnWeightsParams> {
public:
void SetUp() override {
const auto netPrecision = std::get<0>(GetParam());
const auto inputShape = std::get<1>(GetParam());
auto testValues = std::get<2>(GetParam());
// first: five transpose flags (data, inputLow, inputHigh, outputLow, outputHigh);
// second: expected isAsymmetricOnWeights result (checked in the test body).
// NOTE(review): assumes `first` always holds exactly 5 elements — guaranteed
// by the transposeFlags table below; confirm if new cases are added.
std::pair<std::vector<bool>, bool> transposeAndIsAsymmetricOnWeights = std::get<3>(GetParam());
actualFunction = ngraph::builder::subgraph::ConvolutionFunction::getOriginal(
netPrecision,
testValues.actual.precisionBeforeDequantization,
inputShape,
testValues.actual.dequantizationOnActivations,
testValues.actual.weights,
testValues.actual.fakeQuantizeOnWeights,
transposeAndIsAsymmetricOnWeights.first[0],
transposeAndIsAsymmetricOnWeights.first[1],
transposeAndIsAsymmetricOnWeights.first[2],
transposeAndIsAsymmetricOnWeights.first[3],
transposeAndIsAsymmetricOnWeights.first[4]);
}
// Encodes all parameters (precisions, shapes, weights, FQ description and
// the five transpose flags) into a unique, human-readable test name.
static std::string getTestCaseName(testing::TestParamInfo<IsAsymmetricOnWeightsParams> obj) {
const auto netPrecision = std::get<0>(obj.param);
auto inputShape = std::get<1>(obj.param);
IsAsymmetricOnWeightsTestValues testValues = std::get<2>(obj.param);
std::pair<std::vector<bool>, bool> transposeAndIsAsymmetricOnWeights = std::get<3>(obj.param);
std::ostringstream result;
result << toString(testValues.params) << "_" <<
netPrecision << "_" <<
inputShape << "_" <<
testValues.actual.precisionBeforeDequantization << "_" <<
testValues.actual.dequantizationOnActivations << "_" << "_weights_" <<
testValues.actual.weights->get_element_type() << "_" << "{ " <<
testValues.actual.weights->cast_vector<float>()[0] << " }_" <<
testValues.actual.fakeQuantizeOnWeights << "_" <<
transposeAndIsAsymmetricOnWeights.first[0] << "_" <<
transposeAndIsAsymmetricOnWeights.first[1] << "_" <<
transposeAndIsAsymmetricOnWeights.first[2] << "_" <<
transposeAndIsAsymmetricOnWeights.first[3] << "_" <<
transposeAndIsAsymmetricOnWeights.first[4];
return result.str();
}
};
// Verifies isAsymmetricOnWeights on the single convolution of the built
// function against the expected value carried in the test parameters.
TEST_P(IsAsymmetricOnWeightsTransformation, CompareFunctions) {
actualFunction->validate_nodes_and_infer_types();
const auto convolutions = LayerTransformation::get<opset1::Convolution>(actualFunction);
// Exactly one convolution is expected in the test graph.
ASSERT_TRUE(convolutions.size() == 1ul) << "convolution was not found";
const auto isAsymmetricOnWeights = ngraph::pass::low_precision::WeightableLayerTransformation::isAsymmetricOnWeights(convolutions[0]);
std::pair<std::vector<bool>, bool> transposeAndIsAsymmetricOnWeights = std::get<3>(GetParam());
// second = expected result for this transpose-flag combination.
ASSERT_EQ(transposeAndIsAsymmetricOnWeights.second, isAsymmetricOnWeights);
}
// Network precisions to test against.
const std::vector<element::Type> netPrecisions = {
element::f32
};
// Input shapes, including partially dynamic ones, that the check must handle.
const std::vector<ngraph::PartialShape> suitablePartialShapes = {
ngraph::PartialShape({ 1, 3, 72, 48 }),
ngraph::PartialShape({ 4, 3, 72, 48 }),
ngraph::PartialShape({ Dimension::dynamic(), 3, 72, 48 }),
ngraph::PartialShape({ 1, 3, Dimension::dynamic(), Dimension::dynamic() }),
};
// Single base case: u8 activations with a 128 zero point (asymmetric) and an
// asymmetric FakeQuantize on weights (output interval [-1, 1.27]).
const std::vector<IsAsymmetricOnWeightsTestValues> testValues = {
{
LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true),
{
ngraph::element::u8,
{{ngraph::element::f32}, { 128.f }, { 0.02f }},
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ 2.f }),
{ 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.f }, { 1.27f } },
}
}
};
// {five transpose flags, expected result}: a Transpose on the FQ data input is
// tolerated, while a Transpose on any interval input makes the FakeQuantize
// unsupported, so isAsymmetricOnWeights must return false.
const std::vector<std::pair<std::vector<bool>, bool> > transposeFlags = {
// asymmetric quantization
{{false, false, false, false, false}, true},
{{true, false, false, false, false}, true},
// not supported FakeQuantize
{{false, true, false, false, false}, false},
{{false, false, true, false, false}, false},
{{false, false, false, true, false}, false},
{{false, false, false, false, true}, false}
};
// Cartesian product of all precision / shape / values / flag combinations.
INSTANTIATE_TEST_SUITE_P(
smoke_LPT,
IsAsymmetricOnWeightsTransformation,
::testing::Combine(
::testing::ValuesIn(netPrecisions),
::testing::ValuesIn(suitablePartialShapes),
::testing::ValuesIn(testValues),
::testing::ValuesIn(transposeFlags)),
IsAsymmetricOnWeightsTransformation::getTestCaseName);

View File

@ -24,7 +24,12 @@ public:
const ngraph::PartialShape& inputShape,
const ngraph::builder::subgraph::DequantizationOperations& dequantizationBefore,
std::shared_ptr<ngraph::opset1::Constant> weights,
const ngraph::builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights);
const ngraph::builder::subgraph::FakeQuantizeOnWeights fqOnWeights,
const bool fqOnWeightsTransposeOnData = false,
const bool fqOnWeightsTransposeOnInputLow = false,
const bool fqOnWeightsTransposeOnInputHigh = false,
const bool fqOnWeightsTransposeOnOutputLow = false,
const bool fqOnWeightsTransposeOnOutputHigh = false);
static std::shared_ptr<ngraph::Function> getOriginalWithIncorrectWeights(
const ngraph::Shape& inputShape,

View File

@ -27,7 +27,12 @@ std::shared_ptr<ngraph::Function> ConvolutionFunction::getOriginal(
const ngraph::PartialShape& inputShape,
const ngraph::builder::subgraph::DequantizationOperations& dequantizationBefore,
std::shared_ptr<ngraph::opset1::Constant> weights,
const ngraph::builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights) {
const ngraph::builder::subgraph::FakeQuantizeOnWeights fqOnWeights,
const bool transposeOnData,
const bool transposeOnInputLow,
const bool transposeOnInputHigh,
const bool transposeOnOutputLow,
const bool transposeOnOutputHigh) {
const auto input = std::make_shared<ngraph::opset1::Parameter>(inputPrecision, inputShape);
auto dequantizationStructure = dequantizationBefore;
dequantizationStructure.multiply.outPrecision = netPrecision;
@ -53,15 +58,32 @@ std::shared_ptr<ngraph::Function> ConvolutionFunction::getOriginal(
convertOnWeights->constant_fold(convertedOutput, convertOnWeights->input_values());
const auto convertedWeights = convertedOutput[0].get_node_shared_ptr();
const auto onWeights = fakeQuantizeOnWeights.empty() ? convertedWeights :
ngraph::builder::makeFakeQuantize(
convertedWeights, netPrecision,
fakeQuantizeOnWeights.quantizationLevel,
fakeQuantizeOnWeights.constantShape,
fakeQuantizeOnWeights.inputLowValues,
fakeQuantizeOnWeights.inputHighValues,
fakeQuantizeOnWeights.outputLowValues,
fakeQuantizeOnWeights.outputHighValues);
const std::shared_ptr<ngraph::Node> constant = ngraph::opset1::Constant::create(ngraph::element::u64, ngraph::Shape{4}, {0, 1, 2, 3});
const std::shared_ptr<Node> onWeights = fqOnWeights.empty() ?
convertedWeights :
std::make_shared<opset1::FakeQuantize>(
transposeOnData ? std::make_shared<opset1::Transpose>(convertedWeights, constant) : convertedWeights,
transposeOnInputLow ?
std::make_shared<opset1::Transpose>(
makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.inputLowValues, fqOnWeights.inputLowValues.empty()),
constant->clone_with_new_inputs({})) :
makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.inputLowValues, fqOnWeights.inputLowValues.empty()),
transposeOnInputHigh ?
std::make_shared<opset1::Transpose>(
makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.inputHighValues, fqOnWeights.inputHighValues.empty()),
constant->clone_with_new_inputs({})) :
makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.inputHighValues, fqOnWeights.inputHighValues.empty()),
transposeOnOutputLow ?
std::make_shared<opset1::Transpose>(
makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.outputLowValues, fqOnWeights.outputLowValues.empty()),
constant->clone_with_new_inputs({})) :
makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.outputLowValues, fqOnWeights.outputLowValues.empty()),
transposeOnOutputHigh ?
std::make_shared<opset1::Transpose>(
makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.outputHighValues, fqOnWeights.outputHighValues.empty()),
constant->clone_with_new_inputs({})) :
makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.outputHighValues, fqOnWeights.outputHighValues.empty()),
fqOnWeights.quantizationLevel);
auto convolutionOriginal = ngraph::opset1::Convolution(
ngraph::op::TemporaryReplaceOutputType(dequantization, netPrecision).get(),