From 5f7e3cdfb9331cf46f845289f98edcd6efaf8eb7 Mon Sep 17 00:00:00 2001
From: Edward Shogulin
Date: Fri, 5 Nov 2021 13:24:18 +0300
Subject: [PATCH] [LPT] isAsymmetricQuantization & isAsymmetricOnWeights validation (#8316)

* [LPT] isAsymmetricQuantization & isAsymmetricOnWeights

* [LPT] isAsymmetricOnWeights tests

* [LPT] test improvements: comment fixes
---
 .../common/fake_quantize_dequantization.hpp   |   2 +-
 .../low_precision/layer_transformation.hpp    |   4 +
 .../low_precision/quantization_details.hpp    |   2 +
 .../src/fake_quantize_dequantization.cpp      |   2 +-
 .../src/layer_transformation.cpp              |   3 +
 .../src/quantization_details.cpp              |   8 +
 .../src/weightable_layer_transformation.cpp   |   7 +
 .../is_asymmetric_on_weights.cpp              | 142 ++++++++++++++++++
 .../convolution_function.hpp                  |   7 +-
 .../src/convolution_function.cpp              |  42 ++++--
 10 files changed, 206 insertions(+), 13 deletions(-)
 create mode 100644 inference-engine/tests/functional/inference_engine/lp_transformations/is_asymmetric_on_weights.cpp

diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/common/fake_quantize_dequantization.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/common/fake_quantize_dequantization.hpp
index a9fba5234d1..0da82810c97 100644
--- a/inference-engine/src/low_precision_transformations/include/low_precision/common/fake_quantize_dequantization.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/common/fake_quantize_dequantization.hpp
@@ -29,7 +29,7 @@ public:
         const std::shared_ptr<ngraph::opset1::Multiply>& multiply,
         const std::shared_ptr<ngraph::opset1::Constant>& multiplyConstant);
 
-    bool empty() const;
+    bool empty() const noexcept;
     bool multiplyHasZeroOrDenormal() const;
     bool isShared() const;
     bool isLowPrecision() const;
diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/layer_transformation.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/layer_transformation.hpp
index 17a5cf567e1..f08931a77a5 100644
--- a/inference-engine/src/low_precision_transformations/include/low_precision/layer_transformation.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/layer_transformation.hpp
@@ -56,6 +56,10 @@ public:
         max(max),
         hasZeroPoint(hasZeroPoint) {}
 
+    bool empty() const noexcept {
+        return (precision == element::undefined) && (min == 0.f) && (max == 0.f) && (!hasZeroPoint);
+    }
+
     static bool isSupported(const element::Type& precision) {
         static const std::set<element::Type> lowPrecision = {
             element::i8, element::u8,
diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/quantization_details.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/quantization_details.hpp
index a1c2f1ca497..975dca8cc53 100644
--- a/inference-engine/src/low_precision_transformations/include/low_precision/quantization_details.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/quantization_details.hpp
@@ -51,6 +51,8 @@ public:
     float getOutputLowValue(const size_t channel) const;
     float getOutputHighValue(const size_t channel) const;
 
+    bool empty() const noexcept;
+
     static bool isSupportedLevel(const size_t level);
 
     const size_t levels;
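All three headers above add the same kind of guard: a default-constructed value object doubles as a "nothing found" sentinel, and empty() is the test for it. A minimal standalone sketch of the convention follows; the Details struct is illustrative, not the real QuantizationDetails class:

    #include <cstddef>
    #include <vector>

    // Illustrative stand-in for QuantizationDetails: default construction
    // (levels == 0, empty interval vectors) means "no data was extracted".
    struct Details {
        size_t levels = 0;
        std::vector<float> inputLowValues;

        bool empty() const noexcept {
            return (levels == 0) && inputLowValues.empty();
        }
    };

    // Callers receive a Details by value and must test empty() before use,
    // instead of dereferencing constant pointers that may not exist.
    bool hasUsableDetails(const Details& details) {
        return !details.empty();
    }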
diff --git a/inference-engine/src/low_precision_transformations/src/fake_quantize_dequantization.cpp b/inference-engine/src/low_precision_transformations/src/fake_quantize_dequantization.cpp
index 52e651b6ca0..46b56716081 100644
--- a/inference-engine/src/low_precision_transformations/src/fake_quantize_dequantization.cpp
+++ b/inference-engine/src/low_precision_transformations/src/fake_quantize_dequantization.cpp
@@ -34,7 +34,7 @@ FakeQuantizeDequantization::FakeQuantizeDequantization(
     multiplyConstant(multiplyConstant) {
 }
 
-bool FakeQuantizeDequantization::empty() const {
+bool FakeQuantizeDequantization::empty() const noexcept {
     return (subtract == nullptr) && (multiply == nullptr);
 }
diff --git a/inference-engine/src/low_precision_transformations/src/layer_transformation.cpp b/inference-engine/src/low_precision_transformations/src/layer_transformation.cpp
index 64363ace33c..928403133fa 100644
--- a/inference-engine/src/low_precision_transformations/src/layer_transformation.cpp
+++ b/inference-engine/src/low_precision_transformations/src/layer_transformation.cpp
@@ -311,6 +311,9 @@ LayerTransformation::PrecisionDetails LayerTransformation::getPrecisionDetails(c
 bool LayerTransformation::isAsymmetricQuantization(const std::shared_ptr<const Node>& layer) {
     const auto nonConstNode = const_cast<ngraph::Node*>(layer.get())->shared_from_this();
     const auto dequantization = NetworkHelper::getDequantization(nonConstNode);
+    if (dequantization.empty()) {
+        return false;
+    }
     return dequantization.subtract != nullptr;
 }
 
diff --git a/inference-engine/src/low_precision_transformations/src/quantization_details.cpp b/inference-engine/src/low_precision_transformations/src/quantization_details.cpp
index 84028c99a15..6ea8d159cd8 100644
--- a/inference-engine/src/low_precision_transformations/src/quantization_details.cpp
+++ b/inference-engine/src/low_precision_transformations/src/quantization_details.cpp
@@ -91,6 +91,10 @@ void QuantizationDetails::getOutputIntervals(
 }
 
 QuantizationDetails QuantizationDetails::getDetails(std::shared_ptr<opset1::FakeQuantize> quantize) {
+    if (!QuantizationDetails::outputLayoutIsSupported(quantize)) {
+        return QuantizationDetails();
+    }
+
     const std::vector<float> inputLowValues = ov::as_type_ptr<opset1::Constant>(quantize->get_input_node_shared_ptr(1))->cast_vector<float>();
     const std::vector<float> inputHighValues = ov::as_type_ptr<opset1::Constant>(quantize->get_input_node_shared_ptr(2))->cast_vector<float>();
 
@@ -153,6 +157,10 @@ std::vector<float> QuantizationDetails::getBlobValue(std::shared_ptr<Node> const
     return ov::as_type_ptr<opset1::Constant>(constantLayer)->cast_vector<float>();
 }
 
+bool QuantizationDetails::empty() const noexcept {
+    return (levels == 0ul) && inputLowValues.empty() && inputHighValues.empty() && outputLowValues.empty() && outputHighValues.empty();
+}
+
 bool QuantizationDetails::isSupportedLevel(const size_t level) {
     static const std::unordered_set<size_t> supported_levels = { 255, 256, 65536, 65535, static_cast<size_t>(4294967296), 4294967295 };
     return supported_levels.find(level) != supported_levels.end();
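With the guard above, isAsymmetricQuantization distinguishes three cases: no dequantization matched at all (now explicitly false), dequantization without Subtract (symmetric, false), and dequantization with Subtract (asymmetric, true). A reduced sketch of that decision, with a hypothetical Dequant struct standing in for FakeQuantizeDequantization:

    #include <memory>

    struct Op {};  // stand-in for an ngraph operation node

    // Hypothetical reduction of FakeQuantizeDequantization to the two
    // members the predicate needs.
    struct Dequant {
        std::shared_ptr<Op> subtract;  // zero-point op, may be absent
        std::shared_ptr<Op> multiply;  // scale op, may be absent

        bool empty() const noexcept {
            return (subtract == nullptr) && (multiply == nullptr);
        }
    };

    bool isAsymmetric(const Dequant& dequantization) {
        if (dequantization.empty()) {
            return false;  // nothing matched: not a dequantization pattern at all
        }
        return dequantization.subtract != nullptr;  // Subtract implies a zero point
    }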
diff --git a/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp b/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp
index a8aee81aca6..ac6d00ef420 100644
--- a/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp
+++ b/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp
@@ -361,6 +361,9 @@ std::shared_ptr<opset1::FakeQuantize> WeightableLayerTransformation::getFakeQuan
 DataPrecision WeightableLayerTransformation::getDataPrecisionOnWeights(const std::shared_ptr<Node>& node) {
     const auto fq = getFakeQuantizeOnWeights(node);
     const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(fq);
+    if (quantizationDetails.empty()) {
+        return DataPrecision();
+    }
 
     const auto precisionsAttribute = getAttributeFromOutput<PrecisionsAttributePtr>(fq);
     const auto precisions = precisionsAttribute == nullptr ?
@@ -380,6 +383,10 @@ bool WeightableLayerTransformation::isAsymmetricOnWeights(const std::shared_ptr<
 
     if (dequantization.empty()) {
         const auto dataPrecision = WeightableLayerTransformation::getDataPrecisionOnWeights(n);
+        if (dataPrecision.empty()) {
+            return false;
+        }
+
         if (dataPrecision.hasZeroPoint) {
             return true;
         }
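Taken together with the changes in quantization_details.cpp, the weights check now fails closed: an unsupported FakeQuantize layout yields empty QuantizationDetails, which yields an empty DataPrecision, which isAsymmetricOnWeights reports as "not asymmetric". A condensed sketch of that chain, using simplified stand-in types rather than the real classes:

    // Simplified stand-in for DataPrecision: default construction is the
    // "nothing to analyze" sentinel, mirroring DataPrecision::empty().
    struct Precision {
        bool valid = false;
        bool hasZeroPoint = false;

        bool empty() const noexcept { return !valid; }
    };

    // fqSupported models QuantizationDetails::outputLayoutIsSupported().
    Precision getPrecisionOnWeights(bool fqSupported) {
        if (!fqSupported) {
            return Precision();  // empty: unsupported FakeQuantize layout
        }
        return Precision{true, true};
    }

    bool isAsymmetricOnWeights(bool fqSupported) {
        const Precision dataPrecision = getPrecisionOnWeights(fqSupported);
        if (dataPrecision.empty()) {
            return false;  // fail closed instead of reading stale fields
        }
        return dataPrecision.hasZeroPoint;
    }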
diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/is_asymmetric_on_weights.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/is_asymmetric_on_weights.cpp
new file mode 100644
index 00000000000..e9b4a4a8c41
--- /dev/null
+++ b/inference-engine/tests/functional/inference_engine/lp_transformations/is_asymmetric_on_weights.cpp
@@ -0,0 +1,142 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "layer_transformation.hpp"
+
+#include <string>
+#include <sstream>
+#include <memory>
+
+#include <gtest/gtest.h>
+
+#include <transformations/utils/utils.hpp>
+#include <transformations/init_node_info.hpp>
+#include <low_precision/weightable_layer_transformation.hpp>
+#include "lpt_ngraph_functions/convolution_function.hpp"
+
+using namespace testing;
+using namespace ngraph;
+using namespace ngraph::pass;
+
+class IsAsymmetricOnWeightsTestValues {
+public:
+    class Actual {
+    public:
+        ngraph::element::Type precisionBeforeDequantization;
+        ngraph::builder::subgraph::DequantizationOperations dequantizationOnActivations;
+        std::shared_ptr<ngraph::opset1::Constant> weights;
+        builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights;
+    };
+
+    TestTransformationParams params;
+    Actual actual;
+};
+
+typedef std::tuple<
+    element::Type,
+    ngraph::PartialShape,
+    IsAsymmetricOnWeightsTestValues,
+    std::pair<std::vector<bool>, bool> > IsAsymmetricOnWeightsParams;
+
+class IsAsymmetricOnWeightsTransformation : public LayerTransformation, public testing::WithParamInterface<IsAsymmetricOnWeightsParams> {
+public:
+    void SetUp() override {
+        const auto netPrecision = std::get<0>(GetParam());
+        const auto inputShape = std::get<1>(GetParam());
+        auto testValues = std::get<2>(GetParam());
+        std::pair<std::vector<bool>, bool> transposeAndIsAsymmetricOnWeights = std::get<3>(GetParam());
+
+        actualFunction = ngraph::builder::subgraph::ConvolutionFunction::getOriginal(
+            netPrecision,
+            testValues.actual.precisionBeforeDequantization,
+            inputShape,
+            testValues.actual.dequantizationOnActivations,
+            testValues.actual.weights,
+            testValues.actual.fakeQuantizeOnWeights,
+            transposeAndIsAsymmetricOnWeights.first[0],
+            transposeAndIsAsymmetricOnWeights.first[1],
+            transposeAndIsAsymmetricOnWeights.first[2],
+            transposeAndIsAsymmetricOnWeights.first[3],
+            transposeAndIsAsymmetricOnWeights.first[4]);
+    }
+
+    static std::string getTestCaseName(testing::TestParamInfo<IsAsymmetricOnWeightsParams> obj) {
+        const auto netPrecision = std::get<0>(obj.param);
+        auto inputShape = std::get<1>(obj.param);
+        IsAsymmetricOnWeightsTestValues testValues = std::get<2>(obj.param);
+        std::pair<std::vector<bool>, bool> transposeAndIsAsymmetricOnWeights = std::get<3>(obj.param);
+
+        std::ostringstream result;
+        result << toString(testValues.params) << "_" <<
+            netPrecision << "_" <<
+            inputShape << "_" <<
+            testValues.actual.precisionBeforeDequantization << "_" <<
+            testValues.actual.dequantizationOnActivations << "_" << "_weights_" <<
+            testValues.actual.weights->get_element_type() << "_" << "{ " <<
+            testValues.actual.weights->cast_vector<float>()[0] << " }_" <<
+            testValues.actual.fakeQuantizeOnWeights << "_" <<
+            transposeAndIsAsymmetricOnWeights.first[0] << "_" <<
+            transposeAndIsAsymmetricOnWeights.first[1] << "_" <<
+            transposeAndIsAsymmetricOnWeights.first[2] << "_" <<
+            transposeAndIsAsymmetricOnWeights.first[3] << "_" <<
+            transposeAndIsAsymmetricOnWeights.first[4];
+        return result.str();
+    }
+};
+
+TEST_P(IsAsymmetricOnWeightsTransformation, CompareFunctions) {
+    actualFunction->validate_nodes_and_infer_types();
+
+    const auto convolutions = LayerTransformation::get<opset1::Convolution>(actualFunction);
+    ASSERT_TRUE(convolutions.size() == 1ul) << "convolution was not found";
+
+    const auto isAsymmetricOnWeights = ngraph::pass::low_precision::WeightableLayerTransformation::isAsymmetricOnWeights(convolutions[0]);
+    std::pair<std::vector<bool>, bool> transposeAndIsAsymmetricOnWeights = std::get<3>(GetParam());
+    ASSERT_EQ(transposeAndIsAsymmetricOnWeights.second, isAsymmetricOnWeights);
+}
+
+const std::vector<element::Type> netPrecisions = {
+    element::f32
+};
+
+const std::vector<ngraph::PartialShape> suitablePartialShapes = {
+    ngraph::PartialShape({ 1, 3, 72, 48 }),
+    ngraph::PartialShape({ 4, 3, 72, 48 }),
+    ngraph::PartialShape({ Dimension::dynamic(), 3, 72, 48 }),
+    ngraph::PartialShape({ 1, 3, Dimension::dynamic(), Dimension::dynamic() }),
+};
+
+const std::vector<IsAsymmetricOnWeightsTestValues> testValues = {
+    {
+        LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true),
+        {
+            ngraph::element::u8,
+            {{ngraph::element::f32}, { 128.f }, { 0.02f }},
+            op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ 2.f }),
+            { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.f }, { 1.27f } },
+        }
+    }
+};
+
+const std::vector<std::pair<std::vector<bool>, bool> > transposeFlags = {
+    // asymmetric quantization
+    {{false, false, false, false, false}, true},
+    {{true, false, false, false, false}, true},
+
+    // not supported FakeQuantize
+    {{false, true, false, false, false}, false},
+    {{false, false, true, false, false}, false},
+    {{false, false, false, true, false}, false},
+    {{false, false, false, false, true}, false}
+};
+
+INSTANTIATE_TEST_SUITE_P(
+    smoke_LPT,
+    IsAsymmetricOnWeightsTransformation,
+    ::testing::Combine(
+        ::testing::ValuesIn(netPrecisions),
+        ::testing::ValuesIn(suitablePartialShapes),
+        ::testing::ValuesIn(testValues),
+        ::testing::ValuesIn(transposeFlags)),
+    IsAsymmetricOnWeightsTransformation::getTestCaseName);
diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/convolution_function.hpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/convolution_function.hpp
index 325b981ec16..f552cf8503d 100644
--- a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/convolution_function.hpp
+++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/convolution_function.hpp
@@ -24,7 +24,12 @@ public:
         const ngraph::PartialShape& inputShape,
         const ngraph::builder::subgraph::DequantizationOperations& dequantizationBefore,
         std::shared_ptr<ngraph::opset1::Constant> weights,
-        const ngraph::builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights);
+        const ngraph::builder::subgraph::FakeQuantizeOnWeights fqOnWeights,
+        const bool fqOnWeightsTransposeOnData = false,
+        const bool fqOnWeightsTransposeOnInputLow = false,
+        const bool fqOnWeightsTransposeOnInputHigh = false,
+        const bool fqOnWeightsTransposeOnOutputLow = false,
+        const bool fqOnWeightsTransposeOnOutputHigh = false);
 
     static std::shared_ptr<ngraph::Function> getOriginalWithIncorrectWeights(
         const ngraph::Shape& inputShape,
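The five new flags map one-to-one to the five FakeQuantize inputs (data, input low, input high, output low, output high); setting any one of them wraps that input in a Transpose, producing a FakeQuantize whose constant layout LPT does not support, which is exactly what the test's "not supported FakeQuantize" cases exercise. The core pattern, reduced to a hypothetical helper:

    #include <memory>
    #include <utility>

    struct Node {};  // stand-in for an ngraph node
    using NodePtr = std::shared_ptr<Node>;

    NodePtr makeTranspose(NodePtr input) {
        // Stands in for std::make_shared<ngraph::opset1::Transpose>(input, order).
        return std::make_shared<Node>();
    }

    // Wrap the given FakeQuantize input in a Transpose only when the
    // matching flag from the test parameters is set.
    NodePtr maybeTranspose(NodePtr input, bool transposeFlag) {
        return transposeFlag ? makeTranspose(std::move(input)) : input;
    }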
diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/convolution_function.cpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/convolution_function.cpp
index 1d2dc22bb86..cc50bce4546 100644
--- a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/convolution_function.cpp
+++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/convolution_function.cpp
@@ -27,7 +27,12 @@ std::shared_ptr<ngraph::Function> ConvolutionFunction::getOriginal(
     const ngraph::PartialShape& inputShape,
     const ngraph::builder::subgraph::DequantizationOperations& dequantizationBefore,
     std::shared_ptr<ngraph::opset1::Constant> weights,
-    const ngraph::builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights) {
+    const ngraph::builder::subgraph::FakeQuantizeOnWeights fqOnWeights,
+    const bool transposeOnData,
+    const bool transposeOnInputLow,
+    const bool transposeOnInputHigh,
+    const bool transposeOnOutputLow,
+    const bool transposeOnOutputHigh) {
     const auto input = std::make_shared<ngraph::opset1::Parameter>(inputPrecision, inputShape);
     auto dequantizationStructure = dequantizationBefore;
     dequantizationStructure.multiply.outPrecision = netPrecision;
@@ -53,15 +58,32 @@ std::shared_ptr<ngraph::Function> ConvolutionFunction::getOriginal(
     convertOnWeights->constant_fold(convertedOutput, convertOnWeights->input_values());
     const auto convertedWeights = convertedOutput[0].get_node_shared_ptr();
 
-    const auto onWeights = fakeQuantizeOnWeights.empty() ? convertedWeights :
-        ngraph::builder::makeFakeQuantize(
-            convertedWeights, netPrecision,
-            fakeQuantizeOnWeights.quantizationLevel,
-            fakeQuantizeOnWeights.constantShape,
-            fakeQuantizeOnWeights.inputLowValues,
-            fakeQuantizeOnWeights.inputHighValues,
-            fakeQuantizeOnWeights.outputLowValues,
-            fakeQuantizeOnWeights.outputHighValues);
+    const std::shared_ptr<ngraph::opset1::Constant> constant = ngraph::opset1::Constant::create(ngraph::element::u64, ngraph::Shape{4}, {0, 1, 2, 3});
+    const std::shared_ptr<ngraph::Node> onWeights = fqOnWeights.empty() ?
+        convertedWeights :
+        std::make_shared<ngraph::opset1::FakeQuantize>(
+            transposeOnData ? std::make_shared<ngraph::opset1::Transpose>(convertedWeights, constant) : convertedWeights,
+            transposeOnInputLow ?
+                std::make_shared<ngraph::opset1::Transpose>(
+                    makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.inputLowValues, fqOnWeights.inputLowValues.empty()),
+                    constant->clone_with_new_inputs({})) :
+                makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.inputLowValues, fqOnWeights.inputLowValues.empty()),
+            transposeOnInputHigh ?
+                std::make_shared<ngraph::opset1::Transpose>(
+                    makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.inputHighValues, fqOnWeights.inputHighValues.empty()),
+                    constant->clone_with_new_inputs({})) :
+                makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.inputHighValues, fqOnWeights.inputHighValues.empty()),
+            transposeOnOutputLow ?
+                std::make_shared<ngraph::opset1::Transpose>(
+                    makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.outputLowValues, fqOnWeights.outputLowValues.empty()),
+                    constant->clone_with_new_inputs({})) :
+                makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.outputLowValues, fqOnWeights.outputLowValues.empty()),
+            transposeOnOutputHigh ?
+                std::make_shared<ngraph::opset1::Transpose>(
+                    makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.outputHighValues, fqOnWeights.outputHighValues.empty()),
+                    constant->clone_with_new_inputs({})) :
+                makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.outputHighValues, fqOnWeights.outputHighValues.empty()),
+            fqOnWeights.quantizationLevel);
 
     auto convolutionOriginal = ngraph::opset1::Convolution(
         ngraph::op::TemporaryReplaceOutputType(dequantization, netPrecision).get(),
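For reference, a call shaped like the new test's SetUp (values taken from the test data above) builds a convolution whose weights FakeQuantize has a transposed input-low constant; isAsymmetricOnWeights is then expected to return false for it. This is a usage sketch, not code from the patch:

    const auto function = ngraph::builder::subgraph::ConvolutionFunction::getOriginal(
        ngraph::element::f32,                                // netPrecision
        ngraph::element::u8,                                 // precision before dequantization
        ngraph::PartialShape({ 1, 3, 72, 48 }),              // input shape
        {{ngraph::element::f32}, { 128.f }, { 0.02f }},      // dequantization on activations
        ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ 2.f }),
        { 255ul, ngraph::Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.f }, { 1.27f } },
        false,  // transposeOnData
        true);  // transposeOnInputLow: FakeQuantize layout no longer supported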