[LPT] isAsymmetricOnWeights fix: small zero points ignoring (#8429)

This commit is contained in:
Edward Shogulin 2021-11-11 22:58:56 +03:00 committed by GitHub
parent f46e8bb3f8
commit 8686100c80
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 181 additions and 49 deletions

View File

@ -42,7 +42,7 @@ bool WeightableLayerTransformation::canConvolutionBeTransformed(const Transforma
if (dequantization.empty()) { if (dequantization.empty()) {
const auto fqOnWeights = getFakeQuantizeOnWeights(layer); const auto fqOnWeights = getFakeQuantizeOnWeights(layer);
const auto dataPrecision = getDataPrecisionOnWeights(layer); const auto dataPrecision = getDataPrecisionOnWeights(layer);
if (!NetworkHelper::checkZeroPoint(fqOnWeights, dataPrecision)) { if ((dataPrecision.precision == ngraph::element::undefined) || (!NetworkHelper::checkZeroPoint(fqOnWeights, dataPrecision))) {
return false; return false;
} }
} else { } else {
@ -391,7 +391,7 @@ bool WeightableLayerTransformation::isAsymmetricOnWeights(const std::shared_ptr<
return true; return true;
} }
} else { } else {
if (dequantization.subtract != nullptr) { if ((dequantization.subtract != nullptr) && (NetworkHelper::optimizeSubtract(dequantization.subtract) != nullptr)) {
return true; return true;
} }
} }

View File

@ -0,0 +1,128 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "layer_transformation.hpp"
#include <sstream>
#include <memory>
#include <utility>
#include <gtest/gtest.h>
#include <transformations/utils/utils.hpp>
#include <transformations/init_node_info.hpp>
#include <low_precision/weightable_layer_transformation.hpp>
#include "lpt_ngraph_functions/convolution_function.hpp"
using namespace testing;
using namespace ngraph;
using namespace ngraph::pass;
// One test case: a convolution whose weights carry explicit dequantization
// operations (Convert / Subtract / Multiply) instead of a FakeQuantize, plus
// the value isAsymmetricOnWeights is expected to return for that graph.
class IsAsymmetricOnWeightsDequantizationTestValues {
public:
    // Precision of the activation input before its dequantization chain.
    ngraph::element::Type precisionBeforeDequantization;
    // Dequantization applied to the activations branch.
    ngraph::builder::subgraph::DequantizationOperations dequantizationOnActivations;
    // Constant weights fed to the convolution.
    std::shared_ptr<ngraph::opset1::Constant> weights;
    // Dequantization applied to the weights branch (its Subtract is what
    // makes the weights "asymmetric").
    builder::subgraph::DequantizationOperations dequantizationOnWeights;
    // Expected result of WeightableLayerTransformation::isAsymmetricOnWeights.
    bool isAsymmetricOnWeights;
};

// Full parameter tuple for the test: net precision, input shape, test values.
typedef std::tuple<
    element::Type,
    ngraph::PartialShape,
    IsAsymmetricOnWeightsDequantizationTestValues> IsAsymmetricOnWeightsDequantizationParams;
// Fixture that builds a convolution with dequantization operations on both
// activations and weights, so the test body can query
// WeightableLayerTransformation::isAsymmetricOnWeights on the resulting graph.
class IsAsymmetricOnWeightsDequantizationTransformation :
    public LayerTransformation,
    public testing::WithParamInterface<IsAsymmetricOnWeightsDequantizationParams> {
public:
    void SetUp() override {
        const auto precision = std::get<0>(GetParam());
        const auto shape = std::get<1>(GetParam());
        const auto values = std::get<2>(GetParam());

        // No FakeQuantize on weights ({}): the weights branch is described
        // purely by its dequantization operations.
        actualFunction = ngraph::builder::subgraph::ConvolutionFunction::getOriginal(
            precision,
            values.precisionBeforeDequantization,
            shape,
            values.dequantizationOnActivations,
            values.weights,
            {},
            values.dequantizationOnWeights);
    }

    // Builds a readable, unique test-case name from all varying parameters.
    static std::string getTestCaseName(testing::TestParamInfo<IsAsymmetricOnWeightsDequantizationParams> obj) {
        const auto precision = std::get<0>(obj.param);
        const auto shape = std::get<1>(obj.param);
        const auto values = std::get<2>(obj.param);

        std::ostringstream name;
        name << precision << "_" << shape << "_" << values.precisionBeforeDequantization << "_";
        name << values.dequantizationOnActivations << "__weights_";
        name << values.weights->get_element_type() << "_{ ";
        name << values.weights->cast_vector<float>()[0] << " }_";
        name << values.dequantizationOnWeights;
        return name.str();
    }
};
// Checks that isAsymmetricOnWeights returns the expected flag for a
// convolution whose weights are dequantized via explicit Subtract/Multiply.
TEST_P(IsAsymmetricOnWeightsDequantizationTransformation, CompareFunctions) {
    actualFunction->validate_nodes_and_infer_types();
    // Exactly one convolution is expected in the built function.
    const auto convolutions = LayerTransformation::get<opset1::Convolution>(actualFunction);
    ASSERT_TRUE(convolutions.size() == 1ul) << "convolution was not found";
    IsAsymmetricOnWeightsDequantizationTestValues testValues = std::get<2>(GetParam());
    // The value under test: whether LPT considers the weights asymmetric.
    const auto isAsymmetricOnWeights = ngraph::pass::low_precision::WeightableLayerTransformation::isAsymmetricOnWeights(convolutions[0]);
    ASSERT_EQ(testValues.isAsymmetricOnWeights, isAsymmetricOnWeights);
}
// Network precisions to instantiate the suite with.
const std::vector<element::Type> netPrecisions = {
    element::f32
};

// Input shapes the transformation should handle, including dynamic batch
// and dynamic spatial dimensions.
const std::vector<ngraph::PartialShape> suitablePartialShapes = {
    ngraph::PartialShape({ 1, 3, 72, 48 }),
    ngraph::PartialShape({ 4, 3, 72, 48 }),
    ngraph::PartialShape({ Dimension::dynamic(), 3, 72, 48 }),
    ngraph::PartialShape({ 1, 3, Dimension::dynamic(), Dimension::dynamic() }),
};

const std::vector<IsAsymmetricOnWeightsDequantizationTestValues> testValues = {
    // Subtract on weights with non-trivial zero points {1..6}:
    // weights are reported as asymmetric.
    {
        ngraph::element::u8,
        {{ngraph::element::f32}, { 128.f }, { 0.02f }},
        op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ 2.f }),
        {
            {ngraph::element::f32},
            {{1, 2, 3, 4, 5, 6}, ngraph::element::f32, {6, 1, 1, 1}},
            {{1, 2, 3, 4, 5, 6}, ngraph::element::f32, {6, 1, 1, 1}}
        },
        true
    },
    // Subtract on weights whose zero points are all (near) zero
    // (largest is 1e-7): the Subtract is expected to be optimized away,
    // so the weights are NOT reported as asymmetric — this is the case
    // the "small zero points ignoring" fix covers.
    {
        ngraph::element::u8,
        {{ngraph::element::f32}, { 128.f }, { 0.02f }},
        op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ 2.f }),
        {
            {ngraph::element::f32},
            {{0, 0, 1.e-7, 0, 0, 0}, ngraph::element::f32, {6, 1, 1, 1}},
            {{1, 2, 3, 4, 5, 6}, ngraph::element::f32, {6, 1, 1, 1}}
        },
        false
    }
};
// Cross product of precisions, shapes, and test values; test-case names
// come from the fixture's getTestCaseName.
INSTANTIATE_TEST_SUITE_P(
    smoke_LPT,
    IsAsymmetricOnWeightsDequantizationTransformation,
    ::testing::Combine(
        ::testing::ValuesIn(netPrecisions),
        ::testing::ValuesIn(suitablePartialShapes),
        ::testing::ValuesIn(testValues)),
    IsAsymmetricOnWeightsDequantizationTransformation::getTestCaseName);

View File

@ -19,27 +19,23 @@ using namespace testing;
using namespace ngraph; using namespace ngraph;
using namespace ngraph::pass; using namespace ngraph::pass;
class IsAsymmetricOnWeightsTestValues { class IsAsymmetricOnWeightsFakeQuantizeTestValues {
public: public:
class Actual { ngraph::element::Type precisionBeforeDequantization;
public: ngraph::builder::subgraph::DequantizationOperations dequantizationOnActivations;
ngraph::element::Type precisionBeforeDequantization; std::shared_ptr<ngraph::opset1::Constant> weights;
ngraph::builder::subgraph::DequantizationOperations dequantizationOnActivations; builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights;
std::shared_ptr<ngraph::opset1::Constant> weights;
builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights;
};
TestTransformationParams params;
Actual actual;
}; };
typedef std::tuple< typedef std::tuple<
element::Type, element::Type,
ngraph::PartialShape, ngraph::PartialShape,
IsAsymmetricOnWeightsTestValues, IsAsymmetricOnWeightsFakeQuantizeTestValues,
std::pair<std::vector<bool>, bool> > IsAsymmetricOnWeightsParams; std::pair<std::vector<bool>, bool> > IsAsymmetricOnWeightsFakeQuantizeParams;
class IsAsymmetricOnWeightsTransformation : public LayerTransformation, public testing::WithParamInterface<IsAsymmetricOnWeightsParams> { class IsAsymmetricOnWeightsFakeQuantizeTransformation :
public LayerTransformation,
public testing::WithParamInterface<IsAsymmetricOnWeightsFakeQuantizeParams> {
public: public:
void SetUp() override { void SetUp() override {
const auto netPrecision = std::get<0>(GetParam()); const auto netPrecision = std::get<0>(GetParam());
@ -49,11 +45,12 @@ public:
actualFunction = ngraph::builder::subgraph::ConvolutionFunction::getOriginal( actualFunction = ngraph::builder::subgraph::ConvolutionFunction::getOriginal(
netPrecision, netPrecision,
testValues.actual.precisionBeforeDequantization, testValues.precisionBeforeDequantization,
inputShape, inputShape,
testValues.actual.dequantizationOnActivations, testValues.dequantizationOnActivations,
testValues.actual.weights, testValues.weights,
testValues.actual.fakeQuantizeOnWeights, testValues.fakeQuantizeOnWeights,
{},
transposeAndIsAsymmetricOnWeights.first[0], transposeAndIsAsymmetricOnWeights.first[0],
transposeAndIsAsymmetricOnWeights.first[1], transposeAndIsAsymmetricOnWeights.first[1],
transposeAndIsAsymmetricOnWeights.first[2], transposeAndIsAsymmetricOnWeights.first[2],
@ -61,21 +58,21 @@ public:
transposeAndIsAsymmetricOnWeights.first[4]); transposeAndIsAsymmetricOnWeights.first[4]);
} }
static std::string getTestCaseName(testing::TestParamInfo<IsAsymmetricOnWeightsParams> obj) { static std::string getTestCaseName(testing::TestParamInfo<IsAsymmetricOnWeightsFakeQuantizeParams> obj) {
const auto netPrecision = std::get<0>(obj.param); const auto netPrecision = std::get<0>(obj.param);
auto inputShape = std::get<1>(obj.param); auto inputShape = std::get<1>(obj.param);
IsAsymmetricOnWeightsTestValues testValues = std::get<2>(obj.param); IsAsymmetricOnWeightsFakeQuantizeTestValues testValues = std::get<2>(obj.param);
std::pair<std::vector<bool>, bool> transposeAndIsAsymmetricOnWeights = std::get<3>(obj.param); std::pair<std::vector<bool>, bool> transposeAndIsAsymmetricOnWeights = std::get<3>(obj.param);
std::ostringstream result; std::ostringstream result;
result << toString(testValues.params) << "_" << result <<
netPrecision << "_" << netPrecision << "_" <<
inputShape << "_" << inputShape << "_" <<
testValues.actual.precisionBeforeDequantization << "_" << testValues.precisionBeforeDequantization << "_" <<
testValues.actual.dequantizationOnActivations << "_" << "_weights_" << testValues.dequantizationOnActivations << "_" << "_weights_" <<
testValues.actual.weights->get_element_type() << "_" << "{ " << testValues.weights->get_element_type() << "_" << "{ " <<
testValues.actual.weights->cast_vector<float>()[0] << " }_" << testValues.weights->cast_vector<float>()[0] << " }_" <<
testValues.actual.fakeQuantizeOnWeights << "_" << testValues.fakeQuantizeOnWeights << "_" <<
transposeAndIsAsymmetricOnWeights.first[0] << "_" << transposeAndIsAsymmetricOnWeights.first[0] << "_" <<
transposeAndIsAsymmetricOnWeights.first[1] << "_" << transposeAndIsAsymmetricOnWeights.first[1] << "_" <<
transposeAndIsAsymmetricOnWeights.first[2] << "_" << transposeAndIsAsymmetricOnWeights.first[2] << "_" <<
@ -85,7 +82,7 @@ public:
} }
}; };
TEST_P(IsAsymmetricOnWeightsTransformation, CompareFunctions) { TEST_P(IsAsymmetricOnWeightsFakeQuantizeTransformation, CompareFunctions) {
actualFunction->validate_nodes_and_infer_types(); actualFunction->validate_nodes_and_infer_types();
const auto convolutions = LayerTransformation::get<opset1::Convolution>(actualFunction); const auto convolutions = LayerTransformation::get<opset1::Convolution>(actualFunction);
@ -107,15 +104,12 @@ const std::vector<ngraph::PartialShape> suitablePartialShapes = {
ngraph::PartialShape({ 1, 3, Dimension::dynamic(), Dimension::dynamic() }), ngraph::PartialShape({ 1, 3, Dimension::dynamic(), Dimension::dynamic() }),
}; };
const std::vector<IsAsymmetricOnWeightsTestValues> testValues = { const std::vector<IsAsymmetricOnWeightsFakeQuantizeTestValues> testValues = {
{ {
LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true), ngraph::element::u8,
{ {{ngraph::element::f32}, { 128.f }, { 0.02f }},
ngraph::element::u8, op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ 2.f }),
{{ngraph::element::f32}, { 128.f }, { 0.02f }}, { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.f }, { 1.27f } },
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ 2.f }),
{ 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.f }, { 1.27f } },
}
} }
}; };
@ -133,10 +127,10 @@ const std::vector<std::pair<std::vector<bool>, bool> > transposeFlags = {
INSTANTIATE_TEST_SUITE_P( INSTANTIATE_TEST_SUITE_P(
smoke_LPT, smoke_LPT,
IsAsymmetricOnWeightsTransformation, IsAsymmetricOnWeightsFakeQuantizeTransformation,
::testing::Combine( ::testing::Combine(
::testing::ValuesIn(netPrecisions), ::testing::ValuesIn(netPrecisions),
::testing::ValuesIn(suitablePartialShapes), ::testing::ValuesIn(suitablePartialShapes),
::testing::ValuesIn(testValues), ::testing::ValuesIn(testValues),
::testing::ValuesIn(transposeFlags)), ::testing::ValuesIn(transposeFlags)),
IsAsymmetricOnWeightsTransformation::getTestCaseName); IsAsymmetricOnWeightsFakeQuantizeTransformation::getTestCaseName);

View File

@ -22,9 +22,10 @@ public:
const ngraph::element::Type netPrecision, const ngraph::element::Type netPrecision,
const ngraph::element::Type inputPrecision, const ngraph::element::Type inputPrecision,
const ngraph::PartialShape& inputShape, const ngraph::PartialShape& inputShape,
const ngraph::builder::subgraph::DequantizationOperations& dequantizationBefore, const ngraph::builder::subgraph::DequantizationOperations& dequantizationOnActivations,
std::shared_ptr<ngraph::opset1::Constant> weights, std::shared_ptr<ngraph::opset1::Constant> weights,
const ngraph::builder::subgraph::FakeQuantizeOnWeights fqOnWeights, const ngraph::builder::subgraph::FakeQuantizeOnWeights fqOnWeights,
const ngraph::builder::subgraph::DequantizationOperations& dequantizationOnWeights = DequantizationOperations(),
const bool fqOnWeightsTransposeOnData = false, const bool fqOnWeightsTransposeOnData = false,
const bool fqOnWeightsTransposeOnInputLow = false, const bool fqOnWeightsTransposeOnInputLow = false,
const bool fqOnWeightsTransposeOnInputHigh = false, const bool fqOnWeightsTransposeOnInputHigh = false,

View File

@ -25,16 +25,17 @@ std::shared_ptr<ngraph::Function> ConvolutionFunction::getOriginal(
const ngraph::element::Type netPrecision, const ngraph::element::Type netPrecision,
const ngraph::element::Type inputPrecision, const ngraph::element::Type inputPrecision,
const ngraph::PartialShape& inputShape, const ngraph::PartialShape& inputShape,
const ngraph::builder::subgraph::DequantizationOperations& dequantizationBefore, const ngraph::builder::subgraph::DequantizationOperations& dequantizationOnActivations,
std::shared_ptr<ngraph::opset1::Constant> weights, std::shared_ptr<ngraph::opset1::Constant> weights,
const ngraph::builder::subgraph::FakeQuantizeOnWeights fqOnWeights, const ngraph::builder::subgraph::FakeQuantizeOnWeights fqOnWeights,
const ngraph::builder::subgraph::DequantizationOperations& dequantizationOnWeights,
const bool transposeOnData, const bool transposeOnData,
const bool transposeOnInputLow, const bool transposeOnInputLow,
const bool transposeOnInputHigh, const bool transposeOnInputHigh,
const bool transposeOnOutputLow, const bool transposeOnOutputLow,
const bool transposeOnOutputHigh) { const bool transposeOnOutputHigh) {
const auto input = std::make_shared<ngraph::opset1::Parameter>(inputPrecision, inputShape); const auto input = std::make_shared<ngraph::opset1::Parameter>(inputPrecision, inputShape);
auto dequantizationStructure = dequantizationBefore; auto dequantizationStructure = dequantizationOnActivations;
dequantizationStructure.multiply.outPrecision = netPrecision; dequantizationStructure.multiply.outPrecision = netPrecision;
const auto dequantization = makeDequantization(input, dequantizationStructure); const auto dequantization = makeDequantization(input, dequantizationStructure);
@ -53,15 +54,22 @@ std::shared_ptr<ngraph::Function> ConvolutionFunction::getOriginal(
weights, op::Constant::create(ngraph::element::i64, Shape{ targetShape.size() }, targetShape))); weights, op::Constant::create(ngraph::element::i64, Shape{ targetShape.size() }, targetShape)));
} }
const auto convertOnWeights = std::make_shared<opset1::Convert>(weights, netPrecision); std::shared_ptr<Node> convertedWeights;
OutputVector convertedOutput(1); if (dequantizationOnWeights.empty()) {
convertOnWeights->constant_fold(convertedOutput, convertOnWeights->input_values()); const auto convertOnWeights = std::make_shared<opset1::Convert>(weights, netPrecision);
const auto convertedWeights = convertedOutput[0].get_node_shared_ptr(); OutputVector convertedOutput(1);
convertOnWeights->constant_fold(convertedOutput, convertOnWeights->input_values());
convertedWeights = convertedOutput[0].get_node_shared_ptr();
} else {
convertedWeights = weights;
}
const std::shared_ptr<ngraph::Node> constant = ngraph::opset1::Constant::create(ngraph::element::u64, ngraph::Shape{4}, {0, 1, 2, 3}); const std::shared_ptr<ngraph::Node> constant = ngraph::opset1::Constant::create(ngraph::element::u64, ngraph::Shape{4}, {0, 1, 2, 3});
const std::shared_ptr<Node> onWeights = fqOnWeights.empty() ? std::shared_ptr<Node> onWeights;
convertedWeights : if (fqOnWeights.empty()) {
std::make_shared<opset1::FakeQuantize>( onWeights = dequantizationOnWeights.empty() ? convertedWeights : makeDequantization(convertedWeights, dequantizationOnWeights);
} else {
onWeights = std::make_shared<opset1::FakeQuantize>(
transposeOnData ? std::make_shared<opset1::Transpose>(convertedWeights, constant) : convertedWeights, transposeOnData ? std::make_shared<opset1::Transpose>(convertedWeights, constant) : convertedWeights,
transposeOnInputLow ? transposeOnInputLow ?
std::make_shared<opset1::Transpose>( std::make_shared<opset1::Transpose>(
@ -84,6 +92,7 @@ std::shared_ptr<ngraph::Function> ConvolutionFunction::getOriginal(
constant->clone_with_new_inputs({})) : constant->clone_with_new_inputs({})) :
makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.outputHighValues, fqOnWeights.outputHighValues.empty()), makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.outputHighValues, fqOnWeights.outputHighValues.empty()),
fqOnWeights.quantizationLevel); fqOnWeights.quantizationLevel);
}
auto convolutionOriginal = ngraph::opset1::Convolution( auto convolutionOriginal = ngraph::opset1::Convolution(
ngraph::op::TemporaryReplaceOutputType(dequantization, netPrecision).get(), ngraph::op::TemporaryReplaceOutputType(dequantization, netPrecision).get(),