From 5f7e3cdfb9331cf46f845289f98edcd6efaf8eb7 Mon Sep 17 00:00:00 2001
From: Edward Shogulin
Date: Fri, 5 Nov 2021 13:24:18 +0300
Subject: [PATCH] [LPT] isAsymmetricQuantization & isAsymmetricOnWeights validation (#8316)

* [LPT] isAsymmetricQuantization & isAsymmetricOnWeights

* [LPT] isAsymmetricOnWeights tests

* [LPT] test improvements: comment fixes
---
 .../common/fake_quantize_dequantization.hpp   |   2 +-
 .../low_precision/layer_transformation.hpp    |   4 +
 .../low_precision/quantization_details.hpp    |   2 +
 .../src/fake_quantize_dequantization.cpp      |   2 +-
 .../src/layer_transformation.cpp              |   3 +
 .../src/quantization_details.cpp              |   8 +
 .../src/weightable_layer_transformation.cpp   |   7 +
 .../is_asymmetric_on_weights.cpp              | 142 ++++++++++++++++++
 .../convolution_function.hpp                  |   7 +-
 .../src/convolution_function.cpp              |  42 ++++--
 10 files changed, 206 insertions(+), 13 deletions(-)
 create mode 100644 inference-engine/tests/functional/inference_engine/lp_transformations/is_asymmetric_on_weights.cpp

diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/common/fake_quantize_dequantization.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/common/fake_quantize_dequantization.hpp
index a9fba5234d1..0da82810c97 100644
--- a/inference-engine/src/low_precision_transformations/include/low_precision/common/fake_quantize_dequantization.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/common/fake_quantize_dequantization.hpp
@@ -29,7 +29,7 @@ public:
         const std::shared_ptr<ngraph::opset1::Multiply>& multiply,
         const std::shared_ptr<ngraph::opset1::Constant>& multiplyConstant);
 
-    bool empty() const;
+    bool empty() const noexcept;
     bool multiplyHasZeroOrDenormal() const;
     bool isShared() const;
     bool isLowPrecision() const;
diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/layer_transformation.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/layer_transformation.hpp
index 17a5cf567e1..f08931a77a5 100644
--- a/inference-engine/src/low_precision_transformations/include/low_precision/layer_transformation.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/layer_transformation.hpp
@@ -56,6 +56,10 @@ public:
         max(max),
         hasZeroPoint(hasZeroPoint) {}
 
+    bool empty() const noexcept {
+        return (precision == element::undefined) && (min == 0.f) && (max == 0.f) && (!hasZeroPoint);
+    }
+
     static bool isSupported(const element::Type& precision) {
         static const std::set<element::Type> lowPrecision = {
             element::i8, element::u8,
diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/quantization_details.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/quantization_details.hpp
index a1c2f1ca497..975dca8cc53 100644
--- a/inference-engine/src/low_precision_transformations/include/low_precision/quantization_details.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/quantization_details.hpp
@@ -51,6 +51,8 @@ public:
     float getOutputLowValue(const size_t channel) const;
     float getOutputHighValue(const size_t channel) const;
 
+    bool empty() const noexcept;
+
     static bool isSupportedLevel(const size_t level);
 
     const size_t levels;
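All three headers above add the same kind of guard: a default-constructed value object doubles as a "nothing found" sentinel, and empty() is the test for it. A minimal standalone sketch of the convention follows; the Details struct is illustrative, not the real QuantizationDetails class:

    #include <cstddef>
    #include <vector>

    // Illustrative stand-in for QuantizationDetails: default construction
    // (levels == 0, empty interval vectors) means "no data was extracted".
    struct Details {
        size_t levels = 0;
        std::vector<float> inputLowValues;

        bool empty() const noexcept {
            return (levels == 0) && inputLowValues.empty();
        }
    };

    // Callers receive a Details by value and must test empty() before use,
    // instead of dereferencing constant pointers that may not exist.
    bool hasUsableDetails(const Details& details) {
        return !details.empty();
    }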
diff --git a/inference-engine/src/low_precision_transformations/src/fake_quantize_dequantization.cpp b/inference-engine/src/low_precision_transformations/src/fake_quantize_dequantization.cpp
index 52e651b6ca0..46b56716081 100644
--- a/inference-engine/src/low_precision_transformations/src/fake_quantize_dequantization.cpp
+++ b/inference-engine/src/low_precision_transformations/src/fake_quantize_dequantization.cpp
@@ -34,7 +34,7 @@ FakeQuantizeDequantization::FakeQuantizeDequantization(
     multiplyConstant(multiplyConstant) {
 }
 
-bool FakeQuantizeDequantization::empty() const {
+bool FakeQuantizeDequantization::empty() const noexcept {
     return (subtract == nullptr) && (multiply == nullptr);
 }
diff --git a/inference-engine/src/low_precision_transformations/src/layer_transformation.cpp b/inference-engine/src/low_precision_transformations/src/layer_transformation.cpp
index 64363ace33c..928403133fa 100644
--- a/inference-engine/src/low_precision_transformations/src/layer_transformation.cpp
+++ b/inference-engine/src/low_precision_transformations/src/layer_transformation.cpp
@@ -311,6 +311,9 @@ LayerTransformation::PrecisionDetails LayerTransformation::getPrecisionDetails(c
 bool LayerTransformation::isAsymmetricQuantization(const std::shared_ptr<const Node>& layer) {
     const auto nonConstNode = const_cast<ngraph::Node*>(layer.get())->shared_from_this();
     const auto dequantization = NetworkHelper::getDequantization(nonConstNode);
+    if (dequantization.empty()) {
+        return false;
+    }
     return dequantization.subtract != nullptr;
 }
 
diff --git a/inference-engine/src/low_precision_transformations/src/quantization_details.cpp b/inference-engine/src/low_precision_transformations/src/quantization_details.cpp
index 84028c99a15..6ea8d159cd8 100644
--- a/inference-engine/src/low_precision_transformations/src/quantization_details.cpp
+++ b/inference-engine/src/low_precision_transformations/src/quantization_details.cpp
@@ -91,6 +91,10 @@ void QuantizationDetails::getOutputIntervals(
 }
 
 QuantizationDetails QuantizationDetails::getDetails(std::shared_ptr<opset1::FakeQuantize> quantize) {
+    if (!QuantizationDetails::outputLayoutIsSupported(quantize)) {
+        return QuantizationDetails();
+    }
+
     const std::vector<float> inputLowValues = ov::as_type_ptr<opset1::Constant>(quantize->get_input_node_shared_ptr(1))->cast_vector<float>();
     const std::vector<float> inputHighValues = ov::as_type_ptr<opset1::Constant>(quantize->get_input_node_shared_ptr(2))->cast_vector<float>();
 
@@ -153,6 +157,10 @@ std::vector<float> QuantizationDetails::getBlobValue(std::shared_ptr<Node> const
     return ov::as_type_ptr<opset1::Constant>(constantLayer)->cast_vector<float>();
 }
 
+bool QuantizationDetails::empty() const noexcept {
+    return (levels == 0ul) && inputLowValues.empty() && inputHighValues.empty() && outputLowValues.empty() && outputHighValues.empty();
+}
+
 bool QuantizationDetails::isSupportedLevel(const size_t level) {
     static const std::unordered_set<size_t> supported_levels = { 255, 256, 65536, 65535, static_cast<size_t>(4294967296), 4294967295 };
     return supported_levels.find(level) != supported_levels.end();
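With the guard above, isAsymmetricQuantization distinguishes three cases: no dequantization matched at all (now explicitly false), dequantization without Subtract (symmetric, false), and dequantization with Subtract (asymmetric, true). A reduced sketch of that decision, with a hypothetical Dequant struct standing in for FakeQuantizeDequantization:

    #include <memory>

    struct Op {};  // stand-in for an ngraph operation node

    // Hypothetical reduction of FakeQuantizeDequantization to the two
    // members the predicate needs.
    struct Dequant {
        std::shared_ptr<Op> subtract;  // zero-point op, may be absent
        std::shared_ptr<Op> multiply;  // scale op, may be absent

        bool empty() const noexcept {
            return (subtract == nullptr) && (multiply == nullptr);
        }
    };

    bool isAsymmetric(const Dequant& dequantization) {
        if (dequantization.empty()) {
            return false;  // nothing matched: not a dequantization pattern at all
        }
        return dequantization.subtract != nullptr;  // Subtract implies a zero point
    }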
diff --git a/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp b/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp
index a8aee81aca6..ac6d00ef420 100644
--- a/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp
+++ b/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp
@@ -361,6 +361,9 @@ std::shared_ptr<opset1::FakeQuantize> WeightableLayerTransformation::getFakeQuan
 DataPrecision WeightableLayerTransformation::getDataPrecisionOnWeights(const std::shared_ptr<Node>& node) {
     const auto fq = getFakeQuantizeOnWeights(node);
     const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(fq);
+    if (quantizationDetails.empty()) {
+        return DataPrecision();
+    }
 
     const auto precisionsAttribute = getAttributeFromOutput<PrecisionsAttributePtr>(fq);
     const auto precisions = precisionsAttribute == nullptr ?
@@ -380,6 +383,10 @@ bool WeightableLayerTransformation::isAsymmetricOnWeights(const std::shared_ptr<
 
     if (dequantization.empty()) {
         const auto dataPrecision = WeightableLayerTransformation::getDataPrecisionOnWeights(n);
+        if (dataPrecision.empty()) {
+            return false;
+        }
+
         if (dataPrecision.hasZeroPoint) {
             return true;
         }
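Taken together with the changes in quantization_details.cpp, the weights check now fails closed: an unsupported FakeQuantize layout yields empty QuantizationDetails, which yields an empty DataPrecision, which isAsymmetricOnWeights reports as "not asymmetric". A condensed sketch of that chain, using simplified stand-in types rather than the real classes:

    // Simplified stand-in for DataPrecision: default construction is the
    // "nothing to analyze" sentinel, mirroring DataPrecision::empty().
    struct Precision {
        bool valid = false;
        bool hasZeroPoint = false;

        bool empty() const noexcept { return !valid; }
    };

    // fqSupported models QuantizationDetails::outputLayoutIsSupported().
    Precision getPrecisionOnWeights(bool fqSupported) {
        if (!fqSupported) {
            return Precision();  // empty: unsupported FakeQuantize layout
        }
        return Precision{true, true};
    }

    bool isAsymmetricOnWeights(bool fqSupported) {
        const Precision dataPrecision = getPrecisionOnWeights(fqSupported);
        if (dataPrecision.empty()) {
            return false;  // fail closed instead of reading stale fields
        }
        return dataPrecision.hasZeroPoint;
    }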
diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/is_asymmetric_on_weights.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/is_asymmetric_on_weights.cpp
new file mode 100644
index 00000000000..e9b4a4a8c41
--- /dev/null
+++ b/inference-engine/tests/functional/inference_engine/lp_transformations/is_asymmetric_on_weights.cpp
@@ -0,0 +1,142 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "layer_transformation.hpp"
+
+#include <string>
+#include <sstream>
+#include <memory>
+
+#include <gtest/gtest.h>
+
+#include <transformations/utils/utils.hpp>
+#include <transformations/init_node_info.hpp>
+#include <low_precision/weightable_layer_transformation.hpp>
+#include "lpt_ngraph_functions/convolution_function.hpp"
+
+using namespace testing;
+using namespace ngraph;
+using namespace ngraph::pass;
+
+class IsAsymmetricOnWeightsTestValues {
+public:
+    class Actual {
+    public:
+        ngraph::element::Type precisionBeforeDequantization;
+        ngraph::builder::subgraph::DequantizationOperations dequantizationOnActivations;
+        std::shared_ptr<ngraph::opset1::Constant> weights;
+        builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights;
+    };
+
+    TestTransformationParams params;
+    Actual actual;
+};
+
+typedef std::tuple<
+    element::Type,
+    ngraph::PartialShape,
+    IsAsymmetricOnWeightsTestValues,
+    std::pair<std::vector<bool>, bool> > IsAsymmetricOnWeightsParams;
+
+class IsAsymmetricOnWeightsTransformation : public LayerTransformation, public testing::WithParamInterface<IsAsymmetricOnWeightsParams> {
+public:
+    void SetUp() override {
+        const auto netPrecision = std::get<0>(GetParam());
+        const auto inputShape = std::get<1>(GetParam());
+        auto testValues = std::get<2>(GetParam());
+        std::pair<std::vector<bool>, bool> transposeAndIsAsymmetricOnWeights = std::get<3>(GetParam());
+
+        actualFunction = ngraph::builder::subgraph::ConvolutionFunction::getOriginal(
+            netPrecision,
+            testValues.actual.precisionBeforeDequantization,
+            inputShape,
+            testValues.actual.dequantizationOnActivations,
+            testValues.actual.weights,
+            testValues.actual.fakeQuantizeOnWeights,
+            transposeAndIsAsymmetricOnWeights.first[0],
+            transposeAndIsAsymmetricOnWeights.first[1],
+            transposeAndIsAsymmetricOnWeights.first[2],
+            transposeAndIsAsymmetricOnWeights.first[3],
+            transposeAndIsAsymmetricOnWeights.first[4]);
+    }
+
+    static std::string getTestCaseName(testing::TestParamInfo<IsAsymmetricOnWeightsParams> obj) {
+        const auto netPrecision = std::get<0>(obj.param);
+        auto inputShape = std::get<1>(obj.param);
+        IsAsymmetricOnWeightsTestValues testValues = std::get<2>(obj.param);
+        std::pair<std::vector<bool>, bool> transposeAndIsAsymmetricOnWeights = std::get<3>(obj.param);
+
+        std::ostringstream result;
+        result << toString(testValues.params) << "_" <<
+            netPrecision << "_" <<
+            inputShape << "_" <<
+            testValues.actual.precisionBeforeDequantization << "_" <<
+            testValues.actual.dequantizationOnActivations << "_" << "_weights_" <<
+            testValues.actual.weights->get_element_type() << "_" << "{ " <<
+            testValues.actual.weights->cast_vector<float>()[0] << " }_" <<
+            testValues.actual.fakeQuantizeOnWeights << "_" <<
+            transposeAndIsAsymmetricOnWeights.first[0] << "_" <<
+            transposeAndIsAsymmetricOnWeights.first[1] << "_" <<
+            transposeAndIsAsymmetricOnWeights.first[2] << "_" <<
+            transposeAndIsAsymmetricOnWeights.first[3] << "_" <<
+            transposeAndIsAsymmetricOnWeights.first[4];
+        return result.str();
+    }
+};
+
+TEST_P(IsAsymmetricOnWeightsTransformation, CompareFunctions) {
+    actualFunction->validate_nodes_and_infer_types();
+
+    const auto convolutions = LayerTransformation::get<opset1::Convolution>(actualFunction);
+    ASSERT_TRUE(convolutions.size() == 1ul) << "convolution was not found";
+
+    const auto isAsymmetricOnWeights = ngraph::pass::low_precision::WeightableLayerTransformation::isAsymmetricOnWeights(convolutions[0]);
+    std::pair<std::vector<bool>, bool> transposeAndIsAsymmetricOnWeights = std::get<3>(GetParam());
+    ASSERT_EQ(transposeAndIsAsymmetricOnWeights.second, isAsymmetricOnWeights);
+}
+
+const std::vector<element::Type> netPrecisions = {
+    element::f32
+};
+
+const std::vector<ngraph::PartialShape> suitablePartialShapes = {
+    ngraph::PartialShape({ 1, 3, 72, 48 }),
+    ngraph::PartialShape({ 4, 3, 72, 48 }),
+    ngraph::PartialShape({ Dimension::dynamic(), 3, 72, 48 }),
+    ngraph::PartialShape({ 1, 3, Dimension::dynamic(), Dimension::dynamic() }),
+};
+
+const std::vector<IsAsymmetricOnWeightsTestValues> testValues = {
+    {
+        LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true),
+        {
+            ngraph::element::u8,
+            {{ngraph::element::f32}, { 128.f }, { 0.02f }},
+            op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ 2.f }),
+            { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.f }, { 1.27f } },
+        }
+    }
+};
+
+const std::vector<std::pair<std::vector<bool>, bool> > transposeFlags = {
+    // asymmetric quantization
+    {{false, false, false, false, false}, true},
+    {{true, false, false, false, false}, true},
+
+    // not supported FakeQuantize
+    {{false, true, false, false, false}, false},
+    {{false, false, true, false, false}, false},
+    {{false, false, false, true, false}, false},
+    {{false, false, false, false, true}, false}
+};
+
+INSTANTIATE_TEST_SUITE_P(
+    smoke_LPT,
+    IsAsymmetricOnWeightsTransformation,
+    ::testing::Combine(
+        ::testing::ValuesIn(netPrecisions),
+        ::testing::ValuesIn(suitablePartialShapes),
+        ::testing::ValuesIn(testValues),
+        ::testing::ValuesIn(transposeFlags)),
+    IsAsymmetricOnWeightsTransformation::getTestCaseName);
diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/convolution_function.hpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/convolution_function.hpp
index 325b981ec16..f552cf8503d 100644
--- a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/convolution_function.hpp
+++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/convolution_function.hpp
@@ -24,7 +24,12 @@ public:
         const ngraph::PartialShape& inputShape,
         const ngraph::builder::subgraph::DequantizationOperations& dequantizationBefore,
         std::shared_ptr<ngraph::opset1::Constant> weights,
-        const ngraph::builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights);
+        const ngraph::builder::subgraph::FakeQuantizeOnWeights fqOnWeights,
+        const bool fqOnWeightsTransposeOnData = false,
+        const bool fqOnWeightsTransposeOnInputLow = false,
+        const bool fqOnWeightsTransposeOnInputHigh = false,
+        const bool fqOnWeightsTransposeOnOutputLow = false,
+        const bool fqOnWeightsTransposeOnOutputHigh = false);
 
     static std::shared_ptr<ngraph::Function> getOriginalWithIncorrectWeights(
         const ngraph::Shape& inputShape,
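The five new flags map one-to-one to the five FakeQuantize inputs (data, input low, input high, output low, output high); setting any one of them wraps that input in a Transpose, producing a FakeQuantize whose constant layout LPT does not support, which is exactly what the test's "not supported FakeQuantize" cases exercise. The core pattern, reduced to a hypothetical helper:

    #include <memory>
    #include <utility>

    struct Node {};  // stand-in for an ngraph node
    using NodePtr = std::shared_ptr<Node>;

    NodePtr makeTranspose(NodePtr input) {
        // Stands in for std::make_shared<ngraph::opset1::Transpose>(input, order).
        return std::make_shared<Node>();
    }

    // Wrap the given FakeQuantize input in a Transpose only when the
    // matching flag from the test parameters is set.
    NodePtr maybeTranspose(NodePtr input, bool transposeFlag) {
        return transposeFlag ? makeTranspose(std::move(input)) : input;
    }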
diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/convolution_function.cpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/convolution_function.cpp
index 1d2dc22bb86..cc50bce4546 100644
--- a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/convolution_function.cpp
+++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/convolution_function.cpp
@@ -27,7 +27,12 @@ std::shared_ptr<ngraph::Function> ConvolutionFunction::getOriginal(
     const ngraph::PartialShape& inputShape,
     const ngraph::builder::subgraph::DequantizationOperations& dequantizationBefore,
     std::shared_ptr<ngraph::opset1::Constant> weights,
-    const ngraph::builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights) {
+    const ngraph::builder::subgraph::FakeQuantizeOnWeights fqOnWeights,
+    const bool transposeOnData,
+    const bool transposeOnInputLow,
+    const bool transposeOnInputHigh,
+    const bool transposeOnOutputLow,
+    const bool transposeOnOutputHigh) {
     const auto input = std::make_shared<ngraph::opset1::Parameter>(inputPrecision, inputShape);
     auto dequantizationStructure = dequantizationBefore;
     dequantizationStructure.multiply.outPrecision = netPrecision;
@@ -53,15 +58,32 @@ std::shared_ptr<ngraph::Function> ConvolutionFunction::getOriginal(
     convertOnWeights->constant_fold(convertedOutput, convertOnWeights->input_values());
     const auto convertedWeights = convertedOutput[0].get_node_shared_ptr();
 
-    const auto onWeights = fakeQuantizeOnWeights.empty() ? convertedWeights :
-        ngraph::builder::makeFakeQuantize(
-            convertedWeights, netPrecision,
-            fakeQuantizeOnWeights.quantizationLevel,
-            fakeQuantizeOnWeights.constantShape,
-            fakeQuantizeOnWeights.inputLowValues,
-            fakeQuantizeOnWeights.inputHighValues,
-            fakeQuantizeOnWeights.outputLowValues,
-            fakeQuantizeOnWeights.outputHighValues);
+    const std::shared_ptr<ngraph::opset1::Constant> constant = ngraph::opset1::Constant::create(ngraph::element::u64, ngraph::Shape{4}, {0, 1, 2, 3});
+    const std::shared_ptr<ngraph::Node> onWeights = fqOnWeights.empty() ?
+        convertedWeights :
+        std::make_shared<ngraph::opset1::FakeQuantize>(
+            transposeOnData ? std::make_shared<ngraph::opset1::Transpose>(convertedWeights, constant) : convertedWeights,
+            transposeOnInputLow ?
+                std::make_shared<ngraph::opset1::Transpose>(
+                    makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.inputLowValues, fqOnWeights.inputLowValues.empty()),
+                    constant->clone_with_new_inputs({})) :
+                makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.inputLowValues, fqOnWeights.inputLowValues.empty()),
+            transposeOnInputHigh ?
+                std::make_shared<ngraph::opset1::Transpose>(
+                    makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.inputHighValues, fqOnWeights.inputHighValues.empty()),
+                    constant->clone_with_new_inputs({})) :
+                makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.inputHighValues, fqOnWeights.inputHighValues.empty()),
+            transposeOnOutputLow ?
+                std::make_shared<ngraph::opset1::Transpose>(
+                    makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.outputLowValues, fqOnWeights.outputLowValues.empty()),
+                    constant->clone_with_new_inputs({})) :
+                makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.outputLowValues, fqOnWeights.outputLowValues.empty()),
+            transposeOnOutputHigh ?
+                std::make_shared<ngraph::opset1::Transpose>(
+                    makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.outputHighValues, fqOnWeights.outputHighValues.empty()),
+                    constant->clone_with_new_inputs({})) :
+                makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.outputHighValues, fqOnWeights.outputHighValues.empty()),
+            fqOnWeights.quantizationLevel);
 
     auto convolutionOriginal = ngraph::opset1::Convolution(
         ngraph::op::TemporaryReplaceOutputType(dequantization, netPrecision).get(),
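For reference, a call shaped like the new test's SetUp (values taken from the test data above) builds a convolution whose weights FakeQuantize has a transposed input-low constant; isAsymmetricOnWeights is then expected to return false for it. This is a usage sketch, not code from the patch:

    const auto function = ngraph::builder::subgraph::ConvolutionFunction::getOriginal(
        ngraph::element::f32,                                // netPrecision
        ngraph::element::u8,                                 // precision before dequantization
        ngraph::PartialShape({ 1, 3, 72, 48 }),              // input shape
        {{ngraph::element::f32}, { 128.f }, { 0.02f }},      // dequantization on activations
        ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ 2.f }),
        { 255ul, ngraph::Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.f }, { 1.27f } },
        false,  // transposeOnData
        true);  // transposeOnInputLow: FakeQuantize layout no longer supported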