diff --git a/src/common/low_precision_transformations/include/low_precision/layer_transformation.hpp b/src/common/low_precision_transformations/include/low_precision/layer_transformation.hpp
index 0fb0725209f..3a9e27e3358 100644
--- a/src/common/low_precision_transformations/include/low_precision/layer_transformation.hpp
+++ b/src/common/low_precision_transformations/include/low_precision/layer_transformation.hpp
@@ -5,6 +5,7 @@
 #pragma once
 
 #include <algorithm>
+#include <cassert>
 #include <limits>
 #include <list>
 #include <memory>
@@ -76,6 +77,9 @@ public:
         hasZeroPoint(hasZeroPoint) {}
 
     bool empty() const noexcept {
+        assert(
+            ((precision == element::undefined) && (min == 0.f) && (max == 0.f) && (!hasZeroPoint)) ||
+            ((precision != element::undefined) && (max != 0.f)));
         return (precision == element::undefined) && (min == 0.f) && (max == 0.f) && (!hasZeroPoint);
     }
 
@@ -310,7 +314,7 @@ public:
     static DataPrecision getDataPrecision(
         const std::shared_ptr<Node>& layer,
         const QuantizationDetails& quantizationDetails,
-        const std::vector<element::Type>& precisions);
+        const std::vector<element::Type>& requiredPrecisions);
 
     static void setDefaultPrecisions(const std::vector<element::Type>& precisions);
     static std::vector<element::Type> getDefaultPrecisions();
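Note on the `assert` added to `empty()`: it makes the struct's implicit contract explicit — a `DataPrecision` is either fully unset (undefined precision, zero interval, no zero point) or fully initialized (defined precision, non-zero `max`) — so callers can use `empty()` as a validity check. A minimal standalone sketch of that invariant (simplified stand-in type, not the real `DataPrecision` class):

```cpp
#include <cassert>

// Simplified stand-in for DataPrecision; "defined" models precision != element::undefined.
struct DataPrecisionSketch {
    bool defined = false;
    float min = 0.f;
    float max = 0.f;
    bool hasZeroPoint = false;

    bool empty() const noexcept {
        // No "half-initialized" state is expected: either everything is unset,
        // or the precision is defined and the interval is non-degenerate.
        assert((!defined && min == 0.f && max == 0.f && !hasZeroPoint) ||
               (defined && max != 0.f));
        return !defined && min == 0.f && max == 0.f && !hasZeroPoint;
    }
};

int main() {
    DataPrecisionSketch unset;                        // default-constructed: empty
    DataPrecisionSketch u8{true, 0.f, 255.f, false};  // fully initialized: not empty
    assert(unset.empty() && !u8.empty());
}
```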
diff --git a/src/common/low_precision_transformations/src/convolution.cpp b/src/common/low_precision_transformations/src/convolution.cpp
index 3c3966eab20..7c8cf5ef66a 100644
--- a/src/common/low_precision_transformations/src/convolution.cpp
+++ b/src/common/low_precision_transformations/src/convolution.cpp
@@ -85,6 +85,12 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph::pattern::Matcher &m) {
     }
 
     convolution = NetworkHelper::separateInStandaloneBranch(convolution);
+
+    const bool fqOnWeightsWasDecomposed = decomposeFakeQuantizeForWeightsPath(convolution);
+    if (updatePrecisions && !fqOnWeightsWasDecomposed) {
+        return false;
+    }
+
     FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(convolution);
 
     std::shared_ptr<Node> newMultiplyAfter;
@@ -199,9 +205,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph::pattern::Matcher &m) {
     }
 
     {
-        const bool decomposed = decomposeFakeQuantizeForWeightsPath(convolution);
-        assert((updatePrecisions && decomposed) || (!updatePrecisions));
-        if (!updatePrecisions && !fqOnWeightsWasDecomposed) {
+        if (!updatePrecisions && !fqOnWeightsWasDecomposed) {
             // TODO: LPT: issue #58685
             return false;
         }
diff --git a/src/common/low_precision_transformations/src/layer_transformation.cpp b/src/common/low_precision_transformations/src/layer_transformation.cpp
index 6deb6c25eca..d1d8f6f0c0e 100644
--- a/src/common/low_precision_transformations/src/layer_transformation.cpp
+++ b/src/common/low_precision_transformations/src/layer_transformation.cpp
@@ -264,6 +264,7 @@ LayerTransformation::PrecisionDetails LayerTransformation::getPrecisionDetails(
     }
 
     element::Type resultPrecision = element::undefined;
+    // if a zero point exists, then the result precision has to be defined by client code
     if (!hasZeroPoint) {
         if (signedPrecision && (!unsignedPrecision)) {
             switch (quantizationLevels) {
@@ -323,49 +324,47 @@ bool LayerTransformation::isQuantized(const std::shared_ptr<const Node>& layer)
 DataPrecision LayerTransformation::getDataPrecision(
     const std::shared_ptr<Node>& layer,
     const QuantizationDetails& quantizationDetails,
-    const std::vector<element::Type>& precisions) {
+    const std::vector<element::Type>& requiredPrecisions) {
 #ifdef LPT_PRINT_DEQUANTIZATION_INFO
     printDequantizationInfo(layer);
 #endif
-    std::vector<element::Type> resultPrecisions = precisions;
-    std::vector<element::Type> FQPrecisions;
-    switch (quantizationDetails.levels) {
-        case levels::int8:
-        case levels::int8_narrow_range:
-            FQPrecisions = {element::u8, element::i8};
-            break;
-        case levels::int16:
-        case levels::int16_narrow_range:
-            FQPrecisions = {element::u16, element::i16};
-            break;
-        case levels::int32:
-        case levels::int32_narrow_range:
-            FQPrecisions = {element::u32, element::i32};
-    }
-    resultPrecisions = NetworkHelper::precisionIntersection(precisions, FQPrecisions);
     PrecisionDetails precisionDetailsAtOutputIntervals = getPrecisionDetails(quantizationDetails);
     if (precisionDetailsAtOutputIntervals.precision != element::undefined) {
-        // if supportedPrecisions is empty then use the first available, not supported layer will be in original precision
-        if (!precisions.empty()) {
-            const auto foundIt = std::find(precisions.begin(), precisions.end(), precisionDetailsAtOutputIntervals.precision);
-            const element::Type resultPrecision = foundIt != precisions.end() ?
+        // FakeQuantize optimal precision is defined
+        if (!requiredPrecisions.empty()) {
+            const auto foundIt = std::find(requiredPrecisions.begin(), requiredPrecisions.end(), precisionDetailsAtOutputIntervals.precision);
+            const element::Type resultPrecision = foundIt != requiredPrecisions.end() ?
                 precisionDetailsAtOutputIntervals.precision :
-                *precisions.begin();
+                *requiredPrecisions.begin();
 
-            const DataPrecision dataPrecision(
+            return DataPrecision(
                 resultPrecision,
                 DataPrecision::getMinValue(resultPrecision, quantizationDetails.levels),
                 DataPrecision::getMaxValue(resultPrecision, quantizationDetails.levels),
-                foundIt != precisions.end() ? precisionDetailsAtOutputIntervals.hasZeroPoint : true);
-
-            return dataPrecision;
+                foundIt != requiredPrecisions.end() ? precisionDetailsAtOutputIntervals.hasZeroPoint : true);
+        }
+    } else {
+        // FakeQuantize optimal precision is not defined
+        if (!requiredPrecisions.empty()) {
+            const element::Type resultPrecision = *requiredPrecisions.begin();
+            return DataPrecision(
+                resultPrecision,
+                DataPrecision::getMinValue(resultPrecision, quantizationDetails.levels),
+                DataPrecision::getMaxValue(resultPrecision, quantizationDetails.levels),
+                true);
+        } else {
+            // required precisions are not defined and the precision cannot be derived from FakeQuantize: something went wrong,
+            // return an invalid (empty) value
+            return DataPrecision();
         }
     }
+
+    // requiredPrecisions is empty: use the FakeQuantize optimal precision
     return DataPrecision(
         precisionDetailsAtOutputIntervals.precision,
-        0.f,
-        0.f,
+        DataPrecision::getMinValue(precisionDetailsAtOutputIntervals.precision, quantizationDetails.levels),
+        DataPrecision::getMaxValue(precisionDetailsAtOutputIntervals.precision, quantizationDetails.levels),
         precisionDetailsAtOutputIntervals.hasZeroPoint);
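For review convenience, the rewritten `getDataPrecision` now handles three cases explicitly instead of intersecting the required precisions with a levels-derived list. A condensed sketch of the decision logic (hypothetical simplified types, not the real LPT API; `optimal` is undefined exactly when the interval analysis detected a zero point):

```cpp
#include <algorithm>
#include <vector>

enum class Prec { undefined, u8, i8 };

struct Result {
    Prec prec;
    bool hasZeroPoint;
    bool empty;
};

Result getDataPrecisionSketch(Prec optimal,                         // from the FakeQuantize intervals
                              bool optimalHasZeroPoint,
                              const std::vector<Prec>& required) {  // from client code / attributes
    if (optimal != Prec::undefined) {
        if (!required.empty()) {
            const bool supported =
                std::find(required.begin(), required.end(), optimal) != required.end();
            // Keep the interval-derived zero-point flag only when the optimal
            // precision is actually allowed; otherwise a zero point is assumed.
            return {supported ? optimal : required.front(),
                    supported ? optimalHasZeroPoint : true,
                    false};
        }
        // No requirements: use the optimal precision (now with real min/max
        // values instead of the previous 0.f placeholders).
        return {optimal, optimalHasZeroPoint, false};
    }
    if (!required.empty()) {
        // Optimal precision unknown: take the first required one, zero point assumed.
        return {required.front(), true, false};
    }
    // Nothing to decide from: return the invalid (empty) value the callers now check.
    return {Prec::undefined, false, true};
}

int main() {
    const auto r = getDataPrecisionSketch(Prec::i8, false, {Prec::u8});
    return (r.prec == Prec::u8 && r.hasZeroPoint && !r.empty) ? 0 : 1;
}
```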
diff --git a/src/common/low_precision_transformations/src/mat_mul.cpp b/src/common/low_precision_transformations/src/mat_mul.cpp
index 0dde0c45440..0f396bf4444 100644
--- a/src/common/low_precision_transformations/src/mat_mul.cpp
+++ b/src/common/low_precision_transformations/src/mat_mul.cpp
@@ -59,6 +59,9 @@ bool MatMulTransformation::transform(TransformationContext &context, ngraph::pattern::Matcher &m) {
         getDefaultPrecisions() :
         precisionsAttribute.as<PrecisionsAttribute>().value();
     const DataPrecision dataPrecision = getDataPrecision(fakeQuantize, quantizationDetails, precisions);
+    if (dataPrecision.empty()) {
+        return false;
+    }
 
     auto tuple = NetworkHelper::decomposeFakeQuantize(
         fakeQuantize,
@@ -261,7 +264,7 @@ bool MatMulTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const {
             precisionsAttribute.as<PrecisionsAttribute>().value();
 
         const DataPrecision dataPrecision = getDataPrecision(fakeQuantize, quantizationDetails, precisions);
-        if (dataPrecision.hasZeroPoint) {
+        if (dataPrecision.hasZeroPoint || dataPrecision.empty()) {
            return false;
         }
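Context for the strengthened `canBeTransformed` check above: MatMul cannot absorb a zero point, and forcing an unsigned precision onto a signed interval always produces one. A back-of-the-envelope check using the usual affine quantization x ≈ scale * (q - zeroPoint) (illustrative formulas, not the exact LPT helpers):

```cpp
#include <cstdio>

int main() {
    // Signed FakeQuantize intervals with 256 levels, as in the tests below.
    const float lo = -1.28f, hi = 1.27f;

    // Forcing u8 (q in [0, 255]) onto this interval needs a non-zero zero point:
    const float scale = (hi - lo) / 255.f;  // 0.01
    const float zeroPoint = -lo / scale;    // 128 -> hasZeroPoint == true
    std::printf("u8: scale=%g zeroPoint=%g\n", scale, zeroPoint);

    // With i8 (q in [-128, 127]) the same interval maps as x = 0.01 * q,
    // i.e. zeroPoint == 0, which is why i8 is reported without a zero point.
    return 0;
}
```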
diff --git a/src/common/low_precision_transformations/src/weightable_layer_transformation.cpp b/src/common/low_precision_transformations/src/weightable_layer_transformation.cpp
index c57ffd5d9b4..544dc3fc869 100644
--- a/src/common/low_precision_transformations/src/weightable_layer_transformation.cpp
+++ b/src/common/low_precision_transformations/src/weightable_layer_transformation.cpp
@@ -304,6 +304,9 @@ bool WeightableLayerTransformation::decomposeFakeQuantizeForWeightsPath(const std::shared_ptr<Node>& node) const {
             precisionsAttribute.as<PrecisionsAttribute>().value();
 
     const DataPrecision dataPrecision = getDataPrecision(fq, quantizationDetails, precisions);
+    if (dataPrecision.empty()) {
+        return false;
+    }
 
     auto tuple = NetworkHelper::decomposeFakeQuantize(
         fq,
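The new unit tests below cover each branch. One subtlety they exercise: the interval [-0.875227511f, 0.882119f] is signed but not symmetric in the -128/127 sense, so even its "native" i8 mapping needs a zero point, and with no required precisions `getDataPrecision` returns the empty value. A rough sketch of that symmetry test (assumed ratio-with-tolerance logic, not the exact `getPrecisionDetails` implementation):

```cpp
#include <cmath>
#include <cstdio>

int main() {
    const float lo = -0.875227511f, hi = 0.882119f;

    // A signed 8-bit interval maps without a zero point only if lo/hi == -128/127.
    const float expectedRatio = -128.f / 127.f;  // ~ -1.007874
    const float actualRatio = lo / hi;           // ~ -0.992189
    const bool symmetric = std::fabs(actualRatio - expectedRatio) < 1e-4f;

    // Asymmetric -> hasZeroPoint == true -> the optimal precision stays undefined
    // and must be supplied by client code via requiredPrecisions.
    std::printf("symmetric=%d -> hasZeroPoint=%d\n", symmetric, !symmetric);
    return 0;
}
```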
diff --git a/src/tests/unit/inference_engine/transformations/low_precision/get_data_precision.cpp b/src/tests/unit/inference_engine/transformations/low_precision/get_data_precision.cpp
new file mode 100644
index 00000000000..d6f6c0a2e28
--- /dev/null
+++ b/src/tests/unit/inference_engine/transformations/low_precision/get_data_precision.cpp
@@ -0,0 +1,141 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+
+#include <memory>
+
+#include <ngraph/opsets/opset1.hpp>
+#include <low_precision/layer_transformation.hpp>
+#include <low_precision/quantization_details.hpp>
+#include "ngraph_functions/builders.hpp"
+
+using namespace ngraph;
+
+TEST(LPT_GetDataPrecision, getDataPrecision_reqU8_U8_to_U8) {
+    const auto input = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 3, 299, 299});
+    const auto low = std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{0.f});
+    const auto high = std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{2.55f});
+    const auto fakeQuantize = std::make_shared<opset1::FakeQuantize>(input, low, high, low, high, 256);
+
+    const auto quantizationDetails = pass::low_precision::QuantizationDetails::getDetails(fakeQuantize);
+
+    const auto precisionDetails = pass::low_precision::LayerTransformation::getDataPrecision(fakeQuantize, quantizationDetails, {element::u8});
+    ASSERT_EQ(element::u8, precisionDetails.precision);
+    ASSERT_EQ(0.f, precisionDetails.min);
+    ASSERT_EQ(255.f, precisionDetails.max);
+    ASSERT_EQ(false, precisionDetails.hasZeroPoint);
+    ASSERT_EQ(false, precisionDetails.empty());
+}
+
+TEST(LPT_GetDataPrecision, getDataPrecision_reqI8_I8_to_I8) {
+    const auto input = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 3, 299, 299});
+    const auto low = std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{-1.28f});
+    const auto high = std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{1.27f});
+    const auto fakeQuantize = std::make_shared<opset1::FakeQuantize>(input, low, high, low, high, 256);
+
+    const auto quantizationDetails = pass::low_precision::QuantizationDetails::getDetails(fakeQuantize);
+
+    const auto precisionDetails = pass::low_precision::LayerTransformation::getDataPrecision(fakeQuantize, quantizationDetails, {element::i8});
+    ASSERT_EQ(element::i8, precisionDetails.precision);
+    ASSERT_EQ(-128.f, precisionDetails.min);
+    ASSERT_EQ(127.f, precisionDetails.max);
+    ASSERT_EQ(false, precisionDetails.hasZeroPoint);
+    ASSERT_EQ(false, precisionDetails.empty());
+}
+
+TEST(LPT_GetDataPrecision, getDataPrecision_reqU8_I8_to_U8zp) {
+    const auto input = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 3, 299, 299});
+    const auto low = std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{-1.28f});
+    const auto high = std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{1.27f});
+    const auto fakeQuantize = std::make_shared<opset1::FakeQuantize>(input, low, high, low, high, 256);
+
+    const auto quantizationDetails = pass::low_precision::QuantizationDetails::getDetails(fakeQuantize);
+
+    const auto precisionDetails = pass::low_precision::LayerTransformation::getDataPrecision(fakeQuantize, quantizationDetails, {element::u8});
+    ASSERT_EQ(element::u8, precisionDetails.precision);
+    ASSERT_EQ(0.f, precisionDetails.min);
+    ASSERT_EQ(255.f, precisionDetails.max);
+    ASSERT_EQ(true, precisionDetails.hasZeroPoint);
+    ASSERT_EQ(false, precisionDetails.empty());
+}
+
+TEST(LPT_GetDataPrecision, getDataPrecision_reqI8_U8_to_I8zp) {
+    const auto input = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 3, 299, 299});
+    const auto low = std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{0.f});
+    const auto high = std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{2.55f});
+    const auto fakeQuantize = std::make_shared<opset1::FakeQuantize>(input, low, high, low, high, 256);
+
+    const auto quantizationDetails = pass::low_precision::QuantizationDetails::getDetails(fakeQuantize);
+
+    const auto precisionDetails = pass::low_precision::LayerTransformation::getDataPrecision(fakeQuantize, quantizationDetails, {element::i8});
+    ASSERT_EQ(element::i8, precisionDetails.precision);
+    ASSERT_EQ(-128.f, precisionDetails.min);
+    ASSERT_EQ(127.f, precisionDetails.max);
+    ASSERT_EQ(true, precisionDetails.hasZeroPoint);
+    ASSERT_EQ(false, precisionDetails.empty());
+}
+
+TEST(LPT_GetDataPrecision, getDataPrecision_reqU8_I8zp_to_U8zp) {
+    const auto input = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 3, 299, 299});
+    const auto low = std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{-0.875227511f});
+    const auto high = std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{0.882119000f});
+    const auto fakeQuantize = std::make_shared<opset1::FakeQuantize>(input, low, high, low, high, 256);
+
+    const auto quantizationDetails = pass::low_precision::QuantizationDetails::getDetails(fakeQuantize);
+
+    const auto precisionDetails = pass::low_precision::LayerTransformation::getDataPrecision(fakeQuantize, quantizationDetails, {element::u8});
+    ASSERT_EQ(element::u8, precisionDetails.precision);
+    ASSERT_EQ(0.f, precisionDetails.min);
+    ASSERT_EQ(255.f, precisionDetails.max);
+    ASSERT_EQ(true, precisionDetails.hasZeroPoint);
+    ASSERT_EQ(false, precisionDetails.empty());
+}
+
+TEST(LPT_GetDataPrecision, getDataPrecision_reqI8_U8zp_to_I8zp) {
+    const auto input = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 3, 299, 299});
+    const auto low = std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{0.875227511f});
+    const auto high = std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{0.882119000f});
+    const auto fakeQuantize = std::make_shared<opset1::FakeQuantize>(input, low, high, low, high, 256);
+
+    const auto quantizationDetails = pass::low_precision::QuantizationDetails::getDetails(fakeQuantize);
+
+    const auto precisionDetails = pass::low_precision::LayerTransformation::getDataPrecision(fakeQuantize, quantizationDetails, {element::i8});
+    ASSERT_EQ(element::i8, precisionDetails.precision);
+    ASSERT_EQ(-128.f, precisionDetails.min);
+    ASSERT_EQ(127.f, precisionDetails.max);
+    ASSERT_EQ(true, precisionDetails.hasZeroPoint);
+    ASSERT_EQ(false, precisionDetails.empty());
+}
+
+TEST(LPT_GetDataPrecision, getDataPrecision_reqNone_I8zp_to_undefzp) {
+    const auto input = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 3, 299, 299});
+    const auto low = std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{-0.875227511f});
+    const auto high = std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{0.882119000f});
+    const auto fakeQuantize = std::make_shared<opset1::FakeQuantize>(input, low, high, low, high, 256);
+
+    const auto quantizationDetails = pass::low_precision::QuantizationDetails::getDetails(fakeQuantize);
+
+    const auto precisionDetails = pass::low_precision::LayerTransformation::getDataPrecision(fakeQuantize, quantizationDetails, {});
+    ASSERT_EQ(element::undefined, precisionDetails.precision);
+    ASSERT_EQ(0.f, precisionDetails.min);
+    ASSERT_EQ(0.f, precisionDetails.max);
+    ASSERT_EQ(false, precisionDetails.hasZeroPoint);
+    ASSERT_EQ(true, precisionDetails.empty());
+}
+
+TEST(LPT_GetDataPrecision, getDataPrecision_reqNone_U8zp_to_undefzp) {
+    const auto input = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 3, 299, 299});
+    const auto low = std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{0.875227511f});
+    const auto high = std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{0.882119000f});
+    const auto fakeQuantize = std::make_shared<opset1::FakeQuantize>(input, low, high, low, high, 256);
+
+    const auto quantizationDetails = pass::low_precision::QuantizationDetails::getDetails(fakeQuantize);
+
+    const auto precisionDetails = pass::low_precision::LayerTransformation::getDataPrecision(fakeQuantize, quantizationDetails, {});
+    ASSERT_EQ(element::undefined, precisionDetails.precision);
+    ASSERT_EQ(0.f, precisionDetails.min);
+    ASSERT_EQ(0.f, precisionDetails.max);
+    ASSERT_EQ(false, precisionDetails.hasZeroPoint);
+    ASSERT_EQ(true, precisionDetails.empty());
+}