diff --git a/src/common/low_precision_transformations/include/low_precision/weightable_layer_transformation.hpp b/src/common/low_precision_transformations/include/low_precision/weightable_layer_transformation.hpp
index 09e0f552b54..fd9bf5b2eb7 100644
--- a/src/common/low_precision_transformations/include/low_precision/weightable_layer_transformation.hpp
+++ b/src/common/low_precision_transformations/include/low_precision/weightable_layer_transformation.hpp
@@ -36,7 +36,9 @@ public:
         const std::vector<ngraph::element::Type>& defaultPrecisions = precision_set::int8_support);
 
 protected:
-    bool decomposeFakeQuantizeForWeightsPath(const std::shared_ptr<Node>& weightableLayer, size_t outChannelsShapeIndex = 0ul) const;
+    std::tuple<bool, std::shared_ptr<Node>, std::shared_ptr<Node>> decomposeFakeQuantizeForWeightsPath(
+        const std::shared_ptr<Node>& weightableLayer,
+        size_t outChannelsShapeIndex = 0ul) const;
     static bool isGroup(const std::shared_ptr<Node>& node);
     static bool isDepthwise(const std::shared_ptr<Node>& node);
     virtual size_t getInputChannels(const std::shared_ptr<ngraph::Node> conv) const = 0;
diff --git a/src/common/low_precision_transformations/src/convolution.cpp b/src/common/low_precision_transformations/src/convolution.cpp
index 5fd0abf6075..c46b84cd196 100644
--- a/src/common/low_precision_transformations/src/convolution.cpp
+++ b/src/common/low_precision_transformations/src/convolution.cpp
@@ -88,7 +88,14 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
 
     convolution = NetworkHelper::separateInStandaloneBranch(convolution, defaultPrecisions);
 
-    const bool fqOnWeightsWasDecomposed = decomposeFakeQuantizeForWeightsPath(convolution);
+    const auto& res_tuple = decomposeFakeQuantizeForWeightsPath(convolution);
+
+    auto fqOnWeightsWasDecomposed = std::get<0>(res_tuple);
+    auto newFQ = std::get<1>(res_tuple);
+    auto dequantize = std::get<2>(res_tuple);
+    if (newFQ != nullptr && dequantize != nullptr)
+        updateOutput(context, dequantize, newFQ);
+
     if (updatePrecisions && !fqOnWeightsWasDecomposed) {
         return false;
     }
diff --git a/src/common/low_precision_transformations/src/convolution_backprop_data.cpp b/src/common/low_precision_transformations/src/convolution_backprop_data.cpp
index 62d8f3c3179..122b831a9a7 100644
--- a/src/common/low_precision_transformations/src/convolution_backprop_data.cpp
+++ b/src/common/low_precision_transformations/src/convolution_backprop_data.cpp
@@ -141,7 +141,12 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con
     }
 
     {
-        decomposeFakeQuantizeForWeightsPath(convolutionBackpropData, 1ul);
+        const auto& res_tuple = decomposeFakeQuantizeForWeightsPath(convolutionBackpropData, 1ul);
+        auto newFQ = std::get<1>(res_tuple);
+        auto dequantize = std::get<2>(res_tuple);
+        if (newFQ != nullptr && dequantize != nullptr)
+            updateOutput(context, dequantize, newFQ);
+
         dequantization = NetworkHelper::getDequantization(convolutionBackpropData, defaultPrecisions, 1ul);
 
         if (const auto fq = ov::as_type_ptr<opset1::FakeQuantize>(dequantization.data.get_node_shared_ptr())) {
diff --git a/src/common/low_precision_transformations/src/weightable_layer_transformation.cpp b/src/common/low_precision_transformations/src/weightable_layer_transformation.cpp
index aac6ae58e90..b9a500e5d50 100644
--- a/src/common/low_precision_transformations/src/weightable_layer_transformation.cpp
+++ b/src/common/low_precision_transformations/src/weightable_layer_transformation.cpp
@@ -324,11 +324,13 @@ bool WeightableLayerTransformation::isPrecisionPreserved(std::shared_ptr<Node> l
     return false;
 }
 
-bool WeightableLayerTransformation::decomposeFakeQuantizeForWeightsPath(const std::shared_ptr<Node>& node, const size_t outChannelsShapeIndex) const {
+std::tuple<bool, std::shared_ptr<Node>, std::shared_ptr<Node>> WeightableLayerTransformation::decomposeFakeQuantizeForWeightsPath(
+    const std::shared_ptr<Node>& node,
+    const size_t outChannelsShapeIndex) const {
     const auto fq = getFakeQuantizeOnWeights(node);
     if (fq == nullptr) {
         // FakeQuantize has been decomposed already
-        return true;
+        return std::make_tuple(true, nullptr, nullptr);
     }
 
     const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(fq);
@@ -339,7 +341,7 @@ bool WeightableLayerTransformation::decomposeFakeQuantizeForWeightsPath(const st
 
     const DataPrecision dataPrecision = getDataPrecision(fq, quantizationDetails, precisions);
     if (dataPrecision.empty()) {
-        return false;
+        return std::make_tuple(false, nullptr, nullptr);
     }
 
     auto tuple = NetworkHelper::decomposeFakeQuantize(
@@ -352,17 +354,19 @@ bool WeightableLayerTransformation::decomposeFakeQuantizeForWeightsPath(const st
         element::f32,
         outChannelsShapeIndex);
 
-    std::shared_ptr<ngraph::Node> fqOnWeights = std::get<0>(tuple);
+    std::shared_ptr<ngraph::Node> fqOnWeights = std::get<0>(tuple);
+    std::shared_ptr<ngraph::Node> dequantize = std::get<1>(tuple);
+
     // TODO: LPT: issue #58685
     if ((!updatePrecisions) && (fqOnWeights == nullptr)) {
-        return false;
+        return std::make_tuple(false, nullptr, nullptr);
     }
 
     if (ov::as_type_ptr<opset1::Constant>(fqOnWeights) == nullptr) {
         THROW_IE_LPT_EXCEPTION(*fqOnWeights) << "FakeQuantize on weights was not folded to constant";
     }
 
-    return true;
+    return std::make_tuple(true, fqOnWeights, dequantize);
 }
 
 bool WeightableLayerTransformation::isGroup(const std::shared_ptr<Node>& layer) {