[LPT] getDataPrecision extending (#10071)

* [LPT] getDataPrecision extending

* [LPT] getDataPrecision unit tests addition
This commit is contained in:
Edward Shogulin 2022-02-07 19:49:01 +03:00 committed by GitHub
parent e34ff009e0
commit c6c9a06d41
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 187 additions and 33 deletions

View File

@ -5,6 +5,7 @@
#pragma once
#include <algorithm>
#include <cassert>
#include <limits>
#include <list>
#include <memory>
@ -76,6 +77,9 @@ public:
hasZeroPoint(hasZeroPoint) {}
bool empty() const noexcept {
assert(
((precision == element::undefined) && (min == 0.f) && (max == 0.f) && (!hasZeroPoint)) ||
((precision != element::undefined) && (max != 0.f)));
return (precision == element::undefined) && (min == 0.f) && (max == 0.f) && (!hasZeroPoint);
}
@ -310,7 +314,7 @@ public:
static DataPrecision getDataPrecision(
const std::shared_ptr<Node>& layer,
const QuantizationDetails& quantizationDetails,
const std::vector<element::Type>& precisions);
const std::vector<element::Type>& requiredPrecisions);
static void setDefaultPrecisions(const std::vector<ngraph::element::Type>& precisions);
static std::vector<ngraph::element::Type> getDefaultPrecisions();

View File

@ -85,6 +85,12 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
}
convolution = NetworkHelper::separateInStandaloneBranch(convolution);
const bool fqOnWeightsWasDecomposed = decomposeFakeQuantizeForWeightsPath(convolution);
if (updatePrecisions && !fqOnWeightsWasDecomposed) {
return false;
}
FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(convolution);
std::shared_ptr<Node> newMultiplyAfter;
@ -199,9 +205,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
}
{
const bool decomposed = decomposeFakeQuantizeForWeightsPath(convolution);
assert((updatePrecisions && decomposed) || (!updatePrecisions));
if (!updatePrecisions && !decomposed) {
if (!updatePrecisions && !fqOnWeightsWasDecomposed) {
// TODO: LPT: issue #58685
return false;
}

View File

@ -264,6 +264,7 @@ LayerTransformation::PrecisionDetails LayerTransformation::getPrecisionDetails(
}
element::Type resultPrecision = element::undefined;
// if zero point exists then result precision has to be defined by client code
if (!hasZeroPoint) {
if (signedPrecision && (!unsignedPrecision)) {
switch (quantizationLevels) {
@ -323,49 +324,47 @@ bool LayerTransformation::isQuantized(const std::shared_ptr<const Node>& layer)
DataPrecision LayerTransformation::getDataPrecision(
const std::shared_ptr<Node>& layer,
const QuantizationDetails& quantizationDetails,
const std::vector<element::Type>& precisions) {
const std::vector<element::Type>& requiredPrecisions) {
#ifdef LPT_PRINT_DEQUANTIZATION_INFO
printDequantizationInfo(layer);
#endif
std::vector<element::Type> resultPrecisions = precisions;
std::vector<element::Type> FQPrecisions;
switch (quantizationDetails.levels) {
case levels::int8:
case levels::int8_narrow_range:
FQPrecisions = {element::u8, element::i8};
break;
case levels::int16:
case levels::int16_narrow_range:
FQPrecisions = {element::u16, element::i16};
break;
case levels::int32:
case levels::int32_narrow_range:
FQPrecisions = {element::u32, element::i32};
}
resultPrecisions = NetworkHelper::precisionIntersection(precisions, FQPrecisions);
PrecisionDetails precisionDetailsAtOutputIntervals = getPrecisionDetails(quantizationDetails);
if (precisionDetailsAtOutputIntervals.precision != element::undefined) {
// if supportedPrecisions is empty then use the first available, not supported layer will be in original precision
if (!precisions.empty()) {
const auto foundIt = std::find(precisions.begin(), precisions.end(), precisionDetailsAtOutputIntervals.precision);
const element::Type resultPrecision = foundIt != precisions.end() ?
// FakeQuantize optimal precision is defined
if (!requiredPrecisions.empty()) {
const auto foundIt = std::find(requiredPrecisions.begin(), requiredPrecisions.end(), precisionDetailsAtOutputIntervals.precision);
const element::Type resultPrecision = foundIt != requiredPrecisions.end() ?
precisionDetailsAtOutputIntervals.precision :
*precisions.begin();
*requiredPrecisions.begin();
const DataPrecision dataPrecision(
return DataPrecision(
resultPrecision,
DataPrecision::getMinValue(resultPrecision, quantizationDetails.levels),
DataPrecision::getMaxValue(resultPrecision, quantizationDetails.levels),
foundIt != precisions.end() ? precisionDetailsAtOutputIntervals.hasZeroPoint : true);
return dataPrecision;
foundIt != requiredPrecisions.end() ? precisionDetailsAtOutputIntervals.hasZeroPoint : true);
}
} else {
// FakeQuantize optimal precision is not defined
if (!requiredPrecisions.empty()) {
const element::Type resultPrecision = *requiredPrecisions.begin();
return DataPrecision(
resultPrecision,
DataPrecision::getMinValue(resultPrecision, quantizationDetails.levels),
DataPrecision::getMaxValue(resultPrecision, quantizationDetails.levels),
true);
} else {
// required precisions are not defined and the precision cannot be derived from FakeQuantize: something is wrong
// return an invalid (empty) value
return DataPrecision();
}
}
// if required precisions are empty then use the FakeQuantize optimal precision
return DataPrecision(
precisionDetailsAtOutputIntervals.precision,
0.f,
0.f,
DataPrecision::getMinValue(precisionDetailsAtOutputIntervals.precision, quantizationDetails.levels),
DataPrecision::getMaxValue(precisionDetailsAtOutputIntervals.precision, quantizationDetails.levels),
precisionDetailsAtOutputIntervals.hasZeroPoint);
}

View File

@ -59,6 +59,9 @@ bool MatMulTransformation::transform(TransformationContext &context, ngraph::pat
getDefaultPrecisions() :
precisionsAttribute.as<PrecisionsAttribute>().value();
const DataPrecision dataPrecision = getDataPrecision(fakeQuantize, quantizationDetails, precisions);
if (dataPrecision.empty()) {
return false;
}
auto tuple = NetworkHelper::decomposeFakeQuantize(
fakeQuantize,
@ -261,7 +264,7 @@ bool MatMulTransformation::canBeTransformed(const TransformationContext& context
precisionsAttribute.as<PrecisionsAttribute>().value();
const DataPrecision dataPrecision = getDataPrecision(fakeQuantize, quantizationDetails, precisions);
if (dataPrecision.hasZeroPoint) {
if (dataPrecision.hasZeroPoint || dataPrecision.empty()) {
return false;
}

View File

@ -304,6 +304,9 @@ bool WeightableLayerTransformation::decomposeFakeQuantizeForWeightsPath(const st
precisionsAttribute.as<PrecisionsAttribute>().value();
const DataPrecision dataPrecision = getDataPrecision(fq, quantizationDetails, precisions);
if (dataPrecision.empty()) {
return false;
}
auto tuple = NetworkHelper::decomposeFakeQuantize(
fq,

View File

@ -0,0 +1,141 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <memory>
#include <gtest/gtest.h>
#include <ie_blob.h>
#include <low_precision/layer_transformation.hpp>
#include <low_precision/network_helper.hpp>
#include "ngraph_functions/builders.hpp"
using namespace ngraph;
// FakeQuantize over [0, 2.55] with 256 levels and required precisions {u8}:
// the test expects u8 with the [0, 255] range and no zero point.
TEST(LPT_GetDataPrecision, getDataPrecision_reqU8_U8_to_U8) {
    namespace lpt = ngraph::pass::low_precision;

    const auto data = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 3, 299, 299});
    const auto intervalLow = std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{0.f});
    const auto intervalHigh = std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{2.55f});
    // the same constants serve as both the input and the output intervals
    const auto fq = std::make_shared<opset1::FakeQuantize>(data, intervalLow, intervalHigh, intervalLow, intervalHigh, 256);

    const auto quantizationDetails = lpt::QuantizationDetails::getDetails(fq);
    const std::vector<element::Type> requiredPrecisions{element::u8};
    const auto dataPrecision = lpt::LayerTransformation::getDataPrecision(fq, quantizationDetails, requiredPrecisions);

    ASSERT_EQ(element::u8, dataPrecision.precision);
    ASSERT_EQ(0.f, dataPrecision.min);
    ASSERT_EQ(255.f, dataPrecision.max);
    ASSERT_EQ(false, dataPrecision.hasZeroPoint);
    ASSERT_EQ(false, dataPrecision.empty());
}
// FakeQuantize over [-1.28, 1.27] with 256 levels and required precisions {i8}:
// the test expects i8 with the [-128, 127] range and no zero point.
TEST(LPT_GetDataPrecision, getDataPrecision_reqI8_I8_to_I8) {
    namespace lpt = ngraph::pass::low_precision;

    const auto data = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 3, 299, 299});
    const auto intervalLow = std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{-1.28f});
    const auto intervalHigh = std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{1.27f});
    // the same constants serve as both the input and the output intervals
    const auto fq = std::make_shared<opset1::FakeQuantize>(data, intervalLow, intervalHigh, intervalLow, intervalHigh, 256);

    const auto quantizationDetails = lpt::QuantizationDetails::getDetails(fq);
    const std::vector<element::Type> requiredPrecisions{element::i8};
    const auto dataPrecision = lpt::LayerTransformation::getDataPrecision(fq, quantizationDetails, requiredPrecisions);

    ASSERT_EQ(element::i8, dataPrecision.precision);
    ASSERT_EQ(-128.f, dataPrecision.min);
    ASSERT_EQ(127.f, dataPrecision.max);
    ASSERT_EQ(false, dataPrecision.hasZeroPoint);
    ASSERT_EQ(false, dataPrecision.empty());
}
// FakeQuantize over the signed interval [-1.28, 1.27] with 256 levels, but only u8 is required:
// the test expects u8 with the [0, 255] range and the zero point flag set.
TEST(LPT_GetDataPrecision, getDataPrecision_reqU8_I8_to_U8zp) {
    namespace lpt = ngraph::pass::low_precision;

    const auto data = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 3, 299, 299});
    const auto intervalLow = std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{-1.28f});
    const auto intervalHigh = std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{1.27f});
    // the same constants serve as both the input and the output intervals
    const auto fq = std::make_shared<opset1::FakeQuantize>(data, intervalLow, intervalHigh, intervalLow, intervalHigh, 256);

    const auto quantizationDetails = lpt::QuantizationDetails::getDetails(fq);
    const std::vector<element::Type> requiredPrecisions{element::u8};
    const auto dataPrecision = lpt::LayerTransformation::getDataPrecision(fq, quantizationDetails, requiredPrecisions);

    ASSERT_EQ(element::u8, dataPrecision.precision);
    ASSERT_EQ(0.f, dataPrecision.min);
    ASSERT_EQ(255.f, dataPrecision.max);
    ASSERT_EQ(true, dataPrecision.hasZeroPoint);
    ASSERT_EQ(false, dataPrecision.empty());
}
// FakeQuantize over the unsigned interval [0, 2.55] with 256 levels, but only i8 is required:
// the test expects i8 with the [-128, 127] range and the zero point flag set.
TEST(LPT_GetDataPrecision, getDataPrecision_reqI8_U8_to_I8zp) {
    namespace lpt = ngraph::pass::low_precision;

    const auto data = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 3, 299, 299});
    const auto intervalLow = std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{0.f});
    const auto intervalHigh = std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{2.55f});
    // the same constants serve as both the input and the output intervals
    const auto fq = std::make_shared<opset1::FakeQuantize>(data, intervalLow, intervalHigh, intervalLow, intervalHigh, 256);

    const auto quantizationDetails = lpt::QuantizationDetails::getDetails(fq);
    const std::vector<element::Type> requiredPrecisions{element::i8};
    const auto dataPrecision = lpt::LayerTransformation::getDataPrecision(fq, quantizationDetails, requiredPrecisions);

    ASSERT_EQ(element::i8, dataPrecision.precision);
    ASSERT_EQ(-128.f, dataPrecision.min);
    ASSERT_EQ(127.f, dataPrecision.max);
    ASSERT_EQ(true, dataPrecision.hasZeroPoint);
    ASSERT_EQ(false, dataPrecision.empty());
}
// FakeQuantize over the asymmetric interval [-0.875227511, 0.882119] with 256 levels and required precisions {u8}:
// the test expects u8 with the [0, 255] range and the zero point flag set.
TEST(LPT_GetDataPrecision, getDataPrecision_reqU8_I8zp_to_U8zp) {
    namespace lpt = ngraph::pass::low_precision;

    const auto data = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 3, 299, 299});
    const auto intervalLow = std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{-0.875227511f});
    const auto intervalHigh = std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{0.882119000f});
    // the same constants serve as both the input and the output intervals
    const auto fq = std::make_shared<opset1::FakeQuantize>(data, intervalLow, intervalHigh, intervalLow, intervalHigh, 256);

    const auto quantizationDetails = lpt::QuantizationDetails::getDetails(fq);
    const std::vector<element::Type> requiredPrecisions{element::u8};
    const auto dataPrecision = lpt::LayerTransformation::getDataPrecision(fq, quantizationDetails, requiredPrecisions);

    ASSERT_EQ(element::u8, dataPrecision.precision);
    ASSERT_EQ(0.f, dataPrecision.min);
    ASSERT_EQ(255.f, dataPrecision.max);
    ASSERT_EQ(true, dataPrecision.hasZeroPoint);
    ASSERT_EQ(false, dataPrecision.empty());
}
// FakeQuantize over the positive interval [0.875227511, 0.882119] with 256 levels and required precisions {i8}:
// the test expects i8 with the [-128, 127] range and the zero point flag set.
TEST(LPT_GetDataPrecision, getDataPrecision_reqI8_U8zp_to_I8zp) {
    namespace lpt = ngraph::pass::low_precision;

    const auto data = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 3, 299, 299});
    const auto intervalLow = std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{0.875227511f});
    const auto intervalHigh = std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{0.882119000f});
    // the same constants serve as both the input and the output intervals
    const auto fq = std::make_shared<opset1::FakeQuantize>(data, intervalLow, intervalHigh, intervalLow, intervalHigh, 256);

    const auto quantizationDetails = lpt::QuantizationDetails::getDetails(fq);
    const std::vector<element::Type> requiredPrecisions{element::i8};
    const auto dataPrecision = lpt::LayerTransformation::getDataPrecision(fq, quantizationDetails, requiredPrecisions);

    ASSERT_EQ(element::i8, dataPrecision.precision);
    ASSERT_EQ(-128.f, dataPrecision.min);
    ASSERT_EQ(127.f, dataPrecision.max);
    ASSERT_EQ(true, dataPrecision.hasZeroPoint);
    ASSERT_EQ(false, dataPrecision.empty());
}
// FakeQuantize over the asymmetric interval [-0.875227511, 0.882119] with 256 levels and no required precisions:
// the test expects an empty DataPrecision (undefined precision, zero range, no zero point).
TEST(LPT_GetDataPrecision, getDataPrecision_reqNone_I8zp_to_undefzp) {
    namespace lpt = ngraph::pass::low_precision;

    const auto data = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 3, 299, 299});
    const auto intervalLow = std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{-0.875227511f});
    const auto intervalHigh = std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{0.882119000f});
    // the same constants serve as both the input and the output intervals
    const auto fq = std::make_shared<opset1::FakeQuantize>(data, intervalLow, intervalHigh, intervalLow, intervalHigh, 256);

    const auto quantizationDetails = lpt::QuantizationDetails::getDetails(fq);
    // no precision is requested by the caller
    const std::vector<element::Type> requiredPrecisions{};
    const auto dataPrecision = lpt::LayerTransformation::getDataPrecision(fq, quantizationDetails, requiredPrecisions);

    ASSERT_EQ(element::undefined, dataPrecision.precision);
    ASSERT_EQ(0.f, dataPrecision.min);
    ASSERT_EQ(0.f, dataPrecision.max);
    ASSERT_EQ(false, dataPrecision.hasZeroPoint);
    ASSERT_EQ(true, dataPrecision.empty());
}
// FakeQuantize over the positive interval [0.875227511, 0.882119] with 256 levels and no required precisions:
// the test expects an empty DataPrecision (undefined precision, zero range, no zero point).
TEST(LPT_GetDataPrecision, getDataPrecision_reqNone_U8zp_to_undefzp) {
    namespace lpt = ngraph::pass::low_precision;

    const auto data = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 3, 299, 299});
    const auto intervalLow = std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{0.875227511f});
    const auto intervalHigh = std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{0.882119000f});
    // the same constants serve as both the input and the output intervals
    const auto fq = std::make_shared<opset1::FakeQuantize>(data, intervalLow, intervalHigh, intervalLow, intervalHigh, 256);

    const auto quantizationDetails = lpt::QuantizationDetails::getDetails(fq);
    // no precision is requested by the caller
    const std::vector<element::Type> requiredPrecisions{};
    const auto dataPrecision = lpt::LayerTransformation::getDataPrecision(fq, quantizationDetails, requiredPrecisions);

    ASSERT_EQ(element::undefined, dataPrecision.precision);
    ASSERT_EQ(0.f, dataPrecision.min);
    ASSERT_EQ(0.f, dataPrecision.max);
    ASSERT_EQ(false, dataPrecision.hasZeroPoint);
    ASSERT_EQ(true, dataPrecision.empty());
}