[LPT] getDataPrecision extending (#10071)
* [LPT] getDataPrecision extending * [LPT] getDataPrecision unit tests addition
This commit is contained in:
parent
e34ff009e0
commit
c6c9a06d41
@ -5,6 +5,7 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <cassert>
|
||||||
#include <limits>
|
#include <limits>
|
||||||
#include <list>
|
#include <list>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
@ -76,6 +77,9 @@ public:
|
|||||||
hasZeroPoint(hasZeroPoint) {}
|
hasZeroPoint(hasZeroPoint) {}
|
||||||
|
|
||||||
bool empty() const noexcept {
|
bool empty() const noexcept {
|
||||||
|
assert(
|
||||||
|
((precision == element::undefined) && (min == 0.f) && (max == 0.f) && (!hasZeroPoint)) ||
|
||||||
|
((precision != element::undefined) && (max != 0.f)));
|
||||||
return (precision == element::undefined) && (min == 0.f) && (max == 0.f) && (!hasZeroPoint);
|
return (precision == element::undefined) && (min == 0.f) && (max == 0.f) && (!hasZeroPoint);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -310,7 +314,7 @@ public:
|
|||||||
static DataPrecision getDataPrecision(
|
static DataPrecision getDataPrecision(
|
||||||
const std::shared_ptr<Node>& layer,
|
const std::shared_ptr<Node>& layer,
|
||||||
const QuantizationDetails& quantizationDetails,
|
const QuantizationDetails& quantizationDetails,
|
||||||
const std::vector<element::Type>& precisions);
|
const std::vector<element::Type>& requiredPrecisions);
|
||||||
|
|
||||||
static void setDefaultPrecisions(const std::vector<ngraph::element::Type>& precisions);
|
static void setDefaultPrecisions(const std::vector<ngraph::element::Type>& precisions);
|
||||||
static std::vector<ngraph::element::Type> getDefaultPrecisions();
|
static std::vector<ngraph::element::Type> getDefaultPrecisions();
|
||||||
|
@ -85,6 +85,12 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
|
|||||||
}
|
}
|
||||||
|
|
||||||
convolution = NetworkHelper::separateInStandaloneBranch(convolution);
|
convolution = NetworkHelper::separateInStandaloneBranch(convolution);
|
||||||
|
|
||||||
|
const bool fqOnWeightsWasDecomposed = decomposeFakeQuantizeForWeightsPath(convolution);
|
||||||
|
if (updatePrecisions && !fqOnWeightsWasDecomposed) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(convolution);
|
FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(convolution);
|
||||||
|
|
||||||
std::shared_ptr<Node> newMultiplyAfter;
|
std::shared_ptr<Node> newMultiplyAfter;
|
||||||
@ -199,9 +205,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
|
|||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
const bool decomposed = decomposeFakeQuantizeForWeightsPath(convolution);
|
if (!updatePrecisions && !fqOnWeightsWasDecomposed) {
|
||||||
assert((updatePrecisions && decomposed) || (!updatePrecisions));
|
|
||||||
if (!updatePrecisions && !decomposed) {
|
|
||||||
// TODO: LPT: issue #58685
|
// TODO: LPT: issue #58685
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -264,6 +264,7 @@ LayerTransformation::PrecisionDetails LayerTransformation::getPrecisionDetails(
|
|||||||
}
|
}
|
||||||
|
|
||||||
element::Type resultPrecision = element::undefined;
|
element::Type resultPrecision = element::undefined;
|
||||||
|
// if zero point exists then result precision has to be defined by client code
|
||||||
if (!hasZeroPoint) {
|
if (!hasZeroPoint) {
|
||||||
if (signedPrecision && (!unsignedPrecision)) {
|
if (signedPrecision && (!unsignedPrecision)) {
|
||||||
switch (quantizationLevels) {
|
switch (quantizationLevels) {
|
||||||
@ -323,49 +324,47 @@ bool LayerTransformation::isQuantized(const std::shared_ptr<const Node>& layer)
|
|||||||
DataPrecision LayerTransformation::getDataPrecision(
|
DataPrecision LayerTransformation::getDataPrecision(
|
||||||
const std::shared_ptr<Node>& layer,
|
const std::shared_ptr<Node>& layer,
|
||||||
const QuantizationDetails& quantizationDetails,
|
const QuantizationDetails& quantizationDetails,
|
||||||
const std::vector<element::Type>& precisions) {
|
const std::vector<element::Type>& requiredPrecisions) {
|
||||||
#ifdef LPT_PRINT_DEQUANTIZATION_INFO
|
#ifdef LPT_PRINT_DEQUANTIZATION_INFO
|
||||||
printDequantizationInfo(layer);
|
printDequantizationInfo(layer);
|
||||||
#endif
|
#endif
|
||||||
std::vector<element::Type> resultPrecisions = precisions;
|
|
||||||
std::vector<element::Type> FQPrecisions;
|
|
||||||
switch (quantizationDetails.levels) {
|
|
||||||
case levels::int8:
|
|
||||||
case levels::int8_narrow_range:
|
|
||||||
FQPrecisions = {element::u8, element::i8};
|
|
||||||
break;
|
|
||||||
case levels::int16:
|
|
||||||
case levels::int16_narrow_range:
|
|
||||||
FQPrecisions = {element::u16, element::i16};
|
|
||||||
break;
|
|
||||||
case levels::int32:
|
|
||||||
case levels::int32_narrow_range:
|
|
||||||
FQPrecisions = {element::u32, element::i32};
|
|
||||||
}
|
|
||||||
resultPrecisions = NetworkHelper::precisionIntersection(precisions, FQPrecisions);
|
|
||||||
PrecisionDetails precisionDetailsAtOutputIntervals = getPrecisionDetails(quantizationDetails);
|
PrecisionDetails precisionDetailsAtOutputIntervals = getPrecisionDetails(quantizationDetails);
|
||||||
|
|
||||||
if (precisionDetailsAtOutputIntervals.precision != element::undefined) {
|
if (precisionDetailsAtOutputIntervals.precision != element::undefined) {
|
||||||
// if supportedPrecisions is empty then use the first available, not supported layer will be in original precision
|
// FakeQuantize optimal precision not defined
|
||||||
if (!precisions.empty()) {
|
if (!requiredPrecisions.empty()) {
|
||||||
const auto foundIt = std::find(precisions.begin(), precisions.end(), precisionDetailsAtOutputIntervals.precision);
|
const auto foundIt = std::find(requiredPrecisions.begin(), requiredPrecisions.end(), precisionDetailsAtOutputIntervals.precision);
|
||||||
const element::Type resultPrecision = foundIt != precisions.end() ?
|
const element::Type resultPrecision = foundIt != requiredPrecisions.end() ?
|
||||||
precisionDetailsAtOutputIntervals.precision :
|
precisionDetailsAtOutputIntervals.precision :
|
||||||
*precisions.begin();
|
*requiredPrecisions.begin();
|
||||||
|
|
||||||
const DataPrecision dataPrecision(
|
return DataPrecision(
|
||||||
resultPrecision,
|
resultPrecision,
|
||||||
DataPrecision::getMinValue(resultPrecision, quantizationDetails.levels),
|
DataPrecision::getMinValue(resultPrecision, quantizationDetails.levels),
|
||||||
DataPrecision::getMaxValue(resultPrecision, quantizationDetails.levels),
|
DataPrecision::getMaxValue(resultPrecision, quantizationDetails.levels),
|
||||||
foundIt != precisions.end() ? precisionDetailsAtOutputIntervals.hasZeroPoint : true);
|
foundIt != requiredPrecisions.end() ? precisionDetailsAtOutputIntervals.hasZeroPoint : true);
|
||||||
|
}
|
||||||
return dataPrecision;
|
} else {
|
||||||
|
// FakeQuantize optimal precision is not defined
|
||||||
|
if (!requiredPrecisions.empty()) {
|
||||||
|
const element::Type resultPrecision = *requiredPrecisions.begin();
|
||||||
|
return DataPrecision(
|
||||||
|
resultPrecision,
|
||||||
|
DataPrecision::getMinValue(resultPrecision, quantizationDetails.levels),
|
||||||
|
DataPrecision::getMaxValue(resultPrecision, quantizationDetails.levels),
|
||||||
|
true);
|
||||||
|
} else {
|
||||||
|
// required precisions are not defined, not possible to get precision from FakeQuantize: something wrong
|
||||||
|
// return not valid value
|
||||||
|
return DataPrecision();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// if required precisions are empty then use FakeQuantize optimal precision
|
||||||
return DataPrecision(
|
return DataPrecision(
|
||||||
precisionDetailsAtOutputIntervals.precision,
|
precisionDetailsAtOutputIntervals.precision,
|
||||||
0.f,
|
DataPrecision::getMinValue(precisionDetailsAtOutputIntervals.precision, quantizationDetails.levels),
|
||||||
0.f,
|
DataPrecision::getMaxValue(precisionDetailsAtOutputIntervals.precision, quantizationDetails.levels),
|
||||||
precisionDetailsAtOutputIntervals.hasZeroPoint);
|
precisionDetailsAtOutputIntervals.hasZeroPoint);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -59,6 +59,9 @@ bool MatMulTransformation::transform(TransformationContext &context, ngraph::pat
|
|||||||
getDefaultPrecisions() :
|
getDefaultPrecisions() :
|
||||||
precisionsAttribute.as<PrecisionsAttribute>().value();
|
precisionsAttribute.as<PrecisionsAttribute>().value();
|
||||||
const DataPrecision dataPrecision = getDataPrecision(fakeQuantize, quantizationDetails, precisions);
|
const DataPrecision dataPrecision = getDataPrecision(fakeQuantize, quantizationDetails, precisions);
|
||||||
|
if (dataPrecision.empty()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
auto tuple = NetworkHelper::decomposeFakeQuantize(
|
auto tuple = NetworkHelper::decomposeFakeQuantize(
|
||||||
fakeQuantize,
|
fakeQuantize,
|
||||||
@ -261,7 +264,7 @@ bool MatMulTransformation::canBeTransformed(const TransformationContext& context
|
|||||||
precisionsAttribute.as<PrecisionsAttribute>().value();
|
precisionsAttribute.as<PrecisionsAttribute>().value();
|
||||||
|
|
||||||
const DataPrecision dataPrecision = getDataPrecision(fakeQuantize, quantizationDetails, precisions);
|
const DataPrecision dataPrecision = getDataPrecision(fakeQuantize, quantizationDetails, precisions);
|
||||||
if (dataPrecision.hasZeroPoint) {
|
if (dataPrecision.hasZeroPoint || dataPrecision.empty()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -304,6 +304,9 @@ bool WeightableLayerTransformation::decomposeFakeQuantizeForWeightsPath(const st
|
|||||||
precisionsAttribute.as<PrecisionsAttribute>().value();
|
precisionsAttribute.as<PrecisionsAttribute>().value();
|
||||||
|
|
||||||
const DataPrecision dataPrecision = getDataPrecision(fq, quantizationDetails, precisions);
|
const DataPrecision dataPrecision = getDataPrecision(fq, quantizationDetails, precisions);
|
||||||
|
if (dataPrecision.empty()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
auto tuple = NetworkHelper::decomposeFakeQuantize(
|
auto tuple = NetworkHelper::decomposeFakeQuantize(
|
||||||
fq,
|
fq,
|
||||||
|
@ -0,0 +1,141 @@
|
|||||||
|
// Copyright (C) 2022 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
#include <ie_blob.h>
|
||||||
|
#include <low_precision/layer_transformation.hpp>
|
||||||
|
#include <low_precision/network_helper.hpp>
|
||||||
|
#include "ngraph_functions/builders.hpp"
|
||||||
|
|
||||||
|
using namespace ngraph;
|
||||||
|
|
||||||
|
TEST(LPT_GetDataPrecision, getDataPrecision_reqU8_U8_to_U8) {
|
||||||
|
const auto input = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 3, 299, 299});
|
||||||
|
const auto low = std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{0.f});
|
||||||
|
const auto high = std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{2.55f});
|
||||||
|
const auto fakeQuantize = std::make_shared<opset1::FakeQuantize>(input, low, high, low, high, 256);
|
||||||
|
|
||||||
|
auto const dequantization = pass::low_precision::QuantizationDetails::getDetails(fakeQuantize);
|
||||||
|
|
||||||
|
auto const precisionDetails = ngraph::pass::low_precision::LayerTransformation::getDataPrecision(fakeQuantize, dequantization, {element::u8});
|
||||||
|
ASSERT_EQ(element::u8, precisionDetails.precision);
|
||||||
|
ASSERT_EQ(0.f, precisionDetails.min);
|
||||||
|
ASSERT_EQ(255.f, precisionDetails.max);
|
||||||
|
ASSERT_EQ(false, precisionDetails.hasZeroPoint);
|
||||||
|
ASSERT_EQ(false, precisionDetails.empty());
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(LPT_GetDataPrecision, getDataPrecision_reqI8_I8_to_I8) {
|
||||||
|
const auto input = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 3, 299, 299});
|
||||||
|
const auto low = std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{-1.28f});
|
||||||
|
const auto high = std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{1.27f});
|
||||||
|
const auto fakeQuantize = std::make_shared<opset1::FakeQuantize>(input, low, high, low, high, 256);
|
||||||
|
|
||||||
|
auto const dequantization = pass::low_precision::QuantizationDetails::getDetails(fakeQuantize);
|
||||||
|
|
||||||
|
auto const precisionDetails =
|
||||||
|
ngraph::pass::low_precision::LayerTransformation::getDataPrecision(fakeQuantize, dequantization, {element::i8});
|
||||||
|
ASSERT_EQ(element::i8, precisionDetails.precision);
|
||||||
|
ASSERT_EQ(-128.f, precisionDetails.min);
|
||||||
|
ASSERT_EQ(127.f, precisionDetails.max);
|
||||||
|
ASSERT_EQ(false, precisionDetails.hasZeroPoint);
|
||||||
|
ASSERT_EQ(false, precisionDetails.empty());
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(LPT_GetDataPrecision, getDataPrecision_reqU8_I8_to_U8zp) {
|
||||||
|
const auto input = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 3, 299, 299});
|
||||||
|
const auto low = std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{-1.28f});
|
||||||
|
const auto high = std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{1.27f});
|
||||||
|
const auto fakeQuantize = std::make_shared<opset1::FakeQuantize>(input, low, high, low, high, 256);
|
||||||
|
|
||||||
|
auto const dequantization = pass::low_precision::QuantizationDetails::getDetails(fakeQuantize);
|
||||||
|
|
||||||
|
auto const precisionDetails = ngraph::pass::low_precision::LayerTransformation::getDataPrecision(fakeQuantize, dequantization, {element::u8});
|
||||||
|
ASSERT_EQ(element::u8, precisionDetails.precision);
|
||||||
|
ASSERT_EQ(0.f, precisionDetails.min);
|
||||||
|
ASSERT_EQ(255.f, precisionDetails.max);
|
||||||
|
ASSERT_EQ(true, precisionDetails.hasZeroPoint);
|
||||||
|
ASSERT_EQ(false, precisionDetails.empty());
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(LPT_GetDataPrecision, getDataPrecision_reqI8_U8_to_I8zp) {
|
||||||
|
const auto input = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 3, 299, 299});
|
||||||
|
const auto low = std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{0.f});
|
||||||
|
const auto high = std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{2.55f});
|
||||||
|
const auto fakeQuantize = std::make_shared<opset1::FakeQuantize>(input, low, high, low, high, 256);
|
||||||
|
|
||||||
|
auto const dequantization = pass::low_precision::QuantizationDetails::getDetails(fakeQuantize);
|
||||||
|
|
||||||
|
auto const precisionDetails = ngraph::pass::low_precision::LayerTransformation::getDataPrecision(fakeQuantize, dequantization, {element::i8});
|
||||||
|
ASSERT_EQ(element::i8, precisionDetails.precision);
|
||||||
|
ASSERT_EQ(-128.f, precisionDetails.min);
|
||||||
|
ASSERT_EQ(127.f, precisionDetails.max);
|
||||||
|
ASSERT_EQ(true, precisionDetails.hasZeroPoint);
|
||||||
|
ASSERT_EQ(false, precisionDetails.empty());
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(LPT_GetDataPrecision, getDataPrecision_reqU8_I8zp_to_U8zp) {
|
||||||
|
const auto input = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 3, 299, 299});
|
||||||
|
const auto low = std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{-0.875227511f});
|
||||||
|
const auto high = std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{0.882119000f});
|
||||||
|
const auto fakeQuantize = std::make_shared<opset1::FakeQuantize>(input, low, high, low, high, 256);
|
||||||
|
|
||||||
|
auto const dequantization = pass::low_precision::QuantizationDetails::getDetails(fakeQuantize);
|
||||||
|
|
||||||
|
auto const precisionDetails = ngraph::pass::low_precision::LayerTransformation::getDataPrecision(fakeQuantize, dequantization, {element::u8});
|
||||||
|
ASSERT_EQ(element::u8, precisionDetails.precision);
|
||||||
|
ASSERT_EQ(0.f, precisionDetails.min);
|
||||||
|
ASSERT_EQ(255.f, precisionDetails.max);
|
||||||
|
ASSERT_EQ(true, precisionDetails.hasZeroPoint);
|
||||||
|
ASSERT_EQ(false, precisionDetails.empty());
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(LPT_GetDataPrecision, getDataPrecision_reqI8_U8zp_to_I8zp) {
|
||||||
|
const auto input = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 3, 299, 299});
|
||||||
|
const auto low = std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{0.875227511f});
|
||||||
|
const auto high = std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{0.882119000f});
|
||||||
|
const auto fakeQuantize = std::make_shared<opset1::FakeQuantize>(input, low, high, low, high, 256);
|
||||||
|
|
||||||
|
auto const dequantization = pass::low_precision::QuantizationDetails::getDetails(fakeQuantize);
|
||||||
|
|
||||||
|
auto const precisionDetails = ngraph::pass::low_precision::LayerTransformation::getDataPrecision(fakeQuantize, dequantization, {element::i8});
|
||||||
|
ASSERT_EQ(element::i8, precisionDetails.precision);
|
||||||
|
ASSERT_EQ(-128.f, precisionDetails.min);
|
||||||
|
ASSERT_EQ(127.f, precisionDetails.max);
|
||||||
|
ASSERT_EQ(true, precisionDetails.hasZeroPoint);
|
||||||
|
ASSERT_EQ(false, precisionDetails.empty());
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(LPT_GetDataPrecision, getDataPrecision_reqNone_I8zp_to_undefzp) {
|
||||||
|
const auto input = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 3, 299, 299});
|
||||||
|
const auto low = std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{-0.875227511f});
|
||||||
|
const auto high = std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{0.882119000f});
|
||||||
|
const auto fakeQuantize = std::make_shared<opset1::FakeQuantize>(input, low, high, low, high, 256);
|
||||||
|
|
||||||
|
auto const dequantization = pass::low_precision::QuantizationDetails::getDetails(fakeQuantize);
|
||||||
|
|
||||||
|
auto const precisionDetails = ngraph::pass::low_precision::LayerTransformation::getDataPrecision(fakeQuantize, dequantization, {});
|
||||||
|
ASSERT_EQ(element::undefined, precisionDetails.precision);
|
||||||
|
ASSERT_EQ(0.f, precisionDetails.min);
|
||||||
|
ASSERT_EQ(0.f, precisionDetails.max);
|
||||||
|
ASSERT_EQ(false, precisionDetails.hasZeroPoint);
|
||||||
|
ASSERT_EQ(true, precisionDetails.empty());
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(LPT_GetDataPrecision, getDataPrecision_reqNone_U8zp_to_undefzp) {
|
||||||
|
const auto input = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 3, 299, 299});
|
||||||
|
const auto low = std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{0.875227511f});
|
||||||
|
const auto high = std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{0.882119000f});
|
||||||
|
const auto fakeQuantize = std::make_shared<opset1::FakeQuantize>(input, low, high, low, high, 256);
|
||||||
|
|
||||||
|
auto const dequantization = pass::low_precision::QuantizationDetails::getDetails(fakeQuantize);
|
||||||
|
|
||||||
|
auto const precisionDetails = ngraph::pass::low_precision::LayerTransformation::getDataPrecision(fakeQuantize, dequantization, {});
|
||||||
|
ASSERT_EQ(element::undefined, precisionDetails.precision);
|
||||||
|
ASSERT_EQ(0.f, precisionDetails.min);
|
||||||
|
ASSERT_EQ(0.f, precisionDetails.max);
|
||||||
|
ASSERT_EQ(false, precisionDetails.hasZeroPoint);
|
||||||
|
ASSERT_EQ(true, precisionDetails.empty());
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user