[LPT] INT16, INT32 leftovers (#7653)

This commit is contained in:
Vladimir Zinoviev 2022-01-10 21:09:10 +03:00 committed by GitHub
parent 0c2b53eba3
commit b744c11b88
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 101 additions and 64 deletions

View File

@ -38,15 +38,34 @@
namespace ngraph {
namespace pass {
namespace low_precision {
namespace precision_set {
const std::vector<element::Type> int8_support = {
ngraph::element::u8, ngraph::element::i8
};
const std::vector<element::Type> int8_int16_int32_support = {
ngraph::element::u8, ngraph::element::i8,
ngraph::element::u16, ngraph::element::i16,
ngraph::element::u32, ngraph::element::i32
};
}
// Named quantization level counts, grouped by integer bit width.
// Each "*_narrow_range" value is one less than its full-range counterpart.
enum levels : size_t {
    // 4-bit
    int4 = 16,
    int4_narrow_range = 15,

    // 8-bit
    int8 = 256,
    int8_narrow_range = 255,

    // 16-bit
    int16 = 65536,
    int16_narrow_range = 65535,

    // 32-bit: 4294967296 does not fit into a 32-bit size_t (ARM, ia32). The explicit cast keeps
    // the initializer well-formed there (the value wraps to 0); per the original note the
    // enumerator is never used on those platforms.
    int32 = static_cast<size_t>(4294967296),
    int32_narrow_range = 4294967295
};
class LP_TRANSFORMATIONS_API DataPrecision {
public:
DataPrecision() : precision(element::undefined), min(0.f), max(0.f), hasZeroPoint(false) {}
explicit DataPrecision(const element::Type& precision) {
this->precision = precision;
min = getMinValue(precision, 256);
max = getMaxValue(precision, 256);
min = getMinValue(precision, levels::int8);
max = getMaxValue(precision, levels::int8);
hasZeroPoint = false;
}
@ -66,7 +85,7 @@ public:
element::i16, element::u16,
element::i32, element::u32
};
return lowPrecision.count(precision) == 1;
return lowPrecision.find(precision) != lowPrecision.end();
}
static float getMinValue(const element::Type precision, const size_t levels) {
@ -80,17 +99,31 @@ public:
return -8.f;
case element::i8:
switch (levels) {
case 16:
case low_precision::levels::int4:
return -8.f;
case 255:
return -127.f;
default:
case low_precision::levels::int4_narrow_range:
return -7.f;
case low_precision::levels::int8:
return -128.f;
case low_precision::levels::int8_narrow_range:
return -127.f;
}
case element::i16:
return levels == 65535 ? -32767.f : -32768.f;
switch (levels) {
case low_precision::levels::int16:
return -32768.f;
case low_precision::levels::int16_narrow_range:
return -32767.f;
}
break;
case element::i32:
return -2147483647.f; // -2147483647.f == -2147483648.f
switch (levels) {
case low_precision::levels::int32:
return -2147483648.f;
case low_precision::levels::int32_narrow_range:
return -2147483647.f;
}
break;
case element::f16:
return -1.0e15f;
case element::f32:
@ -140,14 +173,14 @@ public:
// Returns the maximum quantized value for the given number of quantization levels, i.e. (levels - 1).
static float getMaxValue(const size_t maxLevelsForPrecision) {
if (maxLevelsForPrecision == 255ul) {
return 254.f;
} else if (maxLevelsForPrecision == 256ul) {
return 255.f;
} else if (maxLevelsForPrecision == 16ul) {
return 15.f;
} else if (maxLevelsForPrecision == 15ul) {
return 14.f;
std::set<size_t> validLevels = {
levels::int4, levels::int4_narrow_range,
levels::int8, levels::int8_narrow_range,
levels::int16, levels::int16_narrow_range,
levels::int32, levels::int32_narrow_range
};
if (validLevels.find(maxLevelsForPrecision) != validLevels.end()) {
return maxLevelsForPrecision - 1.f;
} else {
THROW_TRANSFORMATION_EXCEPTION << "unexpected quantization level " << maxLevelsForPrecision;
}

View File

@ -122,12 +122,12 @@ DataPrecision getDataPrecisionByOutputPort(std::shared_ptr<opset1::FakeQuantize>
const auto& precisions = precisionsAttribute.as<PrecisionsAttribute>().value();
std::vector<element::Type> precisionsForLevels{};
switch (levels) {
case 65536:
case 65535:
case low_precision::levels::int16:
case low_precision::levels::int16_narrow_range:
precisionsForLevels = {element::u16, element::i16};
break;
case static_cast<size_t>(4294967296):
case 4294967295:
case low_precision::levels::int32:
case low_precision::levels::int32_narrow_range:
precisionsForLevels = {element::u32, element::i32};
break;
default:

View File

@ -24,7 +24,7 @@ namespace low_precision {
constexpr char LayerTransformation::originalLayerPostfix[];
// order defines default precision
std::vector<ngraph::element::Type> LayerTransformation::defaultPrecisions = { ngraph::element::u8, ngraph::element::i8 };
std::vector<ngraph::element::Type> LayerTransformation::defaultPrecisions = precision_set::int8_support;
std::mutex LayerTransformation::defaultPrecisionsMutex;
LayerTransformation::LayerTransformation(const Params& params) :
@ -210,6 +210,9 @@ LayerTransformation::PrecisionDetails LayerTransformation::getPrecisionDetails(
bool hasZeroPoint = false;
bool thereIsAtLeastOneNormalValue = false;
std::vector<size_t> fullRangeLevels = { levels::int4, levels::int8, levels::int16, levels::int32 };
for (size_t i = 0; i < outputLowValues.size(); ++i) {
if ((std::fabs(outputLowValues[i]) < zeroThreshold) && (std::fabs(outputHighValues[i]) < zeroThreshold)) {
// both values are too small to identify preferable precision
@ -226,9 +229,8 @@ LayerTransformation::PrecisionDetails LayerTransformation::getPrecisionDetails(
hasNegative = true;
if (outputHighValues[i] != 0.f) {
const float expectedRatio =
(quantizationLevels == 16 || quantizationLevels == 256 ||
quantizationLevels == 65536 || quantizationLevels == 4294967296) ? asymmetricIntervalSideRatio : -1.f;
auto it = std::find(fullRangeLevels.begin(), fullRangeLevels.end(), quantizationLevels);
const float expectedRatio = it != fullRangeLevels.end() ? asymmetricIntervalSideRatio : -1.f;
const float actualRatio = outputLowValues[i] / outputHighValues[i];
const float actual = std::fabs((actualRatio - expectedRatio) / std::min(actualRatio, expectedRatio));
if (actual > quantizationIntervalAsymmetryThreshold) {
@ -272,37 +274,35 @@ LayerTransformation::PrecisionDetails LayerTransformation::getPrecisionDetails(
if (!hasZeroPoint) {
if (signedPrecision && (!unsignedPrecision)) {
switch (quantizationLevels) {
case 256:
case 255:
case 16:
case levels::int4:
case levels::int8:
case levels::int8_narrow_range:
resultPrecision = element::i8;
break;
case 65536:
case 65535:
case levels::int16:
case levels::int16_narrow_range:
resultPrecision = element::i16;
break;
case static_cast<size_t>(4294967296):
case 4294967295:
case levels::int32:
case levels::int32_narrow_range:
resultPrecision = element::i32;
break;
}
}
if ((!signedPrecision) && unsignedPrecision) {
switch (quantizationLevels) {
case 256:
case 255:
case 16:
case levels::int4:
case levels::int8:
case levels::int8_narrow_range:
resultPrecision = element::u8;
break;
case 65536:
case 65535:
case levels::int16:
case levels::int16_narrow_range:
resultPrecision = element::u16;
break;
case static_cast<size_t>(4294967296):
case 4294967295:
case levels::int32:
case levels::int32_narrow_range:
resultPrecision = element::u32;
break;
}
}
}
@ -337,16 +337,16 @@ DataPrecision LayerTransformation::getDataPrecision(
std::vector<element::Type> resultPrecisions = precisions;
std::vector<element::Type> FQPrecisions;
switch (quantizationDetails.levels) {
case 255:
case 256:
case levels::int8:
case levels::int8_narrow_range:
FQPrecisions = {element::u8, element::i8};
break;
case 65535:
case 65536:
case levels::int16:
case levels::int16_narrow_range:
FQPrecisions = {element::u16, element::i16};
break;
case 4294967295:
case static_cast<size_t>(4294967296):
case levels::int32:
case levels::int32_narrow_range:
FQPrecisions = {element::u32, element::i32};
}
resultPrecisions = NetworkHelper::precisionIntersection(precisions, FQPrecisions);

View File

@ -292,12 +292,10 @@ bool ngraph::pass::low_precision::LowPrecision::isFQLevelsPresent(
const std::set<size_t>& levels) {
std::vector<std::shared_ptr<ngraph::Node>> nodes = function->get_ops();
for (auto& node : nodes) {
for (size_t i = 0; i < node->inputs().size(); ++i) {
const auto fakeQuantize = as_type_ptr<ngraph::opset1::FakeQuantize>(node);
if (fakeQuantize != nullptr) {
if (levels.count(fakeQuantize->get_levels()) == 1) {
return true;
}
const auto fakeQuantize = as_type_ptr<ngraph::opset1::FakeQuantize>(node);
if (fakeQuantize != nullptr) {
if (levels.count(fakeQuantize->get_levels()) == 1) {
return true;
}
}
}

View File

@ -22,6 +22,7 @@
#include "low_precision/rt_info/precision_preserved_attribute.hpp"
#include "low_precision/rt_info/intervals_alignment_attribute.hpp"
#include "low_precision/rt_info/quantization_alignment_attribute.hpp"
#include "ngraph/opsets/opset6.hpp"
namespace ngraph {
namespace pass {
@ -61,7 +62,9 @@ bool NetworkHelper::isConstantPath(const std::shared_ptr<Node>& op) {
ov::is_type<opset1::Convolution>(node) ||
ov::is_type<opset1::GroupConvolution>(node) ||
ov::is_type<opset1::MatMul>(node) ||
ov::is_type<opset1::ConvolutionBackpropData>(node);
ov::is_type<opset1::ConvolutionBackpropData>(node) ||
ov::is_type<opset3::ReadValue>(node) ||
ov::is_type<opset6::ReadValue>(node);
};
if (isNotConstantPathOperation(op)) {
@ -1730,8 +1733,8 @@ bool NetworkHelper::checkZeroPoint(const std::shared_ptr<Node>& node, const Data
const auto intNode = ov::is_type<opset1::Convert>(parent) ? parent : node;
const auto type = intNode->get_input_element_type(0);
if (type == element::u8 || type == element::i8) {
min = DataPrecision::getMinValue(type, 256) - 0.5f;
max = DataPrecision::getMaxValue(type, 256) + 0.5f;
min = DataPrecision::getMinValue(type, levels::int8) - 0.5f;
max = DataPrecision::getMaxValue(type, levels::int8) + 0.5f;
} else {
return type == element::f32 || type == element::f16;
}

View File

@ -19,6 +19,7 @@
#include <low_precision/common/ie_lpt_exception.hpp>
#include <low_precision/network_helper.hpp>
#include <low_precision/layer_transformation.hpp>
namespace ngraph {
namespace pass {
@ -162,7 +163,12 @@ bool QuantizationDetails::empty() const noexcept {
}
// Reports whether `level` is one of the quantization level counts stored in `supported_levels`.
// Returns true when the value is present in the set and false otherwise.
bool QuantizationDetails::isSupportedLevel(const size_t level) {
// NOTE(review): `supported_levels` is initialized twice below - once on a single line with numeric
// literals and once on several lines with named `levels::*` constants. These look like the
// before/after sides of the same change; only one of them can remain in compilable code.
// The single-line form does not list 15, while the named form includes levels::int4_narrow_range.
static const std::unordered_set<size_t> supported_levels = { 16, 255, 256, 65536, 65535, static_cast<size_t>(4294967296), 4294967295 };
static const std::unordered_set<size_t> supported_levels = {
levels::int4, levels::int4_narrow_range,
levels::int8, levels::int8_narrow_range,
levels::int16, levels::int16_narrow_range,
levels::int32, levels::int32_narrow_range
};
// Membership test against the function-local static set.
return supported_levels.find(level) != supported_levels.end();
}

View File

@ -446,14 +446,11 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr<ngraph::Function>
bool updatePrecision = true;
bool hasINT16orINT32Levels = ngraph::pass::low_precision::LowPrecision::isFQLevelsPresent(
nGraphFunc,
{65535, 65536, 4294967295, 4294967296});
{levels::int16, levels::int16_narrow_range,
levels::int32, levels::int32_narrow_range});
if (hasINT16orINT32Levels) {
updatePrecision = false;
LowPrecision::setDefaultPrecisions({
ngraph::element::u8, ngraph::element::i8,
ngraph::element::u16, ngraph::element::i16,
ngraph::element::u32, ngraph::element::i32,
});
LowPrecision::setDefaultPrecisions(precision_set::int8_int16_int32_support);
supportedPrecisions = std::vector<OperationPrecisionRestriction>({});
}