[LPT] INT16, INT32 leftovers (#7653)
This commit is contained in:
parent
0c2b53eba3
commit
b744c11b88
@ -38,15 +38,34 @@
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
// Named lists of element types, so callers can refer to a precision set by name
// instead of repeating the element list inline.
namespace precision_set {
|
||||
// Unsigned and signed 8-bit integer types, in that order (u8 first, then i8).
const std::vector<element::Type> int8_support = {
|
||||
ngraph::element::u8, ngraph::element::i8
|
||||
};
|
||||
// The 8-bit pair followed by the 16-bit and 32-bit unsigned/signed pairs.
const std::vector<element::Type> int8_int16_int32_support = {
|
||||
ngraph::element::u8, ngraph::element::i8,
|
||||
ngraph::element::u16, ngraph::element::i16,
|
||||
|
||||
ngraph::element::u32, ngraph::element::i32
|
||||
};
|
||||
} // namespace precision_set
|
||||
// Named quantization level counts. Each *_narrow_range enumerator is exactly one
// less than the enumerator of the same width (15/16, 255/256, 65535/65536, ...).
enum levels : size_t {
|
||||
int4 = 16,
|
||||
int4_narrow_range = 15,
|
||||
int8 = 256,
|
||||
int8_narrow_range = 255,
|
||||
int16 = 65536,
|
||||
int16_narrow_range = 65535,
|
||||
// NOTE(review): 4294967296 is 2^32, so where size_t is 32 bits wide the cast below
// wraps the value to 0 — confirm that no 32-bit code path relies on this enumerator.
int32 = size_t(4294967296), // explicit cast: on ARM and ia32 platforms this number is bigger than size_t can hold, but the value is never used there
|
||||
int32_narrow_range = 4294967295
|
||||
};
|
||||
class LP_TRANSFORMATIONS_API DataPrecision {
|
||||
public:
|
||||
DataPrecision() : precision(element::undefined), min(0.f), max(0.f), hasZeroPoint(false) {}
|
||||
|
||||
// Builds a DataPrecision for the given element type: min and max are obtained from
// getMinValue/getMaxValue for the 8-bit level count, and hasZeroPoint is cleared.
explicit DataPrecision(const element::Type& precision) {
|
||||
this->precision = precision;
|
||||
// NOTE(review): the literal-256 pair and the levels::int8 pair below look like the
// removed and added sides of the same diff hunk — confirm which pair is current.
// levels::int8 is declared as 256, so both pairs request the same level count.
min = getMinValue(precision, 256);
|
||||
max = getMaxValue(precision, 256);
|
||||
min = getMinValue(precision, levels::int8);
|
||||
max = getMaxValue(precision, levels::int8);
|
||||
hasZeroPoint = false;
|
||||
}
|
||||
|
||||
@ -66,7 +85,7 @@ public:
|
||||
element::i16, element::u16,
|
||||
element::i32, element::u32
|
||||
};
|
||||
return lowPrecision.count(precision) == 1;
|
||||
return lowPrecision.find(precision) != lowPrecision.end();
|
||||
}
|
||||
|
||||
static float getMinValue(const element::Type precision, const size_t levels) {
|
||||
@ -80,17 +99,31 @@ public:
|
||||
return -8.f;
|
||||
case element::i8:
|
||||
switch (levels) {
|
||||
case 16:
|
||||
case low_precision::levels::int4:
|
||||
return -8.f;
|
||||
case 255:
|
||||
return -127.f;
|
||||
default:
|
||||
case low_precision::levels::int4_narrow_range:
|
||||
return -7.f;
|
||||
case low_precision::levels::int8:
|
||||
return -128.f;
|
||||
case low_precision::levels::int8_narrow_range:
|
||||
return -127.f;
|
||||
}
|
||||
case element::i16:
|
||||
return levels == 65535 ? -32767.f : -32768.f;
|
||||
switch (levels) {
|
||||
case low_precision::levels::int16:
|
||||
return -32768.f;
|
||||
case low_precision::levels::int16_narrow_range:
|
||||
return -32767.f;
|
||||
}
|
||||
break;
|
||||
case element::i32:
|
||||
return -2147483647.f; // -2147483647.f == -2147483648.f
|
||||
switch (levels) {
|
||||
case low_precision::levels::int32:
|
||||
return -2147483648.f;
|
||||
case low_precision::levels::int32_narrow_range:
|
||||
return -2147483647.f;
|
||||
}
|
||||
break;
|
||||
case element::f16:
|
||||
return -1.0e15f;
|
||||
case element::f32:
|
||||
@ -140,14 +173,14 @@ public:
|
||||
|
||||
// Returns the maximum value for a quantization level. The quantization level is the maximum value for the precision.
|
||||
static float getMaxValue(const size_t maxLevelsForPrecision) {
|
||||
if (maxLevelsForPrecision == 255ul) {
|
||||
return 254.f;
|
||||
} else if (maxLevelsForPrecision == 256ul) {
|
||||
return 255.f;
|
||||
} else if (maxLevelsForPrecision == 16ul) {
|
||||
return 15.f;
|
||||
} else if (maxLevelsForPrecision == 15ul) {
|
||||
return 14.f;
|
||||
std::set<size_t> validLevels = {
|
||||
levels::int4, levels::int4_narrow_range,
|
||||
levels::int8, levels::int8_narrow_range,
|
||||
levels::int16, levels::int16_narrow_range,
|
||||
levels::int32, levels::int32_narrow_range
|
||||
};
|
||||
if (validLevels.find(maxLevelsForPrecision) != validLevels.end()) {
|
||||
return maxLevelsForPrecision - 1.f;
|
||||
} else {
|
||||
THROW_TRANSFORMATION_EXCEPTION << "unexpected quantization level " << maxLevelsForPrecision;
|
||||
}
|
||||
|
@ -122,12 +122,12 @@ DataPrecision getDataPrecisionByOutputPort(std::shared_ptr<opset1::FakeQuantize>
|
||||
const auto& precisions = precisionsAttribute.as<PrecisionsAttribute>().value();
|
||||
std::vector<element::Type> precisionsForLevels{};
|
||||
switch (levels) {
|
||||
case 65536:
|
||||
case 65535:
|
||||
case low_precision::levels::int16:
|
||||
case low_precision::levels::int16_narrow_range:
|
||||
precisionsForLevels = {element::u16, element::i16};
|
||||
break;
|
||||
case static_cast<size_t>(4294967296):
|
||||
case 4294967295:
|
||||
case low_precision::levels::int32:
|
||||
case low_precision::levels::int32_narrow_range:
|
||||
precisionsForLevels = {element::u32, element::i32};
|
||||
break;
|
||||
default:
|
||||
|
@ -24,7 +24,7 @@ namespace low_precision {
|
||||
constexpr char LayerTransformation::originalLayerPostfix[];
|
||||
|
||||
// order defines default precision
|
||||
std::vector<ngraph::element::Type> LayerTransformation::defaultPrecisions = { ngraph::element::u8, ngraph::element::i8 };
|
||||
std::vector<ngraph::element::Type> LayerTransformation::defaultPrecisions = precision_set::int8_support;
|
||||
std::mutex LayerTransformation::defaultPrecisionsMutex;
|
||||
|
||||
LayerTransformation::LayerTransformation(const Params& params) :
|
||||
@ -210,6 +210,9 @@ LayerTransformation::PrecisionDetails LayerTransformation::getPrecisionDetails(
|
||||
|
||||
bool hasZeroPoint = false;
|
||||
bool thereIsAtLeastOneNormalValue = false;
|
||||
|
||||
std::vector<size_t> fullRangeLevels = { levels::int4, levels::int8, levels::int16, levels::int32 };
|
||||
|
||||
for (size_t i = 0; i < outputLowValues.size(); ++i) {
|
||||
if ((std::fabs(outputLowValues[i]) < zeroThreshold) && (std::fabs(outputHighValues[i]) < zeroThreshold)) {
|
||||
// both values are too small to identify preferable precision
|
||||
@ -226,9 +229,8 @@ LayerTransformation::PrecisionDetails LayerTransformation::getPrecisionDetails(
|
||||
hasNegative = true;
|
||||
|
||||
if (outputHighValues[i] != 0.f) {
|
||||
const float expectedRatio =
|
||||
(quantizationLevels == 16 || quantizationLevels == 256 ||
|
||||
quantizationLevels == 65536 || quantizationLevels == 4294967296) ? asymmetricIntervalSideRatio : -1.f;
|
||||
auto it = std::find(fullRangeLevels.begin(), fullRangeLevels.end(), quantizationLevels);
|
||||
const float expectedRatio = it != fullRangeLevels.end() ? asymmetricIntervalSideRatio : -1.f;
|
||||
const float actualRatio = outputLowValues[i] / outputHighValues[i];
|
||||
const float actual = std::fabs((actualRatio - expectedRatio) / std::min(actualRatio, expectedRatio));
|
||||
if (actual > quantizationIntervalAsymmetryThreshold) {
|
||||
@ -272,37 +274,35 @@ LayerTransformation::PrecisionDetails LayerTransformation::getPrecisionDetails(
|
||||
if (!hasZeroPoint) {
|
||||
if (signedPrecision && (!unsignedPrecision)) {
|
||||
switch (quantizationLevels) {
|
||||
case 256:
|
||||
case 255:
|
||||
case 16:
|
||||
case levels::int4:
|
||||
case levels::int8:
|
||||
case levels::int8_narrow_range:
|
||||
resultPrecision = element::i8;
|
||||
break;
|
||||
case 65536:
|
||||
case 65535:
|
||||
case levels::int16:
|
||||
case levels::int16_narrow_range:
|
||||
resultPrecision = element::i16;
|
||||
break;
|
||||
case static_cast<size_t>(4294967296):
|
||||
case 4294967295:
|
||||
case levels::int32:
|
||||
case levels::int32_narrow_range:
|
||||
resultPrecision = element::i32;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if ((!signedPrecision) && unsignedPrecision) {
|
||||
switch (quantizationLevels) {
|
||||
case 256:
|
||||
case 255:
|
||||
case 16:
|
||||
case levels::int4:
|
||||
case levels::int8:
|
||||
case levels::int8_narrow_range:
|
||||
resultPrecision = element::u8;
|
||||
break;
|
||||
case 65536:
|
||||
case 65535:
|
||||
case levels::int16:
|
||||
case levels::int16_narrow_range:
|
||||
resultPrecision = element::u16;
|
||||
break;
|
||||
case static_cast<size_t>(4294967296):
|
||||
case 4294967295:
|
||||
case levels::int32:
|
||||
case levels::int32_narrow_range:
|
||||
resultPrecision = element::u32;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -337,16 +337,16 @@ DataPrecision LayerTransformation::getDataPrecision(
|
||||
std::vector<element::Type> resultPrecisions = precisions;
|
||||
std::vector<element::Type> FQPrecisions;
|
||||
switch (quantizationDetails.levels) {
|
||||
case 255:
|
||||
case 256:
|
||||
case levels::int8:
|
||||
case levels::int8_narrow_range:
|
||||
FQPrecisions = {element::u8, element::i8};
|
||||
break;
|
||||
case 65535:
|
||||
case 65536:
|
||||
case levels::int16:
|
||||
case levels::int16_narrow_range:
|
||||
FQPrecisions = {element::u16, element::i16};
|
||||
break;
|
||||
case 4294967295:
|
||||
case static_cast<size_t>(4294967296):
|
||||
case levels::int32:
|
||||
case levels::int32_narrow_range:
|
||||
FQPrecisions = {element::u32, element::i32};
|
||||
}
|
||||
resultPrecisions = NetworkHelper::precisionIntersection(precisions, FQPrecisions);
|
||||
|
@ -292,12 +292,10 @@ bool ngraph::pass::low_precision::LowPrecision::isFQLevelsPresent(
|
||||
const std::set<size_t>& levels) {
|
||||
std::vector<std::shared_ptr<ngraph::Node>> nodes = function->get_ops();
|
||||
for (auto& node : nodes) {
|
||||
for (size_t i = 0; i < node->inputs().size(); ++i) {
|
||||
const auto fakeQuantize = as_type_ptr<ngraph::opset1::FakeQuantize>(node);
|
||||
if (fakeQuantize != nullptr) {
|
||||
if (levels.count(fakeQuantize->get_levels()) == 1) {
|
||||
return true;
|
||||
}
|
||||
const auto fakeQuantize = as_type_ptr<ngraph::opset1::FakeQuantize>(node);
|
||||
if (fakeQuantize != nullptr) {
|
||||
if (levels.count(fakeQuantize->get_levels()) == 1) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -22,6 +22,7 @@
|
||||
#include "low_precision/rt_info/precision_preserved_attribute.hpp"
|
||||
#include "low_precision/rt_info/intervals_alignment_attribute.hpp"
|
||||
#include "low_precision/rt_info/quantization_alignment_attribute.hpp"
|
||||
#include "ngraph/opsets/opset6.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
@ -61,7 +62,9 @@ bool NetworkHelper::isConstantPath(const std::shared_ptr<Node>& op) {
|
||||
ov::is_type<opset1::Convolution>(node) ||
|
||||
ov::is_type<opset1::GroupConvolution>(node) ||
|
||||
ov::is_type<opset1::MatMul>(node) ||
|
||||
ov::is_type<opset1::ConvolutionBackpropData>(node);
|
||||
ov::is_type<opset1::ConvolutionBackpropData>(node) ||
|
||||
ov::is_type<opset3::ReadValue>(node) ||
|
||||
ov::is_type<opset6::ReadValue>(node);
|
||||
};
|
||||
|
||||
if (isNotConstantPathOperation(op)) {
|
||||
@ -1730,8 +1733,8 @@ bool NetworkHelper::checkZeroPoint(const std::shared_ptr<Node>& node, const Data
|
||||
const auto intNode = ov::is_type<opset1::Convert>(parent) ? parent : node;
|
||||
const auto type = intNode->get_input_element_type(0);
|
||||
if (type == element::u8 || type == element::i8) {
|
||||
min = DataPrecision::getMinValue(type, 256) - 0.5f;
|
||||
max = DataPrecision::getMaxValue(type, 256) + 0.5f;
|
||||
min = DataPrecision::getMinValue(type, levels::int8) - 0.5f;
|
||||
max = DataPrecision::getMaxValue(type, levels::int8) + 0.5f;
|
||||
} else {
|
||||
return type == element::f32 || type == element::f16;
|
||||
}
|
||||
|
@ -19,6 +19,7 @@
|
||||
|
||||
#include <low_precision/common/ie_lpt_exception.hpp>
|
||||
#include <low_precision/network_helper.hpp>
|
||||
#include <low_precision/layer_transformation.hpp>
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
@ -162,7 +163,12 @@ bool QuantizationDetails::empty() const noexcept {
|
||||
}
|
||||
|
||||
// Returns true when `level` is a member of the fixed set of supported quantization
// level counts, false otherwise.
bool QuantizationDetails::isSupportedLevel(const size_t level) {
|
||||
// NOTE(review): two declarations of supported_levels appear here; they look like the
// removed (numeric literals) and added (levels enumerators) sides of the same diff
// hunk — confirm which one is current. The literal list has no 15, whereas the
// enumerator list includes levels::int4_narrow_range.
static const std::unordered_set<size_t> supported_levels = { 16, 255, 256, 65536, 65535, static_cast<size_t>(4294967296), 4294967295 };
|
||||
static const std::unordered_set<size_t> supported_levels = {
|
||||
levels::int4, levels::int4_narrow_range,
|
||||
levels::int8, levels::int8_narrow_range,
|
||||
levels::int16, levels::int16_narrow_range,
|
||||
levels::int32, levels::int32_narrow_range
|
||||
};
|
||||
// Membership test: find() returns end() when the level is not in the set.
return supported_levels.find(level) != supported_levels.end();
|
||||
}
|
||||
|
||||
|
@ -446,14 +446,11 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr<ngraph::Function>
|
||||
bool updatePrecision = true;
|
||||
bool hasINT16orINT32Levels = ngraph::pass::low_precision::LowPrecision::isFQLevelsPresent(
|
||||
nGraphFunc,
|
||||
{65535, 65536, 4294967295, 4294967296});
|
||||
{levels::int16, levels::int16_narrow_range,
|
||||
levels::int32, levels::int32_narrow_range});
|
||||
if (hasINT16orINT32Levels) {
|
||||
updatePrecision = false;
|
||||
LowPrecision::setDefaultPrecisions({
|
||||
ngraph::element::u8, ngraph::element::i8,
|
||||
ngraph::element::u16, ngraph::element::i16,
|
||||
ngraph::element::u32, ngraph::element::i32,
|
||||
});
|
||||
LowPrecision::setDefaultPrecisions(precision_set::int8_int16_int32_support);
|
||||
|
||||
supportedPrecisions = std::vector<OperationPrecisionRestriction>({});
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user