[LPT] isAsymmetricQuantization & isAsymmetricOnWeights validation (#8316)
* [LPT] isAsymmetricQuantization & isAsymmetricOnWeights
* [LPT] isAsymmetricOnWeights tests
* [LPT] tests improvements: comments fixes
Commit 5f7e3cdfb9 (parent 2ed4e9c05f)
@@ -29,7 +29,7 @@ public:
        const std::shared_ptr<ngraph::opset1::Multiply>& multiply,
        const std::shared_ptr<ngraph::opset1::Constant>& multiplyConstant);

-   bool empty() const;
+   bool empty() const noexcept;
    bool multiplyHasZeroOrDenormal() const;
    bool isShared() const;
    bool isLowPrecision() const;
@@ -56,6 +56,10 @@ public:
        max(max),
        hasZeroPoint(hasZeroPoint) {}

+   bool empty() const noexcept {
+       return (precision == element::undefined) && (min == 0.f) && (max == 0.f) && (!hasZeroPoint);
+   }
+
    static bool isSupported(const element::Type& precision) {
        static const std::set<element::Type_t> lowPrecision = {
            element::i8, element::u8,
@@ -51,6 +51,8 @@ public:
    float getOutputLowValue(const size_t channel) const;
    float getOutputHighValue(const size_t channel) const;

+   bool empty() const noexcept;
+
    static bool isSupportedLevel(const size_t level);

    const size_t levels;
@@ -34,7 +34,7 @@ FakeQuantizeDequantization::FakeQuantizeDequantization(
    multiplyConstant(multiplyConstant) {
}

-bool FakeQuantizeDequantization::empty() const {
+bool FakeQuantizeDequantization::empty() const noexcept {
    return (subtract == nullptr) && (multiply == nullptr);
}

@@ -311,6 +311,9 @@ LayerTransformation::PrecisionDetails LayerTransformation::getPrecisionDetails(c
bool LayerTransformation::isAsymmetricQuantization(const std::shared_ptr<const Node>& layer) {
    const auto nonConstNode = const_cast<ngraph::Node*>(layer.get())->shared_from_this();
    const auto dequantization = NetworkHelper::getDequantization(nonConstNode);
+   if (dequantization.empty()) {
+       return false;
+   }
    return dequantization.subtract != nullptr;
}

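The guard added above short-circuits the check when no dequantization operations are attached to the layer: with neither a Subtract nor a Multiply present, the quantization cannot be asymmetric. A minimal standalone sketch of that predicate, using hypothetical simplified stand-in types rather than the real ngraph/LPT classes (illustration only, not the library API):

#include <memory>

// Hypothetical stand-ins for ngraph::opset1::Subtract / Multiply, for illustration only.
struct Subtract {};
struct Multiply {};

// Simplified model of FakeQuantizeDequantization: "empty" when neither
// a Subtract (zero point) nor a Multiply (scale) is present.
struct Dequantization {
    std::shared_ptr<Subtract> subtract;
    std::shared_ptr<Multiply> multiply;
    bool empty() const noexcept { return subtract == nullptr && multiply == nullptr; }
};

// Mirrors the logic shown above: no dequantization at all means "not asymmetric";
// otherwise asymmetry is signalled by the presence of the zero-point Subtract.
bool isAsymmetric(const Dequantization& dq) {
    if (dq.empty()) {
        return false;
    }
    return dq.subtract != nullptr;
}

int main() {
    Dequantization dq;
    dq.multiply = std::make_shared<Multiply>();   // scale only: symmetric
    const bool scaleOnly = isAsymmetric(dq);      // false
    dq.subtract = std::make_shared<Subtract>();   // add a zero point
    const bool withZeroPoint = isAsymmetric(dq);  // true
    return (!scaleOnly && withZeroPoint) ? 0 : 1;
}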
@@ -91,6 +91,10 @@ void QuantizationDetails::getOutputIntervals(
}

QuantizationDetails QuantizationDetails::getDetails(std::shared_ptr<opset1::FakeQuantize> quantize) {
+   if (!QuantizationDetails::outputLayoutIsSupported(quantize)) {
+       return QuantizationDetails();
+   }
+
    const std::vector<float> inputLowValues = ov::as_type_ptr<opset1::Constant>(quantize->get_input_node_shared_ptr(1))->cast_vector<float>();
    const std::vector<float> inputHighValues = ov::as_type_ptr<opset1::Constant>(quantize->get_input_node_shared_ptr(2))->cast_vector<float>();

@@ -153,6 +157,10 @@ std::vector<float> QuantizationDetails::getBlobValue(std::shared_ptr<Node> const
    return ov::as_type_ptr<opset1::Constant>(constantLayer)->cast_vector<float>();
}

+bool QuantizationDetails::empty() const noexcept {
+   return (levels == 0ul) && inputLowValues.empty() && inputHighValues.empty() && outputLowValues.empty() && outputHighValues.empty();
+}
+
bool QuantizationDetails::isSupportedLevel(const size_t level) {
    static const std::unordered_set<size_t> supported_levels = { 255, 256, 65536, 65535, static_cast<size_t>(4294967296), 4294967295 };
    return supported_levels.find(level) != supported_levels.end();
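The supported levels correspond to the full and almost-full ranges of 8-, 16- and 32-bit integers (2^n and 2^n - 1); any other FakeQuantize level count is rejected. A small self-contained check of the same value set, assuming a 64-bit size_t (names here are illustrative, not the library API):

#include <cstddef>
#include <iostream>
#include <unordered_set>

// Same value set as QuantizationDetails::isSupportedLevel above:
// 2^8 - 1, 2^8, 2^16 - 1, 2^16, 2^32 - 1, 2^32.
bool isSupportedLevel(std::size_t level) {
    static const std::unordered_set<std::size_t> supported = {
        255, 256, 65535, 65536, 4294967295ull, 4294967296ull
    };
    return supported.count(level) != 0;
}

int main() {
    std::cout << isSupportedLevel(256) << '\n';   // 1: u8/i8, 2^8 levels
    std::cout << isSupportedLevel(255) << '\n';   // 1: u8/i8 range without one extreme value
    std::cout << isSupportedLevel(1000) << '\n';  // 0: arbitrary level counts are rejected
}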
@@ -361,6 +361,9 @@ std::shared_ptr<opset1::FakeQuantize> WeightableLayerTransformation::getFakeQuan
DataPrecision WeightableLayerTransformation::getDataPrecisionOnWeights(const std::shared_ptr<Node>& node) {
    const auto fq = getFakeQuantizeOnWeights(node);
    const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(fq);
+   if (quantizationDetails.empty()) {
+       return DataPrecision();
+   }

    const auto precisionsAttribute = getAttributeFromOutput<PrecisionsAttributePtr>(fq);
    const auto precisions = precisionsAttribute == nullptr ?
@@ -380,6 +383,10 @@ bool WeightableLayerTransformation::isAsymmetricOnWeights(const std::shared_ptr<

    if (dequantization.empty()) {
        const auto dataPrecision = WeightableLayerTransformation::getDataPrecisionOnWeights(n);
+       if (dataPrecision.empty()) {
+           return false;
+       }
+
        if (dataPrecision.hasZeroPoint) {
            return true;
        }
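In the branch shown above (weights that carry no dequantization yet), asymmetry is decided from the FakeQuantize on the weights: if no valid quantization details can be extracted the weights are treated as not asymmetric, otherwise a precision that requires a zero point marks them as asymmetric. A simplified sketch of that decision with a hypothetical stand-in type; the tail of the real function lies outside this hunk:

// Simplified stand-in for ngraph::pass::low_precision::DataPrecision, illustration only.
struct DataPrecisionStub {
    bool hasZeroPoint = false;
    bool undefined = true;                 // models DataPrecision::empty()
    bool empty() const { return undefined; }
};

// Unsupported / empty quantization details -> not asymmetric;
// a precision requiring a zero point -> asymmetric.
// (The original function may run further checks after this point; they are not shown in the hunk.)
bool isAsymmetricOnWeightsSketch(const DataPrecisionStub& dataPrecision) {
    if (dataPrecision.empty()) {
        return false;
    }
    if (dataPrecision.hasZeroPoint) {
        return true;
    }
    return false;  // assumption: the remaining checks did not flag asymmetry
}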
@@ -0,0 +1,142 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "layer_transformation.hpp"

#include <sstream>
#include <memory>
#include <utility>

#include <gtest/gtest.h>

#include <transformations/utils/utils.hpp>
#include <transformations/init_node_info.hpp>
#include <low_precision/weightable_layer_transformation.hpp>
#include "lpt_ngraph_functions/convolution_function.hpp"

using namespace testing;
using namespace ngraph;
using namespace ngraph::pass;

class IsAsymmetricOnWeightsTestValues {
public:
    class Actual {
    public:
        ngraph::element::Type precisionBeforeDequantization;
        ngraph::builder::subgraph::DequantizationOperations dequantizationOnActivations;
        std::shared_ptr<ngraph::opset1::Constant> weights;
        builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights;
    };

    TestTransformationParams params;
    Actual actual;
};

typedef std::tuple<
    element::Type,
    ngraph::PartialShape,
    IsAsymmetricOnWeightsTestValues,
    std::pair<std::vector<bool>, bool> > IsAsymmetricOnWeightsParams;

class IsAsymmetricOnWeightsTransformation : public LayerTransformation, public testing::WithParamInterface<IsAsymmetricOnWeightsParams> {
public:
    void SetUp() override {
        const auto netPrecision = std::get<0>(GetParam());
        const auto inputShape = std::get<1>(GetParam());
        auto testValues = std::get<2>(GetParam());
        std::pair<std::vector<bool>, bool> transposeAndIsAsymmetricOnWeights = std::get<3>(GetParam());

        actualFunction = ngraph::builder::subgraph::ConvolutionFunction::getOriginal(
            netPrecision,
            testValues.actual.precisionBeforeDequantization,
            inputShape,
            testValues.actual.dequantizationOnActivations,
            testValues.actual.weights,
            testValues.actual.fakeQuantizeOnWeights,
            transposeAndIsAsymmetricOnWeights.first[0],
            transposeAndIsAsymmetricOnWeights.first[1],
            transposeAndIsAsymmetricOnWeights.first[2],
            transposeAndIsAsymmetricOnWeights.first[3],
            transposeAndIsAsymmetricOnWeights.first[4]);
    }

    static std::string getTestCaseName(testing::TestParamInfo<IsAsymmetricOnWeightsParams> obj) {
        const auto netPrecision = std::get<0>(obj.param);
        auto inputShape = std::get<1>(obj.param);
        IsAsymmetricOnWeightsTestValues testValues = std::get<2>(obj.param);
        std::pair<std::vector<bool>, bool> transposeAndIsAsymmetricOnWeights = std::get<3>(obj.param);

        std::ostringstream result;
        result << toString(testValues.params) << "_" <<
            netPrecision << "_" <<
            inputShape << "_" <<
            testValues.actual.precisionBeforeDequantization << "_" <<
            testValues.actual.dequantizationOnActivations << "_" << "_weights_" <<
            testValues.actual.weights->get_element_type() << "_" << "{ " <<
            testValues.actual.weights->cast_vector<float>()[0] << " }_" <<
            testValues.actual.fakeQuantizeOnWeights << "_" <<
            transposeAndIsAsymmetricOnWeights.first[0] << "_" <<
            transposeAndIsAsymmetricOnWeights.first[1] << "_" <<
            transposeAndIsAsymmetricOnWeights.first[2] << "_" <<
            transposeAndIsAsymmetricOnWeights.first[3] << "_" <<
            transposeAndIsAsymmetricOnWeights.first[4];
        return result.str();
    }
};

TEST_P(IsAsymmetricOnWeightsTransformation, CompareFunctions) {
    actualFunction->validate_nodes_and_infer_types();

    const auto convolutions = LayerTransformation::get<opset1::Convolution>(actualFunction);
    ASSERT_TRUE(convolutions.size() == 1ul) << "convolution was not found";

    const auto isAsymmetricOnWeights = ngraph::pass::low_precision::WeightableLayerTransformation::isAsymmetricOnWeights(convolutions[0]);
    std::pair<std::vector<bool>, bool> transposeAndIsAsymmetricOnWeights = std::get<3>(GetParam());
    ASSERT_EQ(transposeAndIsAsymmetricOnWeights.second, isAsymmetricOnWeights);
}

const std::vector<element::Type> netPrecisions = {
    element::f32
};

const std::vector<ngraph::PartialShape> suitablePartialShapes = {
    ngraph::PartialShape({ 1, 3, 72, 48 }),
    ngraph::PartialShape({ 4, 3, 72, 48 }),
    ngraph::PartialShape({ Dimension::dynamic(), 3, 72, 48 }),
    ngraph::PartialShape({ 1, 3, Dimension::dynamic(), Dimension::dynamic() }),
};

const std::vector<IsAsymmetricOnWeightsTestValues> testValues = {
    {
        LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true),
        {
            ngraph::element::u8,
            {{ngraph::element::f32}, { 128.f }, { 0.02f }},
            op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ 2.f }),
            { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.f }, { 1.27f } },
        }
    }
};

const std::vector<std::pair<std::vector<bool>, bool> > transposeFlags = {
    // asymmetric quantization
    {{false, false, false, false, false}, true},
    {{true, false, false, false, false}, true},

    // not supported FakeQuantize
    {{false, true, false, false, false}, false},
    {{false, false, true, false, false}, false},
    {{false, false, false, true, false}, false},
    {{false, false, false, false, true}, false}
};

INSTANTIATE_TEST_SUITE_P(
    smoke_LPT,
    IsAsymmetricOnWeightsTransformation,
    ::testing::Combine(
        ::testing::ValuesIn(netPrecisions),
        ::testing::ValuesIn(suitablePartialShapes),
        ::testing::ValuesIn(testValues),
        ::testing::ValuesIn(transposeFlags)),
    IsAsymmetricOnWeightsTransformation::getTestCaseName);
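For reference, each parameter row in transposeFlags pairs five boolean flags with the expected result of isAsymmetricOnWeights; the flags select which input of the FakeQuantize on weights is wrapped in a Transpose (data, input low, input high, output low, output high — see the extended ConvolutionFunction::getOriginal signature in the next hunk). A transposed data input still yields an asymmetric verdict, while transposing any interval constant is treated as an unsupported FakeQuantize, so the check returns false. A plain C++ reading of three rows, for illustration only:

#include <utility>
#include <vector>

// {transpose data, transpose inputLow, transpose inputHigh, transpose outputLow, transpose outputHigh}, expected result
std::pair<std::vector<bool>, bool> plainFq        = {{false, false, false, false, false}, true};   // untouched FQ: asymmetric detected
std::pair<std::vector<bool>, bool> transposedData = {{true,  false, false, false, false}, true};   // Transpose on data only: still detected
std::pair<std::vector<bool>, bool> transposedLow  = {{false, true,  false, false, false}, false};  // Transpose on inputLow: FQ not supported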
@@ -24,7 +24,12 @@ public:
        const ngraph::PartialShape& inputShape,
        const ngraph::builder::subgraph::DequantizationOperations& dequantizationBefore,
        std::shared_ptr<ngraph::opset1::Constant> weights,
-       const ngraph::builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights);
+       const ngraph::builder::subgraph::FakeQuantizeOnWeights fqOnWeights,
+       const bool fqOnWeightsTransposeOnData = false,
+       const bool fqOnWeightsTransposeOnInputLow = false,
+       const bool fqOnWeightsTransposeOnInputHigh = false,
+       const bool fqOnWeightsTransposeOnOutputLow = false,
+       const bool fqOnWeightsTransposeOnOutputHigh = false);

    static std::shared_ptr<ngraph::Function> getOriginalWithIncorrectWeights(
        const ngraph::Shape& inputShape,
@@ -27,7 +27,12 @@ std::shared_ptr<ngraph::Function> ConvolutionFunction::getOriginal(
    const ngraph::PartialShape& inputShape,
    const ngraph::builder::subgraph::DequantizationOperations& dequantizationBefore,
    std::shared_ptr<ngraph::opset1::Constant> weights,
-   const ngraph::builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights) {
+   const ngraph::builder::subgraph::FakeQuantizeOnWeights fqOnWeights,
+   const bool transposeOnData,
+   const bool transposeOnInputLow,
+   const bool transposeOnInputHigh,
+   const bool transposeOnOutputLow,
+   const bool transposeOnOutputHigh) {
    const auto input = std::make_shared<ngraph::opset1::Parameter>(inputPrecision, inputShape);
    auto dequantizationStructure = dequantizationBefore;
    dequantizationStructure.multiply.outPrecision = netPrecision;
@@ -53,15 +58,32 @@ std::shared_ptr<ngraph::Function> ConvolutionFunction::getOriginal(
    convertOnWeights->constant_fold(convertedOutput, convertOnWeights->input_values());
    const auto convertedWeights = convertedOutput[0].get_node_shared_ptr();

-   const auto onWeights = fakeQuantizeOnWeights.empty() ? convertedWeights :
-       ngraph::builder::makeFakeQuantize(
-           convertedWeights, netPrecision,
-           fakeQuantizeOnWeights.quantizationLevel,
-           fakeQuantizeOnWeights.constantShape,
-           fakeQuantizeOnWeights.inputLowValues,
-           fakeQuantizeOnWeights.inputHighValues,
-           fakeQuantizeOnWeights.outputLowValues,
-           fakeQuantizeOnWeights.outputHighValues);
+   const std::shared_ptr<ngraph::Node> constant = ngraph::opset1::Constant::create(ngraph::element::u64, ngraph::Shape{4}, {0, 1, 2, 3});
+   const std::shared_ptr<Node> onWeights = fqOnWeights.empty() ?
+       convertedWeights :
+       std::make_shared<opset1::FakeQuantize>(
+           transposeOnData ? std::make_shared<opset1::Transpose>(convertedWeights, constant) : convertedWeights,
+           transposeOnInputLow ?
+               std::make_shared<opset1::Transpose>(
+                   makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.inputLowValues, fqOnWeights.inputLowValues.empty()),
+                   constant->clone_with_new_inputs({})) :
+               makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.inputLowValues, fqOnWeights.inputLowValues.empty()),
+           transposeOnInputHigh ?
+               std::make_shared<opset1::Transpose>(
+                   makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.inputHighValues, fqOnWeights.inputHighValues.empty()),
+                   constant->clone_with_new_inputs({})) :
+               makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.inputHighValues, fqOnWeights.inputHighValues.empty()),
+           transposeOnOutputLow ?
+               std::make_shared<opset1::Transpose>(
+                   makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.outputLowValues, fqOnWeights.outputLowValues.empty()),
+                   constant->clone_with_new_inputs({})) :
+               makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.outputLowValues, fqOnWeights.outputLowValues.empty()),
+           transposeOnOutputHigh ?
+               std::make_shared<opset1::Transpose>(
+                   makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.outputHighValues, fqOnWeights.outputHighValues.empty()),
+                   constant->clone_with_new_inputs({})) :
+               makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.outputHighValues, fqOnWeights.outputHighValues.empty()),
+           fqOnWeights.quantizationLevel);

    auto convolutionOriginal = ngraph::opset1::Convolution(
        ngraph::op::TemporaryReplaceOutputType(dequantization, netPrecision).get(),
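Note that the permutation constant used above is {0, 1, 2, 3}, the identity permutation for a 4D tensor: the inserted Transpose does not change any values, it only changes the graph structure so that a FakeQuantize input is no longer a plain Constant, which the new guard in QuantizationDetails::getDetails then reports as an unsupported layout instead of reading the interval constants. A tiny standalone illustration of the identity permutation (generic C++, not the ngraph API):

#include <cstddef>
#include <iostream>
#include <vector>

// Applies a transpose permutation to a shape: out[i] = in[perm[i]].
std::vector<std::size_t> permuteShape(const std::vector<std::size_t>& shape,
                                      const std::vector<std::size_t>& perm) {
    std::vector<std::size_t> result(shape.size());
    for (std::size_t i = 0; i < perm.size(); ++i) {
        result[i] = shape[perm[i]];
    }
    return result;
}

int main() {
    const std::vector<std::size_t> fqConstantShape = {1, 1, 1, 1};   // per-tensor FQ constant shape used in the test
    const auto permuted = permuteShape(fqConstantShape, {0, 1, 2, 3});
    std::cout << (permuted == fqConstantShape) << '\n';              // 1: identity permutation, values untouched
}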