[LPT] isAsymmetricQuantization & isAsymmetricOnWeights validation (#8316)

* [LPT] isAsymmetricQuantization & isAsymmetricOnWeights

* [LPT] isAsymmetricOnWeights tests

* [LPT] tests improvements: comments fixes
This commit is contained in:
Edward Shogulin 2021-11-05 13:24:18 +03:00 committed by GitHub
parent 2ed4e9c05f
commit 5f7e3cdfb9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 206 additions and 13 deletions

View File

@ -29,7 +29,7 @@ public:
const std::shared_ptr<ngraph::opset1::Multiply>& multiply,
const std::shared_ptr<ngraph::opset1::Constant>& multiplyConstant);
bool empty() const;
bool empty() const noexcept;
bool multiplyHasZeroOrDenormal() const;
bool isShared() const;
bool isLowPrecision() const;

View File

@ -56,6 +56,10 @@ public:
max(max),
hasZeroPoint(hasZeroPoint) {}
bool empty() const noexcept {
return (precision == element::undefined) && (min == 0.f) && (max == 0.f) && (!hasZeroPoint);
}
static bool isSupported(const element::Type& precision) {
static const std::set<element::Type_t> lowPrecision = {
element::i8, element::u8,

View File

@ -51,6 +51,8 @@ public:
float getOutputLowValue(const size_t channel) const;
float getOutputHighValue(const size_t channel) const;
bool empty() const noexcept;
static bool isSupportedLevel(const size_t level);
const size_t levels;

View File

@ -34,7 +34,7 @@ FakeQuantizeDequantization::FakeQuantizeDequantization(
multiplyConstant(multiplyConstant) {
}
bool FakeQuantizeDequantization::empty() const {
bool FakeQuantizeDequantization::empty() const noexcept {
    // Dequantization is absent when neither a Subtract (zero point)
    // nor a Multiply (scale) operation was matched.
    return !subtract && !multiply;
}

View File

@ -311,6 +311,9 @@ LayerTransformation::PrecisionDetails LayerTransformation::getPrecisionDetails(c
bool LayerTransformation::isAsymmetricQuantization(const std::shared_ptr<const Node>& layer) {
    // getDequantization needs a mutable node: drop the const qualifier while
    // keeping shared ownership of the same underlying object.
    const auto mutableLayer = std::const_pointer_cast<ngraph::Node>(layer);
    const auto dequantization = NetworkHelper::getDequantization(mutableLayer);
    // Quantization is asymmetric exactly when a zero-point (Subtract) exists
    // in a non-empty dequantization chain.
    return !dequantization.empty() && (dequantization.subtract != nullptr);
}

View File

@ -91,6 +91,10 @@ void QuantizationDetails::getOutputIntervals(
}
QuantizationDetails QuantizationDetails::getDetails(std::shared_ptr<opset1::FakeQuantize> quantize) {
if (!QuantizationDetails::outputLayoutIsSupported(quantize)) {
return QuantizationDetails();
}
const std::vector<float> inputLowValues = ov::as_type_ptr<opset1::Constant>(quantize->get_input_node_shared_ptr(1))->cast_vector<float>();
const std::vector<float> inputHighValues = ov::as_type_ptr<opset1::Constant>(quantize->get_input_node_shared_ptr(2))->cast_vector<float>();
@ -153,6 +157,10 @@ std::vector<float> QuantizationDetails::getBlobValue(std::shared_ptr<Node> const
return ov::as_type_ptr<opset1::Constant>(constantLayer)->cast_vector<float>();
}
bool QuantizationDetails::empty() const noexcept {
    // Matches the default-constructed state: zero levels and no interval data.
    const bool noIntervals = inputLowValues.empty() && inputHighValues.empty() &&
        outputLowValues.empty() && outputHighValues.empty();
    return (levels == 0ul) && noIntervals;
}
bool QuantizationDetails::isSupportedLevel(const size_t level) {
static const std::unordered_set<size_t> supported_levels = { 255, 256, 65536, 65535, static_cast<size_t>(4294967296), 4294967295 };
return supported_levels.find(level) != supported_levels.end();

View File

@ -361,6 +361,9 @@ std::shared_ptr<opset1::FakeQuantize> WeightableLayerTransformation::getFakeQuan
DataPrecision WeightableLayerTransformation::getDataPrecisionOnWeights(const std::shared_ptr<Node>& node) {
const auto fq = getFakeQuantizeOnWeights(node);
const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(fq);
if (quantizationDetails.empty()) {
return DataPrecision();
}
const auto precisionsAttribute = getAttributeFromOutput<PrecisionsAttributePtr>(fq);
const auto precisions = precisionsAttribute == nullptr ?
@ -380,6 +383,10 @@ bool WeightableLayerTransformation::isAsymmetricOnWeights(const std::shared_ptr<
if (dequantization.empty()) {
const auto dataPrecision = WeightableLayerTransformation::getDataPrecisionOnWeights(n);
if (dataPrecision.empty()) {
return false;
}
if (dataPrecision.hasZeroPoint) {
return true;
}

View File

@ -0,0 +1,142 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "layer_transformation.hpp"
#include <sstream>
#include <memory>
#include <utility>
#include <gtest/gtest.h>
#include <transformations/utils/utils.hpp>
#include <transformations/init_node_info.hpp>
#include <low_precision/weightable_layer_transformation.hpp>
#include "lpt_ngraph_functions/convolution_function.hpp"
using namespace testing;
using namespace ngraph;
using namespace ngraph::pass;
// One isAsymmetricOnWeights test case: transformation parameters plus the
// ingredients used to build the original (pre-transformation) function.
class IsAsymmetricOnWeightsTestValues {
public:
class Actual {
public:
// Precision of the input tensor before the dequantization subgraph.
ngraph::element::Type precisionBeforeDequantization;
// Dequantization operations (Convert/Subtract/Multiply) on activations.
ngraph::builder::subgraph::DequantizationOperations dequantizationOnActivations;
// Constant weights fed into the convolution.
std::shared_ptr<ngraph::opset1::Constant> weights;
// FakeQuantize description applied on the weights path.
builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights;
};
TestTransformationParams params;
Actual actual;
};
// Full parameter tuple for one test instance:
// <net precision, input shape, test values,
//  {five per-FQ-input transpose flags, expected isAsymmetricOnWeights result}>.
typedef std::tuple<
element::Type,
ngraph::PartialShape,
IsAsymmetricOnWeightsTestValues,
std::pair<std::vector<bool>, bool> > IsAsymmetricOnWeightsParams;
// Parameterized fixture: builds a convolution function with configurable
// transposes injected on the weights' FakeQuantize inputs, so that
// WeightableLayerTransformation::isAsymmetricOnWeights can be validated
// against both supported and unsupported FakeQuantize layouts.
class IsAsymmetricOnWeightsTransformation : public LayerTransformation, public testing::WithParamInterface<IsAsymmetricOnWeightsParams> {
public:
void SetUp() override {
const auto netPrecision = std::get<0>(GetParam());
const auto inputShape = std::get<1>(GetParam());
auto testValues = std::get<2>(GetParam());
// first: five transpose flags (data, inputLow, inputHigh, outputLow, outputHigh);
// second: expected isAsymmetricOnWeights result (checked in the test body).
// NOTE(review): assumes `first` always holds exactly 5 elements — guaranteed
// by the transposeFlags table below; confirm if new cases are added.
std::pair<std::vector<bool>, bool> transposeAndIsAsymmetricOnWeights = std::get<3>(GetParam());
actualFunction = ngraph::builder::subgraph::ConvolutionFunction::getOriginal(
netPrecision,
testValues.actual.precisionBeforeDequantization,
inputShape,
testValues.actual.dequantizationOnActivations,
testValues.actual.weights,
testValues.actual.fakeQuantizeOnWeights,
transposeAndIsAsymmetricOnWeights.first[0],
transposeAndIsAsymmetricOnWeights.first[1],
transposeAndIsAsymmetricOnWeights.first[2],
transposeAndIsAsymmetricOnWeights.first[3],
transposeAndIsAsymmetricOnWeights.first[4]);
}
// Encodes all parameters (precisions, shapes, weights, FQ description and
// the five transpose flags) into a unique, human-readable test name.
static std::string getTestCaseName(testing::TestParamInfo<IsAsymmetricOnWeightsParams> obj) {
const auto netPrecision = std::get<0>(obj.param);
auto inputShape = std::get<1>(obj.param);
IsAsymmetricOnWeightsTestValues testValues = std::get<2>(obj.param);
std::pair<std::vector<bool>, bool> transposeAndIsAsymmetricOnWeights = std::get<3>(obj.param);
std::ostringstream result;
result << toString(testValues.params) << "_" <<
netPrecision << "_" <<
inputShape << "_" <<
testValues.actual.precisionBeforeDequantization << "_" <<
testValues.actual.dequantizationOnActivations << "_" << "_weights_" <<
testValues.actual.weights->get_element_type() << "_" << "{ " <<
testValues.actual.weights->cast_vector<float>()[0] << " }_" <<
testValues.actual.fakeQuantizeOnWeights << "_" <<
transposeAndIsAsymmetricOnWeights.first[0] << "_" <<
transposeAndIsAsymmetricOnWeights.first[1] << "_" <<
transposeAndIsAsymmetricOnWeights.first[2] << "_" <<
transposeAndIsAsymmetricOnWeights.first[3] << "_" <<
transposeAndIsAsymmetricOnWeights.first[4];
return result.str();
}
};
// Verifies isAsymmetricOnWeights on the single convolution of the built
// function against the expected value carried in the test parameters.
TEST_P(IsAsymmetricOnWeightsTransformation, CompareFunctions) {
actualFunction->validate_nodes_and_infer_types();
const auto convolutions = LayerTransformation::get<opset1::Convolution>(actualFunction);
// Exactly one convolution is expected in the test graph.
ASSERT_TRUE(convolutions.size() == 1ul) << "convolution was not found";
const auto isAsymmetricOnWeights = ngraph::pass::low_precision::WeightableLayerTransformation::isAsymmetricOnWeights(convolutions[0]);
std::pair<std::vector<bool>, bool> transposeAndIsAsymmetricOnWeights = std::get<3>(GetParam());
// second = expected result for this transpose-flag combination.
ASSERT_EQ(transposeAndIsAsymmetricOnWeights.second, isAsymmetricOnWeights);
}
// Network precisions to test against.
const std::vector<element::Type> netPrecisions = {
element::f32
};
// Input shapes, including partially dynamic ones, that the check must handle.
const std::vector<ngraph::PartialShape> suitablePartialShapes = {
ngraph::PartialShape({ 1, 3, 72, 48 }),
ngraph::PartialShape({ 4, 3, 72, 48 }),
ngraph::PartialShape({ Dimension::dynamic(), 3, 72, 48 }),
ngraph::PartialShape({ 1, 3, Dimension::dynamic(), Dimension::dynamic() }),
};
// Single base case: u8 activations with a 128 zero point (asymmetric) and an
// asymmetric FakeQuantize on weights (output interval [-1, 1.27]).
const std::vector<IsAsymmetricOnWeightsTestValues> testValues = {
{
LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true),
{
ngraph::element::u8,
{{ngraph::element::f32}, { 128.f }, { 0.02f }},
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ 2.f }),
{ 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.f }, { 1.27f } },
}
}
};
// {five transpose flags, expected result}: a Transpose on the FQ data input is
// tolerated, while a Transpose on any interval input makes the FakeQuantize
// unsupported, so isAsymmetricOnWeights must return false.
const std::vector<std::pair<std::vector<bool>, bool> > transposeFlags = {
// asymmetric quantization
{{false, false, false, false, false}, true},
{{true, false, false, false, false}, true},
// not supported FakeQuantize
{{false, true, false, false, false}, false},
{{false, false, true, false, false}, false},
{{false, false, false, true, false}, false},
{{false, false, false, false, true}, false}
};
// Cartesian product of all precision / shape / values / flag combinations.
INSTANTIATE_TEST_SUITE_P(
smoke_LPT,
IsAsymmetricOnWeightsTransformation,
::testing::Combine(
::testing::ValuesIn(netPrecisions),
::testing::ValuesIn(suitablePartialShapes),
::testing::ValuesIn(testValues),
::testing::ValuesIn(transposeFlags)),
IsAsymmetricOnWeightsTransformation::getTestCaseName);

View File

@ -24,7 +24,12 @@ public:
const ngraph::PartialShape& inputShape,
const ngraph::builder::subgraph::DequantizationOperations& dequantizationBefore,
std::shared_ptr<ngraph::opset1::Constant> weights,
const ngraph::builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights);
const ngraph::builder::subgraph::FakeQuantizeOnWeights fqOnWeights,
const bool fqOnWeightsTransposeOnData = false,
const bool fqOnWeightsTransposeOnInputLow = false,
const bool fqOnWeightsTransposeOnInputHigh = false,
const bool fqOnWeightsTransposeOnOutputLow = false,
const bool fqOnWeightsTransposeOnOutputHigh = false);
static std::shared_ptr<ngraph::Function> getOriginalWithIncorrectWeights(
const ngraph::Shape& inputShape,

View File

@ -27,7 +27,12 @@ std::shared_ptr<ngraph::Function> ConvolutionFunction::getOriginal(
const ngraph::PartialShape& inputShape,
const ngraph::builder::subgraph::DequantizationOperations& dequantizationBefore,
std::shared_ptr<ngraph::opset1::Constant> weights,
const ngraph::builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights) {
const ngraph::builder::subgraph::FakeQuantizeOnWeights fqOnWeights,
const bool transposeOnData,
const bool transposeOnInputLow,
const bool transposeOnInputHigh,
const bool transposeOnOutputLow,
const bool transposeOnOutputHigh) {
const auto input = std::make_shared<ngraph::opset1::Parameter>(inputPrecision, inputShape);
auto dequantizationStructure = dequantizationBefore;
dequantizationStructure.multiply.outPrecision = netPrecision;
@ -53,15 +58,32 @@ std::shared_ptr<ngraph::Function> ConvolutionFunction::getOriginal(
convertOnWeights->constant_fold(convertedOutput, convertOnWeights->input_values());
const auto convertedWeights = convertedOutput[0].get_node_shared_ptr();
const auto onWeights = fakeQuantizeOnWeights.empty() ? convertedWeights :
ngraph::builder::makeFakeQuantize(
convertedWeights, netPrecision,
fakeQuantizeOnWeights.quantizationLevel,
fakeQuantizeOnWeights.constantShape,
fakeQuantizeOnWeights.inputLowValues,
fakeQuantizeOnWeights.inputHighValues,
fakeQuantizeOnWeights.outputLowValues,
fakeQuantizeOnWeights.outputHighValues);
const std::shared_ptr<ngraph::Node> constant = ngraph::opset1::Constant::create(ngraph::element::u64, ngraph::Shape{4}, {0, 1, 2, 3});
const std::shared_ptr<Node> onWeights = fqOnWeights.empty() ?
convertedWeights :
std::make_shared<opset1::FakeQuantize>(
transposeOnData ? std::make_shared<opset1::Transpose>(convertedWeights, constant) : convertedWeights,
transposeOnInputLow ?
std::make_shared<opset1::Transpose>(
makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.inputLowValues, fqOnWeights.inputLowValues.empty()),
constant->clone_with_new_inputs({})) :
makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.inputLowValues, fqOnWeights.inputLowValues.empty()),
transposeOnInputHigh ?
std::make_shared<opset1::Transpose>(
makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.inputHighValues, fqOnWeights.inputHighValues.empty()),
constant->clone_with_new_inputs({})) :
makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.inputHighValues, fqOnWeights.inputHighValues.empty()),
transposeOnOutputLow ?
std::make_shared<opset1::Transpose>(
makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.outputLowValues, fqOnWeights.outputLowValues.empty()),
constant->clone_with_new_inputs({})) :
makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.outputLowValues, fqOnWeights.outputLowValues.empty()),
transposeOnOutputHigh ?
std::make_shared<opset1::Transpose>(
makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.outputHighValues, fqOnWeights.outputHighValues.empty()),
constant->clone_with_new_inputs({})) :
makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.outputHighValues, fqOnWeights.outputHighValues.empty()),
fqOnWeights.quantizationLevel);
auto convolutionOriginal = ngraph::opset1::Convolution(
ngraph::op::TemporaryReplaceOutputType(dequantization, netPrecision).get(),