[LPT] isAsymmetricOnWeights fix: small zero points ignoring (#8429)

This commit is contained in:
Edward Shogulin 2021-11-11 22:58:56 +03:00 committed by GitHub
parent f46e8bb3f8
commit 8686100c80
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 181 additions and 49 deletions

View File

@ -42,7 +42,7 @@ bool WeightableLayerTransformation::canConvolutionBeTransformed(const Transforma
if (dequantization.empty()) { if (dequantization.empty()) {
const auto fqOnWeights = getFakeQuantizeOnWeights(layer); const auto fqOnWeights = getFakeQuantizeOnWeights(layer);
const auto dataPrecision = getDataPrecisionOnWeights(layer); const auto dataPrecision = getDataPrecisionOnWeights(layer);
if (!NetworkHelper::checkZeroPoint(fqOnWeights, dataPrecision)) { if ((dataPrecision.precision == ngraph::element::undefined) || (!NetworkHelper::checkZeroPoint(fqOnWeights, dataPrecision))) {
return false; return false;
} }
} else { } else {
@ -391,7 +391,7 @@ bool WeightableLayerTransformation::isAsymmetricOnWeights(const std::shared_ptr<
return true; return true;
} }
} else { } else {
if (dequantization.subtract != nullptr) { if ((dequantization.subtract != nullptr) && (NetworkHelper::optimizeSubtract(dequantization.subtract) != nullptr)) {
return true; return true;
} }
} }

View File

@ -0,0 +1,128 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "layer_transformation.hpp"
#include <sstream>
#include <memory>
#include <utility>
#include <gtest/gtest.h>
#include <transformations/utils/utils.hpp>
#include <transformations/init_node_info.hpp>
#include <low_precision/weightable_layer_transformation.hpp>
#include "lpt_ngraph_functions/convolution_function.hpp"
using namespace testing;
using namespace ngraph;
using namespace ngraph::pass;
// One test case: a convolution whose weights carry explicit dequantization
// operations (Convert / Subtract / Multiply) instead of a FakeQuantize, plus
// the value isAsymmetricOnWeights is expected to return for that graph.
class IsAsymmetricOnWeightsDequantizationTestValues {
public:
    // Precision of the activation input before its dequantization chain.
    ngraph::element::Type precisionBeforeDequantization;
    // Dequantization applied to the activations branch.
    ngraph::builder::subgraph::DequantizationOperations dequantizationOnActivations;
    // Constant weights fed to the convolution.
    std::shared_ptr<ngraph::opset1::Constant> weights;
    // Dequantization applied to the weights branch (its Subtract is what
    // makes the weights "asymmetric").
    builder::subgraph::DequantizationOperations dequantizationOnWeights;
    // Expected result of WeightableLayerTransformation::isAsymmetricOnWeights.
    bool isAsymmetricOnWeights;
};

// Full parameter tuple for the test: net precision, input shape, test values.
typedef std::tuple<
    element::Type,
    ngraph::PartialShape,
    IsAsymmetricOnWeightsDequantizationTestValues> IsAsymmetricOnWeightsDequantizationParams;
// Fixture that builds a convolution with dequantization operations on both
// activations and weights, so the test body can query
// WeightableLayerTransformation::isAsymmetricOnWeights on the resulting graph.
class IsAsymmetricOnWeightsDequantizationTransformation :
    public LayerTransformation,
    public testing::WithParamInterface<IsAsymmetricOnWeightsDequantizationParams> {
public:
    void SetUp() override {
        const auto precision = std::get<0>(GetParam());
        const auto shape = std::get<1>(GetParam());
        const auto values = std::get<2>(GetParam());

        // No FakeQuantize on weights ({}): the weights branch is described
        // purely by its dequantization operations.
        actualFunction = ngraph::builder::subgraph::ConvolutionFunction::getOriginal(
            precision,
            values.precisionBeforeDequantization,
            shape,
            values.dequantizationOnActivations,
            values.weights,
            {},
            values.dequantizationOnWeights);
    }

    // Builds a readable, unique test-case name from all varying parameters.
    static std::string getTestCaseName(testing::TestParamInfo<IsAsymmetricOnWeightsDequantizationParams> obj) {
        const auto precision = std::get<0>(obj.param);
        const auto shape = std::get<1>(obj.param);
        const auto values = std::get<2>(obj.param);

        std::ostringstream name;
        name << precision << "_" << shape << "_" << values.precisionBeforeDequantization << "_";
        name << values.dequantizationOnActivations << "__weights_";
        name << values.weights->get_element_type() << "_{ ";
        name << values.weights->cast_vector<float>()[0] << " }_";
        name << values.dequantizationOnWeights;
        return name.str();
    }
};
// Checks that isAsymmetricOnWeights returns the expected flag for a
// convolution whose weights are dequantized via explicit Subtract/Multiply.
TEST_P(IsAsymmetricOnWeightsDequantizationTransformation, CompareFunctions) {
    actualFunction->validate_nodes_and_infer_types();
    // Exactly one convolution is expected in the built function.
    const auto convolutions = LayerTransformation::get<opset1::Convolution>(actualFunction);
    ASSERT_TRUE(convolutions.size() == 1ul) << "convolution was not found";
    IsAsymmetricOnWeightsDequantizationTestValues testValues = std::get<2>(GetParam());
    // The value under test: whether LPT considers the weights asymmetric.
    const auto isAsymmetricOnWeights = ngraph::pass::low_precision::WeightableLayerTransformation::isAsymmetricOnWeights(convolutions[0]);
    ASSERT_EQ(testValues.isAsymmetricOnWeights, isAsymmetricOnWeights);
}
// Network precisions to instantiate the suite with.
const std::vector<element::Type> netPrecisions = {
    element::f32
};

// Input shapes the transformation should handle, including dynamic batch
// and dynamic spatial dimensions.
const std::vector<ngraph::PartialShape> suitablePartialShapes = {
    ngraph::PartialShape({ 1, 3, 72, 48 }),
    ngraph::PartialShape({ 4, 3, 72, 48 }),
    ngraph::PartialShape({ Dimension::dynamic(), 3, 72, 48 }),
    ngraph::PartialShape({ 1, 3, Dimension::dynamic(), Dimension::dynamic() }),
};

const std::vector<IsAsymmetricOnWeightsDequantizationTestValues> testValues = {
    // Subtract on weights with non-trivial zero points {1..6}:
    // weights are reported as asymmetric.
    {
        ngraph::element::u8,
        {{ngraph::element::f32}, { 128.f }, { 0.02f }},
        op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ 2.f }),
        {
            {ngraph::element::f32},
            {{1, 2, 3, 4, 5, 6}, ngraph::element::f32, {6, 1, 1, 1}},
            {{1, 2, 3, 4, 5, 6}, ngraph::element::f32, {6, 1, 1, 1}}
        },
        true
    },
    // Subtract on weights whose zero points are all (near) zero
    // (largest is 1e-7): the Subtract is expected to be optimized away,
    // so the weights are NOT reported as asymmetric — this is the case
    // the "small zero points ignoring" fix covers.
    {
        ngraph::element::u8,
        {{ngraph::element::f32}, { 128.f }, { 0.02f }},
        op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ 2.f }),
        {
            {ngraph::element::f32},
            {{0, 0, 1.e-7, 0, 0, 0}, ngraph::element::f32, {6, 1, 1, 1}},
            {{1, 2, 3, 4, 5, 6}, ngraph::element::f32, {6, 1, 1, 1}}
        },
        false
    }
};
// Cross product of precisions, shapes, and test values; test-case names
// come from the fixture's getTestCaseName.
INSTANTIATE_TEST_SUITE_P(
    smoke_LPT,
    IsAsymmetricOnWeightsDequantizationTransformation,
    ::testing::Combine(
        ::testing::ValuesIn(netPrecisions),
        ::testing::ValuesIn(suitablePartialShapes),
        ::testing::ValuesIn(testValues)),
    IsAsymmetricOnWeightsDequantizationTransformation::getTestCaseName);

View File

@ -19,27 +19,23 @@ using namespace testing;
using namespace ngraph; using namespace ngraph;
using namespace ngraph::pass; using namespace ngraph::pass;
class IsAsymmetricOnWeightsTestValues { class IsAsymmetricOnWeightsFakeQuantizeTestValues {
public: public:
class Actual { ngraph::element::Type precisionBeforeDequantization;
public: ngraph::builder::subgraph::DequantizationOperations dequantizationOnActivations;
ngraph::element::Type precisionBeforeDequantization; std::shared_ptr<ngraph::opset1::Constant> weights;
ngraph::builder::subgraph::DequantizationOperations dequantizationOnActivations; builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights;
std::shared_ptr<ngraph::opset1::Constant> weights;
builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights;
};
TestTransformationParams params;
Actual actual;
}; };
typedef std::tuple< typedef std::tuple<
element::Type, element::Type,
ngraph::PartialShape, ngraph::PartialShape,
IsAsymmetricOnWeightsTestValues, IsAsymmetricOnWeightsFakeQuantizeTestValues,
std::pair<std::vector<bool>, bool> > IsAsymmetricOnWeightsParams; std::pair<std::vector<bool>, bool> > IsAsymmetricOnWeightsFakeQuantizeParams;
class IsAsymmetricOnWeightsTransformation : public LayerTransformation, public testing::WithParamInterface<IsAsymmetricOnWeightsParams> { class IsAsymmetricOnWeightsFakeQuantizeTransformation :
public LayerTransformation,
public testing::WithParamInterface<IsAsymmetricOnWeightsFakeQuantizeParams> {
public: public:
void SetUp() override { void SetUp() override {
const auto netPrecision = std::get<0>(GetParam()); const auto netPrecision = std::get<0>(GetParam());
@ -49,11 +45,12 @@ public:
actualFunction = ngraph::builder::subgraph::ConvolutionFunction::getOriginal( actualFunction = ngraph::builder::subgraph::ConvolutionFunction::getOriginal(
netPrecision, netPrecision,
testValues.actual.precisionBeforeDequantization, testValues.precisionBeforeDequantization,
inputShape, inputShape,
testValues.actual.dequantizationOnActivations, testValues.dequantizationOnActivations,
testValues.actual.weights, testValues.weights,
testValues.actual.fakeQuantizeOnWeights, testValues.fakeQuantizeOnWeights,
{},
transposeAndIsAsymmetricOnWeights.first[0], transposeAndIsAsymmetricOnWeights.first[0],
transposeAndIsAsymmetricOnWeights.first[1], transposeAndIsAsymmetricOnWeights.first[1],
transposeAndIsAsymmetricOnWeights.first[2], transposeAndIsAsymmetricOnWeights.first[2],
@ -61,21 +58,21 @@ public:
transposeAndIsAsymmetricOnWeights.first[4]); transposeAndIsAsymmetricOnWeights.first[4]);
} }
static std::string getTestCaseName(testing::TestParamInfo<IsAsymmetricOnWeightsParams> obj) { static std::string getTestCaseName(testing::TestParamInfo<IsAsymmetricOnWeightsFakeQuantizeParams> obj) {
const auto netPrecision = std::get<0>(obj.param); const auto netPrecision = std::get<0>(obj.param);
auto inputShape = std::get<1>(obj.param); auto inputShape = std::get<1>(obj.param);
IsAsymmetricOnWeightsTestValues testValues = std::get<2>(obj.param); IsAsymmetricOnWeightsFakeQuantizeTestValues testValues = std::get<2>(obj.param);
std::pair<std::vector<bool>, bool> transposeAndIsAsymmetricOnWeights = std::get<3>(obj.param); std::pair<std::vector<bool>, bool> transposeAndIsAsymmetricOnWeights = std::get<3>(obj.param);
std::ostringstream result; std::ostringstream result;
result << toString(testValues.params) << "_" << result <<
netPrecision << "_" << netPrecision << "_" <<
inputShape << "_" << inputShape << "_" <<
testValues.actual.precisionBeforeDequantization << "_" << testValues.precisionBeforeDequantization << "_" <<
testValues.actual.dequantizationOnActivations << "_" << "_weights_" << testValues.dequantizationOnActivations << "_" << "_weights_" <<
testValues.actual.weights->get_element_type() << "_" << "{ " << testValues.weights->get_element_type() << "_" << "{ " <<
testValues.actual.weights->cast_vector<float>()[0] << " }_" << testValues.weights->cast_vector<float>()[0] << " }_" <<
testValues.actual.fakeQuantizeOnWeights << "_" << testValues.fakeQuantizeOnWeights << "_" <<
transposeAndIsAsymmetricOnWeights.first[0] << "_" << transposeAndIsAsymmetricOnWeights.first[0] << "_" <<
transposeAndIsAsymmetricOnWeights.first[1] << "_" << transposeAndIsAsymmetricOnWeights.first[1] << "_" <<
transposeAndIsAsymmetricOnWeights.first[2] << "_" << transposeAndIsAsymmetricOnWeights.first[2] << "_" <<
@ -85,7 +82,7 @@ public:
} }
}; };
TEST_P(IsAsymmetricOnWeightsTransformation, CompareFunctions) { TEST_P(IsAsymmetricOnWeightsFakeQuantizeTransformation, CompareFunctions) {
actualFunction->validate_nodes_and_infer_types(); actualFunction->validate_nodes_and_infer_types();
const auto convolutions = LayerTransformation::get<opset1::Convolution>(actualFunction); const auto convolutions = LayerTransformation::get<opset1::Convolution>(actualFunction);
@ -107,15 +104,12 @@ const std::vector<ngraph::PartialShape> suitablePartialShapes = {
ngraph::PartialShape({ 1, 3, Dimension::dynamic(), Dimension::dynamic() }), ngraph::PartialShape({ 1, 3, Dimension::dynamic(), Dimension::dynamic() }),
}; };
const std::vector<IsAsymmetricOnWeightsTestValues> testValues = { const std::vector<IsAsymmetricOnWeightsFakeQuantizeTestValues> testValues = {
{ {
LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true), ngraph::element::u8,
{ {{ngraph::element::f32}, { 128.f }, { 0.02f }},
ngraph::element::u8, op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ 2.f }),
{{ngraph::element::f32}, { 128.f }, { 0.02f }}, { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.f }, { 1.27f } },
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ 2.f }),
{ 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.f }, { 1.27f } },
}
} }
}; };
@ -133,10 +127,10 @@ const std::vector<std::pair<std::vector<bool>, bool> > transposeFlags = {
INSTANTIATE_TEST_SUITE_P( INSTANTIATE_TEST_SUITE_P(
smoke_LPT, smoke_LPT,
IsAsymmetricOnWeightsTransformation, IsAsymmetricOnWeightsFakeQuantizeTransformation,
::testing::Combine( ::testing::Combine(
::testing::ValuesIn(netPrecisions), ::testing::ValuesIn(netPrecisions),
::testing::ValuesIn(suitablePartialShapes), ::testing::ValuesIn(suitablePartialShapes),
::testing::ValuesIn(testValues), ::testing::ValuesIn(testValues),
::testing::ValuesIn(transposeFlags)), ::testing::ValuesIn(transposeFlags)),
IsAsymmetricOnWeightsTransformation::getTestCaseName); IsAsymmetricOnWeightsFakeQuantizeTransformation::getTestCaseName);

View File

@ -22,9 +22,10 @@ public:
const ngraph::element::Type netPrecision, const ngraph::element::Type netPrecision,
const ngraph::element::Type inputPrecision, const ngraph::element::Type inputPrecision,
const ngraph::PartialShape& inputShape, const ngraph::PartialShape& inputShape,
const ngraph::builder::subgraph::DequantizationOperations& dequantizationBefore, const ngraph::builder::subgraph::DequantizationOperations& dequantizationOnActivations,
std::shared_ptr<ngraph::opset1::Constant> weights, std::shared_ptr<ngraph::opset1::Constant> weights,
const ngraph::builder::subgraph::FakeQuantizeOnWeights fqOnWeights, const ngraph::builder::subgraph::FakeQuantizeOnWeights fqOnWeights,
const ngraph::builder::subgraph::DequantizationOperations& dequantizationOnWeights = DequantizationOperations(),
const bool fqOnWeightsTransposeOnData = false, const bool fqOnWeightsTransposeOnData = false,
const bool fqOnWeightsTransposeOnInputLow = false, const bool fqOnWeightsTransposeOnInputLow = false,
const bool fqOnWeightsTransposeOnInputHigh = false, const bool fqOnWeightsTransposeOnInputHigh = false,

View File

@ -25,16 +25,17 @@ std::shared_ptr<ngraph::Function> ConvolutionFunction::getOriginal(
const ngraph::element::Type netPrecision, const ngraph::element::Type netPrecision,
const ngraph::element::Type inputPrecision, const ngraph::element::Type inputPrecision,
const ngraph::PartialShape& inputShape, const ngraph::PartialShape& inputShape,
const ngraph::builder::subgraph::DequantizationOperations& dequantizationBefore, const ngraph::builder::subgraph::DequantizationOperations& dequantizationOnActivations,
std::shared_ptr<ngraph::opset1::Constant> weights, std::shared_ptr<ngraph::opset1::Constant> weights,
const ngraph::builder::subgraph::FakeQuantizeOnWeights fqOnWeights, const ngraph::builder::subgraph::FakeQuantizeOnWeights fqOnWeights,
const ngraph::builder::subgraph::DequantizationOperations& dequantizationOnWeights,
const bool transposeOnData, const bool transposeOnData,
const bool transposeOnInputLow, const bool transposeOnInputLow,
const bool transposeOnInputHigh, const bool transposeOnInputHigh,
const bool transposeOnOutputLow, const bool transposeOnOutputLow,
const bool transposeOnOutputHigh) { const bool transposeOnOutputHigh) {
const auto input = std::make_shared<ngraph::opset1::Parameter>(inputPrecision, inputShape); const auto input = std::make_shared<ngraph::opset1::Parameter>(inputPrecision, inputShape);
auto dequantizationStructure = dequantizationBefore; auto dequantizationStructure = dequantizationOnActivations;
dequantizationStructure.multiply.outPrecision = netPrecision; dequantizationStructure.multiply.outPrecision = netPrecision;
const auto dequantization = makeDequantization(input, dequantizationStructure); const auto dequantization = makeDequantization(input, dequantizationStructure);
@ -53,15 +54,22 @@ std::shared_ptr<ngraph::Function> ConvolutionFunction::getOriginal(
weights, op::Constant::create(ngraph::element::i64, Shape{ targetShape.size() }, targetShape))); weights, op::Constant::create(ngraph::element::i64, Shape{ targetShape.size() }, targetShape)));
} }
const auto convertOnWeights = std::make_shared<opset1::Convert>(weights, netPrecision); std::shared_ptr<Node> convertedWeights;
OutputVector convertedOutput(1); if (dequantizationOnWeights.empty()) {
convertOnWeights->constant_fold(convertedOutput, convertOnWeights->input_values()); const auto convertOnWeights = std::make_shared<opset1::Convert>(weights, netPrecision);
const auto convertedWeights = convertedOutput[0].get_node_shared_ptr(); OutputVector convertedOutput(1);
convertOnWeights->constant_fold(convertedOutput, convertOnWeights->input_values());
convertedWeights = convertedOutput[0].get_node_shared_ptr();
} else {
convertedWeights = weights;
}
const std::shared_ptr<ngraph::Node> constant = ngraph::opset1::Constant::create(ngraph::element::u64, ngraph::Shape{4}, {0, 1, 2, 3}); const std::shared_ptr<ngraph::Node> constant = ngraph::opset1::Constant::create(ngraph::element::u64, ngraph::Shape{4}, {0, 1, 2, 3});
const std::shared_ptr<Node> onWeights = fqOnWeights.empty() ? std::shared_ptr<Node> onWeights;
convertedWeights : if (fqOnWeights.empty()) {
std::make_shared<opset1::FakeQuantize>( onWeights = dequantizationOnWeights.empty() ? convertedWeights : makeDequantization(convertedWeights, dequantizationOnWeights);
} else {
onWeights = std::make_shared<opset1::FakeQuantize>(
transposeOnData ? std::make_shared<opset1::Transpose>(convertedWeights, constant) : convertedWeights, transposeOnData ? std::make_shared<opset1::Transpose>(convertedWeights, constant) : convertedWeights,
transposeOnInputLow ? transposeOnInputLow ?
std::make_shared<opset1::Transpose>( std::make_shared<opset1::Transpose>(
@ -84,6 +92,7 @@ std::shared_ptr<ngraph::Function> ConvolutionFunction::getOriginal(
constant->clone_with_new_inputs({})) : constant->clone_with_new_inputs({})) :
makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.outputHighValues, fqOnWeights.outputHighValues.empty()), makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.outputHighValues, fqOnWeights.outputHighValues.empty()),
fqOnWeights.quantizationLevel); fqOnWeights.quantizationLevel);
}
auto convolutionOriginal = ngraph::opset1::Convolution( auto convolutionOriginal = ngraph::opset1::Convolution(
ngraph::op::TemporaryReplaceOutputType(dequantization, netPrecision).get(), ngraph::op::TemporaryReplaceOutputType(dequantization, netPrecision).get(),