diff --git a/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp b/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp index ac6d00ef420..bc5ee3706ba 100644 --- a/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp +++ b/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp @@ -42,7 +42,7 @@ bool WeightableLayerTransformation::canConvolutionBeTransformed(const Transforma if (dequantization.empty()) { const auto fqOnWeights = getFakeQuantizeOnWeights(layer); const auto dataPrecision = getDataPrecisionOnWeights(layer); - if (!NetworkHelper::checkZeroPoint(fqOnWeights, dataPrecision)) { + if ((dataPrecision.precision == ngraph::element::undefined) || (!NetworkHelper::checkZeroPoint(fqOnWeights, dataPrecision))) { return false; } } else { @@ -391,7 +391,7 @@ bool WeightableLayerTransformation::isAsymmetricOnWeights(const std::shared_ptr< return true; } } else { - if (dequantization.subtract != nullptr) { + if ((dequantization.subtract != nullptr) && (NetworkHelper::optimizeSubtract(dequantization.subtract) != nullptr)) { return true; } } diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/is_asymmetric_on_weights_dequantization.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/is_asymmetric_on_weights_dequantization.cpp new file mode 100644 index 00000000000..e5caba16f00 --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/is_asymmetric_on_weights_dequantization.cpp @@ -0,0 +1,128 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "layer_transformation.hpp" + +#include +#include +#include + +#include + +#include +#include +#include +#include "lpt_ngraph_functions/convolution_function.hpp" + +using namespace testing; +using namespace ngraph; +using namespace ngraph::pass; + +class IsAsymmetricOnWeightsDequantizationTestValues { +public: + ngraph::element::Type precisionBeforeDequantization; + ngraph::builder::subgraph::DequantizationOperations dequantizationOnActivations; + std::shared_ptr weights; + builder::subgraph::DequantizationOperations dequantizationOnWeights; + bool isAsymmetricOnWeights; +}; + +typedef std::tuple< + element::Type, + ngraph::PartialShape, + IsAsymmetricOnWeightsDequantizationTestValues> IsAsymmetricOnWeightsDequantizationParams; + +class IsAsymmetricOnWeightsDequantizationTransformation : + public LayerTransformation, + public testing::WithParamInterface { +public: + void SetUp() override { + const auto netPrecision = std::get<0>(GetParam()); + const auto inputShape = std::get<1>(GetParam()); + auto testValues = std::get<2>(GetParam()); + + actualFunction = ngraph::builder::subgraph::ConvolutionFunction::getOriginal( + netPrecision, + testValues.precisionBeforeDequantization, + inputShape, + testValues.dequantizationOnActivations, + testValues.weights, + {}, + testValues.dequantizationOnWeights); + } + + static std::string getTestCaseName(testing::TestParamInfo obj) { + const auto netPrecision = std::get<0>(obj.param); + auto inputShape = std::get<1>(obj.param); + IsAsymmetricOnWeightsDequantizationTestValues testValues = std::get<2>(obj.param); + + std::ostringstream result; + result << + netPrecision << "_" << + inputShape << "_" << + testValues.precisionBeforeDequantization << "_" << + testValues.dequantizationOnActivations << "_" << "_weights_" << + testValues.weights->get_element_type() << "_" << "{ " << + testValues.weights->cast_vector()[0] << " }_" << + testValues.dequantizationOnWeights; + return result.str(); + } +}; + +TEST_P(IsAsymmetricOnWeightsDequantizationTransformation, CompareFunctions) { + actualFunction->validate_nodes_and_infer_types(); + + const auto convolutions = LayerTransformation::get(actualFunction); + ASSERT_TRUE(convolutions.size() == 1ul) << "convolution was not found"; + + IsAsymmetricOnWeightsDequantizationTestValues testValues = std::get<2>(GetParam()); + + const auto isAsymmetricOnWeights = ngraph::pass::low_precision::WeightableLayerTransformation::isAsymmetricOnWeights(convolutions[0]); + ASSERT_EQ(testValues.isAsymmetricOnWeights, isAsymmetricOnWeights); +} + +const std::vector netPrecisions = { + element::f32 +}; + +const std::vector suitablePartialShapes = { + ngraph::PartialShape({ 1, 3, 72, 48 }), + ngraph::PartialShape({ 4, 3, 72, 48 }), + ngraph::PartialShape({ Dimension::dynamic(), 3, 72, 48 }), + ngraph::PartialShape({ 1, 3, Dimension::dynamic(), Dimension::dynamic() }), +}; + +const std::vector testValues = { + { + ngraph::element::u8, + {{ngraph::element::f32}, { 128.f }, { 0.02f }}, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{ 2.f }), + { + {ngraph::element::f32}, + {{1, 2, 3, 4, 5, 6}, ngraph::element::f32, {6, 1, 1, 1}}, + {{1, 2, 3, 4, 5, 6}, ngraph::element::f32, {6, 1, 1, 1}} + }, + true + }, + { + ngraph::element::u8, + {{ngraph::element::f32}, { 128.f }, { 0.02f }}, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{ 2.f }), + { + {ngraph::element::f32}, + {{0, 0, 1.e-7, 0, 0, 0}, ngraph::element::f32, {6, 1, 1, 1}}, + {{1, 2, 3, 4, 5, 6}, ngraph::element::f32, {6, 1, 1, 1}} + }, + false + } +}; + +INSTANTIATE_TEST_SUITE_P( + smoke_LPT, + IsAsymmetricOnWeightsDequantizationTransformation, + ::testing::Combine( + ::testing::ValuesIn(netPrecisions), + ::testing::ValuesIn(suitablePartialShapes), + ::testing::ValuesIn(testValues)), + IsAsymmetricOnWeightsDequantizationTransformation::getTestCaseName); diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/is_asymmetric_on_weights.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/is_asymmetric_on_weights_fq.cpp similarity index 64% rename from inference-engine/tests/functional/inference_engine/lp_transformations/is_asymmetric_on_weights.cpp rename to inference-engine/tests/functional/inference_engine/lp_transformations/is_asymmetric_on_weights_fq.cpp index e9b4a4a8c41..78bd7aa5483 100644 --- a/inference-engine/tests/functional/inference_engine/lp_transformations/is_asymmetric_on_weights.cpp +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/is_asymmetric_on_weights_fq.cpp @@ -19,27 +19,23 @@ using namespace testing; using namespace ngraph; using namespace ngraph::pass; -class IsAsymmetricOnWeightsTestValues { +class IsAsymmetricOnWeightsFakeQuantizeTestValues { public: - class Actual { - public: - ngraph::element::Type precisionBeforeDequantization; - ngraph::builder::subgraph::DequantizationOperations dequantizationOnActivations; - std::shared_ptr weights; - builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights; - }; - - TestTransformationParams params; - Actual actual; + ngraph::element::Type precisionBeforeDequantization; + ngraph::builder::subgraph::DequantizationOperations dequantizationOnActivations; + std::shared_ptr weights; + builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights; }; typedef std::tuple< element::Type, ngraph::PartialShape, - IsAsymmetricOnWeightsTestValues, - std::pair, bool> > IsAsymmetricOnWeightsParams; + IsAsymmetricOnWeightsFakeQuantizeTestValues, + std::pair, bool> > IsAsymmetricOnWeightsFakeQuantizeParams; -class IsAsymmetricOnWeightsTransformation : public LayerTransformation, public testing::WithParamInterface { +class IsAsymmetricOnWeightsFakeQuantizeTransformation : + public LayerTransformation, + public testing::WithParamInterface { public: void SetUp() override { const auto netPrecision = std::get<0>(GetParam()); @@ -49,11 +45,12 @@ public: actualFunction = ngraph::builder::subgraph::ConvolutionFunction::getOriginal( netPrecision, - testValues.actual.precisionBeforeDequantization, + testValues.precisionBeforeDequantization, inputShape, - testValues.actual.dequantizationOnActivations, - testValues.actual.weights, - testValues.actual.fakeQuantizeOnWeights, + testValues.dequantizationOnActivations, + testValues.weights, + testValues.fakeQuantizeOnWeights, + {}, transposeAndIsAsymmetricOnWeights.first[0], transposeAndIsAsymmetricOnWeights.first[1], transposeAndIsAsymmetricOnWeights.first[2], @@ -61,21 +58,21 @@ public: transposeAndIsAsymmetricOnWeights.first[4]); } - static std::string getTestCaseName(testing::TestParamInfo obj) { + static std::string getTestCaseName(testing::TestParamInfo obj) { const auto netPrecision = std::get<0>(obj.param); auto inputShape = std::get<1>(obj.param); - IsAsymmetricOnWeightsTestValues testValues = std::get<2>(obj.param); + IsAsymmetricOnWeightsFakeQuantizeTestValues testValues = std::get<2>(obj.param); std::pair, bool> transposeAndIsAsymmetricOnWeights = std::get<3>(obj.param); std::ostringstream result; - result << toString(testValues.params) << "_" << + result << netPrecision << "_" << inputShape << "_" << - testValues.actual.precisionBeforeDequantization << "_" << - testValues.actual.dequantizationOnActivations << "_" << "_weights_" << - testValues.actual.weights->get_element_type() << "_" << "{ " << - testValues.actual.weights->cast_vector()[0] << " }_" << - testValues.actual.fakeQuantizeOnWeights << "_" << + testValues.precisionBeforeDequantization << "_" << + testValues.dequantizationOnActivations << "_" << "_weights_" << + testValues.weights->get_element_type() << "_" << "{ " << + testValues.weights->cast_vector()[0] << " }_" << + testValues.fakeQuantizeOnWeights << "_" << transposeAndIsAsymmetricOnWeights.first[0] << "_" << transposeAndIsAsymmetricOnWeights.first[1] << "_" << transposeAndIsAsymmetricOnWeights.first[2] << "_" << @@ -85,7 +82,7 @@ public: } }; -TEST_P(IsAsymmetricOnWeightsTransformation, CompareFunctions) { +TEST_P(IsAsymmetricOnWeightsFakeQuantizeTransformation, CompareFunctions) { actualFunction->validate_nodes_and_infer_types(); const auto convolutions = LayerTransformation::get(actualFunction); @@ -107,15 +104,12 @@ const std::vector suitablePartialShapes = { ngraph::PartialShape({ 1, 3, Dimension::dynamic(), Dimension::dynamic() }), }; -const std::vector testValues = { +const std::vector testValues = { { - LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true), - { - ngraph::element::u8, - {{ngraph::element::f32}, { 128.f }, { 0.02f }}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{ 2.f }), - { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.f }, { 1.27f } }, - } + ngraph::element::u8, + {{ngraph::element::f32}, { 128.f }, { 0.02f }}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{ 2.f }), + { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.f }, { 1.27f } }, } }; @@ -133,10 +127,10 @@ const std::vector, bool> > transposeFlags = { INSTANTIATE_TEST_SUITE_P( smoke_LPT, - IsAsymmetricOnWeightsTransformation, + IsAsymmetricOnWeightsFakeQuantizeTransformation, ::testing::Combine( ::testing::ValuesIn(netPrecisions), ::testing::ValuesIn(suitablePartialShapes), ::testing::ValuesIn(testValues), ::testing::ValuesIn(transposeFlags)), - IsAsymmetricOnWeightsTransformation::getTestCaseName); + IsAsymmetricOnWeightsFakeQuantizeTransformation::getTestCaseName); diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/convolution_function.hpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/convolution_function.hpp index f552cf8503d..1a93cc8a2b0 100644 --- a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/convolution_function.hpp +++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/convolution_function.hpp @@ -22,9 +22,10 @@ public: const ngraph::element::Type netPrecision, const ngraph::element::Type inputPrecision, const ngraph::PartialShape& inputShape, - const ngraph::builder::subgraph::DequantizationOperations& dequantizationBefore, + const ngraph::builder::subgraph::DequantizationOperations& dequantizationOnActivations, std::shared_ptr weights, const ngraph::builder::subgraph::FakeQuantizeOnWeights fqOnWeights, + const ngraph::builder::subgraph::DequantizationOperations& dequantizationOnWeights = DequantizationOperations(), const bool fqOnWeightsTransposeOnData = false, const bool fqOnWeightsTransposeOnInputLow = false, const bool fqOnWeightsTransposeOnInputHigh = false, diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/convolution_function.cpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/convolution_function.cpp index cc50bce4546..e5606d9647b 100644 --- a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/convolution_function.cpp +++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/convolution_function.cpp @@ -25,16 +25,17 @@ std::shared_ptr ConvolutionFunction::getOriginal( const ngraph::element::Type netPrecision, const ngraph::element::Type inputPrecision, const ngraph::PartialShape& inputShape, - const ngraph::builder::subgraph::DequantizationOperations& dequantizationBefore, + const ngraph::builder::subgraph::DequantizationOperations& dequantizationOnActivations, std::shared_ptr weights, const ngraph::builder::subgraph::FakeQuantizeOnWeights fqOnWeights, + const ngraph::builder::subgraph::DequantizationOperations& dequantizationOnWeights, const bool transposeOnData, const bool transposeOnInputLow, const bool transposeOnInputHigh, const bool transposeOnOutputLow, const bool transposeOnOutputHigh) { const auto input = std::make_shared(inputPrecision, inputShape); - auto dequantizationStructure = dequantizationBefore; + auto dequantizationStructure = dequantizationOnActivations; dequantizationStructure.multiply.outPrecision = netPrecision; const auto dequantization = makeDequantization(input, dequantizationStructure); @@ -53,15 +54,22 @@ std::shared_ptr ConvolutionFunction::getOriginal( weights, op::Constant::create(ngraph::element::i64, Shape{ targetShape.size() }, targetShape))); } - const auto convertOnWeights = std::make_shared(weights, netPrecision); - OutputVector convertedOutput(1); - convertOnWeights->constant_fold(convertedOutput, convertOnWeights->input_values()); - const auto convertedWeights = convertedOutput[0].get_node_shared_ptr(); + std::shared_ptr convertedWeights; + if (dequantizationOnWeights.empty()) { + const auto convertOnWeights = std::make_shared(weights, netPrecision); + OutputVector convertedOutput(1); + convertOnWeights->constant_fold(convertedOutput, convertOnWeights->input_values()); + convertedWeights = convertedOutput[0].get_node_shared_ptr(); + } else { + convertedWeights = weights; + } const std::shared_ptr constant = ngraph::opset1::Constant::create(ngraph::element::u64, ngraph::Shape{4}, {0, 1, 2, 3}); - const std::shared_ptr onWeights = fqOnWeights.empty() ? - convertedWeights : - std::make_shared( + std::shared_ptr onWeights; + if (fqOnWeights.empty()) { + onWeights = dequantizationOnWeights.empty() ? convertedWeights : makeDequantization(convertedWeights, dequantizationOnWeights); + } else { + onWeights = std::make_shared( transposeOnData ? std::make_shared(convertedWeights, constant) : convertedWeights, transposeOnInputLow ? std::make_shared( @@ -84,6 +92,7 @@ std::shared_ptr ConvolutionFunction::getOriginal( constant->clone_with_new_inputs({})) : makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.outputHighValues, fqOnWeights.outputHighValues.empty()), fqOnWeights.quantizationLevel); + } auto convolutionOriginal = ngraph::opset1::Convolution( ngraph::op::TemporaryReplaceOutputType(dequantization, netPrecision).get(),