From b6d60a2c822350b0951d757509dc2b25044e6519 Mon Sep 17 00:00:00 2001 From: Vladimir Zinoviev Date: Wed, 12 Jan 2022 14:50:19 +0300 Subject: [PATCH] [LPT] Support FakeQuantize with convert on intervals (#9579) * [LPT] Support FakeQuantize with convert on intervals * [LPT] GPU tests --- .../low_precision/quantization_details.hpp | 2 +- .../src/low_precision.cpp | 2 +- .../src/quantization_details.cpp | 18 +++++++++++++----- .../fake_quantize_transformation.cpp | 8 +++++++- .../fake_quantize_transformation.cpp | 8 +++++++- .../fake_quantize_transformation.hpp | 3 ++- .../fake_quantize_transformation.cpp | 11 ++++++++--- .../common/fake_quantize_on_data.hpp | 5 +++-- .../src/common/builders.cpp | 16 ++++++++++++++-- .../src/common/fake_quantize_on_data.cpp | 6 ++++-- 10 files changed, 60 insertions(+), 19 deletions(-) diff --git a/src/common/low_precision_transformations/include/low_precision/quantization_details.hpp b/src/common/low_precision_transformations/include/low_precision/quantization_details.hpp index 975dca8cc53..d1e6787f03f 100644 --- a/src/common/low_precision_transformations/include/low_precision/quantization_details.hpp +++ b/src/common/low_precision_transformations/include/low_precision/quantization_details.hpp @@ -29,7 +29,7 @@ public: const std::vector& outputLowValues, const std::vector& outputHighValues); - static bool outputLayoutIsSupported(std::shared_ptr quantize); + static bool outputLayoutIsSupported(std::shared_ptr quantize, bool isConvertExpected = false); static void getInputIntervals( std::shared_ptr quantize, diff --git a/src/common/low_precision_transformations/src/low_precision.cpp b/src/common/low_precision_transformations/src/low_precision.cpp index 5d5bb3a02a5..f23140fe382 100644 --- a/src/common/low_precision_transformations/src/low_precision.cpp +++ b/src/common/low_precision_transformations/src/low_precision.cpp @@ -275,7 +275,7 @@ bool ngraph::pass::low_precision::LowPrecision::isFunctionQuantized(const std::s const std::shared_ptr fakeQuantize = ov::as_type_ptr(parent); if ((fakeQuantize != nullptr) && - QuantizationDetails::outputLayoutIsSupported(fakeQuantize) && + QuantizationDetails::outputLayoutIsSupported(fakeQuantize, true) && QuantizationDetails::isSupportedLevel(fakeQuantize->get_levels())) { return true; } diff --git a/src/common/low_precision_transformations/src/quantization_details.cpp b/src/common/low_precision_transformations/src/quantization_details.cpp index ada3df7e1ca..e74f1a46baa 100644 --- a/src/common/low_precision_transformations/src/quantization_details.cpp +++ b/src/common/low_precision_transformations/src/quantization_details.cpp @@ -49,11 +49,19 @@ QuantizationDetails::QuantizationDetails(const size_t levels, const std::vector< outputLowValues(outputLowValues), outputHighValues(outputHighValues) {} -bool QuantizationDetails::outputLayoutIsSupported(std::shared_ptr quantize) { - return ov::is_type(quantize->get_input_node_ptr(1)) && - ov::is_type(quantize->get_input_node_ptr(2)) && - ov::is_type(quantize->get_input_node_ptr(3)) && - ov::is_type(quantize->get_input_node_ptr(4)); +bool QuantizationDetails::outputLayoutIsSupported(std::shared_ptr quantize, bool isConvertExpected) { + const auto inputs = quantize->inputs(); + for (size_t i = 1; i < inputs.size(); ++i) { + const auto node = inputs[i].get_source_output().get_node_shared_ptr(); + bool supported = ov::is_type(node); + if (!supported && isConvertExpected) { + supported = ov::is_type(node) && ov::is_type(node->get_input_node_ptr(0)); + } + if (!supported) { + return false; + } + } + return true; } void QuantizationDetails::getInputIntervals( diff --git a/src/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/fake_quantize_transformation.cpp b/src/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/fake_quantize_transformation.cpp index 5b1166b1577..7c7a19feb0e 100644 --- a/src/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/fake_quantize_transformation.cpp +++ b/src/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/fake_quantize_transformation.cpp @@ -25,6 +25,11 @@ const std::vector trasformationParamValues = { LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParamsU8I8() }; +const std::vector isConvertOnConstants = { + false, + true +}; + const std::vector fakeQuantizeOnDataValues = { { {256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}}, @@ -82,6 +87,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_LPT, FakeQuantizeTransformation, ::testing::Values(ngraph::PartialShape({ 1, 32, 72, 48 })), ::testing::Values(CommonTestUtils::DEVICE_CPU), ::testing::ValuesIn(trasformationParamValues), - ::testing::ValuesIn(fakeQuantizeOnDataValues)), + ::testing::ValuesIn(fakeQuantizeOnDataValues), + ::testing::ValuesIn(isConvertOnConstants)), FakeQuantizeTransformation::getTestCaseName); } // namespace diff --git a/src/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/fake_quantize_transformation.cpp b/src/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/fake_quantize_transformation.cpp index f3d90f0ea72..c13e2e5a19e 100644 --- a/src/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/fake_quantize_transformation.cpp +++ b/src/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/fake_quantize_transformation.cpp @@ -17,6 +17,11 @@ const std::vector netPrecisions = { ngraph::element::f16 }; +const std::vector isConvertOnConstants = { + false, + true +}; + const std::vector trasformationParamValues = { // can not be passed to plugin // nGraph: I8 -> FP32 Convert is not supported @@ -65,6 +70,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_LPT, FakeQuantizeTransformation, ::testing::Values(ngraph::PartialShape({ 1, 32, 72, 48 })), ::testing::Values(CommonTestUtils::DEVICE_GPU), ::testing::ValuesIn(trasformationParamValues), - ::testing::ValuesIn(fakeQuantizeOnDataValues)), + ::testing::ValuesIn(fakeQuantizeOnDataValues), + ::testing::ValuesIn(isConvertOnConstants)), FakeQuantizeTransformation::getTestCaseName); } // namespace diff --git a/src/tests/functional/plugin/shared/include/low_precision_transformations/fake_quantize_transformation.hpp b/src/tests/functional/plugin/shared/include/low_precision_transformations/fake_quantize_transformation.hpp index b56e2309737..8fdf8e1d2ff 100644 --- a/src/tests/functional/plugin/shared/include/low_precision_transformations/fake_quantize_transformation.hpp +++ b/src/tests/functional/plugin/shared/include/low_precision_transformations/fake_quantize_transformation.hpp @@ -23,7 +23,8 @@ typedef std::tuple< ngraph::PartialShape, std::string, ngraph::pass::low_precision::LayerTransformation::Params, - FakeQuantizeTransformationParam> FakeQuantizeTransformationParams; + FakeQuantizeTransformationParam, + bool> FakeQuantizeTransformationParams; class FakeQuantizeTransformation : public testing::WithParamInterface, diff --git a/src/tests/functional/plugin/shared/src/low_precision_transformations/fake_quantize_transformation.cpp b/src/tests/functional/plugin/shared/src/low_precision_transformations/fake_quantize_transformation.cpp index 083f1e64bee..596aa49d76c 100644 --- a/src/tests/functional/plugin/shared/src/low_precision_transformations/fake_quantize_transformation.cpp +++ b/src/tests/functional/plugin/shared/src/low_precision_transformations/fake_quantize_transformation.cpp @@ -23,10 +23,12 @@ std::string FakeQuantizeTransformation::getTestCaseName(const testing::TestParam std::string targetDevice; ngraph::pass::low_precision::LayerTransformation::Params params; FakeQuantizeTransformationParam testParams; - std::tie(netPrecision, inputShape, targetDevice, params, testParams) = obj.param; + bool isConvertOnConstants; + std::tie(netPrecision, inputShape, targetDevice, params, testParams, isConvertOnConstants) = obj.param; std::ostringstream result; - result << getTestCaseNameByParams(netPrecision, inputShape, targetDevice, params) << "_" << testParams.fakequantize; + result << getTestCaseNameByParams(netPrecision, inputShape, targetDevice, params) << "_" << + isConvertOnConstants << "_" << testParams.fakequantize; return result.str(); } @@ -35,7 +37,10 @@ void FakeQuantizeTransformation::SetUp() { ngraph::PartialShape inputShape; ngraph::pass::low_precision::LayerTransformation::Params params; FakeQuantizeTransformationParam testParams; - std::tie(netPrecision, inputShape, targetDevice, params, testParams) = this->GetParam(); + bool isConvertOnConstants; + std::tie(netPrecision, inputShape, targetDevice, params, testParams, isConvertOnConstants) = this->GetParam(); + + testParams.fakequantize.addConverts = isConvertOnConstants; function = ngraph::builder::subgraph::FakeQuantizeFunction::getOriginal( params, diff --git a/src/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/common/fake_quantize_on_data.hpp b/src/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/common/fake_quantize_on_data.hpp index 0ca59f7b5d0..0c728b75f8c 100644 --- a/src/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/common/fake_quantize_on_data.hpp +++ b/src/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/common/fake_quantize_on_data.hpp @@ -71,8 +71,8 @@ public: const std::vector& outputLowValues, const std::vector& outputHighValues, const ngraph::element::Type outputPrecision = ngraph::element::undefined, - - const std::vector& attributes = {}); + const std::vector& attributes = {}, + const bool addConverts = false); virtual ~FakeQuantizeOnDataWithConstant(); virtual bool empty() const; @@ -85,6 +85,7 @@ public: std::vector outputHighValues; ngraph::element::Type outputPrecision; std::vector attributes; + bool addConverts; }; inline std::ostream& operator<<(std::ostream& out, const FakeQuantizeOnDataWithConstant& data) { diff --git a/src/tests/ngraph_helpers/lpt_ngraph_functions/src/common/builders.cpp b/src/tests/ngraph_helpers/lpt_ngraph_functions/src/common/builders.cpp index 5ad53ea23ca..bf79d40ae32 100644 --- a/src/tests/ngraph_helpers/lpt_ngraph_functions/src/common/builders.cpp +++ b/src/tests/ngraph_helpers/lpt_ngraph_functions/src/common/builders.cpp @@ -282,6 +282,9 @@ std::shared_ptr makeFakeQuantize( fqOnData.constantShapes.empty() ? ngraph::Shape{} : fqOnData.constantShapes[0], fqOnData.inputLowValues, fqOnData.inputLowValues.empty()); + if (fqOnData.addConverts) { + inputLowNode = ngraph::builder::makeConversion(inputLowNode, ov::element::f32, ngraph::helpers::ConversionTypes::CONVERT); + } inputHighNode = ngraph::builder::makeConstant( constantPrecision, @@ -290,23 +293,32 @@ std::shared_ptr makeFakeQuantize( (fqOnData.constantShapes.size() == 1 ? fqOnData.constantShapes[0] : fqOnData.constantShapes[1]), fqOnData.inputHighValues, fqOnData.inputHighValues.empty()); + if (fqOnData.addConverts) { + inputHighNode = ngraph::builder::makeConversion(inputHighNode, ov::element::f32, ngraph::helpers::ConversionTypes::CONVERT); + } } - const auto outputLowNode = ngraph::builder::makeConstant( + auto outputLowNode = ngraph::builder::makeConstant( constantPrecision, fqOnData.constantShapes.empty() ? ngraph::Shape{} : (fqOnData.constantShapes.size() == 1 ? fqOnData.constantShapes[0] : fqOnData.constantShapes[2]), fqOnData.outputLowValues, fqOnData.outputLowValues.empty()); + if (fqOnData.addConverts) { + outputLowNode = ngraph::builder::makeConversion(outputLowNode, ov::element::f32, ngraph::helpers::ConversionTypes::CONVERT); + } - const auto outputHighNode = ngraph::builder::makeConstant( + auto outputHighNode = ngraph::builder::makeConstant( constantPrecision, fqOnData.constantShapes.empty() ? ngraph::Shape{} : (fqOnData.constantShapes.size() == 1 ? fqOnData.constantShapes[0] : fqOnData.constantShapes[3]), fqOnData.outputHighValues, fqOnData.outputHighValues.empty()); + if (fqOnData.addConverts) { + outputHighNode = ngraph::builder::makeConversion(outputHighNode, ov::element::f32, ngraph::helpers::ConversionTypes::CONVERT); + } auto fq = std::make_shared(input, inputLowNode, inputHighNode, outputLowNode, outputHighNode, fqOnData.quantizationLevel); diff --git a/src/tests/ngraph_helpers/lpt_ngraph_functions/src/common/fake_quantize_on_data.cpp b/src/tests/ngraph_helpers/lpt_ngraph_functions/src/common/fake_quantize_on_data.cpp index 6affee88336..818a73170ca 100644 --- a/src/tests/ngraph_helpers/lpt_ngraph_functions/src/common/fake_quantize_on_data.cpp +++ b/src/tests/ngraph_helpers/lpt_ngraph_functions/src/common/fake_quantize_on_data.cpp @@ -58,7 +58,8 @@ FakeQuantizeOnDataWithConstant::FakeQuantizeOnDataWithConstant( const std::vector& outputLowValues, const std::vector& outputHighValues, const ngraph::element::Type outputPrecision, - const std::vector& attributes) : + const std::vector& attributes, + const bool addConverts) : quantizationLevel(quantizationLevel), constantShapes(constantShapes), inputLowValues(inputLowValues), @@ -66,7 +67,8 @@ FakeQuantizeOnDataWithConstant::FakeQuantizeOnDataWithConstant( outputLowValues(outputLowValues), outputHighValues(outputHighValues), outputPrecision(outputPrecision), - attributes(attributes) + attributes(attributes), + addConverts(addConverts) {} FakeQuantizeOnDataWithConstant::~FakeQuantizeOnDataWithConstant() {}