From 19afae3638aab3e6c6dc65535bb66b1d48513866 Mon Sep 17 00:00:00 2001
From: Vladimir Zinoviev
Date: Thu, 29 Apr 2021 18:24:21 +0300
Subject: [PATCH] [LPT] INT4 FakeQuantize not transform (#5082)

---
 .../fake_quantize_transformation.cpp          | 29 ++++++++++--
 .../fake_quantize_transformation.cpp          | 28 ++++++++++--
 .../fake_quantize_transformation.hpp          | 12 +++--
 .../fake_quantize_transformation.cpp          | 44 +++++++------------
 .../fake_quantize_function.hpp                |  5 +++
 .../src/fake_quantize_function.cpp            | 25 +++++++++++
 6 files changed, 105 insertions(+), 38 deletions(-)

diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/fake_quantize_transformation.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/fake_quantize_transformation.cpp
index 6ab6f4e23eb..2f856a61cd7 100644
--- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/fake_quantize_transformation.cpp
+++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/fake_quantize_transformation.cpp
@@ -25,10 +25,31 @@ const std::vector<ngraph::pass::low_precision::LayerTransformation::Params> trasformationParamValues = {
     LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParamsU8I8()
 };
 
-const std::vector<ngraph::builder::subgraph::FakeQuantizeOnData> fakeQuantizeOnDataValues = {
-    { 256ul, {}, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
-    { 256ul, { 1ul }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
-    { 256ul, {}, { 0.f }, { 2.55f }, { 2.55f }, { 2.55f } },
+const std::vector<FakeQuantizeTransformationParam> fakeQuantizeOnDataValues = {
+    {
+        {256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}},
+        "Pooling", "U8"
+    },
+    {
+        { 256ul, { 1ul }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
+        "Pooling", "U8"
+    },
+    {
+        { 256ul, {}, { 0.f }, { 2.55f }, { -1.28f }, { 1.27f } },
+        "Pooling", "I8"
+    },
+    {
+        { 256ul, {}, { 0.f }, { 2.55f }, { 2.55f }, { 2.55f } },
+        "Pooling", "U8"
+    },
+    {
+        { 16ul, {}, { 0.f }, { 1.5f }, { 0.f }, { 1.5f } },
+        "Pooling", "FP32"
+    },
+    {
+        { 16ul, {}, { -8.f }, { 7.f }, { -0.8f }, { 0.7f } },
+        "Pooling", "FP32"
+    },
     // nGraph: I8->FP32 Convert is not supported
     // { 256ul, {}, { -1.28f} , { 1.27f }, { -1.28f} , { 1.27f } },
     // { 256ul, { 1ul }, { -1.28f} , { 1.27f } }
diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/fake_quantize_transformation.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/fake_quantize_transformation.cpp
index 1a00abb7f03..35f047794da 100644
--- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/fake_quantize_transformation.cpp
+++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/fake_quantize_transformation.cpp
@@ -25,9 +25,31 @@ const std::vector<ngraph::pass::low_precision::LayerTransformation::Params> trasformationParamValues = {
     LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParamsU8I8()
 };
 
-const std::vector<ngraph::builder::subgraph::FakeQuantizeOnData> fakeQuantizeOnDataValues = {
-    { 256ul, {}, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
-    { 256ul, { 1ul }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
+const std::vector<FakeQuantizeTransformationParam> fakeQuantizeOnDataValues = {
+    {
+        {256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}},
+        "Pooling", "U8"
+    },
+    {
+        { 256ul, { 1ul }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
+        "Pooling", "U8"
+    },
+    {
+        { 256ul, {}, { 0.f }, { 2.55f }, { -1.28f }, { 1.27f } },
+        "Pooling", "I8"
+    },
+    {
+        { 256ul, {}, { 0.f }, { 2.55f }, { 2.55f }, { 2.55f } },
+        "Pooling", "U8"
+    },
+    {
+        { 16ul, {}, { 0.f }, { 1.5f }, { 0.f }, { 1.5f } },
+        "Pooling", "FP32"
+    },
+    {
+        { 16ul, {}, { -8.f }, { 7.f }, { -0.8f }, { 0.7f } },
+        "Pooling", "FP32"
+    },
     // nGraph: I8->FP32 Convert is not supported
     // { 256ul, {}, { -1.28f} , { 1.27f }, { -1.28f} , { 1.27f } },
     // { 256ul, { 1ul }, { -1.28f} , { 1.27f } }
diff --git a/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/fake_quantize_transformation.hpp b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/fake_quantize_transformation.hpp
index aef99adf002..f2b82386c5e 100644
--- a/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/fake_quantize_transformation.hpp
+++ b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/fake_quantize_transformation.hpp
@@ -10,13 +10,20 @@
 #include "shared_test_classes/base/low_precision_transformations/layer_transformation.hpp"
 
 namespace LayerTestsDefinitions {
+class FakeQuantizeTransformationParam {
+public:
+    ngraph::builder::subgraph::FakeQuantizeOnData fakequantize;
+
+    std::string layerName;
+    std::string expectedKernelType;
+};
 
 typedef std::tuple<
     ngraph::element::Type,
     ngraph::Shape,
     std::string,
     ngraph::pass::low_precision::LayerTransformation::Params,
-    ngraph::builder::subgraph::FakeQuantizeOnData> FakeQuantizeTransformationParams;
+    FakeQuantizeTransformationParam> FakeQuantizeTransformationParams;
 
 class FakeQuantizeTransformation :
     public testing::WithParamInterface<FakeQuantizeTransformationParams>,
@@ -27,8 +34,7 @@ public:
 protected:
     void SetUp() override;
 
-private:
-    void validate();
+    void Run() override;
 };
 
 }  // namespace LayerTestsDefinitions
diff --git a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/fake_quantize_transformation.cpp b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/fake_quantize_transformation.cpp
index bd65adae44b..4f14e33a757 100644
--- a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/fake_quantize_transformation.cpp
+++ b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/fake_quantize_transformation.cpp
@@ -22,11 +22,11 @@ std::string FakeQuantizeTransformation::getTestCaseName(testing::TestParamInfo<FakeQuantizeTransformationParams> obj) {
-    ngraph::builder::subgraph::FakeQuantizeOnData fakeQuantizeOnData;
-    std::tie(netPrecision, inputShape, targetDevice, params, fakeQuantizeOnData) = this->GetParam();
+    FakeQuantizeTransformationParam testParams;
+    std::tie(netPrecision, inputShape, targetDevice, params, testParams) = this->GetParam();
 
-    function = ngraph::builder::subgraph::FakeQuantizeFunction::getOriginal(
+    function = ngraph::builder::subgraph::FakeQuantizeFunction::getOriginalWithMaxPool(
         netPrecision,
         inputShape,
-        fakeQuantizeOnData);
-
-    ngraph::pass::InitNodeInfo().run_on_function(function);
-    validate();
+        testParams.fakequantize);
 }
 
-void FakeQuantizeTransformation::validate() {
-    ngraph::element::Type precision;
-    ngraph::Shape inputShapes;
-    std::string targetDevice;
-    ngraph::pass::low_precision::LayerTransformation::Params params;
-    ngraph::builder::subgraph::FakeQuantizeOnData fakeQuantizeOnData;
-    std::tie(precision, inputShapes, targetDevice, params, fakeQuantizeOnData) = this->GetParam();
+void FakeQuantizeTransformation::Run() {
+    LayerTestsCommon::Run();
 
-    auto transformations = getLowPrecisionTransformationsNGraph(params);
-    transformations.removeStandaloneCleanup();
-    transformations.removeStandaloneCleanup();
-
-    const auto transformed = transformNGraph(params, transformations);
-    EXPECT_EQ(1ul, transformed->get_output_size());
-
-    const auto output = transformed->get_output_op(0);
-    const auto scaleShift = output->get_input_node_shared_ptr(0);
-    const std::string typeName = scaleShift->get_type_name();
-    ASSERT_EQ("ScaleShiftIE", typeName);
+    const auto params = std::get<4>(GetParam());
+    const auto actualPrecision = getRuntimePrecisionByType(params.layerName);
+    auto expectedPrecision = params.expectedKernelType;
+    if (expectedPrecision == "FP32" && std::get<0>(GetParam()) == ngraph::element::f16) {
+        expectedPrecision = "FP16";
+    }
+    EXPECT_EQ(actualPrecision, expectedPrecision);
 }
 
 TEST_P(FakeQuantizeTransformation, CompareWithRefImpl) {
diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/fake_quantize_function.hpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/fake_quantize_function.hpp
index c0a7da296be..92dbdc1df53 100644
--- a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/fake_quantize_function.hpp
+++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/fake_quantize_function.hpp
@@ -23,6 +23,11 @@ public:
         const ngraph::Shape& inputShape,
         const FakeQuantizeOnData& fakeQuantizeOnData);
 
+    static std::shared_ptr<ngraph::Function> getOriginalWithMaxPool(
+        const ngraph::element::Type precision,
+        const ngraph::Shape& inputShape,
+        const FakeQuantizeOnData& fakeQuantizeOnData);
+
     static std::shared_ptr<ngraph::Function> getReference(
         const ngraph::element::Type precision,
         const ngraph::Shape& inputShape,
diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/fake_quantize_function.cpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/fake_quantize_function.cpp
index 050dae69841..f9b802fad2d 100644
--- a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/fake_quantize_function.cpp
+++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/fake_quantize_function.cpp
@@ -20,6 +20,31 @@ namespace subgraph {
 
 using namespace ngraph::pass;
 
+std::shared_ptr<ngraph::Function> FakeQuantizeFunction::getOriginalWithMaxPool(
+    const ngraph::element::Type precision,
+    const ngraph::Shape& inputShape,
+    const FakeQuantizeOnData& fakeQuantizeOnData) {
+    const auto input = std::make_shared<ngraph::opset1::Parameter>(precision, ngraph::Shape(inputShape));
+    input->set_friendly_name("input");
+
+    const auto fakeQuantize = ngraph::builder::makeFakeQuantize(
+        input, element::f32, fakeQuantizeOnData.quantizationLevel, fakeQuantizeOnData.constantShape,
+        fakeQuantizeOnData.inputLowValues, fakeQuantizeOnData.inputHighValues, fakeQuantizeOnData.outputLowValues, fakeQuantizeOnData.outputHighValues);
+    const auto maxPool = std::make_shared<ngraph::opset1::MaxPool>(
+        fakeQuantize,
+        Strides{ 1, 1 },
+        Shape{ 1, 1 },
+        Shape{ 0, 0 },
+        Shape{ 2, 2 });
+
+    fakeQuantize->set_friendly_name("fakeQuantize");
+    auto& rtInfo = fakeQuantize->get_rt_info();
+    rtInfo["Variant::std::string"] = std::make_shared<VariantWrapper<std::string>>("fakeQuantize");
+
+    ngraph::ResultVector results{ std::make_shared<ngraph::opset1::Result>(maxPool) };
+    return std::make_shared<ngraph::Function>(results, ngraph::ParameterVector{ input }, "FakeQuantizeFunction");
+}
+
 std::shared_ptr<ngraph::Function> FakeQuantizeFunction::getOriginal(
     const ngraph::element::Type precision,
     const ngraph::Shape& inputShape,
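
Note on the test expectations above (not part of the patch): the new test vectors encode the rule that these tests check after this change, namely that a 256-level FakeQuantize is executed in U8 or I8 depending on the sign of its output range, while a 16-level (INT4-like) FakeQuantize is left untransformed and runs in FP32. The sketch below is illustrative only; expectedKernelPrecision is a hypothetical helper written for this note, not an OpenVINO API, and it simply mirrors the expectedKernelType values listed in the instantiation files.

    #include <cstddef>
    #include <string>

    // Mirrors the expectedKernelType column of the test vectors above:
    // only a 256-level FakeQuantize is converted by LPT; the sign of the
    // output low bound selects I8 vs. U8. Other level counts (e.g. 16,
    // the INT4-like cases) are expected to stay in FP32.
    std::string expectedKernelPrecision(std::size_t levels, float outputLow) {
        if (levels != 256) {
            return "FP32";
        }
        return outputLow < 0.0f ? "I8" : "U8";
    }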