From cc19ff74f10bb87be3cadcfb9956a12415ba0c7d Mon Sep 17 00:00:00 2001 From: Edward Shogulin Date: Tue, 1 Feb 2022 08:10:27 +0300 Subject: [PATCH] [LPT] [GPU] Multiply to group convolution (#9971) * [LPT] MultiplyToGroupConvolution optimization for GPU * [LPT] MatMul in FP32 in GPU workaround support * [LPT] GPU plugin tests --- .../low_precision/layer_transformation.hpp | 9 +++- .../include/low_precision/network_helper.hpp | 2 +- .../src/layer_transformation.cpp | 1 + .../src/network_helper.cpp | 2 +- .../src/reshape.cpp | 14 ++++++- .../src/plugin/transformations_pipeline.cpp | 36 +++++++++++++++- ...ly_to_group_convolution_transformation.cpp | 15 ++++--- ...ly_to_group_convolution_transformation.cpp | 41 +++++++++++-------- ...ly_to_group_convolution_transformation.hpp | 1 + ...ly_to_group_convolution_transformation.cpp | 6 ++- ...multiply_to_group_convolution_function.hpp | 3 +- ...multiply_to_group_convolution_function.cpp | 7 +++- 12 files changed, 102 insertions(+), 35 deletions(-) diff --git a/src/common/low_precision_transformations/include/low_precision/layer_transformation.hpp b/src/common/low_precision_transformations/include/low_precision/layer_transformation.hpp index 6293351785e..0fb0725209f 100644 --- a/src/common/low_precision_transformations/include/low_precision/layer_transformation.hpp +++ b/src/common/low_precision_transformations/include/low_precision/layer_transformation.hpp @@ -239,9 +239,11 @@ public: public: Params( const bool updatePrecisions = true, - element::Type deqPrecision = element::f32) : + element::Type deqPrecision = element::f32, + const bool reshapeIgnorePerTensorQuantizationCheck = false) : updatePrecisions(updatePrecisions), - deqPrecision(deqPrecision) {} + deqPrecision(deqPrecision), + reshapeIgnorePerTensorQuantizationCheck(reshapeIgnorePerTensorQuantizationCheck) {} Params& setUpdatePrecisions(const bool updatePrecisions) { this->updatePrecisions = updatePrecisions; @@ -255,6 +257,8 @@ public: bool updatePrecisions; element::Type deqPrecision; + // to support GPU workaround to keep Reshape and MatMul in FP32 + bool reshapeIgnorePerTensorQuantizationCheck; }; class PrecisionDetails { @@ -322,6 +326,7 @@ protected: bool updatePrecisions; element::Type deqPrecision; + bool reshapeIgnorePerTensorQuantizationCheck; static constexpr char originalLayerPostfix[] = "_original"; TransformationContext* context; diff --git a/src/common/low_precision_transformations/include/low_precision/network_helper.hpp b/src/common/low_precision_transformations/include/low_precision/network_helper.hpp index 289c10a57d1..b8adb428efa 100644 --- a/src/common/low_precision_transformations/include/low_precision/network_helper.hpp +++ b/src/common/low_precision_transformations/include/low_precision/network_helper.hpp @@ -87,7 +87,7 @@ public: static std::shared_ptr toScalar(std::shared_ptr constant); - static std::shared_ptr getConstantInput(const std::shared_ptr& node, const bool convertIsExpected = false); + static std::shared_ptr getConstantInput(const std::shared_ptr& node, const bool convertIsExpected = false); static std::vector updateReshapeValues( const Shape& elementwiseConstantShape, diff --git a/src/common/low_precision_transformations/src/layer_transformation.cpp b/src/common/low_precision_transformations/src/layer_transformation.cpp index b8187ddf58a..6deb6c25eca 100644 --- a/src/common/low_precision_transformations/src/layer_transformation.cpp +++ b/src/common/low_precision_transformations/src/layer_transformation.cpp @@ -30,6 +30,7 @@ std::mutex
LayerTransformation::defaultPrecisionsMutex; LayerTransformation::LayerTransformation(const Params& params) : updatePrecisions(params.updatePrecisions), deqPrecision(params.deqPrecision), + reshapeIgnorePerTensorQuantizationCheck(params.reshapeIgnorePerTensorQuantizationCheck), context(nullptr) {} void LayerTransformation::setContext(TransformationContext* context) noexcept { diff --git a/src/common/low_precision_transformations/src/network_helper.cpp b/src/common/low_precision_transformations/src/network_helper.cpp index 7b6b55b0987..950f856e373 100644 --- a/src/common/low_precision_transformations/src/network_helper.cpp +++ b/src/common/low_precision_transformations/src/network_helper.cpp @@ -373,7 +373,7 @@ std::shared_ptr NetworkHelper::toScalar(std::shared_ptr(constant->get_element_type(), Shape{}, constant->get_data_ptr()); } -std::shared_ptr NetworkHelper::getConstantInput(const std::shared_ptr& node, const bool convertIsExpected) { +std::shared_ptr NetworkHelper::getConstantInput(const std::shared_ptr& node, const bool convertIsExpected) { std::shared_ptr parent = ov::as_type_ptr(node->input_value(0).get_node_shared_ptr()); if (parent != nullptr) { return parent; diff --git a/src/common/low_precision_transformations/src/reshape.cpp b/src/common/low_precision_transformations/src/reshape.cpp index 74caf10a173..24fb29df1bb 100644 --- a/src/common/low_precision_transformations/src/reshape.cpp +++ b/src/common/low_precision_transformations/src/reshape.cpp @@ -195,8 +195,18 @@ bool ReshapeTransformation::canBeTransformed(const TransformationContext& contex return false; } - if (((dequantization.subtract == nullptr) || NetworkHelper::isScalarLike(dequantization.subtractConstant)) && - ((dequantization.multiply == nullptr) || NetworkHelper::isScalarLike(dequantization.multiplyConstant))) { + bool ignorePerTensorQuantizationCheck = false; + if (reshapeIgnorePerTensorQuantizationCheck) { + const auto inputs = op->get_output_target_inputs(0); + if (inputs.size() == 1ul) { + const auto consumer = inputs.begin()->get_node(); + ignorePerTensorQuantizationCheck = ngraph::as_type(consumer) != nullptr; + } + } + + if (!ignorePerTensorQuantizationCheck && + (((dequantization.subtract == nullptr) || NetworkHelper::isScalarLike(dequantization.subtractConstant)) && + ((dequantization.multiply == nullptr) || NetworkHelper::isScalarLike(dequantization.multiplyConstant)))) { return true; } diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index 135fd29a355..76da6e52693 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -409,13 +409,47 @@ void TransformationsPipeline::apply(std::shared_ptr func) { return LayerTransformation::isAsymmetricQuantization(node) || WeightableLayerTransformation::isAsymmetricOnWeights(node); }); + if (!use_onednn) { lptPassConfig->set_callback([](const_node_ptr& node) -> bool { return MatMulTransformation::is3DTensorOnActivations(node); }); } - lptManager.register_pass(supportedPrecisions, perTensorQuantization); + lptPassConfig->set_callback([&](const_node_ptr& node) -> bool { + // disable MultiplyToGroupConvolution if Multiply with Constant can be fused + + const auto dequantization = NetworkHelper::getDequantization(node, 0, true); + std::shared_ptr parent = dequantization.empty() ? 
nullptr : dequantization.data.get_node()->shared_from_this(); + if (parent == nullptr) { + const auto constantNode = NetworkHelper::getConstantInput(node); + const auto constant = constantNode == nullptr ? nullptr : ngraph::as_type_ptr(constantNode); + if (constant != nullptr) { + auto parent = node->get_input_node_shared_ptr(0); + if (parent == constant) { + parent = node->get_input_node_shared_ptr(1); + } + } + } + + if (parent != nullptr) { + const auto parentHasOneConsumer = parent->get_output_target_inputs(0).size() == 1ul; + if (parentHasOneConsumer) { + return true; + } + } + + // disable MultiplyToGroupConvolution for Multiply with scalar + + if (MultiplyToGroupConvolutionTransformation::isDynamicOrScalar(node)) { + return true; + } + + return false; + }); + + auto params = LayerTransformation::Params(true, element::f32, true); + lptManager.register_pass(supportedPrecisions, perTensorQuantization, params); lptManager.run_passes(func); } diff --git a/src/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/multiply_to_group_convolution_transformation.cpp b/src/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/multiply_to_group_convolution_transformation.cpp index 8d139573c86..8bebd1f8ad9 100644 --- a/src/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/multiply_to_group_convolution_transformation.cpp +++ b/src/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/multiply_to_group_convolution_transformation.cpp @@ -23,35 +23,40 @@ const std::vector params = { { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } }, {{1.f, 2.f, 3.f}, element::f32, Shape{1, 3, 1, 1}}, "output/GroupConvolution", - "U8" + "U8", + true }, // Multiply with scalar is not transformed to GroupConvolution { { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } }, {{4.f}, element::f32, Shape{1, 1, 1, 1}}, "output/GroupConvolution", - "" + "", + true }, // Multiply with scalar is not transformed to GroupConvolution { { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } }, {{4.f}, element::f32, Shape{}}, "output/GroupConvolution", - "" + "", + true }, // Zero point { { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f }, { -1.28f }, { 1.27f } }, {{1.f, 2.f, 3.f}, element::f32, Shape{1, 3, 1, 1}}, "output/GroupConvolution", - "U8" + "U8", + true }, // Zero point { { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f / 2.f }, { -1.28f }, { 1.27f / 2.f} }, {{1.f, 2.f, 3.f}, element::f32, Shape{1, 3, 1, 1}}, "output/GroupConvolution", - "U8" + "U8", + true } }; diff --git a/src/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/multiply_to_group_convolution_transformation.cpp b/src/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/multiply_to_group_convolution_transformation.cpp index dbd50748fba..8a4d80e8bb5 100644 --- a/src/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/multiply_to_group_convolution_transformation.cpp +++ b/src/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/multiply_to_group_convolution_transformation.cpp @@ -23,36 +23,41 @@ const std::vector params = { { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } }, {{1.f, 2.f, 3.f}, element::f32, Shape{1, 3, 1, 1}}, "output/GroupConvolution", - "U8" - }, - // Multiply with scalar is transformed to 
GroupConvolution - { - { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } }, - {{4.f}, element::f32, Shape{1, 1, 1, 1}}, - "output/GroupConvolution", - "U8" - }, - // multiply with scalar is transformed to groupconvolution - { - { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } }, - {{4.f}, element::f32, Shape{}}, - "output/GroupConvolution", - "U8" + "U8", + false }, // zero point { { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f }, { -1.28f }, { 1.27f } }, {{1.f, 2.f, 3.f}, element::f32, Shape{1, 3, 1, 1}}, "output/GroupConvolution", - "I8" + "I8", + false }, // zero point { { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f / 2.f }, { -1.28f }, { 1.27f / 2.f} }, {{1.f, 2.f, 3.f}, element::f32, Shape{1, 3, 1, 1}}, "output/GroupConvolution", - "U8" - } + "U8", + false + }, + + // Multiply => GroupConvolution optimizations + { + { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } }, + {{3.f}, element::f32, Shape{1, 1, 1, 1}}, + "output/GroupConvolution", + "", + false + }, + { + { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } }, + {{3.f}, element::f32, Shape{1, 1, 1, 1}}, + "output/GroupConvolution", + "", + true + }, }; INSTANTIATE_TEST_SUITE_P(smoke_LPT, MultiplyToGroupConvolutionTransformation, diff --git a/src/tests/functional/plugin/shared/include/low_precision_transformations/multiply_to_group_convolution_transformation.hpp b/src/tests/functional/plugin/shared/include/low_precision_transformations/multiply_to_group_convolution_transformation.hpp index 14cf35c8a45..8356f70d283 100644 --- a/src/tests/functional/plugin/shared/include/low_precision_transformations/multiply_to_group_convolution_transformation.hpp +++ b/src/tests/functional/plugin/shared/include/low_precision_transformations/multiply_to_group_convolution_transformation.hpp @@ -22,6 +22,7 @@ public: builder::subgraph::Constant constant; std::string layerName; std::string expectedKernelType; + bool parentHasOneConsumer; }; typedef std::tuple < diff --git a/src/tests/functional/plugin/shared/src/low_precision_transformations/multiply_to_group_convolution_transformation.cpp b/src/tests/functional/plugin/shared/src/low_precision_transformations/multiply_to_group_convolution_transformation.cpp index 77711697dc1..4f656e76c5f 100644 --- a/src/tests/functional/plugin/shared/src/low_precision_transformations/multiply_to_group_convolution_transformation.cpp +++ b/src/tests/functional/plugin/shared/src/low_precision_transformations/multiply_to_group_convolution_transformation.cpp @@ -34,7 +34,8 @@ std::string MultiplyToGroupConvolutionTransformation::getTestCaseName(const test param.fqOnData << "_" << param.constant << "_" << param.layerName << "_" << - param.expectedKernelType; + param.expectedKernelType << "_" << + param.parentHasOneConsumer; return result.str(); } @@ -48,7 +49,8 @@ void MultiplyToGroupConvolutionTransformation::SetUp() { precision, shape, param.fqOnData, - param.constant); + param.constant, + param.parentHasOneConsumer); } void MultiplyToGroupConvolutionTransformation::Run() { diff --git a/src/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/multiply_to_group_convolution_function.hpp b/src/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/multiply_to_group_convolution_function.hpp index 47031837c40..561f79e333d 100644 --- a/src/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/multiply_to_group_convolution_function.hpp +++ 
b/src/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/multiply_to_group_convolution_function.hpp @@ -29,7 +29,8 @@ public: const ngraph::element::Type precision, const ngraph::PartialShape& inputShape, const FakeQuantizeOnData& fqOnData, - const Constant& constant); + const Constant& constant, + const bool parentHasOneConsumer = true); static std::shared_ptr getReference( const ngraph::PartialShape& inputShape, diff --git a/src/tests/ngraph_helpers/lpt_ngraph_functions/src/multiply_to_group_convolution_function.cpp b/src/tests/ngraph_helpers/lpt_ngraph_functions/src/multiply_to_group_convolution_function.cpp index 5a02119ea6f..9b3e4d1eed4 100644 --- a/src/tests/ngraph_helpers/lpt_ngraph_functions/src/multiply_to_group_convolution_function.cpp +++ b/src/tests/ngraph_helpers/lpt_ngraph_functions/src/multiply_to_group_convolution_function.cpp @@ -39,7 +39,8 @@ std::shared_ptr MultiplyToGroupConvolutionFunction::getOrigina const ngraph::element::Type precision, const ngraph::PartialShape& inputShape, const FakeQuantizeOnData& fqOnData, - const Constant& constant) { + const Constant& constant, + const bool parentHasOneConsumer) { const auto input = std::make_shared(precision, inputShape); const auto fakeQuantize = makeFakeQuantize(input, precision, fqOnData); @@ -58,7 +59,9 @@ std::shared_ptr MultiplyToGroupConvolutionFunction::getOrigina std::make_shared(constant.outPrecision, constant.shape, constant.values)); multiply->set_friendly_name("output"); - ngraph::ResultVector results{std::make_shared(multiply)}; + ngraph::ResultVector results = parentHasOneConsumer ? + ngraph::ResultVector{std::make_shared(multiply)} : + ngraph::ResultVector{std::make_shared(maxPool), std::make_shared(multiply)}; return std::make_shared(results, ngraph::ParameterVector{ input }, "MultiplyToGroupConvolutionFunction"); }
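
Editor's note (illustrative appendix, not part of the patch): the GPU-specific callback added in transformations_pipeline.cpp skips MultiplyToGroupConvolution when the Multiply's parent has a single consumer (so the Multiply can instead be fused into the preceding primitive) or when the Multiply is dynamic or by a scalar; the new parentHasOneConsumer flag in the shared test parameters lets both branches be exercised. For readers unfamiliar with the transformation itself, the sketch below shows the equivalence it relies on: a per-channel Multiply by a {1, C, 1, 1} constant can be expressed as a GroupConvolution with C groups and 1x1 kernels. This is a minimal standalone example written against ngraph opset1; all names, shapes, and values are assumptions for illustration and do not come from this PR.

#include <memory>
#include <vector>

#include <ngraph/ngraph.hpp>
#include <ngraph/opsets/opset1.hpp>

// Builds a graph equivalent to Multiply(input, Constant {1.f, 2.f, 3.f} of shape {1, 3, 1, 1}),
// expressed as a GroupConvolution in the spirit of MultiplyToGroupConvolution.
std::shared_ptr<ngraph::Function> buildGroupConvolutionEquivalentToMultiply() {
    using namespace ngraph;

    // 3-channel input; the original graph would multiply it channel-wise.
    const auto input = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 3, 16, 16});

    // GroupConvolution weights layout: [GROUPS, C_OUT per group, C_IN per group, kH, kW].
    // One group per channel, 1x1 kernel, kernel value = the per-channel scale.
    const auto weights = opset1::Constant::create(
        element::f32, Shape{3, 1, 1, 1, 1}, std::vector<float>{1.f, 2.f, 3.f});

    const auto groupConvolution = std::make_shared<opset1::GroupConvolution>(
        input,
        weights,
        Strides{1, 1},
        CoordinateDiff{0, 0},
        CoordinateDiff{0, 0},
        Strides{1, 1});
    groupConvolution->set_friendly_name("output/GroupConvolution");

    return std::make_shared<Function>(
        ResultVector{std::make_shared<opset1::Result>(groupConvolution)},
        ParameterVector{input},
        "MultiplyAsGroupConvolution");
}

On GPU this rewrite only pays off when the Multiply cannot be fused elsewhere, which is why the callback in the patch returns true (i.e. disables the transformation) for the single-consumer and scalar cases that the new test parameters cover.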