Add transformation that aligns elementwise input ranks (#10125)

* [CPU] Add transformation that aligns elementwise input ranks

* fix tests - check also aBcd16b format

* add support for fq

* add test for sqr diff

* move to moc transformations

* fix tests

* align only for numpy autobroadcast type

* fix fetching autob from fq

* [CPU] Eltwise tests corrected & callback for CPU removed

* remove transformation callback call

* revert changes to getMKLDNNOutputMemoryFormats

* remove comment

* use single wrap_type

Co-authored-by: Vladislav Golubev <vladislav.golubev@intel.com>
This commit is contained in:
Mateusz Tabaka 2022-02-15 10:47:54 +01:00 committed by GitHub
parent 523adff17a
commit a875f6ed9c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 294 additions and 16 deletions

View File

@ -0,0 +1,25 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <transformations_visibility.hpp>
#include <ngraph/pass/graph_rewrite.hpp>
/**
 * @ingroup ie_transformation_common_api
 * @brief AlignEltwiseInputRanks extends the rank of each constant input of an
 * elementwise operation (or FakeQuantize) to match the operation's output rank
 * by prepending 1s to the constant shape. Applied only for NUMPY
 * auto-broadcast, where prepending unit dimensions is semantics-preserving.
 */
namespace ngraph {
namespace pass {

class TRANSFORMATIONS_API AlignEltwiseInputRanks: public MatcherPass {
public:
    NGRAPH_RTTI_DECLARATION;
    // Registers the matcher; the alignment itself happens in the callback.
    AlignEltwiseInputRanks();
};

}  // namespace pass
}  // namespace ngraph

View File

@ -0,0 +1,63 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "transformations/common_optimizations/align_eltwise_input_ranks.hpp"
#include <ngraph/opsets/opset8.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
NGRAPH_RTTI_DEFINITION(ngraph::pass::AlignEltwiseInputRanks, "AlignEltwiseInputRanks", 0);

// Matches binary elementwise ops (arithmetic/comparison/logical),
// SquaredDifference and FakeQuantize with a statically known output rank, and
// prepends 1s to the shapes of their constant inputs so every constant input
// rank equals the output rank. Runs only for NUMPY auto-broadcast, where this
// reshape is semantics-preserving.
ngraph::pass::AlignEltwiseInputRanks::AlignEltwiseInputRanks() {
    auto eltwise_pattern = pattern::wrap_type<opset8::SquaredDifference,
                                             op::util::BinaryElementwiseComparison,
                                             op::util::BinaryElementwiseLogical,
                                             op::util::BinaryElementwiseArithmetic,
                                             opset8::FakeQuantize>(pattern::has_static_rank());

    matcher_pass_callback callback = [=](pattern::Matcher& m) {
        auto node = m.get_match_root();

        // FakeQuantize is not a binary elementwise op, so its broadcast spec
        // is fetched through its own accessor rather than get_autob().
        auto fq = as_type<opset8::FakeQuantize>(node.get());
        if (fq) {
            if (fq->get_auto_broadcast() != ngraph::op::AutoBroadcastType::NUMPY) {
                return false;
            }
        } else if (node->get_autob() != ngraph::op::AutoBroadcastType::NUMPY) {
            return false;
        }

        // NormalizeL2 and Multiply can be fused to NormalizeIE.
        // NormalizeIE has an attribute called channel_shared, which is set
        // based on Multiply's constant input rank - it's true if the rank is 1.
        // So we skip extending Multiply's constant input rank here.
        if (ov::is_type<opset8::Multiply>(node)) {
            auto inputs = node->input_values();
            if (std::any_of(inputs.begin(), inputs.end(),
                            [] (const Output<Node>& input) -> bool { return ov::is_type<opset8::NormalizeL2>(input.get_node()); }))
                return false;
        }

        const auto rank = node->get_output_partial_shape(0).size();

        for (size_t i = 0; i < node->get_input_size(); i++) {
            auto const_node = as_type<op::Constant>(node->get_input_node_ptr(i));
            if (const_node == nullptr)
                continue;
            const auto& const_shape = const_node->get_shape();
            // Compare before subtracting: 'rank' and the shape size are both
            // unsigned, so "rank - const_shape.size() > 0" would underflow to
            // a huge value whenever the constant's rank exceeds the output
            // rank, and the insert below would attempt a gigantic allocation.
            if (const_shape.size() < rank) {
                const size_t diff = rank - const_shape.size();
                Shape new_shape = const_shape;
                new_shape.insert(new_shape.begin(), diff, 1);
                // Share the original constant's data buffer, only the shape changes.
                auto new_const = std::make_shared<op::Constant>(*const_node, new_shape);
                node->input(i).replace_source_output(new_const);
            }
        }

        // Always return false: inputs were rewired but no new node replaced
        // the match root, so the graph does not need re-matching from here.
        return false;
    };

    auto m = std::make_shared<pattern::Matcher>(eltwise_pattern, "AlignEltwiseInputRanks");
    this->register_matcher(m, callback);
}

View File

@ -58,6 +58,7 @@
#include <transformations/common_optimizations/nearest_neighbor_upsampling_fusion.hpp>
#include <transformations/common_optimizations/ric_fusion.hpp>
#include <transformations/common_optimizations/matmul_multiply_fusion.hpp>
#include "transformations/common_optimizations/align_eltwise_input_ranks.hpp"
NGRAPH_RTTI_DEFINITION(ngraph::pass::MOCTransformations, "MOCTransformations", 0);
@ -180,6 +181,8 @@ bool ngraph::pass::MOCTransformations::run_on_model(const std::shared_ptr<ngraph
manager.register_pass<ngraph::pass::ReverseInputChannelsFusion>();
manager.register_pass<ngraph::pass::AlignEltwiseInputRanks>();
manager.register_pass<ngraph::pass::ConstantFolding>();
manager.run_passes(f);

View File

@ -937,6 +937,8 @@ TEST_F(NGraphReaderTests, ConvertMulToEltwise) {
<layer id="14" name="broadcast1_data" precision="FP32" type="Const">
<output>
<port id="1">
<dim>1</dim>
<dim>1</dim>
<dim>112</dim>
<dim>1</dim>
</port>
@ -955,6 +957,8 @@ TEST_F(NGraphReaderTests, ConvertMulToEltwise) {
<dim>112</dim>
</port>
<port id="1">
<dim>1</dim>
<dim>1</dim>
<dim>112</dim>
<dim>1</dim>
</port>
@ -1058,6 +1062,8 @@ TEST_F(NGraphReaderTests, ConvertAddToEltwise) {
<layer id="14" name="broadcast1_data" precision="FP32" type="Const">
<output>
<port id="1">
<dim>1</dim>
<dim>1</dim>
<dim>112</dim>
<dim>1</dim>
</port>
@ -1076,6 +1082,8 @@ TEST_F(NGraphReaderTests, ConvertAddToEltwise) {
<dim>112</dim>
</port>
<port id="1">
<dim>1</dim>
<dim>1</dim>
<dim>112</dim>
<dim>1</dim>
</port>
@ -1873,6 +1881,7 @@ TEST_F(NGraphReaderTests, ConvertAddToEltwise2) {
<dim>1</dim>
<dim>1</dim>
<dim>1</dim>
<dim>1</dim>
</port>
</output>
<blobs>
@ -1892,6 +1901,7 @@ TEST_F(NGraphReaderTests, ConvertAddToEltwise2) {
<dim>1</dim>
<dim>1</dim>
<dim>1</dim>
<dim>1</dim>
</port>
</input>
<output>

View File

@ -0,0 +1,78 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include <ngraph/opsets/opset8.hpp>
#include <transformations/common_optimizations/align_eltwise_input_ranks.hpp>
#include "common_test_utils/ngraph_test_utils.hpp"
using namespace testing;
using namespace ngraph;
using AlignEltwiseInputRanksParams = std::tuple<PartialShape, Shape, Shape, bool>;
// Parameterized fixture: combines gtest value-parameterization with
// TransformationTestsF, which supplies 'function', 'function_ref', 'manager'
// and 'comparator' used by the TEST_P body below.
class AlignEltwiseInputRanksTest
    : public testing::WithParamInterface<AlignEltwiseInputRanksParams>,
      public TransformationTestsF {
};
// Builds a graph with Add/Less/SquaredDifference/LogicalOr/FakeQuantize fed by
// constants of 'const_shape', runs AlignEltwiseInputRanks, and compares
// against a reference graph where the constants have 'expected_const_shape'.
TEST_P(AlignEltwiseInputRanksTest, FusionTest) {
    auto params = GetParam();
    const auto& input_shape = std::get<0>(params);
    auto const_shape = std::get<1>(params);
    auto expected_const_shape = std::get<2>(params);
    bool can_align = std::get<3>(params);

    {
        auto data = std::make_shared<opset8::Parameter>(element::f32, input_shape);
        auto add = std::make_shared<opset8::Add>(data, op::Constant::create(element::f32, const_shape, {3}));
        auto less = std::make_shared<opset8::Less>(data, op::Constant::create(element::f32, const_shape, {5}));
        auto sqr_diff = std::make_shared<opset8::SquaredDifference>(data, op::Constant::create(element::f32, const_shape, {5}));
        auto convert = std::make_shared<opset8::Convert>(data, element::boolean);
        auto logical_or = std::make_shared<opset8::LogicalOr>(convert, op::Constant::create(element::boolean, const_shape, {false}));
        auto low = op::Constant::create(element::f32, const_shape, {0});
        auto high = op::Constant::create(element::f32, const_shape, {20});
        auto fq = std::make_shared<opset8::FakeQuantize>(add, low, high, low, high, 256);
        // sqr_diff must be listed as a result: a node unreachable from the
        // Function's results is not part of the graph, so the pass would
        // never visit SquaredDifference and it would go untested.
        function = std::make_shared<Function>(NodeVector{less, logical_or, fq, sqr_diff}, ParameterVector{data});

        manager.register_pass<pass::AlignEltwiseInputRanks>();
    }

    // Reference graph is built only when alignment is expected; otherwise
    // TransformationTestsF compares against the untouched original.
    if (can_align) {
        auto data = std::make_shared<opset8::Parameter>(element::f32, input_shape);
        auto add = std::make_shared<opset8::Add>(data, op::Constant::create(element::f32, expected_const_shape, {3}));
        auto less = std::make_shared<opset8::Less>(data, op::Constant::create(element::f32, expected_const_shape, {5}));
        auto sqr_diff = std::make_shared<opset8::SquaredDifference>(data, op::Constant::create(element::f32, expected_const_shape, {5}));
        auto convert = std::make_shared<opset8::Convert>(data, element::boolean);
        auto logical_or = std::make_shared<opset8::LogicalOr>(convert, op::Constant::create(element::boolean, expected_const_shape, {false}));
        auto low = op::Constant::create(element::f32, expected_const_shape, {0});
        auto high = op::Constant::create(element::f32, expected_const_shape, {20});
        auto fq = std::make_shared<opset8::FakeQuantize>(add, low, high, low, high, 256);
        function_ref = std::make_shared<Function>(NodeVector{less, logical_or, fq, sqr_diff}, ParameterVector{data});
    }

    // Constant shapes are the whole point of this pass - compare values too,
    // not just the topology.
    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
}
// Each tuple: {input partial shape, constant shape,
//              expected constant shape after the pass, can_align}.
// When can_align is false the expected shape is unused and the graph must
// stay unchanged.
static std::vector<AlignEltwiseInputRanksParams> params = {
    AlignEltwiseInputRanksParams(PartialShape::dynamic(3), {}, {1, 1, 1}, true),
    AlignEltwiseInputRanksParams(PartialShape::dynamic(3), {1}, {1, 1, 1}, true),
    AlignEltwiseInputRanksParams(PartialShape::dynamic(3), {1, 1}, {1, 1, 1}, true),
    AlignEltwiseInputRanksParams(PartialShape::dynamic(3), {3}, {1, 1, 3}, true),
    AlignEltwiseInputRanksParams(PartialShape::dynamic(3), {2, 3}, {1, 2, 3}, true),
    AlignEltwiseInputRanksParams(PartialShape::dynamic(4), {}, {1, 1, 1, 1}, true),
    AlignEltwiseInputRanksParams(PartialShape::dynamic(4), {3, 1, 1}, {1, 3, 1, 1}, true),
    AlignEltwiseInputRanksParams(PartialShape::dynamic(4), {3}, {1, 1, 1, 3}, true),
    AlignEltwiseInputRanksParams(Shape{1, 4, 10, 10}, {4, 1, 1}, {1, 4, 1, 1}, true),
    // negative cases
    // dynamic output rank - nothing to align against
    AlignEltwiseInputRanksParams(PartialShape::dynamic(), {2, 3, 4}, {}, false),
    // scalar or higher-rank constants against a rank-0 data input
    AlignEltwiseInputRanksParams(Shape{}, {}, {}, false),
    AlignEltwiseInputRanksParams(Shape{}, {1}, {}, false),
    AlignEltwiseInputRanksParams(Shape{}, {2, 3, 4}, {}, false),
};

INSTANTIATE_TEST_SUITE_P(TransformationTests, AlignEltwiseInputRanksTest, ::testing::ValuesIn(params));

View File

@ -434,54 +434,98 @@ std::vector<std::vector<ngraph::Shape>> inShapes_4D_1D = {
{{1, 3, 3, 3}, {3}},
};
std::vector<CPUSpecificParams> cpuParams_4D_1D = {
CPUSpecificParams({nChw16c, x}, {nChw16c}, {}, {}),
CPUSpecificParams({nhwc, x}, {nhwc}, {}, {}),
CPUSpecificParams({nchw, x}, {nchw}, {}, {})
std::vector<CPUSpecificParams> cpuParams_4D_1D_Constant_mode = {
CPUSpecificParams({nChw16c, nchw}, {nChw16c}, {}, {}),
CPUSpecificParams({nhwc, nhwc}, {nhwc}, {}, {}),
CPUSpecificParams({nchw, nchw}, {nchw}, {}, {})
};
const auto params_4D_1D = ::testing::Combine(
const auto params_4D_1D_constant_mode = ::testing::Combine(
::testing::Combine(
::testing::ValuesIn(static_shapes_to_test_representation(inShapes_4D_1D)),
::testing::Values(ngraph::helpers::EltwiseTypes::ADD, ngraph::helpers::EltwiseTypes::MULTIPLY),
::testing::ValuesIn(secondaryInputTypes),
::testing::Values(ngraph::helpers::InputLayerType::CONSTANT),
::testing::ValuesIn(opTypes),
::testing::ValuesIn(netType),
::testing::Values(ElementType::f32),
::testing::Values(ElementType::f32),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(additional_config)),
::testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D_1D)),
::testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D_1D_Constant_mode)),
::testing::Values(emptyFusingSpec));
INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_4D_1D, EltwiseLayerCPUTest, params_4D_1D, EltwiseLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_4D_1D_Constant, EltwiseLayerCPUTest, params_4D_1D_constant_mode, EltwiseLayerCPUTest::getTestCaseName);
std::vector<CPUSpecificParams> cpuParams_4D_1D_Parameter_mode = {
CPUSpecificParams({nChw16c, x}, {nChw16c}, {}, {}),
CPUSpecificParams({nhwc, x}, {nhwc}, {}, {}),
CPUSpecificParams({nchw, x}, {nchw}, {}, {})
};
const auto params_4D_1D_parameter_mode = ::testing::Combine(
::testing::Combine(
::testing::ValuesIn(static_shapes_to_test_representation(inShapes_4D_1D)),
::testing::Values(ngraph::helpers::EltwiseTypes::ADD, ngraph::helpers::EltwiseTypes::MULTIPLY),
::testing::Values(ngraph::helpers::InputLayerType::PARAMETER),
::testing::ValuesIn(opTypes),
::testing::ValuesIn(netType),
::testing::Values(ElementType::f32),
::testing::Values(ElementType::f32),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(additional_config)),
::testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D_1D_Parameter_mode)),
::testing::Values(emptyFusingSpec));
INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_4D_1D_Parameter, EltwiseLayerCPUTest, params_4D_1D_parameter_mode, EltwiseLayerCPUTest::getTestCaseName);
std::vector<std::vector<ngraph::Shape>> inShapes_5D_1D = {
{{2, 17, 5, 4, 10}, {10}},
{{1, 3, 3, 3, 3}, {3}},
};
std::vector<CPUSpecificParams> cpuParams_5D_1D = {
CPUSpecificParams({nCdhw16c, x}, {nCdhw16c}, {}, {}),
CPUSpecificParams({ndhwc, x}, {ndhwc}, {}, {}),
CPUSpecificParams({ncdhw, x}, {ncdhw}, {}, {})
std::vector<CPUSpecificParams> cpuParams_5D_1D_constant = {
CPUSpecificParams({nCdhw16c, ncdhw}, {nCdhw16c}, {}, {}),
CPUSpecificParams({ndhwc, ndhwc}, {ndhwc}, {}, {}),
CPUSpecificParams({ncdhw, ncdhw}, {ncdhw}, {}, {})
};
const auto params_5D_1D = ::testing::Combine(
const auto params_5D_1D_constant = ::testing::Combine(
::testing::Combine(
::testing::ValuesIn(static_shapes_to_test_representation(inShapes_5D_1D)),
::testing::Values(ngraph::helpers::EltwiseTypes::ADD, ngraph::helpers::EltwiseTypes::MULTIPLY),
::testing::ValuesIn(secondaryInputTypes),
::testing::Values(ngraph::helpers::InputLayerType::CONSTANT),
::testing::ValuesIn(opTypes),
::testing::ValuesIn(netType),
::testing::Values(ElementType::f32),
::testing::Values(ElementType::f32),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(additional_config)),
::testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D_1D)),
::testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D_1D_constant)),
::testing::Values(emptyFusingSpec));
INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_5D_1D, EltwiseLayerCPUTest, params_5D_1D, EltwiseLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_5D_1D_Constant, EltwiseLayerCPUTest, params_5D_1D_constant, EltwiseLayerCPUTest::getTestCaseName);
std::vector<CPUSpecificParams> cpuParams_5D_1D_parameter = {
CPUSpecificParams({nCdhw16c, x}, {nCdhw16c}, {}, {}),
CPUSpecificParams({ndhwc, x}, {ndhwc}, {}, {}),
CPUSpecificParams({ncdhw, x}, {ncdhw}, {}, {})
};
const auto params_5D_1D_parameter = ::testing::Combine(
::testing::Combine(
::testing::ValuesIn(static_shapes_to_test_representation(inShapes_5D_1D)),
::testing::Values(ngraph::helpers::EltwiseTypes::ADD, ngraph::helpers::EltwiseTypes::MULTIPLY),
::testing::Values(ngraph::helpers::InputLayerType::PARAMETER),
::testing::ValuesIn(opTypes),
::testing::ValuesIn(netType),
::testing::Values(ElementType::f32),
::testing::Values(ElementType::f32),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(additional_config)),
::testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D_1D_parameter)),
::testing::Values(emptyFusingSpec));
INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_5D_1D_Parameter, EltwiseLayerCPUTest, params_5D_1D_parameter, EltwiseLayerCPUTest::getTestCaseName);
std::vector<ngraph::helpers::EltwiseTypes> eltwiseOpTypesBinDyn = {

View File

@ -0,0 +1,55 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "test_utils/cpu_test_utils.hpp"
#include "ngraph_functions/builders.hpp"
#include <ngraph/opsets/opset8.hpp>
using namespace ngraph;
namespace SubgraphTestsDefinitions {
// Builds Conv -> (ReduceMean -> Reshape -> MVN -> Reshape -> Multiply -> Add
// -> Sigmoid) -> Multiply and verifies after execution that the fused
// "Subgraph" node(s) keep a channel-blocked output layout (aBcd8b/aBcd16b).
class SubgraphWithBlockedFormat : virtual public LayerTestsUtils::LayerTestsCommon {
protected:
    void SetUp() override {
        targetDevice = CommonTestUtils::DEVICE_CPU;

        const auto prec = element::f32;
        auto input = std::make_shared<opset8::Parameter>(prec, Shape{1, 32, 64, 32});
        auto filters = builder::makeConstant(prec, Shape{32, 32, 1, 1}, std::vector<float>{}, true);
        auto conv = std::make_shared<opset8::Convolution>(input, filters, Strides{1, 1}, CoordinateDiff{0, 0}, CoordinateDiff{0, 0}, Strides{1, 1});
        auto reduce_mean = std::make_shared<opset8::ReduceMean>(conv, opset8::Constant::create(element::i32, Shape{2}, {2, 3}), true);
        auto pre_mvn_reshape = std::make_shared<opset8::Reshape>(reduce_mean, opset8::Constant::create(element::i32, Shape{3}, {0, 16, -1}), true);
        auto mvn = std::make_shared<opset8::MVN>(pre_mvn_reshape, opset8::Constant::create(element::i32, Shape{1}, {2}),
                false, 0.1, op::MVNEpsMode::INSIDE_SQRT);
        // Restore the pre-reshape shape, taken dynamically from ReduceMean's output.
        auto post_mvn_reshape = std::make_shared<opset8::Reshape>(mvn, std::make_shared<opset8::ShapeOf>(reduce_mean), false);
        auto scale = std::make_shared<opset8::Multiply>(post_mvn_reshape, builder::makeConstant(prec, Shape{32, 1, 1}, std::vector<float>{}, true));
        auto shift = std::make_shared<opset8::Add>(scale, builder::makeConstant(prec, Shape{32, 1, 1}, std::vector<float>{}, true));
        auto sigmoid = std::make_shared<opset8::Sigmoid>(shift);
        auto gated = std::make_shared<opset8::Multiply>(conv, sigmoid);

        function = std::make_shared<Function>(gated, ParameterVector{input});
    }

    void TearDown() override {
        auto exec_graph = executableNetwork.GetExecGraphInfo().getFunction();
        int subgraph_count = 0;
        for (const auto& op : exec_graph->get_ordered_ops()) {
            const auto layer_type = op->get_rt_info().at(ExecGraphInfoSerialization::LAYER_TYPE).as<std::string>();
            if (layer_type != "Subgraph")
                continue;
            subgraph_count++;
            const auto output_layout = op->get_rt_info().at(ExecGraphInfoSerialization::OUTPUT_LAYOUTS).as<std::string>();
            // Blocking factor depends on the ISA (AVX2 -> 8, AVX-512 -> 16).
            ASSERT_TRUE(output_layout == "aBcd8b" || output_layout == "aBcd16b");
        }
        // At least one fused Subgraph node must be present in the exec graph.
        ASSERT_GT(subgraph_count, 0);
    }
};
// Runs the subgraph and compares against reference; layout checks are
// performed in the fixture's TearDown after execution.
TEST_F(SubgraphWithBlockedFormat, smoke_CompareWithRefs) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    Run();
}
} // namespace SubgraphTestsDefinitions