Add TransposeMatMul transformation to MOC (#20460)

Transformation fuses Transpose on first or second MatMul's input and sets MatMul's transpose_a/transpose_b accordingly. TransposeMatMul is already part of SmartReshape, but it can be added to MOCTransformations as well so native models that are don't use reshape can benefit from that. Ticket: CVS-118908
2023-10-24 14:40:36 +02:00 · 2023-10-24 14:40:36 +02:00 · b7406247ff
commit b7406247ff
parent 6395fc672d
7 changed files with 125 additions and 1 deletions
--- a/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp
+++ b/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp
@ -87,6 +87,7 @@
 #include "transformations/op_conversions/convert_ti_to_sequences.hpp"
 #include "transformations/resolve_names_collisions.hpp"
 #include "transformations/smart_reshape/lstm_states_broadcast.hpp"
+#include "transformations/smart_reshape/matmul_sr.hpp"
 #include "transformations/smart_reshape/reshape_sinking.hpp"

 bool ov::pass::MOCTransformations::run_on_model(const std::shared_ptr<ov::Model>& f) {
@ -166,11 +167,13 @@ bool ov::pass::MOCTransformations::run_on_model(const std::shared_ptr<ov::Model>

    auto transpose_sinking = manager.register_pass<ov::pass::GraphRewrite>();
    ADD_MATCHER(transpose_sinking, TransposeSinking)
-
    // SplitSqueezeConcatFusion should work in same GraphRewrite as TransposesSinking,
    // because it replaces pattern that may contain Transposes which must be optimized before
    // the transformation and it also inserts Transpose that can be optimized by TransposeSinking
    ADD_MATCHER(transpose_sinking, SplitSqueezeConcatFusion)
+
+    REGISTER_PASS(manager, TransposeMatMul)
+
    auto eliminations = manager.register_pass<ov::pass::GraphRewrite>();
    ADD_MATCHER(eliminations, EliminateUnsqueezeGather)
    ADD_MATCHER(eliminations, NopElimination, m_use_shapes)
--- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/transpose_matmul_fusion.cpp
+++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/transpose_matmul_fusion.cpp
@ -0,0 +1,14 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "subgraph_tests/transpose_matmul_fusion.hpp"
+
+using namespace ov::test;
+
+namespace {
+INSTANTIATE_TEST_SUITE_P(smoke_TransposeMatMulFusion, TransposeMatMulFusion,
+                         ::testing::Values(ov::test::utils::DEVICE_CPU),
+                         TransposeMatMulFusion::getTestCaseName);
+
+}  // namespace
--- a/src/plugins/intel_gna/tests/functional/shared_tests_instances/skip_tests_config.cpp
+++ b/src/plugins/intel_gna/tests/functional/shared_tests_instances/skip_tests_config.cpp
@ -102,5 +102,7 @@ std::vector<std::string> disabledTestPatterns() {
        R"(.*SplitConvTest.CompareWithRefImpl.*IS=\(1.(128|256)\).*IC=4.*OC=4.*configItem=GNA_DEVICE_MODE_GNA_SW_FP32)",
        // TODO: Issue: 114149
        R"(.*smoke_Decompose2DConv.*)",
+        // TODO: Issue: 123306
+        R"(smoke_convert_matmul_to_fc/ConvertMatmulToFcWithTransposesPass.CompareWithRefImpl/netPRC=FP(32|16)_targetDevice=GNA__configItem=GNA_COMPACT_MODE_NO_configItem=GNA_DEVICE_MODE_GNA_SW_(FP32|EXACT)_IS=\(8.*)",
    };
 }
--- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/subgraph_tests/transpose_matmul_fusion.cpp
+++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/subgraph_tests/transpose_matmul_fusion.cpp
@ -0,0 +1,14 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "subgraph_tests/transpose_matmul_fusion.hpp"
+
+using namespace ov::test;
+
+namespace {
+INSTANTIATE_TEST_SUITE_P(smoke_TransposeMatMulFusion, TransposeMatMulFusion,
+                         ::testing::Values(ov::test::utils::DEVICE_GPU),
+                         TransposeMatMulFusion::getTestCaseName);
+
+}  // namespace
--- a/src/tests/functional/plugin/shared/include/subgraph_tests/transpose_matmul_fusion.hpp
+++ b/src/tests/functional/plugin/shared/include/subgraph_tests/transpose_matmul_fusion.hpp
@ -0,0 +1,17 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "shared_test_classes/subgraph/transpose_matmul_fusion.hpp"
+
+namespace ov {
+namespace test {
+
+TEST_P(TransposeMatMulFusion, CompareWithRefs){
+    run();
+};
+
+}  // namespace test
+}  // namespace ov
--- a/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/transpose_matmul_fusion.hpp
+++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/transpose_matmul_fusion.hpp
@ -0,0 +1,23 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "shared_test_classes/base/ov_subgraph.hpp"
+
+namespace ov {
+namespace test {
+
+class TransposeMatMulFusion : public testing::WithParamInterface<const char*>,
+                                   public ov::test::SubgraphBaseTest {
+public:
+    static std::string getTestCaseName(const testing::TestParamInfo<const char*> &obj);
+
+protected:
+    void SetUp() override;
+    void TearDown() override;
+};
+
+}  // namespace test
+}  // namespace ov
--- a/src/tests/functional/shared_test_classes/src/subgraph/transpose_matmul_fusion.cpp
+++ b/src/tests/functional/shared_test_classes/src/subgraph/transpose_matmul_fusion.cpp
@ -0,0 +1,51 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "shared_test_classes/subgraph/transpose_matmul_fusion.hpp"
+
+namespace ov {
+namespace test {
+
+std::string TransposeMatMulFusion::getTestCaseName(const testing::TestParamInfo<const char*> &obj) {
+    return "device=" + std::string(obj.param);
+}
+
+void TransposeMatMulFusion::SetUp() {
+    targetDevice = GetParam();
+
+    ov::PartialShape shape1{1, 3, 128, 64};
+    ov::PartialShape shape2{1, 3, 64, 128};
+
+    InputShape input_shape1 = {shape1, {Shape{1, 3, 128, 64}}};
+    InputShape input_shape2 = {shape2, {Shape{1, 3, 64, 128}}};
+    init_input_shapes({input_shape1, input_shape2});
+
+    const auto param1 = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, shape1);
+    const auto param2 = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, shape2);
+    const auto order = ov::op::v0::Constant::create(ov::element::i32, Shape{4}, {0, 1, 3, 2});
+    const auto transpose1 = std::make_shared<ov::op::v1::Transpose>(param1, order);
+    const auto transpose2 = std::make_shared<ov::op::v1::Transpose>(param2, order);
+    const auto matmul = std::make_shared<ov::op::v0::MatMul>(transpose1, transpose2, false, false);
+    const auto constant = op::v0::Constant::create(element::f32, Shape{1}, {9});
+    const auto mul = std::make_shared<ov::op::v1::Multiply>(matmul, constant);
+    function = std::make_shared<ov::Model>(mul, ov::ParameterVector{param1, param2});
+}
+
+void TransposeMatMulFusion::TearDown() {
+    const auto model = compiledModel.get_runtime_model();
+
+    int num_ops = 0;
+    for (const auto& node : model->get_ordered_ops()) {
+        const auto& rt_info = node->get_rt_info();
+        const auto layer_type = rt_info.find("layerType")->second.as<std::string>();
+        if (layer_type != "Reorder" && layer_type != "Const")
+            num_ops++;
+        EXPECT_NE(layer_type, "Transpose");
+        EXPECT_NE(layer_type, "Permute");
+    }
+    ASSERT_EQ(num_ops, 5); // two Inputs, one Eltwise, one MatMul and one Output
+}
+
+}  // namespace test
+}  // namespace ov