diff --git a/src/plugins/intel_gpu/src/plugin/ops/matmul.cpp b/src/plugins/intel_gpu/src/plugin/ops/matmul.cpp
index a673d623e14..f0ccf5ba925 100644
--- a/src/plugins/intel_gpu/src/plugin/ops/matmul.cpp
+++ b/src/plugins/intel_gpu/src/plugin/ops/matmul.cpp
@@ -2,6 +2,8 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
+#include <numeric>
+
 #include "intel_gpu/plugin/program.hpp"
 #include "intel_gpu/plugin/common_utils.hpp"
 
@@ -166,6 +168,63 @@ static void CreateMatMulOp(Program& p, const std::shared_ptr<ngraph::op::v0::MatMul>& op) {
+        auto transA = op->get_transpose_a();
+        auto transB = op->get_transpose_b();
+
+        std::array<ngraph::PartialShape, 2> inputShapes{
+            op->get_input_partial_shape(0),
+            op->get_input_partial_shape(1)
+        };
+
+        auto canTransposeInputs = [] (const std::array<ngraph::PartialShape, 2>& shapes, bool transA, bool transB) -> bool {
+            if (!transA && !transB)
+                return false;
+            if (shapes[0].rank().is_dynamic() ||
+                shapes[1].rank().is_dynamic())
+                return false;
+
+            // don't transpose inputs if they're aligned to 16
+            bool inputsAligned = std::all_of(shapes[0].rbegin(), shapes[0].rbegin() + 2,
+                                             [] (const ngraph::Dimension& dim) { return dim.is_static() && dim.get_length() % 16 == 0; }) &&
+                                 std::all_of(shapes[1].rbegin(), shapes[1].rbegin() + 2,
+                                             [] (const ngraph::Dimension& dim) { return dim.is_static() && dim.get_length() % 16 == 0; });
+            if (inputsAligned)
+                return false;
+
+            return std::all_of(shapes[0].rbegin(), shapes[0].rbegin() + 2,
+                               [] (const ngraph::Dimension& dim) { return dim.is_static() && dim.get_length() >= 64; }) &&
+                   std::all_of(shapes[1].rbegin(), shapes[1].rbegin() + 2,
+                               [] (const ngraph::Dimension& dim) { return dim.is_static() && dim.get_length() >= 64; });
+        };
+
+        auto transposeInput = [&layerName] (Program& p, const std::shared_ptr<ngraph::Node>& op, const ngraph::PartialShape& shape,
+                                            const std::string& suffix, const cldnn::primitive_id& primitiveId) -> std::string {
+            std::vector<uint16_t> transposeOrder(shape.size());
+            std::iota(transposeOrder.begin(), transposeOrder.end(), 0);
+            for (auto o = transposeOrder.size(); o < 4; o++)
+                transposeOrder.push_back((uint16_t)o);
+            std::swap(*(transposeOrder.end() - 1), *(transposeOrder.end() - 2));
+
+            auto permuteName = op->get_friendly_name() + suffix;
+            auto permutePrim = cldnn::permute(permuteName,
+                                              primitiveId,
+                                              transposeOrder);
+            p.add_primitive(*op, permutePrim);
+            return permuteName;
+        };
+
+        if (canTransposeInputs(inputShapes, transA, transB)) {
+            if (transA) {
+                inputPrimitives[0] = transposeInput(p, op, inputShapes[0], "/transpose_a", inputPrimitives[0]);
+                transA = false;
+            }
+
+            if (transB) {
+                inputPrimitives[1] = transposeInput(p, op, inputShapes[1], "/transpose_b", inputPrimitives[1]);
+                transB = false;
+            }
+        }
+
         auto gemmPrim = cldnn::gemm(layerName,
                                     inputPrimitives,
                                     cldnn::element_type_to_data_type(op->get_output_element_type(0)),
diff --git a/src/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/mat_mul.cpp b/src/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/mat_mul.cpp
index 8d6b404e3fb..859d65ace9e 100644
--- a/src/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/mat_mul.cpp
+++ b/src/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/mat_mul.cpp
@@ -33,6 +33,9 @@ const std::vector<ShapeRelatedParams> shapeRelatedParams = {
         { { {2, 1, 2, 3}, true }, { {3, 4, 2}, true } },
         { { {3}, false }, { {2, 2, 3, 1}, false } },
         { { {2, 2, 1, 3}, false }, { {3}, false } },
+        { { {65, 100}, false }, { {73, 100}, true } },
+        { { {100, 65}, true }, { {100, 73}, false } },
+        { { {100, 65}, true }, { {73, 100}, true } },
         { { {1, 5}, false }, { {5, 1}, false } },
         { { {5, 1}, true }, { {5, 1}, false } },
         { { {1, 5}, false }, { {1, 5}, true } },
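
Reviewer note, not part of the patch: the new test shapes (65, 73, 100) appear chosen to exercise the canTransposeInputs heuristic, since every inner dimension is at least 64 but none of them is a multiple of 16. Below is a minimal standalone restatement of that heuristic in plain C++17, with ngraph::PartialShape replaced by a vector of int64_t where -1 stands in for a dynamic dimension; shouldTransposeInputs and lastTwoDims are hypothetical names introduced for this sketch, and the dynamic-rank check from the patch is folded into the size() >= 2 guard.

#include <cstdint>
#include <vector>

using Shape = std::vector<int64_t>;  // -1 == dynamic dimension

// Apply a predicate to the two innermost (matrix) dimensions only.
static bool lastTwoDims(const Shape& s, bool (*pred)(int64_t)) {
    return s.size() >= 2 && pred(s[s.size() - 1]) && pred(s[s.size() - 2]);
}

static bool shouldTransposeInputs(const Shape& a, const Shape& b, bool transA, bool transB) {
    if (!transA && !transB)
        return false;                  // no transpose to fold into a permute
    auto aligned16 = [] (int64_t d) { return d >= 0 && d % 16 == 0; };
    auto atLeast64 = [] (int64_t d) { return d >= 64; };
    // Keep the gemm-internal transpose when both inner 2D blocks are
    // 16-aligned: the tiled kernels already handle that layout well.
    if (lastTwoDims(a, aligned16) && lastTwoDims(b, aligned16))
        return false;
    // Otherwise only insert explicit permutes for reasonably large static
    // matrices, where the extra copy pays for itself.
    return lastTwoDims(a, atLeast64) && lastTwoDims(b, atLeast64);
}

Under this restatement, the first new test case maps to shouldTransposeInputs({65, 100}, {73, 100}, false, true), which returns true, so the plugin would replace MatMul's transpose_b with an explicit permute primitive.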
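The permute order built by transposeInput can likewise be checked in isolation. The sketch below reproduces its identity-then-swap construction under the same assumption the patch encodes, namely that orders shorter than rank 4 are padded up before the two innermost axes are swapped; buildTransposeOrder is a hypothetical name.

#include <cstdint>
#include <iostream>
#include <numeric>
#include <utility>
#include <vector>

// Identity order 0..rank-1, padded to at least rank 4 (mirroring the patch's
// padding before cldnn::permute), then the two innermost axes are swapped so
// batch axes stay in place and only the matrix dims are transposed.
static std::vector<uint16_t> buildTransposeOrder(size_t rank) {
    std::vector<uint16_t> order(rank);
    std::iota(order.begin(), order.end(), static_cast<uint16_t>(0));
    for (size_t o = order.size(); o < 4; o++)
        order.push_back(static_cast<uint16_t>(o));
    std::swap(*(order.end() - 1), *(order.end() - 2));
    return order;
}

int main() {
    for (size_t rank : {2u, 4u, 5u}) {
        std::cout << "rank " << rank << ":";
        for (auto axis : buildTransposeOrder(rank))
            std::cout << ' ' << axis;
        std::cout << '\n';  // rank 2 and 4 -> 0 1 3 2, rank 5 -> 0 1 2 4 3
    }
    return 0;
}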