[GPU] Let reorder_inputs pass to use not only output_layout but also input layout for dynamic shape (#15037)

Previously, reorder_input pass checked only output layouts of each node, with an assumption that the input/output ranks are same at that pass. However in dynamic shape using ngraph shape infer, there are cases input / output ranks differ. In such cases, the reorder_input pass inserts reorder to the format of current node's output_layout in the input of the current node, which causes error. Fixed the above behavior by applying set_preferred_input/output_layout.
2023-01-17 21:41:11 -08:00 · 2023-01-17 21:41:11 -08:00 · b98900859b
commit b98900859b
parent fcd95f2169
5 changed files with 38 additions and 7 deletions
--- a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp
+++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp
@ -113,8 +113,9 @@ struct travel_direction_wrapper<direction_e::backwards> {
 static format get_target_output_format(layout_optimizer& lo, const std::map<program_node*, format::type>& fmt_map, program_node *node, program_node *next) {
    auto user_idx = node->get_user_index(*next);

+    bool allow_new_shape_infer = node->get_program().get_config().get_property(ov::intel_gpu::allow_new_shape_infer);
    // 1. Check selected preferred_output_format
-    if (lo.get_optimization_attributes().use_onednn_impls) {
+    if (lo.get_optimization_attributes().use_onednn_impls || allow_new_shape_infer) {
        // If onednn is not used, need to ignore get_preferred_output_fmt result as it is from onednn
        auto ret = node->get_preferred_output_fmt(user_idx);

@ -133,8 +134,9 @@ static format get_target_output_format(layout_optimizer& lo, const std::map<prog
 static format get_target_input_format(layout_optimizer& lo, const std::map<program_node*, format::type>& fmt_map, program_node *node, program_node *prev) {
    auto dep_idx = node->get_dependency_index(*prev);

+    bool allow_new_shape_infer = node->get_program().get_config().get_property(ov::intel_gpu::allow_new_shape_infer);
    // 1. Check selected preferred_input_format
-    if (lo.get_optimization_attributes().use_onednn_impls) {
+    if (lo.get_optimization_attributes().use_onednn_impls || allow_new_shape_infer) {
        // If onednn is not used, need to ignore get_preferred_input_fmt result as it is from onednn
        auto ret = node->get_preferred_input_fmt(dep_idx);
        if (ret != format::any)
--- a/src/plugins/intel_gpu/src/graph/impls/ocl/gather.cpp
+++ b/src/plugins/intel_gpu/src/graph/impls/ocl/gather.cpp
@ -81,7 +81,21 @@ public:
        params.axis = convert_axis(primitive->axis, input_layout.get_rank());
        params.batch_dim = size_t(primitive->batch_dim);
        params.support_neg_ind = primitive->support_neg_ind;
+        auto output_layout = impl_param.get_output_layout(0);
+        auto in_rank = impl_param.get_input_layout(0).get_rank();
+        auto out_rank = impl_param.get_output_layout(0).get_rank();
+        if (in_rank > 4 && in_rank > out_rank) { // if in_rank <= 4, the dims are to be adjusted to 4 by convert_data_tensor
+            auto output_shape = impl_param.get_output_layout(0).get_partial_shape();
+            ov::PartialShape new_output_shape({output_shape[0], output_shape[1]});
+            for (size_t i = 0; i < in_rank - out_rank; ++i)
+                new_output_shape.push_back(1);

+            for (size_t i = 2; i < out_rank; ++i) {
+                new_output_shape.push_back(output_shape[i]);
+            }
+            output_layout = layout(new_output_shape, impl_param.get_output_layout(0).data_type, format::get_default_format(new_output_shape.size()));
+        }
+        params.outputs[0] = convert_data_tensor(output_layout);
        params.inputs.push_back(convert_data_tensor(impl_param.get_input_layout(1)));
        return {params, optional_params};
    }
--- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp
+++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp
@ -1673,6 +1673,14 @@ format layout_optimizer::get_preferred_format(program_node& node) {
    if (allow_new_shape_infer) {
        if (node.is_type<shape_of>())
            return format::get_default_format(node.get_dependency(0).get_output_layout(false).get_rank());
+
+        // Let reorder_input pass to check input format instead of output_format in forward investigation, vice versa
+        auto out_lay_rank = node.get_output_layout(false).get_rank();
+        auto in_lay_rank = node.get_dependencies().size() > 0 ? node.get_dependency(0).get_output_layout(false).get_rank() : out_lay_rank;
+        if (in_lay_rank != out_lay_rank)
+            node.set_preferred_input_fmt(0, get_preferred_format(node.get_dependency(0)));
+
+        // shape_infer_dep should be plain format because the memory is being read by ngraph shape infer as is
        for (auto u : node.get_users()) {
            for (auto dep_idx : u->get_shape_infer_dependencies()) {
                if (u->get_dependencies().size() <= dep_idx)
@ -1828,6 +1836,9 @@ format layout_optimizer::get_preferred_format(program_node& node) {
        expected = format::get_default_format(node.get_input_layouts()[0].get_rank(), false, false);
    }

+    if (allow_new_shape_infer && node.get_preferred_input_fmt() != format::any) {
+        node.set_preferred_output_fmt(0, expected);
+    }
    return expected;
 }

--- a/src/plugins/intel_gpu/src/kernel_selector/kernels/gather/gather_kernel_ref.cpp
+++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/gather/gather_kernel_ref.cpp
@ -144,9 +144,10 @@ static std::string GetDictionaryIndexOrder(const gather_params& params, size_t a
        idx_order[i] = zero_val;

    // Fix size to inputs[0] dims size
-    for (size_t i = 0; i < params.outputs[0].GetDims().size() - params.inputs[0].GetDims().size(); i++)
-        idx_order.pop_back();
-
+    if (params.outputs[0].GetDims().size() > params.inputs[0].GetDims().size()) {
+        for (size_t i = 0; i < params.outputs[0].GetDims().size() - params.inputs[0].GetDims().size(); i++)
+            idx_order.pop_back();
+    }
    idx_order[axis] = input_axis_index_macro;

    return GetOrderString(idx_order);
--- a/src/tests/functional/plugin/gpu/single_layer_tests/dynamic/gather.cpp
+++ b/src/tests/functional/plugin/gpu/single_layer_tests/dynamic/gather.cpp
@ -184,13 +184,16 @@ const std::vector<GatherShapeParams> dynamicInputShapeConstTargetShape = {
        ov::test::InputShape(ov::PartialShape({}), {{3, 4, 3}}),
        3, 2
    },
-    #if 0 // TODO (99432) 5D=>4D test does not work properly because of the current reorder_impl logic does not work as expected.
    {
        ov::test::InputShape(ov::PartialShape({-1, -1, -1, -1, -1}), {{2, 4, 2, 2, 3}, {2, 4, 8, 9, 10}}),
        ov::test::InputShape(ov::PartialShape({}), {{2, 4}}),
        2, 2
    },
-    #endif
+    {
+        ov::test::InputShape(ov::PartialShape({-1, -1, -1, -1, -1, -1}), {{2, 4, 2, 3, 1, 3}, {2, 4, 7, 8, 9, 10}}),
+        ov::test::InputShape(ov::PartialShape({}), {{2, 4}}),
+        2, 2
+    },
 };

 INSTANTIATE_TEST_SUITE_P(smoke_dynamic_input_shapes_const_target_shapes, GatherGPUTest,