From b98900859bb62ef442a8cef636e42fc95454f84e Mon Sep 17 00:00:00 2001 From: Taylor Yeonbok Lee Date: Tue, 17 Jan 2023 21:41:11 -0800 Subject: [PATCH] [GPU] Let the reorder_inputs pass use not only output_layout but also input layout for dynamic shape (#15037) Previously, the reorder_inputs pass checked only the output layout of each node, assuming that the input and output ranks are the same at that pass. However, with dynamic shapes using ngraph shape inference, there are cases where the input and output ranks differ. In such cases, the reorder_inputs pass inserts a reorder to the format of the current node's output_layout at the input of the current node, which causes an error. Fixed this behavior by applying set_preferred_input/output_layout. --- .../src/graph/graph_optimizer/reorder_inputs.cpp | 6 ++++-- .../intel_gpu/src/graph/impls/ocl/gather.cpp | 14 ++++++++++++++ .../intel_gpu/src/graph/layout_optimizer.cpp | 11 +++++++++++ .../kernels/gather/gather_kernel_ref.cpp | 7 ++++--- .../gpu/single_layer_tests/dynamic/gather.cpp | 7 +++++-- 5 files changed, 38 insertions(+), 7 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp index 5992927df9b..9bd167eb70a 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp @@ -113,8 +113,9 @@ struct travel_direction_wrapper { static format get_target_output_format(layout_optimizer& lo, const std::map& fmt_map, program_node *node, program_node *next) { auto user_idx = node->get_user_index(*next); + bool allow_new_shape_infer = node->get_program().get_config().get_property(ov::intel_gpu::allow_new_shape_infer); // 1. 
Check selected preferred_output_format - if (lo.get_optimization_attributes().use_onednn_impls) { + if (lo.get_optimization_attributes().use_onednn_impls || allow_new_shape_infer) { // If onednn is not used, need to ignore get_preferred_output_fmt result as it is from onednn auto ret = node->get_preferred_output_fmt(user_idx); @@ -133,8 +134,9 @@ static format get_target_output_format(layout_optimizer& lo, const std::map& fmt_map, program_node *node, program_node *prev) { auto dep_idx = node->get_dependency_index(*prev); + bool allow_new_shape_infer = node->get_program().get_config().get_property(ov::intel_gpu::allow_new_shape_infer); // 1. Check selected preferred_input_format - if (lo.get_optimization_attributes().use_onednn_impls) { + if (lo.get_optimization_attributes().use_onednn_impls || allow_new_shape_infer) { // If onednn is not used, need to ignore get_preferred_input_fmt result as it is from onednn auto ret = node->get_preferred_input_fmt(dep_idx); if (ret != format::any) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/gather.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/gather.cpp index c5dab2f9bb1..b251ebc4e6d 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/gather.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/gather.cpp @@ -81,7 +81,21 @@ public: params.axis = convert_axis(primitive->axis, input_layout.get_rank()); params.batch_dim = size_t(primitive->batch_dim); params.support_neg_ind = primitive->support_neg_ind; + auto output_layout = impl_param.get_output_layout(0); + auto in_rank = impl_param.get_input_layout(0).get_rank(); + auto out_rank = impl_param.get_output_layout(0).get_rank(); + if (in_rank > 4 && in_rank > out_rank) { // if in_rank <= 4, the dims are to be adjusted to 4 by convert_data_tensor + auto output_shape = impl_param.get_output_layout(0).get_partial_shape(); + ov::PartialShape new_output_shape({output_shape[0], output_shape[1]}); + for (size_t i = 0; i < in_rank - out_rank; ++i) + 
new_output_shape.push_back(1); + for (size_t i = 2; i < out_rank; ++i) { + new_output_shape.push_back(output_shape[i]); + } + output_layout = layout(new_output_shape, impl_param.get_output_layout(0).data_type, format::get_default_format(new_output_shape.size())); + } + params.outputs[0] = convert_data_tensor(output_layout); params.inputs.push_back(convert_data_tensor(impl_param.get_input_layout(1))); return {params, optional_params}; } diff --git a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp index da02e5389d3..5676f8717f2 100644 --- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp +++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp @@ -1673,6 +1673,14 @@ format layout_optimizer::get_preferred_format(program_node& node) { if (allow_new_shape_infer) { if (node.is_type()) return format::get_default_format(node.get_dependency(0).get_output_layout(false).get_rank()); + + // Let reorder_input pass to check input format instead of output_format in forward investigation, vice versa + auto out_lay_rank = node.get_output_layout(false).get_rank(); + auto in_lay_rank = node.get_dependencies().size() > 0 ? 
node.get_dependency(0).get_output_layout(false).get_rank() : out_lay_rank; + if (in_lay_rank != out_lay_rank) + node.set_preferred_input_fmt(0, get_preferred_format(node.get_dependency(0))); + + // shape_infer_dep should be plain format because the memory is being read by ngraph shape infer as is for (auto u : node.get_users()) { for (auto dep_idx : u->get_shape_infer_dependencies()) { if (u->get_dependencies().size() <= dep_idx) @@ -1828,6 +1836,9 @@ format layout_optimizer::get_preferred_format(program_node& node) { expected = format::get_default_format(node.get_input_layouts()[0].get_rank(), false, false); } + if (allow_new_shape_infer && node.get_preferred_input_fmt() != format::any) { + node.set_preferred_output_fmt(0, expected); + } return expected; } diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/gather/gather_kernel_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/gather/gather_kernel_ref.cpp index 02878916ad9..c5dc9a2db62 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/gather/gather_kernel_ref.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/gather/gather_kernel_ref.cpp @@ -144,9 +144,10 @@ static std::string GetDictionaryIndexOrder(const gather_params& params, size_t a idx_order[i] = zero_val; // Fix size to inputs[0] dims size - for (size_t i = 0; i < params.outputs[0].GetDims().size() - params.inputs[0].GetDims().size(); i++) - idx_order.pop_back(); - + if (params.outputs[0].GetDims().size() > params.inputs[0].GetDims().size()) { + for (size_t i = 0; i < params.outputs[0].GetDims().size() - params.inputs[0].GetDims().size(); i++) + idx_order.pop_back(); + } idx_order[axis] = input_axis_index_macro; return GetOrderString(idx_order); diff --git a/src/tests/functional/plugin/gpu/single_layer_tests/dynamic/gather.cpp b/src/tests/functional/plugin/gpu/single_layer_tests/dynamic/gather.cpp index acd18a2b67c..4dbd78c6fd0 100644 --- a/src/tests/functional/plugin/gpu/single_layer_tests/dynamic/gather.cpp +++ 
b/src/tests/functional/plugin/gpu/single_layer_tests/dynamic/gather.cpp @@ -184,13 +184,16 @@ const std::vector dynamicInputShapeConstTargetShape = { ov::test::InputShape(ov::PartialShape({}), {{3, 4, 3}}), 3, 2 }, - #if 0 // TODO (99432) 5D=>4D test does not work properly because of the current reorder_impl logic does not work as expected. { ov::test::InputShape(ov::PartialShape({-1, -1, -1, -1, -1}), {{2, 4, 2, 2, 3}, {2, 4, 8, 9, 10}}), ov::test::InputShape(ov::PartialShape({}), {{2, 4}}), 2, 2 }, - #endif + { + ov::test::InputShape(ov::PartialShape({-1, -1, -1, -1, -1, -1}), {{2, 4, 2, 3, 1, 3}, {2, 4, 7, 8, 9, 10}}), + ov::test::InputShape(ov::PartialShape({}), {{2, 4}}), + 2, 2 + }, }; INSTANTIATE_TEST_SUITE_P(smoke_dynamic_input_shapes_const_target_shapes, GatherGPUTest,