[GPU] Let the reorder_inputs pass use not only the output layout but also the input layout for dynamic shapes (#15037)

Previously, the reorder_inputs pass checked only the output layout of each node, under the assumption that the input and output ranks are the same at that point.
However, with dynamic shapes using ngraph shape inference, there are cases where the input and output ranks differ. In such cases, the reorder_inputs pass inserted a reorder in the format of the current node's output_layout at the input of the current node, which caused an error.
Fixed the above behavior by applying set_preferred_input_fmt/set_preferred_output_fmt.
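
To make the failure mode concrete, here is a minimal sketch (plain C++; Format, get_default_format, and the pick_* helpers are simplified stand-ins invented for illustration, not the cldnn API) of why deriving the input-side reorder from the output rank breaks once the ranks diverge:

#include <cassert>
#include <cstddef>

// Simplified stand-in: treat a format as nothing more than the rank it targets.
struct Format { std::size_t rank; };

static Format get_default_format(std::size_t rank) { return Format{rank}; }

// Old behavior: the reorder inserted at the node's input took its format from
// the node's output layout, which is wrong whenever the producer's rank differs.
static Format pick_input_reorder_old(std::size_t /*in_rank*/, std::size_t out_rank) {
    return get_default_format(out_rank);
}

// Fixed behavior: the input-side reorder follows the input rank.
static Format pick_input_reorder_new(std::size_t in_rank, std::size_t /*out_rank*/) {
    return get_default_format(in_rank);
}

int main() {
    std::size_t in_rank = 5, out_rank = 4;  // e.g. a 5D Gather input producing a 4D output
    assert(pick_input_reorder_old(in_rank, out_rank).rank != in_rank);  // mismatched reorder
    assert(pick_input_reorder_new(in_rank, out_rank).rank == in_rank);  // matches the input
    return 0;
}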
Taylor Yeonbok Lee, 2023-01-17 21:41:11 -08:00, committed by GitHub
parent fcd95f2169
commit b98900859b
5 changed files with 38 additions and 7 deletions


@@ -113,8 +113,9 @@ struct travel_direction_wrapper<direction_e::backwards> {
 static format get_target_output_format(layout_optimizer& lo, const std::map<program_node*, format::type>& fmt_map, program_node *node, program_node *next) {
     auto user_idx = node->get_user_index(*next);
 
+    bool allow_new_shape_infer = node->get_program().get_config().get_property(ov::intel_gpu::allow_new_shape_infer);
     // 1. Check selected preferred_output_format
-    if (lo.get_optimization_attributes().use_onednn_impls) {
+    if (lo.get_optimization_attributes().use_onednn_impls || allow_new_shape_infer) {
         // If onednn is not used, need to ignore get_preferred_output_fmt result as it is from onednn
         auto ret = node->get_preferred_output_fmt(user_idx);
         if (ret != format::any)
@@ -133,8 +134,9 @@ static format get_target_output_format(layout_optimizer& lo, const std::map<program_node*, format::type>& fmt_map, program_node *node, program_node *next) {
 static format get_target_input_format(layout_optimizer& lo, const std::map<program_node*, format::type>& fmt_map, program_node *node, program_node *prev) {
     auto dep_idx = node->get_dependency_index(*prev);
 
+    bool allow_new_shape_infer = node->get_program().get_config().get_property(ov::intel_gpu::allow_new_shape_infer);
     // 1. Check selected preferred_input_format
-    if (lo.get_optimization_attributes().use_onednn_impls) {
+    if (lo.get_optimization_attributes().use_onednn_impls || allow_new_shape_infer) {
         // If onednn is not used, need to ignore get_preferred_input_fmt result as it is from onednn
         auto ret = node->get_preferred_input_fmt(dep_idx);
         if (ret != format::any)
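
Condensed, the gating both hunks introduce looks like this (a sketch with a toy format enum and simplified names, not the cldnn types): a node-level preferred format now wins whenever onednn impls or the new ngraph-based shape inference is enabled, and the pass falls back to the propagated format map otherwise.

enum class format { any, bfyx, bfzyx };

// Sketch of the selection order in get_target_output_format / get_target_input_format
// after this change.
format select_target_format(bool use_onednn_impls, bool allow_new_shape_infer,
                            format preferred, format from_fmt_map) {
    if (use_onednn_impls || allow_new_shape_infer) {
        if (preferred != format::any)
            return preferred;   // preferred fmt chosen by layout_optimizer wins
    }
    return from_fmt_map;        // otherwise keep using the fmt_map entry as before
}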


@@ -81,7 +81,21 @@ public:
         params.axis = convert_axis(primitive->axis, input_layout.get_rank());
         params.batch_dim = size_t(primitive->batch_dim);
         params.support_neg_ind = primitive->support_neg_ind;
 
+        auto output_layout = impl_param.get_output_layout(0);
+        auto in_rank = impl_param.get_input_layout(0).get_rank();
+        auto out_rank = impl_param.get_output_layout(0).get_rank();
+        if (in_rank > 4 && in_rank > out_rank) { // if in_rank <= 4, the dims are to be adjusted to 4 by convert_data_tensor
+            auto output_shape = impl_param.get_output_layout(0).get_partial_shape();
+            ov::PartialShape new_output_shape({output_shape[0], output_shape[1]});
+            for (size_t i = 0; i < in_rank - out_rank; ++i)
+                new_output_shape.push_back(1);
+            for (size_t i = 2; i < out_rank; ++i) {
+                new_output_shape.push_back(output_shape[i]);
+            }
+            output_layout = layout(new_output_shape, impl_param.get_output_layout(0).data_type, format::get_default_format(new_output_shape.size()));
+        }
+        params.outputs[0] = convert_data_tensor(output_layout);
         params.inputs.push_back(convert_data_tensor(impl_param.get_input_layout(1)));
         return {params, optional_params};
     }
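
The reshape above keeps the two leading (batch and feature) dimensions and inserts ones after them until the output rank matches the input rank. A standalone sketch of that padding rule, with std::vector<int64_t> standing in for ov::PartialShape:

#include <cstddef>
#include <cstdint>
#include <vector>

// Mirrors the hunk above: pad a gather output shape up to the input rank by
// inserting 1s after the first two dimensions.
std::vector<int64_t> pad_output_shape(const std::vector<int64_t>& out_shape, std::size_t in_rank) {
    const std::size_t out_rank = out_shape.size();
    std::vector<int64_t> padded{out_shape[0], out_shape[1]};
    for (std::size_t i = 0; i < in_rank - out_rank; ++i)
        padded.push_back(1);
    for (std::size_t i = 2; i < out_rank; ++i)
        padded.push_back(out_shape[i]);
    return padded;
}

// Example: out_shape {2, 4, 8, 9} with in_rank 5 becomes {2, 4, 1, 8, 9}.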


@@ -1673,6 +1673,14 @@ format layout_optimizer::get_preferred_format(program_node& node) {
     if (allow_new_shape_infer) {
         if (node.is_type<shape_of>())
             return format::get_default_format(node.get_dependency(0).get_output_layout(false).get_rank());
+
+        // Let the reorder_inputs pass check the input format instead of the output format in forward investigation, and vice versa
+        auto out_lay_rank = node.get_output_layout(false).get_rank();
+        auto in_lay_rank = node.get_dependencies().size() > 0 ? node.get_dependency(0).get_output_layout(false).get_rank() : out_lay_rank;
+        if (in_lay_rank != out_lay_rank)
+            node.set_preferred_input_fmt(0, get_preferred_format(node.get_dependency(0)));
+
+        // shape_infer_dep should be plain format because the memory is being read by ngraph shape infer as is
         for (auto u : node.get_users()) {
             for (auto dep_idx : u->get_shape_infer_dependencies()) {
                 if (u->get_dependencies().size() <= dep_idx)
@@ -1828,6 +1836,9 @@ format layout_optimizer::get_preferred_format(program_node& node) {
         expected = format::get_default_format(node.get_input_layouts()[0].get_rank(), false, false);
     }
 
+    if (allow_new_shape_infer && node.get_preferred_input_fmt() != format::any) {
+        node.set_preferred_output_fmt(0, expected);
+    }
     return expected;
 }
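
Taken together, the two hunks above form a handshake: on a rank change the node records a preferred input format taken from its producer, and once expected is finalized it is mirrored into the preferred output format, so reorder_inputs sees consistent values on both sides of the rank boundary. Condensed into a sketch (toy types invented for illustration, not the cldnn API):

enum class fmt { any, bfyx, bfzyx };

struct node_prefs {
    fmt preferred_in  = fmt::any;
    fmt preferred_out = fmt::any;
};

// Record the producer-derived input format on a rank mismatch, then mirror the
// finally chosen format into the output preference.
void finalize_prefs(node_prefs& n, bool allow_new_shape_infer,
                    bool rank_mismatch, fmt producer_fmt, fmt expected) {
    if (allow_new_shape_infer && rank_mismatch)
        n.preferred_in = producer_fmt;   // first hunk: follow the input rank
    if (allow_new_shape_infer && n.preferred_in != fmt::any)
        n.preferred_out = expected;      // second hunk: mirror the chosen format
}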


@@ -144,9 +144,10 @@ static std::string GetDictionaryIndexOrder(const gather_params& params, size_t axis) {
         idx_order[i] = zero_val;
 
     // Fix size to inputs[0] dims size
-    for (size_t i = 0; i < params.outputs[0].GetDims().size() - params.inputs[0].GetDims().size(); i++)
-        idx_order.pop_back();
+    if (params.outputs[0].GetDims().size() > params.inputs[0].GetDims().size()) {
+        for (size_t i = 0; i < params.outputs[0].GetDims().size() - params.inputs[0].GetDims().size(); i++)
+            idx_order.pop_back();
+    }
 
     idx_order[axis] = input_axis_index_macro;
     return GetOrderString(idx_order);
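
The new guard is not just defensive style: the dim counts are unsigned, so when the output has fewer dims than the input, the old subtraction wrapped around and the loop would pop far past the vector's end. A minimal demonstration of that failure mode (hypothetical sizes, not the kernel-selector types):

#include <cstddef>
#include <cstdio>

int main() {
    std::size_t out_dims = 4, in_dims = 6;  // output rank smaller than input rank
    std::size_t diff = out_dims - in_dims;  // unsigned underflow: wraps to a huge value
    std::printf("%zu\n", diff);             // 18446744073709551614 with a 64-bit size_t
    // A loop "for (size_t i = 0; i < diff; i++) idx_order.pop_back();" would then
    // pop an effectively unbounded number of elements; hence the explicit > check.
    return 0;
}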


@@ -184,13 +184,16 @@ const std::vector<GatherShapeParams> dynamicInputShapeConstTargetShape = {
         ov::test::InputShape(ov::PartialShape({}), {{3, 4, 3}}),
         3, 2
     },
-#if 0 // TODO (99432) 5D=>4D test does not work properly because of the current reorder_impl logic does not work as expected.
     {
         ov::test::InputShape(ov::PartialShape({-1, -1, -1, -1, -1}), {{2, 4, 2, 2, 3}, {2, 4, 8, 9, 10}}),
         ov::test::InputShape(ov::PartialShape({}), {{2, 4}}),
         2, 2
     },
-#endif
+    {
+        ov::test::InputShape(ov::PartialShape({-1, -1, -1, -1, -1, -1}), {{2, 4, 2, 3, 1, 3}, {2, 4, 7, 8, 9, 10}}),
+        ov::test::InputShape(ov::PartialShape({}), {{2, 4}}),
+        2, 2
+    },
 };
 INSTANTIATE_TEST_SUITE_P(smoke_dynamic_input_shapes_const_target_shapes, GatherGPUTest,