From 05159b8cfb9e0535b3c88af92c17c176039e3467 Mon Sep 17 00:00:00 2001
From: Taylor Yeonbok Lee
Date: Sun, 18 Dec 2022 20:03:23 -0800
Subject: [PATCH] [GPU] Fixed shape infer dep for blocked format (#14646)

* Fixed shape infer dep for blocked format

* Set preferred format for shape_of to be made from input rank

- ShapeOf should get the original output layout of the previous node, which is not reordered.
---
 .../remove_redundant_reorders.cpp             |  3 +
 .../graph/graph_optimizer/reorder_inputs.cpp  |  1 -
 .../intel_gpu/src/graph/layout_optimizer.cpp  | 17 ++++
 .../tests/shape_infer/broadcast_si_test.cpp   | 82 +++++++++++++++++++
 4 files changed, 102 insertions(+), 1 deletion(-)

diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp
index dfc9d6175cb..97cc6b9d772 100644
--- a/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp
+++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp
@@ -251,6 +251,9 @@ void remove_redundant_reorders::run(program& p) {
         if (!node->is_type<reorder>()) // only care for reorders
             continue;
 
+        if (node->is_dynamic())
+            continue;
+
         auto& r_node = node->as<reorder>();
 
         bool no_output_optimization = remove_output_reorders ?
diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp
index b9afb144b27..794abdfd072 100644
--- a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp
+++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp
@@ -452,7 +452,6 @@ void minimize_local_reorders(program& p, std::map<program_node*, format::type>&
     for (auto node : p.get_processing_order()) {
         if (!node->is_in_data_flow())
             continue;
-
         auto preferred_format = lo.get_preferred_format(*node);
 
         if (preferred_format != format::any) {
diff --git a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp
index dca4dec2849..b031e7ade69 100644
--- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp
+++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp
@@ -12,6 +12,7 @@
 #include "resample_inst.h"
 #include "reshape_inst.h"
 #include "arg_max_min_inst.h"
+#include "shape_of_inst.h"
 #include "generic_layer.hpp"
 #include
 
@@ -1653,6 +1654,22 @@ format layout_optimizer::get_preferred_format(program_node& node) {
     auto output_layout = node.get_output_layout();
     bool use_onednn_impls = _optimization_attributes.use_onednn_impls;
 
+    bool allow_new_shape_infer = node.get_program().get_options().get<build_option_type::allow_new_shape_infer>()->enabled();
+
+    if (allow_new_shape_infer) {
+        if (node.is_type<shape_of>())
+            return format::get_default_format(node.get_dependency(0).get_output_layout(false).get_rank());
+        for (auto u : node.get_users()) {
+            for (auto dep_idx : u->get_shape_infer_dependencies()) {
+                if (u->get_dependencies().size() <= dep_idx)
+                    continue;
+                if (u->get_dependency(dep_idx).get_unique_id() == node.get_unique_id()) {
+                    expected = format::get_default_format(output_layout.get_rank(), false, false);
+                    return expected;
+                }
+            }
+        }
+    }
     if (!_forcing_map.empty() && _forcing_map.count(node.id()) != 0) {
         expected = _forcing_map.at(node.id()).first;
     } else if (node.is_type()) {
diff --git a/src/plugins/intel_gpu/tests/shape_infer/broadcast_si_test.cpp b/src/plugins/intel_gpu/tests/shape_infer/broadcast_si_test.cpp
index dfea42e1131..0187666a197 100644
--- a/src/plugins/intel_gpu/tests/shape_infer/broadcast_si_test.cpp
+++ b/src/plugins/intel_gpu/tests/shape_infer/broadcast_si_test.cpp
@@ -6,6 +6,7 @@
 
 #include
 #include
+#include <intel_gpu/primitives/eltwise.hpp>
 #include
 
 #include "broadcast_inst.h"
@@ -81,6 +82,87 @@ INSTANTIATE_TEST_SUITE_P(smoke, broadcast_test_two_inputs,
         }
     }));
 
+class broadcast_test_two_inputs_blocked_format : public testing::TestWithParam<broadcast_test_params> { };
+TEST_P(broadcast_test_two_inputs_blocked_format, shape_infer) {
+    auto p = GetParam();
+
+    auto& engine = get_test_engine();
+
+    auto data_mem = engine.allocate_memory(p.data_layout);
+    auto in1_mem = engine.allocate_memory(p.target_shape_layout);
+    auto in2_mem = engine.allocate_memory(p.target_shape_layout);
+
+    // data ------------|
+    // shape1 (blocked)- eltwise (plain)-- broadcast
+    // shape2 (blocked) /
+    // Expectation: eltwise's result is to be used as shape_mem of broadcast, and it should be plain format
+    topology topology;
+    topology.add(input_layout("data", layout{ov::PartialShape::dynamic(p.data_layout.get_rank()), p.data_layout.data_type, p.data_layout.format}),
+                 input_layout("shape_input_1", layout{ov::PartialShape::dynamic(p.target_shape_layout.get_rank()), p.target_shape_layout.data_type, p.target_shape_layout.format}),
+                 input_layout("shape_input_2", layout{ov::PartialShape::dynamic(p.target_shape_layout.get_rank()), p.target_shape_layout.data_type, p.target_shape_layout.format}),
+                 eltwise("target_shape", input_info("shape_input_1"), input_info("shape_input_2"), eltwise_mode::sum, ov::op::AutoBroadcastType::NUMPY),
+                 broadcast("output", input_info("data"), input_info("target_shape"), p.axes_mapping_data, p.mode)
+    );
+
+    build_options options;
+    options.set_option(build_option::optimize_data(true));
+    options.set_option(build_option::allow_new_shape_infer(true));
+
+    std::vector<int32_t> input_data(p.data_layout.get_linear_size(), 1);
+
+    network network(engine, topology, options);
+
+    set_values(data_mem, input_data);
+    set_values(in1_mem, p.target_shape_data);
+    set_values(in2_mem, p.target_shape_data);
+
+    network.set_input_data("data", data_mem);
+    network.set_input_data("shape_input_1", in1_mem);
+    network.set_input_data("shape_input_2", in2_mem);
+
+    auto outputs = network.execute();
+    auto output = outputs.at("output").get_memory();
+    cldnn::mem_lock<int32_t> output_ptr(output, get_test_stream());
+
+    ASSERT_EQ(output->get_layout(), p.expected_layout);
+}
+
+INSTANTIATE_TEST_SUITE_P(smoke, broadcast_test_two_inputs_blocked_format,
+    testing::ValuesIn(std::vector<broadcast_test_params>{
+        {
+            layout{ov::PartialShape{8}, data_types::i32, format::b_fs_yx_fsv16}, //data layout
+            layout{ov::PartialShape{4}, data_types::i64, format::b_fs_yx_fsv16},
+            {4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+             32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+             11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+             8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+            {0}, ov::op::BroadcastType::EXPLICIT,
+            layout{ov::PartialShape{8, 64, 22, 16}, data_types::i32, format::b_fs_yx_fsv16}
+        },
+        {
+            layout{ov::PartialShape{16, 1, 1, 1}, data_types::i32, format::b_fs_yx_fsv16}, //data layout
+            layout{ov::PartialShape{4}, data_types::i64, format::b_fs_yx_fsv16},
+            {8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+             25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+             12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+             10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+            {}, ov::op::BroadcastType::NUMPY,
+            layout{ov::PartialShape{16, 50, 24, 20}, data_types::i32, format::b_fs_yx_fsv16}
+        },
+        {
+            layout{ov::PartialShape{16}, data_types::i32, format::b_fs_zyx_fsv16}, //data layout
+            layout{ov::PartialShape{5}, data_types::i64, format::b_fs_zyx_fsv16},
+            {8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+             1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+             25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+             12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+             10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+            {0}, ov::op::BroadcastType::EXPLICIT,
+            layout{ov::PartialShape{16, 2, 50, 24, 20}, data_types::i32, format::b_fs_zyx_fsv16}
+        }
+    }));
+
+
 class broadcast_test_single_input : public testing::TestWithParam<broadcast_test_params> { };
 TEST_P(broadcast_test_single_input, shape_infer) {