[GPU] Fixed shape infer dep for blocked format (#14646)

* Fixed shape infer dep for blocked format

* Set the preferred format for shape_of from its input rank
- ShapeOf should get the original output layout of the previous node, which is not reordered.
Taylor Yeonbok Lee 2022-12-18 20:03:23 -08:00 committed by GitHub
parent a7d0b7b010
commit 05159b8cfb
4 changed files with 102 additions and 1 deletion
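
For context on the fix below: format::get_default_format(rank) returns the plain, non-blocked cldnn layout for a given rank, which is what shape_of and the producers of shape-infer inputs are pinned to so their buffers can be read element by element. A minimal sketch of that mapping, not part of this commit; the header path is an assumption and differs across versions:

// Sketch only; assumes the intel_gpu runtime headers expose cldnn::format here.
#include <intel_gpu/runtime/layout.hpp>

#include <cassert>

int main() {
    using cldnn::format;
    // get_default_format(rank) picks the plain layout for a rank:
    // rank <= 4 -> bfyx, rank 5 -> bfzyx, rank 6 -> bfwzyx.
    assert(format::get_default_format(4) == format::bfyx);
    assert(format::get_default_format(5) == format::bfzyx);
    assert(format::get_default_format(6) == format::bfwzyx);
    return 0;
}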


@@ -251,6 +251,9 @@ void remove_redundant_reorders::run(program& p) {
        if (!node->is_type<reorder>()) // only care for reorders
            continue;

        if (node->is_dynamic())
            continue;

        auto& r_node = node->as<reorder>();
        bool no_output_optimization = remove_output_reorders ?


@@ -452,7 +452,6 @@ void minimize_local_reorders(program& p, std::map<program_node*, format::type>&
    for (auto node : p.get_processing_order()) {
        if (!node->is_in_data_flow())
            continue;

        auto preferred_format = lo.get_preferred_format(*node);

        if (preferred_format != format::any) {


@@ -12,6 +12,7 @@
#include "resample_inst.h"
#include "reshape_inst.h"
#include "arg_max_min_inst.h"
#include "shape_of_inst.h"
#include "generic_layer.hpp"

#include <sstream>
@@ -1653,6 +1654,22 @@ format layout_optimizer::get_preferred_format(program_node& node) {
    auto output_layout = node.get_output_layout();
    bool use_onednn_impls = _optimization_attributes.use_onednn_impls;
    bool allow_new_shape_infer = node.get_program().get_options().get<build_option_type::allow_new_shape_infer>()->enabled();

    if (allow_new_shape_infer) {
        if (node.is_type<shape_of>())
            return format::get_default_format(node.get_dependency(0).get_output_layout(false).get_rank());

        for (auto u : node.get_users()) {
            for (auto dep_idx : u->get_shape_infer_dependencies()) {
                if (u->get_dependencies().size() <= dep_idx)
                    continue;
                if (u->get_dependency(dep_idx).get_unique_id() == node.get_unique_id()) {
                    expected = format::get_default_format(output_layout.get_rank(), false, false);
                    return expected;
                }
            }
        }
    }

    if (!_forcing_map.empty() && _forcing_map.count(node.id()) != 0) {
        expected = _forcing_map.at(node.id()).first;
    } else if (node.is_type<convolution>()) {
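
For context, not part of the diff: the user loop above keys off get_shape_infer_dependencies(), i.e. the list of input ports a primitive reads during shape inference. The sketch below is a hypothetical node illustrating that contract only; the real overrides live in the corresponding *_inst.h headers and may differ in detail.

// Hypothetical node type, for illustration; not intel_gpu code.
#include <cstddef>
#include <vector>

struct broadcast_like_node_sketch {
    // Port 1 carries the target shape. Because its contents are read during
    // shape inference, the node producing that input is kept in the plain
    // default format by the get_preferred_format() logic above.
    std::vector<size_t> get_shape_infer_dependencies() const { return {1}; }
};

int main() {
    broadcast_like_node_sketch node;
    return node.get_shape_infer_dependencies().front() == 1 ? 0 : 1;
}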


@@ -6,6 +6,7 @@
#include <intel_gpu/primitives/input_layout.hpp>
#include <intel_gpu/primitives/broadcast.hpp>
#include <intel_gpu/primitives/eltwise.hpp>
#include <intel_gpu/primitives/data.hpp>

#include "broadcast_inst.h"
@@ -81,6 +82,87 @@ INSTANTIATE_TEST_SUITE_P(smoke, broadcast_test_two_inputs,
    }
}));

class broadcast_test_two_inputs_blocked_format : public testing::TestWithParam<broadcast_test_params> { };

TEST_P(broadcast_test_two_inputs_blocked_format, shape_infer) {
    auto p = GetParam();

    auto& engine = get_test_engine();

    auto data_mem = engine.allocate_memory(p.data_layout);
    auto in1_mem = engine.allocate_memory(p.target_shape_layout);
    auto in2_mem = engine.allocate_memory(p.target_shape_layout);

    // data ------------|
    // shape1 (blocked) -- eltwise (plain) -- broadcast
    // shape2 (blocked) /
    // Expectation: eltwise's result is used as the shape memory of broadcast, so it must stay in plain format
    topology topology;
    topology.add(input_layout("data", layout{ov::PartialShape::dynamic(p.data_layout.get_rank()), p.data_layout.data_type, p.data_layout.format}),
                 input_layout("shape_input_1", layout{ov::PartialShape::dynamic(p.target_shape_layout.get_rank()), p.target_shape_layout.data_type, p.target_shape_layout.format}),
                 input_layout("shape_input_2", layout{ov::PartialShape::dynamic(p.target_shape_layout.get_rank()), p.target_shape_layout.data_type, p.target_shape_layout.format}),
                 eltwise("target_shape", input_info("shape_input_1"), input_info("shape_input_2"), eltwise_mode::sum, ov::op::AutoBroadcastType::NUMPY),
                 broadcast("output", input_info("data"), input_info("target_shape"), p.axes_mapping_data, p.mode)
    );

    build_options options;
    options.set_option(build_option::optimize_data(true));
    options.set_option(build_option::allow_new_shape_infer(true));

    std::vector<int32_t> input_data(p.data_layout.get_linear_size(), 1);

    network network(engine, topology, options);
    set_values(data_mem, input_data);
    set_values(in1_mem, p.target_shape_data);
    set_values(in2_mem, p.target_shape_data);
    network.set_input_data("data", data_mem);
    network.set_input_data("shape_input_1", in1_mem);
    network.set_input_data("shape_input_2", in2_mem);

    auto outputs = network.execute();
    auto output = outputs.at("output").get_memory();
    cldnn::mem_lock<float> output_ptr(output, get_test_stream());

    ASSERT_EQ(output->get_layout(), p.expected_layout);
}

INSTANTIATE_TEST_SUITE_P(smoke, broadcast_test_two_inputs_blocked_format,
    testing::ValuesIn(std::vector<broadcast_test_params>{
    {
        layout{ov::PartialShape{8}, data_types::i32, format::b_fs_yx_fsv16},    // data layout
        layout{ov::PartialShape{4}, data_types::i64, format::b_fs_yx_fsv16},
        {4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
        {0}, ov::op::BroadcastType::EXPLICIT,
        layout{ov::PartialShape{8, 64, 22, 16}, data_types::i32, format::b_fs_yx_fsv16}
    },
    {
        layout{ov::PartialShape{16, 1, 1, 1}, data_types::i32, format::b_fs_yx_fsv16},    // data layout
        layout{ov::PartialShape{4}, data_types::i64, format::b_fs_yx_fsv16},
        {8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
        {}, ov::op::BroadcastType::NUMPY,
        layout{ov::PartialShape{16, 50, 24, 20}, data_types::i32, format::b_fs_yx_fsv16}
    },
    {
        layout{ov::PartialShape{16}, data_types::i32, format::b_fs_zyx_fsv16},    // data layout
        layout{ov::PartialShape{5}, data_types::i64, format::b_fs_zyx_fsv16},
        {8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
        {0}, ov::op::BroadcastType::EXPLICIT,
        layout{ov::PartialShape{16, 2, 50, 24, 20}, data_types::i32, format::b_fs_zyx_fsv16}
    }
}));
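
A note on the test data above, not part of the diff: the target-shape inputs use b_fs_yx_fsv16 / b_fs_zyx_fsv16, which pad the feature axis to 16, so each logical shape value occupies a 16-element block with the value in slot 0 and zeros elsewhere. Both shape inputs are filled with the same values and eltwise sums them, so the broadcast target is twice each value, which is where the expected layouts come from. A small standalone sketch of that arithmetic for the first test case, assuming the padding just described:

// Plain C++ sketch; no intel_gpu APIs involved.
#include <cassert>
#include <cstdint>
#include <vector>

int main() {
    const std::vector<int64_t> logical = {4, 32, 11, 8};   // shape_input_1 == shape_input_2

    // fsv16 padding: each value sits at the start of a 16-element block.
    std::vector<int64_t> blocked(logical.size() * 16, 0);
    for (size_t i = 0; i < logical.size(); ++i)
        blocked[i * 16] = logical[i];

    // eltwise(sum) of two identical shape inputs doubles every value,
    // giving the expected broadcast target {8, 64, 22, 16}.
    std::vector<int64_t> target;
    for (size_t i = 0; i < logical.size(); ++i)
        target.push_back(blocked[i * 16] + blocked[i * 16]);
    assert((target == std::vector<int64_t>{8, 64, 22, 16}));
    return 0;
}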
class broadcast_test_single_input : public testing::TestWithParam<broadcast_test_params> { };
TEST_P(broadcast_test_single_input, shape_infer) {