[GPU] Fix layout propagation logic (#17199)

Vladimir Paramuzov 2023-04-26 17:20:48 +04:00 committed by GitHub
parent 14a14ecd76
commit 6ed85178d5
3 changed files with 100 additions and 3 deletions

@@ -33,6 +33,15 @@ layout dft_inst::calc_output_layout(const dft_node& node, const kernel_impl_para
std::string dft_inst::to_string(const dft_node& node) {
    auto desc = node.get_primitive();
    auto node_info = node.desc_to_json();

    json_composite dft_info;
    dft_info.add("axes", desc->axes);
    dft_info.add("signal_size", desc->signal_size);
    dft_info.add("output_shape", desc->output_shape);
    dft_info.add("direction", desc->direction == dft_direction::forward ? "forward" : "inverse");
    dft_info.add("mode", desc->mode == dft_mode::real ? "real" : "complex");
    node_info->add("dft info", dft_info);

    std::ostringstream os;
    node_info->dump(os);
    return os.str();
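For illustration, the dump added here would report the DFT parameters roughly as follows (values taken from the IRDFT test below; the exact json_composite layout is an assumption):

dft info:
    axes: [2, 3]
    signal_size: [64, 64]
    output_shape: [1, 120, 64, 64]
    direction: inverse
    mode: real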

@@ -512,7 +512,9 @@ void minimize_local_reorders(program& p, std::map<program_node*, format::type>&
            continue;

        for (auto new_fmt : local_formats) {
-           if (fmt_map.at(node) != format::any && format::dimension(fmt_map.at(node)) != format::dimension(new_fmt))
+           // Avoid setting formats which would require a transform from a higher rank to a lower one, since that needs a dimension squeeze
+           // TODO: Needs to be updated once we improve layout assignment logic
+           if (fmt_map.at(node) != format::any && format::dimension(fmt_map.at(node)) > format::dimension(new_fmt))
                continue;

            fmt_map.at(node) = new_fmt;
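The effect of the relaxed condition is easiest to see in isolation. Below is a minimal standalone sketch of the old and new skip rules; the fmt enum and dimension() helper are simplified stand-ins for cldnn::format, not the real API:

#include <iostream>

// Simplified stand-ins for cldnn::format and format::dimension().
enum class fmt { any, bfyx /*4d*/, bfzyx /*5d*/, bfwzyx /*6d*/ };

int dimension(fmt f) {
    switch (f) {
    case fmt::bfyx:   return 4;
    case fmt::bfzyx:  return 5;
    case fmt::bfwzyx: return 6;
    default:          return 0;
    }
}

// Old rule: skip a candidate format on any rank mismatch.
bool skip_old(fmt current, fmt candidate) {
    return current != fmt::any && dimension(current) != dimension(candidate);
}

// New rule: skip only when the rank would shrink, which needs a dimension squeeze.
bool skip_new(fmt current, fmt candidate) {
    return current != fmt::any && dimension(current) > dimension(candidate);
}

int main() {
    std::cout << skip_old(fmt::bfyx, fmt::bfzyx) << skip_new(fmt::bfyx, fmt::bfzyx) << "\n";  // 10: 4d -> 5d now allowed
    std::cout << skip_old(fmt::bfzyx, fmt::bfyx) << skip_new(fmt::bfzyx, fmt::bfyx) << "\n";  // 11: 5d -> 4d still skipped
}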

@@ -5,11 +5,14 @@
#include "test_utils.h"

#include "intel_gpu/runtime/engine.hpp"
#include "intel_gpu/graph/program.hpp"
-#include "intel_gpu/graph/network.hpp"
#include "data_inst.h"
#include "eltwise_inst.h"
+#include "intel_gpu/graph/network.hpp"
+#include "dft_inst.h"
+#include "gather_inst.h"
+#include "border_inst.h"
#include "pass_manager.h"
#include "to_string_utils.h"
@@ -69,6 +72,89 @@ TEST(reorder_inputs, propagation) {
    ASSERT_EQ(pool_node.get_output_layout().format.value, conv_pref);
}

TEST(reorder_inputs, mixed_ranks_irdft) {
    // Topology:
    // transpose -> (5d) -> irdft -> (4d) -> eltwise
    // Expected: (bfzyx) -> irdft -> (bfyx)
    auto& engine = get_test_engine();

    topology topology;
    topology.add(input_layout("input", layout{ { 1, 120, 2, 64, 33 }, data_types::f16, format::bfzyx }));
    topology.add(input_layout("eltw_input", layout{ { 1, 120, 64, 64 }, data_types::f16, format::bfyx }));
    topology.add(permute("permute", input_info("input"), { 0, 1, 3, 4, 2 }));
    topology.add(dft("dft", input_info("permute"), { 2, 3 }, { 64, 64 }, { 1, 120, 64, 64 }, dft_direction::inverse, dft_mode::real));
    topology.add(eltwise("eltwise", input_info("dft"), input_info("eltw_input"), eltwise_mode::sum));

    ExecutionConfig config = get_test_default_config(engine);
    config.set_property(ov::intel_gpu::optimize_data(true));

    program::ptr prog = nullptr;
    ASSERT_NO_THROW(prog = program::build_program(engine, topology, config));
    ASSERT_NE(prog, nullptr);

    auto prog_impl = prog.get();
    auto& dft_node = prog_impl->get_node("dft");

    ASSERT_EQ(dft_node.get_input_layouts()[0].format, format::bfzyx);
    ASSERT_EQ(dft_node.get_output_layout().format, format::bfyx);
}
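As a side note, this test stops at program build. Executing the built program could be sketched roughly as below; this is a hypothetical extension, assuming the cldnn network API used elsewhere in this suite (input data is left uninitialized, which is acceptable for a pure smoke check):

// Hypothetical extension: run the built program as a smoke check.
network net(prog, 0);
auto in_mem = engine.allocate_memory(layout{ { 1, 120, 2, 64, 33 }, data_types::f16, format::bfzyx });
auto eltw_mem = engine.allocate_memory(layout{ { 1, 120, 64, 64 }, data_types::f16, format::bfyx });
net.set_input_data("input", in_mem);
net.set_input_data("eltw_input", eltw_mem);
ASSERT_NO_THROW(net.execute());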

TEST(reorder_inputs, mixed_ranks_gather) {
    // Topology:
    // (4d) -> conv -> (4d) -> border -> (4d) -> gather -> (5d) -> gather -> (6d) -> permute (6d)
    // When the preferred format for the convolution is byxf (enforced in this test),
    // it may be propagated to the border and gather nodes, where the dimensions used to be
    // handled incorrectly, so the second gather could complain that axis >= rank.
    // So here we expect the input format of each gather to be aligned with its actual output rank and format.
    auto& engine = get_test_engine();

    auto data1_mem = engine.allocate_memory(layout{ { 3, 128, 1, 1 }, data_types::i32, format::bfyx });
    auto data2_mem = engine.allocate_memory(layout{ { 3, 55, 1, 1 }, data_types::i32, format::bfyx });
    auto weights_mem = engine.allocate_memory(layout{ { 2, 256, 3, 3 }, data_types::f16, format::bfyx });

    topology topology;
    topology.add(input_layout("input", layout{ { 1, 256, 128, 55 }, data_types::f16, format::bfyx }));
    topology.add(data("weights", weights_mem));
    topology.add(data("data1", data1_mem));
    topology.add(data("data2", data2_mem));
    topology.add(convolution("conv",
                             input_info("input"),
                             { "weights" },
                             1,
                             ov::Strides{1, 1},
                             ov::CoordinateDiff{0, 0},
                             ov::Strides{1, 1},
                             ov::CoordinateDiff{0, 0},
                             ov::CoordinateDiff{0, 0}));
    topology.add(border("pad", { input_info("conv") }, 0, ov::CoordinateDiff{0, 0, 1, 1}, ov::CoordinateDiff{0, 0, 1, 1}));
    topology.add(gather("gather1", input_info("pad"), input_info("data1"), 2, { 1, 2, 3, 128, 57 }, 0, false));
    topology.add(gather("gather2", input_info("gather1"), input_info("data2"), 4, { 1, 2, 3, 128, 3, 55 }, 0, false));
    topology.add(permute("permute", input_info("gather2"), { 0, 1, 2, 4, 3, 5 }));

    ExecutionConfig config = get_test_default_config(engine);
    config.set_property(ov::intel_gpu::optimize_data(true));

    ov::intel_gpu::ImplementationDesc conv_impl = { format::byxf, "" };
    ov::intel_gpu::ImplementationDesc permute_impl = { format::bfwzyx, "" };
    config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv", conv_impl }, { "permute", permute_impl } }));

    program::ptr prog = nullptr;
    prog = program::build_program(engine, topology, config);
    ASSERT_NE(prog, nullptr);

    auto prog_impl = prog.get();
    auto& gather1_node = prog_impl->get_node("gather1");
    auto& gather2_node = prog_impl->get_node("gather2");

    ASSERT_EQ(gather1_node.get_input_layouts()[0].format, format::bfzyx);
    ASSERT_EQ(gather1_node.get_output_layout().format, format::bfzyx);
    ASSERT_EQ(gather2_node.get_input_layouts()[0].format, format::bfwzyx);
    ASSERT_EQ(gather2_node.get_output_layout().format, format::bfwzyx);
}
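If one also wanted to execute this graph, the gather index tensors would first need valid values, since they are only allocated above. A rough sketch of such a hypothetical extension, using the set_values helper from test_utils (zero indices keep every lookup in range):

// Hypothetical extension: fill gather indices, then run the built program.
set_values(data1_mem, std::vector<int32_t>(3 * 128, 0));
set_values(data2_mem, std::vector<int32_t>(3 * 55, 0));
network net(prog, 0);
auto in_mem = engine.allocate_memory(layout{ { 1, 256, 128, 55 }, data_types::f16, format::bfyx });
net.set_input_data("input", in_mem);
ASSERT_NO_THROW(net.execute());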

TEST(reorder_inputs, impl_forcing_basic_format) {
    auto& engine = get_test_engine();
    auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 4, 1 } });