[GPU] Fix remove redundant reorder to skip reorder fusing when sibling node doesn't support fused padding (#17041)

* initial fix

* add corresponding unit test

* skip reorder fusing when sibling node does not support fused padding

* fix data type of axis for win build

* Revert "fix data type of axis for win build"

This reverts commit 719ea75d7826aafc7bb94c1971586c33a9842f10.

* add static casting for win build
This commit is contained in:
Wilson Seok
2023-04-27 01:53:23 +09:00
committed by GitHub
parent 7fc65ae3c5
commit 03a428f50c
2 changed files with 60 additions and 0 deletions

View File

@@ -484,6 +484,26 @@ void remove_redundant_reorders::run(program& p) {
// NOTE(review): fragment of remove_redundant_reorders::run — a predicate deciding whether
// this reorder may be fused; any `return false` below rejects the fusing.
// Never fuse a reorder directly into a network input.
if (dep.is_type<input_layout>())
return false;
// Skip reorder padding fusing when any one of sibling nodes is optimized out or doesn't support padding.
if (node->get_output_layout().data_padding) {
// Implementations may still be re-selected later; fusing padding now would be unsafe.
if (update_implementations)
return false;
// Walk every other user of the same dependency (i.e. the siblings of `node`).
for (auto user : dep.get_users()) {
if (user != node) {
// A sibling that can be optimized out cannot be relied on to handle the fused padding.
if (user->can_be_optimized())
return false;
auto node_format = node->get_output_layout().format;
// Check each axis (in node_format axis order) of the dependency's lower padding
// against the sibling's padding support; reject fusing on the first unsupported axis.
for (size_t axis = 0; axis < node->get_dependency(0).get_output_layout().data_padding.lower_size().sizes(node_format).size(); axis++) {
if (!user->is_padding_supported(static_cast<int>(axis),
node->get_dependency(0).get_output_layout().data_padding.lower_size().sizes(node_format)[axis]))
return false;
}
}
}
}
// Grouped convolutions (groups != 1) do not take the fused reorder.
if (usr->as<convolution>().get_primitive()->groups != 1)
return false;

View File

@@ -14,6 +14,8 @@
#include "softmax_inst.h"
#include "reduce_inst.h"
#include "fully_connected_inst.h"
#include "convolution_inst.h"
#include "permute_inst.h"
#include "pass_manager.h"
#include "to_string_utils.h"
@@ -97,3 +99,41 @@ TEST(remove_redundant_reorders, optimize_fsv16_to_bfyx) {
auto fc_in_layout = fc_node.get_input_layouts();
ASSERT_EQ(fc_in_layout.front().data_padding.upper_size().feature[0], 0);
}
TEST(remove_redundant_reorders, skip_reorder_fusing_when_sibling_not_support_padding) {
    // Reorder fusing with padding in the remove_redundant_reorders pass must check ALL sibling
    // nodes (other users of the reorder's input) for whether they support the fused padding.
    // This topology has two reorders consuming the same convolution output; one of them carries
    // padding, so that padded reorder must NOT be fused in the pass.
    // Reference model : Enhance3-lite
    auto& engine = get_test_engine();
    auto input = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 32, 480, 270 } });
    auto weights = engine.allocate_memory({ data_types::f16, format::bfyx, { 16, 32, 1, 1 } });
    auto weights_2 = engine.allocate_memory({ data_types::f16, format::bfyx, { 64, 16, 3, 3 } });

    topology topology;
    topology.add(data("weights", weights));
    topology.add(data("weights_2", weights_2));
    topology.add(input_layout("input", input->get_layout()));
    topology.add(convolution("convolution", input_info("input"), { "weights" }));
    // Sibling branch #1: unpadded reorder feeding a transpose.
    topology.add(reorder("reorder_reshape_1", input_info("convolution"), { data_types::f16, format::bfwzyx, { 2, 16, 1, 1, 480, 270 } }));
    topology.add(permute("transpose_1", input_info("reorder_reshape_1"), { 0, 1, 2, 3, 5, 4 }));
    // Sibling branch #2: reorder WITH padding feeding a second convolution.
    topology.add(reorder("convolution_reorder_1", input_info("convolution"),
                         { data_types::f16, format::fs_b_yx_fsv32, { 2, 16, 480, 270 }, padding({0, 0, 1, 1}, 0) }));
    topology.add(convolution("convolution_2", input_info("convolution_reorder_1"),
                             { "weights_2" }, { 1, 1}, { 1, 1}, { 1, 1}, false, padding({0, 0, 1, 1}, 0)));

    ExecutionConfig config = get_test_default_config(engine);
    config.set_property(ov::intel_gpu::optimize_data(true));
    // build_program consumes the config; re-setting optimize_data afterwards was redundant and
    // has been removed.
    auto prog = program::build_program(engine, topology, config, false, true);

    layout_optimizer lo(true);
    bool optimize_data = config.get_property(ov::intel_gpu::optimize_data);
    program_wrapper::apply_opt_pass<remove_redundant_reorders>(*prog, lo, optimize_data);

    ASSERT_NE(prog, nullptr);
    // The padded reorder must not have been fused: the convolution output layout stays unpadded.
    ASSERT_EQ(prog->get_node("convolution").get_output_layout().data_padding, padding());
}