[GPU] Fix remove redundant reorder to skip reorder fusing when sibling node doesn't support fused padding (#17041)

* initial fix

* add corresponding unit test

* skip reorder fusing when sibling node does not support fused padding

* fix data type of axis for win build

* Revert "fix data type of axis for win build"

This reverts commit 719ea75d7826aafc7bb94c1971586c33a9842f10.

* add static casting for win build
This commit is contained in:
Wilson Seok
2023-04-27 01:53:23 +09:00
committed by GitHub
parent 7fc65ae3c5
commit 03a428f50c
2 changed files with 60 additions and 0 deletions

View File

@@ -484,6 +484,26 @@ void remove_redundant_reorders::run(program& p) {
// NOTE(review): fragment of remove_redundant_reorders::run — a predicate deciding whether
// this reorder may be fused; any `return false` below rejects the fusing.
// Never fuse a reorder directly into a network input.
if (dep.is_type<input_layout>())
return false;
// Skip reorder padding fusing when any one of sibling nodes is optimized out or doesn't support padding.
if (node->get_output_layout().data_padding) {
// Implementations may still be re-selected later; fusing padding now would be unsafe.
if (update_implementations)
return false;
// Walk every other user of the same dependency (i.e. the siblings of `node`).
for (auto user : dep.get_users()) {
if (user != node) {
// A sibling that can be optimized out cannot be relied on to handle the fused padding.
if (user->can_be_optimized())
return false;
auto node_format = node->get_output_layout().format;
// Check each axis (in node_format axis order) of the dependency's lower padding
// against the sibling's padding support; reject fusing on the first unsupported axis.
for (size_t axis = 0; axis < node->get_dependency(0).get_output_layout().data_padding.lower_size().sizes(node_format).size(); axis++) {
if (!user->is_padding_supported(static_cast<int>(axis),
node->get_dependency(0).get_output_layout().data_padding.lower_size().sizes(node_format)[axis]))
return false;
}
}
}
}
// Grouped convolutions (groups != 1) do not take the fused reorder.
if (usr->as<convolution>().get_primitive()->groups != 1)
return false;

View File

@@ -14,6 +14,8 @@
#include "softmax_inst.h"
#include "reduce_inst.h"
#include "fully_connected_inst.h"
#include "convolution_inst.h"
#include "permute_inst.h"
#include "pass_manager.h"
#include "to_string_utils.h"
@@ -97,3 +99,41 @@ TEST(remove_redundant_reorders, optimize_fsv16_to_bfyx) {
auto fc_in_layout = fc_node.get_input_layouts();
ASSERT_EQ(fc_in_layout.front().data_padding.upper_size().feature[0], 0);
}
TEST(remove_redundant_reorders, skip_reorder_fusing_when_sibling_not_support_padding) {
    // Reorder fusing with padding in the remove_redundant_reorders pass must check ALL sibling
    // nodes (other users of the reorder's input) for whether they support the fused padding.
    // This topology has two reorders consuming the same convolution output; one of them carries
    // padding, so that padded reorder must NOT be fused in the pass.
    // Reference model : Enhance3-lite
    auto& engine = get_test_engine();
    auto input = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 32, 480, 270 } });
    auto weights = engine.allocate_memory({ data_types::f16, format::bfyx, { 16, 32, 1, 1 } });
    auto weights_2 = engine.allocate_memory({ data_types::f16, format::bfyx, { 64, 16, 3, 3 } });

    topology topology;
    topology.add(data("weights", weights));
    topology.add(data("weights_2", weights_2));
    topology.add(input_layout("input", input->get_layout()));
    topology.add(convolution("convolution", input_info("input"), { "weights" }));
    // Sibling branch #1: unpadded reorder feeding a transpose.
    topology.add(reorder("reorder_reshape_1", input_info("convolution"), { data_types::f16, format::bfwzyx, { 2, 16, 1, 1, 480, 270 } }));
    topology.add(permute("transpose_1", input_info("reorder_reshape_1"), { 0, 1, 2, 3, 5, 4 }));
    // Sibling branch #2: reorder WITH padding feeding a second convolution.
    topology.add(reorder("convolution_reorder_1", input_info("convolution"),
                         { data_types::f16, format::fs_b_yx_fsv32, { 2, 16, 480, 270 }, padding({0, 0, 1, 1}, 0) }));
    topology.add(convolution("convolution_2", input_info("convolution_reorder_1"),
                             { "weights_2" }, { 1, 1}, { 1, 1}, { 1, 1}, false, padding({0, 0, 1, 1}, 0)));

    ExecutionConfig config = get_test_default_config(engine);
    config.set_property(ov::intel_gpu::optimize_data(true));
    // build_program consumes the config; re-setting optimize_data afterwards was redundant and
    // has been removed.
    auto prog = program::build_program(engine, topology, config, false, true);

    layout_optimizer lo(true);
    bool optimize_data = config.get_property(ov::intel_gpu::optimize_data);
    program_wrapper::apply_opt_pass<remove_redundant_reorders>(*prog, lo, optimize_data);

    ASSERT_NE(prog, nullptr);
    // The padded reorder must not have been fused: the convolution output layout stays unpadded.
    ASSERT_EQ(prog->get_node("convolution").get_output_layout().data_padding, padding());
}