[GPU] Fix layout optimizer removing reorder inappropriately in batch 2 (#17496)

* [GPU] Fix with permute mismatching input layout with ouput in batch 2

* Add unit test

* Fix unit test

* Don't use deprecated interface for layer test
This commit is contained in:
David Nam 2023-05-18 02:20:26 +09:00 committed by GitHub
parent 1e878b6a01
commit 1173288777
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 33 additions and 6 deletions

View File

@ -300,12 +300,6 @@ bool layout_optimizer::can_fuse_reorder(program_node& prev, program_node& next,
if (next.is_type<eltwise>() && prev_simple && next_simple)
return true;
if (next.is_type<permute>() && (fmt_prev == format::b_fs_zyx_fsv16 &&
next_output_layout.batch() > 1 &&
next_output_layout.feature() % 16 != 0)) {
return true;
}
if (next.is_type<fully_connected>() &&
(fmt_prev == format::bfyx || fmt_prev == format::yxfb ||
fmt_prev == format::b_fs_yx_fsv16 || fmt_prev == format::fs_b_yx_fsv32 ||

View File

@ -178,3 +178,36 @@ TEST(remove_redundant_reorders, not_to_fuse_reshape_with_fused_prims) {
ASSERT_GE(output_ptr[i], 0);
}
}
TEST(remove_redundant_reorders, not_to_fuse_permute) {
auto& engine = get_test_engine();
auto input = engine.allocate_memory({data_types::f16, format::b_fs_zyx_fsv16, {2, 256, 2, 8, 8}});
auto weight = engine.allocate_memory({data_types::f16, format::bfzyx, {1, 256, 1, 1, 1}});
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(data("weight", weight));
topology.add(
convolution("convolution", input_info("input"), "weight", "", 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, false));
topology.add(
reorder("reorder1", input_info("convolution"), {data_types::f16, format::b_fs_zyx_fsv16, {2, 256, 2, 8, 8}}));
topology.add(reorder("reorder2", input_info("reorder1"), {data_types::f16, format::bfwzyx, {2, 2, 1, 8, 8, 256}}));
topology.add(permute("permute", input_info("reorder2"), {0, 3, 2, 4, 5, 1}));
ExecutionConfig config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::optimize_data(true));
auto prog = program::build_program(engine, topology, config, false, true);
ASSERT_NE(prog, nullptr);
layout_optimizer lo(true);
bool opt_data = config.get_property(ov::intel_gpu::optimize_data);
program_wrapper::apply_opt_pass<prepare_primitive_fusing>(*prog, lo);
program_wrapper::apply_opt_pass<remove_redundant_reorders>(*prog, lo, opt_data);
auto& node = prog->get_node("permute");
auto in_layout = node.get_input_layouts()[0];
ASSERT_EQ(in_layout.format.value, format::bfwzyx);
network network(engine, topology, config);
}