[GPU] Fix to skip reorder optimization during post_optimize_graph phase (#16908)
* [GPU] Fix to skip reorder optimization during post_optimize_graph phase
  Signed-off-by: Andrew Park <andrew.park@intel.com>
* Apply comment
  Signed-off-by: Andrew Park <andrew.park@intel.com>
* Update condition to check empty padding
  Signed-off-by: Andrew Park <andrew.park@intel.com>
* Add condition to check batch size
  Signed-off-by: Andrew Park <andrew.park@intel.com>
---------
Signed-off-by: Andrew Park <andrew.park@intel.com>
This commit is contained in:
parent
824a5aa7fb
commit
507b3251ef
@ -284,24 +284,29 @@ void remove_redundant_reorders::run(program& p) {
|
||||
i_layout.data_padding.upper_size().spatial[1] == 0 && i_layout.data_padding.lower_size().spatial[1] == 0 &&
|
||||
o_layout.data_padding.upper_size() == (tensor)0 && o_layout.data_padding.lower_size() == (tensor)0 &&
|
||||
i_layout.data_type == o_layout.data_type) {
|
||||
r_node.can_be_optimized(true);
|
||||
r_node.requires_reinterpret(true);
|
||||
// If the newly aligned pad is merged into output layout during post_optimize_graph phase
|
||||
// and then buffer is reinterpreted, user node cannot handle pad properly for kernel execution
|
||||
if (!update_implementations || (i_layout.feature() % 16 == 0 &&
|
||||
i_layout.data_padding == padding() && o_layout.data_padding == padding()) || i_layout.batch() == 1) {
|
||||
r_node.can_be_optimized(true);
|
||||
r_node.requires_reinterpret(true);
|
||||
|
||||
auto pad_lo = o_layout.data_padding.lower_size();
|
||||
auto pad_hi = o_layout.data_padding.upper_size();
|
||||
auto pad_lo = o_layout.data_padding.lower_size();
|
||||
auto pad_hi = o_layout.data_padding.upper_size();
|
||||
|
||||
pad_lo.batch[0] = i_layout.data_padding.lower_size().batch[0];
|
||||
pad_hi.batch[0] = i_layout.data_padding.upper_size().batch[0];
|
||||
pad_lo.batch[0] = i_layout.data_padding.lower_size().batch[0];
|
||||
pad_hi.batch[0] = i_layout.data_padding.upper_size().batch[0];
|
||||
|
||||
pad_lo.feature[0] = i_layout.data_padding.lower_size().feature[0];
|
||||
pad_hi.feature[0] = i_layout.data_padding.upper_size().feature[0];
|
||||
pad_lo.feature[0] = i_layout.data_padding.lower_size().feature[0];
|
||||
pad_hi.feature[0] = i_layout.data_padding.upper_size().feature[0];
|
||||
|
||||
if (i_layout.feature() % 16 != 0) {
|
||||
pad_hi.feature[0] += 16 - i_layout.feature() % 16;
|
||||
if (i_layout.feature() % 16 != 0) {
|
||||
pad_hi.feature[0] += 16 - i_layout.feature() % 16;
|
||||
}
|
||||
|
||||
r_node.merge_output_padding(padding{pad_lo.sizes(), pad_hi.sizes()});
|
||||
continue;
|
||||
}
|
||||
|
||||
r_node.merge_output_padding(padding{pad_lo.sizes(), pad_hi.sizes()});
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!o_layout.compatible(i_layout))
|
||||
|
@ -12,6 +12,8 @@
|
||||
#include "convolution_inst.h"
|
||||
#include "reorder_inst.h"
|
||||
#include "softmax_inst.h"
|
||||
#include "reduce_inst.h"
|
||||
#include "fully_connected_inst.h"
|
||||
|
||||
#include "pass_manager.h"
|
||||
#include "to_string_utils.h"
|
||||
@ -60,3 +62,38 @@ TEST(remove_redundant_reorders, remove_dep_dynamic) {
|
||||
|
||||
ASSERT_EQ(softmax_layout.format.value, format::bfyx);
|
||||
}
|
||||
|
||||
// Regression test: builds a reorder -> reduce -> fully_connected topology with optimize_data
// enabled, executes it, and checks that the first input layout of the "fc" node has no upper
// padding on the feature dimension.
TEST(remove_redundant_reorders, optimize_fsv16_to_bfyx) {
|
||||
// Topology:
|
||||
// reorder(b_fs_yx_fsv16) -> reduce(b_fs_yx_fsv16) -> fully_connected(bfyx)
|
||||
//
|
||||
// Expectation:
|
||||
// A reorder that converts b_fs_yx_fsv16 to bfyx is added between reduce and fc (add_required_reorders)
|
||||
// If it is the post_optimize_graph phase and the batch size of the reorder output layout is not 1,
|
||||
// the reorder optimization (b_fs_yx_fsv16->bfyx when spatials are equal to 1) is skipped (remove_redundant_reorders)
|
||||
// So there should be no upper padding for the feature dim of FC's input layout
|
||||
|
||||
|
||||
auto& engine = get_test_engine();
|
||||
// Input dims are { 2, 1080, 7, 7 }: 1080 is not a multiple of 16, and the leading 2 is
// presumably the batch size (i.e. not 1) -- TODO confirm the tensor dimension order.
auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 1080, 7, 7 } });
|
||||
auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 1001, 1080, 1, 1 } });
|
||||
|
||||
topology topology;
|
||||
topology.add(data("weights", weights));
|
||||
topology.add(input_layout("input", input->get_layout()));
|
||||
topology.add(reorder("reorder", input_info("input"), format::b_fs_yx_fsv16, data_types::f32));
|
||||
// Reduce over axes {2, 3} with the trailing flag set to true; presumably these are the
// spatial axes with keep_dims enabled -- TODO confirm against the reduce primitive signature.
topology.add(reduce("reduce", input_info("reorder"), reduce_mode::min, {2, 3}, true));
|
||||
topology.add(fully_connected("fc", input_info("reduce"), "weights"));
|
||||
|
||||
ExecutionConfig config = get_test_default_config(engine);
|
||||
// optimize_data is enabled; presumably this is what makes the graph optimization passes
// under test run -- TODO confirm.
config.set_property(ov::intel_gpu::optimize_data(true));
|
||||
network network(engine, topology, config);
|
||||
network.set_input_data("input", input);
|
||||
|
||||
network.execute();
|
||||
|
||||
auto prog = network.get_program();
|
||||
ASSERT_NE(prog, nullptr);
|
||||
auto& fc_node = prog->get_node("fc");
|
||||
auto fc_in_layout = fc_node.get_input_layouts();
|
||||
// The first input layout of "fc" must carry no upper padding on the feature dimension.
ASSERT_EQ(fc_in_layout.front().data_padding.upper_size().feature[0], 0);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user