[GPU] Use BFS processing order for out_of_order queue (#17250)
This commit is contained in:
parent
82ec92dd98
commit
3b8bb1fb17
@ -628,6 +628,11 @@ void program::post_optimize_graph(bool is_internal) {
|
||||
|
||||
// update loop input/output primitive mappings
|
||||
apply_opt_pass<update_loop_primitive_map>();
|
||||
|
||||
// Recalculate processing order after all graph transformation to keep optimal primitives ordering
|
||||
// for OOO queue
|
||||
if (_config.get_property(ov::intel_gpu::queue_type) == QueueTypes::out_of_order)
|
||||
get_processing_order().calculate_BFS_processing_order();
|
||||
}
|
||||
|
||||
// mark if the node is constant assuming that all dependencies are marked properly
|
||||
|
@ -179,3 +179,55 @@ TEST(add_intermediate_gpu, test2)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST(processing_order, bfs_order_restoring) {
|
||||
auto& engine = get_test_engine();
|
||||
|
||||
ExecutionConfig config = get_test_default_config(engine);
|
||||
config.set_property(ov::intel_gpu::optimize_data(true));
|
||||
|
||||
if (config.get_property(ov::intel_gpu::queue_type) != QueueTypes::out_of_order)
|
||||
GTEST_SKIP();
|
||||
|
||||
auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 3, 8, 8 } });
|
||||
auto eltwise_mem = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
|
||||
auto input_low_mem = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 3, 1, 1 } });
|
||||
auto input_high_mem = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 3, 1, 1 } });
|
||||
auto output_low_mem = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
|
||||
auto output_high_mem = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
|
||||
|
||||
set_values(eltwise_mem, generate_random_1d<float>(1, 0, 10));
|
||||
set_values(input_low_mem, generate_random_1d<float>(3, -10, 0));
|
||||
set_values(input_high_mem, generate_random_1d<float>(3, 1, 10));
|
||||
set_values(output_low_mem, { -127.0f });
|
||||
set_values(output_high_mem, { 127.0f });
|
||||
|
||||
topology topology(
|
||||
input_layout("input", input->get_layout()),
|
||||
activation("act", input_info("input"), activation_func::relu),
|
||||
data("eltwise_data", eltwise_mem),
|
||||
data("in_low", input_low_mem),
|
||||
data("in_high", input_high_mem),
|
||||
data("out_low", output_low_mem),
|
||||
data("out_high", output_high_mem),
|
||||
eltwise("eltwise", { input_info("act"), input_info("eltwise_data") }, eltwise_mode::prod, data_types::f32),
|
||||
activation("act2", input_info("eltwise"), activation_func::pow),
|
||||
quantize("quant", input_info("act2"), input_info("in_low"), input_info("in_high"),
|
||||
input_info("out_low"), input_info("out_high"), 256, data_types::u8),
|
||||
activation("act3", input_info("quant"), activation_func::relu),
|
||||
reorder("reorder_bfyx1", input_info("act3"), format::bfyx, data_types::f32),
|
||||
activation("act4", input_info("quant"), activation_func::floor),
|
||||
reorder("reorder_bfyx2", input_info("act4"), format::bfyx, data_types::f32)
|
||||
);
|
||||
|
||||
auto prog = program::build_program(engine, topology, config);
|
||||
const auto& processing_order = prog->get_processing_order();
|
||||
|
||||
auto act3_processing_number = processing_order.get_processing_number(prog->get_node_ptr("act3").get());
|
||||
auto act4_processing_number = processing_order.get_processing_number(prog->get_node_ptr("act4").get());
|
||||
|
||||
// Make sure activations (act3 and act4) on parallel branches execute one straight after another
|
||||
auto processing_number_diff = std::abs(act3_processing_number - act4_processing_number);
|
||||
|
||||
ASSERT_EQ(processing_number_diff, 1);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user