diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index 20596f77d8c..1d07b289b83 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -628,6 +628,11 @@ void program::post_optimize_graph(bool is_internal) { // update loop input/output primitive mappings apply_opt_pass(); + + // Recalculate processing order after all graph transformation to keep optimal primitives ordering + // for OOO queue + if (_config.get_property(ov::intel_gpu::queue_type) == QueueTypes::out_of_order) + get_processing_order().calculate_BFS_processing_order(); } // mark if the node is constant assuming that all dependencies are marked properly diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/graph_manipulation_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/module_tests/graph_manipulation_gpu_test.cpp index 7cad52b8497..1c9f832b485 100644 --- a/src/plugins/intel_gpu/tests/unit/module_tests/graph_manipulation_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/module_tests/graph_manipulation_gpu_test.cpp @@ -179,3 +179,55 @@ TEST(add_intermediate_gpu, test2) } } } + +TEST(processing_order, bfs_order_restoring) { + auto& engine = get_test_engine(); + + ExecutionConfig config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::optimize_data(true)); + + if (config.get_property(ov::intel_gpu::queue_type) != QueueTypes::out_of_order) + GTEST_SKIP(); + + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 3, 8, 8 } }); + auto eltwise_mem = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); + auto input_low_mem = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 3, 1, 1 } }); + auto input_high_mem = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 3, 1, 1 } }); + auto output_low_mem = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); + auto output_high_mem = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); + + set_values(eltwise_mem, generate_random_1d(1, 0, 10)); + set_values(input_low_mem, generate_random_1d(3, -10, 0)); + set_values(input_high_mem, generate_random_1d(3, 1, 10)); + set_values(output_low_mem, { -127.0f }); + set_values(output_high_mem, { 127.0f }); + + topology topology( + input_layout("input", input->get_layout()), + activation("act", input_info("input"), activation_func::relu), + data("eltwise_data", eltwise_mem), + data("in_low", input_low_mem), + data("in_high", input_high_mem), + data("out_low", output_low_mem), + data("out_high", output_high_mem), + eltwise("eltwise", { input_info("act"), input_info("eltwise_data") }, eltwise_mode::prod, data_types::f32), + activation("act2", input_info("eltwise"), activation_func::pow), + quantize("quant", input_info("act2"), input_info("in_low"), input_info("in_high"), + input_info("out_low"), input_info("out_high"), 256, data_types::u8), + activation("act3", input_info("quant"), activation_func::relu), + reorder("reorder_bfyx1", input_info("act3"), format::bfyx, data_types::f32), + activation("act4", input_info("quant"), activation_func::floor), + reorder("reorder_bfyx2", input_info("act4"), format::bfyx, data_types::f32) + ); + + auto prog = program::build_program(engine, topology, config); + const auto& processing_order = prog->get_processing_order(); + + auto act3_processing_number = processing_order.get_processing_number(prog->get_node_ptr("act3").get()); + auto act4_processing_number = processing_order.get_processing_number(prog->get_node_ptr("act4").get()); + + // Make sure activations (act3 and act4) on parallel branches execute one straight after another + auto processing_number_diff = std::abs(act3_processing_number - act4_processing_number); + + ASSERT_EQ(processing_number_diff, 1); +}