[GPU] Fix endless loop issue in onednn optimization post-opt (#13413)
* [GPU] Fix endless loop issue in onednn optimization post-opt
* Reproduced in yolo_v5s.int8
* Add OV_GPU_DisableOnednnOptPostOps in debug_configuration
parent 01d332eb59
commit 383ec09d85
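The hunks below touch the post-op simplification cycle in program_node and the GPU debug configuration. As a rough orientation, here is a minimal standalone sketch, not the OpenVINO code (post_op, fold_once and the toy values are invented for illustration), of the pattern involved: a do/while loop that repeatedly folds adjacent multiply post-ops and only exits once a pass reports that nothing changed. A "finished" flag that never flips makes such a loop spin forever, which is the failure mode the commit title describes.

// Toy fixed-point simplification loop over a chain of post-ops (illustrative only).
#include <cstdio>
#include <vector>

struct post_op { char kind; float value; };   // 'm' = multiply, 'a' = add (assumed toy model)

static std::vector<post_op> fold_once(const std::vector<post_op>& ops, bool& finished) {
    std::vector<post_op> out;
    finished = true;
    for (size_t i = 0; i < ops.size(); ++i) {
        if (!out.empty() && out.back().kind == 'm' && ops[i].kind == 'm') {
            out.back().value *= ops[i].value;  // merge two consecutive multiplies
            finished = false;                  // something changed, so run another pass
        } else {
            out.push_back(ops[i]);
        }
    }
    return out;
}

int main() {
    std::vector<post_op> ops = {{'m', 2.f}, {'m', 3.f}, {'a', 1.f}, {'m', 4.f}, {'m', 0.5f}};
    bool finished = false;
    do {
        ops = fold_once(ops, finished);        // analogous to updating optimized_post_ops
    } while (!finished);                       // exits only when a pass reports no change
    for (const auto& op : ops) std::printf("%c %.2f\n", op.kind, op.value);
}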
@@ -37,6 +37,7 @@ public:
     int print_multi_kernel_perf;        // Print execution time of each kernel in multi-kernel primitimive
     int disable_usm;                    // Disable usm usage
     int disable_onednn;                 // Disable onednn for discrete GPU (no effect for integrated GPU)
+    int disable_onednn_opt_post_ops;    // Disable onednn optimize post operators
     std::string dump_profiling_data;    // Enables dump of extended performance profiling to specified dir
     std::string dump_graphs;            // Dump optimized graph
     std::string dump_sources;           // Dump opencl sources
@@ -626,8 +626,8 @@ dnnl::post_ops program_node::try_optimize_post_ops(dnnl::post_ops& p_ops, const
         auto cur_idx = static_cast<int>(has_out_scales(attr) ? (cur_post_op_idx >= 1 ? cur_post_op_idx - 1 : 0) : cur_post_op_idx);
         auto prev_idx = static_cast<int>(has_out_scales(attr) ? (prev_post_op_idx >= 1 ? prev_post_op_idx - 1 : 0) : prev_post_op_idx);

-        // if 2 indices are same, add the last post-op to dnnl::post_ops
-        if (prev_idx == post_ops_size - 1 && prev_idx == cur_idx && !type_is_any_optimized(prev_type)) {
+        // If prev_idx and cur_idx are same, add the last post-op to dnnl::post_ops
+        if (prev_post_op_idx == post_ops_size - 1 && prev_idx == cur_idx && !type_is_any_optimized(prev_type)) {
             add_post_op(prev_type, p_ops, optimized_p_ops, prev_idx);
             break;
         }
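The only functional change in this hunk is the left-hand side of the termination check: it now uses the raw prev_post_op_idx instead of the scale-adjusted prev_idx. When output scales are present, prev_idx is shifted down by one, so it can never equal post_ops_size - 1 and the "add the last post-op and break" branch never fires, leaving the surrounding cycle with no way to finish. A minimal standalone sketch of that arithmetic (post_ops_size and has_out_scales are assumed values for illustration, not taken from a real graph):

#include <cstdio>

int main() {
    const int post_ops_size = 3;                     // number of post-ops in the chain (assumed)
    const bool has_out_scales = true;                // output scales present, e.g. an int8 model (assumed)
    const int prev_post_op_idx = post_ops_size - 1;  // raw position of the last post-op

    // Scale-adjusted index, mirroring the expression in try_optimize_post_ops.
    const int prev_idx = has_out_scales ? (prev_post_op_idx >= 1 ? prev_post_op_idx - 1 : 0)
                                        : prev_post_op_idx;

    // Old termination check: compares the adjusted index, which is shifted down by one
    // whenever out-scales are present, so it never reaches post_ops_size - 1 here.
    const bool old_check = (prev_idx == post_ops_size - 1);
    // Fixed termination check: compares the raw index, which does reach post_ops_size - 1.
    const bool new_check = (prev_post_op_idx == post_ops_size - 1);

    std::printf("old check fires: %d, fixed check fires: %d\n", old_check, new_check);
}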
@@ -1228,6 +1228,9 @@ void program_node::init_onednn_primitive_attributes() {
     // Trying to combine multiplications and additions which are placed one after another.
     // We do it in the cycle because some optimization cases can be simplified again from time to time
     do {
+        GPU_DEBUG_GET_INSTANCE(debug_config);
+        GPU_DEBUG_IF(debug_config->disable_onednn_opt_post_ops)
+            break;
         optimized_post_ops = try_optimize_post_ops(optimized_post_ops, attrs, optimization_is_finished);
     } while (!optimization_is_finished);

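The three added lines give the cycle an escape hatch: when the new debug flag is set, the loop breaks before any post-op optimization runs. GPU_DEBUG_GET_INSTANCE and GPU_DEBUG_IF are the plugin's existing debug-configuration macros. A rough standalone approximation of the same guard, using a plain std::getenv check in place of those macros (the helper name and the "unset or 0 means enabled" convention are assumptions of this sketch), could look like:

#include <cstdio>
#include <cstdlib>
#include <cstring>

// Stand-in for the GPU_DEBUG_GET_INSTANCE / GPU_DEBUG_IF pair: read the flag
// from the environment and bail out of the optimization cycle when it is set.
static bool disable_post_op_optimization() {
    const char* v = std::getenv("OV_GPU_DisableOnednnOptPostOps");
    return v != nullptr && std::strcmp(v, "0") != 0;
}

int main() {
    bool optimization_is_finished = false;
    int passes = 0;
    do {
        if (disable_post_op_optimization())
            break;                        // skip post-op fusion entirely, mirroring the new guard
        // ... try_optimize_post_ops(...) would run here and eventually set the flag ...
        optimization_is_finished = true;  // placeholder so the sketch terminates
        ++passes;
    } while (!optimization_is_finished);
    std::printf("ran %d optimization pass(es)\n", passes);
}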
@@ -105,6 +105,7 @@ static void print_help_messages() {
     message_list.emplace_back("OV_GPU_PrintMultiKernelPerf", "Print execution time of each kernel in multi-kernel primitimive");
     message_list.emplace_back("OV_GPU_DisableUsm", "Disable usm usage");
     message_list.emplace_back("OV_GPU_DisableOnednn", "Disable onednn for discrete GPU (no effect for integrated GPU)");
+    message_list.emplace_back("OV_GPU_DisableOnednnOptPostOps", "Disable onednn optimize post operators");
     message_list.emplace_back("OV_GPU_DumpProfilingData", "Enables dump of extended profiling information to specified directory."
                               " Note: Performance impact may be significant as this option enforces host side sync after each primitive");
     message_list.emplace_back("OV_GPU_DumpGraphs", "Dump optimized graph");
@@ -143,6 +144,7 @@ debug_configuration::debug_configuration()
     , print_multi_kernel_perf(0)
     , disable_usm(0)
     , disable_onednn(0)
+    , disable_onednn_opt_post_ops(0)
     , dump_profiling_data(std::string(""))
     , dump_graphs(std::string())
     , dump_sources(std::string())
@@ -167,6 +169,7 @@ debug_configuration::debug_configuration()
     get_gpu_debug_env_var("DumpLayersDstOnly", dump_layers_dst_only);
     get_gpu_debug_env_var("DumpLayersResult", dump_layers_result);
     get_gpu_debug_env_var("DisableOnednn", disable_onednn);
+    get_gpu_debug_env_var("DisableOnednnOptPostOps", disable_onednn_opt_post_ops);
     get_gpu_debug_env_var("DumpProfilingData", dump_profiling_data);
     get_gpu_debug_env_var("DryRunPath", dry_run_path);
     get_gpu_debug_env_var("BaseBatchForMemEstimation", base_batch_for_memory_estimation);
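Together with the header change at the top, the last three hunks follow the usual three-step recipe for a new GPU debug knob: declare the field with a zero default, register an OV_GPU_-prefixed help entry, and fill the field from the environment in the debug_configuration constructor. A simplified stand-in for that pattern (gpu_debug_config and read_int_var are illustrative names, not the real class or helper) might look like:

#include <cstdio>
#include <cstdlib>

struct gpu_debug_config {
    int disable_onednn_opt_post_ops = 0;   // 0 = post-op optimization enabled (default)

    gpu_debug_config() {
        // Fill the field from the environment, keeping the default when the variable is unset.
        read_int_var("OV_GPU_DisableOnednnOptPostOps", disable_onednn_opt_post_ops);
    }

private:
    static void read_int_var(const char* name, int& out) {
        if (const char* v = std::getenv(name))
            out = std::atoi(v);
    }
};

int main() {
    gpu_debug_config cfg;
    std::printf("disable_onednn_opt_post_ops = %d\n", cfg.disable_onednn_opt_post_ops);
}

In the real plugin, the knob is exposed as the OV_GPU_DisableOnednnOptPostOps environment variable registered in the help-message hunk above, giving users a way to bypass the post-op optimization cycle if it misbehaves.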