From 383ec09d85e546676cc5e6b0db0607e22e29b302 Mon Sep 17 00:00:00 2001 From: hyunback kim Date: Thu, 13 Oct 2022 14:47:03 +0900 Subject: [PATCH] [GPU] Fix endless loop issue in onednn optimization post-opt (#13413) * [GPU] Fix endless loop issue in onednn optimization post-opt * Reproduced in yolo_v5s.int8 * Add OV_GPU_DisableOnednnOptPostOps in debug_configuration --- .../include/intel_gpu/runtime/debug_configuration.hpp | 1 + src/plugins/intel_gpu/src/graph/program_node.cpp | 7 +++++-- src/plugins/intel_gpu/src/runtime/debug_configuration.cpp | 3 +++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp index 27e4c434352..337cb04c420 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp @@ -37,6 +37,7 @@ public: int print_multi_kernel_perf; // Print execution time of each kernel in multi-kernel primitimive int disable_usm; // Disable usm usage int disable_onednn; // Disable onednn for discrete GPU (no effect for integrated GPU) + int disable_onednn_opt_post_ops; // Disable onednn optimize post operators std::string dump_profiling_data; // Enables dump of extended performance profiling to specified dir std::string dump_graphs; // Dump optimized graph std::string dump_sources; // Dump opencl sources diff --git a/src/plugins/intel_gpu/src/graph/program_node.cpp b/src/plugins/intel_gpu/src/graph/program_node.cpp index 27322f86ee7..a0612e19a53 100644 --- a/src/plugins/intel_gpu/src/graph/program_node.cpp +++ b/src/plugins/intel_gpu/src/graph/program_node.cpp @@ -626,8 +626,8 @@ dnnl::post_ops program_node::try_optimize_post_ops(dnnl::post_ops& p_ops, const auto cur_idx = static_cast(has_out_scales(attr) ? (cur_post_op_idx >= 1 ? 
cur_post_op_idx - 1 : 0) : cur_post_op_idx); auto prev_idx = static_cast(has_out_scales(attr) ? (prev_post_op_idx >= 1 ? prev_post_op_idx - 1 : 0) : prev_post_op_idx); - // if 2 indices are same, add the last post-op to dnnl::post_ops - if (prev_idx == post_ops_size - 1 && prev_idx == cur_idx && !type_is_any_optimized(prev_type)) { + // If prev_idx and cur_idx are the same, add the last post-op to dnnl::post_ops + if (prev_post_op_idx == post_ops_size - 1 && prev_idx == cur_idx && !type_is_any_optimized(prev_type)) { add_post_op(prev_type, p_ops, optimized_p_ops, prev_idx); break; } @@ -1228,6 +1228,9 @@ void program_node::init_onednn_primitive_attributes() { // Trying to combine multiplications and additions which are placed one after another. // We do it in the cycle because some optimization cases can be simplified again from time to time do { + GPU_DEBUG_GET_INSTANCE(debug_config); + GPU_DEBUG_IF(debug_config->disable_onednn_opt_post_ops) + break; optimized_post_ops = try_optimize_post_ops(optimized_post_ops, attrs, optimization_is_finished); } while (!optimization_is_finished); diff --git a/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp b/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp index 83b82478114..94550355d69 100644 --- a/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp +++ b/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp @@ -105,6 +105,7 @@ static void print_help_messages() { message_list.emplace_back("OV_GPU_PrintMultiKernelPerf", "Print execution time of each kernel in multi-kernel primitimive"); message_list.emplace_back("OV_GPU_DisableUsm", "Disable usm usage"); message_list.emplace_back("OV_GPU_DisableOnednn", "Disable onednn for discrete GPU (no effect for integrated GPU)"); + message_list.emplace_back("OV_GPU_DisableOnednnOptPostOps", "Disable onednn optimize post operators"); message_list.emplace_back("OV_GPU_DumpProfilingData", "Enables dump of extended profiling information to specified directory."
" Note: Performance impact may be significant as this option enforces host side sync after each primitive"); message_list.emplace_back("OV_GPU_DumpGraphs", "Dump optimized graph"); @@ -143,6 +144,7 @@ debug_configuration::debug_configuration() , print_multi_kernel_perf(0) , disable_usm(0) , disable_onednn(0) + , disable_onednn_opt_post_ops(0) , dump_profiling_data(std::string("")) , dump_graphs(std::string()) , dump_sources(std::string()) @@ -167,6 +169,7 @@ debug_configuration::debug_configuration() get_gpu_debug_env_var("DumpLayersDstOnly", dump_layers_dst_only); get_gpu_debug_env_var("DumpLayersResult", dump_layers_result); get_gpu_debug_env_var("DisableOnednn", disable_onednn); + get_gpu_debug_env_var("DisableOnednnOptPostOps", disable_onednn_opt_post_ops); get_gpu_debug_env_var("DumpProfilingData", dump_profiling_data); get_gpu_debug_env_var("DryRunPath", dry_run_path); get_gpu_debug_env_var("BaseBatchForMemEstimation", base_batch_for_memory_estimation);