[GPU] Fix endless loop issue in onednn optimization post-opt (#13413)
* [GPU] Fix endless loop issue in onednn optimization post-opt
* Reproduced in yolo_v5s.int8
* Add OV_GPU_DisableOnednnOptPostOps in debug_configuration
This commit is contained in:
parent
01d332eb59
commit
383ec09d85
@ -37,6 +37,7 @@ public:
|
||||
int print_multi_kernel_perf; // Print execution time of each kernel in multi-kernel primitimive
|
||||
int disable_usm; // Disable usm usage
|
||||
int disable_onednn; // Disable onednn for discrete GPU (no effect for integrated GPU)
|
||||
int disable_onednn_opt_post_ops; // Disable onednn optimize post operators
|
||||
std::string dump_profiling_data; // Enables dump of extended performance profiling to specified dir
|
||||
std::string dump_graphs; // Dump optimized graph
|
||||
std::string dump_sources; // Dump opencl sources
|
||||
|
@ -626,8 +626,8 @@ dnnl::post_ops program_node::try_optimize_post_ops(dnnl::post_ops& p_ops, const
|
||||
auto cur_idx = static_cast<int>(has_out_scales(attr) ? (cur_post_op_idx >= 1 ? cur_post_op_idx - 1 : 0) : cur_post_op_idx);
|
||||
auto prev_idx = static_cast<int>(has_out_scales(attr) ? (prev_post_op_idx >= 1 ? prev_post_op_idx - 1 : 0) : prev_post_op_idx);
|
||||
|
||||
// if 2 indices are same, add the last post-op to dnnl::post_ops
|
||||
if (prev_idx == post_ops_size - 1 && prev_idx == cur_idx && !type_is_any_optimized(prev_type)) {
|
||||
// If prev_idx and cur_idx are same, add the last post-op to dnnl::post_ops
|
||||
if (prev_post_op_idx == post_ops_size - 1 && prev_idx == cur_idx && !type_is_any_optimized(prev_type)) {
|
||||
add_post_op(prev_type, p_ops, optimized_p_ops, prev_idx);
|
||||
break;
|
||||
}
|
||||
@ -1228,6 +1228,9 @@ void program_node::init_onednn_primitive_attributes() {
|
||||
// Trying to combine multiplications and additions which are placed one after another.
|
||||
// We do it in the cycle because some optimization cases can be simplified again from time to time
|
||||
do {
|
||||
GPU_DEBUG_GET_INSTANCE(debug_config);
|
||||
GPU_DEBUG_IF(debug_config->disable_onednn_opt_post_ops)
|
||||
break;
|
||||
optimized_post_ops = try_optimize_post_ops(optimized_post_ops, attrs, optimization_is_finished);
|
||||
} while (!optimization_is_finished);
|
||||
|
||||
|
@ -105,6 +105,7 @@ static void print_help_messages() {
|
||||
message_list.emplace_back("OV_GPU_PrintMultiKernelPerf", "Print execution time of each kernel in multi-kernel primitimive");
|
||||
message_list.emplace_back("OV_GPU_DisableUsm", "Disable usm usage");
|
||||
message_list.emplace_back("OV_GPU_DisableOnednn", "Disable onednn for discrete GPU (no effect for integrated GPU)");
|
||||
message_list.emplace_back("OV_GPU_DisableOnednnOptPostOps", "Disable onednn optimize post operators");
|
||||
message_list.emplace_back("OV_GPU_DumpProfilingData", "Enables dump of extended profiling information to specified directory."
|
||||
" Note: Performance impact may be significant as this option enforces host side sync after each primitive");
|
||||
message_list.emplace_back("OV_GPU_DumpGraphs", "Dump optimized graph");
|
||||
@ -143,6 +144,7 @@ debug_configuration::debug_configuration()
|
||||
, print_multi_kernel_perf(0)
|
||||
, disable_usm(0)
|
||||
, disable_onednn(0)
|
||||
, disable_onednn_opt_post_ops(0)
|
||||
, dump_profiling_data(std::string(""))
|
||||
, dump_graphs(std::string())
|
||||
, dump_sources(std::string())
|
||||
@ -167,6 +169,7 @@ debug_configuration::debug_configuration()
|
||||
get_gpu_debug_env_var("DumpLayersDstOnly", dump_layers_dst_only);
|
||||
get_gpu_debug_env_var("DumpLayersResult", dump_layers_result);
|
||||
get_gpu_debug_env_var("DisableOnednn", disable_onednn);
|
||||
get_gpu_debug_env_var("DisableOnednnOptPostOps", disable_onednn_opt_post_ops);
|
||||
get_gpu_debug_env_var("DumpProfilingData", dump_profiling_data);
|
||||
get_gpu_debug_env_var("DryRunPath", dry_run_path);
|
||||
get_gpu_debug_env_var("BaseBatchForMemEstimation", base_batch_for_memory_estimation);
|
||||
|
Loading…
Reference in New Issue
Block a user