[GPU] Fix endless loop issue in onednn optimization post-opt (#13413)
* [GPU] Fix endless loop issue in onednn optimization post-opt
* Reproduced in yolo_v5s.int8
* Add OV_GPU_DisableOnednnOptPostOps in debug_configuration
parent 01d332eb59
commit 383ec09d85
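The hunks below touch the post-op simplification cycle in program_node and the GPU debug configuration. As a rough orientation, here is a minimal standalone sketch, not the OpenVINO code (post_op, fold_once and the toy values are invented for illustration), of the pattern involved: a do/while loop that repeatedly folds adjacent multiply post-ops and only exits once a pass reports that nothing changed. A "finished" flag that never flips makes such a loop spin forever, which is the failure mode the commit title describes.

// Toy fixed-point simplification loop over a chain of post-ops (illustrative only).
#include <cstdio>
#include <vector>

struct post_op { char kind; float value; };   // 'm' = multiply, 'a' = add (assumed toy model)

static std::vector<post_op> fold_once(const std::vector<post_op>& ops, bool& finished) {
    std::vector<post_op> out;
    finished = true;
    for (size_t i = 0; i < ops.size(); ++i) {
        if (!out.empty() && out.back().kind == 'm' && ops[i].kind == 'm') {
            out.back().value *= ops[i].value;  // merge two consecutive multiplies
            finished = false;                  // something changed, so run another pass
        } else {
            out.push_back(ops[i]);
        }
    }
    return out;
}

int main() {
    std::vector<post_op> ops = {{'m', 2.f}, {'m', 3.f}, {'a', 1.f}, {'m', 4.f}, {'m', 0.5f}};
    bool finished = false;
    do {
        ops = fold_once(ops, finished);        // analogous to updating optimized_post_ops
    } while (!finished);                       // exits only when a pass reports no change
    for (const auto& op : ops) std::printf("%c %.2f\n", op.kind, op.value);
}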
@@ -37,6 +37,7 @@ public:
     int print_multi_kernel_perf;        // Print execution time of each kernel in multi-kernel primitimive
     int disable_usm;                    // Disable usm usage
     int disable_onednn;                 // Disable onednn for discrete GPU (no effect for integrated GPU)
+    int disable_onednn_opt_post_ops;    // Disable onednn optimize post operators
     std::string dump_profiling_data;    // Enables dump of extended performance profiling to specified dir
     std::string dump_graphs;            // Dump optimized graph
     std::string dump_sources;           // Dump opencl sources
@@ -626,8 +626,8 @@ dnnl::post_ops program_node::try_optimize_post_ops(dnnl::post_ops& p_ops, const
         auto cur_idx = static_cast<int>(has_out_scales(attr) ? (cur_post_op_idx >= 1 ? cur_post_op_idx - 1 : 0) : cur_post_op_idx);
         auto prev_idx = static_cast<int>(has_out_scales(attr) ? (prev_post_op_idx >= 1 ? prev_post_op_idx - 1 : 0) : prev_post_op_idx);

-        // if 2 indices are same, add the last post-op to dnnl::post_ops
-        if (prev_idx == post_ops_size - 1 && prev_idx == cur_idx && !type_is_any_optimized(prev_type)) {
+        // If prev_idx and cur_idx are same, add the last post-op to dnnl::post_ops
+        if (prev_post_op_idx == post_ops_size - 1 && prev_idx == cur_idx && !type_is_any_optimized(prev_type)) {
             add_post_op(prev_type, p_ops, optimized_p_ops, prev_idx);
             break;
         }
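The only functional change in this hunk is the left-hand side of the termination check: it now uses the raw prev_post_op_idx instead of the scale-adjusted prev_idx. When output scales are present, prev_idx is shifted down by one, so it can never equal post_ops_size - 1 and the "add the last post-op and break" branch never fires, leaving the surrounding cycle with no way to finish. A minimal standalone sketch of that arithmetic (post_ops_size and has_out_scales are assumed values for illustration, not taken from a real graph):

#include <cstdio>

int main() {
    const int post_ops_size = 3;                     // number of post-ops in the chain (assumed)
    const bool has_out_scales = true;                // output scales present, e.g. an int8 model (assumed)
    const int prev_post_op_idx = post_ops_size - 1;  // raw position of the last post-op

    // Scale-adjusted index, mirroring the expression in try_optimize_post_ops.
    const int prev_idx = has_out_scales ? (prev_post_op_idx >= 1 ? prev_post_op_idx - 1 : 0)
                                        : prev_post_op_idx;

    // Old termination check: compares the adjusted index, which is shifted down by one
    // whenever out-scales are present, so it never reaches post_ops_size - 1 here.
    const bool old_check = (prev_idx == post_ops_size - 1);
    // Fixed termination check: compares the raw index, which does reach post_ops_size - 1.
    const bool new_check = (prev_post_op_idx == post_ops_size - 1);

    std::printf("old check fires: %d, fixed check fires: %d\n", old_check, new_check);
}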
@@ -1228,6 +1228,9 @@ void program_node::init_onednn_primitive_attributes() {
     // Trying to combine multiplications and additions which are placed one after another.
     // We do it in the cycle because some optimization cases can be simplified again from time to time
     do {
+        GPU_DEBUG_GET_INSTANCE(debug_config);
+        GPU_DEBUG_IF(debug_config->disable_onednn_opt_post_ops)
+            break;
         optimized_post_ops = try_optimize_post_ops(optimized_post_ops, attrs, optimization_is_finished);
     } while (!optimization_is_finished);

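The three added lines give the cycle an escape hatch: when the new debug flag is set, the loop breaks before any post-op optimization runs. GPU_DEBUG_GET_INSTANCE and GPU_DEBUG_IF are the plugin's existing debug-configuration macros. A rough standalone approximation of the same guard, using a plain std::getenv check in place of those macros (the helper name and the "unset or 0 means enabled" convention are assumptions of this sketch), could look like:

#include <cstdio>
#include <cstdlib>
#include <cstring>

// Stand-in for the GPU_DEBUG_GET_INSTANCE / GPU_DEBUG_IF pair: read the flag
// from the environment and bail out of the optimization cycle when it is set.
static bool disable_post_op_optimization() {
    const char* v = std::getenv("OV_GPU_DisableOnednnOptPostOps");
    return v != nullptr && std::strcmp(v, "0") != 0;
}

int main() {
    bool optimization_is_finished = false;
    int passes = 0;
    do {
        if (disable_post_op_optimization())
            break;                        // skip post-op fusion entirely, mirroring the new guard
        // ... try_optimize_post_ops(...) would run here and eventually set the flag ...
        optimization_is_finished = true;  // placeholder so the sketch terminates
        ++passes;
    } while (!optimization_is_finished);
    std::printf("ran %d optimization pass(es)\n", passes);
}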
@@ -105,6 +105,7 @@ static void print_help_messages() {
     message_list.emplace_back("OV_GPU_PrintMultiKernelPerf", "Print execution time of each kernel in multi-kernel primitimive");
     message_list.emplace_back("OV_GPU_DisableUsm", "Disable usm usage");
     message_list.emplace_back("OV_GPU_DisableOnednn", "Disable onednn for discrete GPU (no effect for integrated GPU)");
+    message_list.emplace_back("OV_GPU_DisableOnednnOptPostOps", "Disable onednn optimize post operators");
     message_list.emplace_back("OV_GPU_DumpProfilingData", "Enables dump of extended profiling information to specified directory."
                               " Note: Performance impact may be significant as this option enforces host side sync after each primitive");
     message_list.emplace_back("OV_GPU_DumpGraphs", "Dump optimized graph");
@@ -143,6 +144,7 @@ debug_configuration::debug_configuration()
     , print_multi_kernel_perf(0)
     , disable_usm(0)
     , disable_onednn(0)
+    , disable_onednn_opt_post_ops(0)
     , dump_profiling_data(std::string(""))
     , dump_graphs(std::string())
     , dump_sources(std::string())
@@ -167,6 +169,7 @@ debug_configuration::debug_configuration()
     get_gpu_debug_env_var("DumpLayersDstOnly", dump_layers_dst_only);
     get_gpu_debug_env_var("DumpLayersResult", dump_layers_result);
     get_gpu_debug_env_var("DisableOnednn", disable_onednn);
+    get_gpu_debug_env_var("DisableOnednnOptPostOps", disable_onednn_opt_post_ops);
     get_gpu_debug_env_var("DumpProfilingData", dump_profiling_data);
     get_gpu_debug_env_var("DryRunPath", dry_run_path);
     get_gpu_debug_env_var("BaseBatchForMemEstimation", base_batch_for_memory_estimation);
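Together with the header change at the top, the last three hunks follow the usual three-step recipe for a new GPU debug knob: declare the field with a zero default, register an OV_GPU_-prefixed help entry, and fill the field from the environment in the debug_configuration constructor. A simplified stand-in for that pattern (gpu_debug_config and read_int_var are illustrative names, not the real class or helper) might look like:

#include <cstdio>
#include <cstdlib>

struct gpu_debug_config {
    int disable_onednn_opt_post_ops = 0;   // 0 = post-op optimization enabled (default)

    gpu_debug_config() {
        // Fill the field from the environment, keeping the default when the variable is unset.
        read_int_var("OV_GPU_DisableOnednnOptPostOps", disable_onednn_opt_post_ops);
    }

private:
    static void read_int_var(const char* name, int& out) {
        if (const char* v = std::getenv(name))
            out = std::atoi(v);
    }
};

int main() {
    gpu_debug_config cfg;
    std::printf("disable_onednn_opt_post_ops = %d\n", cfg.disable_onednn_opt_post_ops);
}

In the real plugin, the knob is exposed as the OV_GPU_DisableOnednnOptPostOps environment variable registered in the help-message hunk above, giving users a way to bypass the post-op optimization cycle if it misbehaves.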