[GPU] Remove WA to fallback to cldnn (#14147)
This commit is contained in:
@@ -102,7 +102,10 @@ bool concat_in_place_optimization::match(concatenation_node& node) {
|
||||
else
|
||||
continue;
|
||||
}
|
||||
is_onednn_impl = true;
|
||||
|
||||
// Optimized-out input node is no longer onednn impl.
|
||||
if (!input->can_be_optimized())
|
||||
is_onednn_impl = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -40,10 +40,7 @@ std::map<program_node*, format::type> get_preferred_formats(program& p, layout_o
|
||||
|
||||
#ifdef ENABLE_ONEDNN_FOR_GPU
|
||||
size_t onednn_impls_counter = 0;
|
||||
size_t all_impls_counter = 0;
|
||||
const float onednn_min_threshold = 0.09f;
|
||||
bool should_update_fmt_map = false;
|
||||
|
||||
// Calculate onednn kernels number and all kernels number inside the network
|
||||
for (auto n : p.get_processing_order()) {
|
||||
if (!n->is_in_data_flow())
|
||||
@@ -57,22 +54,9 @@ std::map<program_node*, format::type> get_preferred_formats(program& p, layout_o
|
||||
|
||||
if (impl == impl_types::onednn)
|
||||
onednn_impls_counter++;
|
||||
|
||||
all_impls_counter++;
|
||||
}
|
||||
|
||||
float onednn_usage_ratio = all_impls_counter ? static_cast<float>(onednn_impls_counter) / static_cast<float>(all_impls_counter) : 0.f;
|
||||
|
||||
GPU_DEBUG_IF(debug_config->verbose >= 1) {
|
||||
GPU_DEBUG_COUT << "----------------------------------------------" << std::endl;
|
||||
GPU_DEBUG_COUT << "Onednn kernels number: " << onednn_impls_counter << " from " << all_impls_counter
|
||||
<< " (" << onednn_usage_ratio * 100.f << "%)" << std::endl;
|
||||
GPU_DEBUG_COUT << "Onednn usage threshold: " << onednn_min_threshold * 100.f << "%" << std::endl;
|
||||
}
|
||||
|
||||
// Reverted to cldnn way for cases when onednn kernels number inside the whole network is extremely low =>
|
||||
// improvements from onednn usage less than losses due to unoptimized formats for cldnn kernels, extra reorders, etc.
|
||||
if (onednn_usage_ratio < onednn_min_threshold && lo.get_optimization_attributes().use_onednn_impls) {
|
||||
if (onednn_impls_counter < 1 && lo.get_optimization_attributes().use_onednn_impls) {
|
||||
should_update_fmt_map = true;
|
||||
lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::use_onednn_impls, 0);
|
||||
GPU_DEBUG_IF(debug_config->verbose >= 1) {
|
||||
@@ -80,14 +64,8 @@ std::map<program_node*, format::type> get_preferred_formats(program& p, layout_o
|
||||
}
|
||||
}
|
||||
|
||||
GPU_DEBUG_IF(debug_config->verbose >= 1) {
|
||||
GPU_DEBUG_COUT << "----------------------------------------------" << std::endl;
|
||||
}
|
||||
#endif // ENABLE_ONEDNN_FOR_GPU
|
||||
|
||||
#ifdef ENABLE_ONEDNN_FOR_GPU
|
||||
if (should_update_fmt_map)
|
||||
#endif
|
||||
#endif // ENABLE_ONEDNN_FOR_GPU
|
||||
{
|
||||
for (auto n : p.get_processing_order()) {
|
||||
if (!n->is_in_data_flow())
|
||||
|
||||
@@ -322,7 +322,7 @@ bool layout_optimizer::can_fuse_reorder(program_node& prev, program_node& next,
|
||||
prev.is_input() && (prev_dt == data_types::u8 || prev_dt == data_types::i8))
|
||||
return true;
|
||||
|
||||
if (!use_onednn_impls) {
|
||||
if (!use_onednn_impls || next.get_preferred_impl_type() == impl_types::ocl) {
|
||||
if (next.is_type<convolution>() &&
|
||||
(fmt_prev == format::bfyx || fmt_prev == format::bs_fs_yx_bsv4_fsv2) &&
|
||||
((fmt_next == format::fs_b_yx_fsv32 && next.as<convolution>().get_primitive()->groups == 1) ||
|
||||
|
||||
Reference in New Issue
Block a user