From 9de861d2e195a8301dc9aaf166cc9b0db1bdaeed Mon Sep 17 00:00:00 2001 From: Mingyu Kim Date: Thu, 11 Mar 2021 22:45:11 +0900 Subject: [PATCH] [IE CLDNN] Use fsv4 when feature-depth is shallow (#4398) --- .../thirdparty/clDNN/src/program.cpp | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/inference-engine/thirdparty/clDNN/src/program.cpp b/inference-engine/thirdparty/clDNN/src/program.cpp index 3ce96ce5f1b..54450db03ea 100644 --- a/inference-engine/thirdparty/clDNN/src/program.cpp +++ b/inference-engine/thirdparty/clDNN/src/program.cpp @@ -1159,6 +1159,9 @@ void program_impl::set_layout_optimizer_attributes(layout_optimizer& lo) { size_t opt_deconv_layers_b_fs_zyx_fsv16 = 0; size_t total_crop_layers = 0; + size_t weighted_sum_feature_size = 0; + size_t weight_sum = 0; + for (auto& node : get_processing_order()) { auto &prim = *node; if (prim.type() == cldnn::convolution::type_id()) { @@ -1309,4 +1312,35 @@ void program_impl::set_layout_optimizer_attributes(layout_optimizer& lo) { if (should_use_bs_fs_yx_bsv16_fsv16) lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::bs_fs_yx_bsv16_fsv16_network, 1); + + + // This is to avoid using fsv16 for shallow-feature networks. + // This may not be exactly same as real execution graph as layer fusing is not done yet, + // but it is a reasonable approximation. + // Check the expected network efficiency after setting layer optimization attributes. + // If network depth is shallow, it is faster with fsv4. 
+ for (auto& node : get_processing_order()) {
+ auto &prim = *node;
+
+ if (prim.is_in_data_flow() && prim.type() == cldnn::convolution::type_id()) {
+ size_t num_feature = prim.get_output_layout().size.feature.vector()[0];
+ size_t num_spatial = 1;
+ for (auto s : prim.get_output_layout().size.spatial.vector())
+ num_spatial *= s;
+
+ if (lo.get_preferred_format(prim) != format::b_fs_yx_fsv4) {
+ weight_sum += num_spatial;
+ weighted_sum_feature_size += num_spatial * num_feature;
+ }
+ }
+ }
+
+ size_t weighted_average_feature_depth = weighted_sum_feature_size / std::max(weight_sum, static_cast<size_t>(1));
+
+ // Need to confirm that weighted_average_feature_depth > 1 to keep unittest behavior.
+ if (is_quantized_int8_model && weighted_average_feature_depth < 8 && weighted_average_feature_depth > 1) {
+ lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::fs_b_yx_fsv32_network, 0);
+ lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::b_fs_yx_fsv16_network, 0);
+ lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::bs_fs_yx_bsv16_fsv16_network, 0);
+ }
}