[IE CLDNN] Use fsv4 when feature-depth is shallow (#4398)

This commit is contained in:
Mingyu Kim 2021-03-11 22:45:11 +09:00 committed by GitHub
parent 7ac7e90880
commit 9de861d2e1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -1159,6 +1159,9 @@ void program_impl::set_layout_optimizer_attributes(layout_optimizer& lo) {
size_t opt_deconv_layers_b_fs_zyx_fsv16 = 0;
size_t total_crop_layers = 0;
size_t weighted_sum_feature_size = 0;
size_t weight_sum = 0;
for (auto& node : get_processing_order()) {
auto &prim = *node;
if (prim.type() == cldnn::convolution::type_id()) {
@ -1309,4 +1312,35 @@ void program_impl::set_layout_optimizer_attributes(layout_optimizer& lo) {
if (should_use_bs_fs_yx_bsv16_fsv16)
lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::bs_fs_yx_bsv16_fsv16_network, 1);
// This is to avoid using fsv16 for shallow-feature networks.
// This may not be exactly same as real execution graph as layer fusing is not done yet,
// but it is a reasonable approximation.
// Check the expected network efficiency after setting layer optimization attributes.
// If network depth is shallow, it is faster with fsv4.
for (auto& node : get_processing_order()) {
auto &prim = *node;
if (prim.is_in_data_flow() && prim.type() == cldnn::convolution::type_id()) {
size_t num_feature = prim.get_output_layout().size.feature.vector()[0];
size_t num_spatial = 1;
for (auto s : prim.get_output_layout().size.spatial.vector())
num_spatial *= s;
if (lo.get_preferred_format(prim) != format::b_fs_yx_fsv4) {
weight_sum += num_spatial;
weighted_sum_feature_size += num_spatial * num_feature;
}
}
}
size_t weighted_average_feature_depth = weighted_sum_feature_size / std::max(weight_sum, static_cast<size_t>(1));
// Need to confirm that weighted_average_feature_depth > 1 to keep unittest behavior.
if (is_quantized_int8_model && weighted_average_feature_depth < 8 && weighted_average_feature_depth > 1) {
lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::fs_b_yx_fsv32_network, 0);
lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::b_fs_yx_fsv16_network, 0);
lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::bs_fs_yx_bsv16_fsv16_network, 0);
}
}