[GPU] Fix output format for FP16/FP32 Convolution output (#8223)

Sergey Shlyapnikov, 2021-10-28 09:37:25 +03:00 (committed by GitHub)
parent 262c87c6e1
commit db9fbcbec2


@@ -281,6 +281,12 @@ bool layout_optimizer::can_fuse_reorder(program_node& prev, program_node& next,
          (fmt_next == format::bs_fs_yx_bsv32_fsv16 && (prev_output_layout.size.feature[0] == 3 || prev_output_layout.size.feature[0] == 4))))
         return true;
+    // Remove Reorder for Convolution: b_fs_yx_fsv32 (i8/u8) -> b_fs_yx_fsv16 (fp32/fp16)
+    if (next.is_type<convolution>() && fmt_prev == format::b_fs_yx_fsv32 && fmt_next == format::b_fs_yx_fsv16 &&
+        !data_type_traits::is_floating_point(prev_dt) && data_type_traits::is_floating_point(next_dt)) {
+        return true;
+    }
+
     if (next.is_type<quantize>())
         return true;
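
In short, this hunk teaches can_fuse_reorder that a reorder standing between an i8/u8 convolution output in b_fs_yx_fsv32 and an fp16/fp32 consumer expecting b_fs_yx_fsv16 can be removed, since the convolution can produce the fsv16 layout directly. Below is a minimal, self-contained sketch of that predicate; the enums and can_remove_conv_reorder are hypothetical stand-ins, not the clDNN API:

    // Minimal sketch, not the clDNN API: hypothetical stand-ins for the
    // format and data-type traits used by can_fuse_reorder above.
    #include <iostream>

    enum class format { b_fs_yx_fsv16, b_fs_yx_fsv32, other };
    enum class data_type { i8, u8, f16, f32 };

    // Hypothetical equivalent of data_type_traits::is_floating_point.
    static bool is_floating_point(data_type dt) {
        return dt == data_type::f16 || dt == data_type::f32;
    }

    // The new rule: the reorder in front of a convolution is removable when an
    // i8/u8 producer in b_fs_yx_fsv32 feeds an fp16/fp32 consumer in b_fs_yx_fsv16.
    static bool can_remove_conv_reorder(format fmt_prev, data_type prev_dt,
                                        format fmt_next, data_type next_dt) {
        return fmt_prev == format::b_fs_yx_fsv32 && fmt_next == format::b_fs_yx_fsv16 &&
               !is_floating_point(prev_dt) && is_floating_point(next_dt);
    }

    int main() {
        std::cout << std::boolalpha
                  << can_remove_conv_reorder(format::b_fs_yx_fsv32, data_type::i8,
                                             format::b_fs_yx_fsv16, data_type::f16)  // true
                  << '\n'
                  << can_remove_conv_reorder(format::b_fs_yx_fsv32, data_type::i8,
                                             format::b_fs_yx_fsv16, data_type::u8)   // false
                  << '\n';
    }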
@@ -826,10 +832,14 @@ layout layout_optimizer::get_expected_layout(layout const& current_layout,
     /* ***************************** OneDNN impls format selection part ****************************** */
     if (i8_u8_input) {
         if ((non_grouped || valid_grouped || valid_int8_dw) && onednn_valid_post_ops && is_2d) {
-            if (input_layout.size.batch[0] % 16 == 0)
+            if (input_layout.size.batch[0] % 16 == 0) {
                 expected_format = cldnn::format::bs_fs_yx_bsv32_fsv32;
-            else
-                expected_format = cldnn::format::b_fs_yx_fsv32;
+            } else {
+                if (data_type_traits::is_floating_point(output_layout.data_type))
+                    expected_format = cldnn::format::b_fs_yx_fsv16;
+                else
+                    expected_format = cldnn::format::b_fs_yx_fsv32;
+            }
         } else if ((_optimization_attributes.b_fs_yx_fsv16_network &&
                     convolution_b_fs_yx_fsv16_opt(input_layout, output_layout, weights_layout, prim)) && is_2d) {
             if (is_dw)
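
Taken together with the first hunk, this change makes the OneDNN format selection for i8/u8-input convolutions depend on the output data type: an fp16/fp32 output now selects the b_fs_yx_fsv16 blocked layout instead of unconditionally using b_fs_yx_fsv32. A minimal sketch of the decision under those assumptions (the helpers below are hypothetical, the surrounding grouping/post-ops guards are omitted, and formats are returned as strings for brevity):

    // Minimal sketch of the updated selection logic, not the clDNN implementation.
    #include <iostream>
    #include <string>

    enum class data_type { i8, u8, f16, f32 };

    static bool is_floating_point(data_type dt) {
        return dt == data_type::f16 || dt == data_type::f32;
    }

    // batch stands in for input_layout.size.batch[0];
    // output_dt stands in for output_layout.data_type.
    static std::string expected_conv_format(int batch, data_type output_dt) {
        if (batch % 16 == 0)
            return "bs_fs_yx_bsv32_fsv32";
        // New branch: fp16/fp32 output selects the fsv16 blocked layout,
        // matching the reorder-removal rule added to can_fuse_reorder.
        return is_floating_point(output_dt) ? "b_fs_yx_fsv16" : "b_fs_yx_fsv32";
    }

    int main() {
        std::cout << expected_conv_format(1,  data_type::f16) << '\n';  // b_fs_yx_fsv16
        std::cout << expected_conv_format(1,  data_type::i8)  << '\n';  // b_fs_yx_fsv32
        std::cout << expected_conv_format(32, data_type::f16) << '\n';  // bs_fs_yx_bsv32_fsv32
    }

The design point the two hunks share: once the convolution kernel emits fp16/fp32 output in b_fs_yx_fsv16 directly, the separate reorder that previously converted b_fs_yx_fsv32 to b_fs_yx_fsv16 becomes redundant and can be fused away.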