From 1177d2b282a7699cc398717ccd8a35dd7a375bf0 Mon Sep 17 00:00:00 2001 From: Sergey Shlyapnikov Date: Wed, 15 Dec 2021 13:15:13 +0300 Subject: [PATCH] [GPU] Change FQ output for first Convolution (#9200) * update onednn_gpu * [GPU] Add bs_fs_yx_bsv8_fsv4 format Co-authored-by: Kim,SungEun --- .../clDNN/api/intel_gpu/runtime/tensor.hpp | 2 + .../kernel_selector/common/tensor_type.cpp | 6 +++ .../kernel_selector/common/tensor_type.h | 1 + .../include/batch_headers/fetch_data.cl | 33 ++++++++++++++ .../kernel_selector/core/common/jitter.cpp | 2 + .../core/kernel_selector_common.cpp | 1 + .../clDNN/src/impls/ocl/convolution.cpp | 5 +++ .../clDNN/src/impls/ocl/eltwise.cpp | 7 +++ .../src/impls/onednn/concatenation_onednn.cpp | 5 +++ .../src/impls/onednn/convolution_onednn.cpp | 5 +++ .../src/impls/onednn/deconvolution_onednn.cpp | 5 +++ .../clDNN/src/impls/onednn/utils.cpp | 1 + .../clDNN/src/include/to_string_utils.h | 2 + .../clDNN/src/kernel_selector_helper.cpp | 4 ++ .../thirdparty/clDNN/src/layout_optimizer.cpp | 6 ++- .../thirdparty/clDNN/src/program_helpers.cpp | 43 ++++++++----------- 16 files changed, 102 insertions(+), 26 deletions(-) diff --git a/inference-engine/thirdparty/clDNN/api/intel_gpu/runtime/tensor.hpp b/inference-engine/thirdparty/clDNN/api/intel_gpu/runtime/tensor.hpp index 846cf6e4bf6..aeea86c190e 100644 --- a/inference-engine/thirdparty/clDNN/api/intel_gpu/runtime/tensor.hpp +++ b/inference-engine/thirdparty/clDNN/api/intel_gpu/runtime/tensor.hpp @@ -85,6 +85,7 @@ struct format { bs_fs_zyx_bsv16_fsv16, ///< format used for 3D blocked convolution (batch and features blocked by 16) bs_fs_yx_bsv16_fsv16, ///< format used for 2D blocked convolution (batch and features blocked by 16) bs_fs_yx_bsv4_fsv4, ///< format used for 2D blocked convolution (batch and features blocked by 4) + bs_fs_yx_bsv8_fsv4, ///< format used for 2D blocked convolution (batch and features blocked by 8 and 4) bs_fs_yx_bsv4_fsv2, ///< format used for 2D blocked convolution (batch blocked by 4, features blocked by 2) bs_fs_zyx_bsv4_fsv4, ///< format used for 3D blocked convolution (batch and features blocked by 4) bs_fs_zyx_bsv4_fsv2, ///< format used for 3D blocked convolution (batch blocked by 4, features blocked by 2) @@ -255,6 +256,7 @@ struct format { { bs_fs_zyx_bsv16_fsv16, { 1, 1, 3, 0, "bfzyx", "bfxyz", {{0, 16 }, {1, 16}}}}, { bs_fs_yx_bsv16_fsv16, { 1, 1, 2, 0, "bfyx", "bfxy?", {{0, 16 }, {1, 16}}}}, { bs_fs_yx_bsv4_fsv4, { 1, 1, 2, 0, "bfyx", "bfxy?", {{0, 4 }, {1, 4}}}}, + { bs_fs_yx_bsv8_fsv4, { 1, 1, 2, 0, "bfyx", "bfxy?", {{0, 8 }, {1, 4}}}}, { bs_fs_yx_bsv4_fsv2, { 1, 1, 2, 0, "bfyx", "bfxy?", {{0, 4 }, {1, 2}}}}, { bs_fs_zyx_bsv4_fsv4, { 1, 1, 3, 0, "bfzyx", "bfxyz", {{0, 4 }, {1, 4}}}}, { bs_fs_zyx_bsv4_fsv2, { 1, 1, 3, 0, "bfzyx", "bfxyz", {{0, 4 }, {1, 2}}}}, diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/common/tensor_type.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/common/tensor_type.cpp index 97d6c7da91b..ce7ec16ad4e 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/common/tensor_type.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/common/tensor_type.cpp @@ -29,6 +29,7 @@ DataTensor::DataChannelArray DataTensor::dataChannelArray {{ { DataLayout::bs_fs_zyx_bsv16_fsv16, { 0, 1, 2, -1, 3, 4 } }, { DataLayout::bs_fs_yx_bsv16_fsv16, { 0, 1, -1, -1, 2, 3 } }, { DataLayout::bs_fs_yx_bsv4_fsv4, { 0, 1, -1, -1, 2, 3 } }, + { DataLayout::bs_fs_yx_bsv8_fsv4, { 0, 1, -1, -1, 2, 3 } }, { DataLayout::bs_fs_yx_bsv4_fsv2, { 0, 1, -1, -1, 2, 3 } }, { DataLayout::bs_fs_yx_bsv32_fsv32, { 0, 1, -1, -1, 2, 3 } }, { DataLayout::bs_fs_yx_bsv32_fsv16, { 0, 1, -1, -1, 2, 3 } }, @@ -206,6 +207,11 @@ NDims DataTensor::GetSimpleDims(const std::vector& d, DataLayout l) { newDims[2] = RoundUp(newDims[2], 4); newDims[3] = RoundUp(newDims[3], 4); break; + case bs_fs_yx_bsv8_fsv4: + assert(newDims.size() == 4); + newDims[2] = RoundUp(newDims[2], 4); + newDims[3] = RoundUp(newDims[3], 8); + break; case bs_fs_yx_bsv4_fsv2: assert(newDims.size() == 4); newDims[2] = RoundUp(newDims[2], 2); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/common/tensor_type.h b/inference-engine/thirdparty/clDNN/kernel_selector/common/tensor_type.h index 7ed87ec644b..fb57e4592dc 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/common/tensor_type.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/common/tensor_type.h @@ -39,6 +39,7 @@ enum DataLayout { bs_fs_yx_bsv16_fsv16, // batch, feature, 2D spatial. Blocks of 16 batch and channels bs_fs_zyx_bsv16_fsv16, // batch, feature, 3D spatial. Blocks of 16 batch and channels bs_fs_yx_bsv4_fsv4, // batch, feature, 2D spatial. Blocks of 4 batch and 4 channels + bs_fs_yx_bsv8_fsv4, // batch, feature, 2D spatial. Blocks of 8 batch and 4 channels bs_fs_yx_bsv4_fsv2, // batch, feature, 2D spatial. Blocks of 4 batch and 2 channels bs_fs_yx_bsv32_fsv32, // batch, feature, 2D spatial. Blocks of 32 batch and 32 channels bs_fs_yx_bsv32_fsv16, // batch, feature, 2D spatial. Blocks of 32 batch and 16 channels diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/batch_headers/fetch_data.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/batch_headers/fetch_data.cl index b35522168b5..5af9d161ce3 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/batch_headers/fetch_data.cl +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/batch_headers/fetch_data.cl @@ -506,6 +506,22 @@ inline uint get_bs_fs_zyx_bsv_fsv_index(uint b, uint f, uint z, uint y, uint x, CAT(prefix, _PAD_BEFORE_SIZE_X), \ CAT(prefix, _PAD_AFTER_SIZE_X), 4, 4) +#define GET_DATA_BS_FS_YX_BSV8_FSV4_INDEX(prefix, b, f, y, x) \ + get_bs_fs_zyx_bsv_fsv_index( \ + b, f, 0, y, x, \ + CAT(prefix, _SIZE_X), \ + CAT(prefix, _SIZE_Y), \ + CAT(prefix, _SIZE_Z), \ + CAT(prefix, _FEATURE_NUM), \ + CAT(prefix, _PAD_BEFORE_FEATURE_NUM), \ + CAT(prefix, _PAD_AFTER_FEATURE_NUM), \ + CAT(prefix, _PAD_BEFORE_SIZE_Z), \ + CAT(prefix, _PAD_AFTER_SIZE_Z), \ + CAT(prefix, _PAD_BEFORE_SIZE_Y), \ + CAT(prefix, _PAD_AFTER_SIZE_Y), \ + CAT(prefix, _PAD_BEFORE_SIZE_X), \ + CAT(prefix, _PAD_AFTER_SIZE_X), 8, 4) + #define GET_DATA_BS_FS_YX_BSV4_FSV2_INDEX(prefix, b, f, y, x) \ get_bs_fs_zyx_bsv_fsv_index( \ b, f, 0, y, x, \ @@ -605,6 +621,23 @@ inline uint get_bs_fs_zyx_bsv_fsv_index(uint b, uint f, uint z, uint y, uint x, CAT(prefix, _PAD_BEFORE_SIZE_X), \ CAT(prefix, _PAD_AFTER_SIZE_X), 4, 4) +#define GET_DATA_BS_FS_YX_BSV8_FSV4_INDEX_SAFE(prefix, b, f, y, x) \ + get_bs_fs_zyx_bsv_fsv_index_safe( \ + b, f, 0, y, x, \ + CAT(prefix, _SIZE_X), \ + CAT(prefix, _SIZE_Y), \ + CAT(prefix, _SIZE_Z), \ + CAT(prefix, _FEATURE_NUM), \ + CAT(prefix, _BATCH_NUM), \ + CAT(prefix, _PAD_BEFORE_FEATURE_NUM), \ + CAT(prefix, _PAD_AFTER_FEATURE_NUM), \ + CAT(prefix, _PAD_BEFORE_SIZE_Z), \ + CAT(prefix, _PAD_AFTER_SIZE_Z), \ + CAT(prefix, _PAD_BEFORE_SIZE_Y), \ + CAT(prefix, _PAD_AFTER_SIZE_Y), \ + CAT(prefix, _PAD_BEFORE_SIZE_X), \ + CAT(prefix, _PAD_AFTER_SIZE_X), 8, 4) + #define GET_DATA_BS_FS_YX_BSV4_FSV2_INDEX_SAFE(prefix, b, f, y, x) \ get_bs_fs_zyx_bsv_fsv_index_safe( \ b, f, 0, y, x, \ diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.cpp index e5927422532..73f164b3659 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.cpp @@ -334,6 +334,7 @@ JitDefinitions DataTensorJitConstant::GetDefinitions() const { layout == DataLayout::fs_b_yx_fsv32 || layout == DataLayout::bs_fs_yx_bsv16_fsv16 || layout == DataLayout::bs_fs_yx_bsv4_fsv4 || + layout == DataLayout::bs_fs_yx_bsv8_fsv4 || layout == DataLayout::bs_fs_yx_bsv4_fsv2 || layout == DataLayout::bs_fs_yx_bsv32_fsv16 || layout == DataLayout::bs_fs_yx_bsv32_fsv32) { @@ -346,6 +347,7 @@ JitDefinitions DataTensorJitConstant::GetDefinitions() const { layout == DataLayout::bs_fs_yx_bsv32_fsv32 || layout == DataLayout::bs_fs_yx_bsv32_fsv16 || layout == DataLayout::bs_fs_yx_bsv4_fsv4 || + layout == DataLayout::bs_fs_yx_bsv8_fsv4 || layout == DataLayout::bs_fs_yx_bsv4_fsv2 || layout == DataLayout::bs_fs_yx_bsv16_fsv16) safe_index_func_val = "GET_DATA_" + layout_str + "_INDEX_SAFE(" + _name + ", b, f, y, x)"; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_common.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_common.cpp index 3491e475e07..75349b31f3e 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_common.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_common.cpp @@ -105,6 +105,7 @@ std::string toString(DataLayout l) { case kernel_selector::DataLayout::bs_fs_yx_bsv16_fsv16: return "BS_FS_YX_BSV16_FSV16"; case kernel_selector::DataLayout::bs_fs_zyx_bsv16_fsv16: return "BS_FS_ZYX_BSV16_FSV16"; case kernel_selector::DataLayout::bs_fs_yx_bsv4_fsv4: return "BS_FS_YX_BSV4_FSV4"; + case kernel_selector::DataLayout::bs_fs_yx_bsv8_fsv4: return "BS_FS_YX_BSV8_FSV4"; case kernel_selector::DataLayout::bs_fs_yx_bsv4_fsv2: return "BS_FS_YX_BSV4_FSV2"; case kernel_selector::DataLayout::bs_fs_yx_bsv32_fsv32: return "BS_FS_YX_BSV32_FSV32"; case kernel_selector::DataLayout::bs_fs_yx_bsv32_fsv16: return "BS_FS_YX_BSV32_FSV16"; diff --git a/inference-engine/thirdparty/clDNN/src/impls/ocl/convolution.cpp b/inference-engine/thirdparty/clDNN/src/impls/ocl/convolution.cpp index a0c8a0874a1..69d79e22315 100644 --- a/inference-engine/thirdparty/clDNN/src/impls/ocl/convolution.cpp +++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/convolution.cpp @@ -225,6 +225,11 @@ attach_convolution_impl::attach_convolution_impl() { std::make_tuple(data_types::u8, format::bs_fs_yx_bsv4_fsv4), std::make_tuple(data_types::i8, format::bs_fs_yx_bsv4_fsv4), + std::make_tuple(data_types::f32, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::f16, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::u8, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::i8, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::f32, format::bs_fs_yx_bsv4_fsv2), std::make_tuple(data_types::f16, format::bs_fs_yx_bsv4_fsv2), std::make_tuple(data_types::u8, format::bs_fs_yx_bsv4_fsv2), diff --git a/inference-engine/thirdparty/clDNN/src/impls/ocl/eltwise.cpp b/inference-engine/thirdparty/clDNN/src/impls/ocl/eltwise.cpp index 3e8c233e126..b15c473fb89 100644 --- a/inference-engine/thirdparty/clDNN/src/impls/ocl/eltwise.cpp +++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/eltwise.cpp @@ -214,6 +214,13 @@ attach_eltwise_impl::attach_eltwise_impl() { std::make_tuple(data_types::i32, format::bs_fs_yx_bsv4_fsv4), std::make_tuple(data_types::i64, format::bs_fs_yx_bsv4_fsv4), + std::make_tuple(data_types::f32, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::f16, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::i8, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::u8, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::i32, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::i64, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::f32, format::bs_fs_yx_bsv4_fsv2), std::make_tuple(data_types::f16, format::bs_fs_yx_bsv4_fsv2), std::make_tuple(data_types::i8, format::bs_fs_yx_bsv4_fsv2), diff --git a/inference-engine/thirdparty/clDNN/src/impls/onednn/concatenation_onednn.cpp b/inference-engine/thirdparty/clDNN/src/impls/onednn/concatenation_onednn.cpp index 2367674d762..c9e337a6466 100644 --- a/inference-engine/thirdparty/clDNN/src/impls/onednn/concatenation_onednn.cpp +++ b/inference-engine/thirdparty/clDNN/src/impls/onednn/concatenation_onednn.cpp @@ -119,6 +119,11 @@ attach_concatenation_onednn::attach_concatenation_onednn() { std::make_tuple(data_types::f16, format::bs_fs_yx_bsv4_fsv4), std::make_tuple(data_types::u8, format::bs_fs_yx_bsv4_fsv4), std::make_tuple(data_types::i8, format::bs_fs_yx_bsv4_fsv4), + + std::make_tuple(data_types::f32, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::f16, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::u8, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::i8, format::bs_fs_yx_bsv8_fsv4), }); } diff --git a/inference-engine/thirdparty/clDNN/src/impls/onednn/convolution_onednn.cpp b/inference-engine/thirdparty/clDNN/src/impls/onednn/convolution_onednn.cpp index c10ea0d5b5d..54e0328fdc9 100644 --- a/inference-engine/thirdparty/clDNN/src/impls/onednn/convolution_onednn.cpp +++ b/inference-engine/thirdparty/clDNN/src/impls/onednn/convolution_onednn.cpp @@ -256,6 +256,11 @@ attach_convolution_onednn::attach_convolution_onednn() { std::make_tuple(data_types::u8, format::bs_fs_yx_bsv4_fsv4), std::make_tuple(data_types::i8, format::bs_fs_yx_bsv4_fsv4), + std::make_tuple(data_types::f32, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::f16, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::u8, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::i8, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::f32, format::bs_fs_yx_bsv4_fsv2), std::make_tuple(data_types::f16, format::bs_fs_yx_bsv4_fsv2), std::make_tuple(data_types::u8, format::bs_fs_yx_bsv4_fsv2), diff --git a/inference-engine/thirdparty/clDNN/src/impls/onednn/deconvolution_onednn.cpp b/inference-engine/thirdparty/clDNN/src/impls/onednn/deconvolution_onednn.cpp index bce13ce1698..6b65c181acd 100644 --- a/inference-engine/thirdparty/clDNN/src/impls/onednn/deconvolution_onednn.cpp +++ b/inference-engine/thirdparty/clDNN/src/impls/onednn/deconvolution_onednn.cpp @@ -199,6 +199,11 @@ attach_deconvolution_onednn::attach_deconvolution_onednn() { std::make_tuple(data_types::u8, format::bs_fs_yx_bsv4_fsv4), std::make_tuple(data_types::i8, format::bs_fs_yx_bsv4_fsv4), + std::make_tuple(data_types::f32, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::f16, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::u8, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::i8, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::f32, format::bs_fs_yx_bsv4_fsv2), std::make_tuple(data_types::f16, format::bs_fs_yx_bsv4_fsv2), std::make_tuple(data_types::u8, format::bs_fs_yx_bsv4_fsv2), diff --git a/inference-engine/thirdparty/clDNN/src/impls/onednn/utils.cpp b/inference-engine/thirdparty/clDNN/src/impls/onednn/utils.cpp index a9fd1206e43..72e2effc0e1 100644 --- a/inference-engine/thirdparty/clDNN/src/impls/onednn/utils.cpp +++ b/inference-engine/thirdparty/clDNN/src/impls/onednn/utils.cpp @@ -91,6 +91,7 @@ dnnl::memory::format_tag convert_data_format(cldnn::format fmt) { case cldnn::format::bs_fs_yx_bsv16_fsv16: return dnnl::memory::format_tag::NChw16n16c; case cldnn::format::bs_fs_yx_bsv32_fsv32: return dnnl::memory::format_tag::NChw32n32c; case cldnn::format::bs_fs_yx_bsv4_fsv4: return dnnl::memory::format_tag::ABcd4a4b; + case cldnn::format::bs_fs_yx_bsv8_fsv4: return dnnl::memory::format_tag::ABcd8a4b; case cldnn::format::bs_fs_yx_bsv4_fsv2: return dnnl::memory::format_tag::ABcd4a2b; case cldnn::format::bs_fs_yx_bsv32_fsv16: return dnnl::memory::format_tag::NChw32n16c; case cldnn::format::bs_fs_zyx_bsv16_fsv16: return dnnl::memory::format_tag::NCdhw16n16c; diff --git a/inference-engine/thirdparty/clDNN/src/include/to_string_utils.h b/inference-engine/thirdparty/clDNN/src/include/to_string_utils.h index 66975629a08..801895c275c 100644 --- a/inference-engine/thirdparty/clDNN/src/include/to_string_utils.h +++ b/inference-engine/thirdparty/clDNN/src/include/to_string_utils.h @@ -97,6 +97,8 @@ inline std::string fmt_to_str(format fmt) { return "bs_fs_yx_bsv4_fsv2"; case format::bs_fs_yx_bsv4_fsv4: return "bs_fs_yx_bsv4_fsv4"; + case format::bs_fs_yx_bsv8_fsv4: + return "bs_fs_yx_bsv8_fsv4"; case format::bs_fs_yx_bsv32_fsv32: return "bs_fs_yx_bsv32_fsv32"; case format::b_fs_zyx_fsv16: diff --git a/inference-engine/thirdparty/clDNN/src/kernel_selector_helper.cpp b/inference-engine/thirdparty/clDNN/src/kernel_selector_helper.cpp index ac577c70f22..540e84a81ea 100644 --- a/inference-engine/thirdparty/clDNN/src/kernel_selector_helper.cpp +++ b/inference-engine/thirdparty/clDNN/src/kernel_selector_helper.cpp @@ -136,6 +136,8 @@ kernel_selector::data_layout to_data_layout(format f) { return kernel_selector::data_layout::bs_fs_yx_bsv32_fsv16; case format::bs_fs_yx_bsv4_fsv4: return kernel_selector::data_layout::bs_fs_yx_bsv4_fsv4; + case format::bs_fs_yx_bsv8_fsv4: + return kernel_selector::data_layout::bs_fs_yx_bsv8_fsv4; case format::bs_fs_yx_bsv4_fsv2: return kernel_selector::data_layout::bs_fs_yx_bsv4_fsv2; case format::bs_fs_yx_bsv32_fsv32: @@ -193,6 +195,8 @@ cldnn::format from_data_layout(kernel_selector::data_layout l) { return cldnn::format::bs_fs_yx_bsv4_fsv2; case kernel_selector::data_layout::bs_fs_yx_bsv4_fsv4: return cldnn::format::bs_fs_yx_bsv4_fsv4; + case kernel_selector::data_layout::bs_fs_yx_bsv8_fsv4: + return cldnn::format::bs_fs_yx_bsv8_fsv4; case kernel_selector::data_layout::bs_fs_yx_bsv32_fsv32: return cldnn::format::bs_fs_yx_bsv32_fsv32; case kernel_selector::data_layout::nv12: diff --git a/inference-engine/thirdparty/clDNN/src/layout_optimizer.cpp b/inference-engine/thirdparty/clDNN/src/layout_optimizer.cpp index 6156ef8e8eb..1c4518b1654 100644 --- a/inference-engine/thirdparty/clDNN/src/layout_optimizer.cpp +++ b/inference-engine/thirdparty/clDNN/src/layout_optimizer.cpp @@ -284,10 +284,11 @@ bool layout_optimizer::can_fuse_reorder(program_node& prev, program_node& next, return true; if (next.is_type() && - (fmt_prev == format::b_fs_yx_fsv4 || fmt_prev == format::bs_fs_yx_bsv4_fsv4) && + (fmt_prev == format::b_fs_yx_fsv4 || fmt_prev == format::bs_fs_yx_bsv4_fsv4 || fmt_prev == format::bs_fs_yx_bsv8_fsv4) && ((fmt_next == format::b_fs_yx_fsv32 && (prev_output_layout.size.feature[0] == 3 || prev_output_layout.size.feature[0] == 4)) || (fmt_next == format::bs_fs_yx_bsv32_fsv32 && (prev_output_layout.size.feature[0] == 3 || prev_output_layout.size.feature[0] == 4)) || (fmt_next == format::bs_fs_yx_bsv4_fsv4 && (prev_output_layout.size.feature[0] == 3 || prev_output_layout.size.feature[0] == 4)) || + (fmt_next == format::bs_fs_yx_bsv8_fsv4 && (prev_output_layout.size.feature[0] == 3 || prev_output_layout.size.feature[0] == 4)) || (fmt_next == format::b_fs_yx_fsv16 && next_output_layout.size.feature[0] >= 16 && (prev_output_layout.size.feature[0] == 3 || (prev_output_layout.size.feature[0] == 4 && (prev_dt == data_types::u8 || prev_dt == data_types::i8)))))) return true; @@ -1269,6 +1270,7 @@ impl_types layout_optimizer::get_preferred_impl_type(program_node& node, format format::bs_fs_yx_bsv32_fsv16, format::bs_fs_yx_bsv32_fsv32, format::bs_fs_yx_bsv4_fsv4, + format::bs_fs_yx_bsv8_fsv4, format::bs_fs_yx_bsv4_fsv2, format::bs_fs_zyx_bsv4_fsv4, format::bs_fs_zyx_bsv4_fsv2, @@ -1463,7 +1465,7 @@ format layout_optimizer::get_preferred_format(program_node& node) { if (data_type_traits::is_floating_point(conv.get_output_layout().data_type) || ws.spatial[0] != 7 || conv.get_primitive()->groups > 1) expected = format::bfyx; else - expected = format::bs_fs_yx_bsv4_fsv4; + expected = format::bs_fs_yx_bsv8_fsv4; auto conv_output_layout = conv.get_output_layout(); auto weights_layout = conv.weights(0).get_output_layout(); diff --git a/inference-engine/thirdparty/clDNN/src/program_helpers.cpp b/inference-engine/thirdparty/clDNN/src/program_helpers.cpp index 09e3fbf6c99..bddd611cf8a 100644 --- a/inference-engine/thirdparty/clDNN/src/program_helpers.cpp +++ b/inference-engine/thirdparty/clDNN/src/program_helpers.cpp @@ -139,30 +139,25 @@ std::pair program_helpers::are_layouts_identical(layout const& l1, l return {false, false}; if (l1.get_linear_size() != l2.get_linear_size()) return {false, false}; - if ((l1.format == format::b_fs_yx_fsv4 && l2.format != format::b_fs_yx_fsv4) || - (l2.format == format::b_fs_yx_fsv4 && l1.format != format::b_fs_yx_fsv4) || - (l1.format == format::fs_b_yx_fsv32 && l2.format != format::fs_b_yx_fsv32) || - (l2.format == format::fs_b_yx_fsv32 && l1.format != format::fs_b_yx_fsv32) || - (l1.format == format::b_fs_yx_fsv16 && l2.format != format::b_fs_yx_fsv16) || - (l2.format == format::b_fs_yx_fsv16 && l1.format != format::b_fs_yx_fsv16) || - (l1.format == format::b_fs_yx_fsv32 && l2.format != format::b_fs_yx_fsv32) || - (l2.format == format::b_fs_yx_fsv32 && l1.format != format::b_fs_yx_fsv32) || - (l1.format == format::b_fs_zyx_fsv32 && l2.format != format::b_fs_zyx_fsv32) || - (l2.format == format::b_fs_zyx_fsv32 && l1.format != format::b_fs_zyx_fsv32) || - (l1.format == format::b_fs_zyx_fsv16 && l2.format != format::b_fs_zyx_fsv16) || - (l2.format == format::b_fs_zyx_fsv16 && l1.format != format::b_fs_zyx_fsv16) || - (l1.format == format::bs_fs_yx_bsv4_fsv4 && l2.format != format::bs_fs_yx_bsv4_fsv4) || - (l2.format == format::bs_fs_yx_bsv4_fsv4 && l1.format != format::bs_fs_yx_bsv4_fsv4) || - (l1.format == format::bs_fs_yx_bsv4_fsv2 && l2.format != format::bs_fs_yx_bsv4_fsv2) || - (l2.format == format::bs_fs_yx_bsv4_fsv2 && l1.format != format::bs_fs_yx_bsv4_fsv2) || - (l1.format == format::bs_fs_yx_bsv32_fsv16 && l2.format != format::bs_fs_yx_bsv32_fsv16) || - (l2.format == format::bs_fs_yx_bsv32_fsv16 && l1.format != format::bs_fs_yx_bsv32_fsv16) || - (l1.format == format::bs_fs_yx_bsv32_fsv32 && l2.format != format::bs_fs_yx_bsv32_fsv32) || - (l2.format == format::bs_fs_yx_bsv32_fsv32 && l1.format != format::bs_fs_yx_bsv32_fsv32) || - (l1.format == format::bs_fs_yx_bsv16_fsv16 && l2.format != format::bs_fs_yx_bsv16_fsv16) || - (l2.format == format::bs_fs_yx_bsv16_fsv16 && l1.format != format::bs_fs_yx_bsv16_fsv16) || - (l1.format == format::bs_fs_zyx_bsv16_fsv16 && l2.format != format::bs_fs_zyx_bsv16_fsv16) || - (l2.format == format::bs_fs_zyx_bsv16_fsv16 && l1.format != format::bs_fs_zyx_bsv16_fsv16)) + + auto check_format = [&l1, &l2](cldnn::format format) { + return (l1.format == format && l2.format != format) || + (l2.format == format && l1.format != format); + }; + + if (check_format(format::b_fs_yx_fsv4) || + check_format(format::fs_b_yx_fsv32) || + check_format(format::b_fs_yx_fsv16) || + check_format(format::b_fs_yx_fsv32) || + check_format(format::b_fs_zyx_fsv32) || + check_format(format::b_fs_zyx_fsv16) || + check_format(format::bs_fs_yx_bsv4_fsv4) || + check_format(format::bs_fs_yx_bsv8_fsv4) || + check_format(format::bs_fs_yx_bsv4_fsv2) || + check_format(format::bs_fs_yx_bsv32_fsv16) || + check_format(format::bs_fs_yx_bsv32_fsv32) || + check_format(format::bs_fs_yx_bsv16_fsv16) || + check_format(format::bs_fs_zyx_bsv16_fsv16)) return {false, false}; auto l1_pitch = l1.get_pitches();