[GPU] Add b_fs_yx_fsv32 format support for border primitive and update tuning logic of convolution_b_fs_zyx_fsv16_imad kernel for Gen12HP (#9112)

2021-12-09 18:43:46 +03:00 · 2021-12-09 18:43:46 +03:00 · 7002dd4317
commit 7002dd4317
parent 576471cc27
3 changed files with 14 additions and 0 deletions
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_ref.cpp
@@ -24,6 +24,7 @@ ParamsKey BorderKernelRef::GetSupportedKey() const {
    k.EnableInputLayout(DataLayout::bfzyx);
    k.EnableInputLayout(DataLayout::bfwzyx);
    k.EnableInputLayout(DataLayout::b_fs_yx_fsv16);
    k.EnableInputLayout(DataLayout::b_fs_yx_fsv32);
    k.EnableInputLayout(DataLayout::b_fs_zyx_fsv16);
    k.EnableOutputLayout(DataLayout::bfyx);
@@ -32,6 +33,7 @@ ParamsKey BorderKernelRef::GetSupportedKey() const {
    k.EnableOutputLayout(DataLayout::bfzyx);
    k.EnableOutputLayout(DataLayout::bfwzyx);
    k.EnableOutputLayout(DataLayout::b_fs_yx_fsv16);
    k.EnableOutputLayout(DataLayout::b_fs_yx_fsv32);
    k.EnableOutputLayout(DataLayout::b_fs_zyx_fsv16);
    k.EnableTensorOffset();
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_zyx_fsv16_imad.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_zyx_fsv16_imad.cpp
@@ -78,6 +78,13 @@ Convolution_kernel_b_fs_zyx_fsv16_imad::GetBlockParams(const convolution_params&
    // Estimate basic block params ratio
    auto test_block_params = BlockParams{ block_width, 1, 1, simd, in_block_width, 1, 1, 1 };
    // Use default block parameters for asymmetric weights quantization on devices with IMMAD support, because tuning is unoptimized for this case
    if ((params.quantization == QuantizationType::ASYMMETRIC_DATA_AND_WEIGHTS || params.quantization == QuantizationType::ASYMMETRIC_WEIGHTS) &&
        params.engineInfo.bIMMADSupport) {
        return test_block_params;
    }
    auto best_block_params_ratio = EstimateBlockParamsRatio(params, test_block_params);
    size_t max_slm_split = params.engineInfo.maxWorkGroupSize / simd;
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/border.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/border.cpp
@@ -98,6 +98,11 @@ attach_border_impl::attach_border_impl() {
        std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
        std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
        std::make_tuple(data_types::f32, format::b_fs_yx_fsv32),
        std::make_tuple(data_types::f16, format::b_fs_yx_fsv32),
        std::make_tuple(data_types::i8, format::b_fs_yx_fsv32),
        std::make_tuple(data_types::u8, format::b_fs_yx_fsv32),
        std::make_tuple(data_types::f32, format::b_fs_zyx_fsv16),
        std::make_tuple(data_types::f16, format::b_fs_zyx_fsv16),
        std::make_tuple(data_types::i8, format::b_fs_zyx_fsv16),