[GPU] Add b_fs_yx_fsv32 format support for border primitive and update tuning logic of convolution_b_fs_zyx_fsv16_imad kernel for Gen12HP (#9112)

This commit is contained in:
Sergey Shlyapnikov 2021-12-09 18:43:46 +03:00 committed by GitHub
parent 576471cc27
commit 7002dd4317
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 14 additions and 0 deletions

View File

@@ -24,6 +24,7 @@ ParamsKey BorderKernelRef::GetSupportedKey() const {
k.EnableInputLayout(DataLayout::bfzyx);
k.EnableInputLayout(DataLayout::bfwzyx);
k.EnableInputLayout(DataLayout::b_fs_yx_fsv16);
k.EnableInputLayout(DataLayout::b_fs_yx_fsv32);
k.EnableInputLayout(DataLayout::b_fs_zyx_fsv16);
k.EnableOutputLayout(DataLayout::bfyx);
@@ -32,6 +33,7 @@ ParamsKey BorderKernelRef::GetSupportedKey() const {
k.EnableOutputLayout(DataLayout::bfzyx);
k.EnableOutputLayout(DataLayout::bfwzyx);
k.EnableOutputLayout(DataLayout::b_fs_yx_fsv16);
k.EnableOutputLayout(DataLayout::b_fs_yx_fsv32);
k.EnableOutputLayout(DataLayout::b_fs_zyx_fsv16);
k.EnableTensorOffset();

View File

@@ -78,6 +78,13 @@ Convolution_kernel_b_fs_zyx_fsv16_imad::GetBlockParams(const convolution_params&
// Estimate basic block params ratio
auto test_block_params = BlockParams{ block_width, 1, 1, simd, in_block_width, 1, 1, 1 };
// Use the default block parameters for asymmetric weights quantization on devices with IMMAD support, because block-parameter tuning is not optimized for this case
if ((params.quantization == QuantizationType::ASYMMETRIC_DATA_AND_WEIGHTS || params.quantization == QuantizationType::ASYMMETRIC_WEIGHTS) &&
params.engineInfo.bIMMADSupport) {
return test_block_params;
}
auto best_block_params_ratio = EstimateBlockParamsRatio(params, test_block_params);
size_t max_slm_split = params.engineInfo.maxWorkGroupSize / simd;

View File

@@ -98,6 +98,11 @@ attach_border_impl::attach_border_impl() {
std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
std::make_tuple(data_types::f32, format::b_fs_yx_fsv32),
std::make_tuple(data_types::f16, format::b_fs_yx_fsv32),
std::make_tuple(data_types::i8, format::b_fs_yx_fsv32),
std::make_tuple(data_types::u8, format::b_fs_yx_fsv32),
std::make_tuple(data_types::f32, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::f16, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::i8, format::b_fs_zyx_fsv16),