[GPU] Add b_fs_yx_fsv32 format support for border primitive and update tuning logic of convolution_b_fs_zyx_fsv16_imad kernel for Gen12HP (#9112)
This commit is contained in:
parent
576471cc27
commit
7002dd4317
@ -24,6 +24,7 @@ ParamsKey BorderKernelRef::GetSupportedKey() const {
|
|||||||
k.EnableInputLayout(DataLayout::bfzyx);
|
k.EnableInputLayout(DataLayout::bfzyx);
|
||||||
k.EnableInputLayout(DataLayout::bfwzyx);
|
k.EnableInputLayout(DataLayout::bfwzyx);
|
||||||
k.EnableInputLayout(DataLayout::b_fs_yx_fsv16);
|
k.EnableInputLayout(DataLayout::b_fs_yx_fsv16);
|
||||||
|
k.EnableInputLayout(DataLayout::b_fs_yx_fsv32);
|
||||||
k.EnableInputLayout(DataLayout::b_fs_zyx_fsv16);
|
k.EnableInputLayout(DataLayout::b_fs_zyx_fsv16);
|
||||||
|
|
||||||
k.EnableOutputLayout(DataLayout::bfyx);
|
k.EnableOutputLayout(DataLayout::bfyx);
|
||||||
@ -32,6 +33,7 @@ ParamsKey BorderKernelRef::GetSupportedKey() const {
|
|||||||
k.EnableOutputLayout(DataLayout::bfzyx);
|
k.EnableOutputLayout(DataLayout::bfzyx);
|
||||||
k.EnableOutputLayout(DataLayout::bfwzyx);
|
k.EnableOutputLayout(DataLayout::bfwzyx);
|
||||||
k.EnableOutputLayout(DataLayout::b_fs_yx_fsv16);
|
k.EnableOutputLayout(DataLayout::b_fs_yx_fsv16);
|
||||||
|
k.EnableOutputLayout(DataLayout::b_fs_yx_fsv32);
|
||||||
k.EnableOutputLayout(DataLayout::b_fs_zyx_fsv16);
|
k.EnableOutputLayout(DataLayout::b_fs_zyx_fsv16);
|
||||||
|
|
||||||
k.EnableTensorOffset();
|
k.EnableTensorOffset();
|
||||||
|
@ -78,6 +78,13 @@ Convolution_kernel_b_fs_zyx_fsv16_imad::GetBlockParams(const convolution_params&
|
|||||||
|
|
||||||
// Estimate basic block params ratio
|
// Estimate basic block params ratio
|
||||||
auto test_block_params = BlockParams{ block_width, 1, 1, simd, in_block_width, 1, 1, 1 };
|
auto test_block_params = BlockParams{ block_width, 1, 1, simd, in_block_width, 1, 1, 1 };
|
||||||
|
|
||||||
|
// Use the default block parameters for asymmetric weights quantization on devices with IMMAD support, because block-parameter tuning is not yet optimized for that case
|
||||||
|
if ((params.quantization == QuantizationType::ASYMMETRIC_DATA_AND_WEIGHTS || params.quantization == QuantizationType::ASYMMETRIC_WEIGHTS) &&
|
||||||
|
params.engineInfo.bIMMADSupport) {
|
||||||
|
return test_block_params;
|
||||||
|
}
|
||||||
|
|
||||||
auto best_block_params_ratio = EstimateBlockParamsRatio(params, test_block_params);
|
auto best_block_params_ratio = EstimateBlockParamsRatio(params, test_block_params);
|
||||||
|
|
||||||
size_t max_slm_split = params.engineInfo.maxWorkGroupSize / simd;
|
size_t max_slm_split = params.engineInfo.maxWorkGroupSize / simd;
|
||||||
|
@ -98,6 +98,11 @@ attach_border_impl::attach_border_impl() {
|
|||||||
std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
|
std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
|
||||||
std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
|
std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
|
||||||
|
|
||||||
|
std::make_tuple(data_types::f32, format::b_fs_yx_fsv32),
|
||||||
|
std::make_tuple(data_types::f16, format::b_fs_yx_fsv32),
|
||||||
|
std::make_tuple(data_types::i8, format::b_fs_yx_fsv32),
|
||||||
|
std::make_tuple(data_types::u8, format::b_fs_yx_fsv32),
|
||||||
|
|
||||||
std::make_tuple(data_types::f32, format::b_fs_zyx_fsv16),
|
std::make_tuple(data_types::f32, format::b_fs_zyx_fsv16),
|
||||||
std::make_tuple(data_types::f16, format::b_fs_zyx_fsv16),
|
std::make_tuple(data_types::f16, format::b_fs_zyx_fsv16),
|
||||||
std::make_tuple(data_types::i8, format::b_fs_zyx_fsv16),
|
std::make_tuple(data_types::i8, format::b_fs_zyx_fsv16),
|
||||||
|
Loading…
Reference in New Issue
Block a user