[IE CLDNN] Fixed permute reorder fusing bug for blocked format => non blocked format (#5754)

This commit is contained in:
Taylor Yeonbok Lee 2021-05-24 15:46:11 +09:00 committed by GitHub
parent cec8ad417f
commit e3b4037b69
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 13 additions and 2 deletions

View File

@ -55,7 +55,11 @@ KERNEL (permute_tile_8x8_4x4_fsv)(
#ifdef REORDERED_OUTPUT_TILED_ORDER
if (F_NO_REMAINDER_CONDITION) {
unroll_for (uint lh = 0; lh < TILE_SIZE/*8*/; ++lh) {
#ifdef YZ_REMAINDER_CONDITION
unroll_for (uint lh = 0; lh < (((YZ_REMAINDER_CONDITION)) ? YZ_REMAINDER_SIZE : TILE_SIZE); ++lh) {
#else
unroll_for (uint lh = 0; lh < TILE_SIZE; ++lh) {
#endif
// read
const uint input_idx = INPUT0_GET_TILED_INDEX(INPUT0_TILED_ORDER);
INPUTVTYPE read_data = AS_INPUTVTYPE(VLOAD(0, input + input_idx));
@ -77,7 +81,11 @@ KERNEL (permute_tile_8x8_4x4_fsv)(
}
#ifdef F_REMAINDER_CONDITION
else if (F_REMAINDER_CONDITION) {
unroll_for (uint lh = 0; lh < TILE_SIZE/*8*/; ++lh) {
#ifdef YZ_REMAINDER_CONDITION
unroll_for (uint lh = 0; lh < (((YZ_REMAINDER_CONDITION)) ? YZ_REMAINDER_SIZE : TILE_SIZE); ++lh) {
#else
unroll_for (uint lh = 0; lh < TILE_SIZE; ++lh) {
#endif
unroll_for (uint lw = 0; lw < F_REMAINDER_SIZE; ++lw) {
// read
const uint input_idx = INPUT0_GET_TILED_INDEX(INPUT0_TILED_ORDER);

View File

@ -6686,6 +6686,8 @@ struct permute_reorder_params {
// permute_opt for blocked format => reorder to different dim
#define CASE_PERMUTE_REORDER_TILED_F32_3 {1, 45, 1, 3, 259}, {0, 4, 1, 2, 3}, {0, 2, 3, 1}, data_types::f32, data_types::f32, format::b_fs_zyx_fsv16, format::bfyx
#define CASE_PERMUTE_REORDER_TILED_F32_4 {2, 273, 19, 19}, {0, 3, 1, 2}, {0, 2, 3, 1}, data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfyx
#define CASE_PERMUTE_REORDER_TILED_F32_5 {2, 546, 2, 2}, {0, 3, 1, 2}, {0, 2, 3, 1}, data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfyx
// permute opt for blocked format => reorder to different dim/type
#define CASE_PERMUTE_REORDER_TILED_I8_4 {1, 45, 1, 3, 259}, {0, 4, 1, 2, 3}, {0, 2, 3, 1}, data_types::i8, data_types::f32, format::b_fs_zyx_fsv16, format::bfyx
@ -6753,6 +6755,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, permute_redundant_reorder,
permute_reorder_params{CASE_PERMUTE_REORDER_TILED_F32_1, 3, 4},
permute_reorder_params{CASE_PERMUTE_REORDER_TILED_F32_2, 3, 4},
permute_reorder_params{CASE_PERMUTE_REORDER_TILED_F32_3, 3, 4},
permute_reorder_params{CASE_PERMUTE_REORDER_TILED_F32_4, 3, 4},
permute_reorder_params{CASE_PERMUTE_REORDER_TILED_I8_4, 3, 4},
permute_reorder_params{CASE_PERMUTE_REORDER_TILED_F16_5, 3, 4},
permute_reorder_params{CASE_PERMUTE_REORDER_TILED_F16_6, 3, 4},