diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/format.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/format.hpp index 19e77cdbf33..2f76e3d43c9 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/format.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/format.hpp @@ -237,6 +237,10 @@ struct format { g_os_is_zyx_isv8_osv16_isv2, g_os_is_yx_isv8_osv16_isv2, g_os_is_zyx_isv16_osv16, + g_os_zy_is_x_osv8_isv2, + g_os_zy_is_x_osv8_isv4, + g_os_zyx_is_osv8_isv2, + g_os_zyx_is_osv8_isv4, g_os_zyx_is_osv16_isv4, ///< format for imad deconvolution g_os_zyx_is_osv16_isv16, ///< format for imad deconvolution g_os_zyx_is_osv16_isv32, ///< format for imad deconvolution @@ -259,6 +263,8 @@ struct format { g_os_is_yx_osa2_isa8_osv16_isv4, g_os_is_zyx_osa4_isa8_osv8_isv2, g_os_is_zyx_osa4_isa8_osv8_isv4, + g_os_is_zyx_isa8_osv8_isv2, + g_os_is_zyx_isa8_osv8_isv4, g_os_yx_is_osv8_isv2, g_os_yx_is_osv8_isv4, g_os_y_is_x_osv8_isv2, diff --git a/src/plugins/intel_gpu/src/graph/gather.cpp b/src/plugins/intel_gpu/src/graph/gather.cpp index 5cd7053f749..624869bb611 100644 --- a/src/plugins/intel_gpu/src/graph/gather.cpp +++ b/src/plugins/intel_gpu/src/graph/gather.cpp @@ -48,6 +48,8 @@ layout gather_inst::calc_output_layout(gather_node const& node, kernel_impl_para switch (input_layout.format) { case format::bfyx: case format::bfzyx: + case format::b_fs_zyx_fsv16: + case format::b_fs_zyx_fsv32: output_format = format::get_default_format(dims_converted.size()); break; default: diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.cpp index e17c6f8c115..c01cf59b203 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.cpp @@ -210,7 +210,6 @@ attach_fully_connected_onednn::attach_fully_connected_onednn() { }; std::vector fmt = { format::bfyx, - format::bfzyx, }; implementation_map::add(impl_types::onednn, fully_connected_onednn::create, dt, fmt); } diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp index 70bb023572a..b2ce2930fd2 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp @@ -512,6 +512,26 @@ cldnn::format find_format(dnnl::memory::desc desc, bool is_grouped) { && blk.inner_idxs[0] == 1 && blk.inner_idxs[1] == 2) { if (compare_strides(order, {0, 1, 3, 4, 2})) return cldnn::format::g_os_yx_is_osv8_isv4; else if (compare_strides(order, {0, 1, 3, 2, 4})) return cldnn::format::g_os_y_is_x_osv8_isv4; + } else if (desc.data.ndims == 6 && blk.inner_nblks == 2 + && blk.inner_blks[0] == 8 && blk.inner_blks[1] == 2 + && blk.inner_idxs[0] == 1 && blk.inner_idxs[1] == 2) { + if (compare_strides(order, {0, 1, 3, 4, 5, 2})) return cldnn::format::g_os_zyx_is_osv8_isv2; + else if (compare_strides(order, {0, 1, 3, 4, 2, 5})) return cldnn::format::g_os_zy_is_x_osv8_isv2; + } else if (desc.data.ndims == 6 && blk.inner_nblks == 2 + && blk.inner_blks[0] == 8 && blk.inner_blks[1] == 4 + && blk.inner_idxs[0] == 1 && blk.inner_idxs[1] == 2) { + if (compare_strides(order, {0, 1, 3, 4, 5, 2})) return cldnn::format::g_os_zyx_is_osv8_isv4; + else if (compare_strides(order, {0, 1, 3, 4, 2, 5})) return cldnn::format::g_os_zy_is_x_osv8_isv4; + } else if (desc.data.ndims == 6 && blk.inner_nblks == 3 + && blk.inner_blks[0] == 8 && blk.inner_blks[1] == 8 && blk.inner_blks[2] == 2 + && blk.inner_idxs[0] == 2 && blk.inner_idxs[1] == 1 && blk.inner_idxs[2] == 2 + && compare_strides(order, {0, 1, 2, 3, 4, 5})) { + return cldnn::format::g_os_is_zyx_isa8_osv8_isv2; + } else if (desc.data.ndims == 6 && blk.inner_nblks == 3 + && blk.inner_blks[0] == 8 && blk.inner_blks[1] == 8 && blk.inner_blks[2] == 4 + && blk.inner_idxs[0] == 2 && blk.inner_idxs[1] == 1 && blk.inner_idxs[2] == 2 + && compare_strides(order, {0, 1, 2, 3, 4, 5})) { + return cldnn::format::g_os_is_zyx_isa8_osv8_isv4; } } else { if (desc.data.ndims == 4 && blk.inner_nblks == 4 diff --git a/src/plugins/intel_gpu/src/graph/kernel_selector_helper.cpp b/src/plugins/intel_gpu/src/graph/kernel_selector_helper.cpp index a10efb9f5b0..7fb3b1b6947 100644 --- a/src/plugins/intel_gpu/src/graph/kernel_selector_helper.cpp +++ b/src/plugins/intel_gpu/src/graph/kernel_selector_helper.cpp @@ -439,6 +439,10 @@ kernel_selector::weights_layout to_weights_layout(format f, bool is_grouped) { return kernel_selector::weights_layout::g_is_os_zyx_isv16_osv16; case format::g_is_os_yx_isv16_osv16: return kernel_selector::weights_layout::g_is_os_yx_isv16_osv16; + case cldnn::format::g_os_is_zyx_isa8_osv8_isv2: + return kernel_selector::weights_layout::g_os_is_zyx_isa8_osv8_isv2; + case cldnn::format::g_os_is_zyx_isa8_osv8_isv4: + return kernel_selector::weights_layout::g_os_is_zyx_isa8_osv8_isv4; case format::g_os_is_zyx_isv8_osv16_isv2: return kernel_selector::weights_layout::g_os_is_zyx_isv8_osv16_isv2; case format::g_os_is_yx_isv8_osv16_isv2: @@ -465,6 +469,14 @@ kernel_selector::weights_layout to_weights_layout(format f, bool is_grouped) { return kernel_selector::weights_layout::g_os_is_yx_osa2_isa8_osv16_isv2; case format::g_os_zyx_is_osv16_isv4: return kernel_selector::weights_layout::g_os_zyx_is_osv16_isv4; + case format::g_os_zy_is_x_osv8_isv2: + return kernel_selector::weights_layout::g_os_zy_is_x_osv8_isv2; + case format::g_os_zy_is_x_osv8_isv4: + return kernel_selector::weights_layout::g_os_zy_is_x_osv8_isv4; + case format::g_os_zyx_is_osv8_isv2: + return kernel_selector::weights_layout::g_os_zyx_is_osv8_isv2; + case format::g_os_zyx_is_osv8_isv4: + return kernel_selector::weights_layout::g_os_zyx_is_osv8_isv4; case format::g_os_zyx_is_osv16_isv16: return kernel_selector::weights_layout::g_os_zyx_is_osv16_isv16; case format::g_os_zyx_is_osv16_isv32: @@ -581,6 +593,10 @@ cldnn::format::type from_weights_layout(kernel_selector::weights_layout l) { return cldnn::format::g_os_is_yx_osa4_isa8_osv8_isv4; case kernel_selector::weights_layout::g_os_is_zyx_osa4_isa8_osv8_isv4: return cldnn::format::g_os_is_zyx_osa4_isa8_osv8_isv4; + case kernel_selector::weights_layout::g_os_is_zyx_isa8_osv8_isv2: + return cldnn::format::g_os_is_zyx_isa8_osv8_isv2; + case kernel_selector::weights_layout::g_os_is_zyx_isa8_osv8_isv4: + return cldnn::format::g_os_is_zyx_isa8_osv8_isv4; case kernel_selector::weights_layout::g_os_is_zyx_osa4_isa8_osv8_isv2: return cldnn::format::g_os_is_zyx_osa4_isa8_osv8_isv2; case kernel_selector::weights_layout::g_os_is_yx_osv8_isv2: @@ -711,6 +727,14 @@ cldnn::format::type from_weights_layout(kernel_selector::weights_layout l) { return cldnn::format::g_os_is_zyx_osv16_isv16; case kernel_selector::weights_layout::g_os_zyx_is_osv16_isv4: return cldnn::format::g_os_zyx_is_osv16_isv4; + case kernel_selector::weights_layout::g_os_zy_is_x_osv8_isv2: + return cldnn::format::g_os_zy_is_x_osv8_isv2; + case kernel_selector::weights_layout::g_os_zy_is_x_osv8_isv4: + return cldnn::format::g_os_zy_is_x_osv8_isv4; + case kernel_selector::weights_layout::g_os_zyx_is_osv8_isv2: + return cldnn::format::g_os_zyx_is_osv8_isv2; + case kernel_selector::weights_layout::g_os_zyx_is_osv8_isv4: + return cldnn::format::g_os_zyx_is_osv8_isv4; case kernel_selector::weights_layout::g_os_zyx_is_osv16_isv16: return cldnn::format::g_os_zyx_is_osv16_isv16; case kernel_selector::weights_layout::g_os_zyx_is_osv16_isv32: diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/include/batch_headers/fetch_weights.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/include/batch_headers/fetch_weights.cl index 9e9cb16fc13..0d290998d93 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/include/batch_headers/fetch_weights.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/include/batch_headers/fetch_weights.cl @@ -478,51 +478,47 @@ inline uint get_is_os_zyx_isa8_osv8_isv4_index(uint o, uint i, uint z, uint y, u return idx; } -inline uint get_g_os_is_zyx_isa8_osv8_isv2_index(uint g, uint o, uint i, uint z, uint y, uint x, uint size_x, - uint size_y, uint size_z, uint size_ifm, uint size_ofm, uint offset) +inline uint get_g_os_is_zyx_isa_osv_isv_index(uint g, uint o, uint i, uint z, uint y, uint x, + uint size_x, uint size_y, uint size_z, uint size_ifm, uint size_ofm, uint offset, + uint isa, uint osv, uint isv) { - const uint isv2_idx = i % 2; - const uint osv_idx = o % 8; - const uint isv1_idx = (i / 2) % 8; - const uint is_idx = i / 16; - const uint os_idx = o / 8; + const uint isv2_idx = i % isv; + const uint osv_idx = o % osv; + const uint isv1_idx = (i / isv) % isa; + const uint is_idx = i / (isa * isv); + const uint os_idx = o / osv; - const uint if_16_aligned = ((size_ifm + 15) / 16); - const uint of_8_aligned = ((size_ofm + 7) / 8); + const uint if_aligned = ((size_ifm + (isa * isv) - 1) / (isa * isv)); + const uint of_aligned = ((size_ofm + (osv - 1)) / osv); size_t idx = offset + isv2_idx + - osv_idx * 2 + - isv1_idx * 8 * 2 + - x * 8 * 8 * 2 + - y * size_x * 8 * 8 * 2 + - z * size_y * size_x * 8 * 8 * 2 + - is_idx * size_z * size_y * size_x * 8 * 8 * 2 + - os_idx * if_16_aligned * size_z * size_y * size_x * 8 * 8 * 2 + - g * of_8_aligned * if_16_aligned * size_z * size_y * size_x * 8 * 8 * 2; + osv_idx * isv + + isv1_idx * osv * isv + + x * isa * osv * isv + + y * size_x * isa * osv * isv + + z * size_y * size_x * isa * osv * isv + + is_idx * size_z * size_y * size_x * isa * osv * isv + + os_idx * if_aligned * size_z * size_y * size_x * isa * osv * isv + + g * of_aligned * if_aligned * size_z * size_y * size_x * isa * osv * isv; return idx; } -#define GET_FILTER_G_OS_IS_ZYX_ISA8_OSV8_ISV2_INDEX(prefix, g, o, i, z, y, x) \ - get_g_os_is_zyx_isa8_osv8_isv2_index( \ - g, o, i, z, y, x, \ - CAT(prefix, _SIZE_X), \ - CAT(prefix, _SIZE_Y), \ - CAT(prefix, _SIZE_Z), \ - CAT(prefix, _IFM_NUM), \ - CAT(prefix, _OFM_NUM), \ - CAT(prefix, _OFFSET)) +#define GET_FILTER_G_OS_IS_ZYX_ISA_OSV_ISV_INDEX(prefix, g, o, i, z, y, x, isa, osv, isv) \ + get_g_os_is_zyx_isa_osv_isv_index( \ + g, o, i, z, y, x, \ + CAT(prefix, _SIZE_X), \ + CAT(prefix, _SIZE_Y), \ + CAT(prefix, _SIZE_Z), \ + CAT(prefix, _IFM_NUM), \ + CAT(prefix, _OFM_NUM), \ + CAT(prefix, _OFFSET), \ + isa, osv, isv) -#define GET_FILTER_OS_IS_ZYX_ISA8_OSV8_ISV2_INDEX(prefix, o, i, z, y, x) \ - get_g_os_is_zyx_isa8_osv8_isv2_index( \ - 0, o, i, z, y, x, \ - CAT(prefix, _SIZE_X), \ - CAT(prefix, _SIZE_Y), \ - CAT(prefix, _SIZE_Z), \ - CAT(prefix, _IFM_NUM), \ - CAT(prefix, _OFM_NUM), \ - CAT(prefix, _OFFSET)) +#define GET_FILTER_OS_IS_ZYX_ISA8_OSV8_ISV2_INDEX(prefix, o, i, z, y, x) GET_FILTER_G_OS_IS_ZYX_ISA_OSV_ISV_INDEX(prefix, 0, o, i, z, y, x, 8, 8, 2) +#define GET_FILTER_G_OS_IS_ZYX_ISA8_OSV8_ISV2_INDEX(prefix, g, o, i, z, y, x) GET_FILTER_G_OS_IS_ZYX_ISA_OSV_ISV_INDEX(prefix, g, o, i, z, y, x, 8, 8, 2) +#define GET_FILTER_G_OS_IS_ZYX_ISA8_OSV8_ISV4_INDEX(prefix, g, o, i, z, y, x) GET_FILTER_G_OS_IS_ZYX_ISA_OSV_ISV_INDEX(prefix, g, o, i, z, y, x, 8, 8, 4) #define GET_FILTER_IS_OS_ZYX_ISA8_OSV8_ISV2_INDEX(prefix, o, i, z, y, x) \ get_is_os_zyx_isa8_osv8_isv2_index( \ @@ -1722,6 +1718,8 @@ inline uint get_g_os_zyx_is_osv_isv_index(uint g, uint o, uint i, uint z, uint y #define GET_FILTER_G_OS_YX_IS_OSV8_ISV2_INDEX(tensor, g, o, i, y, x) GET_FILTER_G_OS_ZYX_IS_OSV_ISV_INDEX(tensor, g, o, i, z, y, x, 8, 2) #define GET_FILTER_G_OS_YX_IS_OSV8_ISV4_INDEX(tensor, g, o, i, y, x) GET_FILTER_G_OS_ZYX_IS_OSV_ISV_INDEX(tensor, g, o, i, z, y, x, 8, 4) +#define GET_FILTER_G_OS_ZYX_IS_OSV8_ISV2_INDEX(tensor, g, o, i, z, y, x) GET_FILTER_G_OS_ZYX_IS_OSV_ISV_INDEX(tensor, g, o, i, z, y, x, 8, 2) +#define GET_FILTER_G_OS_ZYX_IS_OSV8_ISV4_INDEX(tensor, g, o, i, z, y, x) GET_FILTER_G_OS_ZYX_IS_OSV_ISV_INDEX(tensor, g, o, i, z, y, x, 8, 4) #define GET_FILTER_G_OS_ZYX_IS_OSV16_ISV4_INDEX(tensor, g, o, i, z, y, x) GET_FILTER_G_OS_ZYX_IS_OSV_ISV_INDEX(tensor, g, o, i, z, y, x, 16, 4) #define GET_FILTER_G_OS_ZYX_IS_OSV16_ISV16_INDEX(tensor, g, o, i, z, y, x) GET_FILTER_G_OS_ZYX_IS_OSV_ISV_INDEX(tensor, g, o, i, z, y, x, 16, 16) #define GET_FILTER_G_OS_ZYX_IS_OSV16_ISV32_INDEX(tensor, g, o, i, z, y, x) GET_FILTER_G_OS_ZYX_IS_OSV_ISV_INDEX(tensor, g, o, i, z, y, x, 16, 32) @@ -1789,7 +1787,7 @@ inline uint get_g_os_y_is_x_osv_isv_index(uint g, uint o, uint i, uint y, uint x #define GET_FILTER_G_OS_Y_IS_X_OSV8_ISV2_INDEX(tensor, g, o, i, y, x) GET_FILTER_G_OS_Y_IS_X_OSV_ISV_INDEX(tensor, g, o, i, y, x, 8, 2) #define GET_FILTER_G_OS_Y_IS_X_OSV8_ISV4_INDEX(tensor, g, o, i, y, x) GET_FILTER_G_OS_Y_IS_X_OSV_ISV_INDEX(tensor, g, o, i, y, x, 8, 4) -inline uint get_os_zy_is_x_osv_isv_index(uint o, uint i, uint z, uint y, uint x, +inline uint get_g_os_zy_is_x_osv_isv_index(uint g, uint o, uint i, uint z, uint y, uint x, uint o_size, uint i_size, uint z_size, uint y_size, uint x_size, uint osv, uint isv) { uint is_size = (i_size + isv - 1) / isv; @@ -1807,6 +1805,7 @@ inline uint get_os_zy_is_x_osv_isv_index(uint o, uint i, uint z, uint y, uint x, uint y_pitch = is_pitch * is_size; uint z_pitch = y_pitch * y_size; uint os_pitch = z_pitch * z_size; + uint g_pitch = os_pitch * os_size; uint index = 0; index += isv_index * isv_pitch; @@ -1816,12 +1815,13 @@ inline uint get_os_zy_is_x_osv_isv_index(uint o, uint i, uint z, uint y, uint x, index += y * y_pitch; index += z * z_pitch; index += os_index * os_pitch; + index += g * g_pitch; return index; } -#define GET_FILTER_OS_ZY_IS_X_OSV_ISV_INDEX(tensor, o, i, z, y, x, osv, isv) \ - get_os_zy_is_x_osv_isv_index( \ - o, i, z, y, x, \ +#define GET_FILTER_G_OS_ZY_IS_X_OSV_ISV_INDEX(tensor, g, o, i, z, y, x, osv, isv) \ + get_g_os_zy_is_x_osv_isv_index( \ + g, o, i, z, y, x, \ CAT(tensor, _OFM_NUM), \ CAT(tensor, _IFM_NUM), \ CAT(tensor, _SIZE_Z), \ @@ -1830,5 +1830,7 @@ inline uint get_os_zy_is_x_osv_isv_index(uint o, uint i, uint z, uint y, uint x, osv, isv) -#define GET_FILTER_OS_ZY_IS_X_OSV8_ISV2_INDEX(tensor, o, i, z, y, x) GET_FILTER_OS_ZY_IS_X_OSV_ISV_INDEX(tensor, o, i, z, y, x, 8, 2) -#define GET_FILTER_OS_ZY_IS_X_OSV8_ISV4_INDEX(tensor, o, i, z, y, x) GET_FILTER_OS_ZY_IS_X_OSV_ISV_INDEX(tensor, o, i, z, y, x, 8, 4) +#define GET_FILTER_OS_ZY_IS_X_OSV8_ISV2_INDEX(tensor, o, i, z, y, x) GET_FILTER_G_OS_ZY_IS_X_OSV_ISV_INDEX(tensor, 0, o, i, z, y, x, 8, 2) +#define GET_FILTER_OS_ZY_IS_X_OSV8_ISV4_INDEX(tensor, o, i, z, y, x) GET_FILTER_G_OS_ZY_IS_X_OSV_ISV_INDEX(tensor, 0, o, i, z, y, x, 8, 4) +#define GET_FILTER_G_OS_ZY_IS_X_OSV8_ISV2_INDEX(tensor, g, o, i, z, y, x) GET_FILTER_G_OS_ZY_IS_X_OSV_ISV_INDEX(tensor, g, o, i, z, y, x, 8, 2) +#define GET_FILTER_G_OS_ZY_IS_X_OSV8_ISV4_INDEX(tensor, g, o, i, z, y, x) GET_FILTER_G_OS_ZY_IS_X_OSV_ISV_INDEX(tensor, g, o, i, z, y, x, 8, 4) diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/reorder_weights.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/reorder_weights.cl index b9fb9173556..e8c42ffae5f 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/reorder_weights.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/reorder_weights.cl @@ -75,6 +75,8 @@ inline uint FUNC(get_input_index)(uint g, uint o, uint i, uint z, uint y, uint x return GET_FILTER_IS_OS_YX_ISV16_OSV8_INDEX(INPUT0, o, i, y, x, SUB_GROUP_SIZE); #elif defined INPUT0_LAYOUT_G_OS_IS_ZYX_ISA8_OSV8_ISV2 return GET_FILTER_G_OS_IS_ZYX_ISA8_OSV8_ISV2_INDEX(INPUT0, g, o, i, z, y, x); +#elif defined INPUT0_LAYOUT_G_OS_IS_ZYX_ISA8_OSV8_ISV4 + return GET_FILTER_G_OS_IS_ZYX_ISA8_OSV8_ISV4_INDEX(INPUT0, g, o, i, z, y, x); #elif defined INPUT0_LAYOUT_OS_IS_ZYX_ISA8_OSV8_ISV2 return GET_FILTER_OS_IS_ZYX_ISA8_OSV8_ISV2_INDEX(INPUT0, o, i, z, y, x); #elif defined INPUT0_LAYOUT_IS_OS_ZYX_ISA8_OSV8_ISV2 @@ -195,6 +197,12 @@ inline uint FUNC(get_input_index)(uint g, uint o, uint i, uint z, uint y, uint x return GET_FILTER_G_OS_Y_IS_X_OSV8_ISV2_INDEX(INPUT0, g, o, i, y, x); #elif defined INPUT0_LAYOUT_G_OS_Y_IS_X_OSV8_ISV4 return GET_FILTER_G_OS_Y_IS_X_OSV8_ISV4_INDEX(INPUT0, g, o, i, y, x); +#elif defined INPUT0_LAYOUT_G_OS_ZY_IS_X_OSV8_ISV2 + return GET_FILTER_G_OS_ZY_IS_X_OSV8_ISV2_INDEX(INPUT0, g, o, i, z, y, x); +#elif defined INPUT0_LAYOUT_G_OS_ZYX_IS_OSV8_ISV2 + return GET_FILTER_G_OS_ZYX_IS_OSV8_ISV2_INDEX(INPUT0, g, o, i, z, y, x); +#elif defined INPUT0_LAYOUT_G_OS_ZYX_IS_OSV8_ISV4 + return GET_FILTER_G_OS_ZYX_IS_OSV8_ISV4_INDEX(INPUT0, g, o, i, z, y, x); #else #error reorder_weights.cl: input format - not supported #endif @@ -317,6 +325,8 @@ inline uint FUNC(get_output_index)(uint g, uint o, uint i, uint z, uint y, uint return GET_FILTER_IS_OS_YX_ISV16_OSV8_INDEX(OUTPUT, o, i, y, x, SUB_GROUP_SIZE); #elif defined OUTPUT_LAYOUT_G_OS_IS_ZYX_ISA8_OSV8_ISV2 return GET_FILTER_G_OS_IS_ZYX_ISA8_OSV8_ISV2_INDEX(OUTPUT, g, o, i, z, y, x); +#elif defined OUTPUT_LAYOUT_G_OS_IS_ZYX_ISA8_OSV8_ISV4 + return GET_FILTER_G_OS_IS_ZYX_ISA8_OSV8_ISV4_INDEX(OUTPUT, g, o, i, z, y, x); #elif defined OUTPUT_LAYOUT_OS_IS_ZYX_ISA8_OSV8_ISV2 return GET_FILTER_OS_IS_ZYX_ISA8_OSV8_ISV2_INDEX(OUTPUT, o, i, z, y, x); #elif defined OUTPUT_LAYOUT_IS_OS_ZYX_ISA8_OSV8_ISV2 @@ -435,6 +445,14 @@ inline uint FUNC(get_output_index)(uint g, uint o, uint i, uint z, uint y, uint return GET_FILTER_G_OS_Y_IS_X_OSV8_ISV2_INDEX(OUTPUT, g, o, i, y, x); #elif defined OUTPUT_LAYOUT_G_OS_Y_IS_X_OSV8_ISV4 return GET_FILTER_G_OS_Y_IS_X_OSV8_ISV4_INDEX(OUTPUT, g, o, i, y, x); +#elif defined OUTPUT_LAYOUT_G_OS_ZY_IS_X_OSV8_ISV2 + return GET_FILTER_G_OS_ZY_IS_X_OSV8_ISV2_INDEX(OUTPUT, g, o, i, z, y, x); +#elif defined OUTPUT_LAYOUT_G_OS_ZY_IS_X_OSV8_ISV4 + return GET_FILTER_G_OS_ZY_IS_X_OSV8_ISV4_INDEX(OUTPUT, g, o, i, z, y, x); +#elif defined OUTPUT_LAYOUT_G_OS_ZYX_IS_OSV8_ISV2 + return GET_FILTER_G_OS_ZYX_IS_OSV8_ISV2_INDEX(OUTPUT, g, o, i, z, y, x); +#elif defined OUTPUT_LAYOUT_G_OS_ZYX_IS_OSV8_ISV4 + return GET_FILTER_G_OS_ZYX_IS_OSV8_ISV4_INDEX(OUTPUT, g, o, i, z, y, x); #else #error reorder_weights.cl: output format - not supported #endif diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_common.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_common.cpp index 37fadd81e4d..34b8c560ccb 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_common.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_common.cpp @@ -391,6 +391,10 @@ std::string toString(WeightsLayout layout) { case WeightsLayout::g_os_is_zyx_isv8_osv16_isv2: return "G_OS_IS_ZYX_ISV8_OSV16_ISV2"; case WeightsLayout::g_os_is_yx_isv8_osv16_isv2: return "G_OS_IS_YX_ISV8_OSV16_ISV2"; case WeightsLayout::g_os_is_zyx_isv16_osv16: return "G_OS_IS_ZYX_ISV16_OSV16"; + case WeightsLayout::g_os_zy_is_x_osv8_isv2: return "G_OS_ZY_IS_X_OSV8_ISV2"; + case WeightsLayout::g_os_zy_is_x_osv8_isv4: return "G_OS_ZY_IS_X_OSV8_ISV4"; + case WeightsLayout::g_os_zyx_is_osv8_isv2: return "G_OS_ZYX_IS_OSV8_ISV2"; + case WeightsLayout::g_os_zyx_is_osv8_isv4: return "G_OS_ZYX_IS_OSV8_ISV4"; case WeightsLayout::giy_xs_os_xsv2_osv16__ao32: return "GIY_XS_OS_XSV2_OSV16__AO32"; case WeightsLayout::giy_xs_os_xsv2_osv8__ao32: return "GIY_XS_OS_XSV2_OSV8__AO32"; case WeightsLayout::gs_oi_yxs_gsv4_yxsv4: return "GS_OI_YXS_GSV4_YXSV4"; @@ -400,6 +404,8 @@ std::string toString(WeightsLayout layout) { case WeightsLayout::g_os_is_yx_osa2_isa8_osv8_isv2: return "G_OS_IS_YX_OSA2_ISA8_OSV8_ISV2"; case WeightsLayout::g_os_is_yx_osa4_isa8_osv8_isv4: return "G_OS_IS_YX_OSA4_ISA8_OSV8_ISV4"; case WeightsLayout::g_os_is_zyx_osa4_isa8_osv8_isv4: return "G_OS_IS_ZYX_OSA4_ISA8_OSV8_ISV4"; + case WeightsLayout::g_os_is_zyx_isa8_osv8_isv2: return "G_OS_IS_ZYX_ISA8_OSV8_ISV2"; + case WeightsLayout::g_os_is_zyx_isa8_osv8_isv4: return "G_OS_IS_ZYX_ISA8_OSV8_ISV4"; case WeightsLayout::os_is_yx_osa4_isa8_osv8_isv2: return "OS_IS_YX_OSA4_ISA8_OSV8_ISV2"; case WeightsLayout::os_is_zyx_osa4_isa8_osv8_isv2: return "OS_IS_ZYX_OSA4_ISA8_OSV8_ISV2"; case WeightsLayout::os_is_zyx_osa4_isa8_osv8_isv4: return "OS_IS_ZYX_OSA4_ISA8_OSV8_ISV4"; diff --git a/src/plugins/intel_gpu/src/kernel_selector/tensor_type.cpp b/src/plugins/intel_gpu/src/kernel_selector/tensor_type.cpp index 8744c442e5c..e2dcdecd7ed 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/tensor_type.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/tensor_type.cpp @@ -103,6 +103,8 @@ WeightsTensor::WeightsChannelArray WeightsTensor::weightsChannelArray {{ { WeightsLayout::g_os_is_yx_osa2_isa8_osv8_isv2, { 0, 1, -1, 2, 3, 4 } }, { WeightsLayout::g_os_is_yx_osa4_isa8_osv8_isv4, { 0, 1, -1, 2, 3, 4 } }, { WeightsLayout::g_os_is_zyx_osa4_isa8_osv8_isv4, { 0, 1, 2, 3, 4, 5 } }, + { WeightsLayout::g_os_is_zyx_isa8_osv8_isv2, { 0, 1, 2, 3, 4, 5 } }, + { WeightsLayout::g_os_is_zyx_isa8_osv8_isv4, { 0, 1, 2, 3, 4, 5 } }, { WeightsLayout::os_is_yx_osa4_isa8_osv8_isv2, { 0, 1, -1, 2, 3, -1 } }, { WeightsLayout::os_is_zyx_osa4_isa8_osv8_isv2, { 0, 1, 2, 3, 4, -1 } }, { WeightsLayout::os_is_zyx_osa4_isa8_osv8_isv4, { 0, 1, 2, 3, 4, -1 } }, @@ -179,6 +181,10 @@ WeightsTensor::WeightsChannelArray WeightsTensor::weightsChannelArray {{ { WeightsLayout::g_os_is_zyx_isv8_osv16_isv2, { 0, 1, 2, 3, 4, 5 } }, { WeightsLayout::g_os_is_yx_isv8_osv16_isv2, { 0, 1, -1, 2, 3, 4 } }, { WeightsLayout::g_os_is_zyx_isv16_osv16, { 0, 1, 2, 3, 4, 5 } }, + { WeightsLayout::g_os_zy_is_x_osv8_isv2, { 0, 2, 3, 1, 4, 5 } }, + { WeightsLayout::g_os_zy_is_x_osv8_isv4, { 0, 2, 3, 1, 4, 5 } }, + { WeightsLayout::g_os_zyx_is_osv8_isv2, { 1, 2, 3, 0, 4, 5 } }, + { WeightsLayout::g_os_zyx_is_osv8_isv4, { 1, 2, 3, 0, 4, 5 } }, { WeightsLayout::giy_xs_os_xsv2_osv16__ao32, { 1, 2, -1, 3, 0, 4 } }, { WeightsLayout::giy_xs_os_xsv2_osv8__ao32, { 1, 2, -1, 3, 0, 4 } }, { WeightsLayout::g_os_is_yx_isv16_osv16, { 0, 1, -1, 2, 3, 4 } }, diff --git a/src/plugins/intel_gpu/src/kernel_selector/tensor_type.h b/src/plugins/intel_gpu/src/kernel_selector/tensor_type.h index e33ae51d854..a38cfa50434 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/tensor_type.h +++ b/src/plugins/intel_gpu/src/kernel_selector/tensor_type.h @@ -127,6 +127,8 @@ enum WeightsLayout { g_os_is_yx_osa2_isa8_osv8_isv2, // for MMAD convolution swizzled from ofm 0..7 to 0,4,8,12,16,20,24,28, g_os_is_yx_osa4_isa8_osv8_isv4, // for MMAD convolution swizzled from ofm 0..7 to 0,4,8,12,16,20,24,28, g_os_is_zyx_osa4_isa8_osv8_isv4, // for MMAD convolution swizzled from ofm 0..7 to 0,4,8,12,16,20,24,28, + g_os_is_zyx_isa8_osv8_isv2, + g_os_is_zyx_isa8_osv8_isv4, os_is_yx_osa4_isa8_osv8_isv2, // for MMAD convolution swizzled from ofm 0..7 to 0,4,8,12,16,20,24,28, os_is_zyx_osa4_isa8_osv8_isv2, // for MMAD convolution swizzled from ofm 0..7 to 0,4,8,12,16,20,24,28, os_is_zyx_osa4_isa8_osv8_isv4, // for MMAD convolution swizzled from ofm 0..7 to 0,4,8,12,16,20,24,28, @@ -197,6 +199,10 @@ enum WeightsLayout { g_os_is_zyx_isv8_osv16_isv2, g_os_is_yx_isv8_osv16_isv2, g_os_is_zyx_isv16_osv16, + g_os_zy_is_x_osv8_isv2, + g_os_zy_is_x_osv8_isv4, + g_os_zyx_is_osv8_isv2, + g_os_zyx_is_osv8_isv4, g_os_is_zyx_osv16_isv16, giy_xs_os_xsv2_osv16__ao32, giy_xs_os_xsv2_osv8__ao32, diff --git a/src/plugins/intel_gpu/src/plugin/transformations/convert_pooling_to_reduce.cpp b/src/plugins/intel_gpu/src/plugin/transformations/convert_pooling_to_reduce.cpp index c375ef81173..f70323d14aa 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/convert_pooling_to_reduce.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/convert_pooling_to_reduce.cpp @@ -46,7 +46,7 @@ ov::intel_gpu::ConvertAvgPoolingToReduce::ConvertAvgPoolingToReduce() { ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape{axes_shape.size()}, axes_shape), true); - reduce->set_friendly_name(pool->get_friendly_name() + "/Reduce"); + reduce->set_friendly_name(pool->get_friendly_name()); copy_runtime_info(pool, reduce); replace_node(pool, reduce); diff --git a/src/plugins/intel_gpu/src/runtime/format.cpp b/src/plugins/intel_gpu/src/runtime/format.cpp index ced9d018b1c..fad495981ec 100644 --- a/src/plugins/intel_gpu/src/runtime/format.cpp +++ b/src/plugins/intel_gpu/src/runtime/format.cpp @@ -168,6 +168,10 @@ static const std::map format_traits_map { FMT_TRAITS(g_os_is_zyx_isv8_osv16_isv2, 1, 1, 3, 1, {0, 1, 2, 3, 4, 5}, "goizyx", "oixyz?g", {{1, 8}, {0, 16}, {1, 2}}), FMT_TRAITS(g_os_is_yx_isv8_osv16_isv2, 1, 1, 2, 1, {0, 1, 2, 3, 4}, "goiyx", "oixy??g", {{1, 8}, {0, 16}, {1, 2}}), FMT_TRAITS(g_os_is_zyx_isv16_osv16, 1, 1, 3, 1, {0, 1, 2, 3, 4, 5}, "goizyx", "oixyz?g", {{0, 16}, {1, 16}}), + FMT_TRAITS(g_os_zy_is_x_osv8_isv2, 1, 1, 3, 1, {0, 1, 3, 4, 2, 5}, "gozyix", "oixyz?g", {{0, 8}, {1, 2}}), + FMT_TRAITS(g_os_zy_is_x_osv8_isv4, 1, 1, 3, 1, {0, 1, 3, 4, 2, 5}, "gozyix", "oixyz?g", {{0, 8}, {1, 4}}), + FMT_TRAITS(g_os_zyx_is_osv8_isv2, 1, 1, 3, 1, {0, 1, 3, 4, 5, 2}, "gozyxi", "oixyz?g", {{0, 8}, {1, 2}}), + FMT_TRAITS(g_os_zyx_is_osv8_isv4, 1, 1, 3, 1, {0, 1, 3, 4, 5, 2}, "gozyxi", "oixyz?g", {{0, 8}, {1, 4}}), FMT_TRAITS(g_os_is_yx_osv8_isv2, 1, 1, 2, 1, {0, 1, 2, 3, 4}, "goiyx", "oixy??g", {{0, 8}, {1, 2}}), FMT_TRAITS(g_os_is_yx_osv8_isv4, 1, 1, 2, 1, {0, 1, 2, 3, 4}, "goiyx", "oixy??g", {{0, 8}, {1, 4}}), FMT_TRAITS(g_os_is_yx_osv16_isv4, 1, 1, 2, 1, {0, 1, 2, 3, 4}, "goiyx", "oixy??g", {{0, 16}, {1, 4}}), @@ -183,6 +187,8 @@ static const std::map format_traits_map { FMT_TRAITS(g_os_is_yx_osa2_isa8_osv8_isv2, 1, 1, 2, 1, {0, 1, 2, 3, 4}, "goiyx", "oixy??g", {{0, 16}, {1, 16}}), FMT_TRAITS(g_os_is_yx_osa4_isa8_osv8_isv4, 1, 1, 2, 1, {0, 1, 2, 3, 4}, "goiyx", "oixy??g", {{0, 32}, {1, 32}}), FMT_TRAITS(g_os_is_zyx_osa4_isa8_osv8_isv4, 1, 1, 3, 1, {0, 1, 2, 3, 4, 5}, "goizyx", "oixyz?g", {{0, 32}, {1, 32}}), + FMT_TRAITS(g_os_is_zyx_isa8_osv8_isv2, 1, 1, 3, 1, {0, 1, 2, 3, 4, 5}, "goizyx", "oixyz?g", {{1, 8}, {0, 8}, {1, 2}}), + FMT_TRAITS(g_os_is_zyx_isa8_osv8_isv4, 1, 1, 3, 1, {0, 1, 2, 3, 4, 5}, "goizyx", "oixyz?g", {{1, 8}, {0, 8}, {1, 4}}), FMT_TRAITS(g_os_is_yx_osa4_isa8_osv8_isv2, 1, 1, 2, 1, {0, 1, 2, 3, 4}, "goiyx", "oixy??g", {{0, 32}, {1, 16}}), FMT_TRAITS(g_os_is_zyx_osa4_isa8_osv8_isv2, 1, 1, 3, 1, {0, 1, 2, 3, 4, 5}, "goizyx", "oixyz?g", {{0, 32}, {1, 16}}), FMT_TRAITS(g_os_is_yx_osa2_isa8_osv16_isv4, 1, 1, 2, 1, {0, 1, 2, 3, 4}, "goiyx", "oixy??g", {{0, 32}, {1, 32}}),