// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

// NOTE(review): the header names were stripped in the reviewed patch text; this is
// the minimal set required by the declarations below — confirm against the original.
#include <cstddef>
#include <cstdint>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{

/// @addtogroup cpp_memory Memory description and management
/// @{

/// @brief Format information helper class.
struct format_traits {
    /// @brief String representation of a format.
    std::string str;
    /// @brief Number of batch dimensions in a format.
    size_t batch_num;
    /// @brief Number of feature map/channel dimensions in a format.
    size_t feature_num;
    /// @brief Number of spatial (x,y) dimensions in a format.
    size_t spatial_num;
    /// @brief Number of groups in a format.
    size_t group_num;
    /// @brief Dimensions order. Default {0, 1, 2, ... rank }
    /// NOTE(review): element type was stripped in the patch text — confirm it is uint64_t.
    std::vector<uint64_t> _order;
    /// @brief Dimensions changing order from rare to often.
    std::string order;
    /// @brief Dimensions order for internal storage.
    std::string internal_order;
    /// @brief Block sizes as a vector of pairs of dimension number and block size ordered from rare to often.
    std::vector<std::pair<size_t, int>> block_sizes;
    /// @brief Characters representing batch dimensions in an order.
    static const char* batch_chars() { return "bno"; }
    /// @brief Characters representing feature map/channel dimensions in an order.
    static const char* feature_chars() { return "fic"; }
    /// @brief Characters representing spatial dimensions in an order.
    static const char* spatial_chars() { return "xyzhsw"; }
    /// @brief Characters representing group dimensions in an order.
    static const char* group_chars() { return "g"; }
    /// @brief Checks if @p c represents batch dimension.
    static bool is_batch_char(char c) { return std::string(batch_chars()).find_first_of(c) != std::string::npos; }
    /// @brief Checks if @p c represents feature map/channel dimension.
    static bool is_feature_char(char c) { return std::string(feature_chars()).find_first_of(c) != std::string::npos; }
    /// @brief Checks if @p c represents spatial dimension.
    static bool is_spatial_char(char c) { return std::string(spatial_chars()).find_first_of(c) != std::string::npos; }
    /// @brief Checks if @p c represents group dimensions.
    static bool is_group_char(char c) { return std::string(group_chars()).find_first_of(c) != std::string::npos; }
};

/// @brief Represents memory formats (orders).
/// @n In CNN most of data is described as 4 dimensional blocks. In GPU plugin we describe memory with 4 letters
/// - b - number of blocks in batch. For weights formats: output features - conv, neurons - inner product
/// - f - number of feature maps, features or channels. For weights formats: input features - conv, inputs, inner product
/// - x - spatial, width
/// - y - spatial, height
/// @n
/// For explanation how each format type is implemented in memory we will use naming shown below:
struct format {
    enum type : int32_t {
        // Data formats
        bfyx,                                   ///< the most common format for activations in clDNN.
        bfzyx,                                  ///< format for 5d data tensors
        bfwzyx,                                 ///< batch, feature, 4D spatial
        yxfb,                                   ///< batch first, feature and then spatials
        byxf,                                   ///< used in bitmaps, input from user i.e b images of RGB format
        fyxb,                                   ///< format not used inside clDNN, but supported in reorder as extension
                                                ///< for user provided formats.
        b_fs_yx_fsv16,                          ///< format used for blocked convolution
        b_fs_yx_fsv32,                          ///< format used for blocked int8 convolution
        b_fs_zyx_fsv16,                         ///< format used for 3D blocked convolution (features blocked by 16)
        b_fs_zyx_fsv32,                         ///< format used for blocked int8 3d convolution
        bs_fs_zyx_bsv16_fsv16,                  ///< format used for 3D blocked convolution (batch and features blocked by 16)
        bs_fs_yx_bsv16_fsv16,                   ///< format used for 2D blocked convolution (batch and features blocked by 16)
        bs_fs_yx_bsv4_fsv4,                     ///< format used for 2D blocked convolution (batch and features blocked by 4)
        bs_fs_yx_bsv8_fsv4,                     ///< format used for 2D blocked convolution (batch and features blocked by 8 and 4)
        bs_fs_yx_bsv4_fsv2,                     ///< format used for 2D blocked convolution (batch blocked by 4, features blocked by 2)
        bs_fs_zyx_bsv4_fsv4,                    ///< format used for 3D blocked convolution (batch and features blocked by 4)
        bs_fs_zyx_bsv4_fsv2,                    ///< format used for 3D blocked convolution (batch blocked by 4, features blocked by 2)
        bs_fs_yx_bsv32_fsv32,                   ///< format used for big batches (batch and features blocked by 32)
        bs_fs_yx_bsv32_fsv16,                   ///< format used for big batches (batch blocked by 32, features blocked by 16)
        bs_fs_zyx_bsv32_fsv32,                  ///< format used for big batches (batch and features blocked by 32)
        bs_fs_zyx_bsv32_fsv16,                  ///< format used for big batches (batch blocked by 32, features blocked by 16)
        fs_b_yx_fsv32,                          ///< format for input for fp16 primitives
        b_fs_yx_fsv4,                           ///< format for input for IMAD convolutions
        bs_xs_xsv8_bsv8,                        ///< format used only for fully connected
        bs_xs_xsv8_bsv16,                       ///< format used only for fully connected
        bs_x_bsv16,                             ///< format used only for fully connected weights fp16 batch=1 : bs - batch slice
                                                ///< (responses slice), bsv16 - 16 values of single batch slice, x - flattened plane of (fyx)
        b_fs_yx_32fp,                           ///< format for data for binary convolutions
        winograd_2x3_s1_data,                   ///< format used for input for winograd convolution, F(2,3) -- filter 3x3 with stride 1
        nv12,                                   ///< format for media nv12 input
        image_2d_rgba,                          ///< format for image2d RGBA, always allocates memory for 4 feature maps (even when only 3 are used)

        // Weights formats
        oiyx,                                         ///< the most common format for 2D weights
        ioyx,                                         ///< 2D weights format for deconvolutions
        yxio,                                         ///< format used for 2D weights
        oizyx,                                        ///< the most common format for 3D convolution
        iozyx,                                        ///< 3D weights format for deconvolutions
        iyxo,
        oyxi,
        os_iyx_osv16,                                 ///< format used only for convolution weights
        o_is_yx_isv16,                                ///< format used only for convolution weights
        os_yxi_osv16,                                 ///< format used only for convolution weights
        os_is_yx_osv16_isv16,                         ///< format used for convolution i8 weights
        os_is_zyx_osv32_isv16,
        os_is_zyx_osv64_isv16,
        os_zyxi_osv16,                                ///< format used for weights for 3D convolution
        os_is_yx_isv16_osv16,                         ///< format used for blocked convolution
        os_is_zyx_isv16_osv16,                        ///< format used for weights for blocked 3D convolution
        is_os_zyx_isv16_osv16,                        ///< format used for weights for blocked 3D deconvolution
        is_os_yx_isv16_osv16,                         ///< format used for weights for blocked deconvolution
        os_is_yx_isv8_osv16_isv2,                     ///< format used for weights for blocked 2D convolution
        os_is_zyx_isv8_osv16_isv2,                    ///< format used for weights for blocked 3D convolution
                                                      ///< os - output feature maps slice, i - input feature maps,
                                                      ///< yx - spatials, sv16 - 16 values of single slice.
        os_iyx_osv32,                                 ///< format used only for convolution weights:
                                                      ///< os - output feature maps slice, i - input feature maps,
                                                      ///< yx - spatials, sv32 - 32 values of single slice.
        os_iyx_osv64,                                 ///< format used only for convolution weights:
                                                      ///< os - output feature maps slice, i - input feature maps,
                                                      ///< yx - spatials, sv64 - 64 values of single slice.
        image_2d_weights_c4_fyx_b,                    ///< image format for weights, width size is f*y*x/4
                                                      ///< (4-channels filled with fyx data), height is b
        image_2d_weights_c1_b_fyx,                    ///< image format for weights, width size is b,
                                                      ///< height is f*y*x, single channel
        winograd_2x3_s1_weights,                      ///< format used for weights for winograd non-fused
                                                      ///< convolution, F(2,3) -- filter 3x3 with stride 1
        winograd_2x3_s1_fused_weights,                ///< format used for weights for winograd fused
                                                      ///< convolution, F(2,3) -- filter 3x3 with stride 1
        winograd_6x3_s1_fused_weights,                ///< format used for weights for winograd fused
                                                      ///< convolution, F(6,3) -- filter 3x3 with stride 1
        image_2d_weights_winograd_6x3_s1_fbxyb,       ///< image format used for weights for winograd fused
                                                      ///< convolution, F(6,3) -- filter 3x3 with stride 1
        image_2d_weights_winograd_6x3_s1_xfbyb,       ///< image format used for weights for winograd fused
                                                      ///< convolution, F(6,3) -- filter 3x3 with stride 1
        os_is_yx_isa8_osv8_isv4,                      ///< format for weights for MMAD convolution
        os_is_zyx_isa8_osv8_isv4,                     ///< format for weights for MMAD convolution
        os_is_yx_isa8_osv16_isv4,                     ///< format for weights for fully connected MMAD
        os_is_zyx_isa8_osv16_isv4,                    ///< format for weights for fully connected MMAD
        os_is_yx_isa8_osv8_isv4_swizzled_by_4,        ///< format for weights for MMAD convolution
        os_is_yx_osa4_isa8_osv8_isv4_swizzled_by_4,   ///< format for weights for MMAD fsv32 convolution
        os_is_zyx_osa4_isa8_osv8_isv4_swizzled_by_4,  ///< format for weights for MMAD fsv32 convolution
        os_is_yx_osa4_isa8_osv8_isv2,                 ///< format for weights for MMAD fsv32 convolution
        os_is_zyx_osa4_isa8_osv8_isv2,                ///< format for weights for MMAD fsv32 convolution
        os_is_zyx_osa4_isa8_osv8_isv4,                ///< format for weights for MMAD fsv32 convolution
        os_is_yx_osa4_isa8_osv8_isv4,                 ///< format for weights for MMAD fsv32 convolution
        os_is_yx_osa2_isa8_osv8_isv2,
        os_is_yx_osa2_isa8_osv16_isv2,
        os_is_yx_osa2_isa8_osv16_isv4,
        is_os_yx_isa2_osa8_isv8_osv2,
        is_os_yx_isa4_osa8_isv8_osv4,
        is_o_yx_isv32,                                ///< format for weights for 1x1 MMAD convolutions
        is_o32_yx_isv32_swizzled_by_4,                ///< format for weights for 1x1 MMAD convolutions
        os_is_y_x8_osv8_isv4,                         ///< format for weights for 1x1 MMAD convolutions
        os_is_y_x8_osv8_isv4_swizzled_by_4,           ///< format for weights for 1x1 MMAD convolutions
        os_is_yx_osv16_isv4,                          ///< format for weights for IMAD convolutions
        os_is_yx_osv8_isv4,                           ///< format used for convolution i8 weights
        os_is_yx_osv8_isv2,                           ///< format used for convolution i8 weights
        os_is_zyx_osv16_isv16,                        ///< format for weights for IMAD convolutions
        os_is_yx_osv32_isv4_swizzled_by_2,            ///< format for weights for IMAD convolutions
        os_is_yx_osv32_isv4,                          ///< format for weights for IMAD convolutions
        os_is_zyx_osv32_isv4,                         ///< format for weights for IMAD convolutions
        os_is_yx_osv32_isv32p,                        ///< format for weights for binary convolutions
        lstm_weights_dio,                             ///< dynamic_lstm, direction,
                                                      ///< then IO (I - input size, O - 4 * hidden_size)
        os_is_osv32_isv32_swizzled_by_4,              ///< format for weights for 1x1 IMAD convolution
        os_iyx_osv32__ai32,
        iy_xs_os_xsv2_osv8__ao32,
        iy_xs_os_xsv2_osv16__ao32,
        i_yxs_os_yxsv2_osv16,
        os_i_yxs_osv4_yxsv4,
        os_i_osv16__ai8,                              ///< format used only for fully connected weights
        os_i_osv8__ai8,                               ///< format used only for fully connected weights

        goiyx,                                        ///< format used for weights for 2D convolution
        gioyx,                                        ///< format used for weights for 2D deconvolution
        // NOTE(review): the old tensor.hpp enum also had `yxiog` here; this patch drops it —
        // confirm no remaining users of format::yxiog.
        gyxio,                                        ///< format used for weights for 2D convolution
        goizyx,                                       ///< format used for weights for 3D convolution
        giozyx,                                       ///< format used for weights for 3D deconvolution
        g_os_iyx_osv16,                               ///< format used for weights for 2D convolution
        g_os_iyx_osv32,                               ///< format used for weights for 2D convolution
        gs_oiyx_gsv16,                                ///< format used for weights for 2D convolution
        gs_oizyx_gsv16,                               ///< format used for weights for 3D convolution
        gs_oiyx_gsv32,                                ///< format used for weights for 2D convolution
        g_is_os_zyx_isv16_osv16,                      ///< format used for grouped weights for blocked 3D deconvolution
        g_os_is_yx_osv16_isv4,
        g_os_is_zyx_osv16_isv16,
        g_is_os_yx_isv16_osv16,
        g_os_is_zyx_isv8_osv16_isv2,
        g_os_is_yx_isv8_osv16_isv2,
        g_os_is_zyx_isv16_osv16,
        g_os_zyx_is_osv16_isv4,                       ///< format for imad deconvolution
        g_os_zyx_is_osv16_isv16,                      ///< format for imad deconvolution
        g_os_zyx_is_osv16_isv32,                      ///< format for imad deconvolution
        g_os_zyx_is_osv32_isv4,                       ///< format for imad deconvolution
        g_os_zyx_is_osv32_isv16,                      ///< format for imad deconvolution
        g_os_zyx_is_osv32_isv32,                      ///< format for imad deconvolution
        g_os_is_yx_isv16_osv16,
        gs_oi_yxs_gsv4_yxsv4,
        gs_oi_yxs_gsv16_yxsv4,
        gs_oi_yxs_gsv32_yxsv4,
        gi_yxs_os_yxsv2_osv16,
        giy_xs_os_xsv2_osv8__ao32,
        giy_xs_os_xsv2_osv16__ao32,
        g_os_is_yx_osa4_isa8_osv8_isv4,
        g_os_is_yx_osa4_isa8_osv8_isv2,
        g_os_is_yx_osa2_isa8_osv16_isv2,
        g_os_is_yx_osa2_isa8_osv16_isv4,
        g_os_is_zyx_osa4_isa8_osv8_isv2,
        g_os_is_zyx_osa4_isa8_osv8_isv4,

        format_num,  ///< number of format types
        any = -1
    };

    /// @brief Get format traits for particular @p format::type (defined out of line).
    static const format_traits& traits(type fmt);
    /// @brief Returns number of batch dimensions for a @p format.
    static size_t batch_num(type fmt) { return traits(fmt).batch_num; }
    /// @brief Returns number of feature dimensions for a @p format.
    static size_t feature_num(type fmt) { return traits(fmt).feature_num; }
    /// @brief Returns number of spatial dimensions for a @p format.
    static size_t spatial_num(type fmt) { return traits(fmt).spatial_num; }
    /// @brief Returns number of group dimensions for a @p format.
    static size_t group_num(type fmt) { return traits(fmt).group_num; }
    /// @brief Returns an order of dimensions for a @p format.
    static const std::string& order(type fmt) { return traits(fmt).order; }
    /// @brief Returns the internal order of dimensions for a @p format.
    static const std::string& internal_order(type fmt) { return traits(fmt).internal_order; }
    /// @brief Returns block sizes for @p format.
    static const std::vector<std::pair<size_t, int>>& block_sizes(type fmt) { return traits(fmt).block_sizes; }
    /// @brief Returns number of dimensions contained within a @p format
    static size_t dimension(type fmt) { return order(fmt).size(); }
    /// @brief Checks if @p format is a winograd format
    static bool is_winograd(type fmt) {
        return (fmt == winograd_2x3_s1_data ||
                fmt == winograd_2x3_s1_weights ||
                fmt == winograd_2x3_s1_fused_weights ||
                fmt == winograd_6x3_s1_fused_weights ||
                fmt == image_2d_weights_winograd_6x3_s1_fbxyb ||
                fmt == image_2d_weights_winograd_6x3_s1_xfbyb); }
    /// @brief Checks if @p format is of image2d type
    static bool is_image_2d(type fmt) {
        return (fmt == image_2d_weights_c4_fyx_b ||
                fmt == image_2d_weights_c1_b_fyx ||
                fmt == image_2d_weights_winograd_6x3_s1_fbxyb ||
                fmt == image_2d_weights_winograd_6x3_s1_xfbyb ||
                fmt == nv12 ||
                fmt == image_2d_rgba);
    }
    /// @brief Checks if @p format is weights format
    /// (a format whose internal order mentions an output ('o') or input ('i') feature dimension).
    static bool is_weights_format(type fmt) {
        const auto internal_order = traits(fmt).internal_order;
        const auto weights_chars = { "o", "i" };
        for (const auto& c : weights_chars) {
            if (internal_order.find_first_of(c) != std::string::npos) {
                return true;
            }
        }
        return false;
    }
    /// @brief Checks if @p format is simple data format
    static bool is_simple_data_format(type fmt) {
        return (fmt == yxfb || fmt == byxf ||
                fmt == bfyx || fmt == fyxb ||
                fmt == bfzyx || fmt == bfwzyx);
    }
    /// @brief Checks if @p format is of grouped type
    static bool is_grouped(type fmt) { return group_num(fmt) != 0; }
    /// @brief Checks if @p format is of image type
    static bool is_image(type fmt) { return (is_image_2d(fmt)); }
    /// @brief Checks if @p format is blocked format
    static bool is_blocked(type fmt) { return !(block_sizes(fmt).empty()); }
    /// @brief Checks if @p format is nv12 format
    static bool is_nv12(type fmt) { return (fmt == nv12); }

    /// @brief Returns number of batch dimensions.
    size_t batch_num() const { return traits(value).batch_num; }
    /// @brief Returns number of feature dimensions.
    size_t feature_num() const { return traits(value).feature_num; }
    /// @brief Returns number of spatial dimensions.
    size_t spatial_num() const { return traits(value).spatial_num; }
    /// @brief Returns number of group dimensions.
    size_t group_num() const { return traits(value).group_num; }
    /// @brief Returns an order of dimensions in form of string.
    const std::string& order() const { return traits(value).order; }
    /// @brief Returns the internal order of dimensions in form of string.
    const std::string& internal_order() const { return traits(value).internal_order; }
    /// @brief Returns block sizes as vector of pairs of dimension and block size for that dimension.
    const std::vector<std::pair<size_t, int>>& block_sizes() const { return traits(value).block_sizes; }
    /// @brief Returns number of dimensions contained within this format
    size_t dimension() const { return order(value).size(); }
    /// @brief Checks if @p format is a winograd format
    bool is_winograd() const { return is_winograd(value); }
    /// @brief Checks if @p format is of image 2d type
    bool is_image_2d() const { return is_image_2d(value); }
    /// @brief Checks if @p format is of image type
    bool is_image() const { return is_image(value); }
    /// @brief Checks if @p format is blocked format
    /// FIX(review): was the only non-const predicate, making it uncallable on a const format.
    bool is_blocked() const { return is_blocked(value); }
    /// @brief Checks if @p format is a nv12 format
    bool is_nv12() const { return is_nv12(value); }

    /// @brief Transforms dimension from internal order to external order
    /// @throws std::invalid_argument when the internal dimension has no external counterpart.
    size_t internal_to_external(size_t idx) const {
        auto index = order().find_first_of(internal_order()[idx]);
        if (index == std::string::npos)
            throw std::invalid_argument("Internal dimension index does not map to external index.");
        return index;
    }

    type value;
    /// @brief Implicit conversion from format::type.
    constexpr format(type t) : value(t) {}
    /// @brief Implicit conversion to format::type.
    constexpr operator type() const { return value; }

    /// @brief Human-readable name of the format (defined out of line).
    std::string to_string() const;
};

/// @}
/// @}
}  // namespace cldnn
"utils.hpp" @@ -26,457 +27,6 @@ namespace cldnn { /// @addtogroup cpp_memory Memory description and management /// @{ -/// @brief Format information helper class. -struct format_traits { - /// @brief Number of batch dimensions in a format. - size_t batch_num; - /// @brief Number of feature map/channel dimensions in a format. - size_t feature_num; - /// @brief Number of spatial (x,y) dimensions in a format. - size_t spatial_num; - /// @brief Number of groups in a format. - size_t group_num; - /// @brief Dimensions changing order from rare to often. - std::string order; - /// @brief Dimensions order for internal storage. - std::string internal_order; - /// @brief Block sizes as a vector of pairs of dimension number and block size ordered from rare to often. - std::vector> block_sizes; - /// @brief Characters representing batch dimensions in an order. - static const char* batch_chars() { return "bno"; } - /// @brief Characters representing feature map/channel dimensions in an order. - static const char* feature_chars() { return "fic"; } - /// @brief Characters representing spatial dimensions in an order. - static const char* spatial_chars() { return "xyzhsw"; } - /// @brief Characters representing group dimensions in an order. - static const char* group_chars() { return "g"; } - /// @brief Checks if @p c represents batch dimension. - static bool is_batch_char(char c) { return std::string(batch_chars()).find_first_of(c) != std::string::npos; } - /// @brief Checks if @p c represents feature map/channel dimension. - static bool is_feature_char(char c) { return std::string(feature_chars()).find_first_of(c) != std::string::npos; } - /// @brief Checks if @p c represents spatial dimension. - static bool is_spatial_char(char c) { return std::string(spatial_chars()).find_first_of(c) != std::string::npos; } - /// @brief Checks if @p c represents group dimensions. 
- static bool is_group_char(char c) { return std::string(group_chars()).find_first_of(c) != std::string::npos; } -}; - -/// @brief Represents memory formats (orders). -/// @n In CNN most of data is described as 4 dimensional blocks. In Intel(R) clDNN library we describe memory with 4 letters -/// - b - number of blocks in batch. For weights formats: output features - conv, neurons - inner product -/// - f - number of feature maps, features or channels. For weights formats: input features - conv, inputs, inner product -/// - x - spatial, width -/// - y - spatial, height -/// /n -/// For explanation how each format type is implemented in memory we will use naming shown bellow (b=2,f=3,y=3,x=3): -struct format { - enum type : int32_t { - // Data formats - bfyx, ///< the most common format for activations in clDNN. - bfzyx, ///< format for 5d data tensors - bfwzyx, ///< batch, feature, 4D spatial - yxfb, ///< batch first, feature and than spatials - byxf, ///< used in bitmaps, input from user i.e b images of RGB format - fyxb, ///< format not used inside clDNN, but supported in reorder as extension - ///< for user provided formats. 
- b_fs_yx_fsv16, ///< format used for blocked convolution - b_fs_yx_fsv32, ///< format used for blocked int8 convolution - b_fs_zyx_fsv16, ///< format used for 3D blocked convolution (features blocked by 16) - b_fs_zyx_fsv32, ///< format used for blocked int8 3d convolution - bs_fs_zyx_bsv16_fsv16, ///< format used for 3D blocked convolution (batch and features blocked by 16) - bs_fs_yx_bsv16_fsv16, ///< format used for 2D blocked convolution (batch and features blocked by 16) - bs_fs_yx_bsv4_fsv4, ///< format used for 2D blocked convolution (batch and features blocked by 4) - bs_fs_yx_bsv8_fsv4, ///< format used for 2D blocked convolution (batch and features blocked by 8 and 4) - bs_fs_yx_bsv4_fsv2, ///< format used for 2D blocked convolution (batch blocked by 4, features blocked by 2) - bs_fs_zyx_bsv4_fsv4, ///< format used for 3D blocked convolution (batch and features blocked by 4) - bs_fs_zyx_bsv4_fsv2, ///< format used for 3D blocked convolution (batch blocked by 4, features blocked by 2) - bs_fs_yx_bsv32_fsv32, ///< format used for big batches (batch and features blocked by 32) - bs_fs_yx_bsv32_fsv16, ///< format used for big batches (batch blocked by 32, features blocked by 16) - bs_fs_zyx_bsv32_fsv32, ///< format used for big batches (batch and features blocked by 32) - bs_fs_zyx_bsv32_fsv16, ///< format used for big batches (batch blocked by 32, features blocked by 16) - fs_b_yx_fsv32, ///< format for input for fp16 primitives - b_fs_yx_fsv4, ///< format for input for IMAD convolutions - bs_xs_xsv8_bsv8, ///< format used only for fully connected weights: bs - batch slice, - ///< xs - x slice, bsv8 - 8 values of single slice. - bs_xs_xsv8_bsv16, ///< format used only for fully connected weights: bs - batch slice, - ///< xs - x slice, bsv16 - 16 values of single slice. 
- bs_x_bsv16, ///< format used only for fully connected weights fp16 batch=1 : bs - batch slice - ///< (responses slice), bsv16 - 16 values of single batch slice, x - flattened plane of (fyx) - b_fs_yx_32fp, ///< format for data for binary convolutions - winograd_2x3_s1_data, ///< format used for input for winograd convolution, F(2,3) -- filter 3x3 with stride 1 - nv12, ///< format for media nv12 input - image_2d_rgba, ///< format for image2d RGBA, always allocates memory for 4 feature maps (even when only 3 are used) - - // Weights formats - oiyx, ///< the most common format for 2D weights - ioyx, ///< 2D weights format for deconvolutions - yxio, ///< format used 2D weights - oizyx, ///< the most common format for 3D convolution - iozyx, ///< 3D weights format for deconvolutions - iyxo, - os_iyx_osv16, ///< format used only for convolution weights - o_is_yx_isv16, ///< format used only for convolution weights - os_yxi_osv16, ///< format used only for convolution weights - os_is_yx_osv16_isv16, ///< format used for convolution i8 weights - os_is_zyx_osv32_isv16, - os_is_zyx_osv64_isv16, - os_zyxi_osv16, ///< format used for weights for 3D convolution - os_is_yx_isv16_osv16, ///< format used for blocked convolution - os_is_zyx_isv16_osv16, ///< format used for weights for blocked 3D convolution - is_os_zyx_isv16_osv16, ///< format used for weights for blocked 3D deconvolution - is_os_yx_isv16_osv16, ///< format used for weights for blocked deconvolution - os_is_yx_isv8_osv16_isv2, ///< format used for weights for blocked 2D convolution - os_is_zyx_isv8_osv16_isv2, ///< format used for weights for blocked 3D convolution - ///< os - output feature maps slice, i - input feature maps, - ///< yx - spatials, sv16 - 16 values of single slice. - os_iyx_osv32, ///< format used only for convolution weights: - ///< os - output feature maps slice, i - input feature maps, - ///< yx - spatials, sv32 - 32 values of single slice. 
- os_iyx_osv64, ///< format used only for convolution weights: - ///< os - output feature maps slice, i - input feature maps, - ///< yx - spatials, sv64 - 64 values of single slice. - image_2d_weights_c4_fyx_b, ///< image format for weights, width size is f*y*x/4 - ///< (4-channels filled with fyx data), height is b - image_2d_weights_c1_b_fyx, ///< image format for weights, width size is b, - ///< height is f*y*x, single channel - winograd_2x3_s1_weights, ///< format used for weights for winograd non-fused - ///< convolution, F(2,3) -- filter 3x3 with stride 1 - winograd_2x3_s1_fused_weights, ///< format used for weights for winograd fused - ///< convolution, F(2,3) -- filter 3x3 with stride 1 - winograd_6x3_s1_fused_weights, ///< format used for weights for winograd fused - ///< convolution, F(6,3) -- filter 3x3 with stride 1 - image_2d_weights_winograd_6x3_s1_fbxyb, ///< image format used for weights for winograd fused - ///< convolution, F(6,3) -- filter 3x3 with stride 1 - image_2d_weights_winograd_6x3_s1_xfbyb, ///< image format used for weights for winograd fused - ///< convolution, F(6,3) -- filter 3x3 with stride 1 - os_is_yx_isa8_osv8_isv4, ///< format for weights for MMAD convolution - os_is_zyx_isa8_osv8_isv4, ///< format for weights for MMAD convolution - os_is_yx_isa8_osv16_isv4, ///< format for weights for fully connected MMAD - os_is_zyx_isa8_osv16_isv4, ///< format for weights for fully connected MMAD - os_is_yx_isa8_osv8_isv4_swizzled_by_4, ///< format for weights for MMAD convolution - os_is_yx_osa4_isa8_osv8_isv4_swizzled_by_4, ///< format for weights for MMAD fsv32 convolution - os_is_zyx_osa4_isa8_osv8_isv4_swizzled_by_4, ///< format for weights for MMAD fsv32 convolution - os_is_yx_osa4_isa8_osv8_isv2, ///< format for weights for MMAD fsv32 convolution - os_is_zyx_osa4_isa8_osv8_isv2, ///< format for weights for MMAD fsv32 convolution - os_is_zyx_osa4_isa8_osv8_isv4, ///< format for weights for MMAD fsv32 convolution - 
os_is_yx_osa4_isa8_osv8_isv4, ///< format for weights for MMAD fsv32 convolution - os_is_yx_osa2_isa8_osv8_isv2, - os_is_yx_osa2_isa8_osv16_isv2, - os_is_yx_osa2_isa8_osv16_isv4, - is_os_yx_isa2_osa8_isv8_osv2, - is_os_yx_isa4_osa8_isv8_osv4, ///< format for weights for MMAD fsv32 convolution - is_o_yx_isv32, ///< format for weights for 1x1 MMAD convolutions - is_o32_yx_isv32_swizzled_by_4, ///< format for weights for 1x1 MMAD convolutions - os_is_y_x8_osv8_isv4, ///< format for weights for 1x1 MMAD convolutions - os_is_y_x8_osv8_isv4_swizzled_by_4, ///< format for weights for 1x1 MMAD convolutions - os_is_yx_osv16_isv4, ///< format for weights for IMAD convolutions - os_is_yx_osv8_isv4, ///< format used for convolution i8 weights - os_is_yx_osv8_isv2, ///< format used for convolution i8 weights - os_is_zyx_osv16_isv16, ///< format for weights for IMAD convolutions - os_is_yx_osv32_isv4_swizzled_by_2, ///< format for weights for IMAD convolutions - os_is_yx_osv32_isv4, ///< format for weights for IMAD convolutions - os_is_zyx_osv32_isv4, ///< format for weights for IMAD convolutions - os_is_yx_osv32_isv32p, ///< format for weights for binary convolutions - lstm_weights_dio, ///< dynamic_lstm, direction, - ///< than IO (I - input size, O - 4 * hidden_size) - os_is_osv32_isv32_swizzled_by_4, ///< format for weights for 1x1 IMAD convolution - os_iyx_osv32__ai32, - iy_xs_os_xsv2_osv8__ao32, - iy_xs_os_xsv2_osv16__ao32, - i_yxs_os_yxsv2_osv16, - os_i_yxs_osv4_yxsv4, - - goiyx, ///< format used for weights for 2D convolution - gioyx, ///< format used for weights for 2D deconvolution - yxiog, ///< format used for weights for 2D convolution - gyxio, ///< format used for weights for 2D convolution - goizyx, ///< format used for weights for 3D convolution - giozyx, ///< format used for weights for 3D deconvolution - g_os_iyx_osv16, ///< format used for weights for 2D convolution - g_os_iyx_osv32, ///< format used for weights for 2D convolution - gs_oiyx_gsv16, ///< format 
used for weights for 2D convolution - gs_oizyx_gsv16, ///< format used for weights for 3D convolution - gs_oiyx_gsv32, ///< format used for weights for 2D convolution - g_is_os_zyx_isv16_osv16, ///< format used for grouped weights for blocked 3D deconvolution - g_os_is_yx_osv16_isv4, - g_os_is_zyx_osv16_isv16, - g_is_os_yx_isv16_osv16, - g_os_is_zyx_isv8_osv16_isv2, - g_os_is_yx_isv8_osv16_isv2, - g_os_is_zyx_isv16_osv16, - g_os_zyx_is_osv16_isv4, ///< format for imad deconvolution - g_os_zyx_is_osv16_isv16, ///< format for imad deconvolution - g_os_zyx_is_osv16_isv32, ///< format for imad deconvolution - g_os_zyx_is_osv32_isv4, ///< format for imad deconvolution - g_os_zyx_is_osv32_isv16, ///< format for imad deconvolution - g_os_zyx_is_osv32_isv32, ///< format for imad deconvolution - g_os_is_yx_isv16_osv16, - gs_oi_yxs_gsv4_yxsv4, - gs_oi_yxs_gsv16_yxsv4, - gs_oi_yxs_gsv32_yxsv4, - gi_yxs_os_yxsv2_osv16, - giy_xs_os_xsv2_osv8__ao32, - giy_xs_os_xsv2_osv16__ao32, - g_os_is_yx_osa4_isa8_osv8_isv4, - g_os_is_yx_osa4_isa8_osv8_isv2, - g_os_is_yx_osa2_isa8_osv16_isv2, - g_os_is_yx_osa2_isa8_osv16_isv4, - g_os_is_zyx_osa4_isa8_osv8_isv2, - g_os_is_zyx_osa4_isa8_osv8_isv4, - - format_num, ///< number of format types - any = -1 - }; - - /// @brief Get format traits for particular @p format::type - static const format_traits& traits(type fmt) { - static const std::map traits { - // B - number of Batch dimensions - // F - number of Feature dimensions - // S - number of Spatial dimensions - // G - number of Group dimensions - // Order - dims changing order from rare to often - // Inner order - dims order for internal storage in _sizes array - // Block sizes - vector of pairs of dimension number (by inner order) and block size ordered from rare to often - // Format B F S G Order Inner order Block sizes - { yxfb, { 1, 1, 2, 0, "yxfb", "bfxy?", {}}}, - { byxf, { 1, 1, 2, 0, "byxf", "bfxy?", {}}}, - { bfyx, { 1, 1, 2, 0, "bfyx", "bfxy?", {}}}, - { fyxb, { 1, 1, 2, 0, "fyxb", 
"bfxy?", {}}}, - { b_fs_yx_fsv16, { 1, 1, 2, 0, "bfyx", "bfxy", {{1, 16}}}}, - { b_fs_yx_fsv32, { 1, 1, 2, 0, "bfyx", "bfxy", {{1, 32}}}}, - { b_fs_zyx_fsv32, { 1, 1, 3, 0, "bfzyx", "bfxyz", {{1, 32}}}}, - { bs_xs_xsv8_bsv8, { 1, 1, 1, 0, "bx", "b?x??", {{2, 8}, {0, 8}}}}, - { bs_xs_xsv8_bsv16, { 1, 1, 1, 0, "bx", "b?x??", {{2, 8}, {0, 16}}}}, - { bs_x_bsv16, { 1, 1, 1, 0, "bx", "b?x??", {{0, 16}}}}, - { winograd_2x3_s1_data, { 1, 1, 2, 0, "bxyf", "bfxy?", {}}}, - { b_fs_yx_fsv4, { 1, 1, 2, 0, "bfyx", "bfxy?", {{1, 4}}}}, - { bfzyx, { 1, 1, 3, 0, "bfzyx", "bfxyz", {}}}, - { bfwzyx, { 1, 1, 4, 0, "bfwzyx", "bfxyzw", {}}}, - { fs_b_yx_fsv32, { 1, 1, 2, 0, "fbyx", "bfxy?", {{1, 32}}}}, - { b_fs_yx_32fp, { 1, 1, 2, 0, "bfyx", "bfxy?", {}}}, - { b_fs_zyx_fsv16, { 1, 1, 3, 0, "bfzyx", "bfxyz", {{1, 16}}}}, - { bs_fs_zyx_bsv16_fsv16, { 1, 1, 3, 0, "bfzyx", "bfxyz", {{0, 16 }, {1, 16}}}}, - { bs_fs_yx_bsv16_fsv16, { 1, 1, 2, 0, "bfyx", "bfxy?", {{0, 16 }, {1, 16}}}}, - { bs_fs_yx_bsv4_fsv4, { 1, 1, 2, 0, "bfyx", "bfxy?", {{0, 4 }, {1, 4}}}}, - { bs_fs_yx_bsv8_fsv4, { 1, 1, 2, 0, "bfyx", "bfxy?", {{0, 8 }, {1, 4}}}}, - { bs_fs_yx_bsv4_fsv2, { 1, 1, 2, 0, "bfyx", "bfxy?", {{0, 4 }, {1, 2}}}}, - { bs_fs_zyx_bsv4_fsv4, { 1, 1, 3, 0, "bfzyx", "bfxyz", {{0, 4 }, {1, 4}}}}, - { bs_fs_zyx_bsv4_fsv2, { 1, 1, 3, 0, "bfzyx", "bfxyz", {{0, 4 }, {1, 2}}}}, - { bs_fs_zyx_bsv32_fsv32, { 1, 1, 3, 0, "bfzyx", "bfxyz", {{0, 32 }, {1, 32}}}}, - { bs_fs_zyx_bsv32_fsv16, { 1, 1, 3, 0, "bfzyx", "bfxyz", {{0, 32 }, {1, 16}}}}, - { bs_fs_yx_bsv32_fsv32, { 1, 1, 2, 0, "bfyx", "bfxy?", {{0, 32 }, {1, 32}}}}, - { bs_fs_yx_bsv32_fsv16, { 1, 1, 2, 0, "bfyx", "bfxy?", {{0, 32 }, {1, 16}}}}, - { nv12, { 1, 1, 2, 0, "bfyx", "bfxy?", {}}}, - { image_2d_rgba, { 1, 1, 2, 0, "bfyx", "bfxy?", {}}}, - - { oiyx, { 1, 1, 2, 0, "oiyx", "oixy", {}}}, - { ioyx, { 1, 1, 2, 0, "ioyx", "oixy", {}}}, - { iyxo, { 1, 1, 2, 0, "iyxo", "oixy", {}}}, - { yxio, { 1, 1, 2, 0, "yxio", "oixy?", {}}}, - { oizyx, { 1, 1, 3, 0, 
"oizyx", "oixyz", {}}}, - { iozyx, { 1, 1, 3, 0, "iozyx", "oixyz", {}}}, - { os_is_yx_isv16_osv16, { 1, 1, 2, 0, "oiyx", "oixy", {{1, 16}, {0, 16}}}}, - { o_is_yx_isv16, { 1, 1, 2, 0, "oiyx", "oixy?", {{1, 16}}}}, - { os_yxi_osv16, { 1, 1, 2, 0, "oyxi", "oixy?", {{0, 16}}}}, - { os_iyx_osv16, { 1, 1, 2, 0, "oiyx", "oixy?", {{0, 16}}}}, - { os_iyx_osv32, { 1, 1, 2, 0, "oiyx", "oixy?", {{0, 32}}}}, - { os_iyx_osv64, { 1, 1, 2, 0, "oiyx", "oixy?", {{0, 64}}}}, - { winograd_2x3_s1_weights, { 1, 1, 2, 0, "oiyx", "oixy?", {}}}, - { winograd_2x3_s1_fused_weights, { 1, 1, 2, 0, "xyio", "oixy?", {}}}, - { winograd_6x3_s1_fused_weights, { 1, 1, 2, 0, "xyio", "oixy?", {}}}, - { image_2d_weights_winograd_6x3_s1_fbxyb, { 1, 1, 2, 0, "xyio", "oixy?", {}}}, - { image_2d_weights_winograd_6x3_s1_xfbyb, { 1, 1, 2, 0, "xyio", "oixy?", {}}}, - { image_2d_weights_c4_fyx_b, { 1, 1, 2, 0, "oiyx", "oixy?", {}}}, - { image_2d_weights_c1_b_fyx, { 1, 1, 2, 0, "oiyx", "oixy?", {}}}, - { lstm_weights_dio, { 1, 1, 2, 0, "oixy", "oixy?", {}}}, - { os_is_yx_isa8_osv8_isv4, { 1, 1, 2, 0, "oiyx", "oixy?", {}}}, - { os_is_yx_isa8_osv16_isv4, { 1, 1, 2, 0, "oiyx", "oixy?", {}}}, - { os_is_yx_isa8_osv8_isv4_swizzled_by_4, { 1, 1, 2, 0, "oiyx", "oixy?", {}}}, - { os_is_yx_osa4_isa8_osv8_isv2, { 1, 1, 2, 0, "oiyx", "oixy?", {{0, 32}, {1, 16}}}}, - { os_is_yx_osa4_isa8_osv8_isv4, { 1, 1, 2, 0, "oiyx", "oixy", {{0, 32}, {1, 32}}}}, - { os_is_zyx_osa4_isa8_osv8_isv2, { 1, 1, 3, 0, "oizyx", "oixyz", {{0, 32}, {1, 16}}}}, - { os_is_zyx_osa4_isa8_osv8_isv4, { 1, 1, 3, 0, "oizyx", "oixyz", {{0, 32}, {1, 32}}}}, - { os_is_yx_osa2_isa8_osv8_isv2, { 1, 1, 2, 0, "oiyx", "oixy?", {{0, 16}, {1, 16}}}}, - { os_is_yx_osa2_isa8_osv16_isv2, { 1, 1, 2, 0, "oiyx", "oixy", {{0, 32}, {1, 16}}}}, - { os_is_yx_osa2_isa8_osv16_isv4, { 1, 1, 2, 0, "oiyx", "oixy", {{0, 32}, {1, 32}}}}, - { os_is_zyx_isa8_osv8_isv4, { 1, 1, 3, 0, "oizyx", "oixyz", {{1, 8}, {0, 8}, {1, 4}}}}, - { os_is_zyx_isa8_osv16_isv4, { 1, 1, 3, 0, "oizyx", 
"oixyz", {{1, 8}, {0, 16}, {1, 4}}}}, - { os_is_yx_osa4_isa8_osv8_isv4_swizzled_by_4, { 1, 1, 2, 0, "oiyx", "oixy?", {{0, 32}, {1, 32}}}}, - { os_is_zyx_osa4_isa8_osv8_isv4_swizzled_by_4, { 1, 1, 3, 0, "oizyx", "oixyz", {{0, 32}, {1, 32}}}}, - { is_os_yx_isa2_osa8_isv8_osv2, { 1, 1, 2, 0, "ioyx", "ioxy?", {{1, 16}, {0, 16}}}}, - { is_os_yx_isa4_osa8_isv8_osv4, { 1, 1, 2, 0, "ioyx", "ioxy?", {{1, 32}, {0, 32}}}}, - { is_o_yx_isv32, { 1, 1, 2, 0, "oyxi", "oixy?", {{1, 32}}}}, - { is_o32_yx_isv32_swizzled_by_4, { 1, 1, 2, 0, "oyxi", "oixy?", {}}}, - { os_is_y_x8_osv8_isv4, { 1, 1, 2, 0, "oyxi", "oixy?", {}}}, - { os_is_y_x8_osv8_isv4_swizzled_by_4, { 1, 1, 2, 0, "oyxi", "oixy?", {}}}, - { os_is_yx_osv16_isv4, { 1, 1, 2, 0, "oixy", "oixy?", {{0, 16}, {1, 4}}}}, - { os_is_yx_osv8_isv4, { 1, 1, 2, 0, "oiyx", "oixy", {{1, 4}, {0, 8}}}}, - { os_is_yx_osv8_isv2, { 1, 1, 2, 0, "oiyx", "oixy", {{1, 2}, {0, 8}}}}, - { os_is_zyx_osv16_isv16, { 1, 1, 3, 0, "oizyx", "oixyz", {{0, 16}, {1, 16}}}}, - { os_is_yx_osv32_isv4_swizzled_by_2, { 1, 1, 2, 0, "oixy", "oixy?", {{0, 32}, {1, 4}}}}, - { os_is_yx_osv32_isv4, { 1, 1, 2, 0, "oixy", "oixy?", {{0, 32}, {1, 4}}}}, - { os_is_zyx_osv32_isv4, { 1, 1, 3, 0, "oizyx", "oixyz", {{0, 32}, {1, 4}}}}, - { os_is_yx_osv32_isv32p, { 1, 1, 1, 0, "oixy", "oixy?", {}}}, - { os_is_zyx_isv16_osv16, { 1, 1, 3, 0, "oizyx", "oixyz", {{0, 16}, {1, 16}}}}, - { is_os_zyx_isv16_osv16, { 1, 1, 3, 0, "iozyx", "oixyz", {{1, 16}, {0, 16}}}}, - { is_os_yx_isv16_osv16, { 1, 1, 2, 0, "ioyx", "oixyz", {{1, 16}, {0, 16}}}}, - { os_is_osv32_isv32_swizzled_by_4, { 1, 1, 0, 0, "oixy", "oixy?", {{0, 32}, {1, 32}}}}, - { os_is_zyx_isv8_osv16_isv2, { 1, 1, 3, 0, "oizyx", "oixyz", {{1, 8}, {0, 16}, {1, 2}}}}, - { os_zyxi_osv16, { 1, 1, 3, 0, "ozyxi", "oixyz", {{0, 16}}}}, - { os_is_yx_isv8_osv16_isv2, { 1, 1, 2, 0, "oizyx", "oixyz", {{1, 8}, {0, 16}, {1, 2}}}}, - { os_is_yx_osv16_isv16, { 1, 1, 2, 0, "oiyx", "oixy", {{1, 16}, {0, 16}}}}, - { os_is_zyx_osv32_isv16, { 1, 1, 
3, 0, "oizyx", "oixyz", {{0, 32}, {1, 16}}}}, - { os_is_zyx_osv64_isv16, { 1, 1, 3, 0, "oizyx", "oixyz", {{0, 64}, {1, 16}}}}, - { os_iyx_osv32__ai32, { 1, 1, 2, 0, "oiyx", "oixy", {{0, 32}}}}, - { i_yxs_os_yxsv2_osv16, { 1, 1, 2, 0, "iyxo", "oixy", {{0, 16}}}}, - { iy_xs_os_xsv2_osv8__ao32, { 1, 1, 2, 0, "iyxo", "oixy", {{2, 2}, {0, 8}}}}, - { iy_xs_os_xsv2_osv16__ao32, { 1, 1, 2, 0, "iyxo", "oixy", {{2, 2}, {0, 16}}}}, - { os_i_yxs_osv4_yxsv4, { 1, 1, 2, 0, "oiyx", "oixy", {{0, 4}}}}, - - { goiyx, { 1, 1, 2, 1, "goiyx", "oixy??g", {}}}, - { gioyx, { 1, 1, 2, 1, "gioyx", "oixy??g", {}}}, - { goizyx, { 1, 1, 3, 1, "goizyx", "oixyz?g", {}}}, - { giozyx, { 1, 1, 3, 1, "giozyx", "oixyz?g", {}}}, - { g_os_iyx_osv16, { 1, 1, 2, 1, "goiyx", "oixy??g", {{0, 16}}}}, - { g_os_iyx_osv32, { 1, 1, 2, 1, "goiyx", "oixy??g", {{0, 32}}}}, - { gs_oiyx_gsv16, { 1, 1, 2, 1, "goiyx", "oixy??g", {{6, 16}}}}, - { gs_oizyx_gsv16, { 1, 1, 3, 1, "goizyx", "oixyz?g", {{6, 16}}}}, - { gs_oiyx_gsv32, { 1, 1, 2, 1, "goiyx", "oixy??g", {{6, 32}}}}, - { gyxio, { 1, 1, 2, 1, "gyxio", "oixy??g", {}}}, - { g_is_os_zyx_isv16_osv16, { 1, 1, 3, 1, "giozyx", "oixyz?g", {{1, 16}, {0, 16}}}}, - { g_is_os_yx_isv16_osv16, { 1, 1, 2, 1, "gioyx", "oixy??g", {{1, 16}, {0, 16}}}}, - { g_os_is_zyx_isv8_osv16_isv2, { 1, 1, 3, 1, "goizyx", "oixyz?g", {{1, 8}, {0, 16}, {1, 2}}}}, - { g_os_is_yx_isv8_osv16_isv2, { 1, 1, 2, 1, "goiyx", "oixy??g", {{1, 8}, {0, 16}, {1, 2}}}}, - { g_os_is_zyx_isv16_osv16, { 1, 1, 3, 1, "goizyx", "oixyz?g", {{0, 16}, {1, 16}}}}, - { g_os_is_yx_osv16_isv4, { 1, 1, 2, 1, "goixy", "oixy??g", {{0, 16}, {1, 4}}}}, - { g_os_is_zyx_osv16_isv16, { 1, 1, 3, 1, "goizyx", "oixyz?g", {{0, 16}, {1, 16}}}}, - { g_os_zyx_is_osv16_isv4, { 1, 1, 3, 1, "gozyxi", "oixyz?g", {{0, 16}, {1, 4}}}}, - { g_os_zyx_is_osv16_isv16, { 1, 1, 3, 1, "gozyxi", "oixyz?g", {{0, 16}, {1, 16}}}}, - { g_os_zyx_is_osv16_isv32, { 1, 1, 3, 1, "gozyxi", "oixyz?g", {{0, 16}, {1, 32}}}}, - { g_os_zyx_is_osv32_isv4, { 1, 1, 3, 
1, "gozyxi", "oixyz?g", {{0, 32}, {1, 4}}}}, - { g_os_zyx_is_osv32_isv16, { 1, 1, 3, 1, "gozyxi", "oixyz?g", {{0, 32}, {1, 16}}}}, - { g_os_zyx_is_osv32_isv32, { 1, 1, 3, 1, "gozyxi", "oixyz?g", {{0, 32}, {1, 32}}}}, - { g_os_is_yx_osa4_isa8_osv8_isv4, { 1, 1, 2, 1, "goiyx", "oixy??g", {{0, 32}, {1, 32}}}}, - { g_os_is_zyx_osa4_isa8_osv8_isv4, { 1, 1, 3, 1, "goizyx", "oixyz?g", {{0, 32}, {1, 32}}}}, - { g_os_is_yx_osa4_isa8_osv8_isv2, { 1, 1, 2, 1, "goiyx", "oixy??g", {{0, 32}, {1, 16}}}}, - { g_os_is_zyx_osa4_isa8_osv8_isv2, { 1, 1, 3, 1, "goizyx", "oixyz?g", {{0, 32}, {1, 16}}}}, - { g_os_is_yx_osa2_isa8_osv16_isv4, { 1, 1, 2, 1, "goiyx", "oixy??g", {{0, 32}, {1, 32}}}}, - { g_os_is_yx_osa2_isa8_osv16_isv2, { 1, 1, 2, 1, "goiyx", "oixy??g", {{0, 32}, {1, 16}}}}, - { gs_oi_yxs_gsv4_yxsv4, { 1, 1, 2, 1, "goiyx", "oixy??g", {{6, 4}}}}, - { gs_oi_yxs_gsv16_yxsv4, { 1, 1, 2, 1, "goiyx", "oixy??g", {{6, 16}}}}, - { gs_oi_yxs_gsv32_yxsv4, { 1, 1, 2, 1, "goiyx", "oixy??g", {{6, 32}}}}, - { g_os_is_yx_isv16_osv16, { 1, 1, 2, 1, "goiyx", "oixy??g", {{1, 16}, {0, 16}}}}, - { gi_yxs_os_yxsv2_osv16, { 1, 1, 2, 1, "giyxo", "oixy??g", {{0, 16}}}}, - { giy_xs_os_xsv2_osv8__ao32, { 1, 1, 2, 1, "giyxo", "oixy??g", {{2, 2}, {0, 8}}}}, - { giy_xs_os_xsv2_osv16__ao32, { 1, 1, 2, 1, "giyxo", "oixy??g", {{2, 2}, {0, 16}}}}, - }; - if (traits.find(fmt) == traits.end()) { - throw std::runtime_error("[clDNN] Format description is missing in fmt traits"); - } - return traits.at(fmt); - } - - /// @brief Returns number of batch dimensions for a @p format. - static size_t batch_num(type fmt) { return traits(fmt).batch_num; } - /// @brief Returns number of feature dimensions for a @p format. - static size_t feature_num(type fmt) { return traits(fmt).feature_num; } - /// @brief Returns number of spatial dimensions for a @p format. - static size_t spatial_num(type fmt) { return traits(fmt).spatial_num; } - /// @brief Returns number of group dimensions for a @p format. 
- static size_t group_num(type fmt) { return traits(fmt).group_num; } - /// @brief Returns an order of dimensions for a @ format. - static const std::string& order(type fmt) { return traits(fmt).order; } - /// @brief Returns an internal orders of dimensions for a @p format. - static const std::string& internal_order(type fmt) { return traits(fmt).internal_order; } - /// @brief Returns block sizes for @p format. - static const std::vector>& block_sizes(type fmt) { return traits(fmt).block_sizes; } - /// @brief Returns number of dimensions contained within a @p format - static size_t dimension(type fmt) { return order(fmt).size(); } - /// @brief Checks if @p format is a winograd format - static bool is_winograd(type fmt) { - return (fmt == winograd_2x3_s1_data || - fmt == winograd_2x3_s1_weights || - fmt == winograd_2x3_s1_fused_weights || - fmt == winograd_6x3_s1_fused_weights || - fmt == image_2d_weights_winograd_6x3_s1_fbxyb || - fmt == image_2d_weights_winograd_6x3_s1_xfbyb); } - /// @brief Checks if @p format is of image2d type - static bool is_image_2d(type fmt) { - return (fmt == image_2d_weights_c4_fyx_b || - fmt == image_2d_weights_c1_b_fyx || - fmt == image_2d_weights_winograd_6x3_s1_fbxyb || - fmt == image_2d_weights_winograd_6x3_s1_xfbyb || - fmt == nv12 || - fmt == image_2d_rgba); - } - /// @brief Checks if @p format is weights format - static bool is_weights_format(type fmt) { - const auto internal_order = traits(fmt).internal_order; - const auto weights_chars = { "o", "i" }; - for (const auto& c : weights_chars) { - if (internal_order.find_first_of(c) != std::string::npos) { - return true; - } - } - return false; - } - /// @brief Checks if @p format is simple data format - static bool is_simple_data_format(type fmt) { - return (fmt == yxfb || fmt == byxf || - fmt == bfyx || fmt == fyxb || - fmt == bfzyx || fmt == bfwzyx); - } - /// @brief Checks if @p format is of grouped type - static bool is_grouped(type fmt) { return group_num(fmt) != 0; } - /// 
@brief Checks if @p format is of image type - static bool is_image(type fmt) { return (is_image_2d(fmt)); } - /// @brief Checks if @p format is blocked format - static bool is_blocked(type fmt) { return !(block_sizes(fmt).empty()); } - /// @brief Checks if @p format is nv12 format - static bool is_nv12(type fmt) { return (fmt == nv12); } - - /// @brief Returns number of batch dimensions. - size_t batch_num() const { return traits(value).batch_num; } - /// @brief Returns number of feature dimensions. - size_t feature_num() const { return traits(value).feature_num; } - /// @brief Returns number of spatial dimensions. - size_t spatial_num() const { return traits(value).spatial_num; } - /// @brief Returns number of group dimensions. - size_t group_num() const { return traits(value).group_num; } - /// @brief Returns an order of dimensions in form of string. - const std::string& order() const { return traits(value).order; } - /// @brief Returns an internal orders of dimensions form of string. - const std::string& internal_order() const { return traits(value).internal_order; } - /// @brief Returns block sizes as vector of pairs of dimension and block size for that dimension. 
- const std::vector>& block_sizes() const { return traits(value).block_sizes; } - /// @brief Returns number of dimensions contained within this format - size_t dimension() const { return order(value).size(); } - /// @brief Checks if @p format is a winograd format - bool is_winograd() const { return is_winograd(value); } - /// @brief Checks if @p format is of image 2d type - bool is_image_2d() const { return is_image_2d(value); } - /// @brief Checks if @p format is of image type - bool is_image() const { return is_image(value); } - /// @brief Checks if @p format is blocked format - bool is_blocked() { return is_blocked(value); } - /// @brief Checks if @p format is a nv12 format - bool is_nv12() const { return is_nv12(value); } - - /// @brief Transforms dimension from internal order to external order - size_t internal_to_external(size_t idx) const { - auto index = order().find_first_of(internal_order()[idx]); - if (index == std::string::npos) - throw std::invalid_argument("Internal dimension index does not map to external index."); - return index; - } - - type value; - /// @brief Implicit conversion from format::type. - constexpr format(type t) : value(t) {} - /// @brief Implicit conversion to format::type. 
- constexpr operator type() const { return value; } -}; - constexpr int32_t tensor_batch_dim_max = 1; constexpr int32_t tensor_feature_dim_max = 1; constexpr int32_t tensor_spatial_dim_max = 4; @@ -923,6 +473,8 @@ public: // skip f and y, z for the formats that do not have it if (((new_fmt == format::bs_xs_xsv8_bsv8) || (new_fmt == format::bs_xs_xsv8_bsv16) || + (new_fmt == format::os_i_osv8__ai8) || + (new_fmt == format::os_i_osv16__ai8) || (new_fmt == format::bs_x_bsv16)) && ((c == 'f') || (c == 'y') || diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/utils.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/utils.hpp index 1d744896dfb..38d452656f6 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/utils.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/utils.hpp @@ -9,6 +9,8 @@ #include #include #include +#include +#include namespace cldnn { @@ -139,6 +141,36 @@ inline derived_type& downcast(base_type& base) { throw std::runtime_error("downcast failed with unhadnled exception"); } +template +inline bool all_ones(const std::vector vec) { + return std::all_of(vec.begin(), vec.end(), [](const T& val) { return val == 1; }); +} + +template +inline bool all_zeroes(const std::vector vec) { + return std::all_of(vec.begin(), vec.end(), [](const T& val) { return val == 0; }); +} + +template +inline bool any_one(const std::vector vec) { + return std::any_of(vec.begin(), vec.end(), [](const T& val) { return val == 1; }); +} + +template +inline bool any_zero(const std::vector vec) { + return std::any_of(vec.begin(), vec.end(), [](const T& val) { return val == 0; }); +} + +template +inline bool any_not_one(const std::vector vec) { + return std::any_of(vec.begin(), vec.end(), [](const T& val) { return val != 1; }); +} + +template +inline bool any_not_zero(const std::vector vec) { + return std::any_of(vec.begin(), vec.end(), [](const T& val) { return val != 0; }); +} + /// @} /// @endcond /// @} diff --git 
a/src/plugins/intel_gpu/src/graph/include/to_string_utils.h b/src/plugins/intel_gpu/src/graph/include/to_string_utils.h index 1c1bd6099ac..e96910b66a4 100644 --- a/src/plugins/intel_gpu/src/graph/include/to_string_utils.h +++ b/src/plugins/intel_gpu/src/graph/include/to_string_utils.h @@ -60,206 +60,7 @@ inline std::string dt_to_str(data_types dt) { } inline std::string fmt_to_str(format fmt) { - switch (fmt.value) { - case format::yxfb: - return "yxfb"; - case format::byxf: - return "byxf"; - case format::bfyx: - return "bfyx"; - case format::fyxb: - return "fyxb"; - case format::b_fs_yx_fsv16: - return "b_fs_yx_fsv16"; - case format::b_fs_yx_fsv32: - return "b_fs_yx_fsv32"; - case format::b_fs_zyx_fsv32: - return "b_fs_zyx_fsv32"; - case format::bs_xs_xsv8_bsv8: - return "bs_xs_xsv8_bsv8"; - case format::bs_xs_xsv8_bsv16: - return "bs_xs_xsv8_bsv16"; - case format::bs_x_bsv16: - return "bs_x_bsv16"; - case format::winograd_2x3_s1_data: - return "winograd_2x3_s1_data"; - case format::b_fs_yx_fsv4: - return "b_fs_yx_fsv4"; - case format::b_fs_yx_32fp: - return "b_fs_yx_32fp"; - case format::bfzyx: - return "bfzyx"; - case format::bfwzyx: - return "bfwzyx"; - case format::fs_b_yx_fsv32: - return "fs_b_yx_fsv32"; - case format::bs_fs_yx_bsv16_fsv16: - return "bs_fs_yx_bsv16_fsv16"; - case format::bs_fs_yx_bsv32_fsv16: - return "bs_fs_yx_bsv32_fsv16"; - case format::bs_fs_yx_bsv4_fsv2: - return "bs_fs_yx_bsv4_fsv2"; - case format::bs_fs_yx_bsv4_fsv4: - return "bs_fs_yx_bsv4_fsv4"; - case format::bs_fs_yx_bsv8_fsv4: - return "bs_fs_yx_bsv8_fsv4"; - case format::bs_fs_yx_bsv32_fsv32: - return "bs_fs_yx_bsv32_fsv32"; - case format::b_fs_zyx_fsv16: - return "b_fs_zyx_fsv16"; - case format::bs_fs_zyx_bsv16_fsv16: - return "bs_fs_zyx_bsv16_fsv16"; - case format::image_2d_rgba: - return "image_2d_rgba"; - - case format::oiyx: - return "oiyx"; - case format::ioyx: - return "ioyx"; - case format::yxio: - return "yxio"; - case format::oizyx: - return "oizyx"; - case 
format::iozyx: - return "iozyx"; - case format::winograd_2x3_s1_weights: - return "winograd_2x3_s1_weights"; - case format::winograd_2x3_s1_fused_weights: - return "winograd_2x3_s1_fused_weights"; - case format::winograd_6x3_s1_fused_weights: - return "winograd_6x3_s1_fused_weights"; - case format::image_2d_weights_c4_fyx_b: - return "image_2d_weights_c4_fyx_b"; - case format::image_2d_weights_c1_b_fyx: - return "image_2d_weights_c1_b_fyx"; - case format::image_2d_weights_winograd_6x3_s1_fbxyb: - return "image_2d_weights_winograd_6x3_s1_fbxyb"; - case format::image_2d_weights_winograd_6x3_s1_xfbyb: - return "image_2d_weights_winograd_6x3_s1_xfbyb"; - case format::os_yxi_osv16: - return "os_yxi_osv16"; - case format::o_is_yx_isv16: - return "o_is_yx_isv16"; - case format::os_iyx_osv16: - return "os_iyx_osv16"; - case format::os_is_yx_osv16_isv16: - return "os_is_yx_osv16_isv16"; - case format::os_iyx_osv32: - return "os_iyx_osv32"; - case format::os_iyx_osv64: - return "os_iyx_osv64"; - case format::is_o_yx_isv32: - return "is_o_yx_isv32"; - case format::os_is_yx_isv16_osv16: - return "os_is_yx_isv16_osv16"; - case format::os_is_yx_isa8_osv8_isv4: - return "os_is_yx_isa8_osv8_isv4"; - case format::os_is_yx_isa8_osv16_isv4: - return "os_is_yx_isa8_osv16_isv4"; - case format::os_is_zyx_isa8_osv8_isv4: - return "os_is_zyx_isa8_osv8_isv4"; - case format::os_is_zyx_isa8_osv16_isv4: - return "os_is_zyx_isa8_osv16_isv4"; - case format::os_is_yx_osa4_isa8_osv8_isv2: - return "os_is_yx_osa4_isa8_osv8_isv2"; - case format::os_is_zyx_osa4_isa8_osv8_isv2: - return "os_is_zyx_osa4_isa8_osv8_isv2"; - case format::os_is_zyx_osa4_isa8_osv8_isv4: - return "os_is_zyx_osa4_isa8_osv8_isv4"; - case format::g_os_is_yx_osa4_isa8_osv8_isv2: - return "g_os_is_yx_osa4_isa8_osv8_isv2"; - case format::g_os_is_yx_osa4_isa8_osv8_isv4: - return "g_os_is_yx_osa4_isa8_osv8_isv4"; - case format::g_os_is_zyx_osa4_isa8_osv8_isv4: - return "g_os_is_zyx_osa4_isa8_osv8_isv4"; - case 
format::g_os_is_zyx_osa4_isa8_osv8_isv2: - return "g_os_is_zyx_osa4_isa8_osv8_isv2"; - case format::os_is_yx_osa4_isa8_osv8_isv4: - return "os_is_yx_osa4_isa8_osv8_isv4"; - case format::os_is_yx_osa4_isa8_osv8_isv4_swizzled_by_4: - return "os_is_yx_osa4_isa8_osv8_isv4_swizzled_by_4"; - case format::os_is_zyx_osa4_isa8_osv8_isv4_swizzled_by_4: - return "os_is_zyx_osa4_isa8_osv8_isv4_swizzled_by_4"; - case format::os_is_yx_isa8_osv8_isv4_swizzled_by_4: - return "os_is_yx_isa8_osv8_isv4_swizzled_by_4"; - case format::is_o32_yx_isv32_swizzled_by_4: - return "is_o32_yx_isv32_swizzled_by_4"; - case format::os_is_yx_osv8_isv2: - return "os_is_yx_osv8_isv2"; - case format::os_is_yx_osv8_isv4: - return "os_is_yx_osv8_isv4"; - case format::os_is_yx_osv16_isv4: - return "os_is_yx_osv16_isv4"; - case format::os_is_yx_osv32_isv4_swizzled_by_2: - return "os_is_yx_osv32_isv4_swizzled_by_2"; - case format::os_is_yx_osv32_isv4: - return "os_is_yx_osv32_isv4"; - case format::os_is_zyx_osv32_isv4: - return "os_is_zyx_osv32_isv4"; - case format::os_is_y_x8_osv8_isv4: - return "os_is_y_x8_osv8_isv4"; - case format::os_is_yx_osv32_isv32p: - return "os_is_yx_osv32_isv32p"; - case format::os_is_zyx_isv16_osv16: - return "os_is_zyx_isv16_osv16"; - case format::is_os_zyx_isv16_osv16: - return "is_os_zyx_isv16_osv16"; - case format::is_os_yx_isv16_osv16: - return "is_os_yx_isv16_osv16"; - case format::os_is_osv32_isv32_swizzled_by_4: - return "os_is_osv32_isv32_swizzled_by_4"; - case format::os_is_zyx_isv8_osv16_isv2: - return "os_is_zyx_isv8_osv16_isv2"; - case format::os_zyxi_osv16: - return "os_zyxi_osv16"; - - case format::goiyx: - return "goiyx"; - case format::goizyx: - return "goizyx"; - case format::gioyx: - return "gioyx"; - case format::giozyx: - return "giozyx"; - case format::g_os_iyx_osv16: - return "g_os_iyx_osv16"; - case format::g_os_iyx_osv32: - return "g_os_iyx_osv32"; - case format::gs_oiyx_gsv16: - return "gs_oiyx_gsv16"; - case format::gs_oiyx_gsv32: - return 
"gs_oiyx_gsv32"; - case format::g_is_os_zyx_isv16_osv16: - return "g_is_os_zyx_isv16_osv16"; - case format::g_is_os_yx_isv16_osv16: - return "g_is_os_yx_isv16_osv16"; - case format::g_os_is_zyx_isv8_osv16_isv2: - return "g_os_is_zyx_isv8_osv16_isv2"; - case format::g_os_is_yx_isv8_osv16_isv2: - return "g_os_is_yx_isv8_osv16_isv2"; - case format::g_os_is_zyx_isv16_osv16: - return "g_os_is_zyx_isv16_osv16"; - case format::g_os_is_yx_osv16_isv4: - return "g_os_is_yx_osv16_isv4"; - case format::g_os_is_zyx_osv16_isv16: - return "g_os_is_zyx_osv16_isv16"; - case format::g_os_zyx_is_osv16_isv4: - return "g_os_zyx_is_osv16_isv4"; - case format::g_os_zyx_is_osv16_isv16: - return "g_os_zyx_is_osv16_isv16"; - case format::g_os_zyx_is_osv16_isv32: - return "g_os_zyx_is_osv16_isv32"; - case format::g_os_zyx_is_osv32_isv4: - return "g_os_zyx_is_osv32_isv4"; - case format::g_os_zyx_is_osv32_isv16: - return "g_os_zyx_is_osv32_isv16"; - case format::g_os_zyx_is_osv32_isv32: - return "g_os_zyx_is_osv32_isv32"; - case format::gs_oi_yxs_gsv32_yxsv4: - return "gs_oi_yxs_gsv32_yxsv4"; - default: - return "unknown (" + std::to_string(fmt.value) + ")"; - } + return fmt.to_string(); } inline std::string type_to_str(std::shared_ptr primitive) { return primitive->type_string(); } diff --git a/src/plugins/intel_gpu/src/kernel_selector/common/tensor_type.cpp b/src/plugins/intel_gpu/src/kernel_selector/common/tensor_type.cpp index 431b964b11e..c6118615d75 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/common/tensor_type.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/common/tensor_type.cpp @@ -53,6 +53,7 @@ WeightsTensor::WeightsChannelArray WeightsTensor::weightsChannelArray {{ { WeightsLayout::ioyx, { 0, 1, -1, 3, 2, -1 } }, { WeightsLayout::oyxi, { 1, 2, -1, 0, 3, -1 } }, { WeightsLayout::iyxo, { 1, 2, -1, 3, 0, -1 } }, + { WeightsLayout::oyxi, { 1, 2, -1, 0, 3, -1 } }, { WeightsLayout::yxio, { 2, 3, -1, 1, 0, -1 } }, { WeightsLayout::os_iyx_osv16, { 0, 1, -1, 2, 3, -1 } }, { 
WeightsLayout::os_iyx_osv32, { 0, 1, -1, 2, 3, -1 } }, diff --git a/src/plugins/intel_gpu/src/runtime/format.cpp b/src/plugins/intel_gpu/src/runtime/format.cpp new file mode 100644 index 00000000000..931c56748ec --- /dev/null +++ b/src/plugins/intel_gpu/src/runtime/format.cpp @@ -0,0 +1,173 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "intel_gpu/runtime/format.hpp" + +#include +#include +#include + +namespace cldnn { + +const format_traits& format::traits(type fmt) { + #define FMT_TRAITS(fmt, ...) {fmt, {#fmt, __VA_ARGS__}} + + static const std::map traits { + // B - number of Batch dimensions + // F - number of Feature dimensions + // S - number of Spatial dimensions + // G - number of Group dimensions + // Order - dims changing order from rare to often + // Inner order - dims order for internal storage in _sizes array + // Block sizes - vector of pairs of dimension number (by inner order) and block size ordered from rare to often + // Format B F S G Dims order Order Inner order Block sizes + FMT_TRAITS(yxfb, 1, 1, 2, 0, {2, 3, 1, 0}, "yxfb", "bfxy?", {}), + FMT_TRAITS(byxf, 1, 1, 2, 0, {0, 2, 3, 1}, "byxf", "bfxy?", {}), + FMT_TRAITS(bfyx, 1, 1, 2, 0, {0, 1, 2, 3}, "bfyx", "bfxy?", {}), + FMT_TRAITS(fyxb, 1, 1, 2, 0, {1, 2, 3, 0}, "fyxb", "bfxy?", {}), + FMT_TRAITS(b_fs_yx_fsv16, 1, 1, 2, 0, {0, 1, 2, 3}, "bfyx", "bfxy", {{1, 16}}), + FMT_TRAITS(b_fs_yx_fsv32, 1, 1, 2, 0, {0, 1, 2, 3}, "bfyx", "bfxy", {{1, 32}}), + FMT_TRAITS(b_fs_zyx_fsv32, 1, 1, 3, 0, {0, 1, 2, 3, 4}, "bfzyx", "bfxyz", {{1, 32}}), + FMT_TRAITS(bs_xs_xsv8_bsv8, 1, 0, 1, 0, {0, 1}, "bx", "b?x??", {{2, 8}, {0, 8}}), + FMT_TRAITS(bs_xs_xsv8_bsv16, 1, 0, 1, 0, {0, 1}, "bx", "b?x??", {{2, 8}, {0, 16}}), + FMT_TRAITS(bs_x_bsv16, 1, 1, 1, 0, {0, 1}, "bx", "b?x??", {{0, 16}}), + FMT_TRAITS(winograd_2x3_s1_data, 1, 1, 2, 0, {0, 2, 3, 1}, "bxyf", "bfxy?", {}), + FMT_TRAITS(b_fs_yx_fsv4, 1, 1, 2, 0, {0, 1, 2, 3}, "bfyx", "bfxy?", {{1, 4}}), + 
FMT_TRAITS(bfzyx, 1, 1, 3, 0, {0, 1, 2, 3, 4}, "bfzyx", "bfxyz", {}), + FMT_TRAITS(bfwzyx, 1, 1, 4, 0, {0, 1, 2, 3, 4, 5}, "bfwzyx", "bfxyzw", {}), + FMT_TRAITS(fs_b_yx_fsv32, 1, 1, 2, 0, {1, 0, 2, 3}, "fbyx", "bfxy?", {{1, 32}}), + FMT_TRAITS(b_fs_yx_32fp, 1, 1, 2, 0, {0, 1, 2, 3}, "bfyx", "bfxy?", {}), + FMT_TRAITS(b_fs_zyx_fsv16, 1, 1, 3, 0, {0, 1, 2, 3, 4}, "bfzyx", "bfxyz", {{1, 16}}), + FMT_TRAITS(bs_fs_zyx_bsv16_fsv16, 1, 1, 3, 0, {0, 1, 2, 3, 4}, "bfzyx", "bfxyz", {{0, 16 }, {1, 16}}), + FMT_TRAITS(bs_fs_yx_bsv16_fsv16, 1, 1, 2, 0, {0, 1, 2, 3}, "bfyx", "bfxy?", {{0, 16 }, {1, 16}}), + FMT_TRAITS(bs_fs_yx_bsv4_fsv4, 1, 1, 2, 0, {0, 1, 2, 3}, "bfyx", "bfxy?", {{0, 4 }, {1, 4}}), + FMT_TRAITS(bs_fs_yx_bsv8_fsv4, 1, 1, 2, 0, {0, 1, 2, 3}, "bfyx", "bfxy?", {{0, 8 }, {1, 4}}), + FMT_TRAITS(bs_fs_yx_bsv4_fsv2, 1, 1, 2, 0, {0, 1, 2, 3}, "bfyx", "bfxy?", {{0, 4 }, {1, 2}}), + FMT_TRAITS(bs_fs_zyx_bsv4_fsv4, 1, 1, 3, 0, {0, 1, 2, 3, 4}, "bfzyx", "bfxyz", {{0, 4 }, {1, 4}}), + FMT_TRAITS(bs_fs_zyx_bsv4_fsv2, 1, 1, 3, 0, {0, 1, 2, 3, 4}, "bfzyx", "bfxyz", {{0, 4 }, {1, 2}}), + FMT_TRAITS(bs_fs_zyx_bsv32_fsv32, 1, 1, 3, 0, {0, 1, 2, 3, 4}, "bfzyx", "bfxyz", {{0, 32 }, {1, 32}}), + FMT_TRAITS(bs_fs_zyx_bsv32_fsv16, 1, 1, 3, 0, {0, 1, 2, 3, 4}, "bfzyx", "bfxyz", {{0, 32 }, {1, 16}}), + FMT_TRAITS(bs_fs_yx_bsv32_fsv32, 1, 1, 2, 0, {0, 1, 2, 3}, "bfyx", "bfxy?", {{0, 32 }, {1, 32}}), + FMT_TRAITS(bs_fs_yx_bsv32_fsv16, 1, 1, 2, 0, {0, 1, 2, 3}, "bfyx", "bfxy?", {{0, 32 }, {1, 16}}), + FMT_TRAITS(nv12, 1, 1, 2, 0, {0, 1, 2, 3}, "bfyx", "bfxy?", {}), + FMT_TRAITS(image_2d_rgba, 1, 1, 2, 0, {0, 1, 2, 3}, "bfyx", "bfxy?", {}), + + FMT_TRAITS(oiyx, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy", {}), + FMT_TRAITS(ioyx, 1, 1, 2, 0, {1, 0, 2, 3}, "ioyx", "oixy", {}), + FMT_TRAITS(iyxo, 1, 1, 2, 0, {1, 2, 3, 0}, "iyxo", "oixy", {}), + FMT_TRAITS(oyxi, 1, 1, 2, 0, {0, 2, 3, 1}, "oyxi", "oixy", {}), + FMT_TRAITS(yxio, 1, 1, 2, 0, {2, 3, 1, 0}, "yxio", "oixy?", {}), + FMT_TRAITS(oizyx, 1, 
1, 3, 0, {0, 1, 2, 3, 4}, "oizyx", "oixyz", {}), + FMT_TRAITS(iozyx, 1, 1, 3, 0, {1, 0, 2, 3, 4}, "iozyx", "oixyz", {}), + FMT_TRAITS(os_is_yx_isv16_osv16, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy", {{1, 16}, {0, 16}}), + FMT_TRAITS(o_is_yx_isv16, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy?", {{1, 16}}), + FMT_TRAITS(os_yxi_osv16, 1, 1, 2, 0, {0, 2, 3, 1}, "oyxi", "oixy?", {{0, 16}}), + FMT_TRAITS(os_iyx_osv16, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy?", {{0, 16}}), + FMT_TRAITS(os_iyx_osv32, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy?", {{0, 32}}), + FMT_TRAITS(os_iyx_osv64, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy?", {{0, 64}}), + FMT_TRAITS(winograd_2x3_s1_weights, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy?", {}), + FMT_TRAITS(winograd_2x3_s1_fused_weights, 1, 1, 2, 0, {3, 2, 1, 0}, "xyio", "oixy?", {}), + FMT_TRAITS(winograd_6x3_s1_fused_weights, 1, 1, 2, 0, {3, 2, 1, 0}, "xyio", "oixy?", {}), + FMT_TRAITS(image_2d_weights_winograd_6x3_s1_fbxyb, 1, 1, 2, 0, {3, 2, 1, 0}, "xyio", "oixy?", {}), + FMT_TRAITS(image_2d_weights_winograd_6x3_s1_xfbyb, 1, 1, 2, 0, {3, 2, 1, 0}, "xyio", "oixy?", {}), + FMT_TRAITS(image_2d_weights_c4_fyx_b, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy?", {}), + FMT_TRAITS(image_2d_weights_c1_b_fyx, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy?", {}), + FMT_TRAITS(lstm_weights_dio, 1, 1, 2, 0, {0, 1, 3, 2}, "oixy", "oixy?", {}), + FMT_TRAITS(os_is_yx_isa8_osv8_isv4, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy?", {}), + FMT_TRAITS(os_is_yx_isa8_osv16_isv4, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy?", {}), + FMT_TRAITS(os_is_yx_isa8_osv8_isv4_swizzled_by_4, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy?", {}), + FMT_TRAITS(os_is_yx_osa4_isa8_osv8_isv2, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy?", {{0, 32}, {1, 16}}), + FMT_TRAITS(os_is_yx_osa4_isa8_osv8_isv4, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy", {{0, 32}, {1, 32}}), + FMT_TRAITS(os_is_zyx_osa4_isa8_osv8_isv2, 1, 1, 3, 0, {0, 1, 2, 3, 4}, "oizyx", "oixyz", {{0, 32}, {1, 16}}), + FMT_TRAITS(os_is_zyx_osa4_isa8_osv8_isv4, 
1, 1, 3, 0, {0, 1, 2, 3, 4}, "oizyx", "oixyz", {{0, 32}, {1, 32}}), + FMT_TRAITS(os_is_yx_osa2_isa8_osv16_isv2, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy", {{0, 32}, {1, 16}}), + FMT_TRAITS(os_is_yx_osa2_isa8_osv16_isv4, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy", {{0, 32}, {1, 32}}), + FMT_TRAITS(os_is_yx_osa2_isa8_osv8_isv2, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy", {{0, 16}, {1, 16}}), + FMT_TRAITS(os_is_zyx_isa8_osv8_isv4, 1, 1, 3, 0, {0, 1, 2, 3, 4}, "oizyx", "oixyz", {{1, 8}, {0, 8}, {1, 4}}), + FMT_TRAITS(os_is_zyx_isa8_osv16_isv4, 1, 1, 3, 0, {0, 1, 2, 3, 4}, "oizyx", "oixyz", {{1, 8}, {0, 16}, {1, 4}}), + FMT_TRAITS(os_is_yx_osa4_isa8_osv8_isv4_swizzled_by_4, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy?", {{0, 32}, {1, 32}}), + FMT_TRAITS(os_is_zyx_osa4_isa8_osv8_isv4_swizzled_by_4, 1, 1, 3, 0, {0, 1, 2, 3, 4}, "oizyx", "oixyz", {{0, 32}, {1, 32}}), + FMT_TRAITS(is_os_yx_isa2_osa8_isv8_osv2, 1, 1, 2, 0, {1, 0, 2, 3}, "ioyx", "ioxy?", {{1, 16}, {0, 16}}), + FMT_TRAITS(is_os_yx_isa4_osa8_isv8_osv4, 1, 1, 2, 0, {1, 0, 2, 3}, "ioyx", "ioxy?", {{1, 32}, {0, 32}}), + FMT_TRAITS(is_o_yx_isv32, 1, 1, 2, 0, {1, 0, 2, 3}, "oyxi", "oixy?", {{1, 32}}), + FMT_TRAITS(is_o32_yx_isv32_swizzled_by_4, 1, 1, 2, 0, {0, 1, 2, 3}, "oyxi", "oixy?", {}), + FMT_TRAITS(os_is_y_x8_osv8_isv4, 1, 1, 2, 0, {0, 1, 2, 3}, "oyxi", "oixy?", {}), + FMT_TRAITS(os_is_y_x8_osv8_isv4_swizzled_by_4, 1, 1, 2, 0, {0, 1, 2, 3}, "oyxi", "oixy?", {}), + FMT_TRAITS(os_is_yx_osv16_isv4, 1, 1, 2, 0, {0, 1, 2, 3}, "oixy", "oixy?", {{0, 16}, {1, 4}}), + FMT_TRAITS(os_is_yx_osv8_isv4, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy", {{1, 4}, {0, 8}}), + FMT_TRAITS(os_is_yx_osv8_isv2, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy", {{1, 2}, {0, 8}}), + FMT_TRAITS(os_is_zyx_osv16_isv16, 1, 1, 3, 0, {0, 1, 2, 3, 4}, "oizyx", "oixyz", {{0, 16}, {1, 16}}), + FMT_TRAITS(os_is_yx_osv32_isv4_swizzled_by_2, 1, 1, 2, 0, {0, 1, 2, 3}, "oixy", "oixy?", {{0, 32}, {1, 4}}), + FMT_TRAITS(os_is_yx_osv32_isv4, 1, 1, 2, 0, {0, 1, 2, 3}, "oixy", 
"oixy?", {{0, 32}, {1, 4}}), + FMT_TRAITS(os_is_zyx_osv32_isv4, 1, 1, 3, 0, {0, 1, 2, 3, 4}, "oizyx", "oixyz", {{0, 32}, {1, 4}}), + FMT_TRAITS(os_is_yx_osv32_isv32p, 1, 1, 1, 0, {0, 1, 2, 3}, "oixy", "oixy?", {}), + FMT_TRAITS(os_is_zyx_isv16_osv16, 1, 1, 3, 0, {0, 1, 2, 3, 4}, "oizyx", "oixyz", {{0, 16}, {1, 16}}), + FMT_TRAITS(is_os_zyx_isv16_osv16, 1, 1, 3, 0, {1, 0, 2, 3, 4}, "iozyx", "oixyz", {{1, 16}, {0, 16}}), + FMT_TRAITS(is_os_yx_isv16_osv16, 1, 1, 2, 0, {1, 0, 2, 3, 4}, "ioyx", "oixy", {{1, 16}, {0, 16}}), + FMT_TRAITS(os_is_osv32_isv32_swizzled_by_4, 1, 1, 0, 0, {0, 1, 2, 3}, "oixy", "oixy?", {{0, 32}, {1, 32}}), + FMT_TRAITS(os_is_zyx_isv8_osv16_isv2, 1, 1, 3, 0, {0, 1, 2, 3, 4}, "oizyx", "oixyz", {{1, 8}, {0, 16}, {1, 2}}), + FMT_TRAITS(os_zyxi_osv16, 1, 1, 3, 0, {0, 2, 3, 4, 1}, "ozyxi", "oixyz", {{0, 16}}), + FMT_TRAITS(os_is_yx_isv8_osv16_isv2, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy", {{1, 8}, {0, 16}, {1, 2}}), + FMT_TRAITS(os_is_yx_osv16_isv16, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy", {{1, 16}, {0, 16}}), + FMT_TRAITS(os_is_zyx_osv32_isv16, 1, 1, 3, 0, {0, 1, 2, 3, 4}, "oizyx", "oixyz", {{0, 32}, {1, 16}}), + FMT_TRAITS(os_is_zyx_osv64_isv16, 1, 1, 3, 0, {0, 1, 2, 3, 4}, "oizyx", "oixyz", {{0, 64}, {1, 16}}), + FMT_TRAITS(os_iyx_osv32__ai32, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy", {{0, 32}}), + FMT_TRAITS(i_yxs_os_yxsv2_osv16, 1, 1, 2, 0, {1, 2, 3, 0}, "iyxo", "oixy", {{0, 16}}), + FMT_TRAITS(iy_xs_os_xsv2_osv8__ao32, 1, 1, 2, 0, {1, 2, 3, 0}, "iyxo", "oixy", {{2, 2}, {0, 8}}), + FMT_TRAITS(iy_xs_os_xsv2_osv16__ao32, 1, 1, 2, 0, {1, 2, 3, 0}, "iyxo", "oixy", {{2, 2}, {0, 16}}), + FMT_TRAITS(os_i_yxs_osv4_yxsv4, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy", {{0, 4}}), + FMT_TRAITS(os_i_osv16__ai8, 1, 1, 0, 0, {0, 1}, "oi", "oi??", {{1, 8}, {0, 16}}), + FMT_TRAITS(os_i_osv8__ai8, 1, 1, 0, 0, {0, 1}, "oi", "oi??", {{1, 8}, {0, 8}}), + + FMT_TRAITS(goiyx, 1, 1, 2, 1, {0, 1, 2, 3, 4}, "goiyx", "oixy??g", {}), + FMT_TRAITS(gioyx, 1, 1, 2, 1, {0, 2, 1, 
3, 4}, "gioyx", "oixy??g", {}), + FMT_TRAITS(goizyx, 1, 1, 3, 1, {0, 1, 2, 3, 4, 5}, "goizyx", "oixyz?g", {}), + FMT_TRAITS(giozyx, 1, 1, 3, 1, {0, 2, 1, 3, 4, 5}, "giozyx", "oixyz?g", {}), + FMT_TRAITS(g_os_iyx_osv16, 1, 1, 2, 1, {0, 1, 2, 3, 4}, "goiyx", "oixy??g", {{0, 16}}), + FMT_TRAITS(g_os_iyx_osv32, 1, 1, 2, 1, {0, 1, 2, 3, 4}, "goiyx", "oixy??g", {{0, 32}}), + FMT_TRAITS(gs_oiyx_gsv16, 1, 1, 2, 1, {0, 1, 2, 3, 4}, "goiyx", "oixy??g", {{6, 16}}), + FMT_TRAITS(gs_oizyx_gsv16, 1, 1, 3, 1, {0, 1, 2, 3, 4, 5}, "goizyx", "oixyz?g", {{6, 16}}), + FMT_TRAITS(gs_oiyx_gsv32, 1, 1, 2, 1, {0, 1, 2, 3, 4}, "goiyx", "oixy??g", {{6, 32}}), + FMT_TRAITS(gyxio, 1, 1, 2, 1, {0, 3, 4, 2, 1}, "gyxio", "oixy??g", {}), + FMT_TRAITS(g_is_os_zyx_isv16_osv16, 1, 1, 3, 1, {0, 2, 1, 3, 4, 5}, "giozyx", "oixyz?g", {{1, 16}, {0, 16}}), + FMT_TRAITS(g_is_os_yx_isv16_osv16, 1, 1, 2, 1, {0, 2, 1, 3, 4}, "gioyx", "oixy??g", {{1, 16}, {0, 16}}), + FMT_TRAITS(g_os_is_zyx_isv8_osv16_isv2, 1, 1, 3, 1, {0, 1, 2, 3, 4, 5}, "goizyx", "oixyz?g", {{1, 8}, {0, 16}, {1, 2}}), + FMT_TRAITS(g_os_is_yx_isv8_osv16_isv2, 1, 1, 2, 1, {0, 1, 2, 3, 4}, "goiyx", "oixy??g", {{1, 8}, {0, 16}, {1, 2}}), + FMT_TRAITS(g_os_is_zyx_isv16_osv16, 1, 1, 3, 1, {0, 1, 2, 3, 4, 5}, "goizyx", "oixyz?g", {{0, 16}, {1, 16}}), + FMT_TRAITS(g_os_is_yx_osv16_isv4, 1, 1, 2, 1, {0, 1, 2, 3, 4}, "goixy", "oixy??g", {{0, 16}, {1, 4}}), + FMT_TRAITS(g_os_is_zyx_osv16_isv16, 1, 1, 3, 1, {0, 1, 2, 3, 4, 5}, "goizyx", "oixyz?g", {{0, 16}, {1, 16}}), + FMT_TRAITS(g_os_zyx_is_osv16_isv4, 1, 1, 3, 1, {0, 1, 2, 3, 4, 5}, "gozyxi", "oixyz?g", {{0, 16}, {1, 4}}), + FMT_TRAITS(g_os_zyx_is_osv16_isv16, 1, 1, 3, 1, {0, 1, 2, 3, 4, 5}, "gozyxi", "oixyz?g", {{0, 16}, {1, 16}}), + FMT_TRAITS(g_os_zyx_is_osv16_isv32, 1, 1, 3, 1, {0, 1, 2, 3, 4, 5}, "gozyxi", "oixyz?g", {{0, 16}, {1, 32}}), + FMT_TRAITS(g_os_zyx_is_osv32_isv4, 1, 1, 3, 1, {0, 1, 2, 3, 4, 5}, "gozyxi", "oixyz?g", {{0, 32}, {1, 4}}), + FMT_TRAITS(g_os_zyx_is_osv32_isv16, 1, 1, 3, 1, 
{0, 1, 2, 3, 4, 5}, "gozyxi", "oixyz?g", {{0, 32}, {1, 16}}), + FMT_TRAITS(g_os_zyx_is_osv32_isv32, 1, 1, 3, 1, {0, 1, 2, 3, 4, 5}, "gozyxi", "oixyz?g", {{0, 32}, {1, 32}}), + FMT_TRAITS(g_os_is_yx_osa4_isa8_osv8_isv4, 1, 1, 2, 1, {0, 1, 2, 3, 4}, "goiyx", "oixy??g", {{0, 32}, {1, 32}}), + FMT_TRAITS(g_os_is_zyx_osa4_isa8_osv8_isv4, 1, 1, 3, 1, {0, 1, 2, 3, 4, 5}, "goizyx", "oixyz?g", {{0, 32}, {1, 32}}), + FMT_TRAITS(g_os_is_yx_osa4_isa8_osv8_isv2, 1, 1, 2, 1, {0, 1, 2, 3, 4}, "goiyx", "oixy??g", {{0, 32}, {1, 16}}), + FMT_TRAITS(g_os_is_zyx_osa4_isa8_osv8_isv2, 1, 1, 3, 1, {0, 1, 2, 3, 4, 5}, "goizyx", "oixyz?g", {{0, 32}, {1, 16}}), + FMT_TRAITS(g_os_is_yx_osa2_isa8_osv16_isv4, 1, 1, 2, 1, {0, 1, 2, 3, 4}, "goiyx", "oixy??g", {{0, 32}, {1, 32}}), + FMT_TRAITS(g_os_is_yx_osa2_isa8_osv16_isv2, 1, 1, 2, 1, {0, 1, 2, 3, 4}, "goiyx", "oixy??g", {{0, 32}, {1, 16}}), + FMT_TRAITS(gs_oi_yxs_gsv4_yxsv4, 1, 1, 2, 1, {0, 1, 2, 3, 4}, "goiyx", "oixy??g", {{6, 4}}), + FMT_TRAITS(gs_oi_yxs_gsv16_yxsv4, 1, 1, 2, 1, {0, 1, 2, 3, 4}, "goiyx", "oixy??g", {{6, 16}}), + FMT_TRAITS(gs_oi_yxs_gsv32_yxsv4, 1, 1, 2, 1, {0, 1, 2, 3, 4}, "goiyx", "oixy??g", {{6, 32}}), + FMT_TRAITS(g_os_is_yx_isv16_osv16, 1, 1, 2, 1, {0, 1, 2, 3, 4}, "goiyx", "oixy??g", {{1, 16}, {0, 16}}), + FMT_TRAITS(gi_yxs_os_yxsv2_osv16, 1, 1, 2, 1, {0, 2, 3, 4, 1}, "giyxo", "oixy??g", {{0, 16}}), + FMT_TRAITS(giy_xs_os_xsv2_osv8__ao32, 1, 1, 2, 1, {0, 2, 3, 4, 1}, "giyxo", "oixy??g", {{2, 2}, {0, 8}}), + FMT_TRAITS(giy_xs_os_xsv2_osv16__ao32, 1, 1, 2, 1, {0, 2, 3, 4, 1}, "giyxo", "oixy??g", {{2, 2}, {0, 16}}), + }; + if (traits.find(fmt) == traits.end()) { + throw std::runtime_error("[GPU] Format description is missing in fmt traits"); + } + return traits.at(fmt); +} + +std::string format::to_string() const { + if (value == any) { + return "any"; + } + return traits(value).str; +} + +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/runtime/layout.cpp b/src/plugins/intel_gpu/src/runtime/layout.cpp new 
file mode 100644 index 00000000000..bf34c3acb3c --- /dev/null +++ b/src/plugins/intel_gpu/src/runtime/layout.cpp @@ -0,0 +1,172 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "intel_gpu/runtime/layout.hpp" + +#include +#include +#include + +namespace cldnn { + +size_t layout::get_rank() const { + return format.dimension(); +} + +size_t layout::get_spatial_rank() const { + return format.spatial_num(); +} + +tensor::value_type layout::get_dim(size_t idx) const { + auto dims = get_dims(); + return dims[idx]; +} + +tensor::value_type layout::batch() const { + auto dims = get_dims(); + const size_t dim_idx = 0; + if (!format::is_weights_format(format) && dims[dim_idx] != size.batch[0]) { + throw std::runtime_error("batch mismatch: " + std::to_string(dims[dim_idx]) + " vs " + std::to_string(size.batch[0])); + } + return dims[dim_idx]; +} + +tensor::value_type layout::feature() const { + auto dims = get_dims(); + const size_t dim_idx = 1; + if (!format::is_weights_format(format) && dims[dim_idx] != size.feature[0]) { + throw std::runtime_error("feature mismatch: " + std::to_string(dims[dim_idx]) + " vs " + std::to_string(size.feature[0])); + } + return dims[dim_idx]; +} + +tensor::value_type layout::spatial(size_t spatial_idx) const { + if (spatial_idx >= format.spatial_num() ) + return 1; + auto dims = get_dims(); + const size_t dim_idx = (format::is_grouped(format) ? 
3 : 2) + (format.spatial_num() - 1 - spatial_idx); + if (dims[dim_idx] != size.spatial[spatial_idx]) { + throw std::runtime_error("spatials mismatch: " + std::to_string(dims[dim_idx]) + " vs " + std::to_string(size.spatial[spatial_idx])); + } + return dims[dim_idx]; +} + +tensor::value_type layout::group() const { + auto dims = get_dims(); + if (!format::is_weights_format(format)) { + throw std::logic_error("[GPU] can't get group dimension for data layout"); + } + + if (!format::is_grouped(format)) + return 1; + + if (dims[0] != size.group[0]) { + throw std::runtime_error("groups mismatch: " + std::to_string(dims[0]) + " vs " + std::to_string(size.group[0])); + } + return dims[0]; +} + +tensor::value_type layout::ofm() const { + if (!format::is_weights_format(format)) { + throw std::logic_error("[GPU] can't get OFM dimension for data layout"); + } + auto dims = get_dims(); + const size_t dim_idx = format::is_grouped(format) ? 1 : 0; + + return dims[dim_idx]; +} + +tensor::value_type layout::ifm() const { + if (!format::is_weights_format(format)) { + throw std::logic_error("[GPU] can't get IFM dimension for data layout"); + } + auto dims = get_dims(); + const size_t dim_idx = format::is_grouped(format) ? 
2 : 1; + return dims[dim_idx]; +} + +static format get_default_format(size_t rank, bool is_weights, bool is_grouped) { + auto default_fmt = cldnn::format::bfyx; + if (is_weights) { + if (is_grouped) { + if (rank == 5) { + default_fmt = cldnn::format::goiyx; + } else if (rank == 6) { + default_fmt = cldnn::format::goizyx; + } + } else { + if (rank == 4) { + default_fmt = cldnn::format::oiyx; + } else if (rank == 5) { + default_fmt = cldnn::format::oizyx; + } + } + } else { + if (rank == 5) { + default_fmt = cldnn::format::bfzyx; + } else if (rank == 6) { + default_fmt = cldnn::format::bfwzyx; + } + } + + return default_fmt; +} +std::vector layout::get_dims() const { + auto default_fmt = get_default_format(format.dimension(), format::is_weights_format(format), format::is_grouped(format)); + return size.sizes(default_fmt); +} + +std::vector layout::get_padded_dims() const { + auto default_fmt = get_default_format(format.dimension(), format::is_weights_format(format), format::is_grouped(format)); + auto padded_size = size.add(data_padding.lower_size()).add(data_padding.upper_size()); + return padded_size.sizes(default_fmt); +} + +static format to_weights_format(format f, bool is_grouped) { + if (format::is_weights_format(f)) + return f; + + switch (f) { + case format::bfyx: + return format::oiyx; + case format::fyxb: + return format::iyxo; + case format::byxf: + return format::oyxi; + case format::yxfb: + return format::yxio; + case format::bfzyx: + return is_grouped ? format::goiyx : format::oizyx; + case format::bfwzyx: { + if (!is_grouped) + throw std::runtime_error("Invalid conversion of data format to weights format. 
bfwzyx can't be non-grouped as 4D spatials are not supported"); + return format::goizyx; + } + case format::bs_xs_xsv8_bsv8: + return format::os_i_osv8__ai8; + default: + throw std::invalid_argument("Unable to convert data format " + f.to_string() + " to weights format"); + } +} + +layout layout::convert_to_weights_layout(bool is_grouped) const { + auto dims = size.sizes(format); + auto fmt = to_weights_format(format, is_grouped); + + return layout{data_type, fmt, tensor{fmt, dims}}; +} + +std::vector layout::get_ordered_dims() const { + return size.sizes(format); +} + +std::vector layout::get_dims_order() const { + return format::traits(format)._order; +} + +std::string layout::to_string() const { + // TODO: Extend with format/data-type info + return size.to_string(); +} +} // namespace cldnn diff --git a/src/plugins/intel_gpu/tests/module_tests/format_test.cpp b/src/plugins/intel_gpu/tests/module_tests/format_test.cpp new file mode 100644 index 00000000000..354ee52fcd5 --- /dev/null +++ b/src/plugins/intel_gpu/tests/module_tests/format_test.cpp @@ -0,0 +1,23 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "test_utils.h" + +#include "intel_gpu/runtime/format.hpp" + +TEST(format, to_string) { + typedef std::underlying_type::type format_underlying_type; + for (format_underlying_type i = 0; i < static_cast(cldnn::format::format_num); i++) { + cldnn::format fmt = static_cast(i); + ASSERT_NO_THROW(fmt.to_string()) << "Can't convert to string format " << i; + } +} + +TEST(format, traits) { + typedef std::underlying_type::type format_underlying_type; + for (format_underlying_type i = 0; i < static_cast(cldnn::format::format_num); i++) { + cldnn::format fmt = static_cast(i); + ASSERT_NO_THROW(cldnn::format::traits(fmt)) << "Can't get traits for format " << i; + } +} diff --git a/src/plugins/intel_gpu/tests/module_tests/layout_test.cpp b/src/plugins/intel_gpu/tests/module_tests/layout_test.cpp new file mode 100644 index 
00000000000..5093c07c3a3 --- /dev/null +++ b/src/plugins/intel_gpu/tests/module_tests/layout_test.cpp @@ -0,0 +1,181 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "test_utils.h" + +#include "intel_gpu/runtime/layout.hpp" + +using namespace cldnn; +using namespace ::tests; + + +struct layout_test_params { + data_types dt; + format fmt; + std::vector size; + std::vector expected_aligned_size; + std::vector expected_order; +}; + +class data_layout_test : public testing::TestWithParam { }; + +TEST_P(data_layout_test, size_check) { + auto p = GetParam(); + auto default_fmt = format::bfyx; + + if (p.size.size() == 5) { + default_fmt = format::bfzyx; + } else if (p.size.size() == 6) { + default_fmt = format::bfwzyx; + } + + ASSERT_FALSE(format::is_weights_format(p.fmt)); + + auto l = layout(p.dt, p.fmt, tensor{default_fmt, p.size}); + + size_t expected_count = std::accumulate(p.size.begin(), p.size.end(), 1, std::multiplies()); + size_t expected_bytes_count = std::accumulate(p.expected_aligned_size.begin(), p.expected_aligned_size.end(), 1, std::multiplies()) * + data_type_traits::size_of(p.dt); + + ASSERT_EQ(l.bytes_count(), expected_bytes_count); + ASSERT_EQ(l.count(), expected_count); + ASSERT_EQ(l.get_rank(), p.size.size()); + + ASSERT_EQ(l.batch(), p.size[0]); + ASSERT_EQ(l.feature(), p.size[1]); + if (p.size.size() == 6) { + ASSERT_EQ(l.spatial(0), p.size[5]); + ASSERT_EQ(l.spatial(1), p.size[4]); + ASSERT_EQ(l.spatial(2), p.size[3]); + ASSERT_EQ(l.spatial(3), p.size[2]); + } else if (p.size.size() == 5) { + ASSERT_EQ(l.spatial(0), p.size[4]); + ASSERT_EQ(l.spatial(1), p.size[3]); + ASSERT_EQ(l.spatial(2), p.size[2]); + ASSERT_EQ(l.spatial(3), 1); + } else if (p.size.size() == 4) { + ASSERT_EQ(l.spatial(0), p.size[3]); + ASSERT_EQ(l.spatial(1), p.size[2]); + ASSERT_EQ(l.spatial(2), 1); + ASSERT_EQ(l.spatial(3), 1); + } + + auto dims = l.get_dims(); + auto ordered_dims = l.get_ordered_dims(); + + ASSERT_EQ(dims, 
p.size); + ASSERT_EQ(l.get_dims_order(), p.expected_order); + ASSERT_EQ(l.get_dims_order().size(), dims.size()); + + for (auto& dim_idx : l.get_dims_order()) { + ASSERT_LT(dim_idx, ordered_dims.size()); + } + + for (size_t i = 0; i < l.get_rank(); i++) { + ASSERT_EQ(ordered_dims[i], dims[p.expected_order[i]]); + ASSERT_EQ(ordered_dims[i], p.size[p.expected_order[i]]); + } +} + +INSTANTIATE_TEST_SUITE_P(smoke, data_layout_test, + testing::ValuesIn(std::vector{ + {data_types::f32, format::bfyx, {2, 33, 3, 5}, {2, 33, 3, 5}, {0, 1, 2, 3}}, + {data_types::f16, format::bfzyx, {2, 33, 3, 5, 4}, {2, 33, 3, 5, 4}, {0, 1, 2, 3, 4}}, + {data_types::i8, format::bfwzyx, {2, 33, 3, 5, 4, 6}, {2, 33, 3, 5, 4, 6}, {0, 1, 2, 3, 4, 5}}, + {data_types::u8, format::yxfb, {2, 33, 3, 5}, {2, 33, 3, 5}, {2, 3, 1, 0}}, + {data_types::f32, format::byxf, {2, 33, 3, 5}, {2, 33, 3, 5}, {0, 2, 3, 1}}, + {data_types::f32, format::fyxb, {2, 33, 3, 5}, {2, 33, 3, 5}, {1, 2, 3, 0}}, + {data_types::f32, format::b_fs_yx_fsv16, {2, 33, 3, 5}, {2, 48, 3, 5}, {0, 1, 2, 3}}, + {data_types::f32, format::b_fs_yx_fsv32, {2, 33, 3, 5}, {2, 64, 3, 5}, {0, 1, 2, 3}}, + {data_types::f32, format::b_fs_zyx_fsv16, {2, 33, 3, 5, 6}, {2, 48, 3, 5, 6}, {0, 1, 2, 3, 4}}, + {data_types::f32, format::b_fs_zyx_fsv32, {2, 33, 3, 5, 6}, {2, 64, 3, 5, 6}, {0, 1, 2, 3, 4}}, + {data_types::f32, format::bs_fs_zyx_bsv16_fsv16, {2, 33, 3, 5, 6}, {16, 48, 3, 5, 6}, {0, 1, 2, 3, 4}}, + {data_types::f32, format::bs_fs_yx_bsv16_fsv16, {2, 33, 3, 5}, {16, 48, 3, 5}, {0, 1, 2, 3}}, + {data_types::f32, format::bs_fs_yx_bsv4_fsv4, {2, 33, 3, 5}, {4, 36, 3, 5}, {0, 1, 2, 3}}, + })); + +class weights_layout_test : public testing::TestWithParam { }; + +TEST_P(weights_layout_test, size_check) { + auto p = GetParam(); + auto default_fmt = format::oiyx; + + if (format::is_weights_format(p.fmt)) { + if (p.size.size() == 5) { + default_fmt = format::goiyx; + } else if (p.size.size() == 6) { + default_fmt = format::goizyx; + } + } else { + if 
(p.size.size() == 4) { + default_fmt = format::oiyx; + } else if (p.size.size() == 5) { + default_fmt = format::oizyx; + } + } + + auto l = layout(p.dt, p.fmt, tensor{default_fmt, p.size}); + + size_t expected_count = std::accumulate(p.size.begin(), p.size.end(), 1, std::multiplies()); + size_t expected_bytes_count = std::accumulate(p.expected_aligned_size.begin(), p.expected_aligned_size.end(), 1, std::multiplies()) * + data_type_traits::size_of(p.dt); + + ASSERT_EQ(l.bytes_count(), expected_bytes_count); + ASSERT_EQ(l.count(), expected_count); + ASSERT_EQ(l.get_rank(), p.size.size()); + + if (format::is_weights_format(p.fmt)) { + if (format::is_grouped(p.fmt)) { + ASSERT_EQ(l.group(), p.size[0]); + ASSERT_EQ(l.ofm(), p.size[1]); + ASSERT_EQ(l.ifm(), p.size[2]); + if (p.size.size() == 6) { + ASSERT_EQ(l.spatial(0), p.size[5]); + ASSERT_EQ(l.spatial(1), p.size[4]); + ASSERT_EQ(l.spatial(2), p.size[3]); + } else if (p.size.size() == 5) { + ASSERT_EQ(l.spatial(0), p.size[4]); + ASSERT_EQ(l.spatial(1), p.size[3]); + } + } else { + ASSERT_EQ(l.ofm(), p.size[0]); + ASSERT_EQ(l.ifm(), p.size[1]); + if (p.size.size() == 6) { + ASSERT_EQ(l.spatial(0), p.size[4]); + ASSERT_EQ(l.spatial(1), p.size[3]); + ASSERT_EQ(l.spatial(2), p.size[2]); + } else if (p.size.size() == 5) { + ASSERT_EQ(l.spatial(0), p.size[3]); + ASSERT_EQ(l.spatial(1), p.size[2]); + } + } + } + auto dims = l.get_dims(); + auto ordered_dims = l.get_ordered_dims(); + + ASSERT_EQ(dims, p.size); + ASSERT_EQ(l.get_dims_order(), p.expected_order); + ASSERT_EQ(l.get_dims_order().size(), dims.size()); + + for (auto& dim_idx : l.get_dims_order()) { + ASSERT_LT(dim_idx, ordered_dims.size()); + } + + for (size_t i = 0; i < l.get_rank(); i++) { + ASSERT_EQ(ordered_dims[i], dims[p.expected_order[i]]); + ASSERT_EQ(ordered_dims[i], p.size[p.expected_order[i]]); + } +} + +INSTANTIATE_TEST_SUITE_P(smoke, weights_layout_test, + testing::ValuesIn(std::vector{ + {data_types::f32, format::oiyx, {2, 15, 3, 5}, {2, 15, 3, 5}, {0, 
1, 2, 3}}, + {data_types::f32, format::ioyx, {2, 15, 3, 5}, {2, 15, 3, 5}, {1, 0, 2, 3}}, + {data_types::f32, format::yxio, {2, 15, 3, 5}, {2, 15, 3, 5}, {2, 3, 1, 0}}, + {data_types::f32, format::goiyx, {4, 2, 15, 3, 5}, {4, 2, 15, 3, 5}, {0, 1, 2, 3, 4}}, + {data_types::f32, format::goizyx, {4, 2, 15, 3, 5, 6}, {4, 2, 15, 3, 5, 6}, {0, 1, 2, 3, 4, 5}}, + {data_types::f32, format::giozyx, {4, 2, 15, 3, 5, 6}, {4, 2, 15, 3, 5, 6}, {0, 2, 1, 3, 4, 5}}, + {data_types::f32, format::g_os_is_yx_osa2_isa8_osv16_isv2, {4, 2, 15, 3, 5}, {4, 32, 16, 3, 5}, {0, 1, 2, 3, 4}}, + {data_types::f32, format::g_os_is_zyx_osa4_isa8_osv8_isv4, {4, 2, 15, 3, 5, 6}, {4, 32, 32, 3, 5, 6}, {0, 1, 2, 3, 4, 5}}, + }));