[IE CLDNN] Memory allocation optimizations (#2178)
This commit is contained in:
parent
bdd0247362
commit
a91e256d27
22
inference-engine/thirdparty/clDNN/api/layout.hpp
vendored
22
inference-engine/thirdparty/clDNN/api/layout.hpp
vendored
@ -1,5 +1,5 @@
|
||||
/*
|
||||
// Copyright (c) 2016-2019 Intel Corporation
|
||||
// Copyright (c) 2016-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@ -406,6 +406,26 @@ struct layout {
|
||||
sizes[1] = align_to(sizes[1], 32);
|
||||
} else if (this->format == cldnn::format::image_2d_rgba) {
|
||||
sizes[1] = 4;
|
||||
} else if (this->format == cldnn::format::gs_oi_yxs_gsv4_yxsv4 ||
|
||||
this->format == cldnn::format::gs_oi_yxs_gsv16_yxsv4 ||
|
||||
this->format == cldnn::format::gs_oi_yxs_gsv32_yxsv4) {
|
||||
sizes[3] = align_to(sizes[2] * sizes[3], 4);
|
||||
sizes[2] = 1;
|
||||
} else if (this->format == cldnn::format::os_iyx_osv32__ai32 && !is_aligned_to(sizes[1], 32)) {
|
||||
sizes[1] = align_to(sizes[1], 32);
|
||||
} else if ((this->format == cldnn::format::iy_xs_os_xsv2_osv8__ao32 ||
|
||||
this->format == cldnn::format::iy_xs_os_xsv2_osv16__ao32 ||
|
||||
this->format == cldnn::format::giy_xs_os_xsv2_osv8__ao32 ||
|
||||
this->format == cldnn::format::giy_xs_os_xsv2_osv16__ao32) && !is_aligned_to(sizes[0], 32)) {
|
||||
sizes[0] = align_to(sizes[0], 32);
|
||||
sizes[3] = align_to(sizes[2] * sizes[3], 2);
|
||||
sizes[2] = 1;
|
||||
} else if (this->format == cldnn::format::i_yxs_os_yxsv2_osv16 || this->format == cldnn::format::gi_yxs_os_yxsv2_osv16) {
|
||||
sizes[3] = align_to(sizes[2] * sizes[3], 2);
|
||||
sizes[2] = 1;
|
||||
} else if (this->format == cldnn::format::os_i_yxs_osv4_yxsv4) {
|
||||
sizes[3] = align_to(sizes[2] * sizes[3], 4);
|
||||
sizes[2] = 1;
|
||||
}
|
||||
size_t total = std::accumulate(
|
||||
sizes.begin(),
|
||||
|
206
inference-engine/thirdparty/clDNN/api/tensor.hpp
vendored
206
inference-engine/thirdparty/clDNN/api/tensor.hpp
vendored
@ -58,9 +58,9 @@ struct format_traits {
|
||||
/// @brief Block sizes as a vector of pairs of dimension number and block size ordered from rare to often.
|
||||
std::vector<std::pair<size_t, int>> block_sizes;
|
||||
/// @brief Characters representing batch dimensions in an order.
|
||||
static const char* batch_chars() { return "bn"; }
|
||||
static const char* batch_chars() { return "bno"; }
|
||||
/// @brief Characters representing feature map/channel dimensions in an order.
|
||||
static const char* feature_chars() { return "fioc"; }
|
||||
static const char* feature_chars() { return "fic"; }
|
||||
/// @brief Characters representing spatial dimensions in an order.
|
||||
static const char* spatial_chars() {
    static const char* const spatial_dimension_chars = "xyzhsw";
    return spatial_dimension_chars;
}
|
||||
/// @brief Characters representing local dimensions in an order.
|
||||
@ -122,8 +122,11 @@ struct format {
|
||||
oiyx, ///< the most common format for 2D weights
|
||||
yxio, ///< format used 2D weights
|
||||
oizyx, ///< the most common format for 3D convolution
|
||||
iyxo,
|
||||
os_iyx_osv16, ///< format used only for convolution weights:
|
||||
os_is_yx_osv16_isv16, ///< format used for convolution i8 weights
|
||||
os_is_yx_osv16_isv16, ///< format used for convolution i8 weights
|
||||
os_is_zyx_osv32_isv16,
|
||||
os_is_zyx_osv64_isv16,
|
||||
os_zyxi_osv16, ///< format used for weights for 3D convolution
|
||||
os_is_yx_isv16_osv16, ///< format used for blocked convolution
|
||||
os_is_zyx_isv16_osv16, ///< format used for weights for blocked 3D convolution
|
||||
@ -173,6 +176,11 @@ struct format {
|
||||
lstm_weights_dio, ///< dynamic_lstm, direction,
|
||||
///< than IO (I - input size, O - 4 * hidden_size)
|
||||
os_is_osv32_isv32_swizzled_by_4, ///< format for weights for 1x1 IMAD convolution
|
||||
os_iyx_osv32__ai32,
|
||||
iy_xs_os_xsv2_osv8__ao32,
|
||||
iy_xs_os_xsv2_osv16__ao32,
|
||||
i_yxs_os_yxsv2_osv16,
|
||||
os_i_yxs_osv4_yxsv4,
|
||||
|
||||
goiyx, ///< format used for weights for 2D convolution
|
||||
yxiog, ///< format used for weights for 2D convolution
|
||||
@ -196,6 +204,13 @@ struct format {
|
||||
g_os_zyx_is_osv32_isv4, ///< format for imad deconvolution
|
||||
g_os_zyx_is_osv32_isv16, ///< format for imad deconvolution
|
||||
g_os_zyx_is_osv32_isv32, ///< format for imad deconvolution
|
||||
g_os_is_yx_isv16_osv16,
|
||||
gs_oi_yxs_gsv4_yxsv4,
|
||||
gs_oi_yxs_gsv16_yxsv4,
|
||||
gs_oi_yxs_gsv32_yxsv4,
|
||||
gi_yxs_os_yxsv2_osv16,
|
||||
giy_xs_os_xsv2_osv8__ao32,
|
||||
giy_xs_os_xsv2_osv16__ao32,
|
||||
|
||||
format_num, ///< number of format types
|
||||
any = -1
|
||||
@ -212,7 +227,7 @@ struct format {
|
||||
// Order - dims changing order from rare to often
|
||||
// Inner order - dims order for internal storage in _sizes array
|
||||
// Block sizes - vector of pairs of dimension number (by inner order) and block size ordered from rare to often
|
||||
// Format B F S L G Order Inner order Block sizes
|
||||
// Format B F S L G Order Inner order Block sizes
|
||||
{ yxfb, { 1, 1, 2, 0, 0, "yxfb", "bfxy?", {}}},
|
||||
{ byxf, { 1, 1, 2, 0, 0, "byxf", "bfxy?", {}}},
|
||||
{ bfyx, { 1, 1, 2, 0, 0, "bfyx", "bfxy?", {}}},
|
||||
@ -235,66 +250,81 @@ struct format {
|
||||
{ nv12, { 1, 1, 2, 0, 0, "bfyx", "bfxy?", {}}},
|
||||
{ image_2d_rgba, { 1, 1, 2, 0, 0, "bfyx", "bfxy?", {}}},
|
||||
|
||||
{ oiyx, { 1, 1, 2, 0, 0, "bfyx", "bfxy", {}}},
|
||||
{ yxio, { 1, 1, 2, 0, 0, "yxfb", "bfxy?", {}}},
|
||||
{ oizyx, { 1, 1, 3, 0, 0, "bfzyx", "bfxyz", {}}},
|
||||
{ os_is_yx_isv16_osv16, { 1, 1, 2, 0, 0, "bfyx", "bfxy", {{1, 16}, {0, 16}}}},
|
||||
{ os_iyx_osv16, { 1, 1, 2, 0, 0, "bfyx", "bfxy?", {{0, 16}}}},
|
||||
{ os_iyx_osv32, { 1, 1, 2, 0, 0, "bfyx", "bfxy?", {{0, 32}}}},
|
||||
{ os_iyx_osv64, { 1, 1, 2, 0, 0, "bfyx", "bfxy?", {{0, 64}}}},
|
||||
{ winograd_2x3_s1_weights, { 1, 1, 2, 0, 0, "bfyx", "bfxy?", {}}},
|
||||
{ winograd_2x3_s1_fused_weights, { 1, 1, 2, 0, 0, "xyfb", "bfxy?", {}}},
|
||||
{ winograd_6x3_s1_fused_weights, { 1, 1, 2, 0, 0, "xyfb", "bfxy?", {}}},
|
||||
{ image_2d_weights_winograd_6x3_s1_fbxyb, { 1, 1, 2, 0, 0, "xyfb", "bfxy?", {}}},
|
||||
{ image_2d_weights_winograd_6x3_s1_xfbyb, { 1, 1, 2, 0, 0, "xyfb", "bfxy?", {}}},
|
||||
{ image_2d_weights_c4_fyx_b, { 1, 1, 2, 0, 0, "bfyx", "bfxy?", {}}},
|
||||
{ image_2d_weights_c1_b_fyx, { 1, 1, 2, 0, 0, "bfyx", "bfxy?", {}}},
|
||||
{ lstm_weights_dio, { 1, 1, 2, 0, 0, "bfxy", "bfxy?", {}}},
|
||||
{ os_is_yx_isa8_osv8_isv4, { 1, 1, 2, 0, 0, "bfyx", "bfxy?", {}}},
|
||||
{ os_is_yx_isa8_osv8_isv4_swizzled_by_4, { 1, 1, 2, 0, 0, "bfyx", "bfxy?", {}}},
|
||||
{ os_is_zyx_isa8_osv8_isv4, { 1, 1, 3, 0, 0, "bfzyx", "bfxyz", {{1, 8}, {0, 8}, {1, 4}}}},
|
||||
{ os_is_yx_osa4_isa8_osv8_isv4_swizzled_by_4, { 1, 1, 2, 0, 0, "bfyx", "bfxy?", {{0, 32}, {1, 32}}}},
|
||||
{ os_is_zyx_osa4_isa8_osv8_isv4_swizzled_by_4, { 1, 1, 3, 0, 0, "bfzyx", "bfxyz", {{0, 32}, {1, 32}}}},
|
||||
{ is_o_yx_isv32, { 1, 1, 2, 0, 0, "byxf", "bfxy?", {{1, 32}}}},
|
||||
{ is_o32_yx_isv32_swizzled_by_4, { 1, 1, 2, 0, 0, "byxf", "bfxy?", {}}},
|
||||
{ os_is_y_x8_osv8_isv4, { 1, 1, 2, 0, 0, "byxf", "bfxy?", {}}},
|
||||
{ os_is_y_x8_osv8_isv4_swizzled_by_4, { 1, 1, 2, 0, 0, "byxf", "bfxy?", {}}},
|
||||
{ os_is_yx_osv16_isv4, { 1, 1, 2, 0, 0, "bfyx", "bfxy?", {{0, 16}, {1, 4}}}},
|
||||
{ os_is_zyx_osv16_isv16, { 1, 1, 3, 0, 0, "bfzyx", "bfxyz", {{0, 16}, {1, 16}}}},
|
||||
{ os_is_yx_osv32_isv4_swizzled_by_2, { 1, 1, 2, 0, 0, "bfxy", "bfxy?", {{0, 32}, {1, 4}}}},
|
||||
{ os_is_yx_osv32_isv4, { 1, 1, 2, 0, 0, "bfxy", "bfxy?", {{0, 32}, {1, 4}}}},
|
||||
{ os_is_zyx_osv32_isv4, { 1, 1, 3, 0, 0, "bfzyx", "bfxyz", {{0, 32}, {1, 4}}}},
|
||||
{ os_is_yx_osv32_isv32p, { 1, 1, 1, 0, 0, "bfxy", "bfxy?", {}}},
|
||||
{ os_is_zyx_isv16_osv16, { 1, 1, 3, 0, 0, "bfzyx", "bfxyz", {{0, 16}, {1, 16}}}},
|
||||
{ is_os_zyx_isv16_osv16, { 1, 1, 3, 0, 0, "fbzyx", "bfxyz", {{1, 16}, {0, 16}}}},
|
||||
{ is_os_yx_isv16_osv16, { 1, 1, 2, 0, 0, "fbyx", "bfxyz", {{1, 16}, {0, 16}}}},
|
||||
{ os_is_osv32_isv32_swizzled_by_4, { 1, 1, 0, 0, 0, "bfxy", "bfxy?", {{0, 32}, {1, 32}}}},
|
||||
{ os_is_zyx_isv8_osv16_isv2, { 1, 1, 3, 0, 0, "bfzyx", "bfxyz", {{1, 8}, {0, 16}, {1, 2}}}},
|
||||
{ os_zyxi_osv16, { 1, 1, 3, 0, 0, "bzyxf", "bfxyz", {{0, 16}}}},
|
||||
{ os_is_yx_isv8_osv16_isv2, { 1, 1, 2, 0, 0, "bfzyx", "bfxyz", {{1, 8}, {0, 16}, {1, 2}}}},
|
||||
{ os_is_yx_osv16_isv16, { 1, 1, 2, 0, 0, "bfyx", "bfxy", {{1, 16}, {0, 16}}}},
|
||||
{ oiyx, { 1, 1, 2, 0, 0, "oiyx", "oixy", {}}},
|
||||
{ iyxo, { 1, 1, 2, 0, 0, "iyxo", "oixy", {}}},
|
||||
{ yxio, { 1, 1, 2, 0, 0, "yxio", "oixy?", {}}},
|
||||
{ oizyx, { 1, 1, 3, 0, 0, "oizyx", "oixyz", {}}},
|
||||
{ os_is_yx_isv16_osv16, { 1, 1, 2, 0, 0, "oiyx", "oixy", {{1, 16}, {0, 16}}}},
|
||||
{ os_iyx_osv16, { 1, 1, 2, 0, 0, "oiyx", "oixy?", {{0, 16}}}},
|
||||
{ os_iyx_osv32, { 1, 1, 2, 0, 0, "oiyx", "oixy?", {{0, 32}}}},
|
||||
{ os_iyx_osv64, { 1, 1, 2, 0, 0, "oiyx", "oixy?", {{0, 64}}}},
|
||||
{ winograd_2x3_s1_weights, { 1, 1, 2, 0, 0, "oiyx", "oixy?", {}}},
|
||||
{ winograd_2x3_s1_fused_weights, { 1, 1, 2, 0, 0, "xyio", "oixy?", {}}},
|
||||
{ winograd_6x3_s1_fused_weights, { 1, 1, 2, 0, 0, "xyio", "oixy?", {}}},
|
||||
{ image_2d_weights_winograd_6x3_s1_fbxyb, { 1, 1, 2, 0, 0, "xyio", "oixy?", {}}},
|
||||
{ image_2d_weights_winograd_6x3_s1_xfbyb, { 1, 1, 2, 0, 0, "xyio", "oixy?", {}}},
|
||||
{ image_2d_weights_c4_fyx_b, { 1, 1, 2, 0, 0, "oiyx", "oixy?", {}}},
|
||||
{ image_2d_weights_c1_b_fyx, { 1, 1, 2, 0, 0, "oiyx", "oixy?", {}}},
|
||||
{ lstm_weights_dio, { 1, 1, 2, 0, 0, "oixy", "oixy?", {}}},
|
||||
{ os_is_yx_isa8_osv8_isv4, { 1, 1, 2, 0, 0, "oiyx", "oixy?", {}}},
|
||||
{ os_is_yx_isa8_osv8_isv4_swizzled_by_4, { 1, 1, 2, 0, 0, "oiyx", "oixy?", {}}},
|
||||
{ os_is_zyx_isa8_osv8_isv4, { 1, 1, 3, 0, 0, "oizyx", "oixyz", {{1, 8}, {0, 8}, {1, 4}}}},
|
||||
{ os_is_yx_osa4_isa8_osv8_isv4_swizzled_by_4, { 1, 1, 2, 0, 0, "oiyx", "oixy?", {{0, 32}, {1, 32}}}},
|
||||
{ os_is_zyx_osa4_isa8_osv8_isv4_swizzled_by_4, { 1, 1, 3, 0, 0, "oizyx", "oixyz", {{0, 32}, {1, 32}}}},
|
||||
{ is_o_yx_isv32, { 1, 1, 2, 0, 0, "oyxi", "oixy?", {{1, 32}}}},
|
||||
{ is_o32_yx_isv32_swizzled_by_4, { 1, 1, 2, 0, 0, "oyxi", "oixy?", {}}},
|
||||
{ os_is_y_x8_osv8_isv4, { 1, 1, 2, 0, 0, "oyxi", "oixy?", {}}},
|
||||
{ os_is_y_x8_osv8_isv4_swizzled_by_4, { 1, 1, 2, 0, 0, "oyxi", "oixy?", {}}},
|
||||
{ os_is_yx_osv16_isv4, { 1, 1, 2, 0, 0, "oixy", "oixy?", {{0, 16}, {1, 4}}}},
|
||||
{ os_is_zyx_osv16_isv16, { 1, 1, 3, 0, 0, "oizyx", "oixyz", {{0, 16}, {1, 16}}}},
|
||||
{ os_is_yx_osv32_isv4_swizzled_by_2, { 1, 1, 2, 0, 0, "oixy", "oixy?", {{0, 32}, {1, 4}}}},
|
||||
{ os_is_yx_osv32_isv4, { 1, 1, 2, 0, 0, "oixy", "oixy?", {{0, 32}, {1, 4}}}},
|
||||
{ os_is_zyx_osv32_isv4, { 1, 1, 3, 0, 0, "oizyx", "oixyz", {{0, 32}, {1, 4}}}},
|
||||
{ os_is_yx_osv32_isv32p, { 1, 1, 1, 0, 0, "oixy", "oixy?", {}}},
|
||||
{ os_is_zyx_isv16_osv16, { 1, 1, 3, 0, 0, "oizyx", "oixyz", {{0, 16}, {1, 16}}}},
|
||||
{ is_os_zyx_isv16_osv16, { 1, 1, 3, 0, 0, "iozyx", "oixyz", {{1, 16}, {0, 16}}}},
|
||||
{ is_os_yx_isv16_osv16, { 1, 1, 2, 0, 0, "ioyx", "oixyz", {{1, 16}, {0, 16}}}},
|
||||
{ os_is_osv32_isv32_swizzled_by_4, { 1, 1, 0, 0, 0, "oixy", "oixy?", {{0, 32}, {1, 32}}}},
|
||||
{ os_is_zyx_isv8_osv16_isv2, { 1, 1, 3, 0, 0, "oizyx", "oixyz", {{1, 8}, {0, 16}, {1, 2}}}},
|
||||
{ os_zyxi_osv16, { 1, 1, 3, 0, 0, "ozyxi", "oixyz", {{0, 16}}}},
|
||||
{ os_is_yx_isv8_osv16_isv2, { 1, 1, 2, 0, 0, "oizyx", "oixyz", {{1, 8}, {0, 16}, {1, 2}}}},
|
||||
{ os_is_yx_osv16_isv16, { 1, 1, 2, 0, 0, "oiyx", "oixy", {{1, 16}, {0, 16}}}},
|
||||
{ os_is_zyx_osv32_isv16, { 1, 1, 3, 0, 0, "oizyx", "oixyz", {{0, 32}, {1, 16}}}},
|
||||
{ os_is_zyx_osv64_isv16, { 1, 1, 3, 0, 0, "oizyx", "oixyz", {{0, 64}, {1, 16}}}},
|
||||
{ os_iyx_osv32__ai32, { 1, 1, 2, 0, 0, "oiyx", "oixy", {{0, 32}}}},
|
||||
{ i_yxs_os_yxsv2_osv16, { 1, 1, 2, 0, 0, "iyxo", "oixy", {{0, 16}}}},
|
||||
{ iy_xs_os_xsv2_osv8__ao32, { 1, 1, 2, 0, 0, "iyxo", "oixy", {{2, 2}, {0, 8}}}},
|
||||
{ iy_xs_os_xsv2_osv16__ao32, { 1, 1, 2, 0, 0, "iyxo", "oixy", {{2, 2}, {0, 16}}}},
|
||||
{ os_i_yxs_osv4_yxsv4, { 1, 1, 2, 0, 0, "oiyx", "oixy", {{0, 4}}}},
|
||||
|
||||
{ goiyx, { 1, 1, 2, 0, 1, "gbfyx", "bfxy????g", {}}},
|
||||
{ goizyx, { 1, 1, 3, 0, 1, "gbfzyx", "bfxyz???g", {}}},
|
||||
{ g_os_iyx_osv16, { 1, 1, 2, 0, 1, "gbfyx", "bfxy????g", {{0, 16}}}},
|
||||
{ g_os_iyx_osv32, { 1, 1, 2, 0, 1, "gbfyx", "bfxy????g", {{0, 32}}}},
|
||||
{ gs_oiyx_gsv16, { 1, 1, 2, 0, 1, "gbfyx", "bfxy????g", {{8, 16}}}},
|
||||
{ gs_oizyx_gsv16, { 1, 1, 3, 0, 1, "gbfzyx", "bfxyz???g", {{8, 16}}}},
|
||||
{ gs_oiyx_gsv32, { 1, 1, 2, 0, 1, "gbfyx", "bfxy????g", {{8, 32}}}},
|
||||
{ gyxio, { 1, 1, 2, 0, 1, "gyxfb", "bfxy????g", {}}},
|
||||
{ g_is_os_zyx_isv16_osv16, { 1, 1, 3, 0, 1, "gfbzyx", "bfxyz???g", {{1, 16}, {0, 16}}}},
|
||||
{ g_is_os_yx_isv16_osv16, { 1, 1, 2, 0, 1, "gfbyx", "bfxy????g", {{1, 16}, {0, 16}}}},
|
||||
{ g_os_is_zyx_isv8_osv16_isv2, { 1, 1, 3, 0, 1, "gbfzyx", "bfxyz???g", {{1, 8}, {0, 16}, {1, 2}}}},
|
||||
{ g_os_is_yx_isv8_osv16_isv2, { 1, 1, 2, 0, 1, "gbfyx", "bfxy????g", {{1, 8}, {0, 16}, {1, 2}}}},
|
||||
{ g_os_is_zyx_isv16_osv16, { 1, 1, 3, 0, 1, "gbfzyx", "bfxyz???g", {{0, 16}, {1, 16}}}},
|
||||
{ g_os_is_yx_osv16_isv4, { 1, 1, 2, 0, 1, "gbfyx", "bfxy????g", {{0, 16}, {1, 4}}}},
|
||||
{ g_os_is_zyx_osv16_isv16, { 1, 1, 3, 0, 1, "gbfzyx", "bfxyz???g", {{0, 16}, {1, 16}}}},
|
||||
{ g_os_zyx_is_osv16_isv4, { 1, 1, 3, 0, 1, "gbzyxi", "bfxyz???g", {{0, 16}, {1, 4}}}},
|
||||
{ g_os_zyx_is_osv16_isv16, { 1, 1, 3, 0, 1, "gbzyxi", "bfxyz???g", {{0, 16}, {1, 16}}}},
|
||||
{ g_os_zyx_is_osv16_isv32, { 1, 1, 3, 0, 1, "gbzyxi", "bfxyz???g", {{0, 16}, {1, 32}}}},
|
||||
{ g_os_zyx_is_osv32_isv4, { 1, 1, 3, 0, 1, "gbzyxi", "bfxyz???g", {{0, 32}, {1, 4}}}},
|
||||
{ g_os_zyx_is_osv32_isv16, { 1, 1, 3, 0, 1, "gbzyxi", "bfxyz???g", {{0, 32}, {1, 16}}}},
|
||||
{ g_os_zyx_is_osv32_isv32, { 1, 1, 3, 0, 1, "gbzyxi", "bfxyz???g", {{0, 32}, {1, 32}}}},
|
||||
{ goiyx, { 1, 1, 2, 0, 1, "goiyx", "oixy????g", {}}},
|
||||
{ goizyx, { 1, 1, 3, 0, 1, "goizyx", "oixyz???g", {}}},
|
||||
{ g_os_iyx_osv16, { 1, 1, 2, 0, 1, "goiyx", "oixy????g", {{0, 16}}}},
|
||||
{ g_os_iyx_osv32, { 1, 1, 2, 0, 1, "goiyx", "oixy????g", {{0, 32}}}},
|
||||
{ gs_oiyx_gsv16, { 1, 1, 2, 0, 1, "goiyx", "oixy????g", {{8, 16}}}},
|
||||
{ gs_oizyx_gsv16, { 1, 1, 3, 0, 1, "goizyx", "oixyz???g", {{8, 16}}}},
|
||||
{ gs_oiyx_gsv32, { 1, 1, 2, 0, 1, "goiyx", "oixy????g", {{8, 32}}}},
|
||||
{ gyxio, { 1, 1, 2, 0, 1, "gyxio", "oixy????g", {}}},
|
||||
{ g_is_os_zyx_isv16_osv16, { 1, 1, 3, 0, 1, "giozyx", "oixyz???g", {{1, 16}, {0, 16}}}},
|
||||
{ g_is_os_yx_isv16_osv16, { 1, 1, 2, 0, 1, "gioyx", "oixy????g", {{1, 16}, {0, 16}}}},
|
||||
{ g_os_is_zyx_isv8_osv16_isv2, { 1, 1, 3, 0, 1, "goizyx", "oixyz???g", {{1, 8}, {0, 16}, {1, 2}}}},
|
||||
{ g_os_is_yx_isv8_osv16_isv2, { 1, 1, 2, 0, 1, "goiyx", "oixy????g", {{1, 8}, {0, 16}, {1, 2}}}},
|
||||
{ g_os_is_zyx_isv16_osv16, { 1, 1, 3, 0, 1, "goizyx", "oixyz???g", {{0, 16}, {1, 16}}}},
|
||||
{ g_os_is_yx_osv16_isv4, { 1, 1, 2, 0, 1, "goixy", "oixy????g", {{0, 16}, {1, 4}}}},
|
||||
{ g_os_is_zyx_osv16_isv16, { 1, 1, 3, 0, 1, "goizyx", "oixyz???g", {{0, 16}, {1, 16}}}},
|
||||
{ g_os_zyx_is_osv16_isv4, { 1, 1, 3, 0, 1, "gozyxi", "oixyz???g", {{0, 16}, {1, 4}}}},
|
||||
{ g_os_zyx_is_osv16_isv16, { 1, 1, 3, 0, 1, "gozyxi", "oixyz???g", {{0, 16}, {1, 16}}}},
|
||||
{ g_os_zyx_is_osv16_isv32, { 1, 1, 3, 0, 1, "gozyxi", "oixyz???g", {{0, 16}, {1, 32}}}},
|
||||
{ g_os_zyx_is_osv32_isv4, { 1, 1, 3, 0, 1, "gozyxi", "oixyz???g", {{0, 32}, {1, 4}}}},
|
||||
{ g_os_zyx_is_osv32_isv16, { 1, 1, 3, 0, 1, "gozyxi", "oixyz???g", {{0, 32}, {1, 16}}}},
|
||||
{ g_os_zyx_is_osv32_isv32, { 1, 1, 3, 0, 1, "gozyxi", "oixyz???g", {{0, 32}, {1, 32}}}},
|
||||
{ gs_oi_yxs_gsv4_yxsv4, { 1, 1, 2, 0, 1, "goiyx", "oixy????g", {{8, 4}}}},
|
||||
{ gs_oi_yxs_gsv16_yxsv4, { 1, 1, 2, 0, 1, "goiyx", "oixy????g", {{8, 16}}}},
|
||||
{ gs_oi_yxs_gsv32_yxsv4, { 1, 1, 2, 0, 1, "goiyx", "oixy????g", {{8, 32}}}},
|
||||
{ g_os_is_yx_isv16_osv16, { 1, 1, 2, 0, 1, "goiyx", "oixy????g", {{1, 16}, {0, 16}}}},
|
||||
{ gi_yxs_os_yxsv2_osv16, { 1, 1, 2, 0, 1, "giyxo", "oixy????g", {{0, 16}}}},
|
||||
{ iy_xs_os_xsv2_osv8__ao32, { 1, 1, 2, 0, 0, "giyxo", "oixy????g", {{2, 2}, {0, 8}}}},
|
||||
{ iy_xs_os_xsv2_osv16__ao32, { 1, 1, 2, 0, 1, "giyxo", "oixy????g", {{2, 2}, {0, 16}}}},
|
||||
};
|
||||
return traits.at(fmt);
|
||||
}
|
||||
@ -334,6 +364,17 @@ struct format {
|
||||
fmt == nv12 ||
|
||||
fmt == image_2d_rgba);
|
||||
}
|
||||
/// @brief Checks if @p format is weights format
|
||||
static bool is_weights_format(type fmt) {
|
||||
const auto internal_order = traits(fmt).internal_order;
|
||||
const auto weights_chars = { "o", "i" };
|
||||
for (const auto& c : weights_chars) {
|
||||
if (internal_order.find_first_of(c) != std::string::npos) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
/// @brief Checks if @p format is of grouped type
|
||||
static bool is_grouped(type fmt) {
    // Grouped means group_num() reports a non-zero value for this format.
    const auto groups = group_num(fmt);
    return groups != 0;
}
|
||||
/// @brief Checks if @p format is of image type
|
||||
@ -957,6 +998,45 @@ public:
|
||||
my_sizes[1] = align_to(my_sizes[1], 4);
|
||||
my_sizes[0] = align_to(my_sizes[0], 8);
|
||||
my_sizes[2] = align_to(my_sizes[2], 8);
|
||||
} else if (fmt == cldnn::format::gs_oi_yxs_gsv4_yxsv4 || fmt == cldnn::format::gs_oi_yxs_gsv16_yxsv4 || fmt == cldnn::format::gs_oi_yxs_gsv32_yxsv4) {
|
||||
const auto yxsv = 4;
|
||||
const auto flat_xy = adjusted_coords[4] + adjusted_coords[3] * my_sizes[4];
|
||||
|
||||
my_sizes.push_back(yxsv);
|
||||
my_sizes[4] = ceil_div(my_sizes[3] * my_sizes[4], yxsv);
|
||||
my_sizes[3] = 1;
|
||||
|
||||
adjusted_coords.push_back(flat_xy % yxsv);
|
||||
adjusted_coords[4] = flat_xy / yxsv;
|
||||
adjusted_coords[3] = 0;
|
||||
} else if (fmt == cldnn::format::os_iyx_osv32__ai32 && !is_aligned_to(my_sizes[1], 32)) {
|
||||
my_sizes[1] = align_to(my_sizes[1], 32);
|
||||
} else if ((fmt == cldnn::format::iy_xs_os_xsv2_osv8__ao32 || fmt == cldnn::format::iy_xs_os_xsv2_osv16__ao32) && !is_aligned_to(my_sizes[3], 32)) {
|
||||
my_sizes[3] = align_to(my_sizes[3], 32);
|
||||
} else if (fmt == cldnn::format::i_yxs_os_yxsv2_osv16 || fmt == cldnn::format::gi_yxs_os_yxsv2_osv16) {
|
||||
const auto yxsv = 2;
|
||||
auto flat_xy = adjusted_coords[2] + adjusted_coords[1] * my_sizes[2];
|
||||
|
||||
my_sizes.insert(std::prev(my_sizes.end()), yxsv);
|
||||
my_sizes[2] = ceil_div(my_sizes[1] * my_sizes[2], yxsv);
|
||||
my_sizes[1] = 1;
|
||||
|
||||
adjusted_coords.insert(std::prev(adjusted_coords.end()), flat_xy % yxsv);
|
||||
adjusted_coords[2] = flat_xy / yxsv;
|
||||
adjusted_coords[1] = 0;
|
||||
} else if (fmt == cldnn::format::os_i_yxs_osv4_yxsv4) {
|
||||
const auto yxsv = 4;
|
||||
const auto flat_xy = adjusted_coords[3] + adjusted_coords[2] * my_sizes[3];
|
||||
|
||||
my_sizes.push_back(yxsv);
|
||||
my_sizes[3] = ceil_div(my_sizes[2] * my_sizes[3], yxsv);
|
||||
my_sizes[2] = 1;
|
||||
|
||||
adjusted_coords.push_back(flat_xy % yxsv);
|
||||
adjusted_coords[3] = flat_xy / yxsv;
|
||||
adjusted_coords[2] = 0;
|
||||
} else if ((fmt == cldnn::format::giy_xs_os_xsv2_osv8__ao32 || fmt == cldnn::format::giy_xs_os_xsv2_osv16__ao32) && !is_aligned_to(my_sizes[3], 32)) {
|
||||
my_sizes[4] = align_to(my_sizes[4], 32);
|
||||
}
|
||||
|
||||
assert(my_sizes.size() == adjusted_coords.size());
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
// Copyright (c) 2016 Intel Corporation
|
||||
// Copyright (c) 2016-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@ -31,7 +31,7 @@ gpu_buffer::gpu_buffer(const refcounted_obj_ptr<engine_impl>& engine,
|
||||
bool reset)
|
||||
: lockable_gpu_mem(engine), memory_impl(engine, layout, net_id, allocation_type::cl_mem, false),
|
||||
_buffer(_context->context(), CL_MEM_READ_WRITE, size()) {
|
||||
if (reset) zero_buffer();
|
||||
if (reset && is_memory_reset_needed(_layout)) zero_buffer();
|
||||
}
|
||||
|
||||
gpu_buffer::gpu_buffer(const refcounted_obj_ptr<engine_impl>& engine,
|
||||
@ -256,7 +256,7 @@ gpu_usm::gpu_usm(const refcounted_obj_ptr<engine_impl>& engine, const layout& la
|
||||
"Unknown unified shared memory type!");
|
||||
}
|
||||
|
||||
if (reset) zero_buffer();
|
||||
if (reset && is_memory_reset_needed(_layout)) zero_buffer();
|
||||
}
|
||||
|
||||
void* gpu_usm::lock() {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
// Copyright (c) 2016-2019 Intel Corporation
|
||||
// Copyright (c) 2016-2020 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@ -43,6 +43,25 @@ struct memory_impl : refcounted_obj<memory_impl> {
|
||||
/// @brief Returns the network id currently stored in this object (_net_id).
uint32_t get_net_id() const {
    return _net_id;
}
|
||||
/// @brief Stores @p id as the network id of this object (_net_id).
void set_net(uint32_t id) {
    _net_id = id;
}
|
||||
/// @brief Returns the allocation type stored in this object (_type).
allocation_type get_allocation_type() const {
    return _type;
}
|
||||
/// @brief Tells whether memory described by @p l has to be reset.
/// @details The reset is reported as unnecessary only when all of the following hold:
///  - the format is a weights format that is neither winograd nor image_2d
///    (data memory can be reused by memory_pool, which can lead to errors),
///  - both the lower and the upper padding are zero,
///  - _bytes_count equals exactly the byte size computed from the layout's element count.
/// @param l Layout to check against this memory object.
/// @return false when the reset can be skipped, true otherwise.
virtual bool is_memory_reset_needed(layout l) {
    const bool plain_weights = format::is_weights_format(l.format) &&
                               !format::is_winograd(l.format) &&
                               !format::is_image_2d(l.format);
    if (!plain_weights)
        return true;

    const bool has_padding = l.data_padding.lower_size() != tensor(0) ||
                             l.data_padding.upper_size() != tensor(0);
    if (has_padding)
        return true;

    // For data_types::bin the element count is divided by 32 (rounded up) before sizing.
    const bool is_bin = l.data_type == data_types::bin;
    const auto element_count = is_bin ? ceil_div(l.count(), 32) : l.count();
    const auto payload_bytes = element_count * data_type_traits::size_of(l.data_type);

    // A buffer that is completely filled with data needs no reset; anything else does.
    return !(_bytes_count == payload_bytes);
}
|
||||
|
||||
protected:
|
||||
engine_impl *const _engine;
|
||||
|
@ -343,13 +343,15 @@ cldnn::format::type from_weights_layout(kernel_selector::weights_layout l) {
|
||||
case kernel_selector::weights_layout::oyxi:
|
||||
return cldnn::format::byxf;
|
||||
case kernel_selector::weights_layout::io:
|
||||
return cldnn::format::fyxb;
|
||||
return cldnn::format::iyxo;
|
||||
case kernel_selector::weights_layout::iyxo:
|
||||
return cldnn::format::fyxb;
|
||||
return cldnn::format::iyxo;
|
||||
case kernel_selector::weights_layout::yxio:
|
||||
return cldnn::format::yxfb;
|
||||
case kernel_selector::weights_layout::os_iyx_osv16:
|
||||
return cldnn::format::os_iyx_osv16;
|
||||
case kernel_selector::weights_layout::os_is_yx_isv16_osv16:
|
||||
return cldnn::format::os_is_yx_isv16_osv16;
|
||||
case kernel_selector::weights_layout::os_is_yx_osv16_isv16:
|
||||
return cldnn::format::os_is_yx_osv16_isv16;
|
||||
case kernel_selector::weights_layout::os_iyx_osv32:
|
||||
@ -458,8 +460,46 @@ cldnn::format::type from_weights_layout(kernel_selector::weights_layout l) {
|
||||
return cldnn::format::g_os_zyx_is_osv32_isv16;
|
||||
case kernel_selector::weights_layout::g_os_zyx_is_osv32_isv32:
|
||||
return cldnn::format::g_os_zyx_is_osv32_isv32;
|
||||
case kernel_selector::weights_layout::gs_oi_yxs_gsv4_yxsv4:
|
||||
return cldnn::format::gs_oi_yxs_gsv4_yxsv4;
|
||||
case kernel_selector::weights_layout::gs_oi_yxs_gsv16_yxsv4:
|
||||
return cldnn::format::gs_oi_yxs_gsv16_yxsv4;
|
||||
case kernel_selector::weights_layout::gs_oi_yxs_gsv32_yxsv4:
|
||||
return cldnn::format::gs_oi_yxs_gsv32_yxsv4;
|
||||
case kernel_selector::weights_layout::g_os_is_yx_osv16_isv4:
|
||||
return cldnn::format::g_os_is_yx_osv16_isv4;
|
||||
case kernel_selector::weights_layout::g_os_is_yx_isv16_osv16:
|
||||
return cldnn::format::g_os_is_yx_isv16_osv16;
|
||||
case kernel_selector::weights_layout::os_iyx_osv32__ai32:
|
||||
return cldnn::format::os_iyx_osv32__ai32;
|
||||
case kernel_selector::weights_layout::os_is_osv32_isv32_swizzled_by_4:
|
||||
return cldnn::format::os_is_osv32_isv32_swizzled_by_4;
|
||||
case kernel_selector::weights_layout::iy_xs_os_xsv2_osv16__ao32:
|
||||
return cldnn::format::iy_xs_os_xsv2_osv16__ao32;
|
||||
case kernel_selector::weights_layout::iy_xs_os_xsv2_osv8__ao32:
|
||||
return cldnn::format::iy_xs_os_xsv2_osv8__ao32;
|
||||
case kernel_selector::weights_layout::i_yxs_os_yxsv2_osv16:
|
||||
return cldnn::format::i_yxs_os_yxsv2_osv16;
|
||||
case kernel_selector::weights_layout::os_is_zyx_osv32_isv16:
|
||||
return cldnn::format::os_is_zyx_osv32_isv16;
|
||||
case kernel_selector::weights_layout::os_is_zyx_osv64_isv16:
|
||||
return cldnn::format::os_is_zyx_osv64_isv16;
|
||||
case kernel_selector::weights_layout::os_is_yx_isv8_osv16_isv2:
|
||||
return cldnn::format::os_is_yx_isv8_osv16_isv2;
|
||||
case kernel_selector::weights_layout::dlstm_dir_io:
|
||||
return cldnn::format::lstm_weights_dio;
|
||||
case kernel_selector::weights_layout::os_iyx_osv16_rotate_180:
|
||||
return cldnn::format::os_iyx_osv16;
|
||||
case kernel_selector::weights_layout::os_i_yxs_osv4_yxsv4:
|
||||
return cldnn::format::os_i_yxs_osv4_yxsv4;
|
||||
case kernel_selector::weights_layout::gi_yxs_os_yxsv2_osv16:
|
||||
return cldnn::format::gi_yxs_os_yxsv2_osv16;
|
||||
case kernel_selector::weights_layout::giy_xs_os_xsv2_osv8__ao32:
|
||||
return cldnn::format::giy_xs_os_xsv2_osv8__ao32;
|
||||
case kernel_selector::weights_layout::giy_xs_os_xsv2_osv16__ao32:
|
||||
return cldnn::format::giy_xs_os_xsv2_osv16__ao32;
|
||||
default:
|
||||
return cldnn::format::bfyx;
|
||||
throw std::invalid_argument("Unable to convert kernel selector Weights layout " + std::to_string((int)l) + " to cldnn format");
|
||||
}
|
||||
}
|
||||
|
||||
@ -561,14 +601,18 @@ layout from_weights_tensor(const kernel_selector::weights_tensor& l) {
|
||||
const auto format = from_weights_layout(l.GetLayout());
|
||||
const auto type = from_weights_type(l.GetDType());
|
||||
|
||||
tensor t = {static_cast<int>(l.OFM().v),
|
||||
static_cast<int>(l.IFM().v),
|
||||
static_cast<int>(l.X().v),
|
||||
static_cast<int>(l.Y().v),
|
||||
static_cast<int>(l.LX().v),
|
||||
static_cast<int>(l.LY().v)};
|
||||
tensor size(1);
|
||||
|
||||
return layout(type, format, t);
|
||||
size.group[0] = static_cast<int32_t>(l.G().v);
|
||||
size.batch[0] = static_cast<int32_t>(l.OFM().v);
|
||||
size.feature[0] = static_cast<int32_t>(l.IFM().v);
|
||||
size.spatial[0] = static_cast<int32_t>(l.X().v);
|
||||
size.spatial[1] = static_cast<int32_t>(l.Y().v);
|
||||
size.spatial[2] = static_cast<int32_t>(l.Z().v);
|
||||
size.local[0] = static_cast<int32_t>(l.LX().v);
|
||||
size.local[1] = static_cast<int32_t>(l.LY().v);
|
||||
|
||||
return layout(type, format, size);
|
||||
}
|
||||
|
||||
kernel_selector::activation_function get_kernel_selector_activation_param(activation_func activation) {
|
||||
|
@ -86,22 +86,7 @@ std::vector<std::pair<std::shared_ptr<primitive>, bool>> reorder_factory::get_we
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Add conversion of WeightsTensor to cldnn::tensor to have not flattened shape
|
||||
// layout expected_layout = from_weights_tensor(reorder_params.dest);
|
||||
|
||||
auto new_dtype = from_weights_type(reorder_params.dest.GetDType());
|
||||
const auto bpp = data_type_traits::size_of(new_dtype);
|
||||
tensor expected_size = { 1, 1, 1, (tensor::value_type)(reorder_params.dest.PhysicalSizeInBytes() / bpp) };
|
||||
|
||||
bool toImageType = IsImageType(reorder_params.dest.GetLayout());
|
||||
bool toDynamicLSTMType = IsDynamicLSTMType(reorder_params.dest.GetLayout());
|
||||
if (toImageType || toDynamicLSTMType)
|
||||
expected_size = old_layout.size;
|
||||
|
||||
layout expected_layout = { new_dtype,
|
||||
toImageType ? from_weights_layout(reorder_params.dest.GetLayout())
|
||||
: format::bfyx, // simple linear format (flatten to x channel)
|
||||
expected_size };
|
||||
layout expected_layout = from_weights_tensor(reorder_params.dest);
|
||||
|
||||
cache_key ckey{ input_id, expected_layout };
|
||||
auto itr = _cached_generic_reorders.find(ckey);
|
||||
|
@ -96,7 +96,7 @@ lstm_dynamic_input_inst::typed_primitive_inst(network_impl& network, lstm_dynami
|
||||
"weights format",
|
||||
node.weights().get_output_layout().format.value,
|
||||
"expected bfyx format",
|
||||
format::bfyx);
|
||||
format::oiyx, format::lstm_weights_dio, format::bfyx);
|
||||
CLDNN_ERROR_NOT_EQUAL(node.id(),
|
||||
"Weights batch size",
|
||||
weights_tensor.batch[0],
|
||||
|
Loading…
Reference in New Issue
Block a user