[IE CLDNN] Memory allocation optimizations (#2178)

This commit is contained in:
Sergey Shlyapnikov 2020-09-11 15:55:46 +03:00 committed by GitHub
parent bdd0247362
commit a91e256d27
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 243 additions and 95 deletions

View File

@ -1,5 +1,5 @@
/*
// Copyright (c) 2016-2019 Intel Corporation
// Copyright (c) 2016-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@ -406,6 +406,26 @@ struct layout {
sizes[1] = align_to(sizes[1], 32);
} else if (this->format == cldnn::format::image_2d_rgba) {
sizes[1] = 4;
} else if (this->format == cldnn::format::gs_oi_yxs_gsv4_yxsv4 ||
this->format == cldnn::format::gs_oi_yxs_gsv16_yxsv4 ||
this->format == cldnn::format::gs_oi_yxs_gsv32_yxsv4) {
sizes[3] = align_to(sizes[2] * sizes[3], 4);
sizes[2] = 1;
} else if (this->format == cldnn::format::os_iyx_osv32__ai32 && !is_aligned_to(sizes[1], 32)) {
sizes[1] = align_to(sizes[1], 32);
} else if ((this->format == cldnn::format::iy_xs_os_xsv2_osv8__ao32 ||
this->format == cldnn::format::iy_xs_os_xsv2_osv16__ao32 ||
this->format == cldnn::format::giy_xs_os_xsv2_osv8__ao32 ||
this->format == cldnn::format::giy_xs_os_xsv2_osv16__ao32) && !is_aligned_to(sizes[0], 32)) {
sizes[0] = align_to(sizes[0], 32);
sizes[3] = align_to(sizes[2] * sizes[3], 2);
sizes[2] = 1;
} else if (this->format == cldnn::format::i_yxs_os_yxsv2_osv16 || this->format == cldnn::format::gi_yxs_os_yxsv2_osv16) {
sizes[3] = align_to(sizes[2] * sizes[3], 2);
sizes[2] = 1;
} else if (this->format == cldnn::format::os_i_yxs_osv4_yxsv4) {
sizes[3] = align_to(sizes[2] * sizes[3], 4);
sizes[2] = 1;
}
size_t total = std::accumulate(
sizes.begin(),

View File

@ -58,9 +58,9 @@ struct format_traits {
/// @brief Block sizes as a vector of pairs of dimension number and block size ordered from rare to often.
std::vector<std::pair<size_t, int>> block_sizes;
/// @brief Characters representing batch dimensions in an order.
static const char* batch_chars() { return "bn"; }
static const char* batch_chars() { return "bno"; }
/// @brief Characters representing feature map/channel dimensions in an order.
static const char* feature_chars() { return "fioc"; }
static const char* feature_chars() { return "fic"; }
/// @brief Characters representing spatial dimensions in an order.
static const char* spatial_chars() { return "xyzhsw"; }
/// @brief Characters representing local dimensions in an order.
@ -122,8 +122,11 @@ struct format {
oiyx, ///< the most common format for 2D weights
yxio, ///< format used 2D weights
oizyx, ///< the most common format for 3D convolution
iyxo,
os_iyx_osv16, ///< format used only for convolution weights:
os_is_yx_osv16_isv16, ///< format used for convolution i8 weights
os_is_yx_osv16_isv16, ///< format used for convolution i8 weights
os_is_zyx_osv32_isv16,
os_is_zyx_osv64_isv16,
os_zyxi_osv16, ///< format used for weights for 3D convolution
os_is_yx_isv16_osv16, ///< format used for blocked convolution
os_is_zyx_isv16_osv16, ///< format used for weights for blocked 3D convolution
@ -173,6 +176,11 @@ struct format {
lstm_weights_dio, ///< dynamic_lstm, direction,
///< than IO (I - input size, O - 4 * hidden_size)
os_is_osv32_isv32_swizzled_by_4, ///< format for weights for 1x1 IMAD convolution
os_iyx_osv32__ai32,
iy_xs_os_xsv2_osv8__ao32,
iy_xs_os_xsv2_osv16__ao32,
i_yxs_os_yxsv2_osv16,
os_i_yxs_osv4_yxsv4,
goiyx, ///< format used for weights for 2D convolution
yxiog, ///< format used for weights for 2D convolution
@ -196,6 +204,13 @@ struct format {
g_os_zyx_is_osv32_isv4, ///< format for imad deconvolution
g_os_zyx_is_osv32_isv16, ///< format for imad deconvolution
g_os_zyx_is_osv32_isv32, ///< format for imad deconvolution
g_os_is_yx_isv16_osv16,
gs_oi_yxs_gsv4_yxsv4,
gs_oi_yxs_gsv16_yxsv4,
gs_oi_yxs_gsv32_yxsv4,
gi_yxs_os_yxsv2_osv16,
giy_xs_os_xsv2_osv8__ao32,
giy_xs_os_xsv2_osv16__ao32,
format_num, ///< number of format types
any = -1
@ -212,7 +227,7 @@ struct format {
// Order - dims changing order from rare to often
// Inner order - dims order for internal storage in _sizes array
// Block sizes - vector of pairs of dimension number (by inner order) and block size ordered from rare to often
// Format B F S L G Order Inner order Block sizes
// Format B F S L G Order Inner order Block sizes
{ yxfb, { 1, 1, 2, 0, 0, "yxfb", "bfxy?", {}}},
{ byxf, { 1, 1, 2, 0, 0, "byxf", "bfxy?", {}}},
{ bfyx, { 1, 1, 2, 0, 0, "bfyx", "bfxy?", {}}},
@ -235,66 +250,81 @@ struct format {
{ nv12, { 1, 1, 2, 0, 0, "bfyx", "bfxy?", {}}},
{ image_2d_rgba, { 1, 1, 2, 0, 0, "bfyx", "bfxy?", {}}},
{ oiyx, { 1, 1, 2, 0, 0, "bfyx", "bfxy", {}}},
{ yxio, { 1, 1, 2, 0, 0, "yxfb", "bfxy?", {}}},
{ oizyx, { 1, 1, 3, 0, 0, "bfzyx", "bfxyz", {}}},
{ os_is_yx_isv16_osv16, { 1, 1, 2, 0, 0, "bfyx", "bfxy", {{1, 16}, {0, 16}}}},
{ os_iyx_osv16, { 1, 1, 2, 0, 0, "bfyx", "bfxy?", {{0, 16}}}},
{ os_iyx_osv32, { 1, 1, 2, 0, 0, "bfyx", "bfxy?", {{0, 32}}}},
{ os_iyx_osv64, { 1, 1, 2, 0, 0, "bfyx", "bfxy?", {{0, 64}}}},
{ winograd_2x3_s1_weights, { 1, 1, 2, 0, 0, "bfyx", "bfxy?", {}}},
{ winograd_2x3_s1_fused_weights, { 1, 1, 2, 0, 0, "xyfb", "bfxy?", {}}},
{ winograd_6x3_s1_fused_weights, { 1, 1, 2, 0, 0, "xyfb", "bfxy?", {}}},
{ image_2d_weights_winograd_6x3_s1_fbxyb, { 1, 1, 2, 0, 0, "xyfb", "bfxy?", {}}},
{ image_2d_weights_winograd_6x3_s1_xfbyb, { 1, 1, 2, 0, 0, "xyfb", "bfxy?", {}}},
{ image_2d_weights_c4_fyx_b, { 1, 1, 2, 0, 0, "bfyx", "bfxy?", {}}},
{ image_2d_weights_c1_b_fyx, { 1, 1, 2, 0, 0, "bfyx", "bfxy?", {}}},
{ lstm_weights_dio, { 1, 1, 2, 0, 0, "bfxy", "bfxy?", {}}},
{ os_is_yx_isa8_osv8_isv4, { 1, 1, 2, 0, 0, "bfyx", "bfxy?", {}}},
{ os_is_yx_isa8_osv8_isv4_swizzled_by_4, { 1, 1, 2, 0, 0, "bfyx", "bfxy?", {}}},
{ os_is_zyx_isa8_osv8_isv4, { 1, 1, 3, 0, 0, "bfzyx", "bfxyz", {{1, 8}, {0, 8}, {1, 4}}}},
{ os_is_yx_osa4_isa8_osv8_isv4_swizzled_by_4, { 1, 1, 2, 0, 0, "bfyx", "bfxy?", {{0, 32}, {1, 32}}}},
{ os_is_zyx_osa4_isa8_osv8_isv4_swizzled_by_4, { 1, 1, 3, 0, 0, "bfzyx", "bfxyz", {{0, 32}, {1, 32}}}},
{ is_o_yx_isv32, { 1, 1, 2, 0, 0, "byxf", "bfxy?", {{1, 32}}}},
{ is_o32_yx_isv32_swizzled_by_4, { 1, 1, 2, 0, 0, "byxf", "bfxy?", {}}},
{ os_is_y_x8_osv8_isv4, { 1, 1, 2, 0, 0, "byxf", "bfxy?", {}}},
{ os_is_y_x8_osv8_isv4_swizzled_by_4, { 1, 1, 2, 0, 0, "byxf", "bfxy?", {}}},
{ os_is_yx_osv16_isv4, { 1, 1, 2, 0, 0, "bfyx", "bfxy?", {{0, 16}, {1, 4}}}},
{ os_is_zyx_osv16_isv16, { 1, 1, 3, 0, 0, "bfzyx", "bfxyz", {{0, 16}, {1, 16}}}},
{ os_is_yx_osv32_isv4_swizzled_by_2, { 1, 1, 2, 0, 0, "bfxy", "bfxy?", {{0, 32}, {1, 4}}}},
{ os_is_yx_osv32_isv4, { 1, 1, 2, 0, 0, "bfxy", "bfxy?", {{0, 32}, {1, 4}}}},
{ os_is_zyx_osv32_isv4, { 1, 1, 3, 0, 0, "bfzyx", "bfxyz", {{0, 32}, {1, 4}}}},
{ os_is_yx_osv32_isv32p, { 1, 1, 1, 0, 0, "bfxy", "bfxy?", {}}},
{ os_is_zyx_isv16_osv16, { 1, 1, 3, 0, 0, "bfzyx", "bfxyz", {{0, 16}, {1, 16}}}},
{ is_os_zyx_isv16_osv16, { 1, 1, 3, 0, 0, "fbzyx", "bfxyz", {{1, 16}, {0, 16}}}},
{ is_os_yx_isv16_osv16, { 1, 1, 2, 0, 0, "fbyx", "bfxyz", {{1, 16}, {0, 16}}}},
{ os_is_osv32_isv32_swizzled_by_4, { 1, 1, 0, 0, 0, "bfxy", "bfxy?", {{0, 32}, {1, 32}}}},
{ os_is_zyx_isv8_osv16_isv2, { 1, 1, 3, 0, 0, "bfzyx", "bfxyz", {{1, 8}, {0, 16}, {1, 2}}}},
{ os_zyxi_osv16, { 1, 1, 3, 0, 0, "bzyxf", "bfxyz", {{0, 16}}}},
{ os_is_yx_isv8_osv16_isv2, { 1, 1, 2, 0, 0, "bfzyx", "bfxyz", {{1, 8}, {0, 16}, {1, 2}}}},
{ os_is_yx_osv16_isv16, { 1, 1, 2, 0, 0, "bfyx", "bfxy", {{1, 16}, {0, 16}}}},
{ oiyx, { 1, 1, 2, 0, 0, "oiyx", "oixy", {}}},
{ iyxo, { 1, 1, 2, 0, 0, "iyxo", "oixy", {}}},
{ yxio, { 1, 1, 2, 0, 0, "yxio", "oixy?", {}}},
{ oizyx, { 1, 1, 3, 0, 0, "oizyx", "oixyz", {}}},
{ os_is_yx_isv16_osv16, { 1, 1, 2, 0, 0, "oiyx", "oixy", {{1, 16}, {0, 16}}}},
{ os_iyx_osv16, { 1, 1, 2, 0, 0, "oiyx", "oixy?", {{0, 16}}}},
{ os_iyx_osv32, { 1, 1, 2, 0, 0, "oiyx", "oixy?", {{0, 32}}}},
{ os_iyx_osv64, { 1, 1, 2, 0, 0, "oiyx", "oixy?", {{0, 64}}}},
{ winograd_2x3_s1_weights, { 1, 1, 2, 0, 0, "oiyx", "oixy?", {}}},
{ winograd_2x3_s1_fused_weights, { 1, 1, 2, 0, 0, "xyio", "oixy?", {}}},
{ winograd_6x3_s1_fused_weights, { 1, 1, 2, 0, 0, "xyio", "oixy?", {}}},
{ image_2d_weights_winograd_6x3_s1_fbxyb, { 1, 1, 2, 0, 0, "xyio", "oixy?", {}}},
{ image_2d_weights_winograd_6x3_s1_xfbyb, { 1, 1, 2, 0, 0, "xyio", "oixy?", {}}},
{ image_2d_weights_c4_fyx_b, { 1, 1, 2, 0, 0, "oiyx", "oixy?", {}}},
{ image_2d_weights_c1_b_fyx, { 1, 1, 2, 0, 0, "oiyx", "oixy?", {}}},
{ lstm_weights_dio, { 1, 1, 2, 0, 0, "oixy", "oixy?", {}}},
{ os_is_yx_isa8_osv8_isv4, { 1, 1, 2, 0, 0, "oiyx", "oixy?", {}}},
{ os_is_yx_isa8_osv8_isv4_swizzled_by_4, { 1, 1, 2, 0, 0, "oiyx", "oixy?", {}}},
{ os_is_zyx_isa8_osv8_isv4, { 1, 1, 3, 0, 0, "oizyx", "oixyz", {{1, 8}, {0, 8}, {1, 4}}}},
{ os_is_yx_osa4_isa8_osv8_isv4_swizzled_by_4, { 1, 1, 2, 0, 0, "oiyx", "oixy?", {{0, 32}, {1, 32}}}},
{ os_is_zyx_osa4_isa8_osv8_isv4_swizzled_by_4, { 1, 1, 3, 0, 0, "oizyx", "oixyz", {{0, 32}, {1, 32}}}},
{ is_o_yx_isv32, { 1, 1, 2, 0, 0, "oyxi", "oixy?", {{1, 32}}}},
{ is_o32_yx_isv32_swizzled_by_4, { 1, 1, 2, 0, 0, "oyxi", "oixy?", {}}},
{ os_is_y_x8_osv8_isv4, { 1, 1, 2, 0, 0, "oyxi", "oixy?", {}}},
{ os_is_y_x8_osv8_isv4_swizzled_by_4, { 1, 1, 2, 0, 0, "oyxi", "oixy?", {}}},
{ os_is_yx_osv16_isv4, { 1, 1, 2, 0, 0, "oixy", "oixy?", {{0, 16}, {1, 4}}}},
{ os_is_zyx_osv16_isv16, { 1, 1, 3, 0, 0, "oizyx", "oixyz", {{0, 16}, {1, 16}}}},
{ os_is_yx_osv32_isv4_swizzled_by_2, { 1, 1, 2, 0, 0, "oixy", "oixy?", {{0, 32}, {1, 4}}}},
{ os_is_yx_osv32_isv4, { 1, 1, 2, 0, 0, "oixy", "oixy?", {{0, 32}, {1, 4}}}},
{ os_is_zyx_osv32_isv4, { 1, 1, 3, 0, 0, "oizyx", "oixyz", {{0, 32}, {1, 4}}}},
{ os_is_yx_osv32_isv32p, { 1, 1, 1, 0, 0, "oixy", "oixy?", {}}},
{ os_is_zyx_isv16_osv16, { 1, 1, 3, 0, 0, "oizyx", "oixyz", {{0, 16}, {1, 16}}}},
{ is_os_zyx_isv16_osv16, { 1, 1, 3, 0, 0, "iozyx", "oixyz", {{1, 16}, {0, 16}}}},
{ is_os_yx_isv16_osv16, { 1, 1, 2, 0, 0, "ioyx", "oixyz", {{1, 16}, {0, 16}}}},
{ os_is_osv32_isv32_swizzled_by_4, { 1, 1, 0, 0, 0, "oixy", "oixy?", {{0, 32}, {1, 32}}}},
{ os_is_zyx_isv8_osv16_isv2, { 1, 1, 3, 0, 0, "oizyx", "oixyz", {{1, 8}, {0, 16}, {1, 2}}}},
{ os_zyxi_osv16, { 1, 1, 3, 0, 0, "ozyxi", "oixyz", {{0, 16}}}},
{ os_is_yx_isv8_osv16_isv2, { 1, 1, 2, 0, 0, "oizyx", "oixyz", {{1, 8}, {0, 16}, {1, 2}}}},
{ os_is_yx_osv16_isv16, { 1, 1, 2, 0, 0, "oiyx", "oixy", {{1, 16}, {0, 16}}}},
{ os_is_zyx_osv32_isv16, { 1, 1, 3, 0, 0, "oizyx", "oixyz", {{0, 32}, {1, 16}}}},
{ os_is_zyx_osv64_isv16, { 1, 1, 3, 0, 0, "oizyx", "oixyz", {{0, 64}, {1, 16}}}},
{ os_iyx_osv32__ai32, { 1, 1, 2, 0, 0, "oiyx", "oixy", {{0, 32}}}},
{ i_yxs_os_yxsv2_osv16, { 1, 1, 2, 0, 0, "iyxo", "oixy", {{0, 16}}}},
{ iy_xs_os_xsv2_osv8__ao32, { 1, 1, 2, 0, 0, "iyxo", "oixy", {{2, 2}, {0, 8}}}},
{ iy_xs_os_xsv2_osv16__ao32, { 1, 1, 2, 0, 0, "iyxo", "oixy", {{2, 2}, {0, 16}}}},
{ os_i_yxs_osv4_yxsv4, { 1, 1, 2, 0, 0, "oiyx", "oixy", {{0, 4}}}},
{ goiyx, { 1, 1, 2, 0, 1, "gbfyx", "bfxy????g", {}}},
{ goizyx, { 1, 1, 3, 0, 1, "gbfzyx", "bfxyz???g", {}}},
{ g_os_iyx_osv16, { 1, 1, 2, 0, 1, "gbfyx", "bfxy????g", {{0, 16}}}},
{ g_os_iyx_osv32, { 1, 1, 2, 0, 1, "gbfyx", "bfxy????g", {{0, 32}}}},
{ gs_oiyx_gsv16, { 1, 1, 2, 0, 1, "gbfyx", "bfxy????g", {{8, 16}}}},
{ gs_oizyx_gsv16, { 1, 1, 3, 0, 1, "gbfzyx", "bfxyz???g", {{8, 16}}}},
{ gs_oiyx_gsv32, { 1, 1, 2, 0, 1, "gbfyx", "bfxy????g", {{8, 32}}}},
{ gyxio, { 1, 1, 2, 0, 1, "gyxfb", "bfxy????g", {}}},
{ g_is_os_zyx_isv16_osv16, { 1, 1, 3, 0, 1, "gfbzyx", "bfxyz???g", {{1, 16}, {0, 16}}}},
{ g_is_os_yx_isv16_osv16, { 1, 1, 2, 0, 1, "gfbyx", "bfxy????g", {{1, 16}, {0, 16}}}},
{ g_os_is_zyx_isv8_osv16_isv2, { 1, 1, 3, 0, 1, "gbfzyx", "bfxyz???g", {{1, 8}, {0, 16}, {1, 2}}}},
{ g_os_is_yx_isv8_osv16_isv2, { 1, 1, 2, 0, 1, "gbfyx", "bfxy????g", {{1, 8}, {0, 16}, {1, 2}}}},
{ g_os_is_zyx_isv16_osv16, { 1, 1, 3, 0, 1, "gbfzyx", "bfxyz???g", {{0, 16}, {1, 16}}}},
{ g_os_is_yx_osv16_isv4, { 1, 1, 2, 0, 1, "gbfyx", "bfxy????g", {{0, 16}, {1, 4}}}},
{ g_os_is_zyx_osv16_isv16, { 1, 1, 3, 0, 1, "gbfzyx", "bfxyz???g", {{0, 16}, {1, 16}}}},
{ g_os_zyx_is_osv16_isv4, { 1, 1, 3, 0, 1, "gbzyxi", "bfxyz???g", {{0, 16}, {1, 4}}}},
{ g_os_zyx_is_osv16_isv16, { 1, 1, 3, 0, 1, "gbzyxi", "bfxyz???g", {{0, 16}, {1, 16}}}},
{ g_os_zyx_is_osv16_isv32, { 1, 1, 3, 0, 1, "gbzyxi", "bfxyz???g", {{0, 16}, {1, 32}}}},
{ g_os_zyx_is_osv32_isv4, { 1, 1, 3, 0, 1, "gbzyxi", "bfxyz???g", {{0, 32}, {1, 4}}}},
{ g_os_zyx_is_osv32_isv16, { 1, 1, 3, 0, 1, "gbzyxi", "bfxyz???g", {{0, 32}, {1, 16}}}},
{ g_os_zyx_is_osv32_isv32, { 1, 1, 3, 0, 1, "gbzyxi", "bfxyz???g", {{0, 32}, {1, 32}}}},
{ goiyx, { 1, 1, 2, 0, 1, "goiyx", "oixy????g", {}}},
{ goizyx, { 1, 1, 3, 0, 1, "goizyx", "oixyz???g", {}}},
{ g_os_iyx_osv16, { 1, 1, 2, 0, 1, "goiyx", "oixy????g", {{0, 16}}}},
{ g_os_iyx_osv32, { 1, 1, 2, 0, 1, "goiyx", "oixy????g", {{0, 32}}}},
{ gs_oiyx_gsv16, { 1, 1, 2, 0, 1, "goiyx", "oixy????g", {{8, 16}}}},
{ gs_oizyx_gsv16, { 1, 1, 3, 0, 1, "goizyx", "oixyz???g", {{8, 16}}}},
{ gs_oiyx_gsv32, { 1, 1, 2, 0, 1, "goiyx", "oixy????g", {{8, 32}}}},
{ gyxio, { 1, 1, 2, 0, 1, "gyxio", "oixy????g", {}}},
{ g_is_os_zyx_isv16_osv16, { 1, 1, 3, 0, 1, "giozyx", "oixyz???g", {{1, 16}, {0, 16}}}},
{ g_is_os_yx_isv16_osv16, { 1, 1, 2, 0, 1, "gioyx", "oixy????g", {{1, 16}, {0, 16}}}},
{ g_os_is_zyx_isv8_osv16_isv2, { 1, 1, 3, 0, 1, "goizyx", "oixyz???g", {{1, 8}, {0, 16}, {1, 2}}}},
{ g_os_is_yx_isv8_osv16_isv2, { 1, 1, 2, 0, 1, "goiyx", "oixy????g", {{1, 8}, {0, 16}, {1, 2}}}},
{ g_os_is_zyx_isv16_osv16, { 1, 1, 3, 0, 1, "goizyx", "oixyz???g", {{0, 16}, {1, 16}}}},
{ g_os_is_yx_osv16_isv4, { 1, 1, 2, 0, 1, "goixy", "oixy????g", {{0, 16}, {1, 4}}}},
{ g_os_is_zyx_osv16_isv16, { 1, 1, 3, 0, 1, "goizyx", "oixyz???g", {{0, 16}, {1, 16}}}},
{ g_os_zyx_is_osv16_isv4, { 1, 1, 3, 0, 1, "gozyxi", "oixyz???g", {{0, 16}, {1, 4}}}},
{ g_os_zyx_is_osv16_isv16, { 1, 1, 3, 0, 1, "gozyxi", "oixyz???g", {{0, 16}, {1, 16}}}},
{ g_os_zyx_is_osv16_isv32, { 1, 1, 3, 0, 1, "gozyxi", "oixyz???g", {{0, 16}, {1, 32}}}},
{ g_os_zyx_is_osv32_isv4, { 1, 1, 3, 0, 1, "gozyxi", "oixyz???g", {{0, 32}, {1, 4}}}},
{ g_os_zyx_is_osv32_isv16, { 1, 1, 3, 0, 1, "gozyxi", "oixyz???g", {{0, 32}, {1, 16}}}},
{ g_os_zyx_is_osv32_isv32, { 1, 1, 3, 0, 1, "gozyxi", "oixyz???g", {{0, 32}, {1, 32}}}},
{ gs_oi_yxs_gsv4_yxsv4, { 1, 1, 2, 0, 1, "goiyx", "oixy????g", {{8, 4}}}},
{ gs_oi_yxs_gsv16_yxsv4, { 1, 1, 2, 0, 1, "goiyx", "oixy????g", {{8, 16}}}},
{ gs_oi_yxs_gsv32_yxsv4, { 1, 1, 2, 0, 1, "goiyx", "oixy????g", {{8, 32}}}},
{ g_os_is_yx_isv16_osv16, { 1, 1, 2, 0, 1, "goiyx", "oixy????g", {{1, 16}, {0, 16}}}},
{ gi_yxs_os_yxsv2_osv16, { 1, 1, 2, 0, 1, "giyxo", "oixy????g", {{0, 16}}}},
// Grouped ("g"-prefixed) variants: order and inner order carry the group dimension, so the group count is 1.
{ giy_xs_os_xsv2_osv8__ao32, { 1, 1, 2, 0, 1, "giyxo", "oixy????g", {{2, 2}, {0, 8}}}},
{ giy_xs_os_xsv2_osv16__ao32, { 1, 1, 2, 0, 1, "giyxo", "oixy????g", {{2, 2}, {0, 16}}}},
};
return traits.at(fmt);
}
@ -334,6 +364,17 @@ struct format {
fmt == nv12 ||
fmt == image_2d_rgba);
}
/// @brief Checks if @p format is weights format
/// @details A format counts as a weights format when its internal order string
/// contains at least one of the characters 'o' or 'i'.
static bool is_weights_format(type fmt) {
    const auto& inner = traits(fmt).internal_order;
    // One scan over the character set replaces the per-character loop.
    return inner.find_first_of("oi") != std::string::npos;
}
/// @brief Checks if @p format is of grouped type
static bool is_grouped(type fmt) { return group_num(fmt) != 0; }
/// @brief Checks if @p format is of image type
@ -957,6 +998,45 @@ public:
my_sizes[1] = align_to(my_sizes[1], 4);
my_sizes[0] = align_to(my_sizes[0], 8);
my_sizes[2] = align_to(my_sizes[2], 8);
} else if (fmt == cldnn::format::gs_oi_yxs_gsv4_yxsv4 || fmt == cldnn::format::gs_oi_yxs_gsv16_yxsv4 || fmt == cldnn::format::gs_oi_yxs_gsv32_yxsv4) {
const auto yxsv = 4;
const auto flat_xy = adjusted_coords[4] + adjusted_coords[3] * my_sizes[4];
my_sizes.push_back(yxsv);
my_sizes[4] = ceil_div(my_sizes[3] * my_sizes[4], yxsv);
my_sizes[3] = 1;
adjusted_coords.push_back(flat_xy % yxsv);
adjusted_coords[4] = flat_xy / yxsv;
adjusted_coords[3] = 0;
} else if (fmt == cldnn::format::os_iyx_osv32__ai32 && !is_aligned_to(my_sizes[1], 32)) {
my_sizes[1] = align_to(my_sizes[1], 32);
} else if ((fmt == cldnn::format::iy_xs_os_xsv2_osv8__ao32 || fmt == cldnn::format::iy_xs_os_xsv2_osv16__ao32) && !is_aligned_to(my_sizes[3], 32)) {
my_sizes[3] = align_to(my_sizes[3], 32);
} else if (fmt == cldnn::format::i_yxs_os_yxsv2_osv16 || fmt == cldnn::format::gi_yxs_os_yxsv2_osv16) {
const auto yxsv = 2;
auto flat_xy = adjusted_coords[2] + adjusted_coords[1] * my_sizes[2];
my_sizes.insert(std::prev(my_sizes.end()), yxsv);
my_sizes[2] = ceil_div(my_sizes[1] * my_sizes[2], yxsv);
my_sizes[1] = 1;
adjusted_coords.insert(std::prev(adjusted_coords.end()), flat_xy % yxsv);
adjusted_coords[2] = flat_xy / yxsv;
adjusted_coords[1] = 0;
} else if (fmt == cldnn::format::os_i_yxs_osv4_yxsv4) {
const auto yxsv = 4;
const auto flat_xy = adjusted_coords[3] + adjusted_coords[2] * my_sizes[3];
my_sizes.push_back(yxsv);
my_sizes[3] = ceil_div(my_sizes[2] * my_sizes[3], yxsv);
my_sizes[2] = 1;
adjusted_coords.push_back(flat_xy % yxsv);
adjusted_coords[3] = flat_xy / yxsv;
adjusted_coords[2] = 0;
} else if ((fmt == cldnn::format::giy_xs_os_xsv2_osv8__ao32 || fmt == cldnn::format::giy_xs_os_xsv2_osv16__ao32) && !is_aligned_to(my_sizes[4], 32)) {
    // The grouped order is "giyxo", so the output-feature size is at index 4
    // (index 3 in the non-grouped "iyxo" variants). The guard has to test the
    // same dimension that is aligned to 32 below.
    my_sizes[4] = align_to(my_sizes[4], 32);
}
assert(my_sizes.size() == adjusted_coords.size());

View File

@ -1,5 +1,5 @@
/*
// Copyright (c) 2016 Intel Corporation
// Copyright (c) 2016-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@ -31,7 +31,7 @@ gpu_buffer::gpu_buffer(const refcounted_obj_ptr<engine_impl>& engine,
bool reset)
: lockable_gpu_mem(engine), memory_impl(engine, layout, net_id, allocation_type::cl_mem, false),
_buffer(_context->context(), CL_MEM_READ_WRITE, size()) {
if (reset) zero_buffer();
if (reset && is_memory_reset_needed(_layout)) zero_buffer();
}
gpu_buffer::gpu_buffer(const refcounted_obj_ptr<engine_impl>& engine,
@ -256,7 +256,7 @@ gpu_usm::gpu_usm(const refcounted_obj_ptr<engine_impl>& engine, const layout& la
"Unknown unified shared memory type!");
}
if (reset) zero_buffer();
if (reset && is_memory_reset_needed(_layout)) zero_buffer();
}
void* gpu_usm::lock() {

View File

@ -1,5 +1,5 @@
/*
// Copyright (c) 2016-2019 Intel Corporation
// Copyright (c) 2016-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@ -43,6 +43,25 @@ struct memory_impl : refcounted_obj<memory_impl> {
uint32_t get_net_id() const { return _net_id; }
void set_net(uint32_t id) { _net_id = id; }
allocation_type get_allocation_type() const { return _type; }
// Tells whether memory described by layout @p l has to be reset.
// Returns false only when all of the following hold:
//  - the format is a weights format that is neither winograd nor image_2d
//    (Data memory can be reused by memory_pool, which can lead to errors)
//  - lower and upper paddings are both zero
//  - _bytes_count equals the byte size computed from the layout's element count
// In every other case the reset is required and true is returned.
virtual bool is_memory_reset_needed(layout l) {
    const bool plain_weights = format::is_weights_format(l.format) &&
                               !format::is_winograd(l.format) &&
                               !format::is_image_2d(l.format);
    if (!plain_weights) {
        return true;
    }

    const bool has_padding = l.data_padding.lower_size() != tensor(0) ||
                             l.data_padding.upper_size() != tensor(0);
    if (has_padding) {
        return true;
    }

    // For the bin data type the element count is divided by 32 (rounded up) before scaling by the type size.
    const auto elements = l.data_type == data_types::bin ? ceil_div(l.count(), 32) : l.count();
    return _bytes_count != elements * data_type_traits::size_of(l.data_type);
}
protected:
engine_impl *const _engine;

View File

@ -343,13 +343,15 @@ cldnn::format::type from_weights_layout(kernel_selector::weights_layout l) {
case kernel_selector::weights_layout::oyxi:
return cldnn::format::byxf;
case kernel_selector::weights_layout::io:
return cldnn::format::fyxb;
return cldnn::format::iyxo;
case kernel_selector::weights_layout::iyxo:
return cldnn::format::fyxb;
return cldnn::format::iyxo;
case kernel_selector::weights_layout::yxio:
return cldnn::format::yxfb;
case kernel_selector::weights_layout::os_iyx_osv16:
return cldnn::format::os_iyx_osv16;
case kernel_selector::weights_layout::os_is_yx_isv16_osv16:
return cldnn::format::os_is_yx_isv16_osv16;
case kernel_selector::weights_layout::os_is_yx_osv16_isv16:
return cldnn::format::os_is_yx_osv16_isv16;
case kernel_selector::weights_layout::os_iyx_osv32:
@ -458,8 +460,46 @@ cldnn::format::type from_weights_layout(kernel_selector::weights_layout l) {
return cldnn::format::g_os_zyx_is_osv32_isv16;
case kernel_selector::weights_layout::g_os_zyx_is_osv32_isv32:
return cldnn::format::g_os_zyx_is_osv32_isv32;
case kernel_selector::weights_layout::gs_oi_yxs_gsv4_yxsv4:
return cldnn::format::gs_oi_yxs_gsv4_yxsv4;
case kernel_selector::weights_layout::gs_oi_yxs_gsv16_yxsv4:
return cldnn::format::gs_oi_yxs_gsv16_yxsv4;
case kernel_selector::weights_layout::gs_oi_yxs_gsv32_yxsv4:
return cldnn::format::gs_oi_yxs_gsv32_yxsv4;
case kernel_selector::weights_layout::g_os_is_yx_osv16_isv4:
return cldnn::format::g_os_is_yx_osv16_isv4;
case kernel_selector::weights_layout::g_os_is_yx_isv16_osv16:
return cldnn::format::g_os_is_yx_isv16_osv16;
case kernel_selector::weights_layout::os_iyx_osv32__ai32:
return cldnn::format::os_iyx_osv32__ai32;
case kernel_selector::weights_layout::os_is_osv32_isv32_swizzled_by_4:
return cldnn::format::os_is_osv32_isv32_swizzled_by_4;
case kernel_selector::weights_layout::iy_xs_os_xsv2_osv16__ao32:
return cldnn::format::iy_xs_os_xsv2_osv16__ao32;
case kernel_selector::weights_layout::iy_xs_os_xsv2_osv8__ao32:
return cldnn::format::iy_xs_os_xsv2_osv8__ao32;
case kernel_selector::weights_layout::i_yxs_os_yxsv2_osv16:
return cldnn::format::i_yxs_os_yxsv2_osv16;
case kernel_selector::weights_layout::os_is_zyx_osv32_isv16:
return cldnn::format::os_is_zyx_osv32_isv16;
case kernel_selector::weights_layout::os_is_zyx_osv64_isv16:
return cldnn::format::os_is_zyx_osv64_isv16;
case kernel_selector::weights_layout::os_is_yx_isv8_osv16_isv2:
return cldnn::format::os_is_yx_isv8_osv16_isv2;
case kernel_selector::weights_layout::dlstm_dir_io:
return cldnn::format::lstm_weights_dio;
case kernel_selector::weights_layout::os_iyx_osv16_rotate_180:
return cldnn::format::os_iyx_osv16;
case kernel_selector::weights_layout::os_i_yxs_osv4_yxsv4:
return cldnn::format::os_i_yxs_osv4_yxsv4;
case kernel_selector::weights_layout::gi_yxs_os_yxsv2_osv16:
return cldnn::format::gi_yxs_os_yxsv2_osv16;
case kernel_selector::weights_layout::giy_xs_os_xsv2_osv8__ao32:
return cldnn::format::giy_xs_os_xsv2_osv8__ao32;
case kernel_selector::weights_layout::giy_xs_os_xsv2_osv16__ao32:
return cldnn::format::giy_xs_os_xsv2_osv16__ao32;
default:
return cldnn::format::bfyx;
throw std::invalid_argument("Unable to convert kernel selector Weights layout " + std::to_string((int)l) + " to cldnn format");
}
}
@ -561,14 +601,18 @@ layout from_weights_tensor(const kernel_selector::weights_tensor& l) {
const auto format = from_weights_layout(l.GetLayout());
const auto type = from_weights_type(l.GetDType());
tensor t = {static_cast<int>(l.OFM().v),
static_cast<int>(l.IFM().v),
static_cast<int>(l.X().v),
static_cast<int>(l.Y().v),
static_cast<int>(l.LX().v),
static_cast<int>(l.LY().v)};
tensor size(1);
return layout(type, format, t);
size.group[0] = static_cast<int32_t>(l.G().v);
size.batch[0] = static_cast<int32_t>(l.OFM().v);
size.feature[0] = static_cast<int32_t>(l.IFM().v);
size.spatial[0] = static_cast<int32_t>(l.X().v);
size.spatial[1] = static_cast<int32_t>(l.Y().v);
size.spatial[2] = static_cast<int32_t>(l.Z().v);
size.local[0] = static_cast<int32_t>(l.LX().v);
size.local[1] = static_cast<int32_t>(l.LY().v);
return layout(type, format, size);
}
kernel_selector::activation_function get_kernel_selector_activation_param(activation_func activation) {

View File

@ -86,22 +86,7 @@ std::vector<std::pair<std::shared_ptr<primitive>, bool>> reorder_factory::get_we
}
}
// TODO: Add conversion of WeightsTensor to cldnn::tensor to have not flattened shape
// layout expected_layout = from_weights_tensor(reorder_params.dest);
auto new_dtype = from_weights_type(reorder_params.dest.GetDType());
const auto bpp = data_type_traits::size_of(new_dtype);
tensor expected_size = { 1, 1, 1, (tensor::value_type)(reorder_params.dest.PhysicalSizeInBytes() / bpp) };
bool toImageType = IsImageType(reorder_params.dest.GetLayout());
bool toDynamicLSTMType = IsDynamicLSTMType(reorder_params.dest.GetLayout());
if (toImageType || toDynamicLSTMType)
expected_size = old_layout.size;
layout expected_layout = { new_dtype,
toImageType ? from_weights_layout(reorder_params.dest.GetLayout())
: format::bfyx, // simple linear format (flatten to x channel)
expected_size };
layout expected_layout = from_weights_tensor(reorder_params.dest);
cache_key ckey{ input_id, expected_layout };
auto itr = _cached_generic_reorders.find(ckey);

View File

@ -96,7 +96,7 @@ lstm_dynamic_input_inst::typed_primitive_inst(network_impl& network, lstm_dynami
"weights format",
node.weights().get_output_layout().format.value,
"expected bfyx format",
format::bfyx);
format::oiyx, format::lstm_weights_dio, format::bfyx);
CLDNN_ERROR_NOT_EQUAL(node.id(),
"Weights batch size",
weights_tensor.batch[0],