diff --git a/inference-engine/thirdparty/clDNN/api/layout.hpp b/inference-engine/thirdparty/clDNN/api/layout.hpp index ee2ac021d07..09c168c66b7 100644 --- a/inference-engine/thirdparty/clDNN/api/layout.hpp +++ b/inference-engine/thirdparty/clDNN/api/layout.hpp @@ -1,5 +1,5 @@ /* -// Copyright (c) 2016-2019 Intel Corporation +// Copyright (c) 2016-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -406,6 +406,26 @@ struct layout { sizes[1] = align_to(sizes[1], 32); } else if (this->format == cldnn::format::image_2d_rgba) { sizes[1] = 4; + } else if (this->format == cldnn::format::gs_oi_yxs_gsv4_yxsv4 || + this->format == cldnn::format::gs_oi_yxs_gsv16_yxsv4 || + this->format == cldnn::format::gs_oi_yxs_gsv32_yxsv4) { + sizes[3] = align_to(sizes[2] * sizes[3], 4); + sizes[2] = 1; + } else if (this->format == cldnn::format::os_iyx_osv32__ai32 && !is_aligned_to(sizes[1], 32)) { + sizes[1] = align_to(sizes[1], 32); + } else if ((this->format == cldnn::format::iy_xs_os_xsv2_osv8__ao32 || + this->format == cldnn::format::iy_xs_os_xsv2_osv16__ao32 || + this->format == cldnn::format::giy_xs_os_xsv2_osv8__ao32 || + this->format == cldnn::format::giy_xs_os_xsv2_osv16__ao32) && !is_aligned_to(sizes[0], 32)) { + sizes[0] = align_to(sizes[0], 32); + sizes[3] = align_to(sizes[2] * sizes[3], 2); + sizes[2] = 1; + } else if (this->format == cldnn::format::i_yxs_os_yxsv2_osv16 || this->format == cldnn::format::gi_yxs_os_yxsv2_osv16) { + sizes[3] = align_to(sizes[2] * sizes[3], 2); + sizes[2] = 1; + } else if (this->format == cldnn::format::os_i_yxs_osv4_yxsv4) { + sizes[3] = align_to(sizes[2] * sizes[3], 4); + sizes[2] = 1; } size_t total = std::accumulate( sizes.begin(), diff --git a/inference-engine/thirdparty/clDNN/api/tensor.hpp b/inference-engine/thirdparty/clDNN/api/tensor.hpp index ed49673d44a..10cff6908e5 100644 --- 
a/inference-engine/thirdparty/clDNN/api/tensor.hpp +++ b/inference-engine/thirdparty/clDNN/api/tensor.hpp @@ -58,9 +58,9 @@ struct format_traits { /// @brief Block sizes as a vector of pairs of dimension number and block size ordered from rare to often. std::vector> block_sizes; /// @brief Characters representing batch dimensions in an order. - static const char* batch_chars() { return "bn"; } + static const char* batch_chars() { return "bno"; } /// @brief Characters representing feature map/channel dimensions in an order. - static const char* feature_chars() { return "fioc"; } + static const char* feature_chars() { return "fic"; } /// @brief Characters representing spatial dimensions in an order. static const char* spatial_chars() { return "xyzhsw"; } /// @brief Characters representing local dimensions in an order. @@ -122,8 +122,11 @@ struct format { oiyx, ///< the most common format for 2D weights yxio, ///< format used 2D weights oizyx, ///< the most common format for 3D convolution + iyxo, os_iyx_osv16, ///< format used only for convolution weights: - os_is_yx_osv16_isv16, ///< format used for convolution i8 weights + os_is_yx_osv16_isv16, ///< format used for convolution i8 weights + os_is_zyx_osv32_isv16, + os_is_zyx_osv64_isv16, os_zyxi_osv16, ///< format used for weights for 3D convolution os_is_yx_isv16_osv16, ///< format used for blocked convolution os_is_zyx_isv16_osv16, ///< format used for weights for blocked 3D convolution @@ -173,6 +176,11 @@ struct format { lstm_weights_dio, ///< dynamic_lstm, direction, ///< than IO (I - input size, O - 4 * hidden_size) os_is_osv32_isv32_swizzled_by_4, ///< format for weights for 1x1 IMAD convolution + os_iyx_osv32__ai32, + iy_xs_os_xsv2_osv8__ao32, + iy_xs_os_xsv2_osv16__ao32, + i_yxs_os_yxsv2_osv16, + os_i_yxs_osv4_yxsv4, goiyx, ///< format used for weights for 2D convolution yxiog, ///< format used for weights for 2D convolution @@ -196,6 +204,13 @@ struct format { g_os_zyx_is_osv32_isv4, ///< format for imad 
deconvolution g_os_zyx_is_osv32_isv16, ///< format for imad deconvolution g_os_zyx_is_osv32_isv32, ///< format for imad deconvolution + g_os_is_yx_isv16_osv16, + gs_oi_yxs_gsv4_yxsv4, + gs_oi_yxs_gsv16_yxsv4, + gs_oi_yxs_gsv32_yxsv4, + gi_yxs_os_yxsv2_osv16, + giy_xs_os_xsv2_osv8__ao32, + giy_xs_os_xsv2_osv16__ao32, format_num, ///< number of format types any = -1 @@ -212,7 +227,7 @@ struct format { // Order - dims changing order from rare to often // Inner order - dims order for internal storage in _sizes array // Block sizes - vector of pairs of dimension number (by inner order) and block size ordered from rare to often - // Format B F S L G Order Inner order Block sizes + // Format B F S L G Order Inner order Block sizes { yxfb, { 1, 1, 2, 0, 0, "yxfb", "bfxy?", {}}}, { byxf, { 1, 1, 2, 0, 0, "byxf", "bfxy?", {}}}, { bfyx, { 1, 1, 2, 0, 0, "bfyx", "bfxy?", {}}}, @@ -235,66 +250,81 @@ struct format { { nv12, { 1, 1, 2, 0, 0, "bfyx", "bfxy?", {}}}, { image_2d_rgba, { 1, 1, 2, 0, 0, "bfyx", "bfxy?", {}}}, - { oiyx, { 1, 1, 2, 0, 0, "bfyx", "bfxy", {}}}, - { yxio, { 1, 1, 2, 0, 0, "yxfb", "bfxy?", {}}}, - { oizyx, { 1, 1, 3, 0, 0, "bfzyx", "bfxyz", {}}}, - { os_is_yx_isv16_osv16, { 1, 1, 2, 0, 0, "bfyx", "bfxy", {{1, 16}, {0, 16}}}}, - { os_iyx_osv16, { 1, 1, 2, 0, 0, "bfyx", "bfxy?", {{0, 16}}}}, - { os_iyx_osv32, { 1, 1, 2, 0, 0, "bfyx", "bfxy?", {{0, 32}}}}, - { os_iyx_osv64, { 1, 1, 2, 0, 0, "bfyx", "bfxy?", {{0, 64}}}}, - { winograd_2x3_s1_weights, { 1, 1, 2, 0, 0, "bfyx", "bfxy?", {}}}, - { winograd_2x3_s1_fused_weights, { 1, 1, 2, 0, 0, "xyfb", "bfxy?", {}}}, - { winograd_6x3_s1_fused_weights, { 1, 1, 2, 0, 0, "xyfb", "bfxy?", {}}}, - { image_2d_weights_winograd_6x3_s1_fbxyb, { 1, 1, 2, 0, 0, "xyfb", "bfxy?", {}}}, - { image_2d_weights_winograd_6x3_s1_xfbyb, { 1, 1, 2, 0, 0, "xyfb", "bfxy?", {}}}, - { image_2d_weights_c4_fyx_b, { 1, 1, 2, 0, 0, "bfyx", "bfxy?", {}}}, - { image_2d_weights_c1_b_fyx, { 1, 1, 2, 0, 0, "bfyx", "bfxy?", {}}}, - { lstm_weights_dio, 
{ 1, 1, 2, 0, 0, "bfxy", "bfxy?", {}}}, - { os_is_yx_isa8_osv8_isv4, { 1, 1, 2, 0, 0, "bfyx", "bfxy?", {}}}, - { os_is_yx_isa8_osv8_isv4_swizzled_by_4, { 1, 1, 2, 0, 0, "bfyx", "bfxy?", {}}}, - { os_is_zyx_isa8_osv8_isv4, { 1, 1, 3, 0, 0, "bfzyx", "bfxyz", {{1, 8}, {0, 8}, {1, 4}}}}, - { os_is_yx_osa4_isa8_osv8_isv4_swizzled_by_4, { 1, 1, 2, 0, 0, "bfyx", "bfxy?", {{0, 32}, {1, 32}}}}, - { os_is_zyx_osa4_isa8_osv8_isv4_swizzled_by_4, { 1, 1, 3, 0, 0, "bfzyx", "bfxyz", {{0, 32}, {1, 32}}}}, - { is_o_yx_isv32, { 1, 1, 2, 0, 0, "byxf", "bfxy?", {{1, 32}}}}, - { is_o32_yx_isv32_swizzled_by_4, { 1, 1, 2, 0, 0, "byxf", "bfxy?", {}}}, - { os_is_y_x8_osv8_isv4, { 1, 1, 2, 0, 0, "byxf", "bfxy?", {}}}, - { os_is_y_x8_osv8_isv4_swizzled_by_4, { 1, 1, 2, 0, 0, "byxf", "bfxy?", {}}}, - { os_is_yx_osv16_isv4, { 1, 1, 2, 0, 0, "bfyx", "bfxy?", {{0, 16}, {1, 4}}}}, - { os_is_zyx_osv16_isv16, { 1, 1, 3, 0, 0, "bfzyx", "bfxyz", {{0, 16}, {1, 16}}}}, - { os_is_yx_osv32_isv4_swizzled_by_2, { 1, 1, 2, 0, 0, "bfxy", "bfxy?", {{0, 32}, {1, 4}}}}, - { os_is_yx_osv32_isv4, { 1, 1, 2, 0, 0, "bfxy", "bfxy?", {{0, 32}, {1, 4}}}}, - { os_is_zyx_osv32_isv4, { 1, 1, 3, 0, 0, "bfzyx", "bfxyz", {{0, 32}, {1, 4}}}}, - { os_is_yx_osv32_isv32p, { 1, 1, 1, 0, 0, "bfxy", "bfxy?", {}}}, - { os_is_zyx_isv16_osv16, { 1, 1, 3, 0, 0, "bfzyx", "bfxyz", {{0, 16}, {1, 16}}}}, - { is_os_zyx_isv16_osv16, { 1, 1, 3, 0, 0, "fbzyx", "bfxyz", {{1, 16}, {0, 16}}}}, - { is_os_yx_isv16_osv16, { 1, 1, 2, 0, 0, "fbyx", "bfxyz", {{1, 16}, {0, 16}}}}, - { os_is_osv32_isv32_swizzled_by_4, { 1, 1, 0, 0, 0, "bfxy", "bfxy?", {{0, 32}, {1, 32}}}}, - { os_is_zyx_isv8_osv16_isv2, { 1, 1, 3, 0, 0, "bfzyx", "bfxyz", {{1, 8}, {0, 16}, {1, 2}}}}, - { os_zyxi_osv16, { 1, 1, 3, 0, 0, "bzyxf", "bfxyz", {{0, 16}}}}, - { os_is_yx_isv8_osv16_isv2, { 1, 1, 2, 0, 0, "bfzyx", "bfxyz", {{1, 8}, {0, 16}, {1, 2}}}}, - { os_is_yx_osv16_isv16, { 1, 1, 2, 0, 0, "bfyx", "bfxy", {{1, 16}, {0, 16}}}}, + { oiyx, { 1, 1, 2, 0, 0, "oiyx", "oixy", {}}}, + 
{ iyxo, { 1, 1, 2, 0, 0, "iyxo", "oixy", {}}}, + { yxio, { 1, 1, 2, 0, 0, "yxio", "oixy?", {}}}, + { oizyx, { 1, 1, 3, 0, 0, "oizyx", "oixyz", {}}}, + { os_is_yx_isv16_osv16, { 1, 1, 2, 0, 0, "oiyx", "oixy", {{1, 16}, {0, 16}}}}, + { os_iyx_osv16, { 1, 1, 2, 0, 0, "oiyx", "oixy?", {{0, 16}}}}, + { os_iyx_osv32, { 1, 1, 2, 0, 0, "oiyx", "oixy?", {{0, 32}}}}, + { os_iyx_osv64, { 1, 1, 2, 0, 0, "oiyx", "oixy?", {{0, 64}}}}, + { winograd_2x3_s1_weights, { 1, 1, 2, 0, 0, "oiyx", "oixy?", {}}}, + { winograd_2x3_s1_fused_weights, { 1, 1, 2, 0, 0, "xyio", "oixy?", {}}}, + { winograd_6x3_s1_fused_weights, { 1, 1, 2, 0, 0, "xyio", "oixy?", {}}}, + { image_2d_weights_winograd_6x3_s1_fbxyb, { 1, 1, 2, 0, 0, "xyio", "oixy?", {}}}, + { image_2d_weights_winograd_6x3_s1_xfbyb, { 1, 1, 2, 0, 0, "xyio", "oixy?", {}}}, + { image_2d_weights_c4_fyx_b, { 1, 1, 2, 0, 0, "oiyx", "oixy?", {}}}, + { image_2d_weights_c1_b_fyx, { 1, 1, 2, 0, 0, "oiyx", "oixy?", {}}}, + { lstm_weights_dio, { 1, 1, 2, 0, 0, "oixy", "oixy?", {}}}, + { os_is_yx_isa8_osv8_isv4, { 1, 1, 2, 0, 0, "oiyx", "oixy?", {}}}, + { os_is_yx_isa8_osv8_isv4_swizzled_by_4, { 1, 1, 2, 0, 0, "oiyx", "oixy?", {}}}, + { os_is_zyx_isa8_osv8_isv4, { 1, 1, 3, 0, 0, "oizyx", "oixyz", {{1, 8}, {0, 8}, {1, 4}}}}, + { os_is_yx_osa4_isa8_osv8_isv4_swizzled_by_4, { 1, 1, 2, 0, 0, "oiyx", "oixy?", {{0, 32}, {1, 32}}}}, + { os_is_zyx_osa4_isa8_osv8_isv4_swizzled_by_4, { 1, 1, 3, 0, 0, "oizyx", "oixyz", {{0, 32}, {1, 32}}}}, + { is_o_yx_isv32, { 1, 1, 2, 0, 0, "oyxi", "oixy?", {{1, 32}}}}, + { is_o32_yx_isv32_swizzled_by_4, { 1, 1, 2, 0, 0, "oyxi", "oixy?", {}}}, + { os_is_y_x8_osv8_isv4, { 1, 1, 2, 0, 0, "oyxi", "oixy?", {}}}, + { os_is_y_x8_osv8_isv4_swizzled_by_4, { 1, 1, 2, 0, 0, "oyxi", "oixy?", {}}}, + { os_is_yx_osv16_isv4, { 1, 1, 2, 0, 0, "oixy", "oixy?", {{0, 16}, {1, 4}}}}, + { os_is_zyx_osv16_isv16, { 1, 1, 3, 0, 0, "oizyx", "oixyz", {{0, 16}, {1, 16}}}}, + { os_is_yx_osv32_isv4_swizzled_by_2, { 1, 1, 2, 0, 0, "oixy", "oixy?", {{0, 
32}, {1, 4}}}}, + { os_is_yx_osv32_isv4, { 1, 1, 2, 0, 0, "oixy", "oixy?", {{0, 32}, {1, 4}}}}, + { os_is_zyx_osv32_isv4, { 1, 1, 3, 0, 0, "oizyx", "oixyz", {{0, 32}, {1, 4}}}}, + { os_is_yx_osv32_isv32p, { 1, 1, 1, 0, 0, "oixy", "oixy?", {}}}, + { os_is_zyx_isv16_osv16, { 1, 1, 3, 0, 0, "oizyx", "oixyz", {{0, 16}, {1, 16}}}}, + { is_os_zyx_isv16_osv16, { 1, 1, 3, 0, 0, "iozyx", "oixyz", {{1, 16}, {0, 16}}}}, + { is_os_yx_isv16_osv16, { 1, 1, 2, 0, 0, "ioyx", "oixyz", {{1, 16}, {0, 16}}}}, + { os_is_osv32_isv32_swizzled_by_4, { 1, 1, 0, 0, 0, "oixy", "oixy?", {{0, 32}, {1, 32}}}}, + { os_is_zyx_isv8_osv16_isv2, { 1, 1, 3, 0, 0, "oizyx", "oixyz", {{1, 8}, {0, 16}, {1, 2}}}}, + { os_zyxi_osv16, { 1, 1, 3, 0, 0, "ozyxi", "oixyz", {{0, 16}}}}, + { os_is_yx_isv8_osv16_isv2, { 1, 1, 2, 0, 0, "oizyx", "oixyz", {{1, 8}, {0, 16}, {1, 2}}}}, + { os_is_yx_osv16_isv16, { 1, 1, 2, 0, 0, "oiyx", "oixy", {{1, 16}, {0, 16}}}}, + { os_is_zyx_osv32_isv16, { 1, 1, 3, 0, 0, "oizyx", "oixyz", {{0, 32}, {1, 16}}}}, + { os_is_zyx_osv64_isv16, { 1, 1, 3, 0, 0, "oizyx", "oixyz", {{0, 64}, {1, 16}}}}, + { os_iyx_osv32__ai32, { 1, 1, 2, 0, 0, "oiyx", "oixy", {{0, 32}}}}, + { i_yxs_os_yxsv2_osv16, { 1, 1, 2, 0, 0, "iyxo", "oixy", {{0, 16}}}}, + { iy_xs_os_xsv2_osv8__ao32, { 1, 1, 2, 0, 0, "iyxo", "oixy", {{2, 2}, {0, 8}}}}, + { iy_xs_os_xsv2_osv16__ao32, { 1, 1, 2, 0, 0, "iyxo", "oixy", {{2, 2}, {0, 16}}}}, + { os_i_yxs_osv4_yxsv4, { 1, 1, 2, 0, 0, "oiyx", "oixy", {{0, 4}}}}, - { goiyx, { 1, 1, 2, 0, 1, "gbfyx", "bfxy????g", {}}}, - { goizyx, { 1, 1, 3, 0, 1, "gbfzyx", "bfxyz???g", {}}}, - { g_os_iyx_osv16, { 1, 1, 2, 0, 1, "gbfyx", "bfxy????g", {{0, 16}}}}, - { g_os_iyx_osv32, { 1, 1, 2, 0, 1, "gbfyx", "bfxy????g", {{0, 32}}}}, - { gs_oiyx_gsv16, { 1, 1, 2, 0, 1, "gbfyx", "bfxy????g", {{8, 16}}}}, - { gs_oizyx_gsv16, { 1, 1, 3, 0, 1, "gbfzyx", "bfxyz???g", {{8, 16}}}}, - { gs_oiyx_gsv32, { 1, 1, 2, 0, 1, "gbfyx", "bfxy????g", {{8, 32}}}}, - { gyxio, { 1, 1, 2, 0, 1, "gyxfb", "bfxy????g", 
{}}}, - { g_is_os_zyx_isv16_osv16, { 1, 1, 3, 0, 1, "gfbzyx", "bfxyz???g", {{1, 16}, {0, 16}}}}, - { g_is_os_yx_isv16_osv16, { 1, 1, 2, 0, 1, "gfbyx", "bfxy????g", {{1, 16}, {0, 16}}}}, - { g_os_is_zyx_isv8_osv16_isv2, { 1, 1, 3, 0, 1, "gbfzyx", "bfxyz???g", {{1, 8}, {0, 16}, {1, 2}}}}, - { g_os_is_yx_isv8_osv16_isv2, { 1, 1, 2, 0, 1, "gbfyx", "bfxy????g", {{1, 8}, {0, 16}, {1, 2}}}}, - { g_os_is_zyx_isv16_osv16, { 1, 1, 3, 0, 1, "gbfzyx", "bfxyz???g", {{0, 16}, {1, 16}}}}, - { g_os_is_yx_osv16_isv4, { 1, 1, 2, 0, 1, "gbfyx", "bfxy????g", {{0, 16}, {1, 4}}}}, - { g_os_is_zyx_osv16_isv16, { 1, 1, 3, 0, 1, "gbfzyx", "bfxyz???g", {{0, 16}, {1, 16}}}}, - { g_os_zyx_is_osv16_isv4, { 1, 1, 3, 0, 1, "gbzyxi", "bfxyz???g", {{0, 16}, {1, 4}}}}, - { g_os_zyx_is_osv16_isv16, { 1, 1, 3, 0, 1, "gbzyxi", "bfxyz???g", {{0, 16}, {1, 16}}}}, - { g_os_zyx_is_osv16_isv32, { 1, 1, 3, 0, 1, "gbzyxi", "bfxyz???g", {{0, 16}, {1, 32}}}}, - { g_os_zyx_is_osv32_isv4, { 1, 1, 3, 0, 1, "gbzyxi", "bfxyz???g", {{0, 32}, {1, 4}}}}, - { g_os_zyx_is_osv32_isv16, { 1, 1, 3, 0, 1, "gbzyxi", "bfxyz???g", {{0, 32}, {1, 16}}}}, - { g_os_zyx_is_osv32_isv32, { 1, 1, 3, 0, 1, "gbzyxi", "bfxyz???g", {{0, 32}, {1, 32}}}}, + { goiyx, { 1, 1, 2, 0, 1, "goiyx", "oixy????g", {}}}, + { goizyx, { 1, 1, 3, 0, 1, "goizyx", "oixyz???g", {}}}, + { g_os_iyx_osv16, { 1, 1, 2, 0, 1, "goiyx", "oixy????g", {{0, 16}}}}, + { g_os_iyx_osv32, { 1, 1, 2, 0, 1, "goiyx", "oixy????g", {{0, 32}}}}, + { gs_oiyx_gsv16, { 1, 1, 2, 0, 1, "goiyx", "oixy????g", {{8, 16}}}}, + { gs_oizyx_gsv16, { 1, 1, 3, 0, 1, "goizyx", "oixyz???g", {{8, 16}}}}, + { gs_oiyx_gsv32, { 1, 1, 2, 0, 1, "goiyx", "oixy????g", {{8, 32}}}}, + { gyxio, { 1, 1, 2, 0, 1, "gyxio", "oixy????g", {}}}, + { g_is_os_zyx_isv16_osv16, { 1, 1, 3, 0, 1, "giozyx", "oixyz???g", {{1, 16}, {0, 16}}}}, + { g_is_os_yx_isv16_osv16, { 1, 1, 2, 0, 1, "gioyx", "oixy????g", {{1, 16}, {0, 16}}}}, + { g_os_is_zyx_isv8_osv16_isv2, { 1, 1, 3, 0, 1, "goizyx", "oixyz???g", {{1, 8}, {0, 16}, 
{1, 2}}}}, + { g_os_is_yx_isv8_osv16_isv2, { 1, 1, 2, 0, 1, "goiyx", "oixy????g", {{1, 8}, {0, 16}, {1, 2}}}}, + { g_os_is_zyx_isv16_osv16, { 1, 1, 3, 0, 1, "goizyx", "oixyz???g", {{0, 16}, {1, 16}}}}, + { g_os_is_yx_osv16_isv4, { 1, 1, 2, 0, 1, "goixy", "oixy????g", {{0, 16}, {1, 4}}}}, + { g_os_is_zyx_osv16_isv16, { 1, 1, 3, 0, 1, "goizyx", "oixyz???g", {{0, 16}, {1, 16}}}}, + { g_os_zyx_is_osv16_isv4, { 1, 1, 3, 0, 1, "gozyxi", "oixyz???g", {{0, 16}, {1, 4}}}}, + { g_os_zyx_is_osv16_isv16, { 1, 1, 3, 0, 1, "gozyxi", "oixyz???g", {{0, 16}, {1, 16}}}}, + { g_os_zyx_is_osv16_isv32, { 1, 1, 3, 0, 1, "gozyxi", "oixyz???g", {{0, 16}, {1, 32}}}}, + { g_os_zyx_is_osv32_isv4, { 1, 1, 3, 0, 1, "gozyxi", "oixyz???g", {{0, 32}, {1, 4}}}}, + { g_os_zyx_is_osv32_isv16, { 1, 1, 3, 0, 1, "gozyxi", "oixyz???g", {{0, 32}, {1, 16}}}}, + { g_os_zyx_is_osv32_isv32, { 1, 1, 3, 0, 1, "gozyxi", "oixyz???g", {{0, 32}, {1, 32}}}}, + { gs_oi_yxs_gsv4_yxsv4, { 1, 1, 2, 0, 1, "goiyx", "oixy????g", {{8, 4}}}}, + { gs_oi_yxs_gsv16_yxsv4, { 1, 1, 2, 0, 1, "goiyx", "oixy????g", {{8, 16}}}}, + { gs_oi_yxs_gsv32_yxsv4, { 1, 1, 2, 0, 1, "goiyx", "oixy????g", {{8, 32}}}}, + { g_os_is_yx_isv16_osv16, { 1, 1, 2, 0, 1, "goiyx", "oixy????g", {{1, 16}, {0, 16}}}}, + { gi_yxs_os_yxsv2_osv16, { 1, 1, 2, 0, 1, "giyxo", "oixy????g", {{0, 16}}}}, + { giy_xs_os_xsv2_osv8__ao32, { 1, 1, 2, 0, 1, "giyxo", "oixy????g", {{2, 2}, {0, 8}}}}, + { giy_xs_os_xsv2_osv16__ao32, { 1, 1, 2, 0, 1, "giyxo", "oixy????g", {{2, 2}, {0, 16}}}}, }; return traits.at(fmt); } @@ -334,6 +364,17 @@ struct format { fmt == nv12 || fmt == image_2d_rgba); } + /// @brief Checks if @p format is weights format + static bool is_weights_format(type fmt) { + const auto internal_order = traits(fmt).internal_order; + const auto weights_chars = { "o", "i" }; + for (const auto& c : weights_chars) { + if (internal_order.find_first_of(c) != std::string::npos) { + return true; + } + } + return false; + } /// @brief Checks if @p format is of grouped
type static bool is_grouped(type fmt) { return group_num(fmt) != 0; } /// @brief Checks if @p format is of image type @@ -957,6 +998,45 @@ public: my_sizes[1] = align_to(my_sizes[1], 4); my_sizes[0] = align_to(my_sizes[0], 8); my_sizes[2] = align_to(my_sizes[2], 8); + } else if (fmt == cldnn::format::gs_oi_yxs_gsv4_yxsv4 || fmt == cldnn::format::gs_oi_yxs_gsv16_yxsv4 || fmt == cldnn::format::gs_oi_yxs_gsv32_yxsv4) { + const auto yxsv = 4; + const auto flat_xy = adjusted_coords[4] + adjusted_coords[3] * my_sizes[4]; + + my_sizes.push_back(yxsv); + my_sizes[4] = ceil_div(my_sizes[3] * my_sizes[4], yxsv); + my_sizes[3] = 1; + + adjusted_coords.push_back(flat_xy % yxsv); + adjusted_coords[4] = flat_xy / yxsv; + adjusted_coords[3] = 0; + } else if (fmt == cldnn::format::os_iyx_osv32__ai32 && !is_aligned_to(my_sizes[1], 32)) { + my_sizes[1] = align_to(my_sizes[1], 32); + } else if ((fmt == cldnn::format::iy_xs_os_xsv2_osv8__ao32 || fmt == cldnn::format::iy_xs_os_xsv2_osv16__ao32) && !is_aligned_to(my_sizes[3], 32)) { + my_sizes[3] = align_to(my_sizes[3], 32); + } else if (fmt == cldnn::format::i_yxs_os_yxsv2_osv16 || fmt == cldnn::format::gi_yxs_os_yxsv2_osv16) { + const auto yxsv = 2; + auto flat_xy = adjusted_coords[2] + adjusted_coords[1] * my_sizes[2]; + + my_sizes.insert(std::prev(my_sizes.end()), yxsv); + my_sizes[2] = ceil_div(my_sizes[1] * my_sizes[2], yxsv); + my_sizes[1] = 1; + + adjusted_coords.insert(std::prev(adjusted_coords.end()), flat_xy % yxsv); + adjusted_coords[2] = flat_xy / yxsv; + adjusted_coords[1] = 0; + } else if (fmt == cldnn::format::os_i_yxs_osv4_yxsv4) { + const auto yxsv = 4; + const auto flat_xy = adjusted_coords[3] + adjusted_coords[2] * my_sizes[3]; + + my_sizes.push_back(yxsv); + my_sizes[3] = ceil_div(my_sizes[2] * my_sizes[3], yxsv); + my_sizes[2] = 1; + + adjusted_coords.push_back(flat_xy % yxsv); + adjusted_coords[3] = flat_xy / yxsv; + adjusted_coords[2] = 0; + } else if ((fmt == cldnn::format::giy_xs_os_xsv2_osv8__ao32 || fmt == 
cldnn::format::giy_xs_os_xsv2_osv16__ao32) && !is_aligned_to(my_sizes[4], 32)) { + my_sizes[4] = align_to(my_sizes[4], 32); } assert(my_sizes.size() == adjusted_coords.size()); diff --git a/inference-engine/thirdparty/clDNN/src/gpu/memory_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/memory_gpu.cpp index a09538ae4f3..ae7f12eac16 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/memory_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/memory_gpu.cpp @@ -1,5 +1,5 @@ /* -// Copyright (c) 2016 Intel Corporation +// Copyright (c) 2016-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -31,7 +31,7 @@ gpu_buffer::gpu_buffer(const refcounted_obj_ptr& engine, bool reset) : lockable_gpu_mem(engine), memory_impl(engine, layout, net_id, allocation_type::cl_mem, false), _buffer(_context->context(), CL_MEM_READ_WRITE, size()) { - if (reset) zero_buffer(); + if (reset && is_memory_reset_needed(_layout)) zero_buffer(); } gpu_buffer::gpu_buffer(const refcounted_obj_ptr& engine, @@ -256,7 +256,7 @@ gpu_usm::gpu_usm(const refcounted_obj_ptr& engine, const layout& la "Unknown unified shared memory type!"); } - if (reset) zero_buffer(); + if (reset && is_memory_reset_needed(_layout)) zero_buffer(); } void* gpu_usm::lock() { diff --git a/inference-engine/thirdparty/clDNN/src/include/memory_impl.h b/inference-engine/thirdparty/clDNN/src/include/memory_impl.h index b3b942ba0db..e346f545019 100644 --- a/inference-engine/thirdparty/clDNN/src/include/memory_impl.h +++ b/inference-engine/thirdparty/clDNN/src/include/memory_impl.h @@ -1,5 +1,5 @@ /* -// Copyright (c) 2016-2019 Intel Corporation +// Copyright (c) 2016-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License.
@@ -43,6 +43,25 @@ struct memory_impl : refcounted_obj { uint32_t get_net_id() const { return _net_id; } void set_net(uint32_t id) { _net_id = id; } allocation_type get_allocation_type() const { return _type; } + virtual bool is_memory_reset_needed(layout l) { + // To avoid memory reset, output memory must meet the following requirements: + // - To be Weights format (Data memory can be reused by memory_pool, which can lead to errors) + // - To have zero paddings + // - To be completely filled with data + if (!format::is_weights_format(l.format) || format::is_winograd(l.format) || format::is_image_2d(l.format)) { + return true; + } + + if (l.data_padding.lower_size() != tensor(0) || l.data_padding.upper_size() != tensor(0)) { + return true; + } + + if (_bytes_count == (l.data_type == data_types::bin ? ceil_div(l.count(), 32) : l.count()) * data_type_traits::size_of(l.data_type)) { + return false; + } + + return true; + } protected: engine_impl *const _engine; diff --git a/inference-engine/thirdparty/clDNN/src/kernel_selector_helper.cpp b/inference-engine/thirdparty/clDNN/src/kernel_selector_helper.cpp index fe0703aab35..c0e0683833b 100644 --- a/inference-engine/thirdparty/clDNN/src/kernel_selector_helper.cpp +++ b/inference-engine/thirdparty/clDNN/src/kernel_selector_helper.cpp @@ -343,13 +343,15 @@ cldnn::format::type from_weights_layout(kernel_selector::weights_layout l) { case kernel_selector::weights_layout::oyxi: return cldnn::format::byxf; case kernel_selector::weights_layout::io: - return cldnn::format::fyxb; + return cldnn::format::iyxo; case kernel_selector::weights_layout::iyxo: - return cldnn::format::fyxb; + return cldnn::format::iyxo; case kernel_selector::weights_layout::yxio: return cldnn::format::yxfb; case kernel_selector::weights_layout::os_iyx_osv16: return cldnn::format::os_iyx_osv16; + case kernel_selector::weights_layout::os_is_yx_isv16_osv16: + return cldnn::format::os_is_yx_isv16_osv16; case 
kernel_selector::weights_layout::os_is_yx_osv16_isv16: return cldnn::format::os_is_yx_osv16_isv16; case kernel_selector::weights_layout::os_iyx_osv32: @@ -458,8 +460,46 @@ cldnn::format::type from_weights_layout(kernel_selector::weights_layout l) { return cldnn::format::g_os_zyx_is_osv32_isv16; case kernel_selector::weights_layout::g_os_zyx_is_osv32_isv32: return cldnn::format::g_os_zyx_is_osv32_isv32; + case kernel_selector::weights_layout::gs_oi_yxs_gsv4_yxsv4: + return cldnn::format::gs_oi_yxs_gsv4_yxsv4; + case kernel_selector::weights_layout::gs_oi_yxs_gsv16_yxsv4: + return cldnn::format::gs_oi_yxs_gsv16_yxsv4; + case kernel_selector::weights_layout::gs_oi_yxs_gsv32_yxsv4: + return cldnn::format::gs_oi_yxs_gsv32_yxsv4; + case kernel_selector::weights_layout::g_os_is_yx_osv16_isv4: + return cldnn::format::g_os_is_yx_osv16_isv4; + case kernel_selector::weights_layout::g_os_is_yx_isv16_osv16: + return cldnn::format::g_os_is_yx_isv16_osv16; + case kernel_selector::weights_layout::os_iyx_osv32__ai32: + return cldnn::format::os_iyx_osv32__ai32; + case kernel_selector::weights_layout::os_is_osv32_isv32_swizzled_by_4: + return cldnn::format::os_is_osv32_isv32_swizzled_by_4; + case kernel_selector::weights_layout::iy_xs_os_xsv2_osv16__ao32: + return cldnn::format::iy_xs_os_xsv2_osv16__ao32; + case kernel_selector::weights_layout::iy_xs_os_xsv2_osv8__ao32: + return cldnn::format::iy_xs_os_xsv2_osv8__ao32; + case kernel_selector::weights_layout::i_yxs_os_yxsv2_osv16: + return cldnn::format::i_yxs_os_yxsv2_osv16; + case kernel_selector::weights_layout::os_is_zyx_osv32_isv16: + return cldnn::format::os_is_zyx_osv32_isv16; + case kernel_selector::weights_layout::os_is_zyx_osv64_isv16: + return cldnn::format::os_is_zyx_osv64_isv16; + case kernel_selector::weights_layout::os_is_yx_isv8_osv16_isv2: + return cldnn::format::os_is_yx_isv8_osv16_isv2; + case kernel_selector::weights_layout::dlstm_dir_io: + return cldnn::format::lstm_weights_dio; + case 
kernel_selector::weights_layout::os_iyx_osv16_rotate_180: + return cldnn::format::os_iyx_osv16; + case kernel_selector::weights_layout::os_i_yxs_osv4_yxsv4: + return cldnn::format::os_i_yxs_osv4_yxsv4; + case kernel_selector::weights_layout::gi_yxs_os_yxsv2_osv16: + return cldnn::format::gi_yxs_os_yxsv2_osv16; + case kernel_selector::weights_layout::giy_xs_os_xsv2_osv8__ao32: + return cldnn::format::giy_xs_os_xsv2_osv8__ao32; + case kernel_selector::weights_layout::giy_xs_os_xsv2_osv16__ao32: + return cldnn::format::giy_xs_os_xsv2_osv16__ao32; default: - return cldnn::format::bfyx; + throw std::invalid_argument("Unable to convert kernel selector Weights layout " + std::to_string((int)l) + " to cldnn format"); } } @@ -561,14 +601,18 @@ layout from_weights_tensor(const kernel_selector::weights_tensor& l) { const auto format = from_weights_layout(l.GetLayout()); const auto type = from_weights_type(l.GetDType()); - tensor t = {static_cast(l.OFM().v), - static_cast(l.IFM().v), - static_cast(l.X().v), - static_cast(l.Y().v), - static_cast(l.LX().v), - static_cast(l.LY().v)}; + tensor size(1); - return layout(type, format, t); + size.group[0] = static_cast(l.G().v); + size.batch[0] = static_cast(l.OFM().v); + size.feature[0] = static_cast(l.IFM().v); + size.spatial[0] = static_cast(l.X().v); + size.spatial[1] = static_cast(l.Y().v); + size.spatial[2] = static_cast(l.Z().v); + size.local[0] = static_cast(l.LX().v); + size.local[1] = static_cast(l.LY().v); + + return layout(type, format, size); } kernel_selector::activation_function get_kernel_selector_activation_param(activation_func activation) { diff --git a/inference-engine/thirdparty/clDNN/src/layout_optimizer.cpp b/inference-engine/thirdparty/clDNN/src/layout_optimizer.cpp index 5765c08face..7db5918f4a2 100644 --- a/inference-engine/thirdparty/clDNN/src/layout_optimizer.cpp +++ b/inference-engine/thirdparty/clDNN/src/layout_optimizer.cpp @@ -86,22 +86,7 @@ std::vector, bool>> reorder_factory::get_we } } - // TODO: Add 
conversion of WeightsTensor to cldnn::tensor to have not flattened shape - // layout expected_layout = from_weights_tensor(reorder_params.dest); - - auto new_dtype = from_weights_type(reorder_params.dest.GetDType()); - const auto bpp = data_type_traits::size_of(new_dtype); - tensor expected_size = { 1, 1, 1, (tensor::value_type)(reorder_params.dest.PhysicalSizeInBytes() / bpp) }; - - bool toImageType = IsImageType(reorder_params.dest.GetLayout()); - bool toDynamicLSTMType = IsDynamicLSTMType(reorder_params.dest.GetLayout()); - if (toImageType || toDynamicLSTMType) - expected_size = old_layout.size; - - layout expected_layout = { new_dtype, - toImageType ? from_weights_layout(reorder_params.dest.GetLayout()) - : format::bfyx, // simple linear format (flatten to x channel) - expected_size }; + layout expected_layout = from_weights_tensor(reorder_params.dest); cache_key ckey{ input_id, expected_layout }; auto itr = _cached_generic_reorders.find(ckey); diff --git a/inference-engine/thirdparty/clDNN/src/lstm_dynamic_input.cpp b/inference-engine/thirdparty/clDNN/src/lstm_dynamic_input.cpp index 4d11734f5ee..aa72f3ae7e7 100644 --- a/inference-engine/thirdparty/clDNN/src/lstm_dynamic_input.cpp +++ b/inference-engine/thirdparty/clDNN/src/lstm_dynamic_input.cpp @@ -96,7 +96,7 @@ lstm_dynamic_input_inst::typed_primitive_inst(network_impl& network, lstm_dynami "weights format", node.weights().get_output_layout().format.value, "expected bfyx format", - format::bfyx); + format::oiyx, format::lstm_weights_dio, format::bfyx); CLDNN_ERROR_NOT_EQUAL(node.id(), "Weights batch size", weights_tensor.batch[0],