[GPU] Align Tile parameters with ngraph (#12345)

Roman Lyamin 2022-08-11 14:08:22 +04:00 committed by GitHub
parent e98cdcf496
commit 85e2229a80
6 changed files with 115 additions and 67 deletions


@@ -19,26 +19,19 @@ namespace cldnn {
struct tile : public primitive_base<tile> {
CLDNN_DECLARE_PRIMITIVE(tile)
enum tile_axis {
along_b,
along_f,
along_x,
along_y,
along_z
};
/// @brief Constructs tile primitive.
/// @param id This primitive id.
/// @param out_shape The shape of tiled tensor.
/// @param repeats Per-dimension replication factor.
tile(const primitive_id& id,
const primitive_id& input,
const tensor out_shape,
const std::vector<int64_t> repeats,
const primitive_id& ext_prim_id = "",
const padding& output_padding = padding())
: primitive_base(id, {input}, ext_prim_id, output_padding), out_shape(out_shape) {}
: primitive_base(id, {input}, ext_prim_id, output_padding),
repeats(repeats) {}
/// @brief Shape of the output tensor
tensor out_shape;
/// @brief A per-dimension replication factor
std::vector<int64_t> repeats;
};
/// @}
/// @}
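The header change above is the whole API shift: a tile is now described by how many times each dimension repeats rather than by its final shape plus a tile_axis. A minimal sketch of a call site under the new signature (primitive and input ids taken from the tests below; everything else assumed):

    #include <cstdint>
    #include <vector>
    #include "intel_gpu/primitives/tile.hpp"

    // Before: the caller precomputed the tiled output shape, e.g.
    //   cldnn::tile("tile", "input", cldnn::tensor(1, 2, 2, 4));
    // After: one replication factor per logical dimension, matching
    // ngraph::op::v0::Tile; the output shape is derived downstream.
    cldnn::tile tile_prim("tile", "input", std::vector<int64_t>{1, 1, 2, 1});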


@@ -6,6 +6,7 @@
#include "primitive_type_base.h"
#include "intel_gpu/runtime/memory.hpp"
#include "intel_gpu/runtime/error_handler.hpp"
#include "intel_gpu/runtime/format.hpp"
#include "json_object.h"
#include <string>
@@ -22,7 +23,14 @@ layout tile_inst::calc_output_layout(tile_node const& node, kernel_impl_params c
auto input_layout = impl_param.get_input_layout();
auto input_format = input_layout.format;
return layout{input_layout.data_type, input_format, desc->out_shape};
std::vector<int64_t> repeats = desc->repeats;
auto out_shape = input_layout.get_dims();
for (size_t i = 0; i < repeats.size(); ++i) {
out_shape[i] *= repeats[i];
}
return layout{input_layout.data_type, input_format, tensor(input_format, out_shape)};
}
std::string tile_inst::to_string(tile_node const& node) {
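With out_shape gone from the descriptor, calc_output_layout derives the output shape itself: each input dimension is multiplied by its replication factor. The same arithmetic as a self-contained snippet (plain std::vector in place of cldnn layouts):

    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Same idea as the loop in tile_inst::calc_output_layout():
    // out_shape[i] = input_dim[i] * repeats[i].
    std::vector<int64_t> tiled_shape(std::vector<int64_t> dims,
                                     const std::vector<int64_t>& repeats) {
        for (size_t i = 0; i < repeats.size() && i < dims.size(); ++i)
            dims[i] *= repeats[i];
        return dims;
    }

    int main() {
        // Logical bfyx dims {1, 2, 4, 3} with repeats {1, 1, 2, 1}: y doubles.
        for (int64_t d : tiled_shape({1, 2, 4, 3}, {1, 1, 2, 1}))
            std::cout << d << ' ';  // prints: 1 2 8 3
        std::cout << '\n';
    }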


@@ -8,6 +8,7 @@
#include "ngraph/op/tile.hpp"
#include "intel_gpu/primitives/tile.hpp"
#include "intel_gpu/primitives/reshape.hpp"
namespace ov {
namespace intel_gpu {
@@ -16,10 +17,39 @@ static void CreateTileOp(Program& p, const std::shared_ptr<ngraph::op::v0::Tile>
p.ValidateInputs(op, {2});
auto inputPrimitives = p.GetInputPrimitiveIDs(op);
std::string layerName = layer_type_name_ID(op);
size_t rank = op->get_input_shape(0).size();
auto repeatsNode = std::dynamic_pointer_cast<ngraph::op::Constant>(op->get_input_node_shared_ptr(1));
if (!repeatsNode)
IE_THROW() << "Unsupported parameter nodes type in " << op->get_friendly_name() <<
" (" << op->get_type_name() << ")";
std::vector<int64_t> repeats = repeatsNode->cast_vector<int64_t>();
int64_t defaultSize = 1;
for (size_t i = repeats.size(); i < rank; ++i) {
repeats.insert(repeats.begin(), defaultSize);
}
if (repeats.size() > rank) {
std::string reshapeName = layerName + "_reshape";
auto inputDims = op->get_input_shape(0);
// Extend input dimensions to the same size as repeats dimensions by prepending ones
inputDims.insert(inputDims.begin(), repeats.size() - rank, defaultSize);
auto targetShape = tensor_from_dims(inputDims);
auto reshapePrim = cldnn::reshape(reshapeName, inputPrimitives[0], targetShape, op->get_friendly_name());
p.AddPrimitive(reshapePrim);
p.AddInnerPrimitiveToProfiler(reshapeName, layerName, op);
inputPrimitives[0] = reshapeName;
}
auto tilePrim = cldnn::tile(layerName,
inputPrimitives[0],
tensor_from_dims(op->get_output_shape(0)),
repeats,
op->get_friendly_name());
p.AddPrimitive(tilePrim);
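ngraph aligns Tile's repeats with the innermost dimensions, so CreateTileOp normalizes in both directions: repeats shorter than the input rank are left-padded with ones, and when repeats is longer the input itself is extended with leading ones via the reshape primitive above. A standalone sketch of that normalization (align_ranks is a hypothetical name; the plugin inlines both loops):

    #include <cstdint>
    #include <vector>

    // Brings dims and repeats to a common rank by prepending ones, as
    // CreateTileOp does; in the plugin the dims branch materializes as a
    // cldnn::reshape inserted in front of the tile primitive.
    void align_ranks(std::vector<int64_t>& dims, std::vector<int64_t>& repeats) {
        while (repeats.size() < dims.size())
            repeats.insert(repeats.begin(), 1);
        while (dims.size() < repeats.size())
            dims.insert(dims.begin(), 1);
    }

For example, repeats {2, 3} against a 4-D input becomes {1, 1, 2, 3}, which is exactly the shorter-than-rank case added to the single-layer tests at the end of this commit.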


@@ -65,15 +65,23 @@ TEST(add_reorders_gpu, two_convolutions_and_concatenation) {
}
template<typename data_t>
void tile_ref(const memory::ptr input, memory::ptr output, tile::tile_axis axis, int num_tiles) {
auto get_sizes = [](const tensor& size, tile::tile_axis axis) -> std::pair<int, int> {
void tile_ref(const memory::ptr input, memory::ptr output, int64_t axis, int num_tiles) {
auto get_sizes = [](const layout& l, int64_t axis, size_t rank) -> std::pair<int, int> {
switch (axis) {
case tile::along_b: return std::make_pair(1, size.batch[0] * size.feature[0] * size.spatial[2] * size.spatial[1] * size.spatial[0]);
case tile::along_f: return std::make_pair(size.batch[0], size.feature[0] * size.spatial[2] * size.spatial[1] * size.spatial[0]);
case tile::along_z: return std::make_pair(size.batch[0] * size.feature[0], size.spatial[2] * size.spatial[1] * size.spatial[0]);
case tile::along_y: return std::make_pair(size.batch[0] * size.feature[0] * size.spatial[2], size.spatial[1] * size.spatial[0]);
case tile::along_x: return std::make_pair(size.batch[0] * size.feature[0] * size.spatial[2] * size.spatial[1], size.spatial[0]);
default: throw std::invalid_argument("Invalid axis(" + std::to_string(static_cast<int>(axis)) + ") in tile ref version");
case 0: return std::make_pair(1, l.batch() * l.feature() * l.spatial(2) * l.spatial(1) * l.spatial(0));
case 1: return std::make_pair(l.batch(), l.feature() * l.spatial(2) * l.spatial(1) * l.spatial(0));
case 2:
if (rank > 4)
return std::make_pair(l.batch() * l.feature(), l.spatial(2) * l.spatial(1) * l.spatial(0));
else
return std::make_pair(l.batch() * l.feature() * l.spatial(2), l.spatial(1) * l.spatial(0));
case 3:
if (rank > 4)
return std::make_pair(l.batch() * l.feature() * l.spatial(2), l.spatial(1) * l.spatial(0));
else
return std::make_pair(l.batch() * l.feature() * l.spatial(2) * l.spatial(1), l.spatial(0));
case 4: return std::make_pair(l.batch() * l.feature() * l.spatial(2) * l.spatial(1), l.spatial(0));
default: throw std::invalid_argument("Invalid axis(" + std::to_string(static_cast<int>(axis)) + ") in tile ref version");
}
};
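With the enum gone, the reference helper takes a plain axis index: 0 is b, 1 is f, and the meaning of 2 and 3 depends on rank (for a 4-D layout index 2 is y; for a 5-D layout it is z), hence the rank branches above. What get_sizes computes is just the product of dimensions on each side of the axis; a generic version with a hypothetical split_sizes name, assuming dims in logical order:

    #include <cstdint>
    #include <functional>
    #include <numeric>
    #include <utility>
    #include <vector>

    // Product of dims before `axis` (outer) and from `axis` onward (inner);
    // the inner block is what gets replicated num_tiles times.
    std::pair<int64_t, int64_t> split_sizes(const std::vector<int64_t>& dims,
                                            size_t axis) {
        auto prod = [](auto first, auto last) {
            return std::accumulate(first, last, int64_t{1},
                                   std::multiplies<int64_t>());
        };
        return { prod(dims.begin(), dims.begin() + axis),
                 prod(dims.begin() + axis, dims.end()) };
    }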
@@ -82,8 +90,9 @@ void tile_ref(const memory::ptr input, memory::ptr output, tile::tile_axis axis,
const data_t* psrc = src.data();
data_t* pdst = dst.data();
const auto& input_layout = input->get_layout();
auto sizes = get_sizes(input->get_layout().get_tensor(), axis);
auto sizes = get_sizes(input_layout, axis, input_layout.get_rank());
int outer_dim = sizes.first;
int inner_dim = sizes.second;
@@ -107,11 +116,11 @@ TEST(add_reorders_gpu, basic_reshape_and_tile) {
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(reshape("reshape", "input", tensor(2, 1, 2, 1)));
topology.add(tile("tile", "reshape", tensor(2, 1, 2, 4)));
topology.add(tile("tile", "reshape", { 1, 1, 4, 1 }));
std::vector<float> input_vec = { 1.f, 0.f, 5.f, 1.5f };
set_values(input, input_vec);
tile_ref<float>(input, output_ref, tile::along_y, 4);
tile_ref<float>(input, output_ref, 2, 4);
network network(engine, topology);
network.set_input_data("input", input);


@@ -15,17 +15,22 @@ using namespace cldnn;
using namespace ::tests;
template<typename data_t>
void tile_ref(const memory::ptr input, memory::ptr output, tile::tile_axis axis, int num_tiles)
{
auto get_sizes = [](const layout& l, tile::tile_axis axis) -> std::pair<int, int>
{
switch (axis)
{
case tile::along_b: return std::make_pair(1, l.batch()*l.feature()*l.spatial(2)*l.spatial(1)*l.spatial(0));
case tile::along_f: return std::make_pair(l.batch(), l.feature()*l.spatial(2)*l.spatial(1)*l.spatial(0));
case tile::along_z: return std::make_pair(l.batch()*l.feature(), l.spatial(2)*l.spatial(1)*l.spatial(0));
case tile::along_y: return std::make_pair(l.batch()*l.feature()*l.spatial(2), l.spatial(1)*l.spatial(0));
case tile::along_x: return std::make_pair(l.batch()*l.feature()*l.spatial(2)*l.spatial(1), l.spatial(0));
void tile_ref(const memory::ptr input, memory::ptr output, int64_t axis, int num_tiles) {
auto get_sizes = [](const layout& l, int64_t axis, size_t rank) -> std::pair<int, int> {
switch (axis) {
case 0: return std::make_pair(1, l.batch() * l.feature() * l.spatial(2) * l.spatial(1) * l.spatial(0));
case 1: return std::make_pair(l.batch(), l.feature() * l.spatial(2) * l.spatial(1) * l.spatial(0));
case 2:
if (rank > 4)
return std::make_pair(l.batch() * l.feature(), l.spatial(2) * l.spatial(1) * l.spatial(0));
else
return std::make_pair(l.batch() * l.feature() * l.spatial(2), l.spatial(1) * l.spatial(0));
case 3:
if (rank > 4)
return std::make_pair(l.batch() * l.feature() * l.spatial(2), l.spatial(1) * l.spatial(0));
else
return std::make_pair(l.batch() * l.feature() * l.spatial(2) * l.spatial(1), l.spatial(0));
case 4: return std::make_pair(l.batch() * l.feature() * l.spatial(2) * l.spatial(1), l.spatial(0));
default: throw std::invalid_argument("Invalid axis(" + std::to_string(static_cast<int>(axis)) + ") in tile ref version");
}
};
@@ -36,16 +41,13 @@ void tile_ref(const memory::ptr input, memory::ptr output, tile::tile_axis axis,
const data_t* psrc = src.data();
data_t* pdst = dst.data();
auto sizes = get_sizes(input->get_layout(), axis);
auto sizes = get_sizes(input->get_layout(), axis, input->get_layout().get_rank());
int outer_dim = sizes.first;
int inner_dim = sizes.second;
for (int i = 0; i < outer_dim; i++)
{
for (int t = 0; t < num_tiles; t++)
{
for (int j = 0; j < inner_dim; j++)
{
for (int i = 0; i < outer_dim; i++) {
for (int t = 0; t < num_tiles; t++) {
for (int j = 0; j < inner_dim; j++) {
pdst[j] = psrc[j];
}
pdst += inner_dim;
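The copy loop is unchanged in meaning: for each outer slice, the inner block is written num_tiles times before the source advances. The same logic as a self-contained function over std::vector (tile_reference is a hypothetical name; the test keeps raw pointers):

    #include <cstddef>
    #include <vector>

    // For each outer slice, append that slice's inner block num_tiles times;
    // equivalent to the psrc/pdst pointer loop in tile_ref.
    std::vector<float> tile_reference(const std::vector<float>& src,
                                      int outer_dim, int inner_dim, int num_tiles) {
        std::vector<float> dst;
        dst.reserve(static_cast<size_t>(outer_dim) * num_tiles * inner_dim);
        for (int i = 0; i < outer_dim; ++i)
            for (int t = 0; t < num_tiles; ++t)
                dst.insert(dst.end(),
                           src.begin() + static_cast<std::ptrdiff_t>(i) * inner_dim,
                           src.begin() + static_cast<std::ptrdiff_t>(i + 1) * inner_dim);
        return dst;
    }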
@@ -62,12 +64,12 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_b) {
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(tile("tile", "input", tensor(2, 2, 2, 2)));
topology.add(tile("tile", "input", { 2, 1, 1, 1 }));
std::vector<float> input_vec = { 1.f, 0.f, 5.f, 1.5f,
2.f, 0.f, 6.f, 5.2f };
set_values(input, input_vec);
tile_ref<float>(input, output_ref, tile::along_b, 2);
tile_ref<float>(input, output_ref, 0, 2);
network network(engine, topology);
network.set_input_data("input", input);
@@ -91,7 +93,7 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_f) {
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(tile("tile", "input", tensor(1, 4, 2, 2)));
topology.add(tile("tile", "input", { 1, 2, 1, 1 }));
std::vector<float> input_vec = { 1.f, 0.f,
5.f, 1.5f,
@@ -99,7 +101,7 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_f) {
2.f, 0.f,
6.f, 5.2f };
set_values(input, input_vec);
tile_ref<float>(input, output_ref, tile::along_f, 2);
tile_ref<float>(input, output_ref, 1, 2);
network network(engine, topology);
network.set_input_data("input", input);
@@ -118,20 +120,24 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_f) {
TEST(tile_gpu, basic_in1x2x2x2_axis_y) {
auto& engine = get_test_engine();
auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 2, 2 } });
auto output_ref = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 2, 4 } });
auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 3, 4 } });
auto output_ref = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 3, 8 } });
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(tile("tile", "input", tensor(1, 2, 2, 4)));
topology.add(tile("tile", "input", { 1, 1, 2, 1 }));
std::vector<float> input_vec = { 1.f, 0.f,
5.f, 1.5f,
2.f, 0.f,
6.f, 5.2f };
std::vector<float> input_vec = { 0.f, 1.f, 2.f,
3.f, 4.f, 5.f,
6.f, 7.f, 8.f,
9.f, 10.f, 11.f,
12.f, 13.f, 14.f,
15.f, 16.f, 17.f,
18.f, 19.f, 20.f,
21.f, 22.f, 23.f };
set_values(input, input_vec);
tile_ref<float>(input, output_ref, tile::along_y, 2);
tile_ref<float>(input, output_ref, 2, 2);
network network(engine, topology);
network.set_input_data("input", input);
@@ -155,15 +161,15 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_x) {
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(tile("tile", "input", tensor(1, 2, 4, 2)));
topology.add(tile("tile", "input", { 1, 1, 1, 2 }));
std::vector<float> input_vec = { 1.f, 0.f,
5.f, 1.5f,
2.f, 0.f,
6.f, 5.2f };
std::vector<float> input_vec = { 0.f, 1.f,
2.f, 3.f,
4.f, 5.f,
6.f, 7.f };
set_values(input, input_vec);
tile_ref<float>(input, output_ref, tile::along_x, 2);
tile_ref<float>(input, output_ref, 3, 2);
network network(engine, topology);
network.set_input_data("input", input);
@@ -187,11 +193,11 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_x_dense) {
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(tile("tile", "input", tensor(1, 2, 4, 2)));
topology.add(tile("tile", "input", { 1, 1, 1, 4 }));
std::vector<float> input_vec = { 1.f, 0.f, 5.f, 1.5f};
std::vector<float> input_vec = { 1.f, 0.f, 5.f, 1.5f };
set_values(input, input_vec);
tile_ref<float>(input, output_ref, tile::along_x, 4);
tile_ref<float>(input, output_ref, 3, 4);
network network(engine, topology);
network.set_input_data("input", input);
@@ -215,7 +221,7 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_z) {
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(tile("tile", "input", tensor(1, 2, 2, 2, 4)));
topology.add(tile("tile", "input", { 1, 1, 2, 1, 1 }));
std::vector<float> input_vec = {
1.f, 0.f,
@@ -228,7 +234,7 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_z) {
6.f, 5.2f
};
set_values(input, input_vec);
tile_ref<float>(input, output_ref, tile::along_z, 2);
tile_ref<float>(input, output_ref, 2, 2);
network network(engine, topology);
network.set_input_data("input", input);
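Every test in this file migrates by the same recipe: the old explicit output tensor divided elementwise by the input shape yields the repeats vector (input 1x2x2x2 with old output 2x2x2x2 becomes repeats {2, 1, 1, 1} in the axis_b case above). A hypothetical helper capturing that conversion:

    #include <cstdint>
    #include <vector>

    // repeats[i] = out_shape[i] / in_shape[i]; assumes out is an exact
    // elementwise multiple of in, as in all the tests above.
    std::vector<int64_t> repeats_from_shapes(const std::vector<int64_t>& in,
                                             const std::vector<int64_t>& out) {
        std::vector<int64_t> repeats(in.size());
        for (size_t i = 0; i < in.size(); ++i)
            repeats[i] = out[i] / in[i];
        return repeats;
    }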


@@ -15,10 +15,12 @@ const std::vector<InferenceEngine::Precision> netPrecisions = {
};
const std::vector<std::vector<int64_t>> repeats = {
{2, 3},
{1, 2, 3},
{2, 1, 1},
{2, 3, 1},
{2, 2, 2},
{2, 3, 4, 5},
};
INSTANTIATE_TEST_SUITE_P(smoke_Tile, TileLayerTest,