diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/tile.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/tile.hpp index 3c04d8d0829..7b835bb6332 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/tile.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/tile.hpp @@ -19,26 +19,19 @@ namespace cldnn { struct tile : public primitive_base<tile> { CLDNN_DECLARE_PRIMITIVE(tile) - enum tile_axis { - along_b, - along_f, - along_x, - along_y, - along_z - }; - /// @brief Constructs tile primitive. /// @param id This primitive id. - /// @param out_shape The shape of tiled tensor. + /// @param repeats Per-dimension replication factor. tile(const primitive_id& id, const primitive_id& input, - const tensor out_shape, + const std::vector<int64_t> repeats, const primitive_id& ext_prim_id = "", const padding& output_padding = padding()) - : primitive_base(id, {input}, ext_prim_id, output_padding), out_shape(out_shape) {} + : primitive_base(id, {input}, ext_prim_id, output_padding), + repeats(repeats) {} - /// @brief Shape of the output tensor - tensor out_shape; + /// @brief A per-dimension replication factor + std::vector<int64_t> repeats; }; /// @} /// @} diff --git a/src/plugins/intel_gpu/src/graph/tile.cpp b/src/plugins/intel_gpu/src/graph/tile.cpp index af80ea3b5c2..69f7bdf22fb 100644 --- a/src/plugins/intel_gpu/src/graph/tile.cpp +++ b/src/plugins/intel_gpu/src/graph/tile.cpp @@ -6,6 +6,7 @@ #include "primitive_type_base.h" #include "intel_gpu/runtime/memory.hpp" #include "intel_gpu/runtime/error_handler.hpp" +#include "intel_gpu/runtime/format.hpp" #include "json_object.h" #include <string> @@ -22,7 +23,14 @@ layout tile_inst::calc_output_layout(tile_node const& node, kernel_impl_params c auto input_layout = impl_param.get_input_layout(); auto input_format = input_layout.format; - return layout{input_layout.data_type, input_format, desc->out_shape}; + + std::vector<int64_t> repeats = desc->repeats; + + auto out_shape = input_layout.get_dims(); + for (size_t i = 0; i < 
repeats.size(); ++i) { + out_shape[i] *= repeats[i]; + } + return layout{input_layout.data_type, input_format, tensor(input_format, out_shape)}; } std::string tile_inst::to_string(tile_node const& node) { diff --git a/src/plugins/intel_gpu/src/plugin/ops/tile.cpp b/src/plugins/intel_gpu/src/plugin/ops/tile.cpp index 849d7644224..a7a7e62a7cc 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/tile.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/tile.cpp @@ -8,6 +8,7 @@ #include "ngraph/op/tile.hpp" #include "intel_gpu/primitives/tile.hpp" +#include "intel_gpu/primitives/reshape.hpp" namespace ov { namespace intel_gpu { @@ -16,10 +17,39 @@ static void CreateTileOp(Program& p, const std::shared_ptr p.ValidateInputs(op, {2}); auto inputPrimitives = p.GetInputPrimitiveIDs(op); std::string layerName = layer_type_name_ID(op); + size_t rank = op->get_input_shape(0).size(); + + auto repeatsNode = std::dynamic_pointer_cast<ngraph::op::Constant>(op->get_input_node_shared_ptr(1)); + if (!repeatsNode) + IE_THROW() << "Unsupported parameter nodes type in " << op->get_friendly_name() << + " (" << op->get_type_name() << ")"; + std::vector<int64_t> repeats = repeatsNode->cast_vector<int64_t>(); + + int64_t defaultSize = 1; + for (size_t i = repeats.size(); i < rank; ++i) { + repeats.insert(repeats.begin(), defaultSize); + } + + if (repeats.size() > rank) { + std::string reshapeName = layerName + "_reshape"; + auto inputDims = op->get_input_shape(0); + + // Extend input dimensions to the same size as repeats dimensions by prepending ones + inputDims.insert(inputDims.begin(), repeats.size() - rank, defaultSize); + + auto targetShape = tensor_from_dims(inputDims); + + auto reshapePrim = cldnn::reshape(reshapeName, inputPrimitives[0], targetShape, op->get_friendly_name()); + + p.AddPrimitive(reshapePrim); + p.AddInnerPrimitiveToProfiler(reshapeName, layerName, op); + + inputPrimitives[0] = reshapeName; + } auto tilePrim = cldnn::tile(layerName, inputPrimitives[0], - tensor_from_dims(op->get_output_shape(0)), + repeats, 
op->get_friendly_name()); p.AddPrimitive(tilePrim); diff --git a/src/plugins/intel_gpu/tests/test_cases/add_reorders_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/add_reorders_gpu_test.cpp index 14f9171126c..959df21f730 100644 --- a/src/plugins/intel_gpu/tests/test_cases/add_reorders_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/add_reorders_gpu_test.cpp @@ -65,15 +65,23 @@ TEST(add_reorders_gpu, two_convolutions_and_concatenation) { } template <typename data_t> -void tile_ref(const memory::ptr input, memory::ptr output, tile::tile_axis axis, int num_tiles) { - auto get_sizes = [](const tensor& size, tile::tile_axis axis) -> std::pair<int, int> { switch (axis) { - case tile::along_b: return std::make_pair(1, size.batch[0] * size.feature[0] * size.spatial[2] * size.spatial[1] * size.spatial[0]); - case tile::along_f: return std::make_pair(size.batch[0], size.feature[0] * size.spatial[2] * size.spatial[1] * size.spatial[0]); - case tile::along_z: return std::make_pair(size.batch[0] * size.feature[0], size.spatial[2] * size.spatial[1] * size.spatial[0]); - case tile::along_y: return std::make_pair(size.batch[0] * size.feature[0] * size.spatial[2], size.spatial[1] * size.spatial[0]); - case tile::along_x: return std::make_pair(size.batch[0] * size.feature[0] * size.spatial[2] * size.spatial[1], size.spatial[0]); - default: throw std::invalid_argument("Invalid axis(" + std::to_string(static_cast<int>(axis)) + ") in tile ref version"); +void tile_ref(const memory::ptr input, memory::ptr output, int64_t axis, int num_tiles) { + auto get_sizes = [](const layout& l, int64_t axis, size_t rank) -> std::pair<int, int> { + switch (axis) { + case 0: return std::make_pair(1, l.batch() * l.feature() * l.spatial(2) * l.spatial(1) * l.spatial(0)); + case 1: return std::make_pair(l.batch(), l.feature() * l.spatial(2) * l.spatial(1) * l.spatial(0)); + case 2: + if (rank > 4) + return std::make_pair(l.batch() * l.feature(), l.spatial(2) * l.spatial(1) * l.spatial(0)); + else + return std::make_pair(l.batch() * l.feature() * 
l.spatial(2), l.spatial(1) * l.spatial(0)); + case 3: + if (rank > 4) + return std::make_pair(l.batch() * l.feature() * l.spatial(2), l.spatial(1) * l.spatial(0)); + else + return std::make_pair(l.batch() * l.feature() * l.spatial(2) * l.spatial(1), l.spatial(0)); + case 4: return std::make_pair(l.batch() * l.feature() * l.spatial(2) * l.spatial(1), l.spatial(0)); + default: throw std::invalid_argument("Invalid axis(" + std::to_string(static_cast<int>(axis)) + ") in tile ref version"); } }; @@ -82,8 +90,9 @@ void tile_ref(const memory::ptr input, memory::ptr output, tile::tile_axis axis, const data_t* psrc = src.data(); data_t* pdst = dst.data(); + const auto& input_layout = input->get_layout(); - auto sizes = get_sizes(input->get_layout().get_tensor(), axis); + auto sizes = get_sizes(input_layout, axis, input_layout.get_rank()); int outer_dim = sizes.first; int inner_dim = sizes.second; @@ -107,11 +116,11 @@ TEST(add_reorders_gpu, basic_reshape_and_tile) { topology topology; topology.add(input_layout("input", input->get_layout())); topology.add(reshape("reshape", "input", tensor(2, 1, 2, 1))); - topology.add(tile("tile", "reshape", tensor(2, 1, 2, 4))); + topology.add(tile("tile", "reshape", { 1, 1, 4, 1 })); std::vector<float> input_vec = { 1.f, 0.f, 5.f, 1.5f }; set_values(input, input_vec); - tile_ref<float>(input, output_ref, tile::along_y, 4); + tile_ref<float>(input, output_ref, 2, 4); network network(engine, topology); network.set_input_data("input", input); diff --git a/src/plugins/intel_gpu/tests/test_cases/tile_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/tile_gpu_test.cpp index 3e2b7142f30..c2ec9af7734 100644 --- a/src/plugins/intel_gpu/tests/test_cases/tile_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/tile_gpu_test.cpp @@ -15,17 +15,22 @@ using namespace cldnn; using namespace ::tests; template <typename data_t> -void tile_ref(const memory::ptr input, memory::ptr output, tile::tile_axis axis, int num_tiles) -{ - auto get_sizes = [](const layout& l, tile::tile_axis axis) -> 
std::pair<int, int> - { - switch (axis) - { - case tile::along_b: return std::make_pair(1, l.batch()*l.feature()*l.spatial(2)*l.spatial(1)*l.spatial(0)); - case tile::along_f: return std::make_pair(l.batch(), l.feature()*l.spatial(2)*l.spatial(1)*l.spatial(0)); - case tile::along_z: return std::make_pair(l.batch()*l.feature(), l.spatial(2)*l.spatial(1)*l.spatial(0)); - case tile::along_y: return std::make_pair(l.batch()*l.feature()*l.spatial(2), l.spatial(1)*l.spatial(0)); - case tile::along_x: return std::make_pair(l.batch()*l.feature()*l.spatial(2)*l.spatial(1), l.spatial(0)); +void tile_ref(const memory::ptr input, memory::ptr output, int64_t axis, int num_tiles) { + auto get_sizes = [](const layout& l, int64_t axis, size_t rank) -> std::pair<int, int> { + switch (axis) { + case 0: return std::make_pair(1, l.batch() * l.feature() * l.spatial(2) * l.spatial(1) * l.spatial(0)); + case 1: return std::make_pair(l.batch(), l.feature() * l.spatial(2) * l.spatial(1) * l.spatial(0)); + case 2: + if (rank > 4) + return std::make_pair(l.batch() * l.feature(), l.spatial(2) * l.spatial(1) * l.spatial(0)); + else + return std::make_pair(l.batch() * l.feature() * l.spatial(2), l.spatial(1) * l.spatial(0)); + case 3: + if (rank > 4) + return std::make_pair(l.batch() * l.feature() * l.spatial(2), l.spatial(1) * l.spatial(0)); + else + return std::make_pair(l.batch() * l.feature() * l.spatial(2) * l.spatial(1), l.spatial(0)); + case 4: return std::make_pair(l.batch() * l.feature() * l.spatial(2) * l.spatial(1), l.spatial(0)); default: throw std::invalid_argument("Invalid axis(" + std::to_string(static_cast<int>(axis)) + ") in tile ref version"); } }; @@ -36,16 +41,13 @@ void tile_ref(const memory::ptr input, memory::ptr output, tile::tile_axis axis, const data_t* psrc = src.data(); data_t* pdst = dst.data(); - auto sizes = get_sizes(input->get_layout(), axis); + auto sizes = get_sizes(input->get_layout(), axis, input->get_layout().get_rank()); int outer_dim = sizes.first; int inner_dim = sizes.second; - 
for (int i = 0; i < outer_dim; i++) - { - for (int t = 0; t < num_tiles; t++) - { - for (int j = 0; j < inner_dim; j++) - { + for (int i = 0; i < outer_dim; i++) { + for (int t = 0; t < num_tiles; t++) { + for (int j = 0; j < inner_dim; j++) { pdst[j] = psrc[j]; } pdst += inner_dim; @@ -62,12 +64,12 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_b) { topology topology; topology.add(input_layout("input", input->get_layout())); - topology.add(tile("tile", "input", tensor(2, 2, 2, 2))); + topology.add(tile("tile", "input", { 2, 1, 1, 1 })); std::vector<float> input_vec = { 1.f, 0.f, 5.f, 1.5f, 2.f, 0.f, 6.f, 5.2f }; set_values(input, input_vec); - tile_ref<float>(input, output_ref, tile::along_b, 2); + tile_ref<float>(input, output_ref, 0, 2); network network(engine, topology); network.set_input_data("input", input); @@ -91,7 +93,7 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_f) { topology topology; topology.add(input_layout("input", input->get_layout())); - topology.add(tile("tile", "input", tensor(1, 4, 2, 2))); + topology.add(tile("tile", "input", { 1, 2, 1, 1 })); std::vector<float> input_vec = { 1.f, 0.f, 5.f, 1.5f, @@ -99,7 +101,7 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_f) { 2.f, 0.f, 6.f, 5.2f }; set_values(input, input_vec); - tile_ref<float>(input, output_ref, tile::along_f, 2); + tile_ref<float>(input, output_ref, 1, 2); network network(engine, topology); network.set_input_data("input", input); @@ -118,20 +120,24 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_y) { auto& engine = get_test_engine(); - auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 2, 2 } }); - auto output_ref = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 2, 4 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 3, 4 } }); + auto output_ref = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 3, 8 } }); topology topology; topology.add(input_layout("input", input->get_layout())); - topology.add(tile("tile", "input", tensor(1, 
2, 2, 4))); + topology.add(tile("tile", "input", { 1, 1, 2, 1 })); - std::vector<float> input_vec = { 1.f, 0.f, - 5.f, 1.5f, + std::vector<float> input_vec = { 0.f, 1.f, 2.f, + 3.f, 4.f, 5.f, + 6.f, 7.f, 8.f, + 9.f, 10.f, 11.f, - 2.f, 0.f, - 6.f, 5.2f }; + 12.f, 13.f, 14.f, + 15.f, 16.f, 17.f, + 18.f, 19.f, 20.f, + 21.f, 22.f, 23.f }; set_values(input, input_vec); - tile_ref<float>(input, output_ref, tile::along_y, 2); + tile_ref<float>(input, output_ref, 2, 2); network network(engine, topology); network.set_input_data("input", input); @@ -155,15 +161,15 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_x) { topology topology; topology.add(input_layout("input", input->get_layout())); - topology.add(tile("tile", "input", tensor(1, 2, 4, 2))); + topology.add(tile("tile", "input", { 1, 1, 1, 2 })); - std::vector<float> input_vec = { 1.f, 0.f, - 5.f, 1.5f, + std::vector<float> input_vec = { 0.f, 1.f, + 2.f, 3.f, - 2.f, 0.f, - 6.f, 5.2f }; + 4.f, 5.f, + 6.f, 7.f }; set_values(input, input_vec); - tile_ref<float>(input, output_ref, tile::along_x, 2); + tile_ref<float>(input, output_ref, 3, 2); network network(engine, topology); network.set_input_data("input", input); @@ -187,11 +193,11 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_x_dense) { topology topology; topology.add(input_layout("input", input->get_layout())); - topology.add(tile("tile", "input", tensor(1, 2, 4, 2))); + topology.add(tile("tile", "input", { 1, 1, 1, 4 })); - std::vector<float> input_vec = { 1.f, 0.f, 5.f, 1.5f}; + std::vector<float> input_vec = { 1.f, 0.f, 5.f, 1.5f }; set_values(input, input_vec); - tile_ref<float>(input, output_ref, tile::along_x, 4); + tile_ref<float>(input, output_ref, 3, 4); network network(engine, topology); network.set_input_data("input", input); @@ -215,7 +221,7 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_z) { topology topology; topology.add(input_layout("input", input->get_layout())); - topology.add(tile("tile", "input", tensor(1, 2, 2, 2, 4))); + topology.add(tile("tile", "input", { 1, 1, 2, 1, 1 })); std::vector<float> input_vec = { 1.f, 0.f, @@ -228,7 +234,7 @@ TEST(tile_gpu, 
basic_in1x2x2x2_axis_z) { 6.f, 5.2f }; set_values(input, input_vec); - tile_ref<float>(input, output_ref, tile::along_z, 2); + tile_ref<float>(input, output_ref, 2, 2); network network(engine, topology); network.set_input_data("input", input); diff --git a/src/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/tile.cpp b/src/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/tile.cpp index 04b4e3df6b4..186c1f3f1fc 100644 --- a/src/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/tile.cpp +++ b/src/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/tile.cpp @@ -15,10 +15,12 @@ const std::vector<InferenceEngine::Precision> netPrecisions = { }; const std::vector<std::vector<int64_t>> repeats = { + {2, 3}, {1, 2, 3}, {2, 1, 1}, {2, 3, 1}, {2, 2, 2}, + {2, 3, 4, 5}, }; INSTANTIATE_TEST_SUITE_P(smoke_Tile, TileLayerTest,