diff --git a/src/core/shape_inference/include/tile_shape_inference.hpp b/src/core/shape_inference/include/tile_shape_inference.hpp
index 01dac2af9cd..f6304a010a0 100644
--- a/src/core/shape_inference/include/tile_shape_inference.hpp
+++ b/src/core/shape_inference/include/tile_shape_inference.hpp
@@ -35,7 +35,7 @@ void shape_infer(const Tile* op,
     if (axes_are_known) {
         auto remain_arg = output_rank - data_rank;
         auto remain_axes = output_rank - repeats_rank;
-        for (size_t i = 0; i < output_rank; i++) {
+        for (int64_t i = 0; i < output_rank; i++) {
             auto data_tmp = i < remain_arg ? DimType(1) : arg_shape[i - (remain_arg)];
             auto repeat_tmp = i < remain_axes ? DimType(1) : axes_val[i - remain_axes];
diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/tile.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/tile.hpp
index 7b835bb6332..9212e45ce58 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/primitives/tile.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/tile.hpp
@@ -19,7 +19,7 @@ namespace cldnn {
 struct tile : public primitive_base<tile> {
     CLDNN_DECLARE_PRIMITIVE(tile)
 
-    /// @brief Constructs tile primitive.
+    /// @brief Constructs tile primitive with static input.
     /// @param id This primitive id.
     /// @param repeats Per-dimension replication factor.
     tile(const primitive_id& id,
@@ -30,6 +30,15 @@ struct tile : public primitive_base<tile> {
         : primitive_base(id, {input}, ext_prim_id, output_padding),
           repeats(repeats) {}
 
+    /// @brief Constructs tile primitive with dynamic input.
+    tile(const primitive_id& id,
+         const primitive_id& input,
+         const primitive_id& repeats_id,
+         const primitive_id& ext_prim_id = "",
+         const padding& output_padding = padding())
+        : primitive_base(id, {input, repeats_id}, ext_prim_id, output_padding),
+          repeats({}) {}
+
     /// @brief A per-dimension replication factor
     std::vector<int64_t> repeats;
 };
diff --git a/src/plugins/intel_gpu/src/graph/include/tile_inst.h b/src/plugins/intel_gpu/src/graph/include/tile_inst.h
index 4e4bbe183b2..bf41986673d 100644
--- a/src/plugins/intel_gpu/src/graph/include/tile_inst.h
+++ b/src/plugins/intel_gpu/src/graph/include/tile_inst.h
@@ -30,6 +30,9 @@ class typed_primitive_inst<tile> : public typed_primitive_inst_base<tile> {
 public:
     static layout calc_output_layout(tile_node const& node, kernel_impl_params const& impl_param);
 
+    template <typename ShapeType>
+    static std::vector<layout> calc_output_layouts(tile_node const& /*node*/, const kernel_impl_params& impl_param);
+
     static std::string to_string(tile_node const& node);
 
 public:
diff --git a/src/plugins/intel_gpu/src/graph/tile.cpp b/src/plugins/intel_gpu/src/graph/tile.cpp
index 69f7bdf22fb..0225454f004 100644
--- a/src/plugins/intel_gpu/src/graph/tile.cpp
+++ b/src/plugins/intel_gpu/src/graph/tile.cpp
@@ -3,6 +3,8 @@
 //
 
 #include "tile_inst.h"
+#include "tile_shape_inference.hpp"
+
 #include "primitive_type_base.h"
 #include "intel_gpu/runtime/memory.hpp"
 #include "intel_gpu/runtime/error_handler.hpp"
@@ -33,6 +35,46 @@ layout tile_inst::calc_output_layout(tile_node const& node, kernel_impl_params c
     return layout{input_layout.data_type, input_format, tensor(input_format, out_shape)};
 }
 
+template <typename ShapeType>
+std::vector<layout> tile_inst::calc_output_layouts(tile_node const& /*node*/, const kernel_impl_params& impl_param) {
+    auto desc = impl_param.typed_desc<tile>();
+    auto input0_layout = impl_param.get_input_layout(0);
+
+    auto output_type = input0_layout.data_type;
+    if (impl_param.has_fused_primitives()) {
+        output_type = impl_param.get_fused_output_layout().data_type;
+    }
+
+    ShapeType repeats_shape = impl_param.input_layouts.size() == 2 ? impl_param.get_input_layout(1).get<ShapeType>()
+                                                                   : ov::Shape{ desc->repeats.size() };
+    ov::op::v0::Tile op;
+    std::vector<ShapeType> output_shapes = {ShapeType{}};
+    std::vector<ShapeType> input_shapes = {
+        input0_layout.get<ShapeType>(),
+        repeats_shape
+    };
+
+    auto& constant_mem = impl_param.memory_deps;
+    if (!constant_mem.empty()) {
+        auto repeats_mem = constant_mem.at(1);
+        cldnn::mem_lock<uint8_t, mem_lock_type::read> repeats_lock(repeats_mem, impl_param.prog.get_stream());
+        std::map<size_t, ngraph::HostTensorPtr> const_data = {
+            {1, make_host_tensor(repeats_mem->get_layout(), repeats_lock.data())}
+        };
+        ov::op::v0::shape_infer(&op, input_shapes, output_shapes, const_data);
+    } else {
+        auto repeats_data = desc->repeats;
+        auto repeats_tensor = make_host_tensor({repeats_shape, data_types::i64, format::bfyx}, static_cast<void*>(repeats_data.data()));
+        std::map<size_t, ngraph::HostTensorPtr> const_data = {
+            {1, repeats_tensor}
+        };
+        ov::op::v0::shape_infer(&op, input_shapes, output_shapes, const_data);
+    }
+    format output_format = format::adjust_to_rank(input0_layout.format, output_shapes[0].size());
+
+    return { layout{output_shapes[0], output_type, output_format} };
+}
+
 std::string tile_inst::to_string(tile_node const& node) {
     auto desc = node.get_primitive();
     auto node_info = node.desc_to_json();
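Note (not part of the patch): both the index-type fix in tile_shape_inference.hpp and the new calc_output_layouts above come down to the same per-dimension arithmetic: the output rank is the larger of the data rank and the repeats rank, missing leading dimensions on either side are treated as 1, and each output dimension is the product of the corresponding data dimension and repeat value. Below is a minimal standalone sketch of that arithmetic for fully static dimensions; the helper name is invented for illustration.

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    // Shape arithmetic performed by Tile shape inference, static case only.
    std::vector<int64_t> tile_output_shape(std::vector<int64_t> data, std::vector<int64_t> repeats) {
        const size_t rank = std::max(data.size(), repeats.size());
        data.insert(data.begin(), rank - data.size(), 1);         // pad the shorter input with leading 1s
        repeats.insert(repeats.begin(), rank - repeats.size(), 1);
        std::vector<int64_t> out(rank);
        for (size_t i = 0; i < rank; ++i)
            out[i] = data[i] * repeats[i];                        // per-dimension replication
        return out;
    }
    // e.g. tile_output_shape({2, 3, 4}, {1, 2, 3}) yields {2, 6, 12}.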
diff --git a/src/plugins/intel_gpu/tests/shape_infer/tile_si_test.cpp b/src/plugins/intel_gpu/tests/shape_infer/tile_si_test.cpp
new file mode 100644
index 00000000000..7a34bc938e0
--- /dev/null
+++ b/src/plugins/intel_gpu/tests/shape_infer/tile_si_test.cpp
@@ -0,0 +1,100 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "test_utils.h"
+
+#include <intel_gpu/primitives/input_layout.hpp>
+#include <intel_gpu/primitives/tile.hpp>
+#include <intel_gpu/primitives/data.hpp>
+
+#include "tile_inst.h"
+
+#include "program_wrapper.h"
+
+#include <cmath>
+#include <algorithm>
+
+using namespace cldnn;
+using namespace ::tests;
+
+namespace shape_infer_tests {
+
+struct tile_test_params {
+    layout data_layout;
+    layout repeats_layout;
+    std::vector<int64_t> repeats_data;
+    layout expected_layout;
+};
+
+class tile_test_two_inputs : public testing::TestWithParam<tile_test_params> { };
+
+TEST_P(tile_test_two_inputs, shape_infer) {
+    auto p = GetParam();
+
+    auto& engine = get_test_engine();
+
+    auto data_layout_prim = std::make_shared<input_layout>("data", p.data_layout);
+    auto repeats_layout_prim = std::make_shared<input_layout>("repeats", p.repeats_layout);
+    auto tile_prim = std::make_shared<tile>("output", "data", "repeats");
+
+    cldnn::program prog(engine);
+
+    auto repeats_mem = engine.allocate_memory(p.repeats_layout);
+    set_values(repeats_mem, p.repeats_data);
+
+    auto& data_node = prog.get_or_create(data_layout_prim);
+    auto& repeats_node = prog.get_or_create(repeats_layout_prim);
+    auto& tile_node = prog.get_or_create(tile_prim);
+    program_wrapper::add_connection(prog, data_node, tile_node);
+    program_wrapper::add_connection(prog, repeats_node, tile_node);
+
+    auto params = tile_node.get_kernel_impl_params();
+    params->memory_deps = {{1, repeats_mem}};
+    auto res = tile_inst::calc_output_layouts<ov::PartialShape>(tile_node, *params);
+
+    ASSERT_EQ(res.size(), 1);
+    ASSERT_EQ(res[0], p.expected_layout);
+}
+
+INSTANTIATE_TEST_SUITE_P(smoke, tile_test_two_inputs,
+    testing::ValuesIn(std::vector<tile_test_params>{
+        {
+            layout{ov::PartialShape{2, 3, 4}, data_types::f32, format::bfyx},
+            layout{ov::PartialShape{3}, data_types::i64, format::bfyx}, {1, 2, 3},
+            layout{ov::PartialShape{2, 6, 12}, data_types::f32, format::bfyx}
+        }
+    }));
+
+class tile_test_single_input : public testing::TestWithParam<tile_test_params> { };
+
+TEST_P(tile_test_single_input, shape_infer) {
+    auto p = GetParam();
+
+    auto& engine = get_test_engine();
+
+    auto data_layout_prim = std::make_shared<input_layout>("data", p.data_layout);
+    auto tile_prim = std::make_shared<tile>("output", "data", p.repeats_data);
+
+    cldnn::program prog(engine);
+
+    auto& data_node = prog.get_or_create(data_layout_prim);
+    auto& tile_node = prog.get_or_create(tile_prim);
+    program_wrapper::add_connection(prog, data_node, tile_node);
+
+    auto res = tile_inst::calc_output_layouts<ov::PartialShape>(tile_node, *tile_node.get_kernel_impl_params());
+
+    ASSERT_EQ(res.size(), 1);
+    ASSERT_EQ(res[0], p.expected_layout);
+}
+
+INSTANTIATE_TEST_SUITE_P(smoke, tile_test_single_input,
+    testing::ValuesIn(std::vector<tile_test_params>{
+        {
+            layout{ov::PartialShape{2, 3, 4}, data_types::f32, format::bfyx},
+            layout{ov::PartialShape{3}, data_types::i64, format::bfyx}, {1, 2, 3},
+            layout{ov::PartialShape{2, 6, 12}, data_types::f32, format::bfyx}
+        }
+    }));
+
+} // shape_infer_tests
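Note (not part of the patch): the tests above call calc_output_layouts directly through program_wrapper. At the topology level, the two tile constructors would be used roughly as sketched below; the primitive ids, layouts, and include paths here are assumptions for illustration, not taken from the patch.

    #include <intel_gpu/graph/topology.hpp>
    #include <intel_gpu/primitives/input_layout.hpp>
    #include <intel_gpu/primitives/tile.hpp>

    using namespace cldnn;

    topology make_tile_topology() {
        topology topo;
        // Dynamic case: repeats come from another primitive, so only its id is known at
        // build time and the output shape is resolved later by calc_output_layouts.
        topo.add(input_layout("input", layout{ov::PartialShape::dynamic(4), data_types::f32, format::bfyx}));
        topo.add(input_layout("repeats", layout{ov::PartialShape{4}, data_types::i64, format::bfyx}));
        topo.add(tile("tile_dynamic", "input", "repeats"));
        // Static case: repeats are known up front and passed by value.
        topo.add(tile("tile_static", "input", std::vector<int64_t>{1, 2, 1, 1}));
        return topo;
    }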
diff --git a/src/plugins/intel_gpu/tests/test_cases/add_reorders_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/add_reorders_gpu_test.cpp
index 959df21f730..1d1b7614d9a 100644
--- a/src/plugins/intel_gpu/tests/test_cases/add_reorders_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/test_cases/add_reorders_gpu_test.cpp
@@ -116,7 +116,7 @@ TEST(add_reorders_gpu, basic_reshape_and_tile) {
     topology topology;
     topology.add(input_layout("input", input->get_layout()));
     topology.add(reshape("reshape", "input", tensor(2, 1, 2, 1)));
-    topology.add(tile("tile", "reshape", { 1, 1, 4, 1 }));
+    topology.add(tile("tile", "reshape", std::vector<int64_t>{ 1, 1, 4, 1 }));
 
     std::vector<float> input_vec = { 1.f, 0.f, 5.f, 1.5f };
     set_values(input, input_vec);
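Note (not part of the patch): the test update above and the ones in tile_gpu_test.cpp below do not change behaviour; they only spell out the element type of the repeats argument. Presumably this keeps overload resolution unambiguous now that tile also has a constructor taking a repeats primitive id:

    // Before: a bare braced list relied on implicit conversion to the repeats vector.
    topology.add(tile("tile", "reshape", { 1, 1, 4, 1 }));
    // After: the element type is explicit, so the static-repeats constructor is selected.
    topology.add(tile("tile", "reshape", std::vector<int64_t>{ 1, 1, 4, 1 }));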
diff --git a/src/plugins/intel_gpu/tests/test_cases/tile_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/tile_gpu_test.cpp
index c2ec9af7734..bc04f1bd597 100644
--- a/src/plugins/intel_gpu/tests/test_cases/tile_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/test_cases/tile_gpu_test.cpp
@@ -64,7 +64,7 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_b) {
 
     topology topology;
     topology.add(input_layout("input", input->get_layout()));
-    topology.add(tile("tile", "input", { 2, 1, 1, 1 }));
+    topology.add(tile("tile", "input", std::vector<int64_t>{ 2, 1, 1, 1 }));
 
     std::vector<float> input_vec = { 1.f, 0.f, 5.f, 1.5f,
                                      2.f, 0.f, 6.f, 5.2f };
@@ -93,7 +93,7 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_f) {
 
     topology topology;
     topology.add(input_layout("input", input->get_layout()));
-    topology.add(tile("tile", "input", { 1, 2, 1, 1 }));
+    topology.add(tile("tile", "input", std::vector<int64_t>{ 1, 2, 1, 1 }));
 
     std::vector<float> input_vec = { 1.f, 0.f, 5.f, 1.5f,
@@ -125,7 +125,7 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_y) {
 
     topology topology;
     topology.add(input_layout("input", input->get_layout()));
-    topology.add(tile("tile", "input", { 1, 1, 2, 1 }));
+    topology.add(tile("tile", "input", std::vector<int64_t>{ 1, 1, 2, 1 }));
 
     std::vector<float> input_vec = { 0.f, 1.f, 2.f, 3.f, 4.f, 5.f,
@@ -161,7 +161,7 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_x) {
 
     topology topology;
     topology.add(input_layout("input", input->get_layout()));
-    topology.add(tile("tile", "input", { 1, 1, 1, 2 }));
+    topology.add(tile("tile", "input", std::vector<int64_t>{ 1, 1, 1, 2 }));
 
     std::vector<float> input_vec = { 0.f, 1.f, 2.f, 3.f,
@@ -193,7 +193,7 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_x_dense) {
 
     topology topology;
     topology.add(input_layout("input", input->get_layout()));
-    topology.add(tile("tile", "input", { 1, 1, 1, 4 }));
+    topology.add(tile("tile", "input", std::vector<int64_t>{ 1, 1, 1, 4 }));
 
     std::vector<float> input_vec = { 1.f, 0.f, 5.f, 1.5f };
     set_values(input, input_vec);
@@ -221,7 +221,7 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_z) {
 
     topology topology;
     topology.add(input_layout("input", input->get_layout()));
-    topology.add(tile("tile", "input", { 1, 1, 2, 1, 1 }));
+    topology.add(tile("tile", "input", std::vector<int64_t>{ 1, 1, 2, 1, 1 }));
 
     std::vector<float> input_vec = { 1.f, 0.f,