[GPU] Align Tile parameters with ngraph (#12345)

Roman Lyamin 2022-08-11 14:08:22 +04:00 committed by GitHub
parent e98cdcf496
commit 85e2229a80
6 changed files with 115 additions and 67 deletions


@@ -19,26 +19,19 @@ namespace cldnn {
struct tile : public primitive_base<tile> {
CLDNN_DECLARE_PRIMITIVE(tile)
enum tile_axis {
along_b,
along_f,
along_x,
along_y,
along_z
};
/// @brief Constructs tile primitive.
/// @param id This primitive id.
/// @param out_shape The shape of tiled tensor.
/// @param repeats Per-dimension replication factor.
tile(const primitive_id& id,
const primitive_id& input,
const tensor out_shape,
const std::vector<int64_t> repeats,
const primitive_id& ext_prim_id = "",
const padding& output_padding = padding())
: primitive_base(id, {input}, ext_prim_id, output_padding), out_shape(out_shape) {}
: primitive_base(id, {input}, ext_prim_id, output_padding),
repeats(repeats) {}
/// @brief Shape of the output tensor
tensor out_shape;
/// @brief A per-dimension replication factor
std::vector<int64_t> repeats;
};
/// @}
/// @}
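The header change above is the whole API shift: a tile is now described by how many times each dimension repeats rather than by its final shape plus a tile_axis. A minimal sketch of a call site under the new signature (primitive and input ids taken from the tests below; everything else assumed):

    #include <cstdint>
    #include <vector>
    #include "intel_gpu/primitives/tile.hpp"

    // Before: the caller precomputed the tiled output shape, e.g.
    //   cldnn::tile("tile", "input", cldnn::tensor(1, 2, 2, 4));
    // After: one replication factor per logical dimension, matching
    // ngraph::op::v0::Tile; the output shape is derived downstream.
    cldnn::tile tile_prim("tile", "input", std::vector<int64_t>{1, 1, 2, 1});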


@@ -6,6 +6,7 @@
#include "primitive_type_base.h"
#include "intel_gpu/runtime/memory.hpp"
#include "intel_gpu/runtime/error_handler.hpp"
#include "intel_gpu/runtime/format.hpp"
#include "json_object.h"
#include <string>
@@ -22,7 +23,14 @@ layout tile_inst::calc_output_layout(tile_node const& node, kernel_impl_params c
auto input_layout = impl_param.get_input_layout();
auto input_format = input_layout.format;
return layout{input_layout.data_type, input_format, desc->out_shape};
std::vector<int64_t> repeats = desc->repeats;
auto out_shape = input_layout.get_dims();
for (size_t i = 0; i < repeats.size(); ++i) {
out_shape[i] *= repeats[i];
}
return layout{input_layout.data_type, input_format, tensor(input_format, out_shape)};
}
std::string tile_inst::to_string(tile_node const& node) {
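With out_shape gone from the descriptor, calc_output_layout derives the output shape itself: each input dimension is multiplied by its replication factor. The same arithmetic as a self-contained snippet (plain std::vector in place of cldnn layouts):

    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Same idea as the loop in tile_inst::calc_output_layout():
    // out_shape[i] = input_dim[i] * repeats[i].
    std::vector<int64_t> tiled_shape(std::vector<int64_t> dims,
                                     const std::vector<int64_t>& repeats) {
        for (size_t i = 0; i < repeats.size() && i < dims.size(); ++i)
            dims[i] *= repeats[i];
        return dims;
    }

    int main() {
        // Logical bfyx dims {1, 2, 4, 3} with repeats {1, 1, 2, 1}: y doubles.
        for (int64_t d : tiled_shape({1, 2, 4, 3}, {1, 1, 2, 1}))
            std::cout << d << ' ';  // prints: 1 2 8 3
        std::cout << '\n';
    }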


@@ -8,6 +8,7 @@
#include "ngraph/op/tile.hpp"
#include "intel_gpu/primitives/tile.hpp"
#include "intel_gpu/primitives/reshape.hpp"
namespace ov {
namespace intel_gpu {
@@ -16,10 +17,39 @@ static void CreateTileOp(Program& p, const std::shared_ptr<ngraph::op::v0::Tile>
p.ValidateInputs(op, {2});
auto inputPrimitives = p.GetInputPrimitiveIDs(op);
std::string layerName = layer_type_name_ID(op);
size_t rank = op->get_input_shape(0).size();
auto repeatsNode = std::dynamic_pointer_cast<ngraph::op::Constant>(op->get_input_node_shared_ptr(1));
if (!repeatsNode)
IE_THROW() << "Unsupported parameter nodes type in " << op->get_friendly_name() <<
" (" << op->get_type_name() << ")";
std::vector<int64_t> repeats = repeatsNode->cast_vector<int64_t>();
int64_t defaultSize = 1;
for (size_t i = repeats.size(); i < rank; ++i) {
repeats.insert(repeats.begin(), defaultSize);
}
if (repeats.size() > rank) {
std::string reshapeName = layerName + "_reshape";
auto inputDims = op->get_input_shape(0);
// Extend input dimensions to the same size as repeats dimensions by prepending ones
inputDims.insert(inputDims.begin(), repeats.size() - rank, defaultSize);
auto targetShape = tensor_from_dims(inputDims);
auto reshapePrim = cldnn::reshape(reshapeName, inputPrimitives[0], targetShape, op->get_friendly_name());
p.AddPrimitive(reshapePrim);
p.AddInnerPrimitiveToProfiler(reshapeName, layerName, op);
inputPrimitives[0] = reshapeName;
}
auto tilePrim = cldnn::tile(layerName,
inputPrimitives[0],
tensor_from_dims(op->get_output_shape(0)),
repeats,
op->get_friendly_name());
p.AddPrimitive(tilePrim);
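ngraph aligns Tile's repeats with the innermost dimensions, so CreateTileOp normalizes in both directions: repeats shorter than the input rank are left-padded with ones, and when repeats is longer the input itself is extended with leading ones via the reshape primitive above. A standalone sketch of that normalization (align_ranks is a hypothetical name; the plugin inlines both loops):

    #include <cstdint>
    #include <vector>

    // Brings dims and repeats to a common rank by prepending ones, as
    // CreateTileOp does; in the plugin the dims branch materializes as a
    // cldnn::reshape inserted in front of the tile primitive.
    void align_ranks(std::vector<int64_t>& dims, std::vector<int64_t>& repeats) {
        while (repeats.size() < dims.size())
            repeats.insert(repeats.begin(), 1);
        while (dims.size() < repeats.size())
            dims.insert(dims.begin(), 1);
    }

For example, repeats {2, 3} against a 4-D input becomes {1, 1, 2, 3}, which is exactly the shorter-than-rank case added to the single-layer tests at the end of this commit.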


@@ -65,15 +65,23 @@ TEST(add_reorders_gpu, two_convolutions_and_concatenation) {
}
template<typename data_t>
void tile_ref(const memory::ptr input, memory::ptr output, tile::tile_axis axis, int num_tiles) {
auto get_sizes = [](const tensor& size, tile::tile_axis axis) -> std::pair<int, int> {
void tile_ref(const memory::ptr input, memory::ptr output, int64_t axis, int num_tiles) {
auto get_sizes = [](const layout& l, int64_t axis, size_t rank) -> std::pair<int, int> {
switch (axis) {
case tile::along_b: return std::make_pair(1, size.batch[0] * size.feature[0] * size.spatial[2] * size.spatial[1] * size.spatial[0]);
case tile::along_f: return std::make_pair(size.batch[0], size.feature[0] * size.spatial[2] * size.spatial[1] * size.spatial[0]);
case tile::along_z: return std::make_pair(size.batch[0] * size.feature[0], size.spatial[2] * size.spatial[1] * size.spatial[0]);
case tile::along_y: return std::make_pair(size.batch[0] * size.feature[0] * size.spatial[2], size.spatial[1] * size.spatial[0]);
case tile::along_x: return std::make_pair(size.batch[0] * size.feature[0] * size.spatial[2] * size.spatial[1], size.spatial[0]);
default: throw std::invalid_argument("Invalid axis(" + std::to_string(static_cast<int>(axis)) + ") in tile ref version");
case 0: return std::make_pair(1, l.batch() * l.feature() * l.spatial(2) * l.spatial(1) * l.spatial(0));
case 1: return std::make_pair(l.batch(), l.feature() * l.spatial(2) * l.spatial(1) * l.spatial(0));
case 2:
if (rank > 4)
return std::make_pair(l.batch() * l.feature(), l.spatial(2) * l.spatial(1) * l.spatial(0));
else
return std::make_pair(l.batch() * l.feature() * l.spatial(2), l.spatial(1) * l.spatial(0));
case 3:
if (rank > 4)
return std::make_pair(l.batch() * l.feature() * l.spatial(2), l.spatial(1) * l.spatial(0));
else
return std::make_pair(l.batch() * l.feature() * l.spatial(2) * l.spatial(1), l.spatial(0));
case 4: return std::make_pair(l.batch() * l.feature() * l.spatial(2) * l.spatial(1), l.spatial(0));
default: throw std::invalid_argument("Invalid axis(" + std::to_string(static_cast<int>(axis)) + ") in tile ref version");
}
};
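With the enum gone, the reference helper takes a plain axis index: 0 is b, 1 is f, and the meaning of 2 and 3 depends on rank (for a 4-D layout index 2 is y; for a 5-D layout it is z), hence the rank branches above. What get_sizes computes is just the product of dimensions on each side of the axis; a generic version with a hypothetical split_sizes name, assuming dims in logical order:

    #include <cstdint>
    #include <functional>
    #include <numeric>
    #include <utility>
    #include <vector>

    // Product of dims before `axis` (outer) and from `axis` onward (inner);
    // the inner block is what gets replicated num_tiles times.
    std::pair<int64_t, int64_t> split_sizes(const std::vector<int64_t>& dims,
                                            size_t axis) {
        auto prod = [](auto first, auto last) {
            return std::accumulate(first, last, int64_t{1},
                                   std::multiplies<int64_t>());
        };
        return { prod(dims.begin(), dims.begin() + axis),
                 prod(dims.begin() + axis, dims.end()) };
    }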
@@ -82,8 +90,9 @@ void tile_ref(const memory::ptr input, memory::ptr output, tile::tile_axis axis,
const data_t* psrc = src.data();
data_t* pdst = dst.data();
const auto& input_layout = input->get_layout();
auto sizes = get_sizes(input->get_layout().get_tensor(), axis);
auto sizes = get_sizes(input_layout, axis, input_layout.get_rank());
int outer_dim = sizes.first;
int inner_dim = sizes.second;
@@ -107,11 +116,11 @@ TEST(add_reorders_gpu, basic_reshape_and_tile) {
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(reshape("reshape", "input", tensor(2, 1, 2, 1)));
topology.add(tile("tile", "reshape", tensor(2, 1, 2, 4)));
topology.add(tile("tile", "reshape", { 1, 1, 4, 1 }));
std::vector<float> input_vec = { 1.f, 0.f, 5.f, 1.5f };
set_values(input, input_vec);
tile_ref<float>(input, output_ref, tile::along_y, 4);
tile_ref<float>(input, output_ref, 2, 4);
network network(engine, topology);
network.set_input_data("input", input);


@@ -15,17 +15,22 @@ using namespace cldnn;
using namespace ::tests;
template<typename data_t>
void tile_ref(const memory::ptr input, memory::ptr output, tile::tile_axis axis, int num_tiles)
{
auto get_sizes = [](const layout& l, tile::tile_axis axis) -> std::pair<int, int>
{
switch (axis)
{
case tile::along_b: return std::make_pair(1, l.batch()*l.feature()*l.spatial(2)*l.spatial(1)*l.spatial(0));
case tile::along_f: return std::make_pair(l.batch(), l.feature()*l.spatial(2)*l.spatial(1)*l.spatial(0));
case tile::along_z: return std::make_pair(l.batch()*l.feature(), l.spatial(2)*l.spatial(1)*l.spatial(0));
case tile::along_y: return std::make_pair(l.batch()*l.feature()*l.spatial(2), l.spatial(1)*l.spatial(0));
case tile::along_x: return std::make_pair(l.batch()*l.feature()*l.spatial(2)*l.spatial(1), l.spatial(0));
void tile_ref(const memory::ptr input, memory::ptr output, int64_t axis, int num_tiles) {
auto get_sizes = [](const layout& l, int64_t axis, size_t rank) -> std::pair<int, int> {
switch (axis) {
case 0: return std::make_pair(1, l.batch() * l.feature() * l.spatial(2) * l.spatial(1) * l.spatial(0));
case 1: return std::make_pair(l.batch(), l.feature() * l.spatial(2) * l.spatial(1) * l.spatial(0));
case 2:
if (rank > 4)
return std::make_pair(l.batch() * l.feature(), l.spatial(2) * l.spatial(1) * l.spatial(0));
else
return std::make_pair(l.batch() * l.feature() * l.spatial(2), l.spatial(1) * l.spatial(0));
case 3:
if (rank > 4)
return std::make_pair(l.batch() * l.feature() * l.spatial(2), l.spatial(1) * l.spatial(0));
else
return std::make_pair(l.batch() * l.feature() * l.spatial(2) * l.spatial(1), l.spatial(0));
case 4: return std::make_pair(l.batch() * l.feature() * l.spatial(2) * l.spatial(1), l.spatial(0));
default: throw std::invalid_argument("Invalid axis(" + std::to_string(static_cast<int>(axis)) + ") in tile ref version");
}
};
@@ -36,16 +41,13 @@ void tile_ref(const memory::ptr input, memory::ptr output, tile::tile_axis axis,
const data_t* psrc = src.data();
data_t* pdst = dst.data();
auto sizes = get_sizes(input->get_layout(), axis);
auto sizes = get_sizes(input->get_layout(), axis, input->get_layout().get_rank());
int outer_dim = sizes.first;
int inner_dim = sizes.second;
for (int i = 0; i < outer_dim; i++)
{
for (int t = 0; t < num_tiles; t++)
{
for (int j = 0; j < inner_dim; j++)
{
for (int i = 0; i < outer_dim; i++) {
for (int t = 0; t < num_tiles; t++) {
for (int j = 0; j < inner_dim; j++) {
pdst[j] = psrc[j];
}
pdst += inner_dim;
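The copy loop is unchanged in meaning: for each outer slice, the inner block is written num_tiles times before the source advances. The same logic as a self-contained function over std::vector (tile_reference is a hypothetical name; the test keeps raw pointers):

    #include <cstddef>
    #include <vector>

    // For each outer slice, append that slice's inner block num_tiles times;
    // equivalent to the psrc/pdst pointer loop in tile_ref.
    std::vector<float> tile_reference(const std::vector<float>& src,
                                      int outer_dim, int inner_dim, int num_tiles) {
        std::vector<float> dst;
        dst.reserve(static_cast<size_t>(outer_dim) * num_tiles * inner_dim);
        for (int i = 0; i < outer_dim; ++i)
            for (int t = 0; t < num_tiles; ++t)
                dst.insert(dst.end(),
                           src.begin() + static_cast<std::ptrdiff_t>(i) * inner_dim,
                           src.begin() + static_cast<std::ptrdiff_t>(i + 1) * inner_dim);
        return dst;
    }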
@@ -62,12 +64,12 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_b) {
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(tile("tile", "input", tensor(2, 2, 2, 2)));
topology.add(tile("tile", "input", { 2, 1, 1, 1 }));
std::vector<float> input_vec = { 1.f, 0.f, 5.f, 1.5f,
2.f, 0.f, 6.f, 5.2f };
set_values(input, input_vec);
tile_ref<float>(input, output_ref, tile::along_b, 2);
tile_ref<float>(input, output_ref, 0, 2);
network network(engine, topology);
network.set_input_data("input", input);
@@ -91,7 +93,7 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_f) {
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(tile("tile", "input", tensor(1, 4, 2, 2)));
topology.add(tile("tile", "input", { 1, 2, 1, 1 }));
std::vector<float> input_vec = { 1.f, 0.f,
5.f, 1.5f,
@@ -99,7 +101,7 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_f) {
2.f, 0.f,
6.f, 5.2f };
set_values(input, input_vec);
tile_ref<float>(input, output_ref, tile::along_f, 2);
tile_ref<float>(input, output_ref, 1, 2);
network network(engine, topology);
network.set_input_data("input", input);
@@ -118,20 +120,24 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_f) {
TEST(tile_gpu, basic_in1x2x2x2_axis_y) {
auto& engine = get_test_engine();
auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 2, 2 } });
auto output_ref = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 2, 4 } });
auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 3, 4 } });
auto output_ref = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 3, 8 } });
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(tile("tile", "input", tensor(1, 2, 2, 4)));
topology.add(tile("tile", "input", { 1, 1, 2, 1 }));
std::vector<float> input_vec = { 1.f, 0.f,
5.f, 1.5f,
2.f, 0.f,
6.f, 5.2f };
std::vector<float> input_vec = { 0.f, 1.f, 2.f,
3.f, 4.f, 5.f,
6.f, 7.f, 8.f,
9.f, 10.f, 11.f,
12.f, 13.f, 14.f,
15.f, 16.f, 17.f,
18.f, 19.f, 20.f,
21.f, 22.f, 23.f };
set_values(input, input_vec);
tile_ref<float>(input, output_ref, tile::along_y, 2);
tile_ref<float>(input, output_ref, 2, 2);
network network(engine, topology);
network.set_input_data("input", input);
@@ -155,15 +161,15 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_x) {
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(tile("tile", "input", tensor(1, 2, 4, 2)));
topology.add(tile("tile", "input", { 1, 1, 1, 2 }));
std::vector<float> input_vec = { 1.f, 0.f,
5.f, 1.5f,
2.f, 0.f,
6.f, 5.2f };
std::vector<float> input_vec = { 0.f, 1.f,
2.f, 3.f,
4.f, 5.f,
6.f, 7.f };
set_values(input, input_vec);
tile_ref<float>(input, output_ref, tile::along_x, 2);
tile_ref<float>(input, output_ref, 3, 2);
network network(engine, topology);
network.set_input_data("input", input);
@@ -187,11 +193,11 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_x_dense) {
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(tile("tile", "input", tensor(1, 2, 4, 2)));
topology.add(tile("tile", "input", { 1, 1, 1, 4 }));
std::vector<float> input_vec = { 1.f, 0.f, 5.f, 1.5f};
std::vector<float> input_vec = { 1.f, 0.f, 5.f, 1.5f };
set_values(input, input_vec);
tile_ref<float>(input, output_ref, tile::along_x, 4);
tile_ref<float>(input, output_ref, 3, 4);
network network(engine, topology);
network.set_input_data("input", input);
@@ -215,7 +221,7 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_z) {
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(tile("tile", "input", tensor(1, 2, 2, 2, 4)));
topology.add(tile("tile", "input", { 1, 1, 2, 1, 1 }));
std::vector<float> input_vec = {
1.f, 0.f,
@@ -228,7 +234,7 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_z) {
6.f, 5.2f
};
set_values(input, input_vec);
tile_ref<float>(input, output_ref, tile::along_z, 2);
tile_ref<float>(input, output_ref, 2, 2);
network network(engine, topology);
network.set_input_data("input", input);
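Every test in this file migrates by the same recipe: the old explicit output tensor divided elementwise by the input shape yields the repeats vector (input 1x2x2x2 with old output 2x2x2x2 becomes repeats {2, 1, 1, 1} in the axis_b case above). A hypothetical helper capturing that conversion:

    #include <cstdint>
    #include <vector>

    // repeats[i] = out_shape[i] / in_shape[i]; assumes out is an exact
    // elementwise multiple of in, as in all the tests above.
    std::vector<int64_t> repeats_from_shapes(const std::vector<int64_t>& in,
                                             const std::vector<int64_t>& out) {
        std::vector<int64_t> repeats(in.size());
        for (size_t i = 0; i < in.size(); ++i)
            repeats[i] = out[i] / in[i];
        return repeats;
    }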


@@ -15,10 +15,12 @@ const std::vector<InferenceEngine::Precision> netPrecisions = {
};
const std::vector<std::vector<int64_t>> repeats = {
{2, 3},
{1, 2, 3},
{2, 1, 1},
{2, 3, 1},
{2, 2, 2},
{2, 3, 4, 5},
};
INSTANTIATE_TEST_SUITE_P(smoke_Tile, TileLayerTest,