[GPU] Align Tile parameters with ngraph (#12345)
parent e98cdcf496
commit 85e2229a80
@@ -19,26 +19,19 @@ namespace cldnn {
 struct tile : public primitive_base<tile> {
     CLDNN_DECLARE_PRIMITIVE(tile)

-    enum tile_axis {
-        along_b,
-        along_f,
-        along_x,
-        along_y,
-        along_z
-    };
-
     /// @brief Constructs tile primitive.
     /// @param id This primitive id.
-    /// @param out_shape The shape of tiled tensor.
+    /// @param repeats Per-dimension replication factor.
     tile(const primitive_id& id,
          const primitive_id& input,
-         const tensor out_shape,
+         const std::vector<int64_t> repeats,
          const primitive_id& ext_prim_id = "",
          const padding& output_padding = padding())
-        : primitive_base(id, {input}, ext_prim_id, output_padding), out_shape(out_shape) {}
+        : primitive_base(id, {input}, ext_prim_id, output_padding),
+          repeats(repeats) {}

-    /// @brief Shape of the output tensor
-    tensor out_shape;
+    /// @brief A per-dimension replication factor
+    std::vector<int64_t> repeats;
 };
 /// @}
 /// @}
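Note: the primitive now takes a per-dimension repeats vector, matching ngraph::op::v0::Tile, instead of a precomputed output shape. A minimal usage sketch of the new constructor, mirroring the tests later in this diff (the "input" id and the surrounding cldnn test harness are illustrative assumptions):

// Tile the y-axis of a bfyx input twice via the new repeats argument.
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(tile("tile", "input", { 1, 1, 2, 1 }));  // repeats per (b, f, y, x)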
@@ -6,6 +6,7 @@
 #include "primitive_type_base.h"
 #include "intel_gpu/runtime/memory.hpp"
 #include "intel_gpu/runtime/error_handler.hpp"
+#include "intel_gpu/runtime/format.hpp"
 #include "json_object.h"
 #include <string>
@@ -22,7 +23,14 @@ layout tile_inst::calc_output_layout(tile_node const& node, kernel_impl_params c
     auto input_layout = impl_param.get_input_layout();
     auto input_format = input_layout.format;
-    return layout{input_layout.data_type, input_format, desc->out_shape};
+
+    std::vector<int64_t> repeats = desc->repeats;
+
+    auto out_shape = input_layout.get_dims();
+    for (size_t i = 0; i < repeats.size(); ++i) {
+        out_shape[i] *= repeats[i];
+    }
+    return layout{input_layout.data_type, input_format, tensor(input_format, out_shape)};
 }

 std::string tile_inst::to_string(tile_node const& node) {
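Note: calc_output_layout now derives the output shape by multiplying each input dimension by its repeat factor rather than trusting a caller-supplied out_shape. A self-contained sketch of that arithmetic (our illustration, not code from the diff):

#include <cstdint>
#include <vector>

int main() {
    std::vector<int64_t> dims = {1, 2, 3, 4};           // input dims (b, f, y, x)
    const std::vector<int64_t> repeats = {1, 1, 2, 1};  // tile y twice
    for (size_t i = 0; i < repeats.size(); ++i)
        dims[i] *= repeats[i];
    // dims is now {1, 2, 6, 4} -- the shape calc_output_layout wraps in a tensor.
}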
@@ -8,6 +8,7 @@
 #include "ngraph/op/tile.hpp"

 #include "intel_gpu/primitives/tile.hpp"
+#include "intel_gpu/primitives/reshape.hpp"

 namespace ov {
 namespace intel_gpu {
@@ -16,10 +17,39 @@ static void CreateTileOp(Program& p, const std::shared_ptr<ngraph::op::v0::Tile>
     p.ValidateInputs(op, {2});
     auto inputPrimitives = p.GetInputPrimitiveIDs(op);
     std::string layerName = layer_type_name_ID(op);
+    size_t rank = op->get_input_shape(0).size();
+
+    auto repeatsNode = std::dynamic_pointer_cast<ngraph::op::Constant>(op->get_input_node_shared_ptr(1));
+    if (!repeatsNode)
+        IE_THROW() << "Unsupported parameter nodes type in " << op->get_friendly_name() <<
+                      " (" << op->get_type_name() << ")";
+    std::vector<int64_t> repeats = repeatsNode->cast_vector<int64_t>();
+
+    int64_t defaultSize = 1;
+    for (size_t i = repeats.size(); i < rank; ++i) {
+        repeats.insert(repeats.begin(), defaultSize);
+    }
+
+    if (repeats.size() > rank) {
+        std::string reshapeName = layerName + "_reshape";
+        auto inputDims = op->get_input_shape(0);
+
+        // Extend input dimensions to the same size as repeats dimensions by prepending ones
+        inputDims.insert(inputDims.begin(), repeats.size() - rank, defaultSize);
+
+        auto targetShape = tensor_from_dims(inputDims);
+
+        auto reshapePrim = cldnn::reshape(reshapeName, inputPrimitives[0], targetShape, op->get_friendly_name());
+
+        p.AddPrimitive(reshapePrim);
+        p.AddInnerPrimitiveToProfiler(reshapeName, layerName, op);
+
+        inputPrimitives[0] = reshapeName;
+    }
+
     auto tilePrim = cldnn::tile(layerName,
                                 inputPrimitives[0],
-                                tensor_from_dims(op->get_output_shape(0)),
+                                repeats,
                                 op->get_friendly_name());

     p.AddPrimitive(tilePrim);
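Note: CreateTileOp now normalizes repeats against the input rank: a shorter vector is left-padded with ones, while a longer one triggers the inner reshape above, which left-pads the input dims instead. A self-contained sketch of the padding loop (the helper name normalize_repeats is ours):

#include <cstdint>
#include <vector>

// Our sketch of the padding loop above: left-pad repeats with 1s up to rank.
std::vector<int64_t> normalize_repeats(std::vector<int64_t> repeats, size_t rank) {
    for (size_t i = repeats.size(); i < rank; ++i)
        repeats.insert(repeats.begin(), 1);
    return repeats;
}

int main() {
    auto r = normalize_repeats({2, 3}, 4);  // -> {1, 1, 2, 3}
}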
@@ -65,15 +65,23 @@ TEST(add_reorders_gpu, two_convolutions_and_concatenation) {
 }

 template<typename data_t>
-void tile_ref(const memory::ptr input, memory::ptr output, tile::tile_axis axis, int num_tiles) {
-    auto get_sizes = [](const tensor& size, tile::tile_axis axis) -> std::pair<int, int> {
+void tile_ref(const memory::ptr input, memory::ptr output, int64_t axis, int num_tiles) {
+    auto get_sizes = [](const layout& l, int64_t axis, size_t rank) -> std::pair<int, int> {
         switch (axis) {
-        case tile::along_b: return std::make_pair(1, size.batch[0] * size.feature[0] * size.spatial[2] * size.spatial[1] * size.spatial[0]);
-        case tile::along_f: return std::make_pair(size.batch[0], size.feature[0] * size.spatial[2] * size.spatial[1] * size.spatial[0]);
-        case tile::along_z: return std::make_pair(size.batch[0] * size.feature[0], size.spatial[2] * size.spatial[1] * size.spatial[0]);
-        case tile::along_y: return std::make_pair(size.batch[0] * size.feature[0] * size.spatial[2], size.spatial[1] * size.spatial[0]);
-        case tile::along_x: return std::make_pair(size.batch[0] * size.feature[0] * size.spatial[2] * size.spatial[1], size.spatial[0]);
-        default: throw std::invalid_argument("Invalid axis(" + std::to_string(static_cast<int>(axis)) + ") in tile ref version");
+        case 0: return std::make_pair(1, l.batch() * l.feature() * l.spatial(2) * l.spatial(1) * l.spatial(0));
+        case 1: return std::make_pair(l.batch(), l.feature() * l.spatial(2) * l.spatial(1) * l.spatial(0));
+        case 2:
+            if (rank > 4)
+                return std::make_pair(l.batch() * l.feature(), l.spatial(2) * l.spatial(1) * l.spatial(0));
+            else
+                return std::make_pair(l.batch() * l.feature() * l.spatial(2), l.spatial(1) * l.spatial(0));
+        case 3:
+            if (rank > 4)
+                return std::make_pair(l.batch() * l.feature() * l.spatial(2), l.spatial(1) * l.spatial(0));
+            else
+                return std::make_pair(l.batch() * l.feature() * l.spatial(2) * l.spatial(1), l.spatial(0));
+        case 4: return std::make_pair(l.batch() * l.feature() * l.spatial(2) * l.spatial(1), l.spatial(0));
+        default: throw std::invalid_argument("Invalid axis(" + std::to_string(static_cast<int>(axis)) + ") in tile ref version");
         }
     };
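Note: the reference helper now takes a plain dimension index instead of the removed tile_axis enum, so the index meaning depends on rank (4-D bfyx: b=0, f=1, y=2, x=3; 5-D bfzyx: z=2, y=3, x=4), which is what the rank > 4 branches encode. get_sizes splits the flattened tensor into the product of dims before the axis (outer) and from the axis onward (inner); a sketch under that reading (our illustration):

#include <cstdint>
#include <functional>
#include <numeric>
#include <utility>
#include <vector>

// Our illustration of get_sizes: outer = product of dims before `axis`,
// inner = product of dims from `axis` to the end.
std::pair<int64_t, int64_t> split_at_axis(const std::vector<int64_t>& dims, size_t axis) {
    const auto mid = dims.begin() + axis;
    int64_t outer = std::accumulate(dims.begin(), mid, int64_t{1}, std::multiplies<int64_t>());
    int64_t inner = std::accumulate(mid, dims.end(), int64_t{1}, std::multiplies<int64_t>());
    return {outer, inner};
}

int main() {
    auto s = split_at_axis({1, 2, 3, 4}, 2);  // outer = 1*2 = 2, inner = 3*4 = 12
}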
@@ -82,8 +90,9 @@ void tile_ref(const memory::ptr input, memory::ptr output, tile::tile_axis axis,
     const data_t* psrc = src.data();
     data_t* pdst = dst.data();
+    const auto& input_layout = input->get_layout();

-    auto sizes = get_sizes(input->get_layout().get_tensor(), axis);
+    auto sizes = get_sizes(input_layout, axis, input_layout.get_rank());
     int outer_dim = sizes.first;
     int inner_dim = sizes.second;
@@ -107,11 +116,11 @@ TEST(add_reorders_gpu, basic_reshape_and_tile) {
     topology topology;
     topology.add(input_layout("input", input->get_layout()));
     topology.add(reshape("reshape", "input", tensor(2, 1, 2, 1)));
-    topology.add(tile("tile", "reshape", tensor(2, 1, 2, 4)));
+    topology.add(tile("tile", "reshape", { 1, 1, 4, 1 }));

     std::vector<float> input_vec = { 1.f, 0.f, 5.f, 1.5f };
     set_values(input, input_vec);
-    tile_ref<float>(input, output_ref, tile::along_y, 4);
+    tile_ref<float>(input, output_ref, 2, 4);

     network network(engine, topology);
     network.set_input_data("input", input);
@@ -15,17 +15,22 @@ using namespace cldnn;
 using namespace ::tests;

 template<typename data_t>
-void tile_ref(const memory::ptr input, memory::ptr output, tile::tile_axis axis, int num_tiles)
-{
-    auto get_sizes = [](const layout& l, tile::tile_axis axis) -> std::pair<int, int>
-    {
-        switch (axis)
-        {
-        case tile::along_b: return std::make_pair(1, l.batch()*l.feature()*l.spatial(2)*l.spatial(1)*l.spatial(0));
-        case tile::along_f: return std::make_pair(l.batch(), l.feature()*l.spatial(2)*l.spatial(1)*l.spatial(0));
-        case tile::along_z: return std::make_pair(l.batch()*l.feature(), l.spatial(2)*l.spatial(1)*l.spatial(0));
-        case tile::along_y: return std::make_pair(l.batch()*l.feature()*l.spatial(2), l.spatial(1)*l.spatial(0));
-        case tile::along_x: return std::make_pair(l.batch()*l.feature()*l.spatial(2)*l.spatial(1), l.spatial(0));
+void tile_ref(const memory::ptr input, memory::ptr output, int64_t axis, int num_tiles) {
+    auto get_sizes = [](const layout& l, int64_t axis, size_t rank) -> std::pair<int, int> {
+        switch (axis) {
+        case 0: return std::make_pair(1, l.batch() * l.feature() * l.spatial(2) * l.spatial(1) * l.spatial(0));
+        case 1: return std::make_pair(l.batch(), l.feature() * l.spatial(2) * l.spatial(1) * l.spatial(0));
+        case 2:
+            if (rank > 4)
+                return std::make_pair(l.batch() * l.feature(), l.spatial(2) * l.spatial(1) * l.spatial(0));
+            else
+                return std::make_pair(l.batch() * l.feature() * l.spatial(2), l.spatial(1) * l.spatial(0));
+        case 3:
+            if (rank > 4)
+                return std::make_pair(l.batch() * l.feature() * l.spatial(2), l.spatial(1) * l.spatial(0));
+            else
+                return std::make_pair(l.batch() * l.feature() * l.spatial(2) * l.spatial(1), l.spatial(0));
+        case 4: return std::make_pair(l.batch() * l.feature() * l.spatial(2) * l.spatial(1), l.spatial(0));
         default: throw std::invalid_argument("Invalid axis(" + std::to_string(static_cast<int>(axis)) + ") in tile ref version");
         }
     };
@@ -36,16 +41,13 @@ void tile_ref(const memory::ptr input, memory::ptr output, tile::tile_axis axis,
     const data_t* psrc = src.data();
     data_t* pdst = dst.data();

-    auto sizes = get_sizes(input->get_layout(), axis);
+    auto sizes = get_sizes(input->get_layout(), axis, input->get_layout().get_rank());
     int outer_dim = sizes.first;
     int inner_dim = sizes.second;

-    for (int i = 0; i < outer_dim; i++)
-    {
-        for (int t = 0; t < num_tiles; t++)
-        {
-            for (int j = 0; j < inner_dim; j++)
-            {
+    for (int i = 0; i < outer_dim; i++) {
+        for (int t = 0; t < num_tiles; t++) {
+            for (int j = 0; j < inner_dim; j++) {
                 pdst[j] = psrc[j];
             }
             pdst += inner_dim;
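Note: the copy loop writes the inner block num_tiles times per outer slice; our rendering below assumes psrc also advances by inner_dim once per outer iteration, which the visible hunk does not show. A self-contained version on plain vectors (ours, not the diff's code):

#include <vector>

// Our rendering of the reference tiling loop on plain vectors.
std::vector<float> tile_axis_ref(const std::vector<float>& src,
                                 int outer_dim, int inner_dim, int num_tiles) {
    std::vector<float> dst;
    dst.reserve(static_cast<size_t>(outer_dim) * num_tiles * inner_dim);
    for (int i = 0; i < outer_dim; ++i) {
        const float* block = src.data() + static_cast<size_t>(i) * inner_dim;
        for (int t = 0; t < num_tiles; ++t)
            dst.insert(dst.end(), block, block + inner_dim);  // repeat the inner block
    }
    return dst;
}

int main() {
    // {1, 2 | 3, 4} tiled twice along the split -> {1, 2, 1, 2, 3, 4, 3, 4}
    auto out = tile_axis_ref({1.f, 2.f, 3.f, 4.f}, /*outer*/ 2, /*inner*/ 2, /*tiles*/ 2);
}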
@@ -62,12 +64,12 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_b) {
     topology topology;
     topology.add(input_layout("input", input->get_layout()));
-    topology.add(tile("tile", "input", tensor(2, 2, 2, 2)));
+    topology.add(tile("tile", "input", { 2, 1, 1, 1 }));

     std::vector<float> input_vec = { 1.f, 0.f, 5.f, 1.5f,
                                      2.f, 0.f, 6.f, 5.2f };
     set_values(input, input_vec);
-    tile_ref<float>(input, output_ref, tile::along_b, 2);
+    tile_ref<float>(input, output_ref, 0, 2);

     network network(engine, topology);
     network.set_input_data("input", input);
@@ -91,7 +93,7 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_f) {
     topology topology;
     topology.add(input_layout("input", input->get_layout()));
-    topology.add(tile("tile", "input", tensor(1, 4, 2, 2)));
+    topology.add(tile("tile", "input", { 1, 2, 1, 1 }));

     std::vector<float> input_vec = { 1.f, 0.f,
                                      5.f, 1.5f,
@@ -99,7 +101,7 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_f) {
                                      2.f, 0.f,
                                      6.f, 5.2f };
     set_values(input, input_vec);
-    tile_ref<float>(input, output_ref, tile::along_f, 2);
+    tile_ref<float>(input, output_ref, 1, 2);

     network network(engine, topology);
     network.set_input_data("input", input);
@@ -118,20 +120,24 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_f) {
 TEST(tile_gpu, basic_in1x2x2x2_axis_y) {
     auto& engine = get_test_engine();

-    auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 2, 2 } });
-    auto output_ref = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 2, 4 } });
+    auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 3, 4 } });
+    auto output_ref = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 3, 8 } });

     topology topology;
     topology.add(input_layout("input", input->get_layout()));
-    topology.add(tile("tile", "input", tensor(1, 2, 2, 4)));
+    topology.add(tile("tile", "input", { 1, 1, 2, 1 }));

-    std::vector<float> input_vec = { 1.f, 0.f,
-                                     5.f, 1.5f,
+    std::vector<float> input_vec = { 0.f, 1.f, 2.f,
+                                     3.f, 4.f, 5.f,
+                                     6.f, 7.f, 8.f,
+                                     9.f, 10.f, 11.f,

-                                     2.f, 0.f,
-                                     6.f, 5.2f };
+                                     12.f, 13.f, 14.f,
+                                     15.f, 16.f, 17.f,
+                                     18.f, 19.f, 20.f,
+                                     21.f, 22.f, 23.f };
     set_values(input, input_vec);
-    tile_ref<float>(input, output_ref, tile::along_y, 2);
+    tile_ref<float>(input, output_ref, 2, 2);

     network network(engine, topology);
     network.set_input_data("input", input);
@@ -155,15 +161,15 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_x) {
     topology topology;
     topology.add(input_layout("input", input->get_layout()));
-    topology.add(tile("tile", "input", tensor(1, 2, 4, 2)));
+    topology.add(tile("tile", "input", { 1, 1, 1, 2 }));

-    std::vector<float> input_vec = { 1.f, 0.f,
-                                     5.f, 1.5f,
+    std::vector<float> input_vec = { 0.f, 1.f,
+                                     2.f, 3.f,

-                                     2.f, 0.f,
-                                     6.f, 5.2f };
+                                     4.f, 5.f,
+                                     6.f, 7.f };
     set_values(input, input_vec);
-    tile_ref<float>(input, output_ref, tile::along_x, 2);
+    tile_ref<float>(input, output_ref, 3, 2);

     network network(engine, topology);
     network.set_input_data("input", input);
@@ -187,11 +193,11 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_x_dense) {
     topology topology;
     topology.add(input_layout("input", input->get_layout()));
-    topology.add(tile("tile", "input", tensor(1, 2, 4, 2)));
+    topology.add(tile("tile", "input", { 1, 1, 1, 4 }));

-    std::vector<float> input_vec = { 1.f, 0.f, 5.f, 1.5f};
+    std::vector<float> input_vec = { 1.f, 0.f, 5.f, 1.5f };
     set_values(input, input_vec);
-    tile_ref<float>(input, output_ref, tile::along_x, 4);
+    tile_ref<float>(input, output_ref, 3, 4);

     network network(engine, topology);
     network.set_input_data("input", input);
@@ -215,7 +221,7 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_z) {
     topology topology;
     topology.add(input_layout("input", input->get_layout()));
-    topology.add(tile("tile", "input", tensor(1, 2, 2, 2, 4)));
+    topology.add(tile("tile", "input", { 1, 1, 2, 1, 1 }));

     std::vector<float> input_vec = {
         1.f, 0.f,
@@ -228,7 +234,7 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_z) {
         6.f, 5.2f
     };
     set_values(input, input_vec);
-    tile_ref<float>(input, output_ref, tile::along_z, 2);
+    tile_ref<float>(input, output_ref, 2, 2);

     network network(engine, topology);
     network.set_input_data("input", input);
@@ -15,10 +15,12 @@ const std::vector<InferenceEngine::Precision> netPrecisions = {
 };

 const std::vector<std::vector<int64_t>> repeats = {
+    {2, 3},
     {1, 2, 3},
     {2, 1, 1},
     {2, 3, 1},
     {2, 2, 2},
+    {2, 3, 4, 5},
 };

 INSTANTIATE_TEST_SUITE_P(smoke_Tile, TileLayerTest,
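Note: the expanded list now exercises repeats both shorter ({2, 3}) and longer ({2, 3, 4, 5}) than the input rank, assuming the test input is rank 3. A worked example for the longer case (the input shape {2, 3, 4} is our assumption for illustration):

#include <cstdint>
#include <vector>

int main() {
    std::vector<int64_t> dims = {2, 3, 4};              // hypothetical rank-3 input
    const std::vector<int64_t> repeats = {2, 3, 4, 5};  // rank-4 repeats from the list
    // The plugin first reshapes the input, prepending ones: {1, 2, 3, 4} ...
    dims.insert(dims.begin(), repeats.size() - dims.size(), 1);
    // ... then each dimension is multiplied by its repeat: {2, 6, 12, 20}.
    for (size_t i = 0; i < dims.size(); ++i)
        dims[i] *= repeats[i];
}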