[GPU] Enable blocked formats in Border primitive (#11652)

* fix default_value of border tensor parameters
* Enable all in/out layout
This commit is contained in:
Felix Dohyun Kim
2022-06-07 17:39:57 +09:00
committed by GitHub
parent 49942c2f80
commit c1f4cc04de
4 changed files with 427 additions and 166 deletions

View File

@@ -26,10 +26,7 @@ layout border_inst::calc_output_layout(border_node const& node) {
new_size += desc->left_top_sizes.sub(tensor(0));
new_size += desc->right_bottom_sizes.sub(tensor(0));
auto ret_data_t = input_layout.data_type;
auto ret_format = input_layout.format;
return layout{ ret_data_t, ret_format, new_size };
return layout{ input_layout.data_type, input_layout.format, new_size };
}
std::string border_inst::to_string(border_node const& node) {

View File

@@ -70,43 +70,99 @@ attach_border_impl::attach_border_impl() {
implementation_map<border>::add(impl_types::ocl, border_impl::create, {
std::make_tuple(data_types::f32, format::yxfb),
std::make_tuple(data_types::f16, format::yxfb),
std::make_tuple(data_types::i32, format::yxfb),
std::make_tuple(data_types::i8, format::yxfb),
std::make_tuple(data_types::u8, format::yxfb),
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f16, format::bfyx),
std::make_tuple(data_types::i32, format::bfyx),
std::make_tuple(data_types::i8, format::bfyx),
std::make_tuple(data_types::u8, format::bfyx),
std::make_tuple(data_types::f32, format::byxf),
std::make_tuple(data_types::f16, format::byxf),
std::make_tuple(data_types::i32, format::byxf),
std::make_tuple(data_types::i8, format::byxf),
std::make_tuple(data_types::u8, format::byxf),
std::make_tuple(data_types::f32, format::bfzyx),
std::make_tuple(data_types::f16, format::bfzyx),
std::make_tuple(data_types::i32, format::bfzyx),
std::make_tuple(data_types::i8, format::bfzyx),
std::make_tuple(data_types::u8, format::bfzyx),
std::make_tuple(data_types::f32, format::bfwzyx),
std::make_tuple(data_types::f16, format::bfwzyx),
std::make_tuple(data_types::i32, format::bfwzyx),
std::make_tuple(data_types::i8, format::bfwzyx),
std::make_tuple(data_types::u8, format::bfwzyx),
std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
std::make_tuple(data_types::i32, format::b_fs_yx_fsv16),
std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
std::make_tuple(data_types::f32, format::b_fs_yx_fsv32),
std::make_tuple(data_types::f16, format::b_fs_yx_fsv32),
std::make_tuple(data_types::i32, format::b_fs_yx_fsv32),
std::make_tuple(data_types::i8, format::b_fs_yx_fsv32),
std::make_tuple(data_types::u8, format::b_fs_yx_fsv32),
std::make_tuple(data_types::f32, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::f16, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::i32, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::i8, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::u8, format::b_fs_zyx_fsv16),
std::make_tuple(data_types::f32, format::bs_fs_yx_bsv4_fsv2),
std::make_tuple(data_types::f16, format::bs_fs_yx_bsv4_fsv2),
std::make_tuple(data_types::i32, format::bs_fs_yx_bsv4_fsv2),
std::make_tuple(data_types::i8, format::bs_fs_yx_bsv4_fsv2),
std::make_tuple(data_types::u8, format::bs_fs_yx_bsv4_fsv2),
std::make_tuple(data_types::f32, format::bs_fs_yx_bsv4_fsv4),
std::make_tuple(data_types::f16, format::bs_fs_yx_bsv4_fsv4),
std::make_tuple(data_types::i32, format::bs_fs_yx_bsv4_fsv4),
std::make_tuple(data_types::i8, format::bs_fs_yx_bsv4_fsv4),
std::make_tuple(data_types::u8, format::bs_fs_yx_bsv4_fsv4),
std::make_tuple(data_types::f32, format::bs_fs_yx_bsv8_fsv2),
std::make_tuple(data_types::f16, format::bs_fs_yx_bsv8_fsv2),
std::make_tuple(data_types::i32, format::bs_fs_yx_bsv8_fsv2),
std::make_tuple(data_types::i8, format::bs_fs_yx_bsv8_fsv2),
std::make_tuple(data_types::u8, format::bs_fs_yx_bsv8_fsv2),
std::make_tuple(data_types::f32, format::bs_fs_yx_bsv8_fsv4),
std::make_tuple(data_types::f16, format::bs_fs_yx_bsv8_fsv4),
std::make_tuple(data_types::i32, format::bs_fs_yx_bsv8_fsv4),
std::make_tuple(data_types::i8, format::bs_fs_yx_bsv8_fsv4),
std::make_tuple(data_types::u8, format::bs_fs_yx_bsv8_fsv4),
std::make_tuple(data_types::f32, format::bs_fs_yx_bsv16_fsv16),
std::make_tuple(data_types::f16, format::bs_fs_yx_bsv16_fsv16),
std::make_tuple(data_types::i32, format::bs_fs_yx_bsv16_fsv16),
std::make_tuple(data_types::i8, format::bs_fs_yx_bsv16_fsv16),
std::make_tuple(data_types::u8, format::bs_fs_yx_bsv16_fsv16),
std::make_tuple(data_types::f32, format::bs_fs_yx_bsv32_fsv16),
std::make_tuple(data_types::f16, format::bs_fs_yx_bsv32_fsv16),
std::make_tuple(data_types::i32, format::bs_fs_yx_bsv32_fsv16),
std::make_tuple(data_types::i8, format::bs_fs_yx_bsv32_fsv16),
std::make_tuple(data_types::u8, format::bs_fs_yx_bsv32_fsv16),
std::make_tuple(data_types::f32, format::bs_fs_yx_bsv32_fsv32),
std::make_tuple(data_types::f16, format::bs_fs_yx_bsv32_fsv32),
std::make_tuple(data_types::i32, format::bs_fs_yx_bsv32_fsv32),
std::make_tuple(data_types::i8, format::bs_fs_yx_bsv32_fsv32),
std::make_tuple(data_types::u8, format::bs_fs_yx_bsv32_fsv32),
std::make_tuple(data_types::f32, format::bs_fs_zyx_bsv16_fsv16),
std::make_tuple(data_types::f16, format::bs_fs_zyx_bsv16_fsv16),
std::make_tuple(data_types::i32, format::bs_fs_zyx_bsv16_fsv16),
std::make_tuple(data_types::i8, format::bs_fs_zyx_bsv16_fsv16),
std::make_tuple(data_types::u8, format::bs_fs_zyx_bsv16_fsv16),
});
}

View File

@@ -10,31 +10,18 @@ ParamsKey BorderKernelRef::GetSupportedKey() const {
k.EnableInputDataType(Datatype::F16);
k.EnableInputDataType(Datatype::F32);
k.EnableInputDataType(Datatype::INT32);
k.EnableInputDataType(Datatype::INT8);
k.EnableInputDataType(Datatype::UINT8);
k.EnableOutputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::INT32);
k.EnableOutputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::UINT8);
k.EnableInputLayout(DataLayout::bfyx);
k.EnableInputLayout(DataLayout::yxfb);
k.EnableInputLayout(DataLayout::byxf);
k.EnableInputLayout(DataLayout::bfzyx);
k.EnableInputLayout(DataLayout::bfwzyx);
k.EnableInputLayout(DataLayout::b_fs_yx_fsv16);
k.EnableInputLayout(DataLayout::b_fs_yx_fsv32);
k.EnableInputLayout(DataLayout::b_fs_zyx_fsv16);
k.EnableOutputLayout(DataLayout::bfyx);
k.EnableOutputLayout(DataLayout::yxfb);
k.EnableOutputLayout(DataLayout::byxf);
k.EnableOutputLayout(DataLayout::bfzyx);
k.EnableOutputLayout(DataLayout::bfwzyx);
k.EnableOutputLayout(DataLayout::b_fs_yx_fsv16);
k.EnableOutputLayout(DataLayout::b_fs_yx_fsv32);
k.EnableOutputLayout(DataLayout::b_fs_zyx_fsv16);
k.EnableAllInputLayout();
k.EnableAllOutputLayout();
k.EnableTensorOffset();
k.EnableTensorPitches();

View File

@@ -8,6 +8,7 @@
#include <intel_gpu/primitives/border.hpp>
#include <cstddef>
#include <array>
using namespace cldnn;
using namespace ::tests;
@@ -30,6 +31,267 @@ static std::vector<T> generate_rnd_real_input(
return data;
}
template <class T>
static int mult(T arr) {
return std::accumulate(arr.begin(), arr.end(), 1, [](int x, int y) {
return x * y;
});
}
std::ostream& operator<<(std::ostream& os, FLOAT16 x) {
return os << float(x);
}
#define PAD_MODES \
border_type::constant, border_type::edge, border_type::mirror, border_type::mirror_101 //,border_type::zero
#define FORMATS \
format::type::bfyx, format::type::yxfb, format::type::b_fs_yx_fsv4, format::type::b_fs_yx_fsv16, \
format::type::b_fs_yx_fsv32, format::type::bs_fs_yx_bsv4_fsv2, format::type::bs_fs_yx_bsv16_fsv16, \
format::type::bs_fs_yx_bsv32_fsv16, format::type::bs_fs_yx_bsv32_fsv32, format::type::bs_fs_yx_bsv4_fsv4, \
format::type::bs_fs_yx_bsv8_fsv2, format::type::bs_fs_yx_bsv8_fsv4
template <class T>
using border_test_param = std::tuple<border_type, // pad mode
T, // pad value
format::type, // format
std::array<int, 4>, // shape in
std::array<int, 4>, // coord diff lt
std::array<int, 4>>; // coord diff rb
template <class T, data_types T_dt>
class border_test : public ::testing::TestWithParam<border_test_param<T>> {
public:
border_type pad_mode;
T pad_value;
format::type fmt;
std::array<int, 4> sh_in, cd_lt, cd_rb, sh_out;
void SetUp() override {
::testing::TestWithParam<border_test_param<T>>::SetUp();
std::tie(pad_mode, pad_value, fmt, sh_in, cd_lt, cd_rb) = this->GetParam();
sh_out = {sh_in[0] + cd_lt[0] + cd_rb[0],
sh_in[1] + cd_lt[1] + cd_rb[1],
sh_in[2] + cd_lt[2] + cd_rb[2],
sh_in[3] + cd_lt[3] + cd_rb[3]};
auto& engine = get_test_engine();
auto input_data = generate_random_1d<T>(mult(sh_in), -9, 9, 1);
auto input = engine.allocate_memory({T_dt, format::bfyx, {sh_in[0], sh_in[1], sh_in[3], sh_in[2]}});
set_values(input, input_data);
topology target_topology;
target_topology.add(input_layout("input", input->get_layout()));
target_topology.add(reorder("border_input", "input", fmt, T_dt),
border("border",
"border_input",
tensor(format::bfyx,std::vector<tensor::value_type>(cd_lt.begin(),cd_lt.end()),0),
tensor(format::bfyx,std::vector<tensor::value_type>(cd_rb.begin(),cd_rb.end()),0),
pad_mode,
pad_value),
reorder("output", "border", cldnn::format::bfyx, T_dt));
cldnn::network target_network(engine, target_topology);
target_network.set_input_data("input", input);
auto target_output = target_network.execute().at("output").get_memory();
cldnn::mem_lock<T> target_output_ptr(target_output, get_test_stream());
topology base_topology;
base_topology.add(input_layout("input", input->get_layout()));
base_topology.add(border("border",
"input",
tensor(format::bfyx, std::vector<tensor::value_type>(cd_lt.begin(), cd_lt.end()), 0),
tensor(format::bfyx, std::vector<tensor::value_type>(cd_rb.begin(), cd_rb.end()), 0),
pad_mode,
pad_value));
cldnn::network base_network(engine, base_topology);
base_network.set_input_data("input", input);
auto base_output = base_network.execute().at("border").get_memory();
cldnn::mem_lock<T> base_output_ptr(base_output, get_test_stream());
EXPECT_TRUE(!memcmp(target_output_ptr.data(), base_output_ptr.data(), sizeof(T) * mult(sh_out)));
}
};
using border_test_i8 = border_test<char, data_types::i8>;
TEST_P(border_test_i8, border_test_i8) {}
INSTANTIATE_TEST_SUITE_P(border_test_i8,
border_test_i8,
testing::Combine(testing::Values(PAD_MODES),
testing::Values(99),
testing::Values(FORMATS),
testing::Values(std::array<int, 4>{2, 3, 4, 5}),
testing::Values(std::array<int, 4>{1, 2, 3, 4}),
testing::Values(std::array<int, 4>{1, 1, 1, 1})));
using border_test_u8 = border_test<char, data_types::u8>;
TEST_P(border_test_u8, border_test_u8) {}
INSTANTIATE_TEST_SUITE_P(border_test_u8,
border_test_u8,
testing::Combine(testing::Values(border_type::edge),
testing::Values(99),
testing::Values(format::type::bs_fs_yx_bsv16_fsv16),
testing::Values(std::array<int, 4>{2, 3, 4, 5}),
testing::Values(std::array<int, 4>{1, 2, 3, 4}),
testing::Values(std::array<int, 4>{1, 1, 1, 1})));
using border_test_i32 = border_test<int, data_types::i32>;
TEST_P(border_test_i32, border_test_i32) {}
INSTANTIATE_TEST_SUITE_P(border_test_i32,
border_test_i32,
testing::Combine(testing::Values(border_type::mirror),
testing::Values(11),
testing::Values(format::type::b_fs_yx_fsv16),
testing::Values(std::array<int, 4>{2, 3, 4, 5}),
testing::Values(std::array<int, 4>{1, 2, 3, 4}),
testing::Values(std::array<int, 4>{1, 1, 1, 1})));
using border_test_f16 = border_test<FLOAT16, data_types::f16>;
TEST_P(border_test_f16, border_test_f16) {}
INSTANTIATE_TEST_SUITE_P(border_test_f16,
border_test_f16,
testing::Combine(testing::Values(border_type::mirror_101),
testing::Values(FLOAT16(123)),
testing::Values(format::type::bs_fs_yx_bsv32_fsv16),
testing::Values(std::array<int, 4>{2, 3, 4, 5}),
testing::Values(std::array<int, 4>{1, 2, 3, 4}),
testing::Values(std::array<int, 4>{1, 1, 1, 1})));
using border_test_f32 = border_test<float, data_types::f32>;
TEST_P(border_test_f32, border_test_f32) {}
INSTANTIATE_TEST_SUITE_P(border_test_f32,
border_test_f32,
testing::Combine(testing::Values(border_type::edge),
testing::Values(12.34),
testing::Values(format::type::bs_fs_yx_bsv4_fsv2),
testing::Values(std::array<int, 4>{2, 3, 4, 5}),
testing::Values(std::array<int, 4>{1, 2, 3, 4}),
testing::Values(std::array<int, 4>{1, 1, 1, 1})));
INSTANTIATE_TEST_SUITE_P(bsv16fsv16_reorder,
border_test_i32,
testing::Combine(testing::Values(border_type::mirror),
testing::Values(99),
testing::Values(format::type::bs_fs_yx_bsv16_fsv16),
testing::Values(std::array<int, 4>{2, 3, 4, 5}),
testing::Values(std::array<int, 4>{1, 2, 3, 4}),
testing::Values(std::array<int, 4>{1, 1, 1, 1})));
TEST(border_gpu, bsv16fsv16_without_reorder) {
using T = int;
data_types T_dt = data_types::i32;
border_type pad_mode = border_type::constant;
T pad_value = 0;
std::array<int, 4> sh_in = {16, 16, 2, 3}, cd_lt = {0, 0, 1, 1}, cd_rb = {0, 0, 1, 1}, sh_out;
sh_out = {sh_in[0] + cd_lt[0] + cd_rb[0],
sh_in[1] + cd_lt[1] + cd_rb[1],
sh_in[2] + cd_lt[2] + cd_rb[2],
sh_in[3] + cd_lt[3] + cd_rb[3]};
auto& engine = get_test_engine();
auto input_data = generate_random_1d<T>(mult(sh_in), -9, 9, 1);
auto input = engine.allocate_memory({T_dt, format::bfyx, {sh_in[0], sh_in[1], sh_in[3], sh_in[2]}});
set_values(input, input_data);
auto index_bfyx = [=](std::array<int, 4> s, int b, int f, int y, int x) {
return b * s[1] * s[2] * s[3] + f * s[2] * s[3] + y * s[3] + x;
};
auto index_bsv16fsv16 = [=](std::array<int, 4> s, int b, int f, int y, int x) {
int b0 = b / 16, b1 = b % 16, f0 = f / 16, f1 = f % 16;
return b0 * s[1] / 16 * s[2] * s[3] * 16 * 16 +
f0 * s[2] * s[3] * 16 * 16 +
y * s[3] * 16 * 16 +
x * 16 * 16 +
b1 * 16 +
f1;
};
auto input_data_b16f16 = input_data;
for (int b = 0; b < sh_in[0]; b++)
for (int f = 0; f < sh_in[1]; f++)
for (int y = 0; y < sh_in[2]; y++)
for (int x = 0; x < sh_in[3]; x++)
input_data_b16f16[index_bsv16fsv16(sh_in, b, f, y, x)] = input_data[index_bfyx(sh_in, b, f, y, x)];
auto input_b16f16 = engine.allocate_memory({T_dt, format::bs_fs_yx_bsv16_fsv16, {sh_in[0], sh_in[1], sh_in[3], sh_in[2]}});
set_values(input_b16f16, input_data_b16f16);
topology target_topology;
target_topology.add(input_layout("input", input_b16f16->get_layout()));
target_topology.add(border("border",
"input",
tensor(format::bfyx, std::vector<tensor::value_type>(cd_lt.begin(), cd_lt.end()), 0),
tensor(format::bfyx, std::vector<tensor::value_type>(cd_rb.begin(), cd_rb.end()), 0),
pad_mode,
pad_value));
cldnn::network target_network(engine, target_topology);
target_network.set_input_data("input", input_b16f16);
auto target_output = target_network.execute().at("border").get_memory();
cldnn::mem_lock<T> target_output_ptr(target_output, get_test_stream());
topology base_topology;
base_topology.add(input_layout("input", input->get_layout()));
base_topology.add(border("border",
"input",
tensor(format::bfyx, std::vector<tensor::value_type>(cd_lt.begin(), cd_lt.end()), 0),
tensor(format::bfyx, std::vector<tensor::value_type>(cd_rb.begin(), cd_rb.end()), 0),
pad_mode,
pad_value));
cldnn::network base_network(engine, base_topology);
base_network.set_input_data("input", input);
auto base_output = base_network.execute().at("border").get_memory();
cldnn::mem_lock<T> base_output_ptr(base_output, get_test_stream());
std::vector<T> b16f16_to_bfyx(mult(sh_out));
for (int b = 0; b < sh_out[0]; b++)
for (int f = 0; f < sh_out[1]; f++)
for (int y = 0; y < sh_out[2]; y++)
for (int x = 0; x < sh_out[3]; x++)
b16f16_to_bfyx[index_bfyx(sh_out, b, f, y, x)] =
target_output_ptr.data()[index_bsv16fsv16(sh_out, b, f, y, x)];
EXPECT_TRUE(!memcmp(b16f16_to_bfyx.data(), base_output_ptr.data(), sizeof(T) * mult(sh_out)));
}
TEST(border_gpu, zyx_bsv16fsv16) {
using T = int;
data_types T_dt = data_types::i32;
border_type pad_mode = border_type::mirror_101;
T pad_value = 0;
std::array<int, 5> sh_in = {16, 16, 4, 5, 6}, cd_lt = {0, 0, 1, 1, 1}, cd_rb = {0, 0, 2, 3, 4}, sh_out;
sh_out = {sh_in[0] + cd_lt[0] + cd_rb[0],
sh_in[1] + cd_lt[1] + cd_rb[1],
sh_in[2] + cd_lt[2] + cd_rb[2],
sh_in[3] + cd_lt[3] + cd_rb[3],
sh_in[4] + cd_lt[4] + cd_rb[4]};
auto& engine = get_test_engine();
auto input_data = generate_random_1d<T>(mult(sh_in), -9, 9, 1);
auto input = engine.allocate_memory({T_dt, format::bfzyx, {sh_in[0], sh_in[1], sh_in[4], sh_in[3], sh_in[2]}});
set_values(input, input_data);
topology target_topology;
target_topology.add(input_layout("input", input->get_layout()));
target_topology.add(reorder("border_input", "input", format::bs_fs_zyx_bsv16_fsv16, T_dt),
border("border",
"border_input",
tensor(format::bfzyx, std::vector<tensor::value_type>(cd_lt.begin(), cd_lt.end()), 0),
tensor(format::bfzyx, std::vector<tensor::value_type>(cd_rb.begin(), cd_rb.end()), 0),
pad_mode,
pad_value),
reorder("output", "border", cldnn::format::bfzyx, T_dt));
cldnn::network target_network(engine, target_topology);
target_network.set_input_data("input", input);
auto target_output = target_network.execute().at("output").get_memory();
cldnn::mem_lock<T> target_output_ptr(target_output, get_test_stream());
topology base_topology;
base_topology.add(input_layout("input", input->get_layout()));
base_topology.add(border("border",
"input",
tensor(format::bfzyx, std::vector<tensor::value_type>(cd_lt.begin(), cd_lt.end()), 0),
tensor(format::bfzyx, std::vector<tensor::value_type>(cd_rb.begin(), cd_rb.end()), 0),
pad_mode,
pad_value));
cldnn::network base_network(engine, base_topology);
base_network.set_input_data("input", input);
auto base_output = base_network.execute().at("border").get_memory();
cldnn::mem_lock<T> base_output_ptr(base_output, get_test_stream());
EXPECT_TRUE(!memcmp(target_output_ptr.data(), base_output_ptr.data(), sizeof(T) * mult(sh_out)));
}
TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_constant) {
// Input (XY) : 4x3
// Output (XY): 10x7
@@ -58,15 +320,13 @@ TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_constant) {
auto input = engine.allocate_memory({data_types::f32, format::yxfb, {in_size_b, in_size_f, in_size_x, in_size_y}});
topology topology;
topology.add(
input_layout("input", input->get_layout())
);
topology.add(
border("output", "input",
{blt_size_b, blt_size_f, blt_size_x, blt_size_y},
{brb_size_b, brb_size_f, brb_size_x, brb_size_y},
border_type::constant, 0.0f)
);
topology.add(input_layout("input", input->get_layout()));
topology.add(border("output",
"input",
tensor(format::bfyx, {blt_size_b, blt_size_f, blt_size_y, blt_size_x}, 0),
tensor(format::bfyx, {brb_size_b, brb_size_f, brb_size_y, brb_size_x}, 0),
border_type::constant,
0.0f));
std::vector<float> input_data = {
1, -2, 3, -4,
@@ -134,17 +394,15 @@ TEST(border_gpu, basic_fsv16_0x0x1x2_0x0x3x4_border_constant) {
auto input = engine.allocate_memory({data_types::f32, format::yxfb, {in_size_b, in_size_f, in_size_x, in_size_y}});
topology topology;
topology.add(
input_layout("input", input->get_layout())
);
topology.add(
reorder("border_input", "input", cldnn::format::b_fs_yx_fsv16, cldnn::data_types::f32),
border("border", "border_input",
{blt_size_b, blt_size_f, blt_size_x, blt_size_y},
{brb_size_b, brb_size_f, brb_size_x, brb_size_y},
border_type::constant, 0.0f),
reorder("output", "border", cldnn::format::yxfb, cldnn::data_types::f32)
);
topology.add(input_layout("input", input->get_layout()));
topology.add(reorder("border_input", "input", cldnn::format::b_fs_yx_fsv16, cldnn::data_types::f32),
border("border",
"border_input",
tensor(format::bfyx, {blt_size_b, blt_size_f, blt_size_y, blt_size_x}, 0),
tensor(format::bfyx, {brb_size_b, brb_size_f, brb_size_y, brb_size_x}, 0),
border_type::constant,
0.0f),
reorder("output", "border", cldnn::format::yxfb, cldnn::data_types::f32));
std::vector<float> input_data = {
1, -2, 3, -4,
@@ -214,15 +472,13 @@ TEST(border_gpu, basic_bfzyx_0x0x1x01_0x0x0x0x3_border_constant) {
auto input = engine.allocate_memory({ data_types::f32, format::bfzyx,{ in_size_b, in_size_f, in_size_x, in_size_y, in_size_z } });
topology topology;
topology.add(
input_layout("input", input->get_layout())
);
topology.add(
border("output", "input",
{ blt_size_b, blt_size_f, blt_size_x, blt_size_y, blt_size_z },
{ brb_size_b, brb_size_f, brb_size_x, brb_size_y, brb_size_z },
border_type::constant, 0.0f)
);
topology.add(input_layout("input", input->get_layout()));
topology.add(border("output",
"input",
tensor(format::bfzyx, {blt_size_b, blt_size_f, blt_size_z, blt_size_y, blt_size_x}, 0),
tensor(format::bfzyx, {brb_size_b, brb_size_f, brb_size_z, brb_size_y, brb_size_x}, 0),
border_type::constant,
0.0f));
std::vector<float> input_data = {
1, -2,
@@ -323,15 +579,13 @@ TEST(border_gpu, basic_bfwzyx_0x0x0x1x0x1_0x0x0x1x0x1_border_constant) {
auto input = engine.allocate_memory({ data_types::f32, format::bfwzyx, tensor{ batch(in_size_b), feature(in_size_f), spatial(in_size_x, in_size_y, in_size_z, in_size_w) } });
topology topology;
topology.add(
input_layout("input", input->get_layout())
);
topology.add(
border("output", "input",
tensor{ batch(blt_size_b), feature(blt_size_f), spatial(blt_size_x, blt_size_y, blt_size_z, blt_size_w) },
tensor{ batch(brb_size_b), feature(brb_size_f), spatial(brb_size_x, brb_size_y, brb_size_z, brb_size_w) },
border_type::constant, 0.0f)
);
topology.add(input_layout("input", input->get_layout()));
topology.add(border("output",
"input",
tensor(format::bfwzyx, {blt_size_b, blt_size_f, blt_size_w, blt_size_z, blt_size_y, blt_size_x}, 0),
tensor(format::bfwzyx, {brb_size_b, brb_size_f, brb_size_w, brb_size_z, brb_size_y, brb_size_x}, 0),
border_type::constant,
0.0f));
std::vector<float> input_data = {
1, -2,
@@ -427,15 +681,13 @@ TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_constant_non_constant) {
auto input = engine.allocate_memory({data_types::f32, format::yxfb, {in_size_b, in_size_f, in_size_x, in_size_y}});
topology topology;
topology.add(
input_layout("input", input->get_layout())
);
topology.add(
border("output", "input",
tensor{blt_size_b, blt_size_f, blt_size_x, blt_size_y},
tensor{brb_size_b, brb_size_f, brb_size_x, brb_size_y},
border_type::constant, 1.0f)
);
topology.add(input_layout("input", input->get_layout()));
topology.add(border("output",
"input",
tensor(format::bfyx, {blt_size_b, blt_size_f, blt_size_y, blt_size_x}, 0),
tensor(format::bfyx, {brb_size_b, brb_size_f, brb_size_y, brb_size_x}, 0),
border_type::constant,
1.0f));
std::vector<float> input_data = {
1, -2, 3, -4,
@@ -503,15 +755,12 @@ TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_mirror) {
auto input = engine.allocate_memory({data_types::f32, format::yxfb, {in_size_b, in_size_f, in_size_x, in_size_y}});
topology topology;
topology.add(
input_layout("input", input->get_layout())
);
topology.add(
border("output", "input",
{blt_size_b, blt_size_f, blt_size_x, blt_size_y},
{brb_size_b, brb_size_f, brb_size_x, brb_size_y},
border_type::mirror)
);
topology.add(input_layout("input", input->get_layout()));
topology.add(border("output",
"input",
tensor(format::bfyx, {blt_size_b, blt_size_f, blt_size_y, blt_size_x}, 0),
tensor(format::bfyx, {brb_size_b, brb_size_f, brb_size_y, brb_size_x}, 0),
border_type::mirror));
std::vector<float> input_data = {
1, -2, 3, -4,
@@ -581,15 +830,12 @@ TEST(border_gpu, basic_bfzyx_0x0x0x0x1_0x0x0x0x1_border_mirror) {
auto input = engine.allocate_memory({ data_types::f32, format::bfzyx,{ in_size_b, in_size_f, in_size_x, in_size_y, in_size_z } });
topology topology;
topology.add(
input_layout("input", input->get_layout())
);
topology.add(
border("output", "input",
{ blt_size_b, blt_size_f, blt_size_x, blt_size_y, blt_size_z },
{ brb_size_b, brb_size_f, brb_size_x, brb_size_y, brb_size_z },
border_type::mirror)
);
topology.add(input_layout("input", input->get_layout()));
topology.add(border("output",
"input",
tensor(format::bfzyx, {blt_size_b, blt_size_f, blt_size_z, blt_size_y, blt_size_x}, 0),
tensor(format::bfzyx, {brb_size_b, brb_size_f, brb_size_z, brb_size_y, brb_size_x}, 0),
border_type::mirror));
const std::vector<size_t> sizes{ static_cast<std::size_t>(in_size_b), static_cast<std::size_t>(in_size_f),
static_cast<std::size_t>(in_size_y), static_cast<std::size_t>(in_size_x),
@@ -661,15 +907,13 @@ TEST(border_gpu, basic_bfzyxw_0x0x0x0x1_0x0x0x0x1_border_mirror) {
auto input = engine.allocate_memory({ data_types::f32, format::bfwzyx, tensor{ batch(in_size_b), feature(in_size_f), spatial(in_size_x, in_size_y, in_size_z, in_size_w) } });
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(
input_layout("input", input->get_layout())
);
topology.add(
border("output", "input",
tensor{ batch(blt_size_b), feature(blt_size_f), spatial(blt_size_x, blt_size_y, blt_size_z, blt_size_w) },
tensor{ batch(brb_size_b), feature(brb_size_f), spatial(brb_size_x, brb_size_y, brb_size_z, brb_size_w) },
border_type::mirror)
);
border("output",
"input",
tensor(format::bfwzyx, {blt_size_b, blt_size_f, blt_size_w, blt_size_z, blt_size_y, blt_size_x}, 0),
tensor(format::bfwzyx, {brb_size_b, brb_size_f, brb_size_w, brb_size_z, brb_size_y, brb_size_x}, 0),
border_type::mirror));
const std::vector<size_t> sizes{ static_cast<std::size_t>(in_size_b), static_cast<std::size_t>(in_size_f),
static_cast<std::size_t>(in_size_y), static_cast<std::size_t>(in_size_x),
@@ -738,15 +982,12 @@ TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_mirror_101) {
auto input = engine.allocate_memory({data_types::f32, format::yxfb, tensor{in_size_b, in_size_f, in_size_x, in_size_y}});
topology topology;
topology.add(
input_layout("input", input->get_layout())
);
topology.add(
border("output", "input",
tensor{blt_size_b, blt_size_f, blt_size_x, blt_size_y},
tensor{brb_size_b, brb_size_f, brb_size_x, brb_size_y},
border_type::mirror_101)
);
topology.add(input_layout("input", input->get_layout()));
topology.add(border("output",
"input",
tensor(format::bfyx, {blt_size_b, blt_size_f, blt_size_y, blt_size_x}, 0),
tensor(format::bfyx, {brb_size_b, brb_size_f, brb_size_y, brb_size_x}, 0),
border_type::mirror_101));
std::vector<float> input_data = {
1, -2, 3, -4, 4,
@@ -817,15 +1058,12 @@ TEST(border_gpu, basic_bfzyx_0x0x0x0x1_0x0x0x0x1_border_mirror_101) {
auto input = engine.allocate_memory({ data_types::f32, format::bfzyx, tensor{ in_size_b, in_size_f, in_size_x, in_size_y, in_size_z } });
topology topology;
topology.add(
input_layout("input", input->get_layout())
);
topology.add(
border("output", "input",
tensor{ blt_size_b, blt_size_f, blt_size_x, blt_size_y, blt_size_z },
tensor{ brb_size_b, brb_size_f, brb_size_x, brb_size_y, brb_size_z },
border_type::mirror_101)
);
topology.add(input_layout("input", input->get_layout()));
topology.add(border("output",
"input",
tensor(format::bfzyx, {blt_size_b, blt_size_f, blt_size_z, blt_size_y, blt_size_x}, 0),
tensor(format::bfzyx, {brb_size_b, brb_size_f, brb_size_z, brb_size_y, brb_size_x}, 0),
border_type::mirror_101));
std::vector<float> input_data = {
1, -2, 3, -4, 4,
@@ -903,15 +1141,13 @@ TEST(border_gpu, basic_bfwzyx_0x0x0x0x1x1_0x0x0x0x1x1_border_mirror_101) {
auto input = engine.allocate_memory({ data_types::f32, format::bfwzyx, tensor{ batch(in_size_b), feature(in_size_f), spatial(in_size_x, in_size_y, in_size_z, in_size_w) } });
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(
input_layout("input", input->get_layout())
);
topology.add(
border("output", "input",
tensor{ batch(blt_size_b), feature(blt_size_f), spatial(blt_size_x, blt_size_y, blt_size_z, blt_size_w) },
tensor{ batch(brb_size_b), feature(brb_size_f), spatial(brb_size_x, brb_size_y, brb_size_z, brb_size_w) },
border_type::mirror_101)
);
border("output",
"input",
tensor(format::bfwzyx, {blt_size_b, blt_size_f, blt_size_w, blt_size_z, blt_size_y, blt_size_x}, 0),
tensor(format::bfwzyx, {brb_size_b, brb_size_f, brb_size_w, brb_size_z, brb_size_y, brb_size_x}, 0),
border_type::mirror_101));
std::vector<float> input_data = {
1, -2, 3, -4,
@@ -995,15 +1231,12 @@ TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_edge) {
auto input = engine.allocate_memory({data_types::f32, format::yxfb, tensor{in_size_b, in_size_f, in_size_x, in_size_y}});
topology topology;
topology.add(
input_layout("input", input->get_layout())
);
topology.add(
border("output", "input",
tensor{blt_size_b, blt_size_f, blt_size_x, blt_size_y},
tensor{brb_size_b, brb_size_f, brb_size_x, brb_size_y},
border_type::edge)
);
topology.add(input_layout("input", input->get_layout()));
topology.add(border("output",
"input",
tensor(format::bfyx, {blt_size_b, blt_size_f, blt_size_y, blt_size_x}, 0),
tensor(format::bfyx, {brb_size_b, brb_size_f, brb_size_y, brb_size_x}, 0),
border_type::edge));
std::vector<float> input_data = {
1, -2, 3, -4, 4,
@@ -1070,16 +1303,13 @@ TEST(border_gpu, basic_bfyx_2x1x2x3_1x2x3x4_border_constant) {
auto input = engine.allocate_memory({data_types::f32, format::bfyx, tensor{in_size_b, in_size_f, in_size_x, in_size_y}});
topology topology;
topology.add(
input_layout("input", input->get_layout())
);
topology.add(
border("output", "input",
tensor{blt_size_b, blt_size_f, blt_size_x, blt_size_y},
tensor{brb_size_b, brb_size_f, brb_size_x, brb_size_y},
border_type::constant,
0.0f)
);
topology.add(input_layout("input", input->get_layout()));
topology.add(border("output",
"input",
tensor(format::bfyx, {blt_size_b, blt_size_f, blt_size_y, blt_size_x}, 0),
tensor(format::bfyx, {brb_size_b, brb_size_f, brb_size_y, brb_size_x}, 0),
border_type::constant,
0.0f));
const std::vector<size_t> sizes{ static_cast<std::size_t>(in_size_b), static_cast<std::size_t>(in_size_f),
static_cast<std::size_t>(in_size_y), static_cast<std::size_t>(in_size_x)};
@@ -1142,15 +1372,12 @@ TEST(border_gpu, basic_bfyx_2x1x2x3_1x2x3x4_border_mirror) {
auto input = engine.allocate_memory({data_types::f32, format::bfyx, tensor{in_size_b, in_size_f, in_size_x, in_size_y}});
topology topology;
topology.add(
input_layout("input", input->get_layout())
);
topology.add(
border("output", "input",
tensor{blt_size_b, blt_size_f, blt_size_x, blt_size_y},
tensor{brb_size_b, brb_size_f, brb_size_x, brb_size_y},
border_type::mirror)
);
topology.add(input_layout("input", input->get_layout()));
topology.add(border("output",
"input",
tensor(format::bfyx, {blt_size_b, blt_size_f, blt_size_y, blt_size_x}, 0),
tensor(format::bfyx, {brb_size_b, brb_size_f, brb_size_y, brb_size_x}, 0),
border_type::mirror));
const std::vector<size_t> sizes{ static_cast<std::size_t>(in_size_b), static_cast<std::size_t>(in_size_f),
static_cast<std::size_t>(in_size_y), static_cast<std::size_t>(in_size_x) };
@@ -1209,15 +1436,12 @@ TEST(border_gpu, basic_bfyx_2x1x2x3_1x2x3x4_border_mirror_101) {
auto input = engine.allocate_memory({data_types::f32, format::bfyx, tensor{in_size_b, in_size_f, in_size_x, in_size_y}});
topology topology;
topology.add(
input_layout("input", input->get_layout())
);
topology.add(
border("output", "input",
tensor{blt_size_b, blt_size_f, blt_size_x, blt_size_y},
tensor{brb_size_b, brb_size_f, brb_size_x, brb_size_y},
border_type::mirror_101)
);
topology.add(input_layout("input", input->get_layout()));
topology.add(border("output",
"input",
tensor(format::bfyx, {blt_size_b, blt_size_f, blt_size_y, blt_size_x}, 0),
tensor(format::bfyx, {brb_size_b, brb_size_f, brb_size_y, brb_size_x}, 0),
border_type::mirror_101));
const std::vector<size_t> sizes{ static_cast<std::size_t>(in_size_b), static_cast<std::size_t>(in_size_f),
static_cast<std::size_t>(in_size_y), static_cast<std::size_t>(in_size_x) };
std::vector<float> input_data = generate_rnd_real_input<float>(sizes, -8.0f, 8.0f);
@@ -1275,15 +1499,12 @@ TEST(border_gpu, basic_bfyx_2x1x2x3_1x2x3x4_border_edge) {
auto input = engine.allocate_memory({data_types::f32, format::bfyx, tensor{in_size_b, in_size_f, in_size_x, in_size_y}});
topology topology;
topology.add(
input_layout("input", input->get_layout())
);
topology.add(
border("output", "input",
tensor{blt_size_b, blt_size_f, blt_size_x, blt_size_y},
tensor{brb_size_b, brb_size_f, brb_size_x, brb_size_y},
border_type::edge)
);
topology.add(input_layout("input", input->get_layout()));
topology.add(border("output",
"input",
tensor(format::bfyx, {blt_size_b, blt_size_f, blt_size_y, blt_size_x}, 0),
tensor(format::bfyx, {brb_size_b, brb_size_f, brb_size_y, brb_size_x}, 0),
border_type::edge));
const std::vector<size_t> sizes{ static_cast<std::size_t>(in_size_b), static_cast<std::size_t>(in_size_f),
static_cast<std::size_t>(in_size_y), static_cast<std::size_t>(in_size_x) };
std::vector<float> input_data = generate_rnd_real_input<float>(sizes, -8.0f, 8.0f);