diff --git a/src/plugins/intel_gpu/src/graph/border.cpp b/src/plugins/intel_gpu/src/graph/border.cpp index 06a7f5ae0a2..eca6da1120c 100644 --- a/src/plugins/intel_gpu/src/graph/border.cpp +++ b/src/plugins/intel_gpu/src/graph/border.cpp @@ -26,10 +26,7 @@ layout border_inst::calc_output_layout(border_node const& node) { new_size += desc->left_top_sizes.sub(tensor(0)); new_size += desc->right_bottom_sizes.sub(tensor(0)); - auto ret_data_t = input_layout.data_type; - auto ret_format = input_layout.format; - - return layout{ ret_data_t, ret_format, new_size }; + return layout{ input_layout.data_type, input_layout.format, new_size }; } std::string border_inst::to_string(border_node const& node) { diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/border.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/border.cpp index fb5da6467b1..702005b5447 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/border.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/border.cpp @@ -70,43 +70,99 @@ attach_border_impl::attach_border_impl() { implementation_map::add(impl_types::ocl, border_impl::create, { std::make_tuple(data_types::f32, format::yxfb), std::make_tuple(data_types::f16, format::yxfb), + std::make_tuple(data_types::i32, format::yxfb), std::make_tuple(data_types::i8, format::yxfb), std::make_tuple(data_types::u8, format::yxfb), std::make_tuple(data_types::f32, format::bfyx), std::make_tuple(data_types::f16, format::bfyx), + std::make_tuple(data_types::i32, format::bfyx), std::make_tuple(data_types::i8, format::bfyx), std::make_tuple(data_types::u8, format::bfyx), std::make_tuple(data_types::f32, format::byxf), std::make_tuple(data_types::f16, format::byxf), + std::make_tuple(data_types::i32, format::byxf), std::make_tuple(data_types::i8, format::byxf), std::make_tuple(data_types::u8, format::byxf), std::make_tuple(data_types::f32, format::bfzyx), std::make_tuple(data_types::f16, format::bfzyx), + std::make_tuple(data_types::i32, format::bfzyx), std::make_tuple(data_types::i8, format::bfzyx), std::make_tuple(data_types::u8, format::bfzyx), std::make_tuple(data_types::f32, format::bfwzyx), std::make_tuple(data_types::f16, format::bfwzyx), + std::make_tuple(data_types::i32, format::bfwzyx), std::make_tuple(data_types::i8, format::bfwzyx), std::make_tuple(data_types::u8, format::bfwzyx), std::make_tuple(data_types::f32, format::b_fs_yx_fsv16), std::make_tuple(data_types::f16, format::b_fs_yx_fsv16), + std::make_tuple(data_types::i32, format::b_fs_yx_fsv16), std::make_tuple(data_types::i8, format::b_fs_yx_fsv16), std::make_tuple(data_types::u8, format::b_fs_yx_fsv16), std::make_tuple(data_types::f32, format::b_fs_yx_fsv32), std::make_tuple(data_types::f16, format::b_fs_yx_fsv32), + std::make_tuple(data_types::i32, format::b_fs_yx_fsv32), std::make_tuple(data_types::i8, format::b_fs_yx_fsv32), std::make_tuple(data_types::u8, format::b_fs_yx_fsv32), std::make_tuple(data_types::f32, format::b_fs_zyx_fsv16), std::make_tuple(data_types::f16, format::b_fs_zyx_fsv16), + std::make_tuple(data_types::i32, format::b_fs_zyx_fsv16), std::make_tuple(data_types::i8, format::b_fs_zyx_fsv16), std::make_tuple(data_types::u8, format::b_fs_zyx_fsv16), + + std::make_tuple(data_types::f32, format::bs_fs_yx_bsv4_fsv2), + std::make_tuple(data_types::f16, format::bs_fs_yx_bsv4_fsv2), + std::make_tuple(data_types::i32, format::bs_fs_yx_bsv4_fsv2), + std::make_tuple(data_types::i8, format::bs_fs_yx_bsv4_fsv2), + std::make_tuple(data_types::u8, format::bs_fs_yx_bsv4_fsv2), + + std::make_tuple(data_types::f32, format::bs_fs_yx_bsv4_fsv4), + std::make_tuple(data_types::f16, format::bs_fs_yx_bsv4_fsv4), + std::make_tuple(data_types::i32, format::bs_fs_yx_bsv4_fsv4), + std::make_tuple(data_types::i8, format::bs_fs_yx_bsv4_fsv4), + std::make_tuple(data_types::u8, format::bs_fs_yx_bsv4_fsv4), + + std::make_tuple(data_types::f32, format::bs_fs_yx_bsv8_fsv2), + std::make_tuple(data_types::f16, format::bs_fs_yx_bsv8_fsv2), + std::make_tuple(data_types::i32, format::bs_fs_yx_bsv8_fsv2), + std::make_tuple(data_types::i8, format::bs_fs_yx_bsv8_fsv2), + std::make_tuple(data_types::u8, format::bs_fs_yx_bsv8_fsv2), + + std::make_tuple(data_types::f32, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::f16, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::i32, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::i8, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::u8, format::bs_fs_yx_bsv8_fsv4), + + std::make_tuple(data_types::f32, format::bs_fs_yx_bsv16_fsv16), + std::make_tuple(data_types::f16, format::bs_fs_yx_bsv16_fsv16), + std::make_tuple(data_types::i32, format::bs_fs_yx_bsv16_fsv16), + std::make_tuple(data_types::i8, format::bs_fs_yx_bsv16_fsv16), + std::make_tuple(data_types::u8, format::bs_fs_yx_bsv16_fsv16), + + std::make_tuple(data_types::f32, format::bs_fs_yx_bsv32_fsv16), + std::make_tuple(data_types::f16, format::bs_fs_yx_bsv32_fsv16), + std::make_tuple(data_types::i32, format::bs_fs_yx_bsv32_fsv16), + std::make_tuple(data_types::i8, format::bs_fs_yx_bsv32_fsv16), + std::make_tuple(data_types::u8, format::bs_fs_yx_bsv32_fsv16), + + std::make_tuple(data_types::f32, format::bs_fs_yx_bsv32_fsv32), + std::make_tuple(data_types::f16, format::bs_fs_yx_bsv32_fsv32), + std::make_tuple(data_types::i32, format::bs_fs_yx_bsv32_fsv32), + std::make_tuple(data_types::i8, format::bs_fs_yx_bsv32_fsv32), + std::make_tuple(data_types::u8, format::bs_fs_yx_bsv32_fsv32), + + std::make_tuple(data_types::f32, format::bs_fs_zyx_bsv16_fsv16), + std::make_tuple(data_types::f16, format::bs_fs_zyx_bsv16_fsv16), + std::make_tuple(data_types::i32, format::bs_fs_zyx_bsv16_fsv16), + std::make_tuple(data_types::i8, format::bs_fs_zyx_bsv16_fsv16), + std::make_tuple(data_types::u8, format::bs_fs_zyx_bsv16_fsv16), }); } diff --git a/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/border/border_kernel_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/border/border_kernel_ref.cpp index 76073957c47..ec6c0a08ffe 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/border/border_kernel_ref.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/border/border_kernel_ref.cpp @@ -10,31 +10,18 @@ ParamsKey BorderKernelRef::GetSupportedKey() const { k.EnableInputDataType(Datatype::F16); k.EnableInputDataType(Datatype::F32); + k.EnableInputDataType(Datatype::INT32); k.EnableInputDataType(Datatype::INT8); k.EnableInputDataType(Datatype::UINT8); k.EnableOutputDataType(Datatype::F32); k.EnableOutputDataType(Datatype::F16); + k.EnableOutputDataType(Datatype::INT32); k.EnableOutputDataType(Datatype::INT8); k.EnableOutputDataType(Datatype::UINT8); - k.EnableInputLayout(DataLayout::bfyx); - k.EnableInputLayout(DataLayout::yxfb); - k.EnableInputLayout(DataLayout::byxf); - k.EnableInputLayout(DataLayout::bfzyx); - k.EnableInputLayout(DataLayout::bfwzyx); - k.EnableInputLayout(DataLayout::b_fs_yx_fsv16); - k.EnableInputLayout(DataLayout::b_fs_yx_fsv32); - k.EnableInputLayout(DataLayout::b_fs_zyx_fsv16); - - k.EnableOutputLayout(DataLayout::bfyx); - k.EnableOutputLayout(DataLayout::yxfb); - k.EnableOutputLayout(DataLayout::byxf); - k.EnableOutputLayout(DataLayout::bfzyx); - k.EnableOutputLayout(DataLayout::bfwzyx); - k.EnableOutputLayout(DataLayout::b_fs_yx_fsv16); - k.EnableOutputLayout(DataLayout::b_fs_yx_fsv32); - k.EnableOutputLayout(DataLayout::b_fs_zyx_fsv16); + k.EnableAllInputLayout(); + k.EnableAllOutputLayout(); k.EnableTensorOffset(); k.EnableTensorPitches(); diff --git a/src/plugins/intel_gpu/tests/test_cases/border_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/border_gpu_test.cpp index 6538182858e..f9384855ad4 100644 --- a/src/plugins/intel_gpu/tests/test_cases/border_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/border_gpu_test.cpp @@ -8,6 +8,7 @@ #include #include +#include using namespace cldnn; using namespace ::tests; @@ -30,6 +31,267 @@ static std::vector generate_rnd_real_input( return data; } +template +static int mult(T arr) { + return std::accumulate(arr.begin(), arr.end(), 1, [](int x, int y) { + return x * y; + }); +} + +std::ostream& operator<<(std::ostream& os, FLOAT16 x) { + return os << float(x); +} + +#define PAD_MODES \ + border_type::constant, border_type::edge, border_type::mirror, border_type::mirror_101 //,border_type::zero +#define FORMATS \ + format::type::bfyx, format::type::yxfb, format::type::b_fs_yx_fsv4, format::type::b_fs_yx_fsv16, \ + format::type::b_fs_yx_fsv32, format::type::bs_fs_yx_bsv4_fsv2, format::type::bs_fs_yx_bsv16_fsv16, \ + format::type::bs_fs_yx_bsv32_fsv16, format::type::bs_fs_yx_bsv32_fsv32, format::type::bs_fs_yx_bsv4_fsv4, \ + format::type::bs_fs_yx_bsv8_fsv2, format::type::bs_fs_yx_bsv8_fsv4 + +template +using border_test_param = std::tuple, // shape in + std::array, // coord diff lt + std::array>; // coord diff rb + +template +class border_test : public ::testing::TestWithParam> { +public: + border_type pad_mode; + T pad_value; + format::type fmt; + std::array sh_in, cd_lt, cd_rb, sh_out; + void SetUp() override { + ::testing::TestWithParam>::SetUp(); + std::tie(pad_mode, pad_value, fmt, sh_in, cd_lt, cd_rb) = this->GetParam(); + sh_out = {sh_in[0] + cd_lt[0] + cd_rb[0], + sh_in[1] + cd_lt[1] + cd_rb[1], + sh_in[2] + cd_lt[2] + cd_rb[2], + sh_in[3] + cd_lt[3] + cd_rb[3]}; + auto& engine = get_test_engine(); + auto input_data = generate_random_1d(mult(sh_in), -9, 9, 1); + auto input = engine.allocate_memory({T_dt, format::bfyx, {sh_in[0], sh_in[1], sh_in[3], sh_in[2]}}); + set_values(input, input_data); + + topology target_topology; + target_topology.add(input_layout("input", input->get_layout())); + target_topology.add(reorder("border_input", "input", fmt, T_dt), + border("border", + "border_input", + tensor(format::bfyx,std::vector(cd_lt.begin(),cd_lt.end()),0), + tensor(format::bfyx,std::vector(cd_rb.begin(),cd_rb.end()),0), + pad_mode, + pad_value), + reorder("output", "border", cldnn::format::bfyx, T_dt)); + cldnn::network target_network(engine, target_topology); + target_network.set_input_data("input", input); + auto target_output = target_network.execute().at("output").get_memory(); + cldnn::mem_lock target_output_ptr(target_output, get_test_stream()); + + topology base_topology; + base_topology.add(input_layout("input", input->get_layout())); + base_topology.add(border("border", + "input", + tensor(format::bfyx, std::vector(cd_lt.begin(), cd_lt.end()), 0), + tensor(format::bfyx, std::vector(cd_rb.begin(), cd_rb.end()), 0), + pad_mode, + pad_value)); + + cldnn::network base_network(engine, base_topology); + base_network.set_input_data("input", input); + auto base_output = base_network.execute().at("border").get_memory(); + cldnn::mem_lock base_output_ptr(base_output, get_test_stream()); + + EXPECT_TRUE(!memcmp(target_output_ptr.data(), base_output_ptr.data(), sizeof(T) * mult(sh_out))); + } +}; +using border_test_i8 = border_test; +TEST_P(border_test_i8, border_test_i8) {} +INSTANTIATE_TEST_SUITE_P(border_test_i8, + border_test_i8, + testing::Combine(testing::Values(PAD_MODES), + testing::Values(99), + testing::Values(FORMATS), + testing::Values(std::array{2, 3, 4, 5}), + testing::Values(std::array{1, 2, 3, 4}), + testing::Values(std::array{1, 1, 1, 1}))); +using border_test_u8 = border_test; +TEST_P(border_test_u8, border_test_u8) {} +INSTANTIATE_TEST_SUITE_P(border_test_u8, + border_test_u8, + testing::Combine(testing::Values(border_type::edge), + testing::Values(99), + testing::Values(format::type::bs_fs_yx_bsv16_fsv16), + testing::Values(std::array{2, 3, 4, 5}), + testing::Values(std::array{1, 2, 3, 4}), + testing::Values(std::array{1, 1, 1, 1}))); +using border_test_i32 = border_test; +TEST_P(border_test_i32, border_test_i32) {} +INSTANTIATE_TEST_SUITE_P(border_test_i32, + border_test_i32, + testing::Combine(testing::Values(border_type::mirror), + testing::Values(11), + testing::Values(format::type::b_fs_yx_fsv16), + testing::Values(std::array{2, 3, 4, 5}), + testing::Values(std::array{1, 2, 3, 4}), + testing::Values(std::array{1, 1, 1, 1}))); +using border_test_f16 = border_test; +TEST_P(border_test_f16, border_test_f16) {} +INSTANTIATE_TEST_SUITE_P(border_test_f16, + border_test_f16, + testing::Combine(testing::Values(border_type::mirror_101), + testing::Values(FLOAT16(123)), + testing::Values(format::type::bs_fs_yx_bsv32_fsv16), + testing::Values(std::array{2, 3, 4, 5}), + testing::Values(std::array{1, 2, 3, 4}), + testing::Values(std::array{1, 1, 1, 1}))); +using border_test_f32 = border_test; +TEST_P(border_test_f32, border_test_f32) {} +INSTANTIATE_TEST_SUITE_P(border_test_f32, + border_test_f32, + testing::Combine(testing::Values(border_type::edge), + testing::Values(12.34), + testing::Values(format::type::bs_fs_yx_bsv4_fsv2), + testing::Values(std::array{2, 3, 4, 5}), + testing::Values(std::array{1, 2, 3, 4}), + testing::Values(std::array{1, 1, 1, 1}))); + +INSTANTIATE_TEST_SUITE_P(bsv16fsv16_reorder, + border_test_i32, + testing::Combine(testing::Values(border_type::mirror), + testing::Values(99), + testing::Values(format::type::bs_fs_yx_bsv16_fsv16), + testing::Values(std::array{2, 3, 4, 5}), + testing::Values(std::array{1, 2, 3, 4}), + testing::Values(std::array{1, 1, 1, 1}))); + +TEST(border_gpu, bsv16fsv16_without_reorder) { + using T = int; + data_types T_dt = data_types::i32; + border_type pad_mode = border_type::constant; + T pad_value = 0; + std::array sh_in = {16, 16, 2, 3}, cd_lt = {0, 0, 1, 1}, cd_rb = {0, 0, 1, 1}, sh_out; + sh_out = {sh_in[0] + cd_lt[0] + cd_rb[0], + sh_in[1] + cd_lt[1] + cd_rb[1], + sh_in[2] + cd_lt[2] + cd_rb[2], + sh_in[3] + cd_lt[3] + cd_rb[3]}; + auto& engine = get_test_engine(); + + auto input_data = generate_random_1d(mult(sh_in), -9, 9, 1); + auto input = engine.allocate_memory({T_dt, format::bfyx, {sh_in[0], sh_in[1], sh_in[3], sh_in[2]}}); + set_values(input, input_data); + + auto index_bfyx = [=](std::array s, int b, int f, int y, int x) { + return b * s[1] * s[2] * s[3] + f * s[2] * s[3] + y * s[3] + x; + }; + auto index_bsv16fsv16 = [=](std::array s, int b, int f, int y, int x) { + int b0 = b / 16, b1 = b % 16, f0 = f / 16, f1 = f % 16; + return b0 * s[1] / 16 * s[2] * s[3] * 16 * 16 + + f0 * s[2] * s[3] * 16 * 16 + + y * s[3] * 16 * 16 + + x * 16 * 16 + + b1 * 16 + + f1; + }; + + auto input_data_b16f16 = input_data; + for (int b = 0; b < sh_in[0]; b++) + for (int f = 0; f < sh_in[1]; f++) + for (int y = 0; y < sh_in[2]; y++) + for (int x = 0; x < sh_in[3]; x++) + input_data_b16f16[index_bsv16fsv16(sh_in, b, f, y, x)] = input_data[index_bfyx(sh_in, b, f, y, x)]; + + auto input_b16f16 = engine.allocate_memory({T_dt, format::bs_fs_yx_bsv16_fsv16, {sh_in[0], sh_in[1], sh_in[3], sh_in[2]}}); + set_values(input_b16f16, input_data_b16f16); + + topology target_topology; + target_topology.add(input_layout("input", input_b16f16->get_layout())); + target_topology.add(border("border", + "input", + tensor(format::bfyx, std::vector(cd_lt.begin(), cd_lt.end()), 0), + tensor(format::bfyx, std::vector(cd_rb.begin(), cd_rb.end()), 0), + pad_mode, + pad_value)); + cldnn::network target_network(engine, target_topology); + target_network.set_input_data("input", input_b16f16); + auto target_output = target_network.execute().at("border").get_memory(); + cldnn::mem_lock target_output_ptr(target_output, get_test_stream()); + + topology base_topology; + base_topology.add(input_layout("input", input->get_layout())); + base_topology.add(border("border", + "input", + tensor(format::bfyx, std::vector(cd_lt.begin(), cd_lt.end()), 0), + tensor(format::bfyx, std::vector(cd_rb.begin(), cd_rb.end()), 0), + pad_mode, + pad_value)); + cldnn::network base_network(engine, base_topology); + base_network.set_input_data("input", input); + auto base_output = base_network.execute().at("border").get_memory(); + cldnn::mem_lock base_output_ptr(base_output, get_test_stream()); + + std::vector b16f16_to_bfyx(mult(sh_out)); + for (int b = 0; b < sh_out[0]; b++) + for (int f = 0; f < sh_out[1]; f++) + for (int y = 0; y < sh_out[2]; y++) + for (int x = 0; x < sh_out[3]; x++) + b16f16_to_bfyx[index_bfyx(sh_out, b, f, y, x)] = + target_output_ptr.data()[index_bsv16fsv16(sh_out, b, f, y, x)]; + + EXPECT_TRUE(!memcmp(b16f16_to_bfyx.data(), base_output_ptr.data(), sizeof(T) * mult(sh_out))); +} + +TEST(border_gpu, zyx_bsv16fsv16) { + using T = int; + data_types T_dt = data_types::i32; + border_type pad_mode = border_type::mirror_101; + T pad_value = 0; + std::array sh_in = {16, 16, 4, 5, 6}, cd_lt = {0, 0, 1, 1, 1}, cd_rb = {0, 0, 2, 3, 4}, sh_out; + sh_out = {sh_in[0] + cd_lt[0] + cd_rb[0], + sh_in[1] + cd_lt[1] + cd_rb[1], + sh_in[2] + cd_lt[2] + cd_rb[2], + sh_in[3] + cd_lt[3] + cd_rb[3], + sh_in[4] + cd_lt[4] + cd_rb[4]}; + auto& engine = get_test_engine(); + auto input_data = generate_random_1d(mult(sh_in), -9, 9, 1); + auto input = engine.allocate_memory({T_dt, format::bfzyx, {sh_in[0], sh_in[1], sh_in[4], sh_in[3], sh_in[2]}}); + set_values(input, input_data); + + topology target_topology; + target_topology.add(input_layout("input", input->get_layout())); + target_topology.add(reorder("border_input", "input", format::bs_fs_zyx_bsv16_fsv16, T_dt), + border("border", + "border_input", + tensor(format::bfzyx, std::vector(cd_lt.begin(), cd_lt.end()), 0), + tensor(format::bfzyx, std::vector(cd_rb.begin(), cd_rb.end()), 0), + pad_mode, + pad_value), + reorder("output", "border", cldnn::format::bfzyx, T_dt)); + cldnn::network target_network(engine, target_topology); + target_network.set_input_data("input", input); + auto target_output = target_network.execute().at("output").get_memory(); + cldnn::mem_lock target_output_ptr(target_output, get_test_stream()); + + topology base_topology; + base_topology.add(input_layout("input", input->get_layout())); + base_topology.add(border("border", + "input", + tensor(format::bfzyx, std::vector(cd_lt.begin(), cd_lt.end()), 0), + tensor(format::bfzyx, std::vector(cd_rb.begin(), cd_rb.end()), 0), + pad_mode, + pad_value)); + cldnn::network base_network(engine, base_topology); + base_network.set_input_data("input", input); + auto base_output = base_network.execute().at("border").get_memory(); + cldnn::mem_lock base_output_ptr(base_output, get_test_stream()); + + EXPECT_TRUE(!memcmp(target_output_ptr.data(), base_output_ptr.data(), sizeof(T) * mult(sh_out))); +} + TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_constant) { // Input (XY) : 4x3 // Output (XY): 10x7 @@ -58,15 +320,13 @@ TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_constant) { auto input = engine.allocate_memory({data_types::f32, format::yxfb, {in_size_b, in_size_f, in_size_x, in_size_y}}); topology topology; - topology.add( - input_layout("input", input->get_layout()) - ); - topology.add( - border("output", "input", - {blt_size_b, blt_size_f, blt_size_x, blt_size_y}, - {brb_size_b, brb_size_f, brb_size_x, brb_size_y}, - border_type::constant, 0.0f) - ); + topology.add(input_layout("input", input->get_layout())); + topology.add(border("output", + "input", + tensor(format::bfyx, {blt_size_b, blt_size_f, blt_size_y, blt_size_x}, 0), + tensor(format::bfyx, {brb_size_b, brb_size_f, brb_size_y, brb_size_x}, 0), + border_type::constant, + 0.0f)); std::vector input_data = { 1, -2, 3, -4, @@ -134,17 +394,15 @@ TEST(border_gpu, basic_fsv16_0x0x1x2_0x0x3x4_border_constant) { auto input = engine.allocate_memory({data_types::f32, format::yxfb, {in_size_b, in_size_f, in_size_x, in_size_y}}); topology topology; - topology.add( - input_layout("input", input->get_layout()) - ); - topology.add( - reorder("border_input", "input", cldnn::format::b_fs_yx_fsv16, cldnn::data_types::f32), - border("border", "border_input", - {blt_size_b, blt_size_f, blt_size_x, blt_size_y}, - {brb_size_b, brb_size_f, brb_size_x, brb_size_y}, - border_type::constant, 0.0f), - reorder("output", "border", cldnn::format::yxfb, cldnn::data_types::f32) - ); + topology.add(input_layout("input", input->get_layout())); + topology.add(reorder("border_input", "input", cldnn::format::b_fs_yx_fsv16, cldnn::data_types::f32), + border("border", + "border_input", + tensor(format::bfyx, {blt_size_b, blt_size_f, blt_size_y, blt_size_x}, 0), + tensor(format::bfyx, {brb_size_b, brb_size_f, brb_size_y, brb_size_x}, 0), + border_type::constant, + 0.0f), + reorder("output", "border", cldnn::format::yxfb, cldnn::data_types::f32)); std::vector input_data = { 1, -2, 3, -4, @@ -214,15 +472,13 @@ TEST(border_gpu, basic_bfzyx_0x0x1x01_0x0x0x0x3_border_constant) { auto input = engine.allocate_memory({ data_types::f32, format::bfzyx,{ in_size_b, in_size_f, in_size_x, in_size_y, in_size_z } }); topology topology; - topology.add( - input_layout("input", input->get_layout()) - ); - topology.add( - border("output", "input", - { blt_size_b, blt_size_f, blt_size_x, blt_size_y, blt_size_z }, - { brb_size_b, brb_size_f, brb_size_x, brb_size_y, brb_size_z }, - border_type::constant, 0.0f) - ); + topology.add(input_layout("input", input->get_layout())); + topology.add(border("output", + "input", + tensor(format::bfzyx, {blt_size_b, blt_size_f, blt_size_z, blt_size_y, blt_size_x}, 0), + tensor(format::bfzyx, {brb_size_b, brb_size_f, brb_size_z, brb_size_y, brb_size_x}, 0), + border_type::constant, + 0.0f)); std::vector input_data = { 1, -2, @@ -323,15 +579,13 @@ TEST(border_gpu, basic_bfwzyx_0x0x0x1x0x1_0x0x0x1x0x1_border_constant) { auto input = engine.allocate_memory({ data_types::f32, format::bfwzyx, tensor{ batch(in_size_b), feature(in_size_f), spatial(in_size_x, in_size_y, in_size_z, in_size_w) } }); topology topology; - topology.add( - input_layout("input", input->get_layout()) - ); - topology.add( - border("output", "input", - tensor{ batch(blt_size_b), feature(blt_size_f), spatial(blt_size_x, blt_size_y, blt_size_z, blt_size_w) }, - tensor{ batch(brb_size_b), feature(brb_size_f), spatial(brb_size_x, brb_size_y, brb_size_z, brb_size_w) }, - border_type::constant, 0.0f) - ); + topology.add(input_layout("input", input->get_layout())); + topology.add(border("output", + "input", + tensor(format::bfwzyx, {blt_size_b, blt_size_f, blt_size_w, blt_size_z, blt_size_y, blt_size_x}, 0), + tensor(format::bfwzyx, {brb_size_b, brb_size_f, brb_size_w, brb_size_z, brb_size_y, brb_size_x}, 0), + border_type::constant, + 0.0f)); std::vector input_data = { 1, -2, @@ -427,15 +681,13 @@ TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_constant_non_constant) { auto input = engine.allocate_memory({data_types::f32, format::yxfb, {in_size_b, in_size_f, in_size_x, in_size_y}}); topology topology; - topology.add( - input_layout("input", input->get_layout()) - ); - topology.add( - border("output", "input", - tensor{blt_size_b, blt_size_f, blt_size_x, blt_size_y}, - tensor{brb_size_b, brb_size_f, brb_size_x, brb_size_y}, - border_type::constant, 1.0f) - ); + topology.add(input_layout("input", input->get_layout())); + topology.add(border("output", + "input", + tensor(format::bfyx, {blt_size_b, blt_size_f, blt_size_y, blt_size_x}, 0), + tensor(format::bfyx, {brb_size_b, brb_size_f, brb_size_y, brb_size_x}, 0), + border_type::constant, + 1.0f)); std::vector input_data = { 1, -2, 3, -4, @@ -503,15 +755,12 @@ TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_mirror) { auto input = engine.allocate_memory({data_types::f32, format::yxfb, {in_size_b, in_size_f, in_size_x, in_size_y}}); topology topology; - topology.add( - input_layout("input", input->get_layout()) - ); - topology.add( - border("output", "input", - {blt_size_b, blt_size_f, blt_size_x, blt_size_y}, - {brb_size_b, brb_size_f, brb_size_x, brb_size_y}, - border_type::mirror) - ); + topology.add(input_layout("input", input->get_layout())); + topology.add(border("output", + "input", + tensor(format::bfyx, {blt_size_b, blt_size_f, blt_size_y, blt_size_x}, 0), + tensor(format::bfyx, {brb_size_b, brb_size_f, brb_size_y, brb_size_x}, 0), + border_type::mirror)); std::vector input_data = { 1, -2, 3, -4, @@ -581,15 +830,12 @@ TEST(border_gpu, basic_bfzyx_0x0x0x0x1_0x0x0x0x1_border_mirror) { auto input = engine.allocate_memory({ data_types::f32, format::bfzyx,{ in_size_b, in_size_f, in_size_x, in_size_y, in_size_z } }); topology topology; - topology.add( - input_layout("input", input->get_layout()) - ); - topology.add( - border("output", "input", - { blt_size_b, blt_size_f, blt_size_x, blt_size_y, blt_size_z }, - { brb_size_b, brb_size_f, brb_size_x, brb_size_y, brb_size_z }, - border_type::mirror) - ); + topology.add(input_layout("input", input->get_layout())); + topology.add(border("output", + "input", + tensor(format::bfzyx, {blt_size_b, blt_size_f, blt_size_z, blt_size_y, blt_size_x}, 0), + tensor(format::bfzyx, {brb_size_b, brb_size_f, brb_size_z, brb_size_y, brb_size_x}, 0), + border_type::mirror)); const std::vector sizes{ static_cast(in_size_b), static_cast(in_size_f), static_cast(in_size_y), static_cast(in_size_x), @@ -661,15 +907,13 @@ TEST(border_gpu, basic_bfzyxw_0x0x0x0x1_0x0x0x0x1_border_mirror) { auto input = engine.allocate_memory({ data_types::f32, format::bfwzyx, tensor{ batch(in_size_b), feature(in_size_f), spatial(in_size_x, in_size_y, in_size_z, in_size_w) } }); topology topology; + topology.add(input_layout("input", input->get_layout())); topology.add( - input_layout("input", input->get_layout()) - ); - topology.add( - border("output", "input", - tensor{ batch(blt_size_b), feature(blt_size_f), spatial(blt_size_x, blt_size_y, blt_size_z, blt_size_w) }, - tensor{ batch(brb_size_b), feature(brb_size_f), spatial(brb_size_x, brb_size_y, brb_size_z, brb_size_w) }, - border_type::mirror) - ); + border("output", + "input", + tensor(format::bfwzyx, {blt_size_b, blt_size_f, blt_size_w, blt_size_z, blt_size_y, blt_size_x}, 0), + tensor(format::bfwzyx, {brb_size_b, brb_size_f, brb_size_w, brb_size_z, brb_size_y, brb_size_x}, 0), + border_type::mirror)); const std::vector sizes{ static_cast(in_size_b), static_cast(in_size_f), static_cast(in_size_y), static_cast(in_size_x), @@ -738,15 +982,12 @@ TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_mirror_101) { auto input = engine.allocate_memory({data_types::f32, format::yxfb, tensor{in_size_b, in_size_f, in_size_x, in_size_y}}); topology topology; - topology.add( - input_layout("input", input->get_layout()) - ); - topology.add( - border("output", "input", - tensor{blt_size_b, blt_size_f, blt_size_x, blt_size_y}, - tensor{brb_size_b, brb_size_f, brb_size_x, brb_size_y}, - border_type::mirror_101) - ); + topology.add(input_layout("input", input->get_layout())); + topology.add(border("output", + "input", + tensor(format::bfyx, {blt_size_b, blt_size_f, blt_size_y, blt_size_x}, 0), + tensor(format::bfyx, {brb_size_b, brb_size_f, brb_size_y, brb_size_x}, 0), + border_type::mirror_101)); std::vector input_data = { 1, -2, 3, -4, 4, @@ -817,15 +1058,12 @@ TEST(border_gpu, basic_bfzyx_0x0x0x0x1_0x0x0x0x1_border_mirror_101) { auto input = engine.allocate_memory({ data_types::f32, format::bfzyx, tensor{ in_size_b, in_size_f, in_size_x, in_size_y, in_size_z } }); topology topology; - topology.add( - input_layout("input", input->get_layout()) - ); - topology.add( - border("output", "input", - tensor{ blt_size_b, blt_size_f, blt_size_x, blt_size_y, blt_size_z }, - tensor{ brb_size_b, brb_size_f, brb_size_x, brb_size_y, brb_size_z }, - border_type::mirror_101) - ); + topology.add(input_layout("input", input->get_layout())); + topology.add(border("output", + "input", + tensor(format::bfzyx, {blt_size_b, blt_size_f, blt_size_z, blt_size_y, blt_size_x}, 0), + tensor(format::bfzyx, {brb_size_b, brb_size_f, brb_size_z, brb_size_y, brb_size_x}, 0), + border_type::mirror_101)); std::vector input_data = { 1, -2, 3, -4, 4, @@ -903,15 +1141,13 @@ TEST(border_gpu, basic_bfwzyx_0x0x0x0x1x1_0x0x0x0x1x1_border_mirror_101) { auto input = engine.allocate_memory({ data_types::f32, format::bfwzyx, tensor{ batch(in_size_b), feature(in_size_f), spatial(in_size_x, in_size_y, in_size_z, in_size_w) } }); topology topology; + topology.add(input_layout("input", input->get_layout())); topology.add( - input_layout("input", input->get_layout()) - ); - topology.add( - border("output", "input", - tensor{ batch(blt_size_b), feature(blt_size_f), spatial(blt_size_x, blt_size_y, blt_size_z, blt_size_w) }, - tensor{ batch(brb_size_b), feature(brb_size_f), spatial(brb_size_x, brb_size_y, brb_size_z, brb_size_w) }, - border_type::mirror_101) - ); + border("output", + "input", + tensor(format::bfwzyx, {blt_size_b, blt_size_f, blt_size_w, blt_size_z, blt_size_y, blt_size_x}, 0), + tensor(format::bfwzyx, {brb_size_b, brb_size_f, brb_size_w, brb_size_z, brb_size_y, brb_size_x}, 0), + border_type::mirror_101)); std::vector input_data = { 1, -2, 3, -4, @@ -995,15 +1231,12 @@ TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_edge) { auto input = engine.allocate_memory({data_types::f32, format::yxfb, tensor{in_size_b, in_size_f, in_size_x, in_size_y}}); topology topology; - topology.add( - input_layout("input", input->get_layout()) - ); - topology.add( - border("output", "input", - tensor{blt_size_b, blt_size_f, blt_size_x, blt_size_y}, - tensor{brb_size_b, brb_size_f, brb_size_x, brb_size_y}, - border_type::edge) - ); + topology.add(input_layout("input", input->get_layout())); + topology.add(border("output", + "input", + tensor(format::bfyx, {blt_size_b, blt_size_f, blt_size_y, blt_size_x}, 0), + tensor(format::bfyx, {brb_size_b, brb_size_f, brb_size_y, brb_size_x}, 0), + border_type::edge)); std::vector input_data = { 1, -2, 3, -4, 4, @@ -1070,16 +1303,13 @@ TEST(border_gpu, basic_bfyx_2x1x2x3_1x2x3x4_border_constant) { auto input = engine.allocate_memory({data_types::f32, format::bfyx, tensor{in_size_b, in_size_f, in_size_x, in_size_y}}); topology topology; - topology.add( - input_layout("input", input->get_layout()) - ); - topology.add( - border("output", "input", - tensor{blt_size_b, blt_size_f, blt_size_x, blt_size_y}, - tensor{brb_size_b, brb_size_f, brb_size_x, brb_size_y}, - border_type::constant, - 0.0f) - ); + topology.add(input_layout("input", input->get_layout())); + topology.add(border("output", + "input", + tensor(format::bfyx, {blt_size_b, blt_size_f, blt_size_y, blt_size_x}, 0), + tensor(format::bfyx, {brb_size_b, brb_size_f, brb_size_y, brb_size_x}, 0), + border_type::constant, + 0.0f)); const std::vector sizes{ static_cast(in_size_b), static_cast(in_size_f), static_cast(in_size_y), static_cast(in_size_x)}; @@ -1142,15 +1372,12 @@ TEST(border_gpu, basic_bfyx_2x1x2x3_1x2x3x4_border_mirror) { auto input = engine.allocate_memory({data_types::f32, format::bfyx, tensor{in_size_b, in_size_f, in_size_x, in_size_y}}); topology topology; - topology.add( - input_layout("input", input->get_layout()) - ); - topology.add( - border("output", "input", - tensor{blt_size_b, blt_size_f, blt_size_x, blt_size_y}, - tensor{brb_size_b, brb_size_f, brb_size_x, brb_size_y}, - border_type::mirror) - ); + topology.add(input_layout("input", input->get_layout())); + topology.add(border("output", + "input", + tensor(format::bfyx, {blt_size_b, blt_size_f, blt_size_y, blt_size_x}, 0), + tensor(format::bfyx, {brb_size_b, brb_size_f, brb_size_y, brb_size_x}, 0), + border_type::mirror)); const std::vector sizes{ static_cast(in_size_b), static_cast(in_size_f), static_cast(in_size_y), static_cast(in_size_x) }; @@ -1209,15 +1436,12 @@ TEST(border_gpu, basic_bfyx_2x1x2x3_1x2x3x4_border_mirror_101) { auto input = engine.allocate_memory({data_types::f32, format::bfyx, tensor{in_size_b, in_size_f, in_size_x, in_size_y}}); topology topology; - topology.add( - input_layout("input", input->get_layout()) - ); - topology.add( - border("output", "input", - tensor{blt_size_b, blt_size_f, blt_size_x, blt_size_y}, - tensor{brb_size_b, brb_size_f, brb_size_x, brb_size_y}, - border_type::mirror_101) - ); + topology.add(input_layout("input", input->get_layout())); + topology.add(border("output", + "input", + tensor(format::bfyx, {blt_size_b, blt_size_f, blt_size_y, blt_size_x}, 0), + tensor(format::bfyx, {brb_size_b, brb_size_f, brb_size_y, brb_size_x}, 0), + border_type::mirror_101)); const std::vector sizes{ static_cast(in_size_b), static_cast(in_size_f), static_cast(in_size_y), static_cast(in_size_x) }; std::vector input_data = generate_rnd_real_input(sizes, -8.0f, 8.0f); @@ -1275,15 +1499,12 @@ TEST(border_gpu, basic_bfyx_2x1x2x3_1x2x3x4_border_edge) { auto input = engine.allocate_memory({data_types::f32, format::bfyx, tensor{in_size_b, in_size_f, in_size_x, in_size_y}}); topology topology; - topology.add( - input_layout("input", input->get_layout()) - ); - topology.add( - border("output", "input", - tensor{blt_size_b, blt_size_f, blt_size_x, blt_size_y}, - tensor{brb_size_b, brb_size_f, brb_size_x, brb_size_y}, - border_type::edge) - ); + topology.add(input_layout("input", input->get_layout())); + topology.add(border("output", + "input", + tensor(format::bfyx, {blt_size_b, blt_size_f, blt_size_y, blt_size_x}, 0), + tensor(format::bfyx, {brb_size_b, brb_size_f, brb_size_y, brb_size_x}, 0), + border_type::edge)); const std::vector sizes{ static_cast(in_size_b), static_cast(in_size_f), static_cast(in_size_y), static_cast(in_size_x) }; std::vector input_data = generate_rnd_real_input(sizes, -8.0f, 8.0f);