[GPU] Added zero input support for Pad (#19720)

This commit is contained in:
Roman Lyamin 2023-09-14 09:59:53 +04:00 committed by GitHub
parent 66dd347d38
commit 5ba60f845e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 126 additions and 8 deletions

View File

@ -109,6 +109,31 @@ struct border_impl : typed_primitive_impl_ocl<border> {
auto kernel_params = get_kernel_params(impl_param, true);
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
}
protected:
kernel_arguments_data get_arguments(const typed_primitive_inst<border>& instance) const override {
    auto args = parent::get_arguments(instance);
    // A zero-sized input with a non-empty output means the kernel still runs;
    // setting a zero-byte buffer as a kernel argument would fail, so substitute
    // the fake intermediate buffer allocated by get_internal_buffer_layouts_impl.
    const bool input_is_empty = instance.get_input_layout().count() == 0;
    if (input_is_empty) {
        args.inputs[0] = instance.get_intermediates_memories().front();
    }
    return args;
}
std::vector<layout> get_internal_buffer_layouts_impl() const override {
const auto& prim_params = static_cast<const kernel_selector::border_params&>(*_kernel_data.params);
std::vector<layout> layouts;
if (prim_params.inputs[0].LogicalSize() == 0) {
layout any_layout = {data_types::u8, format::bfyx, {1, 1, 1, 1}};
layouts.push_back(any_layout);
}
return layouts;
}
};
namespace detail {

View File

@ -773,11 +773,11 @@ event::ptr primitive_inst::execute(const std::vector<event::ptr>& events) {
update_shape();
// Check successor reorder if layouts are same
// Need to set can_be_optimized for user reorder at predesescor because
// Need to set can_be_optimized for user reorder at predecessor because
// if the user is can_be_optimized and output node then current nodes' output should be allocated to host.
do_runtime_skip_reorder();
if (_impl_params->output_layouts[0].count() == 0) {
GPU_DEBUG_TRACE_DETAIL << id() << " : Skipping becuase output data is empty " << std::endl;
GPU_DEBUG_TRACE_DETAIL << id() << " : Skipping because output data is empty " << std::endl;
auto ev = get_network().get_stream().create_user_event(true);
update_shape_done_by_other = false; // reset
return ev;

View File

@ -128,8 +128,8 @@ std::string strided_slice_inst::to_string(strided_slice_node const& node) {
json_composite strided_slice_info;
strided_slice_info.add("input id", input.id());
std::vector<std::string> dependencies_info = {"begin_param id", "end_param id", "stride_param id"};
for (size_t i = 0; i < node.get_dependencies().size(); ++i) {
strided_slice_info.add(dependencies_info[i], node.get_dependency(i).id());
for (size_t i = 1; i < node.get_dependencies().size(); ++i) {
strided_slice_info.add(dependencies_info[i - 1], node.get_dependency(i).id());
}
strided_slice_info.add("begin", node.get_primitive()->begin);
strided_slice_info.add("end", node.get_primitive()->end);

View File

@ -74,7 +74,7 @@ KernelsData BorderKernelBase::GetCommonKernelsData(const Params& params,
auto dispatchData = SetDefault(prim_params);
KernelData k_data = KernelData::Default<border_params>(params);
k_data.update_dispatch_data_func = [this](const Params& params, KernelData& kd) {
const auto& prim_params = static_cast<const border_params&>(params);
const auto& prim_params = static_cast<const border_params&>(params);
auto dispatchData = SetDefault(prim_params);
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
kd.kernels[0].params.workGroups.global = dispatchData.gws;

View File

@ -1684,12 +1684,12 @@ public:
blt_size_f = p.lt[1];
blt_size_y = p.lt[2];
blt_size_x = p.lt[3];
brb_size_b = p.rb[0];
brb_size_f = p.rb[1];
brb_size_y = p.rb[2];
brb_size_x = p.rb[3];
out_size_b = in_size_b + blt_size_b + brb_size_b;
out_size_f = in_size_f + blt_size_f + brb_size_f;
out_size_y = in_size_y + blt_size_y + brb_size_y;
@ -1825,5 +1825,98 @@ TEST_P(border_dynamic_test, border_dynamic_test) {}
INSTANTIATE_TEST_SUITE_P(border_dynamic_test,
border_dynamic_test,
::testing::ValuesIn(dynamic_params));
}; // namespace
TEST(border_gpu, basic_zero_input_dynamic) {
    auto& engine = get_test_engine();

    // Allocate a real 1-element u1 buffer and reinterpret it with a zero-sized f32
    // layout: requesting a 0-byte USM allocation directly would crash (known WA).
    const layout fake_layout = {{1}, data_types::bin, format::bfyx};
    auto input_mem = engine.allocate_memory(fake_layout);
    const layout zero_layout = {{0, 1}, data_types::f32, format::bfyx};
    input_mem = engine.reinterpret_buffer(*input_mem, zero_layout);

    const layout dynamic_layout = {ov::PartialShape::dynamic(2), data_types::f32, format::bfyx};
    const ov::CoordinateDiff pads_begin = {4, 0};
    const ov::CoordinateDiff pads_end = {0, 0};

    topology topology;
    topology.add(input_layout("input", dynamic_layout));
    topology.add(border("border", {input_info("input")}, 0, pads_begin, pads_end, ov::op::PadMode::CONSTANT, 1.0f));

    // Padding a (0, 1) input with 4 leading rows of the CONSTANT value 1.0f
    // yields a (4, 1) output filled with ones.
    const std::vector<float> expected = {
        1, 1, 1, 1
    };

    ExecutionConfig config = get_test_default_config(engine);
    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
    cldnn::network network(engine, topology, config);
    network.set_input_data("input", input_mem);

    auto inst = network.get_primitive("border");
    auto impl = inst->get_impl();
    ASSERT_TRUE(impl != nullptr);
    ASSERT_TRUE(impl->is_dynamic());

    auto outputs = network.execute();
    ASSERT_EQ(outputs.size(), size_t(1));
    ASSERT_EQ(outputs.begin()->first, "border");

    auto output_mem = outputs.at("border").get_memory();
    cldnn::mem_lock<float> out_ptr(output_mem, get_test_stream());
    ASSERT_EQ(expected.size(), out_ptr.size());
    for (size_t idx = 0; idx < out_ptr.size(); ++idx) {
        ASSERT_EQ(expected[idx], out_ptr[idx]);
    }
}
TEST(border_gpu, basic_zero_input) {
    auto& engine = get_test_engine();

    // Allocate a real 1-element u1 buffer and reinterpret it with a zero-sized f32
    // layout: requesting a 0-byte USM allocation directly would crash (known WA).
    const layout fake_layout = {{1}, data_types::bin, format::bfyx};
    auto input_mem = engine.allocate_memory(fake_layout);
    const layout zero_layout = {{0, 1}, data_types::f32, format::bfyx};
    input_mem = engine.reinterpret_buffer(*input_mem, zero_layout);

    // pads_begin is fed at runtime through a second input (PAD_NON_CONST_INPUT::BEGIN).
    const ov::CoordinateDiff pads_begin = {4, 0};
    const ov::PartialShape pads_begin_shape = { ov::Dimension(pads_begin.size()) };
    auto pads_begin_mem = engine.allocate_memory({pads_begin_shape, data_types::i32, format::bfyx});
    set_values(pads_begin_mem, pads_begin);

    topology topology;
    topology.add(input_layout("input", input_mem->get_layout()));
    topology.add(input_layout("pads_begin", pads_begin_mem->get_layout()));
    topology.add(border("border", {input_info("input"), input_info("pads_begin")},
                        border::PAD_NON_CONST_INPUT::BEGIN,
                        /*pads_begin*/{}, /*pads_end*/{0, 0},
                        ov::op::PadMode::CONSTANT,
                        2.0f));

    // Padding a (0, 1) input with 4 leading rows of the CONSTANT value 2.0f
    // yields a (4, 1) output filled with twos.
    const std::vector<float> expected = {
        2, 2, 2, 2
    };

    ExecutionConfig config = get_test_default_config(engine);
    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
    cldnn::network network(engine, topology, config);
    network.set_input_data("input", input_mem);
    network.set_input_data("pads_begin", pads_begin_mem);

    auto outputs = network.execute();
    auto output_mem = outputs.at("border").get_memory();
    cldnn::mem_lock<float> out_ptr(output_mem, get_test_stream());
    ASSERT_EQ(expected.size(), out_ptr.size());
    for (size_t idx = 0; idx < out_ptr.size(); ++idx) {
        ASSERT_EQ(expected[idx], out_ptr[idx]);
    }
}
}; // namespace