[GPU] Added zero input support for Pad (#19720)
parent 66dd347d38
commit 5ba60f845e
@@ -109,6 +109,31 @@ struct border_impl : typed_primitive_impl_ocl<border> {
         auto kernel_params = get_kernel_params(impl_param, true);
         (_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
     }
+
+protected:
+    kernel_arguments_data get_arguments(const typed_primitive_inst<border>& instance) const override {
+        kernel_arguments_data args = parent::get_arguments(instance);
+
+        // In case of zero input shape and non-zero output (kernel execution is not skipped), we need to add a fake input buffer
+        // so as not to get an error during the argument setting stage
+        if (instance.get_input_layout().count() == 0) {
+            args.inputs[0] = instance.get_intermediates_memories().front();
+        }
+
+        return args;
+    }
+
+    std::vector<layout> get_internal_buffer_layouts_impl() const override {
+        const auto& prim_params = static_cast<const kernel_selector::border_params&>(*_kernel_data.params);
+        std::vector<layout> layouts;
+
+        if (prim_params.inputs[0].LogicalSize() == 0) {
+            layout any_layout = {data_types::u8, format::bfyx, {1, 1, 1, 1}};
+            layouts.push_back(any_layout);
+        }
+
+        return layouts;
+    }
 };

 namespace detail {
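Note (illustrative, not part of the commit): the fake buffer is needed because Pad can produce a non-empty output from an empty input, so the kernel still runs but has no real input memory to bind. A minimal worked example, assuming CONSTANT pad mode:

    // input shape   {0, 1}   -> zero elements
    // pads_begin    {4, 0}
    // pads_end      {0, 0}
    // output shape  {4, 1}   -> four elements, all equal to the pad value

In that case the 1-byte u8 layout declared in get_internal_buffer_layouts_impl() is allocated as an intermediate buffer, and get_arguments() binds it as inputs[0] so the argument-setting stage does not fail.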
@@ -773,11 +773,11 @@ event::ptr primitive_inst::execute(const std::vector<event::ptr>& events) {
         update_shape();

         // Check successor reorder if layouts are same
-        // Need to set can_be_optimized for user reorder at predesescor because
+        // Need to set can_be_optimized for user reorder at predecessor because
         // if the user is can_be_optimized and output node then current nodes' output should be allocated to host.
         do_runtime_skip_reorder();
         if (_impl_params->output_layouts[0].count() == 0) {
-            GPU_DEBUG_TRACE_DETAIL << id() << " : Skipping becuase output data is empty " << std::endl;
+            GPU_DEBUG_TRACE_DETAIL << id() << " : Skipping because output data is empty " << std::endl;
             auto ev = get_network().get_stream().create_user_event(true);
             update_shape_done_by_other = false; // reset
             return ev;
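Note (sketch under stated assumptions, not from the commit): count() here is the total number of elements described by the output layout, so any zero dimension makes the whole output empty and the primitive returns an already-signalled user event instead of enqueueing work. A hypothetical helper, element_count, showing what the check amounts to:

    // element_count is illustrative only, mirroring what count() == 0 means above
    size_t element_count(const std::vector<size_t>& dims) {
        size_t n = 1;
        for (auto d : dims)
            n *= d;          // {0, 1} -> 0, {4, 1} -> 4
        return n;
    }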
@@ -128,8 +128,8 @@ std::string strided_slice_inst::to_string(strided_slice_node const& node) {
     json_composite strided_slice_info;
     strided_slice_info.add("input id", input.id());
     std::vector<std::string> dependencies_info = {"begin_param id", "end_param id", "stride_param id"};
-    for (size_t i = 0; i < node.get_dependencies().size(); ++i) {
-        strided_slice_info.add(dependencies_info[i], node.get_dependency(i).id());
+    for (size_t i = 1; i < node.get_dependencies().size(); ++i) {
+        strided_slice_info.add(dependencies_info[i - 1], node.get_dependency(i).id());
     }
     strided_slice_info.add("begin", node.get_primitive()->begin);
     strided_slice_info.add("end", node.get_primitive()->end);
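Note (not part of the diff): dependency 0 of strided_slice is the data input, which is already printed as "input id", while dependencies_info only lists the optional begin/end/stride inputs. Starting the loop at i = 1 and indexing dependencies_info[i - 1] restores the intended mapping:

    // assumed dependency order, based on the labels above
    // dependency 1 -> "begin_param id"
    // dependency 2 -> "end_param id"
    // dependency 3 -> "stride_param id"

With the old indexing, the data input was labelled "begin_param id" and, when all three parameter inputs were present, dependencies_info[i] read past the end of the vector.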
@@ -74,7 +74,7 @@ KernelsData BorderKernelBase::GetCommonKernelsData(const Params& params,
    auto dispatchData = SetDefault(prim_params);
    KernelData k_data = KernelData::Default<border_params>(params);

    k_data.update_dispatch_data_func = [this](const Params& params, KernelData& kd) {
        const auto& prim_params = static_cast<const border_params&>(params);
        auto dispatchData = SetDefault(prim_params);
        OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
        kd.kernels[0].params.workGroups.global = dispatchData.gws;
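Note (illustrative, assumptions flagged): this lambda is the callback invoked from border_impl::update_dispatch_data() in the first hunk; for dynamic shapes the compiled kernel is reused and only the dispatch is refreshed from the actual shapes. Assuming the border kernel launches one work-item per output element, the recomputed global size scales with the padded output:

    // rough idea only, not the actual SetDefault() implementation
    // total work-items ~= out_b * out_f * out_y * out_x    // a {4, 1} output -> 4 items

So a zero-element input still gets a valid, non-empty dispatch as long as the padded output itself is non-empty.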
@@ -1684,12 +1684,12 @@ public:
         blt_size_f = p.lt[1];
         blt_size_y = p.lt[2];
         blt_size_x = p.lt[3];

         brb_size_b = p.rb[0];
         brb_size_f = p.rb[1];
         brb_size_y = p.rb[2];
         brb_size_x = p.rb[3];

         out_size_b = in_size_b + blt_size_b + brb_size_b;
         out_size_f = in_size_f + blt_size_f + brb_size_f;
         out_size_y = in_size_y + blt_size_y + brb_size_y;
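Note (worked example, not part of the diff): the test helper above simply adds the begin and end pad sizes to each input dimension, which is exactly how a zero-sized input can still yield output:

    // out = in + pad_begin + pad_end, per dimension
    // dim 0: 0 + 4 + 0 = 4
    // dim 1: 1 + 0 + 0 = 1   -> 4 output elements from an input with none

The new zero-input tests below rely on this: a {0, 1} input padded with pads_begin {4, 0} produces a {4, 1} output filled with the constant pad value.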
@@ -1825,5 +1825,98 @@ TEST_P(border_dynamic_test, border_dynamic_test) {}
 INSTANTIATE_TEST_SUITE_P(border_dynamic_test,
                          border_dynamic_test,
                          ::testing::ValuesIn(dynamic_params));
-}; // namespace
+
+TEST(border_gpu, basic_zero_input_dynamic) {
+    auto& engine = get_test_engine();
+
+    // WA to avoid crash due to attempt to allocate 0 bytes for USM memory
+    layout fake_input_layout = {{1}, data_types::bin, format::bfyx};
+    auto input = engine.allocate_memory(fake_input_layout);
+
+    layout zero_input_layout = {{0, 1}, data_types::f32, format::bfyx};
+    input = engine.reinterpret_buffer(*input, zero_input_layout);
+
+    layout input_layout_dynamic = {ov::PartialShape::dynamic(2), data_types::f32, format::bfyx};
+
+    ov::CoordinateDiff pads_begin = {4, 0};
+    ov::CoordinateDiff pads_end = {0, 0};
+
+    topology topology;
+    topology.add(input_layout("input", input_layout_dynamic));
+    topology.add(border("border", {input_info("input")}, 0, pads_begin, pads_end, ov::op::PadMode::CONSTANT, 1.0f));
+
+    std::vector<float> ref_output = {
+        1, 1, 1, 1
+    };
+
+    ExecutionConfig config = get_test_default_config(engine);
+    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
+
+    cldnn::network network(engine, topology, config);
+    network.set_input_data("input", input);
+
+    auto inst = network.get_primitive("border");
+    auto impl = inst->get_impl();
+    ASSERT_TRUE(impl != nullptr);
+    ASSERT_TRUE(impl->is_dynamic());
+
+    auto outputs = network.execute();
+    ASSERT_EQ(outputs.size(), size_t(1));
+    ASSERT_EQ(outputs.begin()->first, "border");
+
+    auto output = outputs.at("border").get_memory();
+    cldnn::mem_lock<float> output_ptr(output, get_test_stream());
+
+    ASSERT_EQ(ref_output.size(), output_ptr.size());
+
+    for (size_t i = 0; i < output_ptr.size(); ++i) {
+        ASSERT_EQ(ref_output[i], output_ptr[i]);
+    }
+}
+
+TEST(border_gpu, basic_zero_input) {
+    auto& engine = get_test_engine();
+
+    // WA to avoid crash due to attempt to allocate 0 bytes for USM memory
+    layout fake_input_layout = {{1}, data_types::bin, format::bfyx};
+    auto input = engine.allocate_memory(fake_input_layout);
+
+    layout zero_input_layout = {{0, 1}, data_types::f32, format::bfyx};
+    input = engine.reinterpret_buffer(*input, zero_input_layout);
+
+    ov::CoordinateDiff pads_begin = {4, 0};
+    ov::PartialShape pads_begin_shape = { ov::Dimension(pads_begin.size()) };
+    auto pads_begin_input = engine.allocate_memory({pads_begin_shape, data_types::i32, format::bfyx});
+    set_values(pads_begin_input, pads_begin);
+
+    topology topology;
+    topology.add(input_layout("input", input->get_layout()));
+    topology.add(input_layout("pads_begin", pads_begin_input->get_layout()));
+    topology.add(border("border", {input_info("input"), input_info("pads_begin")},
+                        border::PAD_NON_CONST_INPUT::BEGIN,
+                        /*pads_begin*/{}, /*pads_end*/{0, 0},
+                        ov::op::PadMode::CONSTANT,
+                        2.0f));
+
+    std::vector<float> ref_output = {
+        2, 2, 2, 2
+    };
+
+    ExecutionConfig config = get_test_default_config(engine);
+    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
+
+    cldnn::network network(engine, topology, config);
+    network.set_input_data("input", input);
+    network.set_input_data("pads_begin", pads_begin_input);
+    auto outputs = network.execute();
+
+    auto output = outputs.at("border").get_memory();
+    cldnn::mem_lock<float> output_ptr(output, get_test_stream());
+
+    ASSERT_EQ(ref_output.size(), output_ptr.size());
+
+    for (size_t i = 0; i < output_ptr.size(); ++i) {
+        ASSERT_EQ(ref_output[i], output_ptr[i]);
+    }
+}
+}; // namespace