[GPU] Support empty tensor (#15631)

* Support empty tensor in gpu plugin

* Common kernel setup for skipping

* Refactor

* Cleanup

* Fix for shape agnostic kernel

* Fix error due to memory allocation conflict for an empty input blob with other input blob

* Fix output blob parsing error

* Fixed quantize unittest error

* Fixed wrong TC

* Rename set_skip_kernels to update_kernels_list_to_skip

* Refactor output blob processing

* Applied review comments : more cleanup
This commit is contained in:
Taylor Yeonbok Lee
2023-02-15 21:53:22 -08:00
committed by GitHub
parent 3f06d871bf
commit 523b516835
32 changed files with 416 additions and 107 deletions

View File

@@ -41,11 +41,16 @@ struct network_output {
return _result;
}
layout get_layout() const { // Last tensor memory might be null (e.g., {N, 0} shape) but we should be able to get the layout
    // Returns the cached output layout; valid even when _result (memory) is null for empty tensors.
    return _layout;
}
private:
event::ptr _event;
memory::ptr _result;
stream::ptr _stream;
network_output(event::ptr evt, memory::ptr mem, stream::ptr stream) : _event(evt), _result(mem), _stream(stream) {}
layout _layout;
network_output(event::ptr evt, memory::ptr mem, stream::ptr stream, layout layout) : _event(evt), _result(mem), _stream(stream), _layout(layout) {}
friend struct network;
};
@@ -126,7 +131,7 @@ public:
event::ptr evt;
if (get_stream().get_queue_type() == QueueTypes::out_of_order)
evt = get_primitive_event(output_id);
return network_output(evt, get_output_memory(output_id), get_stream_ptr());
return network_output(evt, get_output_memory(output_id), get_stream_ptr(), get_output_layout(output_id));
}
layout get_node_output_layout(const primitive_id& output_id) const;
memory::ptr get_output_memory(const primitive_id& output_id);

View File

@@ -43,9 +43,9 @@ struct activation_impl : typed_primitive_impl_ocl<activation> {
return args;
}
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) {
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
const auto& primitive = impl_param.typed_desc<activation>();
auto params = get_default_params<kernel_selector::activation_params>(impl_param);
auto params = get_default_params<kernel_selector::activation_params>(impl_param, is_shape_agnostic);
auto optional_params = get_default_optional_params<kernel_selector::activation_optional_params>(impl_param.get_program());
convert_new_activation_func(*primitive, params.activations);
@@ -66,8 +66,9 @@ struct activation_impl : typed_primitive_impl_ocl<activation> {
}
void update_dispatch_data(const kernel_impl_params& impl_param) override {
auto kernel_params = get_kernel_params(impl_param);
auto kernel_params = get_kernel_params(impl_param, true);
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
update_kernels_list_to_skip();
}
};

View File

@@ -26,9 +26,9 @@ struct broadcast_impl : typed_primitive_impl_ocl<broadcast> {
return make_unique<broadcast_impl>(*this);
}
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) {
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
const auto& primitive = impl_param.typed_desc<broadcast>();
auto params = get_default_params<kernel_selector::broadcast_params>(impl_param);
auto params = get_default_params<kernel_selector::broadcast_params>(impl_param, is_shape_agnostic);
auto optional_params = get_default_optional_params<kernel_selector::broadcast_optional_params>(impl_param.get_program());
const auto format = impl_param.get_output_layout().format;
@@ -151,8 +151,9 @@ struct broadcast_impl : typed_primitive_impl_ocl<broadcast> {
}
void update_dispatch_data(const kernel_impl_params& impl_param) override {
auto kernel_params = get_kernel_params(impl_param);
auto kernel_params = get_kernel_params(impl_param, true);
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
update_kernels_list_to_skip();
}
};

View File

@@ -60,9 +60,9 @@ struct cum_sum_impl : typed_primitive_impl_ocl<cum_sum> {
}
public:
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) {
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
const auto& primitive = impl_param.typed_desc<cum_sum>();
auto params = get_default_params<kernel_selector::cum_sum_params>(impl_param);
auto params = get_default_params<kernel_selector::cum_sum_params>(impl_param, is_shape_agnostic);
auto optional_params = get_default_optional_params<kernel_selector::cum_sum_optional_params>(impl_param.get_program());
size_t rank = impl_param.get_output_layout().get_rank();
@@ -73,8 +73,9 @@ public:
}
void update_dispatch_data(const kernel_impl_params& impl_param) override {
auto kernel_params = get_kernel_params(impl_param);
auto kernel_params = get_kernel_params(impl_param, true);
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
update_kernels_list_to_skip();
}
};

View File

@@ -33,11 +33,11 @@ protected:
}
public:
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) {
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
const auto& primitive = impl_param.typed_desc<eltwise>();
auto inputs_count = primitive->input.size();
auto params = get_default_params<kernel_selector::eltwise_params>(impl_param);
auto params = get_default_params<kernel_selector::eltwise_params>(impl_param, is_shape_agnostic);
auto optional_params = get_default_optional_params<kernel_selector::eltwise_optional_params>(impl_param.get_program());
for (size_t i = 1; i < inputs_count; i++) {
@@ -119,8 +119,9 @@ public:
}
void update_dispatch_data(const kernel_impl_params& impl_param) override {
auto kernel_params = get_kernel_params(impl_param);
auto kernel_params = get_kernel_params(impl_param, true);
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
update_kernels_list_to_skip();
}
};

View File

@@ -42,7 +42,7 @@ protected:
}
public:
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) {
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
const auto& primitive = impl_param.typed_desc<fully_connected>();
auto get_fc_input_layouts = [primitive](const std::vector<layout>& input_layouts) {
@@ -101,7 +101,7 @@ public:
updated_impl_param.output_layouts[0] = get_fc_output_layout(input_layouts, impl_param.get_output_layout());
const auto& progam = impl_param.get_program();
auto params = get_weights_bias_default_params<kernel_selector::fully_connected_params>(updated_impl_param);
auto params = get_weights_bias_default_params<kernel_selector::fully_connected_params>(updated_impl_param, false, is_shape_agnostic);
auto optional_params = get_default_weights_bias_optional_params<kernel_selector::fully_connected_optional_params>(progam);
optional_params.allowInputReordering = true;
@@ -122,8 +122,9 @@ public:
}
void update_dispatch_data(const kernel_impl_params& impl_param) override {
auto kernel_params = get_kernel_params(impl_param);
auto kernel_params = get_kernel_params(impl_param, true);
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
update_kernels_list_to_skip();
}
};

View File

@@ -72,9 +72,9 @@ struct gather_impl : typed_primitive_impl_ocl<gather> {
}
public:
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) {
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
const auto& primitive = impl_param.typed_desc<gather>();
auto params = get_default_params<kernel_selector::gather_params>(impl_param);
auto params = get_default_params<kernel_selector::gather_params>(impl_param, is_shape_agnostic);
auto optional_params = get_default_optional_params<kernel_selector::gather_optional_params>(impl_param.get_program());
auto input_layout = impl_param.get_input_layout(0);
@@ -101,8 +101,9 @@ public:
}
void update_dispatch_data(const kernel_impl_params& impl_param) override {
auto kernel_params = get_kernel_params(impl_param);
auto kernel_params = get_kernel_params(impl_param, true);
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
update_kernels_list_to_skip();
}
};

View File

@@ -27,12 +27,12 @@ struct gemm_impl : typed_primitive_impl_ocl<gemm> {
}
public:
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) {
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
const auto& primitive = impl_param.typed_desc<gemm>();
const auto input_layouts = gemm_inst::transform_input_layouts(primitive, impl_param.input_layouts, impl_param.output_layouts[0]);
const auto output_layout = gemm_inst::transform_output_layout(primitive, input_layouts, impl_param.output_layouts[0]);
auto params = get_default_params<kernel_selector::gemm_params>(impl_param);
auto params = get_default_params<kernel_selector::gemm_params>(impl_param, is_shape_agnostic);
auto optional_params = get_default_optional_params<kernel_selector::gemm_optional_params>(impl_param.get_program());
params.inputs.clear();
@@ -59,8 +59,9 @@ public:
}
void update_dispatch_data(const kernel_impl_params& impl_param) override {
auto kernel_params = get_kernel_params(impl_param);
auto kernel_params = get_kernel_params(impl_param, true);
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
update_kernels_list_to_skip();
}
};

View File

@@ -29,9 +29,9 @@ struct mvn_impl : typed_primitive_impl_ocl<mvn> {
return make_unique<mvn_impl>(*this);
}
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) {
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
const auto& primitive = impl_param.typed_desc<mvn>();
auto params = get_default_params<kernel_selector::mvn_params>(impl_param);
auto params = get_default_params<kernel_selector::mvn_params>(impl_param, is_shape_agnostic);
auto optional_params = get_default_optional_params<kernel_selector::mvn_optional_params>(impl_param.get_program());
params.mvnMode = primitive->across_channels ? kernel_selector::mvn_mode::ACROSS_CHANNELS
@@ -45,8 +45,9 @@ struct mvn_impl : typed_primitive_impl_ocl<mvn> {
}
void update_dispatch_data(const kernel_impl_params& impl_param) override {
auto kernel_params = get_kernel_params(impl_param);
auto kernel_params = get_kernel_params(impl_param, true);
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
update_kernels_list_to_skip();
}
};

View File

@@ -29,15 +29,16 @@ struct count_nonzero_impl : typed_primitive_impl_ocl<count_nonzero> {
return make_unique<count_nonzero_impl>(*this);
}
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) {
auto params = get_default_params<kernel_selector::count_nonzero_params>(impl_param);
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
auto params = get_default_params<kernel_selector::count_nonzero_params>(impl_param, is_shape_agnostic);
auto optional_params = get_default_optional_params<kernel_selector::count_nonzero_optional_params>(impl_param.get_program());
return {params, optional_params};
}
void update_dispatch_data(const kernel_impl_params& impl_param) override {
auto kernel_params = get_kernel_params(impl_param);
auto kernel_params = get_kernel_params(impl_param, true);
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
update_kernels_list_to_skip();
}
};
@@ -53,8 +54,8 @@ struct gather_nonzero_impl : typed_primitive_impl_ocl<gather_nonzero> {
return make_unique<gather_nonzero_impl>(*this);
}
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) {
auto params = get_default_params<kernel_selector::gather_nonzero_params>(impl_param);
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
auto params = get_default_params<kernel_selector::gather_nonzero_params>(impl_param, is_shape_agnostic);
auto optional_params = get_default_optional_params<kernel_selector::gather_nonzero_optional_params>(impl_param.get_program());
params.inputs.push_back(convert_data_tensor(impl_param.get_input_layout(1)));
@@ -63,8 +64,9 @@ struct gather_nonzero_impl : typed_primitive_impl_ocl<gather_nonzero> {
}
void update_dispatch_data(const kernel_impl_params& impl_param) override {
auto kernel_params = get_kernel_params(impl_param);
auto kernel_params = get_kernel_params(impl_param, true);
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
update_kernels_list_to_skip();
}
};

View File

@@ -54,9 +54,9 @@ struct permute_impl : typed_primitive_impl_ocl<permute> {
return make_unique<permute_impl>(*this);
}
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) {
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
const auto& primitive = impl_param.typed_desc<permute>();
auto params = get_default_params<kernel_selector::permute_params>(impl_param);
auto params = get_default_params<kernel_selector::permute_params>(impl_param, is_shape_agnostic);
auto optional_params = get_default_optional_params<kernel_selector::permute_optional_params>(impl_param.get_program());
auto in_rank = impl_param.get_input_layout(0).get_rank();
@@ -67,8 +67,9 @@ struct permute_impl : typed_primitive_impl_ocl<permute> {
}
void update_dispatch_data(const kernel_impl_params& impl_param) override {
auto kernel_params = get_kernel_params(impl_param);
auto kernel_params = get_kernel_params(impl_param, true);
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
update_kernels_list_to_skip();
}
};

View File

@@ -92,6 +92,7 @@ struct typed_primitive_impl_ocl : public typed_primitive_impl<PType> {
return make_unique<ImplType>(kernel_selector::kernel_data{});
}
auto kernel_params = ImplType::get_kernel_params(impl_param);
kernel_params.first.is_shape_agnostic = impl_param.is_dynamic();
auto& kernel_selector = ImplType::kernel_selector_t::Instance();
auto best_kernel = kernel_selector.get_best_kernel(kernel_params.first, kernel_params.second);
@@ -208,9 +209,12 @@ protected:
}
stream& stream = instance.get_network().get_stream();
for (size_t k = 0; k < _kernels.size(); ++k) {
size_t k_idx = 0;
for (size_t kd_idx = 0; kd_idx < _kernel_data.kernels.size(); ++kd_idx) {
kernel_arguments_data args;
if (_kernel_data.kernels[kd_idx].skip_execution) {
continue;
}
if (_kernel_args.inputs.size() > 0) {
args = get_arguments_by_idx(instance);
@@ -222,9 +226,9 @@ protected:
args.intermediates.push_back(m);
}
args.scalars = &_kernel_data.kernels[k].params.scalars;
args.scalars = &_kernel_data.kernels[kd_idx].params.scalars;
stream.set_arguments(*_kernels[k], _kernel_data.kernels[k].params, args);
stream.set_arguments(*_kernels[k_idx++], _kernel_data.kernels[kd_idx].params, args);
}
}
@@ -254,11 +258,12 @@ protected:
if (instance.can_be_optimized()) {
return aggregate_events(events, stream, false, instance.is_output());
}
std::vector<event::ptr> tmp_events(events);
std::vector<event::ptr> all_events;
for (size_t k = 0; k < _kernels.size(); ++k) {
size_t k_idx = 0;
for (size_t kd_idx = 0; kd_idx < _kernel_data.kernels.size(); ++kd_idx) {
if (_kernel_data.kernels[kd_idx].skip_execution)
continue;
std::vector<event::ptr> new_events;
// if any of the prim's users is a detection output, mark the prim as an output event (event won't be nullptr)
bool is_output_event;
@@ -281,9 +286,9 @@ protected:
}
}
args.scalars = &_kernel_data.kernels[k].params.scalars;
args.scalars = &_kernel_data.kernels[kd_idx].params.scalars;
auto ev = stream.enqueue_kernel(*_kernels[k], _kernel_data.kernels[k].params, args, tmp_events, is_output_event);
auto ev = stream.enqueue_kernel(*_kernels[k_idx++], _kernel_data.kernels[kd_idx].params, args, tmp_events, is_output_event);
new_events.push_back(ev);
all_events.push_back(ev);
@@ -304,7 +309,8 @@ protected:
std::vector<std::shared_ptr<cldnn::kernel_string>> get_kernels_source() override {
std::vector<std::shared_ptr<cldnn::kernel_string>> kernel_strings;
for (size_t i = 0; i < _kernel_data.kernels.size(); ++i) {
kernel_strings.push_back(_kernel_data.kernels[i].code.kernelString);
if (!_kernel_data.kernels[i].skip_execution)
kernel_strings.push_back(_kernel_data.kernels[i].code.kernelString);
}
return kernel_strings;
}
@@ -314,6 +320,14 @@ protected:
_kernel_data.kernels[i].code.kernelString.reset();
}
}
// Marks each kernel in _kernel_data as skippable when its global work size is zero
// (i.e., the kernel would process an empty tensor and has nothing to do).
void update_kernels_list_to_skip() {
    for (size_t i = 0; i < _kernel_data.kernels.size(); ++i) {
        // Bug fix: use kernels[i], not kernels[0] — previously only the first
        // kernel's skip flag was ever computed, regardless of the loop index.
        const auto& gws = _kernel_data.kernels[i].params.workGroups.global;
        // Product of all GWS dimensions == 0 means an empty dispatch.
        // Seed accumulate with size_t{1} to avoid int narrowing/overflow.
        _kernel_data.kernels[i].skip_execution =
            (std::accumulate(gws.begin(), gws.end(), static_cast<size_t>(1), std::multiplies<size_t>()) == 0);
    }
}
};
} // namespace ocl

View File

@@ -84,6 +84,7 @@ public:
quantize_params.inputs.push_back(convert_data_tensor(impl_param.input_layouts[i]));
}
quantize_params.is_shape_agnostic = impl_param.is_dynamic();
auto& kernel_selector = kernel_selector::quantize_kernel_selector::Instance();
auto best_kernel = kernel_selector.get_best_kernel(quantize_params, quantize_optional_params);
@@ -95,6 +96,7 @@ public:
const auto& output_layout = impl_param.get_output_layout();
quantize_params.packed_binary_output = output_layout.data_type == data_types::bin;
(_kernel_data.update_dispatch_data_func)(quantize_params, _kernel_data);
update_kernels_list_to_skip();
}
};

View File

@@ -78,9 +78,9 @@ struct reduce_impl : typed_primitive_impl_ocl<reduce> {
return make_unique<reduce_impl>(*this);
}
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) {
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
const auto& primitive = impl_param.typed_desc<reduce>();
auto params = get_default_params<kernel_selector::reduce_params>(impl_param);
auto params = get_default_params<kernel_selector::reduce_params>(impl_param, is_shape_agnostic);
auto optional_params = get_default_optional_params<kernel_selector::reduce_optional_params>(impl_param.get_program());
params.reduceAxes = convert_axes(primitive->axes, impl_param.input_layouts[0].get_rank());
@@ -91,8 +91,9 @@ struct reduce_impl : typed_primitive_impl_ocl<reduce> {
}
void update_dispatch_data(const kernel_impl_params& impl_param) override {
auto kernel_params = get_kernel_params(impl_param);
auto kernel_params = get_kernel_params(impl_param, true);
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
update_kernels_list_to_skip();
}
};

View File

@@ -41,10 +41,10 @@ protected:
}
public:
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) {
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
const auto& primitive = impl_param.typed_desc<reorder>();
auto&& output_layout = impl_param.get_output_layout();
auto params = get_default_params<kernel_selector::reorder_params>(impl_param);
auto params = get_default_params<kernel_selector::reorder_params>(impl_param, is_shape_agnostic);
auto optional_params = get_default_optional_params<kernel_selector::reorder_optional_params>(impl_param.get_program());
auto inputs_count = primitive->input.size();
@@ -107,8 +107,9 @@ public:
}
void update_dispatch_data(const kernel_impl_params& impl_param) override {
auto kernel_params = get_kernel_params(impl_param);
auto kernel_params = get_kernel_params(impl_param, true);
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
update_kernels_list_to_skip();
}
};

View File

@@ -27,9 +27,9 @@ struct scatter_nd_update_impl : typed_primitive_impl_ocl<scatter_nd_update> {
return make_unique<scatter_nd_update_impl>(*this);
}
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) {
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
const auto& primitive = impl_param.typed_desc<scatter_nd_update>();
auto params = get_default_params<kernel_selector::scatter_nd_update_params>(impl_param);
auto params = get_default_params<kernel_selector::scatter_nd_update_params>(impl_param, is_shape_agnostic);
auto optional_params = get_default_optional_params<kernel_selector::scatter_nd_update_optional_params>(impl_param.get_program());
params.indices_rank = primitive->indices_rank;
@@ -41,8 +41,9 @@ struct scatter_nd_update_impl : typed_primitive_impl_ocl<scatter_nd_update> {
}
void update_dispatch_data(const kernel_impl_params& impl_param) override {
auto kernel_params = get_kernel_params(impl_param);
auto kernel_params = get_kernel_params(impl_param, true);
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
update_kernels_list_to_skip();
}
};

View File

@@ -53,9 +53,9 @@ struct scatter_update_impl : typed_primitive_impl_ocl<scatter_update> {
}
public:
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) {
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
const auto& primitive = impl_param.typed_desc<scatter_update>();
auto params = get_default_params<kernel_selector::scatter_update_params>(impl_param);
auto params = get_default_params<kernel_selector::scatter_update_params>(impl_param, is_shape_agnostic);
auto optional_params = get_default_optional_params<kernel_selector::scatter_update_optional_params>(impl_param.get_program());
params.axis = convert_axis(primitive->axis, impl_param.get_input_layout(0).get_rank());
@@ -66,8 +66,9 @@ public:
}
void update_dispatch_data(const kernel_impl_params& impl_param) override {
auto kernel_params = get_kernel_params(impl_param);
auto kernel_params = get_kernel_params(impl_param, true);
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
update_kernels_list_to_skip();
}
};

View File

@@ -25,8 +25,8 @@ struct select_impl : typed_primitive_impl_ocl<select> {
return make_unique<select_impl>(*this);
}
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) {
auto params = get_default_params<kernel_selector::select_params>(impl_param);
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
auto params = get_default_params<kernel_selector::select_params>(impl_param, is_shape_agnostic);
auto optional_params = get_default_optional_params<kernel_selector::select_optional_params>(impl_param.get_program());
std::vector<layout> input_layouts = impl_param.input_layouts;
@@ -69,8 +69,9 @@ struct select_impl : typed_primitive_impl_ocl<select> {
}
void update_dispatch_data(const kernel_impl_params& impl_param) override {
auto kernel_params = get_kernel_params(impl_param);
auto kernel_params = get_kernel_params(impl_param, true);
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
update_kernels_list_to_skip();
}
};

View File

@@ -25,8 +25,8 @@ struct shape_of_impl : typed_primitive_impl_ocl<shape_of> {
return make_unique<shape_of_impl>(*this);
}
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) {
auto params = get_default_params<kernel_selector::shape_of_params>(impl_param);
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
auto params = get_default_params<kernel_selector::shape_of_params>(impl_param, is_shape_agnostic);
auto optional_params = get_default_optional_params<kernel_selector::shape_of_optional_params>(impl_param.get_program());
auto input_layout = impl_param.get_input_layout(0);
@@ -37,8 +37,9 @@ struct shape_of_impl : typed_primitive_impl_ocl<shape_of> {
}
void update_dispatch_data(const kernel_impl_params& impl_param) override {
auto kernel_params = get_kernel_params(impl_param);
auto kernel_params = get_kernel_params(impl_param, true);
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
update_kernels_list_to_skip();
}
};

View File

@@ -61,6 +61,7 @@ struct softmax_impl : typed_primitive_impl_ocl<softmax> {
void update_dispatch_data(const kernel_impl_params& impl_param) override {
auto kernel_params = get_kernel_params(impl_param);
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
update_kernels_list_to_skip();
}
};

View File

@@ -58,9 +58,9 @@ struct strided_slice_impl : typed_primitive_impl_ocl<strided_slice> {
}
public:
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) {
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
const auto& prim = impl_param.typed_desc<strided_slice>();
auto params = get_default_params<kernel_selector::strided_slice_params>(impl_param);
auto params = get_default_params<kernel_selector::strided_slice_params>(impl_param, is_shape_agnostic);
auto op_params = get_default_optional_params<kernel_selector::strided_slice_optional_params>(impl_param.get_program());
const size_t dims_num = params.inputs[0].Dimentions();
@@ -188,8 +188,9 @@ public:
}
void update_dispatch_data(const kernel_impl_params& impl_param) override {
auto kernel_params = get_kernel_params(impl_param);
auto kernel_params = get_kernel_params(impl_param, true);
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
update_kernels_list_to_skip();
}
};

View File

@@ -177,6 +177,16 @@ struct kernel_impl_params {
return fused_desc.back().output_layout;
}
bool is_dynamic() const {
for (auto i : input_layouts)
if (i.is_dynamic())
return true;
for (auto i : output_layouts)
if (i.is_dynamic())
return true;
return false;
}
template <class PType>
std::shared_ptr<const PType> typed_desc() const { return std::static_pointer_cast<const PType>(desc); }
@@ -201,12 +211,11 @@ kernel_selector::dim_tensor<T> convert_dim_vector(const tensor& t) {
void convert_fused_ops_to_legacy_activations(const kernel_impl_params& param_info, std::vector<kernel_selector::base_activation_params>& activations);
bool use_legacy_fused_ops(const kernel_impl_params& param_info);
bool is_shape_agnostic(const kernel_impl_params& param_info);
void set_params(const kernel_impl_params& param_info, kernel_selector::params& params);
template <typename params_t>
inline params_t get_default_params(const kernel_impl_params& param_info) {
inline params_t get_default_params(const kernel_impl_params& param_info, bool is_shape_agnostic = false) {
params_t params;
set_params(param_info, params);
@@ -214,11 +223,9 @@ inline params_t get_default_params(const kernel_impl_params& param_info) {
const auto& input_layout = param_info.get_input_layout(0);
const auto& output_layout = param_info.get_output_layout(0);
params.is_shape_agnostic = is_shape_agnostic;
params.inputs[0] = convert_data_tensor(input_layout);
params.outputs[0] = convert_data_tensor(output_layout);
if (is_shape_agnostic(param_info)) {
params.is_shape_agnostic = true;
}
params.layerID = param_info.desc->id;
if (use_legacy_fused_ops(param_info)) {
@@ -281,8 +288,8 @@ inline params_t get_default_params(const kernel_impl_params& param_info) {
}
template <typename params_t>
inline params_t get_weights_bias_default_params(const kernel_impl_params& param_info, bool has_group_dimension = false) {
params_t params = get_default_params<params_t>(param_info);
inline params_t get_weights_bias_default_params(const kernel_impl_params& param_info, bool has_group_dimension = false, bool is_shape_agnostic = false) {
params_t params = get_default_params<params_t>(param_info, is_shape_agnostic);
params.weights = convert_weights_tensor(*param_info.weights_layout, has_group_dimension);
if (param_info.bias_layout) {
@@ -294,8 +301,8 @@ inline params_t get_weights_bias_default_params(const kernel_impl_params& param_
}
template <typename params_t>
params_t get_weight_bias_zero_point_default_params(const kernel_impl_params& param_info, bool has_group_dimension = false) {
params_t params = get_weights_bias_default_params<params_t>(param_info, has_group_dimension);
params_t get_weight_bias_zero_point_default_params(const kernel_impl_params& param_info, bool has_group_dimension = false, bool is_shape_agnostic = false) {
params_t params = get_weights_bias_default_params<params_t>(param_info, has_group_dimension, is_shape_agnostic);
if (param_info.weights_zero_points_layout) {
params.weights_zero_points.push_back(

View File

@@ -1071,16 +1071,6 @@ bool use_legacy_fused_ops(const kernel_impl_params& param_info) {
return true;
}
bool is_shape_agnostic(const kernel_impl_params& param_info) {
const auto& program = param_info.prog;
const auto& node = program->get_node(param_info.desc->id);
if (node.is_dynamic())
return true;
return false;
}
void set_params(const kernel_impl_params& param_info, kernel_selector::params& params) {
const auto& program = param_info.prog;
const auto& device_info = program->get_engine().get_device_info();

View File

@@ -162,6 +162,11 @@ void dump(memory::ptr mem, stream& stream, std::ofstream& file_stream) {
<< ", original shape: " << size.to_string() << ")" << std::endl;
}
if (size.count() == 0) {
file_stream << "Empty buffer" << std::endl;
return;
}
mem_lock<T, mem_lock_type::read> lock(mem, stream);
auto mem_ptr = lock.data();
auto x_pitch = get_x_pitch(mem->get_layout());
@@ -226,8 +231,12 @@ void log_memory_to_file(memory::ptr mem, stream& stream, std::string layerName)
std::replace(filename.begin(), filename.end(), ' ', '_');
std::replace(filename.begin(), filename.end(), ':', '_');
filename = debug_config->dump_layers_path + filename + ".txt";
std::ofstream file_stream(filename);
if (!mem) {
file_stream << "Empty" << std::endl;
return;
}
auto mem_dt = mem->get_layout().data_type;
if (mem_dt == cldnn::data_types::f32)
dump<float>(mem, stream, file_stream);

View File

@@ -264,6 +264,8 @@ void primitive_inst::realloc_if_needed() {
}
// intermediate memory allocation is required for primitives consisting of multiple kernels in dynamic case
{
if (_impl == nullptr)
return;
const auto& ibuf_layouts = _impl->get_internal_buffer_layouts();
if (ibuf_layouts.empty())
return;
@@ -356,7 +358,6 @@ bool primitive_inst::update_impl() {
std::lock_guard<std::mutex> lock(get_network().get_impl_cache_mutex());
cache.add(impl_key, impl->clone());
});
_impl = _dynamic_impl->clone();
_impl->update_dispatch_data(*_impl_params);
@@ -386,13 +387,17 @@ bool primitive_inst::update_impl() {
event::ptr primitive_inst::execute(const std::vector<event::ptr>& events) {
const auto primitive_id = id();
OPENVINO_ASSERT(_has_valid_input, primitive_id, " has invalid/unset input");
GPU_DEBUG_GET_INSTANCE(debug_config);
std::vector<event::ptr> dependencies;
if (is_dynamic()) {
OPENVINO_ASSERT(_node != nullptr, "[GPU] Invalid primitive_inst object for dynamic shapes case: program_node can't be null");
update_shape();
if (_impl_params->output_layouts[0].bytes_count() == 0) {
auto ev = get_network().get_stream().create_user_event(true);
return ev;
}
if (!is_valid_fusion()) {
auto subgraph = get_unfused_subgraph();
@@ -423,8 +428,7 @@ event::ptr primitive_inst::execute(const std::vector<event::ptr>& events) {
// Try update impl if current impl is dynamic because opt kernel may be added to impl cache through async compilation.
// Only try update weight and realloc when impl is updated.
if (shape_changed() || !_impl
|| (!shape_changed() && _impl->is_dynamic())) {
if (shape_changed() || !_impl || (!shape_changed() && _impl->is_dynamic())) {
if (update_impl()) {
auto ev = update_weights();
if (ev)

View File

@@ -86,6 +86,12 @@ KernelsData kernel_selector_base::GetNaiveBestKernel(const KernelList& all_impls
if (kds.size() && kds[0].kernels.size()) {
kernelsData = kds;
kernelName = implementation->GetName();
if (!params.is_shape_agnostic) {
for (size_t k = 0; k < kds[0].kernels.size(); ++k) {
auto gws = kds[0].kernels[k].params.workGroups.global;
kernelsData[0].kernels[k].skip_execution = (std::accumulate(gws.begin(), gws.end(), 1, std::multiplies<size_t>()) == 0);
}
}
break;
}
} catch (std::runtime_error& ex) {

View File

@@ -64,6 +64,7 @@ struct KernelCode {
// A single compiled kernel together with its launch configuration.
struct clKernelData {
    KernelCode code;              // kernel source/entry-point information
    KernelParams params;          // work-group sizes, argument descriptors, scalars
    bool skip_execution = false;  // when true the kernel is not enqueued (e.g. zero global work size for an empty tensor)
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

View File

@@ -408,7 +408,7 @@ struct Params {
virtual ParamsKey GetParamsKey() const;
protected:
Params(KernelType kt, const std::string& id) : kType(kt), layerID(id) {}
Params(KernelType kt, const std::string& id) : kType(kt), layerID(id), is_shape_agnostic(false) {}
KernelType kType;
public:
@@ -416,7 +416,7 @@ public:
std::string forceImplementation;
EngineInfo engineInfo;
std::string uniqueID;
bool is_shape_agnostic;
virtual std::string to_string() const;
virtual std::string to_cache_string_v2() const;
};
@@ -631,7 +631,6 @@ struct base_params : public Params {
std::vector<fused_operation_desc> fused_ops = {};
MultiDataTensor inputs;
MultiDataTensor outputs;
bool is_shape_agnostic;
std::string to_string() const override;
std::string to_cache_string_v2() const override;
ParamsKey GetParamsKey() const override;
@@ -649,7 +648,7 @@ struct base_params : public Params {
}
protected:
explicit base_params(KernelType kt) : Params(kt, ""), inputs(1), outputs(1), is_shape_agnostic(false) {}
explicit base_params(KernelType kt) : Params(kt, ""), inputs(1), outputs(1) {}
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

View File

@@ -89,7 +89,6 @@ KernelsData ConcatenationKernelBase::GetCommonKernelsData(const Params& params,
}
const concatenation_params& orgParams = static_cast<const concatenation_params&>(params);
KernelData kd = KernelData::Default<concatenation_params>(params, orgParams.inputs.size());
uint32_t lastOffset = 0;
@@ -113,7 +112,7 @@ KernelsData ConcatenationKernelBase::GetCommonKernelsData(const Params& params,
kernel.code.kernelString = GetKernelString(kernelName, jit, entryPoint, params.engineInfo);
kernel.params.workGroups.global = dispatchData.gws;
kernel.params.workGroups.local = dispatchData.lws;
kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, (uint32_t)i });
kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, (uint32_t) i});
kernel.params.arguments.push_back({ArgumentDescriptor::Types::OUTPUT, 0});
ScalarDescriptor s;

View File

@@ -180,10 +180,6 @@ void InferRequest::SetBlob(const std::string& name, const Blob::Ptr& data) {
if (!data)
IE_THROW(NotAllocated) << "Failed to set empty blob with name: \'" << name << "\'";
size_t dataSize = data->size();
if (0 == dataSize) {
IE_THROW() << "Input data is empty. Input name: \'" << name << "\'";
}
if (inputTensorsMap.find(name) != inputTensorsMap.end()) {
inputTensorsMap.erase(name);
}
@@ -202,13 +198,18 @@ void InferRequest::SetBlob(const std::string& name, const Blob::Ptr& data) {
<< (is_input ? "input" : "output") << " precision";
}
size_t dataBinSize = dataSize * data->element_size();
size_t netReqBinSize = std::accumulate(desc.getDims().begin(), desc.getDims().end(),
desc.getPrecision().size(),
std::multiplies<size_t>());
auto node = is_input ? findInputByNodeName(name) : findOutputByNodeName(name);
bool isDynamic = (node && node->get_output_partial_shape(0).is_dynamic());
size_t dataSize = data->size();
if (0 == dataSize && !isDynamic) {
IE_THROW() << "Input data is empty. Input name: \'" << name << "\'";
}
size_t dataBinSize = dataSize * data->element_size();
if (!isDynamic && dataBinSize != netReqBinSize) {
IE_THROW() << "Incorrect binary data size for " << (is_input ? "input" : "output") <<
" blob with name: \'" << name << "\' " <<
@@ -511,20 +512,26 @@ void InferRequest::wait() {
if (internal_outputs.empty()) {
IE_THROW() << "Inference was not started!\n";
}
// wait for completion & collect outputs as requested by the model
for (auto& no : _networkOutputs) {
// In dynamic case, graph API must be used to retrieve outputID
// because it does not create outputsMap during SetGraph
std::string outputID = outputsMap.empty() ? m_graph->MapOutputName(no.first) : outputsMap.at(no.first);
auto outputMemory = internal_outputs.at(outputID).get_memory();
auto outputLayout = internal_outputs.at(outputID).get_layout();
if (outputMemory)
outputMemory = m_graph->get_engine().reinterpret_buffer(*outputMemory, outputLayout);
bool need_output_update = _outputs.find(no.first) == _outputs.end() || _outputs.at(no.first)->byteSize() != outputMemory->size();
bool need_output_update = false;
if (outputLayout.bytes_count() == 0 || _outputs.find(no.first) == _outputs.end() || _outputs.at(no.first)->byteSize() != outputMemory->size()) {
need_output_update = true;
}
if (need_output_update) {
auto node = findOutputByNodeName(no.first);
auto out_partial_shape = node->get_output_partial_shape(0);
auto mem_dims = outputMemory->get_layout().get_shape();
auto mem_dims = outputLayout.get_shape();
size_t out_rank = out_partial_shape.size();
auto precision = InferenceEngine::Precision::FP32;
auto dims = SizeVector(mem_dims.begin(), mem_dims.end());
@@ -558,14 +565,14 @@ void InferRequest::wait() {
// mapping remote blobs not needed -
// let the user take care of them explicitly
if (!bptr->is<gpu::ClBlob>()) {
if (!bptr->is<gpu::ClBlob>() && outputMemory) {
bool same_mem = false;
{
auto dst_lock = bptr->cbuffer();
auto dst_ptr = dst_lock.as<uint8_t*>();
same_mem = same_host_mem(outputMemory, dst_ptr);
}
if (!same_mem) {
if (!same_mem && outputMemory->size()) {
copy_output_data(outputMemory, bptr);
}
}
@@ -1040,10 +1047,19 @@ InferenceEngine::Blob::Ptr InferRequest::create_device_blob(const InferenceEngin
auto dt = DataTypeFromPrecision(desc.getPrecision());
ov::PartialShape shape(desc.getDims());
// Currently, clDeviceMemAllocINTEL returns memory address allocated to other input blob if the current blob is empty
// W/A for this issue:
// Allocate with non-empty shape and then reinterprete with original shape
for (auto &i : shape) {
if (i == 0)
i = 1;
}
auto l = cldnn::layout(shape, dt, format);
if (m_graph->get_engine().use_unified_shared_memory()) {
return create_remote_blob<RemoteUSMbuffer>(desc, l, BlobType::BT_USM_DEVICE_INTERNAL);
auto blob = create_remote_blob<RemoteUSMbuffer>(desc, l, BlobType::BT_USM_DEVICE_INTERNAL);
return reinterpret_device_blob(blob, desc);
} else {
return create_remote_blob<RemoteCLbuffer>(desc, l, BlobType::BT_BUF_INTERNAL);
}

View File

@@ -0,0 +1,83 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "test_utils.h"
#include <intel_gpu/primitives/input_layout.hpp>
#include <intel_gpu/primitives/non_zero.hpp>
#include <intel_gpu/primitives/gather.hpp>
#include <intel_gpu/runtime/memory.hpp>
#include <intel_gpu/graph/topology.hpp>
#include <intel_gpu/graph/network.hpp>
#include "ngraph/runtime/reference/non_zero.hpp"
#include <cstddef>
using namespace cldnn;
using namespace ::tests;
namespace {
// Parameters for the empty-tensor concat test case.
struct empty_tensor_test_params {
    layout nonzero_input_layout;  // layout of the input fed to count/gather_nonzero (filled with zeros)
    layout concat_input_layout;   // layout of the non-empty data input of the concat
    int64_t concat_axis;          // axis along which the (empty, non-empty) pair is concatenated
};
class test_empty_tensor : public testing::TestWithParam<empty_tensor_test_params> {};
// An all-zero NonZero input yields an empty gather_nonzero result, so the
// concat output must consist of exactly the data from the non-empty input.
TEST_P(test_empty_tensor, concat_two_inputs) {
    const auto tp = GetParam();
    auto& engine = get_test_engine();

    auto nonzero_input_mem = engine.allocate_memory(tp.nonzero_input_layout);
    auto concat_data_mem = engine.allocate_memory(tp.concat_input_layout);

    auto concat_another_input_data = generate_random_1d<int32_t>(tp.concat_input_layout.count(), 0, 100);
    set_values(concat_data_mem, concat_another_input_data);

    topology topology;
    topology.add(input_layout("nonzero_input", tp.nonzero_input_layout));
    topology.add(data("concat_data", concat_data_mem));
    topology.add(count_nonzero("count_nonzero", input_info("nonzero_input")));
    topology.add(gather_nonzero("gather_nonzero", input_info("nonzero_input"), input_info("count_nonzero")));
    topology.add(concatenation("concat", { input_info("gather_nonzero"), input_info("concat_data") }, tp.concat_axis));

    ExecutionConfig config;
    config.set_property(ov::intel_gpu::optimize_data(true));
    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
    network network(engine, topology, config);

    // Feed all zeros so count_nonzero reports 0 and gather_nonzero produces an empty tensor.
    std::vector<int32_t> nonzero_input_with_all_zero(tp.nonzero_input_layout.count(), 0);
    set_values(nonzero_input_mem, nonzero_input_with_all_zero); // nonzero output shape will be (2, 0)
    network.set_input_data("nonzero_input", nonzero_input_mem);

    auto outputs = network.execute();
    auto output = outputs.at("concat").get_memory();
    cldnn::mem_lock<int32_t> output_ptr(output, get_test_stream());
    for (size_t i = 0; i < output->get_layout().count(); ++i) {
        ASSERT_EQ(concat_another_input_data[i], output_ptr[i]);
    }
}
// Test cases: {nonzero_input_layout, concat_input_layout, concat_axis}.
// The nonzero input is always filled with zeros, so the gather_nonzero branch
// contributes an empty tensor to the concat in every case.
INSTANTIATE_TEST_SUITE_P(smoke_empty, test_empty_tensor,
    testing::ValuesIn(std::vector<empty_tensor_test_params>{
        // 2D nonzero input, concat along axis 1
        {
            layout{ov::PartialShape{1, 2}, data_types::i32, format::bfyx},
            layout{ov::PartialShape{2, 3}, data_types::i32, format::bfyx},
            1
        },
        // 3D nonzero input, 2D concat data
        {
            layout{ov::PartialShape{2, 3, 4}, data_types::i32, format::bfyx},
            layout{ov::PartialShape{3, 4}, data_types::i32, format::bfyx},
            1
        },
        // 5D nonzero input (bfzyx), 2D concat data
        {
            layout{ov::PartialShape{3, 1, 2, 5, 1}, data_types::i32, format::bfzyx},
            layout{ov::PartialShape{5, 3}, data_types::i32, format::bfyx},
            1
        }
    }));
} // namespace

View File

@@ -0,0 +1,155 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <tuple>
#include <string>
#include <vector>
#include <memory>
#include "ngraph_functions/utils/ngraph_helpers.hpp"
#include "ngraph_functions/builders.hpp"
#include "shared_test_classes/base/ov_subgraph.hpp"
#include "shared_test_classes/single_layer/shape_of.hpp"
#include "shared_test_classes/single_layer/strided_slice.hpp"
#include <shared_test_classes/single_layer/eltwise.hpp>
#include "shared_test_classes/single_layer/gather.hpp"
#include <common_test_utils/ov_tensor_utils.hpp>
using namespace ngraph;
using namespace InferenceEngine;
using namespace ov::test;
namespace GPULayerTestsDefinitions {
typedef std::tuple<
std::vector<InputShape>, // input shapes
ElementType, // Network precision
TargetDevice, // Device name
std::map<std::string, std::string> // Additional network configuration
> emptyTensorTestParamsSet;
const std::vector<ElementType> netPrecisions = {
ElementType::i32,
};
// Functional test: builds a graph whose intermediate tensors become empty at
// runtime (NonZero over an all-zero input yields a {rank, 0} result) and checks
// that Concat / Squeeze / Gather / ShapeOf consuming the empty tensor still
// produce correct results on GPU.
// Fixes vs. previous revision: removed the unused local `nonzero` in SetUp and
// replaced signed loop indices compared against unsigned sizes with size_t.
class EmptyTensorDynamicGPUTest : public testing::WithParamInterface<emptyTensorTestParamsSet>,
                                  virtual public SubgraphBaseTest {
public:
    // Builds a readable test name out of the parameter set (shapes, precision, device).
    static std::string getTestCaseName(const testing::TestParamInfo<emptyTensorTestParamsSet>& obj) {
        emptyTensorTestParamsSet basicParamsSet = obj.param;
        std::ostringstream result;
        std::vector<InputShape> inputShapes;
        ElementType netType;
        TargetDevice targetDevice;
        std::map<std::string, std::string> additionalConfig;
        std::tie(inputShapes, netType, targetDevice, additionalConfig) = basicParamsSet;

        result << "IS=";
        for (const auto& shape : inputShapes) {
            result << CommonTestUtils::partialShape2str({shape.first}) << "_";
            for (const auto& actual_shape : shape.second) {
                result << CommonTestUtils::partialShape2str({actual_shape}) << "_";
            }
        }
        result << "NetType=" << netType << "_";
        result << "targetDevice=" << targetDevice;
        return result.str();
    }

protected:
    // Input 0 is filled with zeros so NonZero yields an empty tensor;
    // the remaining inputs get random data for the concat path.
    void generate_inputs(const std::vector<ngraph::Shape>& targetInputStaticShapes) override {
        inputs.clear();
        const auto& funcInputs = function->inputs();
        // size_t index avoids the signed/unsigned comparison against funcInputs.size()
        for (size_t i = 0; i < funcInputs.size(); ++i) {
            auto node = funcInputs[i].get_node_shared_ptr();
            auto tensor = ov::runtime::Tensor(node->get_element_type(), targetInputStaticShapes[i]);
            if (i == 0) {
                // All zero inputs for non_zero op
                auto tensor_ptr = static_cast<int32_t*>(tensor.data());
                for (size_t j = 0; j < ov::shape_size(targetInputStaticShapes[i]); ++j) {
                    tensor_ptr[j] = 0;
                }
            } else {
                // Random inputs for concat
                tensor = ov::test::utils::create_and_fill_tensor(funcInputs[i].get_element_type(),
                                                                 targetInputStaticShapes[i],
                                                                 80,
                                                                 0,
                                                                 8);
            }
            inputs.insert({funcInputs[i].get_node_shared_ptr(), tensor});
        }
    }

    // Builds the test function: NonZero -> {Concat with partially empty inputs,
    // Concat with all-empty inputs, Squeeze -> Gather -> ShapeOf}.
    void SetUp() override {
        emptyTensorTestParamsSet basicParamsSet = this->GetParam();
        std::vector<InputShape> inputShapes;
        ElementType netType;
        std::map<std::string, std::string> additionalConfig;
        std::tie(inputShapes, netType, targetDevice, additionalConfig) = basicParamsSet;

        init_input_shapes(inputShapes);
        const auto AllZeroData = inputDynamicShapes[0];
        const auto ConcatInputData = inputDynamicShapes[1];
        auto params = builder::makeDynamicParams(netType, {AllZeroData, ConcatInputData});
        auto paramOuts =
            helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::opset3::Parameter>(params));

        const ElementType intInputsPrecision = ElementType::i32;
        // NonZero over the all-zero input produces an empty {rank, 0} tensor at runtime.
        auto nonzeroEmptyResultOp = std::make_shared<ngraph::opset3::NonZero>(paramOuts[0]);

        auto convertEmptyInputOp = ngraph::builder::makeConversion(nonzeroEmptyResultOp,
                                                                   ElementType::i32,
                                                                   ngraph::helpers::ConversionTypes::CONVERT);
        auto concatPartialInputEmptyOp =
            ngraph::builder::makeConcat({convertEmptyInputOp, paramOuts[1], convertEmptyInputOp},
                                        1);  // partially empty input / non empty output
        auto concatEmptyInputEmptyOutputOp =
            ngraph::builder::makeConcat({convertEmptyInputOp, convertEmptyInputOp, convertEmptyInputOp},
                                        1);  // all empty input/ all empty output

        std::vector<int64_t> squeezeDims = {0};
        auto squeezeDimsConst =
            std::make_shared<ngraph::opset3::Constant>(ngraph::element::Type_t::i32, ngraph::Shape{1}, squeezeDims);
        auto squeezeEmptyInputOp = std::make_shared<ngraph::opset1::Squeeze>(nonzeroEmptyResultOp, squeezeDimsConst);

        auto axisNode = ngraph::builder::makeConstant<int64_t>(intInputsPrecision, ov::Shape({1}), {0});
        auto gatherEmptyIndicesOp =
            std::make_shared<ov::op::v7::Gather>(paramOuts[0], squeezeEmptyInputOp, axisNode, 0);
        auto shapeofEmptyInputOp = std::make_shared<ngraph::opset3::ShapeOf>(gatherEmptyIndicesOp, ElementType::i32);

        ngraph::ResultVector results = {std::make_shared<ngraph::opset1::Result>(shapeofEmptyInputOp),
                                        std::make_shared<ngraph::opset1::Result>(concatPartialInputEmptyOp),
                                        std::make_shared<ngraph::opset1::Result>(concatEmptyInputEmptyOutputOp)};
        function = std::make_shared<ngraph::Function>(results, params, "result");
    }
};
// Runs inference for every target static shape and compares GPU results
// against the reference implementation.
TEST_P(EmptyTensorDynamicGPUTest, CompareWithRefs) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()
    run();
}
namespace {
// No extra plugin configuration is needed for these cases.
std::map<std::string, std::string> emptyAdditionalConfig;
// Dynamic shapes with several static instantiations; the concat input includes
// {1, 0} shapes so the non-empty branch itself can also be empty.
const std::vector<std::vector<ov::test::InputShape>> dynInputShapes = {
    {
        // Input for NonZero
        {{ov::Dimension::dynamic()}, {{30}, {40}, {50}, {10}, {7}}},
        // Input for Concat
        {{ov::Dimension::dynamic(), ov::Dimension::dynamic()}, {{1, 0}, {1, 8}, {1, 0}, {1, 3}, {1, 20}}}
    },
};

const auto testParams_smoke = ::testing::Combine(::testing::ValuesIn(dynInputShapes),
                                                 ::testing::ValuesIn(netPrecisions), // netprec
                                                 ::testing::Values(CommonTestUtils::DEVICE_GPU),
                                                 ::testing::Values(emptyAdditionalConfig));

INSTANTIATE_TEST_SUITE_P(smoke_empty_tensor, EmptyTensorDynamicGPUTest,
                         testParams_smoke, EmptyTensorDynamicGPUTest::getTestCaseName);
} // namespace
} // namespace GPULayerTestsDefinitions