[GPU] Support empty tensor (#15631)
* Support empty tensor in GPU plugin
* Common kernel setup for skipping
* Refactor
* Cleanup
* Fix for shape-agnostic kernel
* Fix error due to memory allocation conflict for an empty input blob with other input blob
* Fix output blob parsing error
* Fixed quantize unit test error
* Fixed wrong TC
* Rename set_skip_kernels to update_kernels_list_to_skip
* Refactor output blob processing
* Applied review comments: more cleanup
This commit is contained in:
committed by
GitHub
parent
3f06d871bf
commit
523b516835
@@ -41,11 +41,16 @@ struct network_output {
|
||||
return _result;
|
||||
}
|
||||
|
||||
// Returns a copy of the layout stored in this network_output (_layout).
// The layout is kept separately from the result memory, so it can be queried
// even when the memory pointer itself is null.
layout get_layout() const { // Last tensor memory might be null (e.g., {N, 0} shape) but we should be able to get the layout
|
||||
return _layout;
|
||||
}
|
||||
|
||||
private:
|
||||
event::ptr _event;
|
||||
memory::ptr _result;
|
||||
stream::ptr _stream;
|
||||
network_output(event::ptr evt, memory::ptr mem, stream::ptr stream) : _event(evt), _result(mem), _stream(stream) {}
|
||||
layout _layout;
|
||||
// Private constructor (struct network is declared a friend below): stores the
// event, the result memory, the stream and the output layout as given.
network_output(event::ptr evt, memory::ptr mem, stream::ptr stream, layout layout) : _event(evt), _result(mem), _stream(stream), _layout(layout) {}
|
||||
friend struct network;
|
||||
};
|
||||
|
||||
@@ -126,7 +131,7 @@ public:
|
||||
event::ptr evt;
|
||||
if (get_stream().get_queue_type() == QueueTypes::out_of_order)
|
||||
evt = get_primitive_event(output_id);
|
||||
return network_output(evt, get_output_memory(output_id), get_stream_ptr());
|
||||
return network_output(evt, get_output_memory(output_id), get_stream_ptr(), get_output_layout(output_id));
|
||||
}
|
||||
layout get_node_output_layout(const primitive_id& output_id) const;
|
||||
memory::ptr get_output_memory(const primitive_id& output_id);
|
||||
|
||||
@@ -43,9 +43,9 @@ struct activation_impl : typed_primitive_impl_ocl<activation> {
|
||||
return args;
|
||||
}
|
||||
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) {
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
|
||||
const auto& primitive = impl_param.typed_desc<activation>();
|
||||
auto params = get_default_params<kernel_selector::activation_params>(impl_param);
|
||||
auto params = get_default_params<kernel_selector::activation_params>(impl_param, is_shape_agnostic);
|
||||
auto optional_params = get_default_optional_params<kernel_selector::activation_optional_params>(impl_param.get_program());
|
||||
|
||||
convert_new_activation_func(*primitive, params.activations);
|
||||
@@ -66,8 +66,9 @@ struct activation_impl : typed_primitive_impl_ocl<activation> {
|
||||
}
|
||||
|
||||
void update_dispatch_data(const kernel_impl_params& impl_param) override {
|
||||
auto kernel_params = get_kernel_params(impl_param);
|
||||
auto kernel_params = get_kernel_params(impl_param, true);
|
||||
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
|
||||
update_kernels_list_to_skip();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -26,9 +26,9 @@ struct broadcast_impl : typed_primitive_impl_ocl<broadcast> {
|
||||
return make_unique<broadcast_impl>(*this);
|
||||
}
|
||||
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) {
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
|
||||
const auto& primitive = impl_param.typed_desc<broadcast>();
|
||||
auto params = get_default_params<kernel_selector::broadcast_params>(impl_param);
|
||||
auto params = get_default_params<kernel_selector::broadcast_params>(impl_param, is_shape_agnostic);
|
||||
auto optional_params = get_default_optional_params<kernel_selector::broadcast_optional_params>(impl_param.get_program());
|
||||
|
||||
const auto format = impl_param.get_output_layout().format;
|
||||
@@ -151,8 +151,9 @@ struct broadcast_impl : typed_primitive_impl_ocl<broadcast> {
|
||||
}
|
||||
|
||||
void update_dispatch_data(const kernel_impl_params& impl_param) override {
|
||||
auto kernel_params = get_kernel_params(impl_param);
|
||||
auto kernel_params = get_kernel_params(impl_param, true);
|
||||
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
|
||||
update_kernels_list_to_skip();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -60,9 +60,9 @@ struct cum_sum_impl : typed_primitive_impl_ocl<cum_sum> {
|
||||
}
|
||||
|
||||
public:
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) {
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
|
||||
const auto& primitive = impl_param.typed_desc<cum_sum>();
|
||||
auto params = get_default_params<kernel_selector::cum_sum_params>(impl_param);
|
||||
auto params = get_default_params<kernel_selector::cum_sum_params>(impl_param, is_shape_agnostic);
|
||||
auto optional_params = get_default_optional_params<kernel_selector::cum_sum_optional_params>(impl_param.get_program());
|
||||
|
||||
size_t rank = impl_param.get_output_layout().get_rank();
|
||||
@@ -73,8 +73,9 @@ public:
|
||||
}
|
||||
|
||||
void update_dispatch_data(const kernel_impl_params& impl_param) override {
|
||||
auto kernel_params = get_kernel_params(impl_param);
|
||||
auto kernel_params = get_kernel_params(impl_param, true);
|
||||
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
|
||||
update_kernels_list_to_skip();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -33,11 +33,11 @@ protected:
|
||||
}
|
||||
|
||||
public:
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) {
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
|
||||
const auto& primitive = impl_param.typed_desc<eltwise>();
|
||||
auto inputs_count = primitive->input.size();
|
||||
|
||||
auto params = get_default_params<kernel_selector::eltwise_params>(impl_param);
|
||||
auto params = get_default_params<kernel_selector::eltwise_params>(impl_param, is_shape_agnostic);
|
||||
auto optional_params = get_default_optional_params<kernel_selector::eltwise_optional_params>(impl_param.get_program());
|
||||
|
||||
for (size_t i = 1; i < inputs_count; i++) {
|
||||
@@ -119,8 +119,9 @@ public:
|
||||
}
|
||||
|
||||
void update_dispatch_data(const kernel_impl_params& impl_param) override {
|
||||
auto kernel_params = get_kernel_params(impl_param);
|
||||
auto kernel_params = get_kernel_params(impl_param, true);
|
||||
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
|
||||
update_kernels_list_to_skip();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -42,7 +42,7 @@ protected:
|
||||
}
|
||||
|
||||
public:
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) {
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
|
||||
const auto& primitive = impl_param.typed_desc<fully_connected>();
|
||||
|
||||
auto get_fc_input_layouts = [primitive](const std::vector<layout>& input_layouts) {
|
||||
@@ -101,7 +101,7 @@ public:
|
||||
updated_impl_param.output_layouts[0] = get_fc_output_layout(input_layouts, impl_param.get_output_layout());
|
||||
|
||||
const auto& progam = impl_param.get_program();
|
||||
auto params = get_weights_bias_default_params<kernel_selector::fully_connected_params>(updated_impl_param);
|
||||
auto params = get_weights_bias_default_params<kernel_selector::fully_connected_params>(updated_impl_param, false, is_shape_agnostic);
|
||||
auto optional_params = get_default_weights_bias_optional_params<kernel_selector::fully_connected_optional_params>(progam);
|
||||
optional_params.allowInputReordering = true;
|
||||
|
||||
@@ -122,8 +122,9 @@ public:
|
||||
}
|
||||
|
||||
void update_dispatch_data(const kernel_impl_params& impl_param) override {
|
||||
auto kernel_params = get_kernel_params(impl_param);
|
||||
auto kernel_params = get_kernel_params(impl_param, true);
|
||||
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
|
||||
update_kernels_list_to_skip();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -72,9 +72,9 @@ struct gather_impl : typed_primitive_impl_ocl<gather> {
|
||||
}
|
||||
|
||||
public:
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) {
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
|
||||
const auto& primitive = impl_param.typed_desc<gather>();
|
||||
auto params = get_default_params<kernel_selector::gather_params>(impl_param);
|
||||
auto params = get_default_params<kernel_selector::gather_params>(impl_param, is_shape_agnostic);
|
||||
auto optional_params = get_default_optional_params<kernel_selector::gather_optional_params>(impl_param.get_program());
|
||||
|
||||
auto input_layout = impl_param.get_input_layout(0);
|
||||
@@ -101,8 +101,9 @@ public:
|
||||
}
|
||||
|
||||
void update_dispatch_data(const kernel_impl_params& impl_param) override {
|
||||
auto kernel_params = get_kernel_params(impl_param);
|
||||
auto kernel_params = get_kernel_params(impl_param, true);
|
||||
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
|
||||
update_kernels_list_to_skip();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -27,12 +27,12 @@ struct gemm_impl : typed_primitive_impl_ocl<gemm> {
|
||||
}
|
||||
|
||||
public:
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) {
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
|
||||
const auto& primitive = impl_param.typed_desc<gemm>();
|
||||
const auto input_layouts = gemm_inst::transform_input_layouts(primitive, impl_param.input_layouts, impl_param.output_layouts[0]);
|
||||
const auto output_layout = gemm_inst::transform_output_layout(primitive, input_layouts, impl_param.output_layouts[0]);
|
||||
|
||||
auto params = get_default_params<kernel_selector::gemm_params>(impl_param);
|
||||
auto params = get_default_params<kernel_selector::gemm_params>(impl_param, is_shape_agnostic);
|
||||
auto optional_params = get_default_optional_params<kernel_selector::gemm_optional_params>(impl_param.get_program());
|
||||
|
||||
params.inputs.clear();
|
||||
@@ -59,8 +59,9 @@ public:
|
||||
}
|
||||
|
||||
void update_dispatch_data(const kernel_impl_params& impl_param) override {
|
||||
auto kernel_params = get_kernel_params(impl_param);
|
||||
auto kernel_params = get_kernel_params(impl_param, true);
|
||||
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
|
||||
update_kernels_list_to_skip();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -29,9 +29,9 @@ struct mvn_impl : typed_primitive_impl_ocl<mvn> {
|
||||
return make_unique<mvn_impl>(*this);
|
||||
}
|
||||
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) {
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
|
||||
const auto& primitive = impl_param.typed_desc<mvn>();
|
||||
auto params = get_default_params<kernel_selector::mvn_params>(impl_param);
|
||||
auto params = get_default_params<kernel_selector::mvn_params>(impl_param, is_shape_agnostic);
|
||||
auto optional_params = get_default_optional_params<kernel_selector::mvn_optional_params>(impl_param.get_program());
|
||||
|
||||
params.mvnMode = primitive->across_channels ? kernel_selector::mvn_mode::ACROSS_CHANNELS
|
||||
@@ -45,8 +45,9 @@ struct mvn_impl : typed_primitive_impl_ocl<mvn> {
|
||||
}
|
||||
|
||||
void update_dispatch_data(const kernel_impl_params& impl_param) override {
|
||||
auto kernel_params = get_kernel_params(impl_param);
|
||||
auto kernel_params = get_kernel_params(impl_param, true);
|
||||
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
|
||||
update_kernels_list_to_skip();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -29,15 +29,16 @@ struct count_nonzero_impl : typed_primitive_impl_ocl<count_nonzero> {
|
||||
return make_unique<count_nonzero_impl>(*this);
|
||||
}
|
||||
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) {
|
||||
auto params = get_default_params<kernel_selector::count_nonzero_params>(impl_param);
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
|
||||
auto params = get_default_params<kernel_selector::count_nonzero_params>(impl_param, is_shape_agnostic);
|
||||
auto optional_params = get_default_optional_params<kernel_selector::count_nonzero_optional_params>(impl_param.get_program());
|
||||
return {params, optional_params};
|
||||
}
|
||||
|
||||
void update_dispatch_data(const kernel_impl_params& impl_param) override {
|
||||
auto kernel_params = get_kernel_params(impl_param);
|
||||
auto kernel_params = get_kernel_params(impl_param, true);
|
||||
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
|
||||
update_kernels_list_to_skip();
|
||||
}
|
||||
};
|
||||
|
||||
@@ -53,8 +54,8 @@ struct gather_nonzero_impl : typed_primitive_impl_ocl<gather_nonzero> {
|
||||
return make_unique<gather_nonzero_impl>(*this);
|
||||
}
|
||||
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) {
|
||||
auto params = get_default_params<kernel_selector::gather_nonzero_params>(impl_param);
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
|
||||
auto params = get_default_params<kernel_selector::gather_nonzero_params>(impl_param, is_shape_agnostic);
|
||||
auto optional_params = get_default_optional_params<kernel_selector::gather_nonzero_optional_params>(impl_param.get_program());
|
||||
|
||||
params.inputs.push_back(convert_data_tensor(impl_param.get_input_layout(1)));
|
||||
@@ -63,8 +64,9 @@ struct gather_nonzero_impl : typed_primitive_impl_ocl<gather_nonzero> {
|
||||
}
|
||||
|
||||
void update_dispatch_data(const kernel_impl_params& impl_param) override {
|
||||
auto kernel_params = get_kernel_params(impl_param);
|
||||
auto kernel_params = get_kernel_params(impl_param, true);
|
||||
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
|
||||
update_kernels_list_to_skip();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -54,9 +54,9 @@ struct permute_impl : typed_primitive_impl_ocl<permute> {
|
||||
return make_unique<permute_impl>(*this);
|
||||
}
|
||||
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) {
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
|
||||
const auto& primitive = impl_param.typed_desc<permute>();
|
||||
auto params = get_default_params<kernel_selector::permute_params>(impl_param);
|
||||
auto params = get_default_params<kernel_selector::permute_params>(impl_param, is_shape_agnostic);
|
||||
auto optional_params = get_default_optional_params<kernel_selector::permute_optional_params>(impl_param.get_program());
|
||||
|
||||
auto in_rank = impl_param.get_input_layout(0).get_rank();
|
||||
@@ -67,8 +67,9 @@ struct permute_impl : typed_primitive_impl_ocl<permute> {
|
||||
}
|
||||
|
||||
void update_dispatch_data(const kernel_impl_params& impl_param) override {
|
||||
auto kernel_params = get_kernel_params(impl_param);
|
||||
auto kernel_params = get_kernel_params(impl_param, true);
|
||||
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
|
||||
update_kernels_list_to_skip();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -92,6 +92,7 @@ struct typed_primitive_impl_ocl : public typed_primitive_impl<PType> {
|
||||
return make_unique<ImplType>(kernel_selector::kernel_data{});
|
||||
}
|
||||
auto kernel_params = ImplType::get_kernel_params(impl_param);
|
||||
kernel_params.first.is_shape_agnostic = impl_param.is_dynamic();
|
||||
auto& kernel_selector = ImplType::kernel_selector_t::Instance();
|
||||
auto best_kernel = kernel_selector.get_best_kernel(kernel_params.first, kernel_params.second);
|
||||
|
||||
@@ -208,9 +209,12 @@ protected:
|
||||
}
|
||||
|
||||
stream& stream = instance.get_network().get_stream();
|
||||
|
||||
for (size_t k = 0; k < _kernels.size(); ++k) {
|
||||
size_t k_idx = 0;
|
||||
for (size_t kd_idx = 0; kd_idx < _kernel_data.kernels.size(); ++kd_idx) {
|
||||
kernel_arguments_data args;
|
||||
if (_kernel_data.kernels[kd_idx].skip_execution) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (_kernel_args.inputs.size() > 0) {
|
||||
args = get_arguments_by_idx(instance);
|
||||
@@ -222,9 +226,9 @@ protected:
|
||||
args.intermediates.push_back(m);
|
||||
}
|
||||
|
||||
args.scalars = &_kernel_data.kernels[k].params.scalars;
|
||||
args.scalars = &_kernel_data.kernels[kd_idx].params.scalars;
|
||||
|
||||
stream.set_arguments(*_kernels[k], _kernel_data.kernels[k].params, args);
|
||||
stream.set_arguments(*_kernels[k_idx++], _kernel_data.kernels[kd_idx].params, args);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -254,11 +258,12 @@ protected:
|
||||
if (instance.can_be_optimized()) {
|
||||
return aggregate_events(events, stream, false, instance.is_output());
|
||||
}
|
||||
|
||||
std::vector<event::ptr> tmp_events(events);
|
||||
std::vector<event::ptr> all_events;
|
||||
|
||||
for (size_t k = 0; k < _kernels.size(); ++k) {
|
||||
size_t k_idx = 0;
|
||||
for (size_t kd_idx = 0; kd_idx < _kernel_data.kernels.size(); ++kd_idx) {
|
||||
if (_kernel_data.kernels[kd_idx].skip_execution)
|
||||
continue;
|
||||
std::vector<event::ptr> new_events;
|
||||
// If any user of the prim's users is a detection output, set prim as an output event (event won't be nullptr)
|
||||
bool is_output_event;
|
||||
@@ -281,9 +286,9 @@ protected:
|
||||
}
|
||||
}
|
||||
|
||||
args.scalars = &_kernel_data.kernels[k].params.scalars;
|
||||
args.scalars = &_kernel_data.kernels[kd_idx].params.scalars;
|
||||
|
||||
auto ev = stream.enqueue_kernel(*_kernels[k], _kernel_data.kernels[k].params, args, tmp_events, is_output_event);
|
||||
auto ev = stream.enqueue_kernel(*_kernels[k_idx++], _kernel_data.kernels[kd_idx].params, args, tmp_events, is_output_event);
|
||||
new_events.push_back(ev);
|
||||
all_events.push_back(ev);
|
||||
|
||||
@@ -304,7 +309,8 @@ protected:
|
||||
std::vector<std::shared_ptr<cldnn::kernel_string>> get_kernels_source() override {
|
||||
std::vector<std::shared_ptr<cldnn::kernel_string>> kernel_strings;
|
||||
for (size_t i = 0; i < _kernel_data.kernels.size(); ++i) {
|
||||
kernel_strings.push_back(_kernel_data.kernels[i].code.kernelString);
|
||||
if (!_kernel_data.kernels[i].skip_execution)
|
||||
kernel_strings.push_back(_kernel_data.kernels[i].code.kernelString);
|
||||
}
|
||||
return kernel_strings;
|
||||
}
|
||||
@@ -314,6 +320,14 @@ protected:
|
||||
_kernel_data.kernels[i].code.kernelString.reset();
|
||||
}
|
||||
}
|
||||
|
||||
// Refreshes the skip_execution flag of every kernel in _kernel_data.
// A kernel is flagged when any dimension of its global work size is zero
// (i.e. the product of the dimensions is zero) and unflagged otherwise.
// The loops that bind arguments and enqueue kernels check this flag and
// pass over flagged entries.
void update_kernels_list_to_skip() {
    for (size_t i = 0; i < _kernel_data.kernels.size(); ++i) {
        // Each kernel is evaluated against its own work size; indexing with
        // a fixed 0 here would leave all kernels after the first untouched.
        const auto& gws = _kernel_data.kernels[i].params.workGroups.global;

        // Test the dimensions directly instead of multiplying them: a product
        // accumulated in a narrower integer type can wrap to 0 for a large,
        // non-empty work size and wrongly mark the kernel as skippable.
        bool has_zero_dim = false;
        for (const auto dim : gws) {
            if (dim == 0) {
                has_zero_dim = true;
                break;
            }
        }

        _kernel_data.kernels[i].skip_execution = has_zero_dim;
    }
}
|
||||
};
|
||||
|
||||
} // namespace ocl
|
||||
|
||||
@@ -84,6 +84,7 @@ public:
|
||||
quantize_params.inputs.push_back(convert_data_tensor(impl_param.input_layouts[i]));
|
||||
}
|
||||
|
||||
quantize_params.is_shape_agnostic = impl_param.is_dynamic();
|
||||
auto& kernel_selector = kernel_selector::quantize_kernel_selector::Instance();
|
||||
auto best_kernel = kernel_selector.get_best_kernel(quantize_params, quantize_optional_params);
|
||||
|
||||
@@ -95,6 +96,7 @@ public:
|
||||
const auto& output_layout = impl_param.get_output_layout();
|
||||
quantize_params.packed_binary_output = output_layout.data_type == data_types::bin;
|
||||
(_kernel_data.update_dispatch_data_func)(quantize_params, _kernel_data);
|
||||
update_kernels_list_to_skip();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -78,9 +78,9 @@ struct reduce_impl : typed_primitive_impl_ocl<reduce> {
|
||||
return make_unique<reduce_impl>(*this);
|
||||
}
|
||||
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) {
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
|
||||
const auto& primitive = impl_param.typed_desc<reduce>();
|
||||
auto params = get_default_params<kernel_selector::reduce_params>(impl_param);
|
||||
auto params = get_default_params<kernel_selector::reduce_params>(impl_param, is_shape_agnostic);
|
||||
auto optional_params = get_default_optional_params<kernel_selector::reduce_optional_params>(impl_param.get_program());
|
||||
|
||||
params.reduceAxes = convert_axes(primitive->axes, impl_param.input_layouts[0].get_rank());
|
||||
@@ -91,8 +91,9 @@ struct reduce_impl : typed_primitive_impl_ocl<reduce> {
|
||||
}
|
||||
|
||||
void update_dispatch_data(const kernel_impl_params& impl_param) override {
|
||||
auto kernel_params = get_kernel_params(impl_param);
|
||||
auto kernel_params = get_kernel_params(impl_param, true);
|
||||
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
|
||||
update_kernels_list_to_skip();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -41,10 +41,10 @@ protected:
|
||||
}
|
||||
|
||||
public:
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) {
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
|
||||
const auto& primitive = impl_param.typed_desc<reorder>();
|
||||
auto&& output_layout = impl_param.get_output_layout();
|
||||
auto params = get_default_params<kernel_selector::reorder_params>(impl_param);
|
||||
auto params = get_default_params<kernel_selector::reorder_params>(impl_param, is_shape_agnostic);
|
||||
auto optional_params = get_default_optional_params<kernel_selector::reorder_optional_params>(impl_param.get_program());
|
||||
|
||||
auto inputs_count = primitive->input.size();
|
||||
@@ -107,8 +107,9 @@ public:
|
||||
}
|
||||
|
||||
void update_dispatch_data(const kernel_impl_params& impl_param) override {
|
||||
auto kernel_params = get_kernel_params(impl_param);
|
||||
auto kernel_params = get_kernel_params(impl_param, true);
|
||||
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
|
||||
update_kernels_list_to_skip();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -27,9 +27,9 @@ struct scatter_nd_update_impl : typed_primitive_impl_ocl<scatter_nd_update> {
|
||||
return make_unique<scatter_nd_update_impl>(*this);
|
||||
}
|
||||
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) {
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
|
||||
const auto& primitive = impl_param.typed_desc<scatter_nd_update>();
|
||||
auto params = get_default_params<kernel_selector::scatter_nd_update_params>(impl_param);
|
||||
auto params = get_default_params<kernel_selector::scatter_nd_update_params>(impl_param, is_shape_agnostic);
|
||||
auto optional_params = get_default_optional_params<kernel_selector::scatter_nd_update_optional_params>(impl_param.get_program());
|
||||
|
||||
params.indices_rank = primitive->indices_rank;
|
||||
@@ -41,8 +41,9 @@ struct scatter_nd_update_impl : typed_primitive_impl_ocl<scatter_nd_update> {
|
||||
}
|
||||
|
||||
void update_dispatch_data(const kernel_impl_params& impl_param) override {
|
||||
auto kernel_params = get_kernel_params(impl_param);
|
||||
auto kernel_params = get_kernel_params(impl_param, true);
|
||||
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
|
||||
update_kernels_list_to_skip();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -53,9 +53,9 @@ struct scatter_update_impl : typed_primitive_impl_ocl<scatter_update> {
|
||||
}
|
||||
|
||||
public:
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) {
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
|
||||
const auto& primitive = impl_param.typed_desc<scatter_update>();
|
||||
auto params = get_default_params<kernel_selector::scatter_update_params>(impl_param);
|
||||
auto params = get_default_params<kernel_selector::scatter_update_params>(impl_param, is_shape_agnostic);
|
||||
auto optional_params = get_default_optional_params<kernel_selector::scatter_update_optional_params>(impl_param.get_program());
|
||||
|
||||
params.axis = convert_axis(primitive->axis, impl_param.get_input_layout(0).get_rank());
|
||||
@@ -66,8 +66,9 @@ public:
|
||||
}
|
||||
|
||||
void update_dispatch_data(const kernel_impl_params& impl_param) override {
|
||||
auto kernel_params = get_kernel_params(impl_param);
|
||||
auto kernel_params = get_kernel_params(impl_param, true);
|
||||
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
|
||||
update_kernels_list_to_skip();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -25,8 +25,8 @@ struct select_impl : typed_primitive_impl_ocl<select> {
|
||||
return make_unique<select_impl>(*this);
|
||||
}
|
||||
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) {
|
||||
auto params = get_default_params<kernel_selector::select_params>(impl_param);
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
|
||||
auto params = get_default_params<kernel_selector::select_params>(impl_param, is_shape_agnostic);
|
||||
auto optional_params = get_default_optional_params<kernel_selector::select_optional_params>(impl_param.get_program());
|
||||
|
||||
std::vector<layout> input_layouts = impl_param.input_layouts;
|
||||
@@ -69,8 +69,9 @@ struct select_impl : typed_primitive_impl_ocl<select> {
|
||||
}
|
||||
|
||||
void update_dispatch_data(const kernel_impl_params& impl_param) override {
|
||||
auto kernel_params = get_kernel_params(impl_param);
|
||||
auto kernel_params = get_kernel_params(impl_param, true);
|
||||
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
|
||||
update_kernels_list_to_skip();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -25,8 +25,8 @@ struct shape_of_impl : typed_primitive_impl_ocl<shape_of> {
|
||||
return make_unique<shape_of_impl>(*this);
|
||||
}
|
||||
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) {
|
||||
auto params = get_default_params<kernel_selector::shape_of_params>(impl_param);
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
|
||||
auto params = get_default_params<kernel_selector::shape_of_params>(impl_param, is_shape_agnostic);
|
||||
auto optional_params = get_default_optional_params<kernel_selector::shape_of_optional_params>(impl_param.get_program());
|
||||
|
||||
auto input_layout = impl_param.get_input_layout(0);
|
||||
@@ -37,8 +37,9 @@ struct shape_of_impl : typed_primitive_impl_ocl<shape_of> {
|
||||
}
|
||||
|
||||
void update_dispatch_data(const kernel_impl_params& impl_param) override {
|
||||
auto kernel_params = get_kernel_params(impl_param);
|
||||
auto kernel_params = get_kernel_params(impl_param, true);
|
||||
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
|
||||
update_kernels_list_to_skip();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -61,6 +61,7 @@ struct softmax_impl : typed_primitive_impl_ocl<softmax> {
|
||||
void update_dispatch_data(const kernel_impl_params& impl_param) override {
|
||||
auto kernel_params = get_kernel_params(impl_param);
|
||||
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
|
||||
update_kernels_list_to_skip();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -58,9 +58,9 @@ struct strided_slice_impl : typed_primitive_impl_ocl<strided_slice> {
|
||||
}
|
||||
|
||||
public:
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) {
|
||||
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
|
||||
const auto& prim = impl_param.typed_desc<strided_slice>();
|
||||
auto params = get_default_params<kernel_selector::strided_slice_params>(impl_param);
|
||||
auto params = get_default_params<kernel_selector::strided_slice_params>(impl_param, is_shape_agnostic);
|
||||
auto op_params = get_default_optional_params<kernel_selector::strided_slice_optional_params>(impl_param.get_program());
|
||||
const size_t dims_num = params.inputs[0].Dimentions();
|
||||
|
||||
@@ -188,8 +188,9 @@ public:
|
||||
}
|
||||
|
||||
void update_dispatch_data(const kernel_impl_params& impl_param) override {
|
||||
auto kernel_params = get_kernel_params(impl_param);
|
||||
auto kernel_params = get_kernel_params(impl_param, true);
|
||||
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
|
||||
update_kernels_list_to_skip();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -177,6 +177,16 @@ struct kernel_impl_params {
|
||||
return fused_desc.back().output_layout;
|
||||
}
|
||||
|
||||
bool is_dynamic() const {
|
||||
for (auto i : input_layouts)
|
||||
if (i.is_dynamic())
|
||||
return true;
|
||||
for (auto i : output_layouts)
|
||||
if (i.is_dynamic())
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
template <class PType>
|
||||
std::shared_ptr<const PType> typed_desc() const { return std::static_pointer_cast<const PType>(desc); }
|
||||
|
||||
@@ -201,12 +211,11 @@ kernel_selector::dim_tensor<T> convert_dim_vector(const tensor& t) {
|
||||
|
||||
void convert_fused_ops_to_legacy_activations(const kernel_impl_params& param_info, std::vector<kernel_selector::base_activation_params>& activations);
|
||||
bool use_legacy_fused_ops(const kernel_impl_params& param_info);
|
||||
bool is_shape_agnostic(const kernel_impl_params& param_info);
|
||||
|
||||
void set_params(const kernel_impl_params& param_info, kernel_selector::params& params);
|
||||
|
||||
template <typename params_t>
|
||||
inline params_t get_default_params(const kernel_impl_params& param_info) {
|
||||
inline params_t get_default_params(const kernel_impl_params& param_info, bool is_shape_agnostic = false) {
|
||||
params_t params;
|
||||
|
||||
set_params(param_info, params);
|
||||
@@ -214,11 +223,9 @@ inline params_t get_default_params(const kernel_impl_params& param_info) {
|
||||
const auto& input_layout = param_info.get_input_layout(0);
|
||||
const auto& output_layout = param_info.get_output_layout(0);
|
||||
|
||||
params.is_shape_agnostic = is_shape_agnostic;
|
||||
params.inputs[0] = convert_data_tensor(input_layout);
|
||||
params.outputs[0] = convert_data_tensor(output_layout);
|
||||
if (is_shape_agnostic(param_info)) {
|
||||
params.is_shape_agnostic = true;
|
||||
}
|
||||
params.layerID = param_info.desc->id;
|
||||
|
||||
if (use_legacy_fused_ops(param_info)) {
|
||||
@@ -281,8 +288,8 @@ inline params_t get_default_params(const kernel_impl_params& param_info) {
|
||||
}
|
||||
|
||||
template <typename params_t>
|
||||
inline params_t get_weights_bias_default_params(const kernel_impl_params& param_info, bool has_group_dimension = false) {
|
||||
params_t params = get_default_params<params_t>(param_info);
|
||||
inline params_t get_weights_bias_default_params(const kernel_impl_params& param_info, bool has_group_dimension = false, bool is_shape_agnostic = false) {
|
||||
params_t params = get_default_params<params_t>(param_info, is_shape_agnostic);
|
||||
params.weights = convert_weights_tensor(*param_info.weights_layout, has_group_dimension);
|
||||
|
||||
if (param_info.bias_layout) {
|
||||
@@ -294,8 +301,8 @@ inline params_t get_weights_bias_default_params(const kernel_impl_params& param_
|
||||
}
|
||||
|
||||
template <typename params_t>
|
||||
params_t get_weight_bias_zero_point_default_params(const kernel_impl_params& param_info, bool has_group_dimension = false) {
|
||||
params_t params = get_weights_bias_default_params<params_t>(param_info, has_group_dimension);
|
||||
params_t get_weight_bias_zero_point_default_params(const kernel_impl_params& param_info, bool has_group_dimension = false, bool is_shape_agnostic = false) {
|
||||
params_t params = get_weights_bias_default_params<params_t>(param_info, has_group_dimension, is_shape_agnostic);
|
||||
|
||||
if (param_info.weights_zero_points_layout) {
|
||||
params.weights_zero_points.push_back(
|
||||
|
||||
@@ -1071,16 +1071,6 @@ bool use_legacy_fused_ops(const kernel_impl_params& param_info) {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool is_shape_agnostic(const kernel_impl_params& param_info) {
|
||||
const auto& program = param_info.prog;
|
||||
const auto& node = program->get_node(param_info.desc->id);
|
||||
|
||||
if (node.is_dynamic())
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void set_params(const kernel_impl_params& param_info, kernel_selector::params& params) {
|
||||
const auto& program = param_info.prog;
|
||||
const auto& device_info = program->get_engine().get_device_info();
|
||||
|
||||
@@ -162,6 +162,11 @@ void dump(memory::ptr mem, stream& stream, std::ofstream& file_stream) {
|
||||
<< ", original shape: " << size.to_string() << ")" << std::endl;
|
||||
}
|
||||
|
||||
if (size.count() == 0) {
|
||||
file_stream << "Empty buffer" << std::endl;
|
||||
return;
|
||||
}
|
||||
|
||||
mem_lock<T, mem_lock_type::read> lock(mem, stream);
|
||||
auto mem_ptr = lock.data();
|
||||
auto x_pitch = get_x_pitch(mem->get_layout());
|
||||
@@ -226,8 +231,12 @@ void log_memory_to_file(memory::ptr mem, stream& stream, std::string layerName)
|
||||
std::replace(filename.begin(), filename.end(), ' ', '_');
|
||||
std::replace(filename.begin(), filename.end(), ':', '_');
|
||||
filename = debug_config->dump_layers_path + filename + ".txt";
|
||||
|
||||
std::ofstream file_stream(filename);
|
||||
if (!mem) {
|
||||
file_stream << "Empty" << std::endl;
|
||||
return;
|
||||
}
|
||||
|
||||
auto mem_dt = mem->get_layout().data_type;
|
||||
if (mem_dt == cldnn::data_types::f32)
|
||||
dump<float>(mem, stream, file_stream);
|
||||
|
||||
@@ -264,6 +264,8 @@ void primitive_inst::realloc_if_needed() {
|
||||
}
|
||||
// intermediate memory allocation is required for primitives consisting of multiple kernels in dynamic case
|
||||
{
|
||||
if (_impl == nullptr)
|
||||
return;
|
||||
const auto& ibuf_layouts = _impl->get_internal_buffer_layouts();
|
||||
if (ibuf_layouts.empty())
|
||||
return;
|
||||
@@ -356,7 +358,6 @@ bool primitive_inst::update_impl() {
|
||||
std::lock_guard<std::mutex> lock(get_network().get_impl_cache_mutex());
|
||||
cache.add(impl_key, impl->clone());
|
||||
});
|
||||
|
||||
_impl = _dynamic_impl->clone();
|
||||
_impl->update_dispatch_data(*_impl_params);
|
||||
|
||||
@@ -386,13 +387,17 @@ bool primitive_inst::update_impl() {
|
||||
event::ptr primitive_inst::execute(const std::vector<event::ptr>& events) {
|
||||
const auto primitive_id = id();
|
||||
OPENVINO_ASSERT(_has_valid_input, primitive_id, " has invalid/unset input");
|
||||
|
||||
GPU_DEBUG_GET_INSTANCE(debug_config);
|
||||
|
||||
std::vector<event::ptr> dependencies;
|
||||
if (is_dynamic()) {
|
||||
OPENVINO_ASSERT(_node != nullptr, "[GPU] Invalid primitive_inst object for dynamic shapes case: program_node can't be null");
|
||||
update_shape();
|
||||
if (_impl_params->output_layouts[0].bytes_count() == 0) {
|
||||
auto ev = get_network().get_stream().create_user_event(true);
|
||||
return ev;
|
||||
}
|
||||
|
||||
if (!is_valid_fusion()) {
|
||||
auto subgraph = get_unfused_subgraph();
|
||||
|
||||
@@ -423,8 +428,7 @@ event::ptr primitive_inst::execute(const std::vector<event::ptr>& events) {
|
||||
|
||||
// Try update impl if current impl is dynamic because opt kernel may be added to impl cache through async compilation.
|
||||
// Only try update weight and realloc when impl is updated.
|
||||
if (shape_changed() || !_impl
|
||||
|| (!shape_changed() && _impl->is_dynamic())) {
|
||||
if (shape_changed() || !_impl || (!shape_changed() && _impl->is_dynamic())) {
|
||||
if (update_impl()) {
|
||||
auto ev = update_weights();
|
||||
if (ev)
|
||||
|
||||
@@ -86,6 +86,12 @@ KernelsData kernel_selector_base::GetNaiveBestKernel(const KernelList& all_impls
|
||||
if (kds.size() && kds[0].kernels.size()) {
|
||||
kernelsData = kds;
|
||||
kernelName = implementation->GetName();
|
||||
if (!params.is_shape_agnostic) {
|
||||
for (size_t k = 0; k < kds[0].kernels.size(); ++k) {
|
||||
auto gws = kds[0].kernels[k].params.workGroups.global;
|
||||
kernelsData[0].kernels[k].skip_execution = (std::accumulate(gws.begin(), gws.end(), 1, std::multiplies<size_t>()) == 0);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
} catch (std::runtime_error& ex) {
|
||||
|
||||
@@ -64,6 +64,7 @@ struct KernelCode {
|
||||
struct clKernelData {
|
||||
KernelCode code;
|
||||
KernelParams params;
|
||||
bool skip_execution = false;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
@@ -408,7 +408,7 @@ struct Params {
|
||||
virtual ParamsKey GetParamsKey() const;
|
||||
|
||||
protected:
|
||||
Params(KernelType kt, const std::string& id) : kType(kt), layerID(id) {}
|
||||
Params(KernelType kt, const std::string& id) : kType(kt), layerID(id), is_shape_agnostic(false) {}
|
||||
KernelType kType;
|
||||
|
||||
public:
|
||||
@@ -416,7 +416,7 @@ public:
|
||||
std::string forceImplementation;
|
||||
EngineInfo engineInfo;
|
||||
std::string uniqueID;
|
||||
|
||||
bool is_shape_agnostic;
|
||||
virtual std::string to_string() const;
|
||||
virtual std::string to_cache_string_v2() const;
|
||||
};
|
||||
@@ -631,7 +631,6 @@ struct base_params : public Params {
|
||||
std::vector<fused_operation_desc> fused_ops = {};
|
||||
MultiDataTensor inputs;
|
||||
MultiDataTensor outputs;
|
||||
bool is_shape_agnostic;
|
||||
std::string to_string() const override;
|
||||
std::string to_cache_string_v2() const override;
|
||||
ParamsKey GetParamsKey() const override;
|
||||
@@ -649,7 +648,7 @@ struct base_params : public Params {
|
||||
}
|
||||
|
||||
protected:
|
||||
explicit base_params(KernelType kt) : Params(kt, ""), inputs(1), outputs(1), is_shape_agnostic(false) {}
|
||||
explicit base_params(KernelType kt) : Params(kt, ""), inputs(1), outputs(1) {}
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
@@ -89,7 +89,6 @@ KernelsData ConcatenationKernelBase::GetCommonKernelsData(const Params& params,
|
||||
}
|
||||
|
||||
const concatenation_params& orgParams = static_cast<const concatenation_params&>(params);
|
||||
|
||||
KernelData kd = KernelData::Default<concatenation_params>(params, orgParams.inputs.size());
|
||||
|
||||
uint32_t lastOffset = 0;
|
||||
@@ -113,7 +112,7 @@ KernelsData ConcatenationKernelBase::GetCommonKernelsData(const Params& params,
|
||||
kernel.code.kernelString = GetKernelString(kernelName, jit, entryPoint, params.engineInfo);
|
||||
kernel.params.workGroups.global = dispatchData.gws;
|
||||
kernel.params.workGroups.local = dispatchData.lws;
|
||||
kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, (uint32_t)i });
|
||||
kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, (uint32_t) i});
|
||||
kernel.params.arguments.push_back({ArgumentDescriptor::Types::OUTPUT, 0});
|
||||
|
||||
ScalarDescriptor s;
|
||||
|
||||
@@ -180,10 +180,6 @@ void InferRequest::SetBlob(const std::string& name, const Blob::Ptr& data) {
|
||||
if (!data)
|
||||
IE_THROW(NotAllocated) << "Failed to set empty blob with name: \'" << name << "\'";
|
||||
|
||||
size_t dataSize = data->size();
|
||||
if (0 == dataSize) {
|
||||
IE_THROW() << "Input data is empty. Input name: \'" << name << "\'";
|
||||
}
|
||||
if (inputTensorsMap.find(name) != inputTensorsMap.end()) {
|
||||
inputTensorsMap.erase(name);
|
||||
}
|
||||
@@ -202,13 +198,18 @@ void InferRequest::SetBlob(const std::string& name, const Blob::Ptr& data) {
|
||||
<< (is_input ? "input" : "output") << " precision";
|
||||
}
|
||||
|
||||
size_t dataBinSize = dataSize * data->element_size();
|
||||
size_t netReqBinSize = std::accumulate(desc.getDims().begin(), desc.getDims().end(),
|
||||
desc.getPrecision().size(),
|
||||
std::multiplies<size_t>());
|
||||
auto node = is_input ? findInputByNodeName(name) : findOutputByNodeName(name);
|
||||
bool isDynamic = (node && node->get_output_partial_shape(0).is_dynamic());
|
||||
|
||||
size_t dataSize = data->size();
|
||||
if (0 == dataSize && !isDynamic) {
|
||||
IE_THROW() << "Input data is empty. Input name: \'" << name << "\'";
|
||||
}
|
||||
|
||||
size_t dataBinSize = dataSize * data->element_size();
|
||||
if (!isDynamic && dataBinSize != netReqBinSize) {
|
||||
IE_THROW() << "Incorrect binary data size for " << (is_input ? "input" : "output") <<
|
||||
" blob with name: \'" << name << "\' " <<
|
||||
@@ -511,20 +512,26 @@ void InferRequest::wait() {
|
||||
if (internal_outputs.empty()) {
|
||||
IE_THROW() << "Inference was not started!\n";
|
||||
}
|
||||
|
||||
// wait for completion & collect outputs as requested by the model
|
||||
for (auto& no : _networkOutputs) {
|
||||
// In dynamic case, graph API must be used to retrieve outputID
|
||||
// because it does not create outputsMap during SetGraph
|
||||
std::string outputID = outputsMap.empty() ? m_graph->MapOutputName(no.first) : outputsMap.at(no.first);
|
||||
auto outputMemory = internal_outputs.at(outputID).get_memory();
|
||||
auto outputLayout = internal_outputs.at(outputID).get_layout();
|
||||
if (outputMemory)
|
||||
outputMemory = m_graph->get_engine().reinterpret_buffer(*outputMemory, outputLayout);
|
||||
|
||||
bool need_output_update = _outputs.find(no.first) == _outputs.end() || _outputs.at(no.first)->byteSize() != outputMemory->size();
|
||||
bool need_output_update = false;
|
||||
|
||||
if (outputLayout.bytes_count() == 0 || _outputs.find(no.first) == _outputs.end() || _outputs.at(no.first)->byteSize() != outputMemory->size()) {
|
||||
need_output_update = true;
|
||||
}
|
||||
|
||||
if (need_output_update) {
|
||||
auto node = findOutputByNodeName(no.first);
|
||||
auto out_partial_shape = node->get_output_partial_shape(0);
|
||||
auto mem_dims = outputMemory->get_layout().get_shape();
|
||||
auto mem_dims = outputLayout.get_shape();
|
||||
size_t out_rank = out_partial_shape.size();
|
||||
auto precision = InferenceEngine::Precision::FP32;
|
||||
auto dims = SizeVector(mem_dims.begin(), mem_dims.end());
|
||||
@@ -558,14 +565,14 @@ void InferRequest::wait() {
|
||||
|
||||
// mapping remote blobs not needed -
|
||||
// let the user take care of them explicitly
|
||||
if (!bptr->is<gpu::ClBlob>()) {
|
||||
if (!bptr->is<gpu::ClBlob>() && outputMemory) {
|
||||
bool same_mem = false;
|
||||
{
|
||||
auto dst_lock = bptr->cbuffer();
|
||||
auto dst_ptr = dst_lock.as<uint8_t*>();
|
||||
same_mem = same_host_mem(outputMemory, dst_ptr);
|
||||
}
|
||||
if (!same_mem) {
|
||||
if (!same_mem && outputMemory->size()) {
|
||||
copy_output_data(outputMemory, bptr);
|
||||
}
|
||||
}
|
||||
@@ -1040,10 +1047,19 @@ InferenceEngine::Blob::Ptr InferRequest::create_device_blob(const InferenceEngin
|
||||
auto dt = DataTypeFromPrecision(desc.getPrecision());
|
||||
ov::PartialShape shape(desc.getDims());
|
||||
|
||||
// Currently, clDeviceMemAllocINTEL returns memory address allocated to other input blob if the current blob is empty
|
||||
// W/A for this issue:
|
||||
// Allocate with non-empty shape and then reinterpret with original shape
|
||||
for (auto &i : shape) {
|
||||
if (i == 0)
|
||||
i = 1;
|
||||
}
|
||||
|
||||
auto l = cldnn::layout(shape, dt, format);
|
||||
|
||||
if (m_graph->get_engine().use_unified_shared_memory()) {
|
||||
return create_remote_blob<RemoteUSMbuffer>(desc, l, BlobType::BT_USM_DEVICE_INTERNAL);
|
||||
auto blob = create_remote_blob<RemoteUSMbuffer>(desc, l, BlobType::BT_USM_DEVICE_INTERNAL);
|
||||
return reinterpret_device_blob(blob, desc);
|
||||
} else {
|
||||
return create_remote_blob<RemoteCLbuffer>(desc, l, BlobType::BT_BUF_INTERNAL);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,83 @@
|
||||
// Copyright (C) 2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "test_utils.h"
|
||||
|
||||
#include <intel_gpu/primitives/input_layout.hpp>
|
||||
#include <intel_gpu/primitives/non_zero.hpp>
|
||||
#include <intel_gpu/primitives/gather.hpp>
|
||||
#include <intel_gpu/runtime/memory.hpp>
|
||||
#include <intel_gpu/graph/topology.hpp>
|
||||
#include <intel_gpu/graph/network.hpp>
|
||||
#include "ngraph/runtime/reference/non_zero.hpp"
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
using namespace cldnn;
|
||||
using namespace ::tests;
|
||||
namespace {
|
||||
struct empty_tensor_test_params {
|
||||
layout nonzero_input_layout;
|
||||
layout concat_input_layout;
|
||||
int64_t concat_axis;
|
||||
};
|
||||
|
||||
class test_empty_tensor : public testing::TestWithParam<empty_tensor_test_params> {};
|
||||
|
||||
TEST_P(test_empty_tensor, concat_two_inputs) {
|
||||
auto p = GetParam();
|
||||
auto& engine = get_test_engine();
|
||||
|
||||
auto nonzero_input_mem = engine.allocate_memory(p.nonzero_input_layout);
|
||||
auto concat_data_mem = engine.allocate_memory(p.concat_input_layout);
|
||||
|
||||
|
||||
std::vector<int32_t> concat_another_input_data = generate_random_1d<int32_t>(p.concat_input_layout.count(), 0, 100);
|
||||
|
||||
set_values(concat_data_mem, concat_another_input_data);
|
||||
|
||||
topology topology;
|
||||
topology.add(input_layout("nonzero_input", p.nonzero_input_layout));
|
||||
topology.add(data("concat_data", concat_data_mem));
|
||||
topology.add(count_nonzero("count_nonzero", input_info("nonzero_input")));
|
||||
topology.add(gather_nonzero("gather_nonzero", input_info("nonzero_input"), input_info("count_nonzero")));
|
||||
topology.add(concatenation("concat", { input_info("gather_nonzero"), input_info("concat_data") }, p.concat_axis));
|
||||
|
||||
ExecutionConfig config;
|
||||
config.set_property(ov::intel_gpu::optimize_data(true));
|
||||
config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
|
||||
network network(engine, topology, config);
|
||||
|
||||
std::vector<int32_t> nonzero_input_with_all_zero(p.nonzero_input_layout.count());
|
||||
std::fill(nonzero_input_with_all_zero.begin(), nonzero_input_with_all_zero.end(), 0);
|
||||
set_values(nonzero_input_mem, nonzero_input_with_all_zero); // nonzero output shape will be (2, 0)
|
||||
|
||||
network.set_input_data("nonzero_input", nonzero_input_mem);
|
||||
auto outputs = network.execute();
|
||||
auto output = outputs.at("concat").get_memory();
|
||||
cldnn::mem_lock<int32_t> output_ptr(output, get_test_stream());
|
||||
for (size_t i = 0; i < output->get_layout().count(); ++i) {
|
||||
ASSERT_EQ(concat_another_input_data[i], output_ptr[i]);
|
||||
}
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_empty, test_empty_tensor,
|
||||
testing::ValuesIn(std::vector<empty_tensor_test_params>{
|
||||
{
|
||||
layout{ov::PartialShape{1, 2}, data_types::i32, format::bfyx},
|
||||
layout{ov::PartialShape{2, 3}, data_types::i32, format::bfyx},
|
||||
1
|
||||
},
|
||||
{
|
||||
layout{ov::PartialShape{2, 3, 4}, data_types::i32, format::bfyx},
|
||||
layout{ov::PartialShape{3, 4}, data_types::i32, format::bfyx},
|
||||
1
|
||||
},
|
||||
{
|
||||
layout{ov::PartialShape{3, 1, 2, 5, 1}, data_types::i32, format::bfzyx},
|
||||
layout{ov::PartialShape{5, 3}, data_types::i32, format::bfyx},
|
||||
1
|
||||
}
|
||||
}));
|
||||
} // namespace
|
||||
@@ -0,0 +1,155 @@
|
||||
// Copyright (C) 2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
#include <tuple>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "ngraph_functions/utils/ngraph_helpers.hpp"
|
||||
#include "ngraph_functions/builders.hpp"
|
||||
#include "shared_test_classes/base/ov_subgraph.hpp"
|
||||
#include "shared_test_classes/single_layer/shape_of.hpp"
|
||||
#include "shared_test_classes/single_layer/strided_slice.hpp"
|
||||
#include <shared_test_classes/single_layer/eltwise.hpp>
|
||||
#include "shared_test_classes/single_layer/gather.hpp"
|
||||
#include <common_test_utils/ov_tensor_utils.hpp>
|
||||
|
||||
using namespace ngraph;
|
||||
using namespace InferenceEngine;
|
||||
using namespace ov::test;
|
||||
|
||||
namespace GPULayerTestsDefinitions {
|
||||
|
||||
typedef std::tuple<
|
||||
std::vector<InputShape>, // input shapes
|
||||
ElementType, // Network precision
|
||||
TargetDevice, // Device name
|
||||
std::map<std::string, std::string> // Additional network configuration
|
||||
> emptyTensorTestParamsSet;
|
||||
|
||||
const std::vector<ElementType> netPrecisions = {
|
||||
ElementType::i32,
|
||||
};
|
||||
|
||||
class EmptyTensorDynamicGPUTest : public testing::WithParamInterface<emptyTensorTestParamsSet>,
|
||||
virtual public SubgraphBaseTest {
|
||||
public:
|
||||
static std::string getTestCaseName(const testing::TestParamInfo<emptyTensorTestParamsSet>& obj) {
|
||||
emptyTensorTestParamsSet basicParamsSet = obj.param;
|
||||
std::ostringstream result;
|
||||
std::vector<InputShape> inputShapes;
|
||||
ElementType netType;
|
||||
TargetDevice targetDevice;
|
||||
std::map<std::string, std::string> additionalConfig;
|
||||
|
||||
std::tie(inputShapes, netType, targetDevice, additionalConfig) = basicParamsSet;
|
||||
result << "IS=";
|
||||
for (const auto& shape : inputShapes) {
|
||||
result << CommonTestUtils::partialShape2str({shape.first}) << "_";
|
||||
for (const auto& actual_shape : shape.second) {
|
||||
result << CommonTestUtils::partialShape2str({actual_shape}) << "_";
|
||||
}
|
||||
}
|
||||
result << "NetType=" << netType << "_";
|
||||
result << "targetDevice=" << targetDevice;
|
||||
return result.str();
|
||||
}
|
||||
|
||||
protected:
|
||||
void generate_inputs(const std::vector<ngraph::Shape>& targetInputStaticShapes) override {
|
||||
inputs.clear();
|
||||
const auto& funcInputs = function->inputs();
|
||||
for (int32_t i = 0; i < funcInputs.size(); ++i) {
|
||||
auto node = funcInputs[i].get_node_shared_ptr();
|
||||
auto tensor = ov::runtime::Tensor(node->get_element_type(), targetInputStaticShapes[i]);
|
||||
if (i == 0) {
|
||||
// All zero inputs for non_zero op
|
||||
auto tensor_ptr = static_cast<int32_t*>(tensor.data());
|
||||
for (int j = 0; j < ov::shape_size(targetInputStaticShapes[i]); ++j) {
|
||||
tensor_ptr[j] = 0;
|
||||
}
|
||||
} else {
|
||||
// Random inputs for concat
|
||||
tensor = ov::test::utils::create_and_fill_tensor(funcInputs[i].get_element_type(),
|
||||
targetInputStaticShapes[i],
|
||||
80,
|
||||
0,
|
||||
8);
|
||||
}
|
||||
inputs.insert({funcInputs[i].get_node_shared_ptr(), tensor});
|
||||
}
|
||||
}
|
||||
|
||||
void SetUp() override {
|
||||
emptyTensorTestParamsSet basicParamsSet = this->GetParam();
|
||||
std::vector<InputShape> inputShapes;
|
||||
ElementType netType;
|
||||
std::map<std::string, std::string> additionalConfig;
|
||||
std::tie(inputShapes, netType, targetDevice, additionalConfig) = basicParamsSet;
|
||||
|
||||
init_input_shapes(inputShapes);
|
||||
const auto AllZeroData = inputDynamicShapes[0];
|
||||
const auto ConcatInputData = inputDynamicShapes[1];
|
||||
auto params = builder::makeDynamicParams(netType, {AllZeroData, ConcatInputData});
|
||||
auto paramOuts =
|
||||
helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::opset3::Parameter>(params));
|
||||
const ElementType intInputsPrecision = ElementType::i32;
|
||||
auto nonzeroEmptyResultOp = std::make_shared<ngraph::opset3::NonZero>(paramOuts[0]);
|
||||
|
||||
auto convertEmptyInputOp = ngraph::builder::makeConversion(nonzeroEmptyResultOp,
|
||||
ElementType::i32,
|
||||
ngraph::helpers::ConversionTypes::CONVERT);
|
||||
auto concatPartialInputEmptyOp =
|
||||
ngraph::builder::makeConcat({convertEmptyInputOp, paramOuts[1], convertEmptyInputOp},
|
||||
1); // partially empty input / non empty output
|
||||
auto concatEmptyInputEmptyOutputOp =
|
||||
ngraph::builder::makeConcat({convertEmptyInputOp, convertEmptyInputOp, convertEmptyInputOp},
|
||||
1); // all empty input/ all empty output
|
||||
|
||||
std::vector<int64_t> squeezeDims = {0};
|
||||
auto squeezeDimsConst =
|
||||
std::make_shared<ngraph::opset3::Constant>(ngraph::element::Type_t::i32, ngraph::Shape{1}, squeezeDims);
|
||||
|
||||
auto squeezeEmptyInputOp = std::make_shared<ngraph::opset1::Squeeze>(nonzeroEmptyResultOp, squeezeDimsConst);
|
||||
|
||||
auto axisNode = ngraph::builder::makeConstant<int64_t>(intInputsPrecision, ov::Shape({1}), {0});
|
||||
auto gatherEmptyIndicesOp =
|
||||
std::make_shared<ov::op::v7::Gather>(paramOuts[0], squeezeEmptyInputOp, axisNode, 0);
|
||||
auto shapeofEmptyInputOp = std::make_shared<ngraph::opset3::ShapeOf>(gatherEmptyIndicesOp, ElementType::i32);
|
||||
ngraph::ResultVector results = {std::make_shared<ngraph::opset1::Result>(shapeofEmptyInputOp),
|
||||
std::make_shared<ngraph::opset1::Result>(concatPartialInputEmptyOp),
|
||||
std::make_shared<ngraph::opset1::Result>(concatEmptyInputEmptyOutputOp)};
|
||||
function = std::make_shared<ngraph::Function>(results, params, "result");
|
||||
|
||||
auto nonzero = std::make_shared<ngraph::opset3::NonZero>(paramOuts[0]);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
TEST_P(EmptyTensorDynamicGPUTest, CompareWithRefs) {
|
||||
SKIP_IF_CURRENT_TEST_IS_DISABLED()
|
||||
run();
|
||||
}
|
||||
|
||||
namespace {
|
||||
std::map<std::string, std::string> emptyAdditionalConfig;
|
||||
const std::vector<std::vector<ov::test::InputShape>> dynInputShapes = {
|
||||
{
|
||||
// Input for NonZero
|
||||
{{ov::Dimension::dynamic()}, {{30}, {40}, {50}, {10}, {7}}},
|
||||
// Input for Concat
|
||||
{{ov::Dimension::dynamic(), ov::Dimension::dynamic()}, {{1, 0}, {1, 8}, {1, 0}, {1, 3}, {1, 20}}}
|
||||
},
|
||||
};
|
||||
|
||||
|
||||
const auto testParams_smoke = ::testing::Combine(::testing::ValuesIn(dynInputShapes),
|
||||
::testing::ValuesIn(netPrecisions), // netprec
|
||||
::testing::Values(CommonTestUtils::DEVICE_GPU),
|
||||
::testing::Values(emptyAdditionalConfig));
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_empty_tensor, EmptyTensorDynamicGPUTest,
|
||||
testParams_smoke, EmptyTensorDynamicGPUTest::getTestCaseName);
|
||||
} // namespace
|
||||
} // namespace GPULayerTestsDefinitions
|
||||
|
||||
Reference in New Issue
Block a user