[GPU] Added GatherND dynamic support and changed logic for empty tensor support (#16690)

This commit is contained in:
Roman Lyamin
2023-04-07 09:19:05 +04:00
committed by GitHub
parent 6d82f36050
commit 132b657977
68 changed files with 191 additions and 54 deletions

View File

@@ -3,6 +3,7 @@
//
#include "gather_nd_inst.h"
#include "gather_nd_shape_inference.hpp"
#include "primitive_type_base.h"
#include "json_object.h"
@@ -70,6 +71,40 @@ layout gather_nd_inst::calc_output_layout(gather_nd_node const& node, kernel_imp
return layout(input_layout_origin.data_type, output_format, output_sizes_tensor, padding);
}
// Shape-agnostic output layout calculation for GatherND (used on the dynamic
// shape path). Delegates shape inference to the core ov::op GatherND helpers.
template<typename ShapeType>
std::vector<layout> gather_nd_inst::calc_output_layouts(gather_nd_node const& /*node*/, const kernel_impl_params& impl_param) {
    auto desc = impl_param.typed_desc<gather_nd>();

    auto input_layout = impl_param.get_input_layout(0);    // data input
    auto indices_layout = impl_param.get_input_layout(1);  // indices input

    // Output inherits the data input's type unless a fused primitive
    // overrides it with its own output type.
    auto output_type = input_layout.data_type;
    if (impl_param.has_fused_primitives()) {
        output_type = impl_param.get_fused_output_layout().data_type;
    }

    std::vector<ShapeType> output_shapes = {ShapeType()};
    std::vector<ShapeType> input_shapes = {
        input_layout.get<ShapeType>(),
        indices_layout.get<ShapeType>()
    };

    // batch_merged_output selects v5 semantics (leading batch dims of the
    // output merged into one dimension); otherwise v8 keeps them separate.
    if (desc->batch_merged_output) {
        ov::op::v5::GatherND op;
        op.set_batch_dims(desc->batch_dims);
        ov::op::v5::shape_infer(&op, input_shapes, output_shapes);
    } else {
        ov::op::v8::GatherND op;
        op.set_batch_dims(desc->batch_dims);
        ov::op::v8::shape_infer(&op, input_shapes, output_shapes);
    }

    // The inferred rank may differ from the input rank, so re-derive a
    // format of matching rank from the input's format.
    format output_format = format::adjust_to_rank(input_layout.format, output_shapes[0].size());

    return { layout{output_shapes[0], output_type, output_format} };
}
std::string gather_nd_inst::to_string(gather_nd_node const& node) {
auto desc = node.get_primitive();
auto node_info = node.desc_to_json();

View File

@@ -66,7 +66,6 @@ struct activation_impl : typed_primitive_impl_ocl<activation> {
void update_dispatch_data(const kernel_impl_params& impl_param) override {
auto kernel_params = get_kernel_params(impl_param, true);
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
update_kernels_list_to_skip();
}
};

View File

@@ -114,7 +114,6 @@ public:
void update_dispatch_data(const kernel_impl_params& impl_param) override {
auto kernel_params = get_kernel_params(impl_param, true);
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
update_kernels_list_to_skip();
}
};

View File

@@ -106,7 +106,6 @@ struct border_impl : typed_primitive_impl_ocl<border> {
void update_dispatch_data(const kernel_impl_params& impl_param) override {
auto kernel_params = get_kernel_params(impl_param, true);
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
update_kernels_list_to_skip();
}
};

View File

@@ -118,7 +118,6 @@ struct broadcast_impl : typed_primitive_impl_ocl<broadcast> {
void update_dispatch_data(const kernel_impl_params& impl_param) override {
auto kernel_params = get_kernel_params(impl_param, true);
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
update_kernels_list_to_skip();
}
};

View File

@@ -76,7 +76,6 @@ public:
void update_dispatch_data(const kernel_impl_params& impl_param) override {
auto kernel_params = get_kernel_params(impl_param, true);
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
update_kernels_list_to_skip();
}
};

View File

@@ -49,7 +49,6 @@ public:
"[GPU] Scalar field for runtime offset is not added for crop shape agnostic impl");
_kernel_data.kernels[0].params.scalars[0] = s;
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
update_kernels_list_to_skip();
}
};

View File

@@ -72,7 +72,6 @@ public:
void update_dispatch_data(const kernel_impl_params& impl_param) override {
auto kernel_params = get_kernel_params(impl_param, true);
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
update_kernels_list_to_skip();
}
};

View File

@@ -162,7 +162,6 @@ public:
void update_dispatch_data(const kernel_impl_params& impl_param) override {
auto kernel_params = get_kernel_params(impl_param, true);
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
update_kernels_list_to_skip();
}
};

View File

@@ -116,7 +116,6 @@ public:
void update_dispatch_data(const kernel_impl_params& impl_param) override {
auto kernel_params = get_kernel_params(impl_param, true);
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
update_kernels_list_to_skip();
}
};

View File

@@ -126,7 +126,6 @@ public:
void update_dispatch_data(const kernel_impl_params& impl_param) override {
auto kernel_params = get_kernel_params(impl_param, true);
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
update_kernels_list_to_skip();
}
};

View File

@@ -69,7 +69,6 @@ struct gather_elements_impl : typed_primitive_impl_ocl<gather_elements> {
void update_dispatch_data(const kernel_impl_params& impl_param) override {
auto kernel_params = get_kernel_params(impl_param, true);
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
update_kernels_list_to_skip();
}
};

View File

@@ -75,7 +75,6 @@ public:
void update_dispatch_data(const kernel_impl_params& impl_param) override {
auto kernel_params = get_kernel_params(impl_param, true);
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
update_kernels_list_to_skip();
}
};

View File

@@ -41,7 +41,6 @@ struct mvn_impl : typed_primitive_impl_ocl<mvn> {
void update_dispatch_data(const kernel_impl_params& impl_param) override {
auto kernel_params = get_kernel_params(impl_param, true);
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
update_kernels_list_to_skip();
}
};

View File

@@ -34,7 +34,6 @@ struct count_nonzero_impl : typed_primitive_impl_ocl<count_nonzero> {
void update_dispatch_data(const kernel_impl_params& impl_param) override {
auto kernel_params = get_kernel_params(impl_param, true);
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
update_kernels_list_to_skip();
}
};
@@ -62,7 +61,6 @@ struct gather_nonzero_impl : typed_primitive_impl_ocl<gather_nonzero> {
void update_dispatch_data(const kernel_impl_params& impl_param) override {
auto kernel_params = get_kernel_params(impl_param, true);
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
update_kernels_list_to_skip();
}
};

View File

@@ -65,7 +65,6 @@ struct permute_impl : typed_primitive_impl_ocl<permute> {
void update_dispatch_data(const kernel_impl_params& impl_param) override {
auto kernel_params = get_kernel_params(impl_param, true);
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
update_kernels_list_to_skip();
}
};

View File

@@ -283,13 +283,6 @@ protected:
}
}
// Marks each kernel as skippable when its global work size collapses to
// zero (any GWS dimension == 0), so kernels over empty tensors are not
// dispatched at all.
void update_kernels_list_to_skip() {
    for (size_t i = 0; i < _kernel_data.kernels.size(); ++i) {
        auto gws = _kernel_data.kernels[i].params.workGroups.global;
        // Product of all GWS dimensions is 0 -> there is no work to launch.
        _kernel_data.kernels[i].skip_execution = (std::accumulate(gws.begin(), gws.end(), static_cast<size_t>(1), std::multiplies<size_t>()) == 0);
    }
}
void set_kernels(cldnn::kernels_cache::compiled_kernels kernels) override {
if (is_cpu())
return;

View File

@@ -92,7 +92,6 @@ public:
const auto& output_layout = impl_param.get_output_layout();
quantize_params.packed_binary_output = output_layout.data_type == data_types::bin;
(_kernel_data.update_dispatch_data_func)(quantize_params, _kernel_data);
update_kernels_list_to_skip();
}
};

View File

@@ -35,7 +35,6 @@ struct range_impl : typed_primitive_impl_ocl<range> {
void update_dispatch_data(const kernel_impl_params& impl_param) override {
auto kernel_params = get_kernel_params(impl_param, true);
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
update_kernels_list_to_skip();
}
};

View File

@@ -87,7 +87,6 @@ struct reduce_impl : typed_primitive_impl_ocl<reduce> {
void update_dispatch_data(const kernel_impl_params& impl_param) override {
auto kernel_params = get_kernel_params(impl_param, true);
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
update_kernels_list_to_skip();
}
};

View File

@@ -107,7 +107,6 @@ public:
void update_dispatch_data(const kernel_impl_params& impl_param) override {
auto kernel_params = get_kernel_params(impl_param, true);
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
update_kernels_list_to_skip();
}
};

View File

@@ -39,7 +39,6 @@ struct scatter_nd_update_impl : typed_primitive_impl_ocl<scatter_nd_update> {
void update_dispatch_data(const kernel_impl_params& impl_param) override {
auto kernel_params = get_kernel_params(impl_param, true);
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
update_kernels_list_to_skip();
}
};

View File

@@ -64,7 +64,6 @@ public:
void update_dispatch_data(const kernel_impl_params& impl_param) override {
auto kernel_params = get_kernel_params(impl_param, true);
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
update_kernels_list_to_skip();
}
};

View File

@@ -55,7 +55,6 @@ struct select_impl : typed_primitive_impl_ocl<select> {
void update_dispatch_data(const kernel_impl_params& impl_param) override {
auto kernel_params = get_kernel_params(impl_param, true);
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
update_kernels_list_to_skip();
}
};

View File

@@ -53,7 +53,6 @@ struct shape_of_impl : typed_primitive_impl_ocl<shape_of> {
void update_dispatch_data(const kernel_impl_params& impl_param) override {
auto kernel_params = get_kernel_params(impl_param, true);
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
update_kernels_list_to_skip();
}
};

View File

@@ -59,7 +59,6 @@ struct softmax_impl : typed_primitive_impl_ocl<softmax> {
void update_dispatch_data(const kernel_impl_params& impl_param) override {
auto kernel_params = get_kernel_params(impl_param, true);
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
update_kernels_list_to_skip();
}
};

View File

@@ -185,7 +185,6 @@ public:
void update_dispatch_data(const kernel_impl_params& impl_param) override {
auto kernel_params = get_kernel_params(impl_param, true);
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
update_kernels_list_to_skip();
}
};

View File

@@ -46,7 +46,6 @@ public:
void update_dispatch_data(const kernel_impl_params& impl_param) override {
auto kernel_params = get_kernel_params(impl_param, true);
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
update_kernels_list_to_skip();
}
};

View File

@@ -17,6 +17,8 @@ class typed_primitive_inst<gather_nd> : public typed_primitive_inst_base<gather_
using parent::parent;
public:
template<typename ShapeType>
static std::vector<layout> calc_output_layouts(gather_nd_node const& /*node*/, const kernel_impl_params& impl_param);
static layout calc_output_layout(gather_nd_node const& node, kernel_impl_params const& impl_param);
static std::string to_string(gather_nd_node const& node);

View File

@@ -71,17 +71,15 @@ std::vector<layout> gather_nonzero_inst::calc_output_layouts(gather_nonzero_node
auto desc = impl_param.typed_desc<gather_nonzero>();
assert(static_cast<bool>(desc->output_data_types[0]) == false &&
"Output data type forcing is not supported for gather_nonzero_node!");
auto rank = impl_param.get_input_layout(0).get<ShapeType>().rank().get_length();
if (impl_param.memory_deps.count(1)) {
auto out_size = read_vector<int64_t>(impl_param.memory_deps.at(1), impl_param.get_stream());
// output shape of nonzero is [input_rank, count_non_zero]
auto rank = static_cast<size_t>(impl_param.get_input_layout(0).get<ShapeType>().rank().get_length());
auto count = static_cast<size_t>(out_size[0]);
ov::Shape output_shape({rank, count});
ov::PartialShape output_pshape(output_shape);
auto out_layout = layout{output_pshape, cldnn::data_types::i32, cldnn::format::bfyx};
auto out_layout = layout{{rank, out_size[0]}, cldnn::data_types::i32, cldnn::format::bfyx};
return {out_layout};
} else {
return {layout{ov::PartialShape({ov::Dimension::dynamic(), ov::Dimension::dynamic()}), cldnn::data_types::i32, cldnn::format::bfyx}};
return {layout{ov::PartialShape({ov::Dimension(rank), ov::Dimension::dynamic()}), cldnn::data_types::i32, cldnn::format::bfyx}};
}
}

View File

@@ -212,7 +212,7 @@ void KernelBaseOpenCL::FillCLKernelData(clKernelData& kernel,
uint32_t number_of_inputs_for_fused_prims,
int number_of_outputs,
bool is_dynamic) const {
if (!is_dynamic)
if (!is_dynamic && !kernel.skip_execution)
KernelBase::CheckDispatchData(kernelMapName, dispatchData, engine_info.maxWorkGroupSize);
kernel.code.kernelString = GetKernelString(kernelMapName, jit, entryPoint, engine_info, exeMode);
kernel.params.workGroups.global = dispatchData.gws;

View File

@@ -86,15 +86,6 @@ KernelsData kernel_selector_base::GetNaiveBestKernel(const KernelList& all_impls
if (kds.size() && kds[0].kernels.size()) {
kernelsData = kds;
kernelName = implementation->GetName();
if (!params.is_shape_agnostic) {
for (size_t k = 0; k < kds[0].kernels.size(); ++k) {
auto gws = kds[0].kernels[k].params.workGroups.global;
kernelsData[0].kernels[k].skip_execution = (std::accumulate(gws.begin(),
gws.end(),
static_cast<size_t>(1),
std::multiplies<size_t>()) == 0);
}
}
break;
}
} catch (std::runtime_error& ex) {

View File

@@ -114,6 +114,24 @@ struct KernelData {
bool can_reuse_memory = true;
// Generic empty-tensor check: a kernel can be skipped when any of its input
// or output tensors has logical size 0 (nothing to read or write).
// kernel_id is unused here; it exists so kernel-specific overloads with
// per-kernel logic share the same call shape.
static bool SkipKernelExecution(const base_params& params, size_t kernel_id = 0) {
    for (const auto& input : params.inputs) {
        if (input.LogicalSize() == 0) {
            return true;
        }
    }
    for (const auto& output : params.outputs) {
        if (output.LogicalSize() == 0) {
            return true;
        }
    }
    return false;
}
// Fallback overload for params that carry no tensor information
// (plain Params rather than base_params): never skip execution.
static bool SkipKernelExecution(const Params& params, size_t kernel_id = 0) {
    return false;
}
template <typename T>
inline static KernelData Default(const Params& _params, size_t kernel_nums = 1) {
KernelData kd;
@@ -124,6 +142,10 @@ struct KernelData {
kd.reorderInput = false; // for KW
kd.autoTuneIndex = -1;
kd.can_reuse_memory = true;
for (auto& kernel : kd.kernels) {
kernel.skip_execution = SkipKernelExecution(orgParams);
}
return kd;
}
};

View File

@@ -99,6 +99,7 @@ KernelsData ActivationKernelBase::GetCommonKernelsData(const Params& params, con
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
kd.kernels[0].params.workGroups.global = dispatchData.gws;
kd.kernels[0].params.workGroups.local = dispatchData.lws;
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
};
auto& kernel = kd.kernels[0];

View File

@@ -128,6 +128,7 @@ KernelsData ArgMaxMinKernelAxis::GetKernelsData(const Params& params, const opti
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
kd.kernels[0].params.workGroups.global = dispatchData.gws;
kd.kernels[0].params.workGroups.local = dispatchData.lws;
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
const size_t elem_size = prim_params.inputs[0].ElementSize();
const size_t iav_type_size = elem_size + 4;

View File

@@ -51,6 +51,7 @@ KernelsData ArgMaxMinKernelBase::GetCommonKernelsData(const Params& params, cons
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
kd.kernels[0].params.workGroups.global = dispatchData.gws;
kd.kernels[0].params.workGroups.local = dispatchData.lws;
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
};
auto cldnn_jit = GetJitConstants(orgParams);

View File

@@ -60,6 +60,10 @@ BorderKernelBase::DispatchData BorderKernelBase::SetDefault(const border_params&
return dispatchData;
}
// The border kernel has no work to do only when the padded output itself is
// empty; the check is deliberately on the output (not the inputs), since
// padding may still have to produce data for an empty input.
bool BorderKernelBase::SkipKernelExecution(const border_params& params) const {
    const auto& out = params.outputs[0];
    return out.LogicalSize() == 0;
}
KernelsData BorderKernelBase::GetCommonKernelsData(const Params& params,
const optional_params& options) const {
assert(params.GetType() == KernelType::BORDER);
@@ -75,6 +79,7 @@ KernelsData BorderKernelBase::GetCommonKernelsData(const Params& params,
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
kd.kernels[0].params.workGroups.global = dispatchData.gws;
kd.kernels[0].params.workGroups.local = dispatchData.lws;
kd.kernels[0].skip_execution = SkipKernelExecution(prim_params);
};
auto cldnn_jit = GetJitConstants(prim_params);
@@ -82,6 +87,8 @@ KernelsData BorderKernelBase::GetCommonKernelsData(const Params& params,
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = k_data.kernels[0];
kernel.skip_execution = SkipKernelExecution(prim_params);
FillCLKernelData(kernel,
dispatchData,
params.engineInfo,

View File

@@ -50,5 +50,6 @@ protected:
JitConstants GetJitConstants(const border_params& params) const;
DispatchData SetDefault(const border_params& params) const;
KernelsData GetCommonKernelsData(const Params& params, const optional_params&) const;
bool SkipKernelExecution(const border_params& params) const;
};
} // namespace kernel_selector

View File

@@ -1,4 +1,4 @@
// Copyright (C) 2018-2023 Intel Corporation
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
@@ -91,6 +91,7 @@ KernelsData BroadcastKernelBase::GetCommonKernelsData(const Params& params,
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
kd.kernels[0].params.workGroups.global = dispatchData.gws;
kd.kernels[0].params.workGroups.local = dispatchData.lws;
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
};
auto cldnn_jit = GetJitConstants(prim_params);

View File

@@ -109,6 +109,10 @@ ConcatenationKernelBase::DispatchData ConcatenationKernelBase::SetDefault(const
return dispatchData;
}
// Concatenation dispatches one kernel per input; the kernel selected by
// kernel_id is skippable exactly when its own input tensor is empty.
bool ConcatenationKernelBase::SkipKernelExecution(const concatenation_params& params, size_t kernel_id) const {
    const auto& in = params.inputs[kernel_id];
    return in.LogicalSize() == 0;
}
KernelsData ConcatenationKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options) const {
if (!Validate(params, options)) {
return {};
@@ -136,6 +140,7 @@ KernelsData ConcatenationKernelBase::GetCommonKernelsData(const Params& params,
DispatchData dispatchData = SetDefault(newParams);
kernel.params.workGroups.global = dispatchData.gws;
kernel.params.workGroups.local = dispatchData.lws;
kernel.skip_execution = SkipKernelExecution(prim_params, i);
ScalarDescriptor s;
s.t = ScalarDescriptor::Types::UINT32;
@@ -173,6 +178,7 @@ KernelsData ConcatenationKernelBase::GetCommonKernelsData(const Params& params,
kernel.code.kernelString = GetKernelString(kernelName, jit, entryPoint, params.engineInfo);
kernel.params.workGroups.global = dispatchData.gws;
kernel.params.workGroups.local = dispatchData.lws;
kernel.skip_execution = SkipKernelExecution(newParams, i);
if (is_dynamic) {
kernel.params.arguments.push_back({ArgumentDescriptor::Types::SHAPE_INFO, 0});
}

View File

@@ -68,5 +68,6 @@ protected:
virtual size_t GetAlignment(const concatenation_params& /*params*/) const {
return 1;
}
bool SkipKernelExecution(const concatenation_params& params, size_t kernel_id) const;
};
} // namespace kernel_selector

View File

@@ -114,6 +114,7 @@ KernelsData ConcatenationKernel_fs_b_yx_fsv32::GetKernelsData(const Params& para
kernel.params.workGroups.local = dispatchData.lws;
kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, (uint32_t)i});
kernel.params.arguments.push_back({ArgumentDescriptor::Types::OUTPUT, 0});
kernel.skip_execution = SkipKernelExecution(newParams, i);
ScalarDescriptor s;
s.t = ScalarDescriptor::Types::UINT32;

View File

@@ -86,6 +86,7 @@ KernelsData CumSumKernelBase::GetCommonKernelsData(const Params& params,
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
kd.kernels[0].params.workGroups.global = dispatchData.gws;
kd.kernels[0].params.workGroups.local = dispatchData.lws;
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
};
auto& kernel = kd.kernels[0];

View File

@@ -714,6 +714,7 @@ KernelsData EltwiseKernelBase::GetCommonKernelsData(const Params& params, const
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
kd.kernels[0].params.workGroups.global = dispatchData.gws;
kd.kernels[0].params.workGroups.local = dispatchData.lws;
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
};
DispatchData dispatchData = SetDefault(newParams);

View File

@@ -66,6 +66,7 @@ KernelsData FullyConnectedKernelBase::GetCommonKernelsData(const Params &params,
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
kd.kernels[0].params.workGroups.global = dispatchData.gws;
kd.kernels[0].params.workGroups.local = dispatchData.lws;
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
};
fully_connected_params& newParams = *static_cast<fully_connected_params*>(kd.params.get());

View File

@@ -169,6 +169,7 @@ KernelsData GatherElementsKernelRef::GetKernelsData(const Params& params, const
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
kd.kernels[0].params.workGroups.global = dispatchData.gws;
kd.kernels[0].params.workGroups.local = dispatchData.lws;
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
};
auto& kernel = kd.kernels[0];

View File

@@ -283,6 +283,7 @@ KernelsData GatherKernelRef::GetKernelsData(const Params& params, const optional
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
kd.kernels[0].params.workGroups.global = dispatchData.gws;
kd.kernels[0].params.workGroups.local = dispatchData.lws;
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
};
FillCLKernelData(kernel,

View File

@@ -51,6 +51,7 @@ KernelsData GemmKernelBase::GetCommonKernelsData(const Params& params,
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
kd.kernels[0].params.workGroups.global = dispatchData.gws;
kd.kernels[0].params.workGroups.local = dispatchData.lws;
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
};
auto cldnn_jit = GetJitConstants(prim_params);
auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, params, options);

View File

@@ -71,6 +71,7 @@ KernelsData MVNKernelBase::GetCommonKernelsData(const Params& params,
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
kd.kernels[0].params.workGroups.global = dispatchData.gws;
kd.kernels[0].params.workGroups.local = dispatchData.lws;
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
};
auto& kernel = kd.kernels[0];

View File

@@ -81,6 +81,7 @@ KernelsData CountNonzeroKernelRef::GetKernelsData(const Params& params, const op
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
kd.kernels[0].params.workGroups.global = dispatchData.gws;
kd.kernels[0].params.workGroups.local = dispatchData.lws;
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
};
// In case of count-nonzero, the output shape is static unconditionally,

View File

@@ -85,6 +85,7 @@ KernelsData GatherNonzeroKernelRef::GetKernelsData(const Params& params, const o
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
kd.kernels[0].params.workGroups.global = dispatchData.gws;
kd.kernels[0].params.workGroups.local = dispatchData.lws;
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
};
FillCLKernelData(kernel,

View File

@@ -47,6 +47,7 @@ KernelsData PermuteKernelBase::GetKernelsData(const Params& params, const option
OPENVINO_ASSERT(kernel_data.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
kernel_data.kernels[0].params.workGroups.global = dispatchData.gws;
kernel_data.kernels[0].params.workGroups.local = dispatchData.lws;
kernel_data.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
};
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);

View File

@@ -72,6 +72,7 @@ KernelsData QuantizeKernelBase::GetKernelsData(const Params& params, const optio
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
kd.kernels[0].params.workGroups.global = dispatchData.gws;
kd.kernels[0].params.workGroups.local = dispatchData.lws;
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
};
auto& kernel = kd.kernels[0];

View File

@@ -40,6 +40,7 @@ KernelsData RangeKernelRef::GetKernelsData(const Params &params, const optional_
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
kd.kernels[0].params.workGroups.global = dispatchData.gws;
kd.kernels[0].params.workGroups.local = dispatchData.lws;
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
};
auto &clKernelData = kernel_data.kernels[0];

View File

@@ -234,6 +234,7 @@ KernelsData ReduceKernelBase::GetCommonKernelsData(const Params& p,
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
kd.kernels[0].params.workGroups.global = dispatchData.gws;
kd.kernels[0].params.workGroups.local = dispatchData.lws;
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
};
auto& kernel = kd.kernels[0];

View File

@@ -239,6 +239,7 @@ KernelsData ReorderKernelBase::GetCommonKernelsData(const reorder_params& params
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
kd.kernels[0].params.workGroups.global = dispatchData.gws;
kd.kernels[0].params.workGroups.local = dispatchData.lws;
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
};
auto& kernel = kd.kernels[0];

View File

@@ -169,6 +169,7 @@ KernelsData ScatterNDUpdateKernelRef::GetKernelsData(const Params& params, const
auto dispatchData = SetDefault(prim_params, (i == 1));
kd.kernels[i].params.workGroups.global = dispatchData.gws;
kd.kernels[i].params.workGroups.local = dispatchData.lws;
kd.kernels[i].skip_execution = KernelData::SkipKernelExecution(prim_params);
}
};

View File

@@ -331,6 +331,7 @@ KernelsData ScatterUpdateKernelRef::GetKernelsData(const Params& params, const o
auto dispatchData = SetDefault(prim_params, i == 1);
kd.kernels[i].params.workGroups.global = dispatchData.gws;
kd.kernels[i].params.workGroups.local = dispatchData.lws;
kd.kernels[i].skip_execution = KernelData::SkipKernelExecution(prim_params);
}
};

View File

@@ -129,6 +129,7 @@ KernelsData SelectKernelBase::GetCommonKernelsData(const Params& params, const o
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
kd.kernels[0].params.workGroups.global = dispatchData.gws;
kd.kernels[0].params.workGroups.local = dispatchData.lws;
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
};
auto& kernel = kd.kernels[0];

View File

@@ -28,6 +28,10 @@ JitConstants ShapeOfKernelRef::GetJitConstants(const shape_of_params& params) co
return jit;
}
// shape_of must always run: its output describes the input's shape, which is
// meaningful even when the input tensor itself is empty, so this overrides
// the generic empty-tensor skip with an unconditional false.
bool ShapeOfKernelRef::SkipKernelExecution(const shape_of_params& params) const {
    return false;
}
KernelsData ShapeOfKernelRef::GetKernelsData(const Params &params, const optional_params &options) const {
KernelsData kernels_data;
if (!Validate(params, options))
@@ -40,13 +44,15 @@ KernelsData ShapeOfKernelRef::GetKernelsData(const Params &params, const optiona
auto jit_constants = GetJitConstants(derived_params);
auto jit = CreateJit(kernelName, jit_constants, entry_point);
auto &clKernelData = kernel_data.kernels[0];
clKernelData.skip_execution = SkipKernelExecution(derived_params);
kernel_data.update_dispatch_data_func = [](const Params& params, KernelData& kd) {
kernel_data.update_dispatch_data_func = [this](const Params& params, KernelData& kd) {
const auto& prim_params = static_cast<const shape_of_params&>(params);
auto dispatchData = SetDefault(prim_params);
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
kd.kernels[0].params.workGroups.global = dispatchData.gws;
kd.kernels[0].params.workGroups.local = dispatchData.lws;
kd.kernels[0].skip_execution = SkipKernelExecution(prim_params);
};
FillCLKernelData(clKernelData, dispatch_data, params.engineInfo, kernelName, jit, entry_point, EXE_MODE_DEFAULT,

View File

@@ -29,6 +29,7 @@ class ShapeOfKernelRef: public KernelBaseOpenCL {
ParamsKey GetSupportedKey() const override;
bool Validate(const Params &p, const optional_params &o) const override;
virtual JitConstants GetJitConstants(const shape_of_params& params) const;
bool SkipKernelExecution(const shape_of_params& params) const;
public:
ShapeOfKernelRef() :
KernelBaseOpenCL { "shape_of_ref" } {

View File

@@ -86,6 +86,7 @@ KernelsData SoftmaxKernel_bf::GetKernelsData(const Params& params, const optiona
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
kd.kernels[0].params.workGroups.global = dispatchData.gws;
kd.kernels[0].params.workGroups.local = dispatchData.lws;
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
};
}

View File

@@ -56,6 +56,7 @@ KernelsData SoftmaxKernelRef::GetKernelsData(const Params& params, const optiona
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
kd.kernels[0].params.workGroups.global = dispatchData.gws;
kd.kernels[0].params.workGroups.local = dispatchData.lws;
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
kd.internalBufferSizes.clear();
kd.internalBufferSizes.push_back(prim_params.inputs[0].PhysicalSizeInBytes());
kd.internalBufferDataType = prim_params.inputs[0].GetDType();

View File

@@ -229,6 +229,7 @@ KernelsData StridedSliceKernelRef::GetKernelsData(const Params& params, const op
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
kd.kernels[0].params.workGroups.global = dispatchData.gws;
kd.kernels[0].params.workGroups.local = dispatchData.lws;
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
};
FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point,

View File

@@ -67,6 +67,7 @@ KernelsData TileKernelRef::GetKernelsData(const Params& params, const optional_p
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
kd.kernels[0].params.workGroups.global = dispatchData.gws;
kd.kernels[0].params.workGroups.local = dispatchData.lws;
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
};
auto& kernel = kd.kernels[0];

View File

@@ -187,7 +187,9 @@ void set_arguments_impl(ocl_kernel_type& kernel,
}
if (status != CL_SUCCESS) {
throw std::runtime_error("Error set arg " + std::to_string(i) + ", error code: " + std::to_string(status) + "\n");
throw std::runtime_error("Error set arg " + std::to_string(i)
+ ", kernel: " + kernel.getInfo<CL_KERNEL_FUNCTION_NAME>()
+ ", error code: " + std::to_string(status) + "\n");
}
}
}

View File

@@ -0,0 +1,74 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "test_utils.h"
#include <intel_gpu/primitives/input_layout.hpp>
#include <intel_gpu/primitives/gather_nd.hpp>
#include "gather_nd_inst.h"
#include "program_wrapper.h"
using namespace cldnn;
using namespace ::tests;
namespace shape_infer_tests {
// One GatherND shape-inference test case.
struct gather_nd_test_params {
    layout in0_layout;         // data input layout
    layout in1_layout;         // indices input layout
    int64_t batch_dim;         // number of leading batch dimensions
    bool batch_merged_output;  // true -> batch dims merged in output (v5 semantics)
    layout expected_layout;    // expected inferred output layout
};
class gather_nd_test : public testing::TestWithParam<gather_nd_test_params> {};
// Builds a minimal program (two input_layout nodes feeding a gather_nd node),
// runs the shape-agnostic calc_output_layouts, and compares the result
// against the expected layout from the test parameters.
TEST_P(gather_nd_test, shape_infer) {
    auto p = GetParam();

    auto& engine = get_test_engine();

    auto input0_layout_prim = std::make_shared<input_layout>("input0", p.in0_layout);
    auto input1_layout_prim = std::make_shared<input_layout>("input1", p.in1_layout);

    // Ranks are derived from the test layouts and forwarded to the primitive.
    uint8_t input_rank = static_cast<uint8_t>(p.in0_layout.get_partial_shape().size());
    uint8_t indices_rank = static_cast<uint8_t>(p.in1_layout.get_partial_shape().size());
    auto gather_nd_prim = std::make_shared<gather_nd>("output", input_info("input0"), input_info("input1"),
                                                      input_rank, indices_rank, p.batch_dim, p.batch_merged_output);

    // Wire up the program graph directly via program_wrapper (no compilation).
    cldnn::program prog(engine);
    auto& input0_layout_node = prog.get_or_create(input0_layout_prim);
    auto& input1_layout_node = prog.get_or_create(input1_layout_prim);
    auto& gather_nd_node = prog.get_or_create(gather_nd_prim);
    program_wrapper::add_connection(prog, input0_layout_node, gather_nd_node);
    program_wrapper::add_connection(prog, input1_layout_node, gather_nd_node);

    auto res = gather_nd_inst::calc_output_layouts<ov::PartialShape>(gather_nd_node, *gather_nd_node.get_kernel_impl_params());

    ASSERT_EQ(res.size(), 1);
    ASSERT_EQ(res[0], p.expected_layout);
}
// Cases: {data layout, indices layout, batch_dims, batch_merged_output, expected layout}
INSTANTIATE_TEST_SUITE_P(smoke, gather_nd_test,
    testing::ValuesIn(std::vector<gather_nd_test_params>{
        {
            // No batch dims; indices last dim = 3 selects the first 3 data
            // dims, the remaining data dim (15) is appended.
            layout{ov::PartialShape{1000, 256, 10, 15}, data_types::f32, format::bfyx},
            layout{ov::PartialShape{25, 125, 3}, data_types::f32, format::bfyx},
            0, false,
            layout{ov::PartialShape{25, 125, 15}, data_types::f32, format::bfyx}
        },
        {
            // batch_dims = 2, batch dims kept separate (v8 semantics).
            layout{ov::PartialShape{30, 2, 100, 35}, data_types::f32, format::bfyx},
            layout{ov::PartialShape{30, 2, 3, 1}, data_types::f32, format::bfyx},
            2, false,
            layout{ov::PartialShape{30, 2, 3, 35}, data_types::f32, format::bfyx}
        },
        {
            // batch_dims = 2, merged output (v5 semantics): 30*2 -> 60.
            layout{ov::PartialShape{30, 2, 100, 35}, data_types::f32, format::bfyx},
            layout{ov::PartialShape{30, 2, 3, 1}, data_types::f32, format::bfyx},
            2, true,
            layout{ov::PartialShape{60, 3, 35}, data_types::f32, format::bfyx}
        }
    }));
} // shape_infer_tests

View File

@@ -32,7 +32,6 @@ TEST_P(test_empty_tensor, concat_two_inputs) {
auto nonzero_input_mem = engine.allocate_memory(p.nonzero_input_layout);
auto concat_data_mem = engine.allocate_memory(p.concat_input_layout);
std::vector<int32_t> concat_another_input_data = generate_random_1d<int32_t>(p.concat_input_layout.count(), 0, 100);
set_values(concat_data_mem, concat_another_input_data);