[GPU] Added GatherND dynamic support and changed logic for empty tensor support (#16690)
This commit is contained in:
@@ -3,6 +3,7 @@
|
||||
//
|
||||
|
||||
#include "gather_nd_inst.h"
|
||||
#include "gather_nd_shape_inference.hpp"
|
||||
|
||||
#include "primitive_type_base.h"
|
||||
#include "json_object.h"
|
||||
@@ -70,6 +71,40 @@ layout gather_nd_inst::calc_output_layout(gather_nd_node const& node, kernel_imp
|
||||
return layout(input_layout_origin.data_type, output_format, output_sizes_tensor, padding);
|
||||
}
|
||||
|
||||
|
||||
// Computes the output layout of a gather_nd primitive by delegating the shape
// computation to the OpenVINO GatherND shape-inference helpers.
//
// The node argument is unused; everything is read from impl_param:
//   - input 0 is the data tensor layout, input 1 is the indices tensor layout;
//   - the primitive descriptor supplies batch_dims and batch_merged_output.
//
// Returns a vector holding exactly one layout built from the inferred shape,
// the selected output data type and a format derived from the data input.
template<typename ShapeType>
|
||||
std::vector<layout> gather_nd_inst::calc_output_layouts(gather_nd_node const& /*node*/, const kernel_impl_params& impl_param) {
|
||||
auto desc = impl_param.typed_desc<gather_nd>();
|
||||
|
||||
auto input_layout = impl_param.get_input_layout(0);
|
||||
auto indices_layout = impl_param.get_input_layout(1);
|
||||
|
||||
// The output data type defaults to the data input's type; when fused
// primitives are present, the fused output layout's type takes precedence.
auto output_type = input_layout.data_type;
|
||||
if (impl_param.has_fused_primitives()) {
|
||||
output_type = impl_param.get_fused_output_layout().data_type;
|
||||
}
|
||||
|
||||
// shape_infer writes its result into output_shapes[0], so one
// default-constructed slot is provided up front.
std::vector<ShapeType> output_shapes = {ShapeType()};
|
||||
std::vector<ShapeType> input_shapes = {
|
||||
input_layout.get<ShapeType>(),
|
||||
indices_layout.get<ShapeType>()
|
||||
};
|
||||
|
||||
// batch_merged_output selects which opset's shape inference is used:
// v5 when set, v8 otherwise.
// NOTE(review): presumably v5 collapses the batch dimensions into one while
// v8 keeps them separate - confirm against the GatherND op specifications.
if (desc->batch_merged_output) {
|
||||
ov::op::v5::GatherND op;
|
||||
op.set_batch_dims(desc->batch_dims);
|
||||
ov::op::v5::shape_infer(&op, input_shapes, output_shapes);
|
||||
} else {
|
||||
ov::op::v8::GatherND op;
|
||||
op.set_batch_dims(desc->batch_dims);
|
||||
ov::op::v8::shape_infer(&op, input_shapes, output_shapes);
|
||||
}
|
||||
|
||||
// The output format is derived from the data input's format, adjusted to the
// size() of the inferred output shape.
format output_format = format::adjust_to_rank(input_layout.format, output_shapes[0].size());
|
||||
|
||||
return { layout{output_shapes[0], output_type, output_format} };
|
||||
}
|
||||
|
||||
std::string gather_nd_inst::to_string(gather_nd_node const& node) {
|
||||
auto desc = node.get_primitive();
|
||||
auto node_info = node.desc_to_json();
|
||||
|
||||
@@ -66,7 +66,6 @@ struct activation_impl : typed_primitive_impl_ocl<activation> {
|
||||
void update_dispatch_data(const kernel_impl_params& impl_param) override {
|
||||
auto kernel_params = get_kernel_params(impl_param, true);
|
||||
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
|
||||
update_kernels_list_to_skip();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -114,7 +114,6 @@ public:
|
||||
void update_dispatch_data(const kernel_impl_params& impl_param) override {
|
||||
auto kernel_params = get_kernel_params(impl_param, true);
|
||||
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
|
||||
update_kernels_list_to_skip();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -106,7 +106,6 @@ struct border_impl : typed_primitive_impl_ocl<border> {
|
||||
void update_dispatch_data(const kernel_impl_params& impl_param) override {
|
||||
auto kernel_params = get_kernel_params(impl_param, true);
|
||||
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
|
||||
update_kernels_list_to_skip();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -118,7 +118,6 @@ struct broadcast_impl : typed_primitive_impl_ocl<broadcast> {
|
||||
void update_dispatch_data(const kernel_impl_params& impl_param) override {
|
||||
auto kernel_params = get_kernel_params(impl_param, true);
|
||||
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
|
||||
update_kernels_list_to_skip();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -76,7 +76,6 @@ public:
|
||||
void update_dispatch_data(const kernel_impl_params& impl_param) override {
|
||||
auto kernel_params = get_kernel_params(impl_param, true);
|
||||
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
|
||||
update_kernels_list_to_skip();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -49,7 +49,6 @@ public:
|
||||
"[GPU] Scalar field for runtime offset is not added for crop shape agnostic impl");
|
||||
_kernel_data.kernels[0].params.scalars[0] = s;
|
||||
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
|
||||
update_kernels_list_to_skip();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -72,7 +72,6 @@ public:
|
||||
void update_dispatch_data(const kernel_impl_params& impl_param) override {
|
||||
auto kernel_params = get_kernel_params(impl_param, true);
|
||||
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
|
||||
update_kernels_list_to_skip();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -162,7 +162,6 @@ public:
|
||||
void update_dispatch_data(const kernel_impl_params& impl_param) override {
|
||||
auto kernel_params = get_kernel_params(impl_param, true);
|
||||
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
|
||||
update_kernels_list_to_skip();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -116,7 +116,6 @@ public:
|
||||
void update_dispatch_data(const kernel_impl_params& impl_param) override {
|
||||
auto kernel_params = get_kernel_params(impl_param, true);
|
||||
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
|
||||
update_kernels_list_to_skip();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -126,7 +126,6 @@ public:
|
||||
void update_dispatch_data(const kernel_impl_params& impl_param) override {
|
||||
auto kernel_params = get_kernel_params(impl_param, true);
|
||||
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
|
||||
update_kernels_list_to_skip();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -69,7 +69,6 @@ struct gather_elements_impl : typed_primitive_impl_ocl<gather_elements> {
|
||||
void update_dispatch_data(const kernel_impl_params& impl_param) override {
|
||||
auto kernel_params = get_kernel_params(impl_param, true);
|
||||
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
|
||||
update_kernels_list_to_skip();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -75,7 +75,6 @@ public:
|
||||
void update_dispatch_data(const kernel_impl_params& impl_param) override {
|
||||
auto kernel_params = get_kernel_params(impl_param, true);
|
||||
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
|
||||
update_kernels_list_to_skip();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -41,7 +41,6 @@ struct mvn_impl : typed_primitive_impl_ocl<mvn> {
|
||||
void update_dispatch_data(const kernel_impl_params& impl_param) override {
|
||||
auto kernel_params = get_kernel_params(impl_param, true);
|
||||
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
|
||||
update_kernels_list_to_skip();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -34,7 +34,6 @@ struct count_nonzero_impl : typed_primitive_impl_ocl<count_nonzero> {
|
||||
void update_dispatch_data(const kernel_impl_params& impl_param) override {
|
||||
auto kernel_params = get_kernel_params(impl_param, true);
|
||||
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
|
||||
update_kernels_list_to_skip();
|
||||
}
|
||||
};
|
||||
|
||||
@@ -62,7 +61,6 @@ struct gather_nonzero_impl : typed_primitive_impl_ocl<gather_nonzero> {
|
||||
void update_dispatch_data(const kernel_impl_params& impl_param) override {
|
||||
auto kernel_params = get_kernel_params(impl_param, true);
|
||||
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
|
||||
update_kernels_list_to_skip();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -65,7 +65,6 @@ struct permute_impl : typed_primitive_impl_ocl<permute> {
|
||||
void update_dispatch_data(const kernel_impl_params& impl_param) override {
|
||||
auto kernel_params = get_kernel_params(impl_param, true);
|
||||
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
|
||||
update_kernels_list_to_skip();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -283,13 +283,6 @@ protected:
|
||||
}
|
||||
}
|
||||
|
||||
void update_kernels_list_to_skip() {
|
||||
for (size_t i = 0; i < _kernel_data.kernels.size(); ++i) {
|
||||
auto gws = _kernel_data.kernels[i].params.workGroups.global;
|
||||
_kernel_data.kernels[i].skip_execution = (std::accumulate(gws.begin(), gws.end(), static_cast<size_t>(1), std::multiplies<size_t>()) == 0);
|
||||
}
|
||||
}
|
||||
|
||||
void set_kernels(cldnn::kernels_cache::compiled_kernels kernels) override {
|
||||
if (is_cpu())
|
||||
return;
|
||||
|
||||
@@ -92,7 +92,6 @@ public:
|
||||
const auto& output_layout = impl_param.get_output_layout();
|
||||
quantize_params.packed_binary_output = output_layout.data_type == data_types::bin;
|
||||
(_kernel_data.update_dispatch_data_func)(quantize_params, _kernel_data);
|
||||
update_kernels_list_to_skip();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -35,7 +35,6 @@ struct range_impl : typed_primitive_impl_ocl<range> {
|
||||
void update_dispatch_data(const kernel_impl_params& impl_param) override {
|
||||
auto kernel_params = get_kernel_params(impl_param, true);
|
||||
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
|
||||
update_kernels_list_to_skip();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -87,7 +87,6 @@ struct reduce_impl : typed_primitive_impl_ocl<reduce> {
|
||||
void update_dispatch_data(const kernel_impl_params& impl_param) override {
|
||||
auto kernel_params = get_kernel_params(impl_param, true);
|
||||
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
|
||||
update_kernels_list_to_skip();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -107,7 +107,6 @@ public:
|
||||
void update_dispatch_data(const kernel_impl_params& impl_param) override {
|
||||
auto kernel_params = get_kernel_params(impl_param, true);
|
||||
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
|
||||
update_kernels_list_to_skip();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -39,7 +39,6 @@ struct scatter_nd_update_impl : typed_primitive_impl_ocl<scatter_nd_update> {
|
||||
void update_dispatch_data(const kernel_impl_params& impl_param) override {
|
||||
auto kernel_params = get_kernel_params(impl_param, true);
|
||||
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
|
||||
update_kernels_list_to_skip();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -64,7 +64,6 @@ public:
|
||||
void update_dispatch_data(const kernel_impl_params& impl_param) override {
|
||||
auto kernel_params = get_kernel_params(impl_param, true);
|
||||
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
|
||||
update_kernels_list_to_skip();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -55,7 +55,6 @@ struct select_impl : typed_primitive_impl_ocl<select> {
|
||||
void update_dispatch_data(const kernel_impl_params& impl_param) override {
|
||||
auto kernel_params = get_kernel_params(impl_param, true);
|
||||
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
|
||||
update_kernels_list_to_skip();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -53,7 +53,6 @@ struct shape_of_impl : typed_primitive_impl_ocl<shape_of> {
|
||||
void update_dispatch_data(const kernel_impl_params& impl_param) override {
|
||||
auto kernel_params = get_kernel_params(impl_param, true);
|
||||
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
|
||||
update_kernels_list_to_skip();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -59,7 +59,6 @@ struct softmax_impl : typed_primitive_impl_ocl<softmax> {
|
||||
void update_dispatch_data(const kernel_impl_params& impl_param) override {
|
||||
auto kernel_params = get_kernel_params(impl_param, true);
|
||||
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
|
||||
update_kernels_list_to_skip();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -185,7 +185,6 @@ public:
|
||||
void update_dispatch_data(const kernel_impl_params& impl_param) override {
|
||||
auto kernel_params = get_kernel_params(impl_param, true);
|
||||
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
|
||||
update_kernels_list_to_skip();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -46,7 +46,6 @@ public:
|
||||
void update_dispatch_data(const kernel_impl_params& impl_param) override {
|
||||
auto kernel_params = get_kernel_params(impl_param, true);
|
||||
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
|
||||
update_kernels_list_to_skip();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -17,6 +17,8 @@ class typed_primitive_inst<gather_nd> : public typed_primitive_inst_base<gather_
|
||||
using parent::parent;
|
||||
|
||||
public:
|
||||
template<typename ShapeType>
|
||||
static std::vector<layout> calc_output_layouts(gather_nd_node const& /*node*/, const kernel_impl_params& impl_param);
|
||||
static layout calc_output_layout(gather_nd_node const& node, kernel_impl_params const& impl_param);
|
||||
static std::string to_string(gather_nd_node const& node);
|
||||
|
||||
|
||||
@@ -71,17 +71,15 @@ std::vector<layout> gather_nonzero_inst::calc_output_layouts(gather_nonzero_node
|
||||
auto desc = impl_param.typed_desc<gather_nonzero>();
|
||||
assert(static_cast<bool>(desc->output_data_types[0]) == false &&
|
||||
"Output data type forcing is not supported for gather_nonzero_node!");
|
||||
|
||||
auto rank = impl_param.get_input_layout(0).get<ShapeType>().rank().get_length();
|
||||
if (impl_param.memory_deps.count(1)) {
|
||||
auto out_size = read_vector<int64_t>(impl_param.memory_deps.at(1), impl_param.get_stream());
|
||||
// output shape of nonzero is [input_rank, count_non_zero]
|
||||
auto rank = static_cast<size_t>(impl_param.get_input_layout(0).get<ShapeType>().rank().get_length());
|
||||
auto count = static_cast<size_t>(out_size[0]);
|
||||
ov::Shape output_shape({rank, count});
|
||||
ov::PartialShape output_pshape(output_shape);
|
||||
auto out_layout = layout{output_pshape, cldnn::data_types::i32, cldnn::format::bfyx};
|
||||
auto out_layout = layout{{rank, out_size[0]}, cldnn::data_types::i32, cldnn::format::bfyx};
|
||||
return {out_layout};
|
||||
} else {
|
||||
return {layout{ov::PartialShape({ov::Dimension::dynamic(), ov::Dimension::dynamic()}), cldnn::data_types::i32, cldnn::format::bfyx}};
|
||||
return {layout{ov::PartialShape({ov::Dimension(rank), ov::Dimension::dynamic()}), cldnn::data_types::i32, cldnn::format::bfyx}};
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -212,7 +212,7 @@ void KernelBaseOpenCL::FillCLKernelData(clKernelData& kernel,
|
||||
uint32_t number_of_inputs_for_fused_prims,
|
||||
int number_of_outputs,
|
||||
bool is_dynamic) const {
|
||||
if (!is_dynamic)
|
||||
if (!is_dynamic && !kernel.skip_execution)
|
||||
KernelBase::CheckDispatchData(kernelMapName, dispatchData, engine_info.maxWorkGroupSize);
|
||||
kernel.code.kernelString = GetKernelString(kernelMapName, jit, entryPoint, engine_info, exeMode);
|
||||
kernel.params.workGroups.global = dispatchData.gws;
|
||||
|
||||
@@ -86,15 +86,6 @@ KernelsData kernel_selector_base::GetNaiveBestKernel(const KernelList& all_impls
|
||||
if (kds.size() && kds[0].kernels.size()) {
|
||||
kernelsData = kds;
|
||||
kernelName = implementation->GetName();
|
||||
if (!params.is_shape_agnostic) {
|
||||
for (size_t k = 0; k < kds[0].kernels.size(); ++k) {
|
||||
auto gws = kds[0].kernels[k].params.workGroups.global;
|
||||
kernelsData[0].kernels[k].skip_execution = (std::accumulate(gws.begin(),
|
||||
gws.end(),
|
||||
static_cast<size_t>(1),
|
||||
std::multiplies<size_t>()) == 0);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
} catch (std::runtime_error& ex) {
|
||||
|
||||
@@ -114,6 +114,24 @@ struct KernelData {
|
||||
|
||||
bool can_reuse_memory = true;
|
||||
|
||||
// Decides whether a kernel built from `params` should be skipped.
// Returns true as soon as any entry of params.inputs or params.outputs reports
// LogicalSize() == 0, and false when none of them does.
// `kernel_id` is accepted but not read by this implementation.
static bool SkipKernelExecution(const base_params& params, size_t kernel_id = 0) {
|
||||
// Any zero-sized input makes the kernel skippable.
for (const auto& input : params.inputs) {
|
||||
if (input.LogicalSize() == 0) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
// Likewise for any zero-sized output.
for (const auto& output : params.outputs) {
|
||||
if (output.LogicalSize() == 0) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Overload taking a plain Params object: never requests a skip.
// Neither `params` nor `kernel_id` is inspected; the result is always false.
// NOTE(review): presumably the fallback for parameter types that expose no
// input/output tensors to examine - confirm against the callers.
static bool SkipKernelExecution(const Params& params, size_t kernel_id = 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline static KernelData Default(const Params& _params, size_t kernel_nums = 1) {
|
||||
KernelData kd;
|
||||
@@ -124,6 +142,10 @@ struct KernelData {
|
||||
kd.reorderInput = false; // for KW
|
||||
kd.autoTuneIndex = -1;
|
||||
kd.can_reuse_memory = true;
|
||||
|
||||
for (auto& kernel : kd.kernels) {
|
||||
kernel.skip_execution = SkipKernelExecution(orgParams);
|
||||
}
|
||||
return kd;
|
||||
}
|
||||
};
|
||||
|
||||
@@ -99,6 +99,7 @@ KernelsData ActivationKernelBase::GetCommonKernelsData(const Params& params, con
|
||||
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
|
||||
kd.kernels[0].params.workGroups.global = dispatchData.gws;
|
||||
kd.kernels[0].params.workGroups.local = dispatchData.lws;
|
||||
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
|
||||
};
|
||||
|
||||
auto& kernel = kd.kernels[0];
|
||||
|
||||
@@ -128,6 +128,7 @@ KernelsData ArgMaxMinKernelAxis::GetKernelsData(const Params& params, const opti
|
||||
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
|
||||
kd.kernels[0].params.workGroups.global = dispatchData.gws;
|
||||
kd.kernels[0].params.workGroups.local = dispatchData.lws;
|
||||
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
|
||||
|
||||
const size_t elem_size = prim_params.inputs[0].ElementSize();
|
||||
const size_t iav_type_size = elem_size + 4;
|
||||
|
||||
@@ -51,6 +51,7 @@ KernelsData ArgMaxMinKernelBase::GetCommonKernelsData(const Params& params, cons
|
||||
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
|
||||
kd.kernels[0].params.workGroups.global = dispatchData.gws;
|
||||
kd.kernels[0].params.workGroups.local = dispatchData.lws;
|
||||
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
|
||||
};
|
||||
|
||||
auto cldnn_jit = GetJitConstants(orgParams);
|
||||
|
||||
@@ -60,6 +60,10 @@ BorderKernelBase::DispatchData BorderKernelBase::SetDefault(const border_params&
|
||||
return dispatchData;
|
||||
}
|
||||
|
||||
// Returns true when the first output tensor of `params` has LogicalSize() == 0.
// Only outputs[0] is examined; the inputs are not checked here.
// NOTE(review): presumably an empty input can still yield a non-empty padded
// output, which is why inputs are ignored - confirm.
bool BorderKernelBase::SkipKernelExecution(const border_params& params) const {
|
||||
return params.outputs[0].LogicalSize() == 0;
|
||||
}
|
||||
|
||||
KernelsData BorderKernelBase::GetCommonKernelsData(const Params& params,
|
||||
const optional_params& options) const {
|
||||
assert(params.GetType() == KernelType::BORDER);
|
||||
@@ -75,6 +79,7 @@ KernelsData BorderKernelBase::GetCommonKernelsData(const Params& params,
|
||||
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
|
||||
kd.kernels[0].params.workGroups.global = dispatchData.gws;
|
||||
kd.kernels[0].params.workGroups.local = dispatchData.lws;
|
||||
kd.kernels[0].skip_execution = SkipKernelExecution(prim_params);
|
||||
};
|
||||
|
||||
auto cldnn_jit = GetJitConstants(prim_params);
|
||||
@@ -82,6 +87,8 @@ KernelsData BorderKernelBase::GetCommonKernelsData(const Params& params,
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
auto& kernel = k_data.kernels[0];
|
||||
kernel.skip_execution = SkipKernelExecution(prim_params);
|
||||
|
||||
FillCLKernelData(kernel,
|
||||
dispatchData,
|
||||
params.engineInfo,
|
||||
|
||||
@@ -50,5 +50,6 @@ protected:
|
||||
JitConstants GetJitConstants(const border_params& params) const;
|
||||
DispatchData SetDefault(const border_params& params) const;
|
||||
KernelsData GetCommonKernelsData(const Params& params, const optional_params&) const;
|
||||
bool SkipKernelExecution(const border_params& params) const;
|
||||
};
|
||||
} // namespace kernel_selector
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
@@ -91,6 +91,7 @@ KernelsData BroadcastKernelBase::GetCommonKernelsData(const Params& params,
|
||||
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
|
||||
kd.kernels[0].params.workGroups.global = dispatchData.gws;
|
||||
kd.kernels[0].params.workGroups.local = dispatchData.lws;
|
||||
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
|
||||
};
|
||||
|
||||
auto cldnn_jit = GetJitConstants(prim_params);
|
||||
|
||||
@@ -109,6 +109,10 @@ ConcatenationKernelBase::DispatchData ConcatenationKernelBase::SetDefault(const
|
||||
return dispatchData;
|
||||
}
|
||||
|
||||
// Returns true when the input tensor at index `kernel_id` has LogicalSize() == 0.
// `kernel_id` is used directly as an index into params.inputs; no bounds check
// is performed here.
// NOTE(review): assumes one kernel per input, so kernel_id < params.inputs.size()
// - confirm at the call sites.
bool ConcatenationKernelBase::SkipKernelExecution(const concatenation_params& params, size_t kernel_id) const {
|
||||
return params.inputs[kernel_id].LogicalSize() == 0;
|
||||
}
|
||||
|
||||
KernelsData ConcatenationKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options) const {
|
||||
if (!Validate(params, options)) {
|
||||
return {};
|
||||
@@ -136,6 +140,7 @@ KernelsData ConcatenationKernelBase::GetCommonKernelsData(const Params& params,
|
||||
DispatchData dispatchData = SetDefault(newParams);
|
||||
kernel.params.workGroups.global = dispatchData.gws;
|
||||
kernel.params.workGroups.local = dispatchData.lws;
|
||||
kernel.skip_execution = SkipKernelExecution(prim_params, i);
|
||||
|
||||
ScalarDescriptor s;
|
||||
s.t = ScalarDescriptor::Types::UINT32;
|
||||
@@ -173,6 +178,7 @@ KernelsData ConcatenationKernelBase::GetCommonKernelsData(const Params& params,
|
||||
kernel.code.kernelString = GetKernelString(kernelName, jit, entryPoint, params.engineInfo);
|
||||
kernel.params.workGroups.global = dispatchData.gws;
|
||||
kernel.params.workGroups.local = dispatchData.lws;
|
||||
kernel.skip_execution = SkipKernelExecution(newParams, i);
|
||||
if (is_dynamic) {
|
||||
kernel.params.arguments.push_back({ArgumentDescriptor::Types::SHAPE_INFO, 0});
|
||||
}
|
||||
|
||||
@@ -68,5 +68,6 @@ protected:
|
||||
virtual size_t GetAlignment(const concatenation_params& /*params*/) const {
|
||||
return 1;
|
||||
}
|
||||
bool SkipKernelExecution(const concatenation_params& params, size_t kernel_id) const;
|
||||
};
|
||||
} // namespace kernel_selector
|
||||
|
||||
@@ -114,6 +114,7 @@ KernelsData ConcatenationKernel_fs_b_yx_fsv32::GetKernelsData(const Params& para
|
||||
kernel.params.workGroups.local = dispatchData.lws;
|
||||
kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, (uint32_t)i});
|
||||
kernel.params.arguments.push_back({ArgumentDescriptor::Types::OUTPUT, 0});
|
||||
kernel.skip_execution = SkipKernelExecution(newParams, i);
|
||||
|
||||
ScalarDescriptor s;
|
||||
s.t = ScalarDescriptor::Types::UINT32;
|
||||
|
||||
@@ -86,6 +86,7 @@ KernelsData CumSumKernelBase::GetCommonKernelsData(const Params& params,
|
||||
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
|
||||
kd.kernels[0].params.workGroups.global = dispatchData.gws;
|
||||
kd.kernels[0].params.workGroups.local = dispatchData.lws;
|
||||
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
|
||||
};
|
||||
|
||||
auto& kernel = kd.kernels[0];
|
||||
|
||||
@@ -714,6 +714,7 @@ KernelsData EltwiseKernelBase::GetCommonKernelsData(const Params& params, const
|
||||
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
|
||||
kd.kernels[0].params.workGroups.global = dispatchData.gws;
|
||||
kd.kernels[0].params.workGroups.local = dispatchData.lws;
|
||||
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
|
||||
};
|
||||
|
||||
DispatchData dispatchData = SetDefault(newParams);
|
||||
|
||||
@@ -66,6 +66,7 @@ KernelsData FullyConnectedKernelBase::GetCommonKernelsData(const Params &params,
|
||||
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
|
||||
kd.kernels[0].params.workGroups.global = dispatchData.gws;
|
||||
kd.kernels[0].params.workGroups.local = dispatchData.lws;
|
||||
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
|
||||
};
|
||||
fully_connected_params& newParams = *static_cast<fully_connected_params*>(kd.params.get());
|
||||
|
||||
|
||||
@@ -169,6 +169,7 @@ KernelsData GatherElementsKernelRef::GetKernelsData(const Params& params, const
|
||||
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
|
||||
kd.kernels[0].params.workGroups.global = dispatchData.gws;
|
||||
kd.kernels[0].params.workGroups.local = dispatchData.lws;
|
||||
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
|
||||
};
|
||||
|
||||
auto& kernel = kd.kernels[0];
|
||||
|
||||
@@ -283,6 +283,7 @@ KernelsData GatherKernelRef::GetKernelsData(const Params& params, const optional
|
||||
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
|
||||
kd.kernels[0].params.workGroups.global = dispatchData.gws;
|
||||
kd.kernels[0].params.workGroups.local = dispatchData.lws;
|
||||
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
|
||||
};
|
||||
|
||||
FillCLKernelData(kernel,
|
||||
|
||||
@@ -51,6 +51,7 @@ KernelsData GemmKernelBase::GetCommonKernelsData(const Params& params,
|
||||
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
|
||||
kd.kernels[0].params.workGroups.global = dispatchData.gws;
|
||||
kd.kernels[0].params.workGroups.local = dispatchData.lws;
|
||||
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
|
||||
};
|
||||
auto cldnn_jit = GetJitConstants(prim_params);
|
||||
auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, params, options);
|
||||
|
||||
@@ -71,6 +71,7 @@ KernelsData MVNKernelBase::GetCommonKernelsData(const Params& params,
|
||||
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
|
||||
kd.kernels[0].params.workGroups.global = dispatchData.gws;
|
||||
kd.kernels[0].params.workGroups.local = dispatchData.lws;
|
||||
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
|
||||
};
|
||||
|
||||
auto& kernel = kd.kernels[0];
|
||||
|
||||
@@ -81,6 +81,7 @@ KernelsData CountNonzeroKernelRef::GetKernelsData(const Params& params, const op
|
||||
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
|
||||
kd.kernels[0].params.workGroups.global = dispatchData.gws;
|
||||
kd.kernels[0].params.workGroups.local = dispatchData.lws;
|
||||
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
|
||||
};
|
||||
|
||||
// In case of count-nonzero, the output shape is static unconditionally,
|
||||
|
||||
@@ -85,6 +85,7 @@ KernelsData GatherNonzeroKernelRef::GetKernelsData(const Params& params, const o
|
||||
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
|
||||
kd.kernels[0].params.workGroups.global = dispatchData.gws;
|
||||
kd.kernels[0].params.workGroups.local = dispatchData.lws;
|
||||
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
|
||||
};
|
||||
|
||||
FillCLKernelData(kernel,
|
||||
|
||||
@@ -47,6 +47,7 @@ KernelsData PermuteKernelBase::GetKernelsData(const Params& params, const option
|
||||
OPENVINO_ASSERT(kernel_data.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
|
||||
kernel_data.kernels[0].params.workGroups.global = dispatchData.gws;
|
||||
kernel_data.kernels[0].params.workGroups.local = dispatchData.lws;
|
||||
kernel_data.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
|
||||
};
|
||||
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
|
||||
|
||||
@@ -72,6 +72,7 @@ KernelsData QuantizeKernelBase::GetKernelsData(const Params& params, const optio
|
||||
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
|
||||
kd.kernels[0].params.workGroups.global = dispatchData.gws;
|
||||
kd.kernels[0].params.workGroups.local = dispatchData.lws;
|
||||
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
|
||||
};
|
||||
|
||||
auto& kernel = kd.kernels[0];
|
||||
|
||||
@@ -40,6 +40,7 @@ KernelsData RangeKernelRef::GetKernelsData(const Params &params, const optional_
|
||||
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
|
||||
kd.kernels[0].params.workGroups.global = dispatchData.gws;
|
||||
kd.kernels[0].params.workGroups.local = dispatchData.lws;
|
||||
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
|
||||
};
|
||||
|
||||
auto &clKernelData = kernel_data.kernels[0];
|
||||
|
||||
@@ -234,6 +234,7 @@ KernelsData ReduceKernelBase::GetCommonKernelsData(const Params& p,
|
||||
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
|
||||
kd.kernels[0].params.workGroups.global = dispatchData.gws;
|
||||
kd.kernels[0].params.workGroups.local = dispatchData.lws;
|
||||
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
|
||||
};
|
||||
|
||||
auto& kernel = kd.kernels[0];
|
||||
|
||||
@@ -239,6 +239,7 @@ KernelsData ReorderKernelBase::GetCommonKernelsData(const reorder_params& params
|
||||
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
|
||||
kd.kernels[0].params.workGroups.global = dispatchData.gws;
|
||||
kd.kernels[0].params.workGroups.local = dispatchData.lws;
|
||||
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
|
||||
};
|
||||
|
||||
auto& kernel = kd.kernels[0];
|
||||
|
||||
@@ -169,6 +169,7 @@ KernelsData ScatterNDUpdateKernelRef::GetKernelsData(const Params& params, const
|
||||
auto dispatchData = SetDefault(prim_params, (i == 1));
|
||||
kd.kernels[i].params.workGroups.global = dispatchData.gws;
|
||||
kd.kernels[i].params.workGroups.local = dispatchData.lws;
|
||||
kd.kernels[i].skip_execution = KernelData::SkipKernelExecution(prim_params);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -331,6 +331,7 @@ KernelsData ScatterUpdateKernelRef::GetKernelsData(const Params& params, const o
|
||||
auto dispatchData = SetDefault(prim_params, i == 1);
|
||||
kd.kernels[i].params.workGroups.global = dispatchData.gws;
|
||||
kd.kernels[i].params.workGroups.local = dispatchData.lws;
|
||||
kd.kernels[i].skip_execution = KernelData::SkipKernelExecution(prim_params);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -129,6 +129,7 @@ KernelsData SelectKernelBase::GetCommonKernelsData(const Params& params, const o
|
||||
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
|
||||
kd.kernels[0].params.workGroups.global = dispatchData.gws;
|
||||
kd.kernels[0].params.workGroups.local = dispatchData.lws;
|
||||
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
|
||||
};
|
||||
|
||||
auto& kernel = kd.kernels[0];
|
||||
|
||||
@@ -28,6 +28,10 @@ JitConstants ShapeOfKernelRef::GetJitConstants(const shape_of_params& params) co
|
||||
return jit;
|
||||
}
|
||||
|
||||
// Always returns false: `params` is not inspected, so this function never
// requests that the shape_of kernel be skipped.
// NOTE(review): presumably because the shape of an empty tensor must still be
// written to the output - confirm.
bool ShapeOfKernelRef::SkipKernelExecution(const shape_of_params& params) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
KernelsData ShapeOfKernelRef::GetKernelsData(const Params &params, const optional_params &options) const {
|
||||
KernelsData kernels_data;
|
||||
if (!Validate(params, options))
|
||||
@@ -40,13 +44,15 @@ KernelsData ShapeOfKernelRef::GetKernelsData(const Params &params, const optiona
|
||||
auto jit_constants = GetJitConstants(derived_params);
|
||||
auto jit = CreateJit(kernelName, jit_constants, entry_point);
|
||||
auto &clKernelData = kernel_data.kernels[0];
|
||||
clKernelData.skip_execution = SkipKernelExecution(derived_params);
|
||||
|
||||
kernel_data.update_dispatch_data_func = [](const Params& params, KernelData& kd) {
|
||||
kernel_data.update_dispatch_data_func = [this](const Params& params, KernelData& kd) {
|
||||
const auto& prim_params = static_cast<const shape_of_params&>(params);
|
||||
auto dispatchData = SetDefault(prim_params);
|
||||
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
|
||||
kd.kernels[0].params.workGroups.global = dispatchData.gws;
|
||||
kd.kernels[0].params.workGroups.local = dispatchData.lws;
|
||||
kd.kernels[0].skip_execution = SkipKernelExecution(prim_params);
|
||||
};
|
||||
|
||||
FillCLKernelData(clKernelData, dispatch_data, params.engineInfo, kernelName, jit, entry_point, EXE_MODE_DEFAULT,
|
||||
|
||||
@@ -29,6 +29,7 @@ class ShapeOfKernelRef: public KernelBaseOpenCL {
|
||||
ParamsKey GetSupportedKey() const override;
|
||||
bool Validate(const Params &p, const optional_params &o) const override;
|
||||
virtual JitConstants GetJitConstants(const shape_of_params& params) const;
|
||||
bool SkipKernelExecution(const shape_of_params& params) const;
|
||||
public:
|
||||
ShapeOfKernelRef() :
|
||||
KernelBaseOpenCL { "shape_of_ref" } {
|
||||
|
||||
@@ -86,6 +86,7 @@ KernelsData SoftmaxKernel_bf::GetKernelsData(const Params& params, const optiona
|
||||
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
|
||||
kd.kernels[0].params.workGroups.global = dispatchData.gws;
|
||||
kd.kernels[0].params.workGroups.local = dispatchData.lws;
|
||||
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -56,6 +56,7 @@ KernelsData SoftmaxKernelRef::GetKernelsData(const Params& params, const optiona
|
||||
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
|
||||
kd.kernels[0].params.workGroups.global = dispatchData.gws;
|
||||
kd.kernels[0].params.workGroups.local = dispatchData.lws;
|
||||
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
|
||||
kd.internalBufferSizes.clear();
|
||||
kd.internalBufferSizes.push_back(prim_params.inputs[0].PhysicalSizeInBytes());
|
||||
kd.internalBufferDataType = prim_params.inputs[0].GetDType();
|
||||
|
||||
@@ -229,6 +229,7 @@ KernelsData StridedSliceKernelRef::GetKernelsData(const Params& params, const op
|
||||
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
|
||||
kd.kernels[0].params.workGroups.global = dispatchData.gws;
|
||||
kd.kernels[0].params.workGroups.local = dispatchData.lws;
|
||||
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
|
||||
};
|
||||
|
||||
FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point,
|
||||
|
||||
@@ -67,6 +67,7 @@ KernelsData TileKernelRef::GetKernelsData(const Params& params, const optional_p
|
||||
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
|
||||
kd.kernels[0].params.workGroups.global = dispatchData.gws;
|
||||
kd.kernels[0].params.workGroups.local = dispatchData.lws;
|
||||
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
|
||||
};
|
||||
|
||||
auto& kernel = kd.kernels[0];
|
||||
|
||||
@@ -187,7 +187,9 @@ void set_arguments_impl(ocl_kernel_type& kernel,
|
||||
}
|
||||
|
||||
if (status != CL_SUCCESS) {
|
||||
throw std::runtime_error("Error set arg " + std::to_string(i) + ", error code: " + std::to_string(status) + "\n");
|
||||
throw std::runtime_error("Error set arg " + std::to_string(i)
|
||||
+ ", kernel: " + kernel.getInfo<CL_KERNEL_FUNCTION_NAME>()
|
||||
+ ", error code: " + std::to_string(status) + "\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,74 @@
|
||||
// Copyright (C) 2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "test_utils.h"
|
||||
|
||||
#include <intel_gpu/primitives/input_layout.hpp>
|
||||
#include <intel_gpu/primitives/gather_nd.hpp>
|
||||
|
||||
#include "gather_nd_inst.h"
|
||||
#include "program_wrapper.h"
|
||||
|
||||
using namespace cldnn;
|
||||
using namespace ::tests;
|
||||
|
||||
namespace shape_infer_tests {
|
||||
|
||||
struct gather_nd_test_params {
|
||||
layout in0_layout;
|
||||
layout in1_layout;
|
||||
int64_t batch_dim;
|
||||
bool batch_merged_output;
|
||||
layout expected_layout;
|
||||
};
|
||||
|
||||
// Value-parameterized fixture for GatherND shape inference; it adds no state
// of its own, each test instance reads its inputs via GetParam().
class gather_nd_test : public testing::TestWithParam<gather_nd_test_params> {};
|
||||
|
||||
// Builds a minimal program consisting of two input_layout nodes connected to
// a gather_nd node, runs gather_nd_inst::calc_output_layouts with
// ov::PartialShape, and checks that exactly one layout is produced and that
// it equals the expected layout of the current parameter set.
TEST_P(gather_nd_test, shape_infer) {
    // Bind by reference: the parameter set holds three layouts and is only read.
    const auto& params = GetParam();

    auto& engine = get_test_engine();

    auto data_prim = std::make_shared<input_layout>("input0", params.in0_layout);
    auto indices_prim = std::make_shared<input_layout>("input1", params.in1_layout);

    // The primitive takes the ranks of both inputs explicitly; derive them
    // from the partial shapes of the provided layouts.
    const auto data_rank = static_cast<uint8_t>(params.in0_layout.get_partial_shape().size());
    const auto indices_rank = static_cast<uint8_t>(params.in1_layout.get_partial_shape().size());

    auto gather_nd_prim = std::make_shared<gather_nd>("output",
                                                      input_info("input0"),
                                                      input_info("input1"),
                                                      data_rank,
                                                      indices_rank,
                                                      params.batch_dim,
                                                      params.batch_merged_output);

    cldnn::program prog(engine);

    auto& data_node = prog.get_or_create(data_prim);
    auto& indices_node = prog.get_or_create(indices_prim);
    auto& gather_nd_node = prog.get_or_create(gather_nd_prim);

    // Connection order defines the input order: data first, indices second.
    program_wrapper::add_connection(prog, data_node, gather_nd_node);
    program_wrapper::add_connection(prog, indices_node, gather_nd_node);

    const auto res = gather_nd_inst::calc_output_layouts<ov::PartialShape>(gather_nd_node,
                                                                           *gather_nd_node.get_kernel_impl_params());

    // Unsigned literal avoids a signed/unsigned comparison against size().
    ASSERT_EQ(res.size(), 1u);
    ASSERT_EQ(res[0], params.expected_layout);
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke, gather_nd_test,
|
||||
testing::ValuesIn(std::vector<gather_nd_test_params>{
|
||||
{
|
||||
layout{ov::PartialShape{1000, 256, 10, 15}, data_types::f32, format::bfyx},
|
||||
layout{ov::PartialShape{25, 125, 3}, data_types::f32, format::bfyx},
|
||||
0, false,
|
||||
layout{ov::PartialShape{25, 125, 15}, data_types::f32, format::bfyx}
|
||||
},
|
||||
{
|
||||
layout{ov::PartialShape{30, 2, 100, 35}, data_types::f32, format::bfyx},
|
||||
layout{ov::PartialShape{30, 2, 3, 1}, data_types::f32, format::bfyx},
|
||||
2, false,
|
||||
layout{ov::PartialShape{30, 2, 3, 35}, data_types::f32, format::bfyx}
|
||||
},
|
||||
{
|
||||
layout{ov::PartialShape{30, 2, 100, 35}, data_types::f32, format::bfyx},
|
||||
layout{ov::PartialShape{30, 2, 3, 1}, data_types::f32, format::bfyx},
|
||||
2, true,
|
||||
layout{ov::PartialShape{60, 3, 35}, data_types::f32, format::bfyx}
|
||||
}
|
||||
}));
|
||||
|
||||
} // shape_infer_tests
|
||||
@@ -32,7 +32,6 @@ TEST_P(test_empty_tensor, concat_two_inputs) {
|
||||
auto nonzero_input_mem = engine.allocate_memory(p.nonzero_input_layout);
|
||||
auto concat_data_mem = engine.allocate_memory(p.concat_input_layout);
|
||||
|
||||
|
||||
std::vector<int32_t> concat_another_input_data = generate_random_1d<int32_t>(p.concat_input_layout.count(), 0, 100);
|
||||
|
||||
set_values(concat_data_mem, concat_another_input_data);
|
||||
|
||||
Reference in New Issue
Block a user