[GPU] Unique-10 operation implementation. (#16412)
* [GPU] Unique-10 operation implementation. * Handled flattened case. * Created results for all outputs in single layer test. * Save total unique count as fifth output. * Handled axis case. * Added unique reshape kernel. * Moved data types to unique primitive constructor. * Added shape agnostic Unique ref kernel. * Added blocked layout support to Unique-10. * Use int in bubble sort. * Added unit tests. * Added support for blocked layouts to flattened mode. * Fixed usage of shape_info in kernel. * Use correct total data size for dynamic shapes. * Commented some functional tests. For some reason big shapes cause std::bad_alloc. * Initialize out_counts with zeros. * Implemented new approach for reducing memory footprint. Changed first kernel to only count unique values and changed second kernel to fill all outputs. * Revert "Commented some functional tests." This reverts commit a7f9763c575e71e14b85ee37adf1e98f10785c15. * Fixed calc output layouts for flattened case when rank is greater than 4. * Added temporary fix for axis case when rank is greater than 4. * Revert "Added temporary fix for axis case when rank is greater than 4." This reverts commit 236640d2f0e9d5b1f8dcbbf9482763badd7fde66. * Renamed "unique" to "unique_count" and "unique_reshape" to "unique_gather" primitives. * Quick fix for add_intermediate_node to consider dep_idx of multiple output * Fix bug for multiple output: 1) get_reorder was getting reorder from cache regardless of the dep_idx. 2) remove_redundant_reorder was not considering original dep_idx * Fixed conflicts. * Fixed win build issue. * Fixed build issue. * Revert "Fix bug for multiple output:" This reverts commit d4a2c4f32eabe9108df31d4837fed8995c93bd1c. * Revert "Quick fix for add_intermediate_node to consider dep_idx of multiple output" This reverts commit 2dfd2aaefdf32067a7469505b35f7096632ac5f2. * Added some tests to skip config. --------- Co-authored-by: Taylor Yeonbok Lee <taylor.lee@intel.com>
This commit is contained in:
parent
5993c4942a
commit
bae926de22
@ -248,6 +248,7 @@ REGISTER_FACTORY(v9, Eye);
|
||||
REGISTER_FACTORY(v10, IsFinite);
|
||||
REGISTER_FACTORY(v10, IsInf);
|
||||
REGISTER_FACTORY(v10, IsNaN);
|
||||
REGISTER_FACTORY(v10, Unique);
|
||||
|
||||
// --------------------------- Supported internal ops --------------------------- //
|
||||
REGISTER_FACTORY(internal, NonMaxSuppressionIEInternal);
|
||||
|
@ -0,0 +1,88 @@
|
||||
// Copyright (C) 2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "primitive.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
|
||||
/// @brief Primitive computing the number of unique elements (or slices) in the input tensor.
struct unique_count : primitive_base<unique_count> {
    CLDNN_DECLARE_PRIMITIVE(unique_count)

    /// @brief Constructs unique_count primitive.
    /// @param id This primitive id.
    /// @param input Input primitive id.
    /// @param flattened If true, operator works on a flattened version of the input tensor.
    /// @param axis Is used to "divide" the input tensor into slices.
    unique_count(const primitive_id& id, const input_info& input, bool flattened, int64_t axis)
        : primitive_base(id, {input}),
          flattened(flattened),
          axis(axis) {}

    bool flattened;  // operate on the flattened input when true
    int64_t axis;    // slicing axis (meaningful only when !flattened)

    /// @brief Combines the common primitive hash with this primitive's own fields.
    size_t hash() const override {
        auto seed = primitive::hash();
        seed = hash_combine(seed, flattened);
        return hash_combine(seed, axis);
    }

    /// @brief Equal when common params and both mode fields match.
    bool operator==(const primitive& rhs) const override {
        if (!compare_common_params(rhs))
            return false;
        const auto& other = downcast<const unique_count>(rhs);
        return flattened == other.flattened && axis == other.axis;
    }
};
|
||||
|
||||
/// @brief Primitive producing the four Unique-10 outputs: unique values, first-occurrence
/// indices, reverse indices and occurrence counts (output data types passed to the ctor).
struct unique_gather : primitive_base<unique_gather> {
    CLDNN_DECLARE_PRIMITIVE(unique_gather)

    /// @brief Constructs unique_gather primitive.
    /// @param id This primitive id.
    /// @param inputs Input primitives ids.
    /// @param flattened If true, operator works on a flattened version of the input tensor.
    /// @param axis Is used to "divide" the input tensor into slices.
    /// @param sorted Controls the order of the returned unique values (sorts ascending when true).
    /// @param elem_type Data type of the unique-values output.
    /// @param index_type Data type of the indices and reverse-indices outputs.
    /// @param count_type Data type of the counts output.
    unique_gather(const primitive_id& id,
                  const std::vector<input_info>& inputs,
                  bool flattened,
                  int64_t axis,
                  bool sorted,
                  data_types elem_type,
                  data_types index_type,
                  data_types count_type)
        : primitive_base(id, inputs, decltype(output_paddings)(4), {elem_type, index_type, index_type, count_type}, 4),
          flattened(flattened),
          axis(axis),
          sorted(sorted) {}

    bool flattened;  // operate on the flattened input when true
    int64_t axis;    // slicing axis (meaningful only when !flattened)
    bool sorted;     // ascending order of unique values when true

    /// @brief Combines the common primitive hash with this primitive's own fields.
    size_t hash() const override {
        auto seed = primitive::hash();
        seed = hash_combine(seed, flattened);
        seed = hash_combine(seed, axis);
        return hash_combine(seed, sorted);
    }

    /// @brief Equal when common params and all mode fields match.
    bool operator==(const primitive& rhs) const override {
        if (!compare_common_params(rhs))
            return false;
        const auto& other = downcast<const unique_gather>(rhs);
        return flattened == other.flattened && axis == other.axis && sorted == other.sorted;
    }
};
|
||||
|
||||
} // namespace cldnn
|
@ -94,6 +94,8 @@ void register_implementations() {
|
||||
REGISTER_OCL(count_nonzero);
|
||||
REGISTER_OCL(gather_nonzero);
|
||||
REGISTER_OCL(eye);
|
||||
REGISTER_OCL(unique_count);
|
||||
REGISTER_OCL(unique_gather);
|
||||
}
|
||||
|
||||
} // namespace ocl
|
||||
|
@ -75,6 +75,7 @@
|
||||
#include "intel_gpu/primitives/tile.hpp"
|
||||
#include "intel_gpu/primitives/non_zero.hpp"
|
||||
#include "intel_gpu/primitives/eye.hpp"
|
||||
#include "intel_gpu/primitives/unique.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
namespace ocl {
|
||||
@ -174,6 +175,8 @@ REGISTER_OCL(convert_color);
|
||||
REGISTER_OCL(count_nonzero);
|
||||
REGISTER_OCL(gather_nonzero);
|
||||
REGISTER_OCL(eye);
|
||||
REGISTER_OCL(unique_count);
|
||||
REGISTER_OCL(unique_gather);
|
||||
|
||||
#undef REGISTER_OCL
|
||||
|
||||
|
167
src/plugins/intel_gpu/src/graph/impls/ocl/unique.cpp
Normal file
167
src/plugins/intel_gpu/src/graph/impls/ocl/unique.cpp
Normal file
@ -0,0 +1,167 @@
|
||||
// Copyright (C) 2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "primitive_base.hpp"
|
||||
#include "unique/unique_kernel_ref.hpp"
|
||||
#include "unique/unique_kernel_selector.hpp"
|
||||
#include "unique_inst.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
namespace ocl {
|
||||
|
||||
struct unique_count_impl : typed_primitive_impl_ocl<unique_count> {
    using parent = typed_primitive_impl_ocl<unique_count>;
    using parent::parent;
    using kernel_selector_t = kernel_selector::unique_count_kernel_selector;
    using kernel_params_t =
        std::pair<kernel_selector::unique_count_params, kernel_selector::unique_count_optional_params>;

    DECLARE_OBJECT_TYPE_SERIALIZATION

    /// @brief Deep-copies this implementation object.
    std::unique_ptr<primitive_impl> clone() const override {
        return make_unique<unique_count_impl>(*this);
    }

    /// @brief Builds kernel-selector parameters from the primitive description.
    static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
        const auto& desc = impl_param.typed_desc<unique_count>();
        auto kernel_params = get_default_params<kernel_selector::unique_count_params>(impl_param, is_shape_agnostic);
        auto kernel_optional_params =
            get_default_optional_params<kernel_selector::unique_count_optional_params>(impl_param.get_program());

        kernel_params.flattened = desc->flattened;
        kernel_params.axis = desc->axis;

        return {kernel_params, kernel_optional_params};
    }

    /// @brief Refreshes dispatch data for dynamic shapes before enqueue.
    void update_dispatch_data(const kernel_impl_params& impl_param) override {
        auto params = get_kernel_params(impl_param, true);
        (_kernel_data.update_dispatch_data_func)(params.first, _kernel_data);
    }
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
attach_unique_count_impl::attach_unique_count_impl() {
    const auto supported_types = {
        data_types::u8,
        data_types::i8,
        data_types::f16,
        data_types::f32,
        data_types::i32,
        data_types::i64,
    };

    // Planar and blocked 4D/5D layouts plus planar 6D.
    const auto supported_formats = {
        format::bfyx,
        format::b_fs_yx_fsv16,
        format::b_fs_yx_fsv32,
        format::bs_fs_yx_bsv16_fsv16,
        format::bs_fs_yx_bsv16_fsv32,
        format::bs_fs_yx_bsv32_fsv16,
        format::bs_fs_yx_bsv32_fsv32,

        format::bfzyx,
        format::b_fs_zyx_fsv16,
        format::b_fs_zyx_fsv32,
        format::bs_fs_zyx_bsv16_fsv16,
        format::bs_fs_zyx_bsv16_fsv32,
        format::bs_fs_zyx_bsv32_fsv16,
        format::bs_fs_zyx_bsv32_fsv32,

        format::bfwzyx,
    };

    // Registered with shape_types::any so the shape-agnostic path is also covered.
    implementation_map<unique_count>::add(impl_types::ocl,
                                          shape_types::any,
                                          typed_primitive_impl_ocl<unique_count>::create<unique_count_impl>,
                                          supported_types,
                                          supported_formats);
}
|
||||
} // namespace detail
|
||||
|
||||
struct unique_gather_impl : typed_primitive_impl_ocl<unique_gather> {
    using parent = typed_primitive_impl_ocl<unique_gather>;
    using parent::parent;
    using kernel_selector_t = kernel_selector::unique_gather_kernel_selector;
    using kernel_params_t =
        std::pair<kernel_selector::unique_gather_params, kernel_selector::unique_gather_optional_params>;

    DECLARE_OBJECT_TYPE_SERIALIZATION

    /// @brief Deep-copies this implementation object.
    std::unique_ptr<primitive_impl> clone() const override {
        return make_unique<unique_gather_impl>(*this);
    }

    /// @brief Builds kernel-selector parameters; forwards every input/output layout
    /// beyond the first, which get_default_params presumably already fills (hence
    /// the loops starting at index 1).
    static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
        const auto& desc = impl_param.typed_desc<unique_gather>();
        auto kernel_params = get_default_params<kernel_selector::unique_gather_params>(impl_param, is_shape_agnostic);
        auto kernel_optional_params =
            get_default_optional_params<kernel_selector::unique_gather_optional_params>(impl_param.get_program());

        kernel_params.flattened = desc->flattened;
        kernel_params.axis = desc->axis;
        kernel_params.sorted = desc->sorted;

        for (size_t i = 1; i < impl_param.input_layouts.size(); ++i) {
            kernel_params.inputs.push_back(convert_data_tensor(impl_param.input_layouts.at(i)));
        }
        for (size_t i = 1; i < impl_param.output_layouts.size(); ++i) {
            kernel_params.outputs.push_back(convert_data_tensor(impl_param.output_layouts.at(i)));
        }

        return {kernel_params, kernel_optional_params};
    }

    /// @brief Refreshes dispatch data for dynamic shapes before enqueue.
    void update_dispatch_data(const kernel_impl_params& impl_param) override {
        auto params = get_kernel_params(impl_param, true);
        (_kernel_data.update_dispatch_data_func)(params.first, _kernel_data);
    }
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
attach_unique_gather_impl::attach_unique_gather_impl() {
    const auto supported_types = {
        data_types::u8,
        data_types::i8,
        data_types::f16,
        data_types::f32,
        data_types::i32,
        data_types::i64,
    };

    // Same coverage as unique_count: planar/blocked 4D and 5D plus planar 6D.
    const auto supported_formats = {
        format::bfyx,
        format::b_fs_yx_fsv16,
        format::b_fs_yx_fsv32,
        format::bs_fs_yx_bsv16_fsv16,
        format::bs_fs_yx_bsv16_fsv32,
        format::bs_fs_yx_bsv32_fsv16,
        format::bs_fs_yx_bsv32_fsv32,

        format::bfzyx,
        format::b_fs_zyx_fsv16,
        format::b_fs_zyx_fsv32,
        format::bs_fs_zyx_bsv16_fsv16,
        format::bs_fs_zyx_bsv16_fsv32,
        format::bs_fs_zyx_bsv32_fsv16,
        format::bs_fs_zyx_bsv32_fsv32,

        format::bfwzyx,
    };

    implementation_map<unique_gather>::add(impl_types::ocl,
                                           shape_types::any,
                                           typed_primitive_impl_ocl<unique_gather>::create<unique_gather_impl>,
                                           supported_types,
                                           supported_formats);
}
|
||||
} // namespace detail
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
||||
|
||||
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::unique_count_impl)
|
||||
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::unique_gather_impl)
|
73
src/plugins/intel_gpu/src/graph/include/unique_inst.hpp
Normal file
73
src/plugins/intel_gpu/src/graph/include/unique_inst.hpp
Normal file
@ -0,0 +1,73 @@
|
||||
// Copyright (C) 2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "intel_gpu/primitives/unique.hpp"
|
||||
#include "primitive_inst.h"
|
||||
|
||||
namespace cldnn {
|
||||
|
||||
// Program-graph node type for the unique_count primitive.
template <>
struct typed_program_node<unique_count> : typed_program_node_base<unique_count> {
    using parent = typed_program_node_base<unique_count>;
    using parent::parent;

    // The single data input of this node.
    program_node& input() const {
        return get_dependency(0);
    }
};

using unique_count_node = typed_program_node<unique_count>;
|
||||
|
||||
// Primitive instance type for unique_count.
template <>
class typed_primitive_inst<unique_count> : public typed_primitive_inst_base<unique_count> {
public:
    using parent = typed_primitive_inst_base<unique_count>;
    using parent::parent;

    // Static-shape entry point (the implementation throws; only the dynamic
    // calc_output_layouts path is supported).
    static layout calc_output_layout(const unique_count_node& node, const kernel_impl_params& impl_param);
    // Dynamic-shape path: produces a single [1] i64 layout holding the unique count.
    template <typename ShapeType>
    static std::vector<layout> calc_output_layouts(const unique_count_node& node, const kernel_impl_params& impl_param);
    static std::string to_string(const unique_count_node& node);
};

using unique_count_inst = typed_primitive_inst<unique_count>;
|
||||
|
||||
// Program-graph node type for the unique_gather primitive.
template <>
struct typed_program_node<unique_gather> : typed_program_node_base<unique_gather> {
    using parent = typed_program_node_base<unique_gather>;
    using parent::parent;

    // First (data) input of this node.
    program_node& input() const {
        return get_dependency(0);
    }

    // Output shapes depend on runtime data (the unique count), not on input
    // shapes alone, so the node always reports dynamic output.
    bool generates_dynamic_output() const override {
        return true;
    }

    // Shape inference must read dependency 1 (the unique_count result) on host.
    std::vector<size_t> get_shape_infer_dependencies() const override {
        return {1};
    }
};

using unique_gather_node = typed_program_node<unique_gather>;
|
||||
|
||||
// Primitive instance type for unique_gather.
template <>
class typed_primitive_inst<unique_gather> : public typed_primitive_inst_base<unique_gather> {
public:
    using parent = typed_primitive_inst_base<unique_gather>;
    using parent::parent;

    // Static-shape entry point (the implementation throws; only the dynamic
    // calc_output_layouts path is supported).
    static layout calc_output_layout(const unique_gather_node& node, const kernel_impl_params& impl_param);
    // Dynamic-shape path: computes all four output layouts, reading the unique
    // count from memory dependency 1 when it is available.
    template <typename ShapeType>
    static std::vector<layout> calc_output_layouts(const unique_gather_node& node,
                                                   const kernel_impl_params& impl_param);
    static std::string to_string(const unique_gather_node& node);
};

using unique_gather_inst = typed_primitive_inst<unique_gather>;
|
||||
|
||||
} // namespace cldnn
|
@ -65,6 +65,7 @@
|
||||
#include "strided_slice_inst.h"
|
||||
#include "loop_inst.h"
|
||||
#include "reverse_inst.h"
|
||||
#include "unique_inst.hpp"
|
||||
#include "to_string_utils.h"
|
||||
|
||||
// TODO: Remove once we have interface for kernels cache
|
||||
@ -1440,6 +1441,8 @@ void program::set_layout_optimizer_attributes(layout_optimizer& lo) {
|
||||
prim.type() != cldnn::gather_tree::type_id() &&
|
||||
prim.type() != cldnn::experimental_detectron_detection_output::type_id() &&
|
||||
prim.type() != cldnn::convert_color::type_id() &&
|
||||
prim.type() != cldnn::unique_count::type_id() &&
|
||||
prim.type() != cldnn::unique_gather::type_id() &&
|
||||
prim.type() != cldnn::experimental_detectron_generate_proposals_single_image::type_id()) {
|
||||
can_use_fsv16 = false;
|
||||
}
|
||||
@ -1493,6 +1496,8 @@ void program::set_layout_optimizer_attributes(layout_optimizer& lo) {
|
||||
prim.type() != cldnn::multiclass_nms::type_id() &&
|
||||
prim.type() != cldnn::normalize::type_id() &&
|
||||
prim.type() != cldnn::deconvolution::type_id() &&
|
||||
prim.type() != cldnn::unique_count::type_id() &&
|
||||
prim.type() != cldnn::unique_gather::type_id() &&
|
||||
prim.type() != cldnn::experimental_detectron_generate_proposals_single_image::type_id()) {
|
||||
can_use_bs_fs_yx_bsv16_fsv16 = false;
|
||||
}
|
||||
|
138
src/plugins/intel_gpu/src/graph/unique.cpp
Normal file
138
src/plugins/intel_gpu/src/graph/unique.cpp
Normal file
@ -0,0 +1,138 @@
|
||||
// Copyright (C) 2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "ngraph/op/unique.hpp"
|
||||
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
#include "intel_gpu/runtime/memory.hpp"
|
||||
#include "json_object.h"
|
||||
#include "primitive_type_base.h"
|
||||
#include "unique_inst.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
|
||||
// -----------------------------------------------
|
||||
// unique_count
|
||||
// -----------------------------------------------
|
||||
GPU_DEFINE_PRIMITIVE_TYPE_ID(unique_count)
|
||||
|
||||
// Static-shape entry point is intentionally unsupported: the result depends on
// runtime data, so only the dynamic calc_output_layouts path is valid.
layout unique_count_inst::calc_output_layout(const unique_count_node& node, const kernel_impl_params& impl_param) {
    OPENVINO_THROW("Only calc_output_layouts should be used!");
}
|
||||
|
||||
template <typename ShapeType>
|
||||
std::vector<layout> unique_count_inst::calc_output_layouts(const unique_count_node& node,
|
||||
const kernel_impl_params& impl_param) {
|
||||
return {layout{ov::PartialShape{1}, cldnn::data_types::i64, cldnn::format::bfyx}};
|
||||
}
|
||||
|
||||
template std::vector<layout> unique_count_inst::calc_output_layouts<ov::PartialShape>(
|
||||
const unique_count_node& node,
|
||||
const kernel_impl_params& impl_param);
|
||||
|
||||
std::string unique_count_inst::to_string(const unique_count_node& node) {
|
||||
auto primitive = node.get_primitive();
|
||||
json_composite unique_count_info;
|
||||
unique_count_info.add("input", node.input().id());
|
||||
if (!primitive->flattened) {
|
||||
unique_count_info.add("axis", primitive->axis);
|
||||
}
|
||||
|
||||
auto node_info = node.desc_to_json();
|
||||
node_info->add("unique_count info", unique_count_info);
|
||||
|
||||
std::ostringstream primitive_description;
|
||||
node_info->dump(primitive_description);
|
||||
return primitive_description.str();
|
||||
}
|
||||
|
||||
// -----------------------------------------------
|
||||
// unique_gather
|
||||
// -----------------------------------------------
|
||||
GPU_DEFINE_PRIMITIVE_TYPE_ID(unique_gather)
|
||||
|
||||
// Static-shape entry point is intentionally unsupported: output shapes depend on
// the runtime unique count, so only the dynamic calc_output_layouts path is valid.
layout unique_gather_inst::calc_output_layout(const unique_gather_node& node, const kernel_impl_params& impl_param) {
    OPENVINO_THROW("Only calc_output_layouts should be used!");
}
|
||||
|
||||
// Computes the four output layouts of unique_gather:
//   0: unique elements, 1: first-occurrence indices, 2: reverse indices, 3: counts.
// Before the unique_count result (memory dependency 1) is available, all shapes
// are reported dynamic; afterwards exact shapes are derived from the count.
template <typename ShapeType>
std::vector<layout> unique_gather_inst::calc_output_layouts(const unique_gather_node& node,
                                                            const kernel_impl_params& impl_param) {
    std::vector<layout> layouts;
    const auto desc = impl_param.typed_desc<unique_gather>();
    const auto input_layout = impl_param.get_input_layout();

    std::vector<ShapeType> output_shapes = {ShapeType(), ShapeType(), ShapeType(), ShapeType()};

    if (!impl_param.memory_deps.count(1)) {
        // Count not known yet: unique-values output keeps the input rank in axis
        // mode, is 1D in flattened mode; all other outputs are dynamic 1D.
        if (desc->flattened) {
            output_shapes.at(0) = ov::PartialShape{ov::Dimension::dynamic()};
        } else {
            output_shapes.at(0) = ov::PartialShape::dynamic(input_layout.get_partial_shape().rank());
        }
        output_shapes.at(1) = ov::PartialShape{ov::Dimension::dynamic()};
        output_shapes.at(2) = ov::PartialShape{ov::Dimension::dynamic()};
        output_shapes.at(3) = ov::PartialShape{ov::Dimension::dynamic()};
    } else {
        const auto input_shape = input_layout.get_shape();
        // The unique_count primitive stores the number of unique items as the
        // first (only) element of its i64 output.
        const size_t unique_count = read_vector<int64_t>(impl_param.memory_deps.at(1), impl_param.get_stream()).at(0);
        if (desc->flattened) {
            const auto input_tensor_capacity = ov::shape_size(input_shape);
            output_shapes.at(0) = ov::Shape{unique_count};
            output_shapes.at(1) = ov::Shape{unique_count};
            // Reverse indices: one entry per input element.
            output_shapes.at(2) = ov::Shape{input_tensor_capacity};
            output_shapes.at(3) = ov::Shape{unique_count};
        } else {
            // Axis mode: only the axis dimension shrinks to the unique slice count.
            auto output_shape = input_shape;
            auto& new_axis_dimension = output_shape.at(desc->axis);
            const auto old_axis_dimension = new_axis_dimension;
            new_axis_dimension = unique_count;
            output_shapes.at(0) = output_shape;
            output_shapes.at(1) = ov::Shape{new_axis_dimension};
            // Reverse indices: one entry per original slice along the axis.
            output_shapes.at(2) = ov::Shape{old_axis_dimension};
            output_shapes.at(3) = ov::Shape{new_axis_dimension};
        }
    }

    for (auto i = 0U; i < desc->num_outputs; ++i) {
        const auto& output_shape = output_shapes.at(i);
        const auto output_dt = desc->output_data_types.at(i).value();
        auto output_format = format::get_default_format(output_shape.size());
        if (i == 0) {
            // The unique-values output keeps (or rank-adjusts) the input format so
            // blocked layouts are preserved; index/count outputs use default planar.
            if (desc->flattened) {
                output_format = format::adjust_to_rank(input_layout.format, output_shape.size());
            } else {
                output_format = input_layout.format;
            }
        }
        layouts.emplace_back(output_shape, output_dt, output_format);
    }

    return layouts;
}

template std::vector<layout> unique_gather_inst::calc_output_layouts<ov::PartialShape>(
    const unique_gather_node& node,
    const kernel_impl_params& impl_param);
|
||||
|
||||
std::string unique_gather_inst::to_string(const unique_gather_node& node) {
|
||||
auto primitive = node.get_primitive();
|
||||
json_composite unique_gather_info;
|
||||
unique_gather_info.add("input", node.input().id());
|
||||
if (!primitive->flattened) {
|
||||
unique_gather_info.add("axis", primitive->axis);
|
||||
}
|
||||
unique_gather_info.add("sorted", primitive->sorted);
|
||||
|
||||
auto node_info = node.desc_to_json();
|
||||
node_info->add("unique_gather info", unique_gather_info);
|
||||
|
||||
std::ostringstream primitive_description;
|
||||
node_info->dump(primitive_description);
|
||||
return primitive_description.str();
|
||||
}
|
||||
|
||||
} // namespace cldnn
|
@ -0,0 +1,65 @@
|
||||
// Copyright (C) 2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
// LENGTH = number of items to deduplicate: all elements in flattened mode,
// otherwise the number of slices along the selected axis.
#ifdef FLATTENED
#    define LENGTH TOTAL_DATA_SIZE
#else
#    define LENGTH AXIS_LENGTH
#endif

#ifndef FLATTENED
// Compares slice `lhs` of the collected unique elements with slice `rhs` of the
// input, element by element (ITERATE is a JIT macro expanding to loops over the
// non-axis dimensions).
inline bool FUNC(slices_are_equal)(OPTIONAL_SHAPE_INFO_ARG const __global INPUT0_TYPE* out_unique_elements,
                                   uint lhs,
                                   const __global INPUT0_TYPE* input,
                                   uint rhs) {
    ITERATE(if (out_unique_elements[GET_INDEX(INPUT0, lhs)] != input[GET_INDEX(INPUT0, rhs)]) { return false; })
    return true;
}

// Copies input slice `rhs` into slot `lhs` of the unique-elements buffer.
inline void FUNC(assign_slice)(OPTIONAL_SHAPE_INFO_ARG __global INPUT0_TYPE* out_unique_elements,
                               uint lhs,
                               const __global INPUT0_TYPE* input,
                               uint rhs) {
    ITERATE(out_unique_elements[GET_INDEX(INPUT0, lhs)] = input[GET_INDEX(INPUT0, rhs)];)
}
#endif

// Works on unsorted data, but has worse complexity: for each of the n items it
// linearly scans the unique items found so far. Returns the number of unique
// items and materializes them into out_unique_elements.
inline uint FUNC(unique)(OPTIONAL_SHAPE_INFO_ARG const __global INPUT0_TYPE* input,
                         __global INPUT0_TYPE* out_unique_elements,
                         uint first,
                         const uint last) {
    uint unique_length = 0;
    for (; first != last; ++first) {
        bool unique = true;
        for (uint unique_idx = 0; unique_idx < unique_length; ++unique_idx) {
#ifdef FLATTENED
            if (out_unique_elements[unique_idx] == input[GET_INDEX(INPUT0, first)]) {
#else
            if (FUNC_CALL(slices_are_equal)(OPTIONAL_SHAPE_INFO_TENSOR out_unique_elements, unique_idx, input, first)) {
#endif
                unique = false;
                break;
            }
        }
        if (unique) {
#ifdef FLATTENED
            out_unique_elements[unique_length] = input[GET_INDEX(INPUT0, first)];
#else
            FUNC_CALL(assign_slice)(OPTIONAL_SHAPE_INFO_TENSOR out_unique_elements, unique_length, input, first);
#endif
            ++unique_length;
        }
    }
    return unique_length;
}

// Counts unique values/slices and stores the count as the single element of
// out_total_count; out_unique_elements is filled as a side effect.
// NOTE(review): the body is sequential with no get_global_id usage — presumably
// dispatched as a single work item; confirm in the host-side kernel selector.
KERNEL(unique_count_ref)
(OPTIONAL_SHAPE_INFO_ARG const __global INPUT0_TYPE* input,
 __global OUTPUT_TYPE* out_total_count,
 __global INPUT0_TYPE* out_unique_elements) {
    out_total_count[0] = FUNC_CALL(unique)(OPTIONAL_SHAPE_INFO_TENSOR input, out_unique_elements, 0, LENGTH);
}

#undef LENGTH
|
@ -0,0 +1,181 @@
|
||||
// Copyright (C) 2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
// LENGTH = number of items to process: all elements in flattened mode,
// otherwise the number of slices along the selected axis.
#ifdef FLATTENED
#    define LENGTH TOTAL_DATA_SIZE
#else
#    define LENGTH AXIS_LENGTH
#endif

// Swap helpers, one per distinctly-typed output buffer (used by bubble sort).
inline void FUNC(swap_out_unique_elements)(__global OUTPUT_TYPE* a, __global OUTPUT_TYPE* b) {
    const OUTPUT_TYPE temp = *a;
    *a = *b;
    *b = temp;
}

inline void FUNC(swap_out_indices)(__global OUTPUT1_TYPE* a, __global OUTPUT1_TYPE* b) {
    const OUTPUT1_TYPE temp = *a;
    *a = *b;
    *b = temp;
}

inline void FUNC(swap_out_counts)(__global OUTPUT3_TYPE* a, __global OUTPUT3_TYPE* b) {
    const OUTPUT3_TYPE temp = *a;
    *a = *b;
    *b = temp;
}
|
||||
|
||||
#ifndef FLATTENED
// Lexicographic "greater-than" over the elements of two unique-value slices;
// returning true means the pair is out of ascending order and should be swapped.
inline bool FUNC(compare_slices_ascending)(OPTIONAL_SHAPE_INFO_ARG const __global OUTPUT_TYPE* out_unique_elements,
                                           uint lhs,
                                           uint rhs) {
    ITERATE(
        if (out_unique_elements[GET_INDEX(OUTPUT, lhs)] > out_unique_elements[GET_INDEX(OUTPUT, rhs)]) {
            return true;
        } else if (out_unique_elements[GET_INDEX(OUTPUT, lhs)] < out_unique_elements[GET_INDEX(OUTPUT, rhs)]) {
            return false;
        } else { continue; })
    return false;
}

// Element-wise swap of two unique-value slices.
inline void FUNC(swap_slices)(OPTIONAL_SHAPE_INFO_ARG __global OUTPUT_TYPE* out_unique_elements, uint lhs, uint rhs) {
    ITERATE(FUNC_CALL(swap_out_unique_elements)(&out_unique_elements[GET_INDEX(OUTPUT, lhs)],
                                                &out_unique_elements[GET_INDEX(OUTPUT, rhs)]);)
}

// Compares unique-value slice `lhs` with input slice `rhs`, element by element.
inline bool FUNC(slices_are_equal)(OPTIONAL_SHAPE_INFO_ARG const __global OUTPUT_TYPE* out_unique_elements,
                                   uint lhs,
                                   const __global INPUT0_TYPE* input,
                                   uint rhs) {
    ITERATE(if (out_unique_elements[GET_INDEX(OUTPUT, lhs)] != input[GET_INDEX(INPUT0, rhs)]) { return false; })
    return true;
}

// Copies input slice `rhs` into slot `lhs` of the unique-values output.
inline void FUNC(assign_slice)(OPTIONAL_SHAPE_INFO_ARG __global OUTPUT_TYPE* out_unique_elements,
                               uint lhs,
                               const __global INPUT0_TYPE* input,
                               uint rhs) {
    ITERATE(out_unique_elements[GET_INDEX(OUTPUT, lhs)] = input[GET_INDEX(INPUT0, rhs)];)
}
#endif
|
||||
|
||||
// We use bubble sort here, because we need stable sort
// TODO: Change to better stable sort algorithm
// Sorts unique values ascending over the inclusive range [l, h], swapping the
// parallel out_indices/out_counts entries in lockstep so all outputs stay
// aligned. Early-exits when a pass performs no swaps.
// NOTE(review): indices/counts are swapped at flat positions j/j+1 while the
// elements use GET_INDEX — assumes those two buffers are planar; verify for
// blocked layouts.
inline void FUNC(bubbleSort)(OPTIONAL_SHAPE_INFO_ARG __global OUTPUT_TYPE* out_unique_elements,
                             __global OUTPUT1_TYPE* out_indices,
                             __global OUTPUT3_TYPE* out_counts,
                             int l,
                             int h) {
    for (int i = 0; i < h - l; ++i) {
        bool swapped = false;
        for (int j = l; j < h - i; ++j) {
#ifdef FLATTENED
            int j1 = j + 1;
            if ((out_unique_elements[GET_INDEX(OUTPUT, j)] > out_unique_elements[GET_INDEX(OUTPUT, j1)])) {
                FUNC_CALL(swap_out_unique_elements)
                (&out_unique_elements[GET_INDEX(OUTPUT, j)], &out_unique_elements[GET_INDEX(OUTPUT, j1)]);
#else
            if (FUNC_CALL(compare_slices_ascending)(OPTIONAL_SHAPE_INFO_TENSOR out_unique_elements, j, j + 1)) {
                FUNC_CALL(swap_slices)(OPTIONAL_SHAPE_INFO_TENSOR out_unique_elements, j, j + 1);
#endif
                FUNC_CALL(swap_out_indices)(&out_indices[j], &out_indices[j + 1]);
                FUNC_CALL(swap_out_counts)(&out_counts[j], &out_counts[j + 1]);
                swapped = true;
            }
        }
        if (!swapped) {
            break;
        }
    }
}
|
||||
|
||||
// Works on unsorted data, but has worse complexity
|
||||
// Works on unsorted data, but has worse complexity
// Deduplicates the input while filling all four outputs:
//  - out_unique_elements: each new value/slice appended at slot unique_length
//  - out_indices: input position of the first occurrence of each unique item
//  - out_rev_indices: for every input item, the slot of its unique value
//  - out_counts: occurrences of each unique item (buffer must be pre-zeroed by the caller)
// Returns the number of unique items found.
inline uint FUNC(unique)(OPTIONAL_SHAPE_INFO_ARG const __global INPUT0_TYPE* input,
                         __global OUTPUT_TYPE* out_unique_elements,
                         __global OUTPUT1_TYPE* out_indices,
                         __global OUTPUT2_TYPE* out_rev_indices,
                         __global OUTPUT3_TYPE* out_counts,
                         uint first,
                         const uint last) {
    uint unique_length = 0;
    for (; first != last; ++first) {
        bool unique = true;
        for (uint unique_idx = 0; unique_idx < unique_length; ++unique_idx) {
#ifdef FLATTENED
            if (out_unique_elements[GET_INDEX(OUTPUT, unique_idx)] == input[GET_INDEX(INPUT0, first)]) {
#else
            if (FUNC_CALL(slices_are_equal)(OPTIONAL_SHAPE_INFO_TENSOR out_unique_elements, unique_idx, input, first)) {
#endif
                // Duplicate: record the reverse index and bump the count.
                unique = false;
                out_rev_indices[first] = unique_idx;
                ++out_counts[unique_idx];
                break;
            }
        }
        if (unique) {
#ifdef FLATTENED
            out_unique_elements[GET_INDEX(OUTPUT, unique_length)] = input[GET_INDEX(INPUT0, first)];
#else
            FUNC_CALL(assign_slice)(OPTIONAL_SHAPE_INFO_TENSOR out_unique_elements, unique_length, input, first);
#endif
            out_indices[unique_length] = first;
            out_rev_indices[first] = unique_length;
            ++out_counts[unique_length];
            ++unique_length;
        }
    }
    return unique_length;
}
|
||||
|
||||
inline uint FUNC(fill_out_rev_indices)(OPTIONAL_SHAPE_INFO_ARG const __global INPUT0_TYPE* input,
|
||||
__global OUTPUT_TYPE* out_unique_elements,
|
||||
__global OUTPUT2_TYPE* out_rev_indices,
|
||||
const uint end) {
|
||||
for (uint i = 0; i < LENGTH; ++i) {
|
||||
for (uint j = 0; j < end; ++j) {
|
||||
#ifdef FLATTENED
|
||||
if (out_unique_elements[GET_INDEX(OUTPUT, j)] == input[GET_INDEX(INPUT0, i)]) {
|
||||
#else
|
||||
if (FUNC_CALL(slices_are_equal)(OPTIONAL_SHAPE_INFO_TENSOR out_unique_elements, j, input, i)) {
|
||||
#endif
|
||||
out_rev_indices[i] = j;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Fills all four Unique-10 outputs. out_total_count (the unique_count result)
// is bound as an input, but the count is recomputed here by FUNC(unique).
// NOTE(review): the body is sequential with no get_global_id usage — presumably
// dispatched as a single work item; confirm in the host-side kernel selector.
KERNEL(unique_gather_ref)
(OPTIONAL_SHAPE_INFO_ARG const __global INPUT0_TYPE* input,
 const __global INPUT1_TYPE* out_total_count,
 __global OUTPUT_TYPE* out_unique_elements,
 __global OUTPUT1_TYPE* out_indices,
 __global OUTPUT2_TYPE* out_rev_indices,
 __global OUTPUT3_TYPE* out_counts) {
    // TODO: Think of better approach to initialize with zeros
    // FUNC(unique) only increments counts, so zero them up-front.
    // NOTE(review): writes LENGTH entries — assumes out_counts has worst-case
    // capacity of LENGTH; verify against the allocated buffer size.
    for (uint i = 0; i < LENGTH; ++i) {
        out_counts[i] = 0;
    }
    // Run unique algorithm
    const uint end = FUNC_CALL(unique)(OPTIONAL_SHAPE_INFO_TENSOR input,
                                       out_unique_elements,
                                       out_indices,
                                       out_rev_indices,
                                       out_counts,
                                       0,
                                       LENGTH);
#ifdef SORTED
    // Sort out data
    FUNC_CALL(bubbleSort)(OPTIONAL_SHAPE_INFO_TENSOR out_unique_elements, out_indices, out_counts, 0, end - 1);
    // After sorting all out_unique_elements will shuffle and out_rev_indices should change not only order, but their
    // values (indexes).
    // So, we need to fill them again...
    // Another approach would be to allocate whole separate buffer as input, sort whole dataset first and then run
    // deduplicate algorithm with correct filling of out_rev_indices.
    FUNC_CALL(fill_out_rev_indices)
    (OPTIONAL_SHAPE_INFO_TENSOR input, out_unique_elements, out_rev_indices, end);
#endif
}
|
||||
|
||||
#undef LENGTH
|
@ -93,7 +93,9 @@ enum class KernelType {
|
||||
PRIOR_BOX,
|
||||
EYE,
|
||||
GENERATE_PROPOSALS,
|
||||
MULTICLASS_NMS
|
||||
MULTICLASS_NMS,
|
||||
UNIQUE_COUNT,
|
||||
UNIQUE_GATHER,
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -0,0 +1,343 @@
|
||||
// Copyright (C) 2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "unique_kernel_ref.hpp"
|
||||
|
||||
#include "kernel_selector_utils.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
namespace {
|
||||
|
||||
// Builds the JIT constants needed for the "axis" (non-flattened) Unique mode:
//  - AXIS_LENGTH:        size of the axis dimension of the input tensor,
//  - GET_INDEX(prefix,i): index computation with the axis coordinate fixed to `i`,
//  - ITERATE(body):      nested loops over every dimension except the axis one.
JitConstants MakeAxisJitConstants(size_t rank, int64_t axis, const std::string& prefix_for_iterate) {
    // Dimension letter -> tensor size macro suffix.
    const std::map<char, std::string> dim_size_suffix = {
        {'b', "_BATCH_NUM"},
        {'f', "_FEATURE_NUM"},
        {'w', "_SIZE_W"},
        {'z', "_SIZE_Z"},
        {'y', "_SIZE_Y"},
        {'x', "_SIZE_X"},
    };

    std::vector<char> dims;
    switch (rank) {
    case 4:
        dims = {'b', 'f', 'y', 'x'};
        break;
    case 5:
        dims = {'b', 'f', 'z', 'y', 'x'};
        break;
    case 6:
        dims = {'b', 'f', 'w', 'z', 'y', 'x'};
        break;
    default:
        throw std::invalid_argument("Unsupported input rank for unique primitive");
    }

    // AXIS_LENGTH always refers to the input tensor (INPUT0), regardless of
    // which prefix the ITERATE macro targets.
    const std::string axis_length_val = "INPUT0" + dim_size_suffix.at(dims.at(axis));

    // Mark the axis dimension as 'i' so GET_INDEX substitutes the caller-supplied index.
    dims.at(axis) = 'i';

    std::string get_index_val = "CAT(prefix, _GET_INDEX)(";
    for (const char dim : dims) {
        get_index_val += dim;
        get_index_val += ',';
    }
    get_index_val.back() = ')';  // replace the trailing comma

    std::stringstream iterate;
    for (const char dim : dims) {
        if (dim == 'i') {
            continue;  // the axis coordinate is supplied by the caller, not iterated
        }
        const auto bound = prefix_for_iterate + dim_size_suffix.at(dim);
        iterate << "for (uint " << dim << " = 0; " << dim << " < " << bound << "; ++" << dim << ") {";
    }
    iterate << "body";
    // Close rank-1 loops (the axis dimension opened none).
    for (size_t closed = 1; closed < dims.size(); ++closed) {
        iterate << '}';
    }

    return {MakeJitConstant("AXIS_LENGTH", axis_length_val),
            MakeJitConstant("GET_INDEX(prefix, i)", get_index_val),
            MakeJitConstant("ITERATE(body)", iterate.str())};
}
|
||||
|
||||
// Builds the JIT constants for the flattened Unique mode. GET_INDEX(prefix, i)
// converts a flat element index `i` into a physical buffer offset: for simple
// (plain) layouts the flat index already is the offset; for blocked layouts the
// flat index is decomposed into logical coordinates and passed to _GET_INDEX.
JitConstants MakeFlattenedJitConstants(size_t rank, bool simple_layout) {
    const std::string get_index_name = "GET_INDEX(prefix, i)";

    if (simple_layout) {
        // Logical index equals physical offset for plain layouts.
        return {MakeJitConstant("FLATTENED", true), MakeJitConstant(get_index_name, "i")};
    }

    // Per-dimension coordinate expressions in b, f, (w, z,) y, x order.
    std::vector<std::string> coords;
    switch (rank) {
    case 4:
        coords = {"i / (prefix##_SIZE_X * prefix##_SIZE_Y * prefix##_FEATURE_NUM)",
                  "i / (prefix##_SIZE_X * prefix##_SIZE_Y) % prefix##_FEATURE_NUM",
                  "i / prefix##_SIZE_X % prefix##_SIZE_Y",
                  "i % prefix##_SIZE_X"};
        break;
    case 5:
        coords = {"i / (prefix##_SIZE_X * prefix##_SIZE_Y * prefix##_SIZE_Z * prefix##_FEATURE_NUM)",
                  "i / (prefix##_SIZE_X * prefix##_SIZE_Y * prefix##_SIZE_Z) % prefix##_FEATURE_NUM",
                  "i / (prefix##_SIZE_X * prefix##_SIZE_Y) % prefix##_SIZE_Z",
                  "i / prefix##_SIZE_X % prefix##_SIZE_Y",
                  "i % prefix##_SIZE_X"};
        break;
    case 6:
        coords = {
            "i / (prefix##_SIZE_X * prefix##_SIZE_Y * prefix##_SIZE_Z * prefix##_SIZE_W * prefix##_FEATURE_NUM)",
            "i / (prefix##_SIZE_X * prefix##_SIZE_Y * prefix##_SIZE_Z * prefix##_SIZE_W) % prefix##_FEATURE_NUM",
            "i / (prefix##_SIZE_X * prefix##_SIZE_Y * prefix##_SIZE_Z) % prefix##_SIZE_W",
            "i / (prefix##_SIZE_X * prefix##_SIZE_Y) % prefix##_SIZE_Z",
            "i / prefix##_SIZE_X % prefix##_SIZE_Y",
            "i % prefix##_SIZE_X"};
        break;
    default:
        throw std::invalid_argument("Unsupported rank for unique primitive");
    }

    std::string get_index_val = "CAT(prefix, _GET_INDEX)(";
    for (const auto& coord : coords) {
        get_index_val += coord;
        get_index_val += ',';
    }
    get_index_val.back() = ')';  // replace the trailing comma

    return {MakeJitConstant("FLATTENED", true), MakeJitConstant(get_index_name, get_index_val)};
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Builds the kernel data for the UniqueCount reference kernel.
// Besides the standard CL kernel setup, it attaches one internal buffer used
// by the kernel to keep intermediate deduplication results; for dynamic shapes
// the buffer is resized at runtime in update_dispatch_data_func.
KernelsData UniqueCountKernelRef::GetKernelsData(const Params& params, const optional_params& options) const {
    if (!Validate(params, options)) {
        return {};
    }

    auto kernel_data = KernelData::Default<unique_count_params>(params);
    const auto& kernel_params = dynamic_cast<const unique_count_params&>(*kernel_data.params);
    const auto dispatch_data = SetDefault(kernel_params);
    const auto entry_point = GetEntryPoint(kernelName, kernel_params.layerID, params, options);
    const auto jit_constants = GetJitConstants(kernel_params);
    const auto jit = CreateJit(kernelName, jit_constants, entry_point);
    auto& kernel = kernel_data.kernels.front();

    // Dynamic-shape callback: refresh work sizes and keep the internal buffer
    // large enough for the actual (now known) input size.
    kernel_data.update_dispatch_data_func = [](const Params& params, KernelData& kd) {
        const auto& prim_params = dynamic_cast<const unique_count_params&>(params);
        auto dispatchData = SetDefault(prim_params);
        OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
        kd.kernels[0].params.workGroups.global = dispatchData.gws;
        kd.kernels[0].params.workGroups.local = dispatchData.lws;
        kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
        // Need to adjust buffer size according to input size
        kd.internalBufferSizes.front() = prim_params.inputs.front().PhysicalSizeInBytes();
        kd.internalBufferDataType = prim_params.inputs.front().GetDType();
    };

    FillCLKernelData(kernel,
                     dispatch_data,
                     params.engineInfo,
                     kernelName,
                     jit,
                     entry_point,
                     {},
                     false,
                     false,
                     static_cast<int>(kernel_params.inputs.size()),
                     GetFusedPrimitiveInputsCount(kernel_params),
                     static_cast<int>(kernel_params.outputs.size()),
                     kernel_params.inputs.front().is_dynamic());

    // Additional buffer to save intermediate algorithm results
    kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0});
    kernel_data.internalBufferSizes.push_back(kernel_params.inputs.front().PhysicalSizeInBytes());
    kernel_data.internalBufferDataType = kernel_params.inputs.front().GetDType();

    return {kernel_data};
}
|
||||
|
||||
// Reference kernel: no restrictions — any data type, any layout, dynamic shapes.
ParamsKey UniqueCountKernelRef::GetSupportedKey() const {
    ParamsKey supported;
    // Data types: the ref kernel handles any input/output type combination.
    supported.EnableAllInputDataType();
    supported.EnableAllOutputDataType();
    supported.EnableDifferentTypes();
    // Layouts: plain and blocked layouts alike.
    supported.EnableAllInputLayout();
    supported.EnableAllOutputLayout();
    supported.EnableTensorOffset();
    supported.EnableTensorPitches();
    supported.EnableBatching();
    supported.EnableDynamicShapesSupport();
    return supported;
}
|
||||
|
||||
bool UniqueCountKernelRef::Validate(const Params& params, const optional_params& options) const {
|
||||
if (params.GetType() != KernelType::UNIQUE_COUNT || options.GetType() != KernelType::UNIQUE_COUNT) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto& kernel_params = dynamic_cast<const unique_count_params&>(params);
|
||||
if (kernel_params.inputs.size() != 1) {
|
||||
return false;
|
||||
}
|
||||
if (kernel_params.outputs.size() != 1) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Builds the JIT constants for the UniqueCount kernel: base params, the
// flattened/axis indexing helpers, and TOTAL_DATA_SIZE (the number of input
// elements; an expression over shape_info for dynamic shapes).
JitConstants UniqueCountKernelRef::GetJitConstants(const unique_count_params& kernel_params) const {
    // Fix: bind by const reference — the original copied the DataTensor by value.
    const auto& input = kernel_params.inputs.front();
    auto jit_constants = MakeBaseParamsJitConstants(kernel_params);

    if (kernel_params.flattened) {
        jit_constants.Merge(MakeFlattenedJitConstants(input.Dimentions(), input.SimpleLayout()));
    } else {
        jit_constants.Merge(MakeAxisJitConstants(input.Dimentions(), kernel_params.axis, "INPUT0"));
    }

    if (input.is_dynamic()) {
        // For dynamic shapes the total size is only known at runtime.
        DimensionAccessHelper dims(input);
        const std::string total_data_size =
            toVectorMulString({dims.x(), dims.y(), dims.z(), dims.w(), dims.f(), dims.b()});
        jit_constants.AddConstant(MakeJitConstant("TOTAL_DATA_SIZE", total_data_size));
    } else {
        jit_constants.AddConstant(MakeJitConstant("TOTAL_DATA_SIZE", input.LogicalSize()));
    }

    return jit_constants;
}
|
||||
|
||||
// Dispatch configuration: the reference algorithm is strictly sequential, so a
// single work-item is launched.
// TODO: Parallelize
CommonDispatchData UniqueCountKernelRef::SetDefault(const unique_count_params& /* kernel_params */) {
    CommonDispatchData dispatch;
    dispatch.gws = {1, 1, 1};
    dispatch.lws = {1, 1, 1};
    return dispatch;
}
|
||||
|
||||
// Builds the kernel data for the UniqueGather reference kernel (the second
// stage that fills all four Unique outputs). Unlike UniqueCount, no internal
// buffer is needed — results go straight to the primitive outputs.
KernelsData UniqueGatherKernelRef::GetKernelsData(const Params& params, const optional_params& options) const {
    if (!Validate(params, options)) {
        return {};
    }

    auto kernel_data = KernelData::Default<unique_gather_params>(params);
    const auto& kernel_params = dynamic_cast<const unique_gather_params&>(*kernel_data.params);
    const auto dispatch_data = SetDefault(kernel_params);
    const auto entry_point = GetEntryPoint(kernelName, kernel_params.layerID, params, options);
    const auto jit_constants = GetJitConstants(kernel_params);
    const auto jit = CreateJit(kernelName, jit_constants, entry_point);
    auto& kernel = kernel_data.kernels.front();

    // Dynamic-shape callback: refresh work sizes on each shape change.
    kernel_data.update_dispatch_data_func = [](const Params& params, KernelData& kd) {
        const auto& prim_params = dynamic_cast<const unique_gather_params&>(params);
        auto dispatchData = SetDefault(prim_params);
        OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
        kd.kernels[0].params.workGroups.global = dispatchData.gws;
        kd.kernels[0].params.workGroups.local = dispatchData.lws;
        kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
    };

    FillCLKernelData(kernel,
                     dispatch_data,
                     params.engineInfo,
                     kernelName,
                     jit,
                     entry_point,
                     {},
                     false,
                     false,
                     static_cast<int>(kernel_params.inputs.size()),
                     GetFusedPrimitiveInputsCount(kernel_params),
                     static_cast<int>(kernel_params.outputs.size()),
                     kernel_params.outputs.front().is_dynamic());

    return {kernel_data};
}
|
||||
|
||||
// Reference kernel: no restrictions — any data type, any layout, dynamic shapes.
ParamsKey UniqueGatherKernelRef::GetSupportedKey() const {
    ParamsKey supported;
    // Data types: the ref kernel handles any input/output type combination.
    supported.EnableAllInputDataType();
    supported.EnableAllOutputDataType();
    supported.EnableDifferentTypes();
    // Layouts: plain and blocked layouts alike.
    supported.EnableAllInputLayout();
    supported.EnableAllOutputLayout();
    supported.EnableTensorOffset();
    supported.EnableTensorPitches();
    supported.EnableBatching();
    supported.EnableDynamicShapesSupport();
    return supported;
}
|
||||
|
||||
bool UniqueGatherKernelRef::Validate(const Params& params, const optional_params& options) const {
|
||||
if (params.GetType() != KernelType::UNIQUE_GATHER || options.GetType() != KernelType::UNIQUE_GATHER) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto& kernel_params = dynamic_cast<const unique_gather_params&>(params);
|
||||
if (kernel_params.inputs.size() != 2) {
|
||||
return false;
|
||||
}
|
||||
if (kernel_params.outputs.size() != 4) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Builds the JIT constants for the UniqueGather kernel: base params, the
// optional SORTED flag, the flattened/axis indexing helpers (axis mode indexes
// the OUTPUT tensor here), and TOTAL_DATA_SIZE.
JitConstants UniqueGatherKernelRef::GetJitConstants(const unique_gather_params& kernel_params) const {
    // Fix: bind by const reference — the original copied the DataTensor by value.
    const auto& input = kernel_params.inputs.front();
    auto jit_constants = MakeBaseParamsJitConstants(kernel_params);

    if (kernel_params.sorted) {
        jit_constants.AddConstant(MakeJitConstant("SORTED", true));
    }

    if (kernel_params.flattened) {
        jit_constants.Merge(MakeFlattenedJitConstants(input.Dimentions(), input.SimpleLayout()));
    } else {
        // Axis mode iterates over the gathered OUTPUT tensor, not the input.
        jit_constants.Merge(MakeAxisJitConstants(input.Dimentions(), kernel_params.axis, "OUTPUT"));
    }

    if (input.is_dynamic()) {
        // For dynamic shapes the total size is only known at runtime.
        DimensionAccessHelper dims(input);
        const std::string total_data_size =
            toVectorMulString({dims.x(), dims.y(), dims.z(), dims.w(), dims.f(), dims.b()});
        jit_constants.AddConstant(MakeJitConstant("TOTAL_DATA_SIZE", total_data_size));
    } else {
        jit_constants.AddConstant(MakeJitConstant("TOTAL_DATA_SIZE", input.LogicalSize()));
    }

    return jit_constants;
}
|
||||
|
||||
// Dispatch configuration: the reference algorithm is strictly sequential, so a
// single work-item is launched.
// TODO: Parallelize
CommonDispatchData UniqueGatherKernelRef::SetDefault(const unique_gather_params& /* kernel_params */) {
    CommonDispatchData dispatch;
    dispatch.gws = {1, 1, 1};
    dispatch.lws = {1, 1, 1};
    return dispatch;
}
|
||||
|
||||
} // namespace kernel_selector
|
@ -0,0 +1,74 @@
|
||||
// Copyright (C) 2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "kernel_base_opencl.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
/**
 * UniqueCount reference kernel parameters.
 */
struct unique_count_params : base_params {
    unique_count_params() : base_params(KernelType::UNIQUE_COUNT) {}
    bool flattened{};  // true: treat the input as a flat 1D array; false: deduplicate slices along `axis`
    int64_t axis{};    // normalized axis index; only meaningful when !flattened
};
|
||||
|
||||
/**
 * UniqueCount reference kernel optional parameters (carries only the kernel type).
 */
struct unique_count_optional_params : optional_params {
    unique_count_optional_params() : optional_params(KernelType::UNIQUE_COUNT) {}
};
|
||||
|
||||
/**
 * Reference (single work-item) kernel for UniqueCount — the first stage of the
 * Unique-10 implementation that computes the number of unique elements.
 */
class UniqueCountKernelRef : public KernelBaseOpenCL {
public:
    UniqueCountKernelRef() : KernelBaseOpenCL{"unique_count_ref"} {}
    KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
    ParamsKey GetSupportedKey() const override;

protected:
    bool Validate(const Params& params, const optional_params& options) const override;
    JitConstants GetJitConstants(const unique_count_params& kernel_params) const;
    // Static so the dynamic-shape update callback can call it without an instance.
    static CommonDispatchData SetDefault(const unique_count_params& kernel_params);
};
|
||||
|
||||
/**
 * UniqueGather reference kernel parameters.
 */
struct unique_gather_params : base_params {
    unique_gather_params() : base_params(KernelType::UNIQUE_GATHER) {}
    bool flattened{};  // true: treat the input as a flat 1D array; false: deduplicate slices along `axis`
    int64_t axis{};    // normalized axis index; only meaningful when !flattened
    bool sorted{};     // true: emit unique values in sorted order (Unique-10 "sorted" attribute)
};
|
||||
|
||||
/**
 * UniqueGather reference kernel optional parameters (carries only the kernel type).
 */
struct unique_gather_optional_params : optional_params {
    unique_gather_optional_params() : optional_params(KernelType::UNIQUE_GATHER) {}
};
|
||||
|
||||
/**
 * Reference (single work-item) kernel for UniqueGather — the second stage of
 * the Unique-10 implementation that fills all four operation outputs.
 */
class UniqueGatherKernelRef : public KernelBaseOpenCL {
public:
    UniqueGatherKernelRef() : KernelBaseOpenCL{"unique_gather_ref"} {}
    KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
    ParamsKey GetSupportedKey() const override;

protected:
    bool Validate(const Params& params, const optional_params& options) const override;
    JitConstants GetJitConstants(const unique_gather_params& kernel_params) const;
    // Static so the dynamic-shape update callback can call it without an instance.
    static CommonDispatchData SetDefault(const unique_gather_params& kernel_params);
};
|
||||
|
||||
} // namespace kernel_selector
|
@ -0,0 +1,37 @@
|
||||
// Copyright (C) 2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "unique_kernel_selector.hpp"
|
||||
|
||||
#include "unique_kernel_ref.hpp"
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
// Registers all available UniqueCount kernel implementations (currently only the ref kernel).
unique_count_kernel_selector::unique_count_kernel_selector() {
    Attach<UniqueCountKernelRef>();
}
|
||||
|
||||
// Picks the best registered UNIQUE_COUNT kernel for the given parameters.
KernelsData unique_count_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const {
    return GetNaiveBestKernel(params, options, KernelType::UNIQUE_COUNT);
}
|
||||
|
||||
// Meyers singleton accessor for the UniqueCount selector.
unique_count_kernel_selector& unique_count_kernel_selector::Instance() {
    static unique_count_kernel_selector instance;
    return instance;
}
|
||||
|
||||
// Registers all available UniqueGather kernel implementations (currently only the ref kernel).
unique_gather_kernel_selector::unique_gather_kernel_selector() {
    Attach<UniqueGatherKernelRef>();
}
|
||||
|
||||
// Picks the best registered UNIQUE_GATHER kernel for the given parameters.
KernelsData unique_gather_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const {
    return GetNaiveBestKernel(params, options, KernelType::UNIQUE_GATHER);
}
|
||||
|
||||
// Meyers singleton accessor for the UniqueGather selector.
unique_gather_kernel_selector& unique_gather_kernel_selector::Instance() {
    static unique_gather_kernel_selector instance;
    return instance;
}
|
||||
|
||||
} // namespace kernel_selector
|
@ -0,0 +1,25 @@
|
||||
// Copyright (C) 2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "kernel_selector.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
class unique_count_kernel_selector : public kernel_selector_base {
|
||||
public:
|
||||
unique_count_kernel_selector();
|
||||
KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
|
||||
static unique_count_kernel_selector& Instance();
|
||||
};
|
||||
|
||||
class unique_gather_kernel_selector : public kernel_selector_base {
|
||||
public:
|
||||
unique_gather_kernel_selector();
|
||||
KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
|
||||
static unique_gather_kernel_selector& Instance();
|
||||
};
|
||||
|
||||
} // namespace kernel_selector
|
54
src/plugins/intel_gpu/src/plugin/ops/unique.cpp
Normal file
54
src/plugins/intel_gpu/src/plugin/ops/unique.cpp
Normal file
@ -0,0 +1,54 @@
|
||||
// Copyright (C) 2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "intel_gpu/primitives/unique.hpp"
|
||||
|
||||
#include "intel_gpu/plugin/program.hpp"
|
||||
#include "ngraph/op/unique.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace intel_gpu {
|
||||
|
||||
namespace {
|
||||
|
||||
// Translates an ngraph v10::Unique node into the two cldnn primitives that
// implement it on GPU: unique_count (computes the number of unique elements)
// followed by unique_gather (fills the four actual outputs).
void CreateUniqueOp(Program& p, const std::shared_ptr<ngraph::op::v10::Unique>& op) {
    validate_inputs_count(op, {1, 2});

    // Single-input form flattens the data; the optional second input is the axis.
    bool flattened = true;
    int64_t axis{};
    if (op->get_input_size() == 2) {
        // The axis must be a compile-time constant.
        auto axis_constant = std::dynamic_pointer_cast<ngraph::op::Constant>(op->get_input_node_shared_ptr(1));
        if (!axis_constant) {
            IE_THROW() << "Unsupported parameter nodes type in " << op->get_friendly_name() << " ("
                       << op->get_type_name() << ")";
        }
        axis = axis_constant->cast_vector<int64_t>().at(0);
        // Convert a possibly-negative axis into [0, rank).
        axis = ov::normalize_axis(op.get(), axis, op->get_input_partial_shape(0).rank());
        flattened = false;
    }

    const auto input = p.GetInputInfo(op).front();
    const auto layer_name = layer_type_name_ID(op);
    const auto count_prim_id = layer_name + "_count";

    const cldnn::unique_count unique_count_prim(count_prim_id, input, flattened, axis);
    p.add_primitive(*op, unique_count_prim);

    // unique_gather consumes both the original data and the count primitive
    // output; the gather primitive carries the operation's output element types.
    const cldnn::unique_gather unique_gather_prim(layer_name,
                                                  {input, count_prim_id},
                                                  flattened,
                                                  axis,
                                                  op->get_sorted(),
                                                  cldnn::element_type_to_data_type(op->get_input_element_type(0)),
                                                  cldnn::element_type_to_data_type(op->get_index_element_type()),
                                                  cldnn::element_type_to_data_type(op->get_count_element_type()));
    p.add_primitive(*op, unique_gather_prim);
}
|
||||
|
||||
} // namespace
|
||||
|
||||
REGISTER_FACTORY_IMPL(v10, Unique);
|
||||
|
||||
} // namespace intel_gpu
|
||||
} // namespace ov
|
@ -126,5 +126,8 @@ std::vector<std::string> disabledTestPatterns() {
|
||||
R"(.*smoke_GroupDeconv_2D_Dynamic_.*FP32/GroupDeconvolutionLayerGPUTest.CompareWithRefs.*)",
|
||||
// Issue: 111440
|
||||
R"(.*smoke_set1/GatherElementsGPUTest.CompareWithRefs.*)",
|
||||
// For some strange reason (bug?) output format cannot have a rank greater than 4 for dynamic shape case,
|
||||
// because it crashes in some random places during "reorder_inputs" pass.
|
||||
R"(.*UniqueLayerDynamicGPUTest.*\(\d*\.\d*\.\d*\.\d*\.\d*\).*axis.*)",
|
||||
};
|
||||
}
|
||||
|
@ -0,0 +1,170 @@
|
||||
// Copyright (C) 2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "common_test_utils/ov_tensor_utils.hpp"
|
||||
#include "ngraph_functions/builders.hpp"
|
||||
#include "shared_test_classes/base/layer_test_utils.hpp"
|
||||
#include "shared_test_classes/base/ov_subgraph.hpp"
|
||||
|
||||
using namespace ov::test;
|
||||
|
||||
namespace GPULayerTestsDefinitions {
|
||||
|
||||
typedef std::tuple<std::vector<InputShape>, // Input shapes
|
||||
std::tuple<bool, int>, // Is flattened and axis
|
||||
bool, // Sorted
|
||||
ElementType // Data precision
|
||||
>
|
||||
UniqueDynamicGPUTestParams;
|
||||
|
||||
// Parameterized GPU functional test for the Unique-10 operation. Builds an
// ngraph function with a single Unique node (flattened or axis mode), creates
// Result nodes for all outputs, and compares against the reference
// implementation for every target shape.
class UniqueLayerDynamicGPUTest : public testing::WithParamInterface<UniqueDynamicGPUTestParams>,
                                  virtual public SubgraphBaseTest {
public:
    // Encodes shapes, axis/flattened mode, sortedness and precision into the test name.
    static std::string getTestCaseName(const testing::TestParamInfo<UniqueDynamicGPUTestParams>& obj) {
        std::vector<InputShape> inputShapes;
        std::tuple<bool, int> flatOrAxis;
        bool sorted;
        ElementType dataPrecision;
        std::tie(inputShapes, flatOrAxis, sorted, dataPrecision) = obj.param;

        std::ostringstream result;
        result << "IS=(";
        for (size_t i = 0lu; i < inputShapes.size(); i++) {
            result << CommonTestUtils::partialShape2str({inputShapes[i].first})
                   << (i < inputShapes.size() - 1lu ? "_" : "");
        }
        result << ")_TS=";
        for (size_t i = 0lu; i < inputShapes.front().second.size(); i++) {
            result << "{";
            for (size_t j = 0lu; j < inputShapes.size(); j++) {
                result << CommonTestUtils::vec2str(inputShapes[j].second[i])
                       << (j < inputShapes.size() - 1lu ? "_" : "");
            }
            result << "}_";
        }

        if (!std::get<0>(flatOrAxis)) {
            result << "axis=" << std::get<1>(flatOrAxis) << "_";
        } else {
            result << "flattened"
                   << "_";
        }
        result << "sorted=" << (sorted ? "True" : "False") << "_";
        result << "dataPrc=" << dataPrecision;

        return result.str();
    }

protected:
    // Builds the test function: one data Parameter -> Unique -> Results for all outputs.
    void SetUp() override {
        std::vector<InputShape> inputShapes;
        std::tuple<bool, int> flatOrAxis;
        bool sorted, flattened;
        int axis;
        ElementType dataPrecision;

        std::tie(inputShapes, flatOrAxis, sorted, dataPrecision) = this->GetParam();
        targetDevice = CommonTestUtils::DEVICE_GPU;
        init_input_shapes(inputShapes);
        flattened = std::get<0>(flatOrAxis);

        auto params = ngraph::builder::makeDynamicParams(dataPrecision, inputDynamicShapes);
        params[0]->set_friendly_name("data");
        auto paramOuts =
            ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ov::op::v0::Parameter>(params));
        std::shared_ptr<ov::Node> uniqueNode;
        if (flattened) {
            uniqueNode = std::make_shared<ov::op::v10::Unique>(paramOuts[0], sorted);
        } else {
            // Axis mode: the axis is passed as a constant second input.
            axis = std::get<1>(flatOrAxis);
            uniqueNode = std::make_shared<ov::op::v10::Unique>(
                paramOuts[0],
                ov::op::v0::Constant::create(ov::element::i64, ov::Shape({1}), {axis}),
                sorted);
        }

        // Need to create results for all outputs
        ngraph::ResultVector results;
        for (auto i = 0U; i < uniqueNode->get_output_size(); ++i) {
            results.push_back(std::make_shared<ngraph::opset1::Result>(uniqueNode->output(i)));
        }

        function = std::make_shared<ngraph::Function>(results, params, "Unique");
    }

    // Fills the "data" input with values spread over [-range/2, range/2) so that
    // duplicates are possible but not guaranteed.
    void generate_inputs(const std::vector<ov::Shape>& targetInputStaticShapes) override {
        inputs.clear();
        const auto& funcInputs = function->inputs();

        for (size_t i = 0; i < funcInputs.size(); ++i) {
            const auto& funcInput = funcInputs[i];
            ov::Tensor tensor;

            if (funcInput.get_node()->get_friendly_name() == "data") {
                // Range equals the element count of the shape.
                // NOTE(review): accumulating with std::multiplies<size_t> into an
                // int32_t narrows; very large shapes would overflow — confirm test
                // shapes stay small enough.
                int32_t range = std::accumulate(targetInputStaticShapes[0].begin(),
                                                targetInputStaticShapes[0].end(),
                                                1,
                                                std::multiplies<size_t>());
                tensor = utils::create_and_fill_tensor(funcInput.get_element_type(),
                                                       targetInputStaticShapes[0],
                                                       range,
                                                       -range / 2,
                                                       1);
            }
            inputs.insert({funcInput.get_node_shared_ptr(), tensor});
        }
    }
};
|
||||
|
||||
// Runs the subgraph and compares every Unique output against the reference.
TEST_P(UniqueLayerDynamicGPUTest, CompareWithRefs) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()
    run();
}
|
||||
|
||||
namespace {
|
||||
|
||||
const std::vector<ElementType> dataPrecision = {
|
||||
ElementType::f16,
|
||||
ElementType::i32,
|
||||
};
|
||||
|
||||
std::vector<std::tuple<bool, int>> flatOrAxis{{true, 0}, {false, 0}, {false, 1}, {false, -1}};
|
||||
|
||||
std::vector<bool> sorted{true, false};
|
||||
|
||||
std::vector<std::vector<InputShape>> getStaticShapes() {
|
||||
return {
|
||||
{{{}, {{7, 2, 3}}}},
|
||||
{{{}, {{7, 2, 3, 5}}}},
|
||||
{{{}, {{7, 2, 3, 5, 4}}}},
|
||||
};
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_static,
|
||||
UniqueLayerDynamicGPUTest,
|
||||
::testing::Combine(::testing::ValuesIn(getStaticShapes()),
|
||||
::testing::ValuesIn(flatOrAxis),
|
||||
::testing::ValuesIn(sorted),
|
||||
::testing::ValuesIn(dataPrecision)),
|
||||
UniqueLayerDynamicGPUTest::getTestCaseName);
|
||||
|
||||
std::vector<std::vector<InputShape>> getDynamicShapes() {
|
||||
return {
|
||||
{{{ov::Dimension(2, 15), -1, -1, -1}, // Dynamic shape
|
||||
{{8, 3, 3, 3}, {6, 5, 2, 5}, {4, 7, 1, 11}, {2, 9, 3, 4}}}}, // Target shapes
|
||||
{{{-1, -1, -1, -1, -1}, // Dynamic shape
|
||||
{{1, 2, 1, 13, 2}, {3, 4, 7, 2, 2}, {5, 6, 3, 5, 2}, {7, 8, 4, 4, 2}}}}, // Target shapes
|
||||
};
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_dynamic,
|
||||
UniqueLayerDynamicGPUTest,
|
||||
::testing::Combine(::testing::ValuesIn(getDynamicShapes()),
|
||||
::testing::ValuesIn(flatOrAxis),
|
||||
::testing::ValuesIn(sorted),
|
||||
::testing::ValuesIn(dataPrecision)),
|
||||
UniqueLayerDynamicGPUTest::getTestCaseName);
|
||||
|
||||
} // namespace
|
||||
} // namespace GPULayerTestsDefinitions
|
394
src/plugins/intel_gpu/tests/unit/test_cases/unique_gpu_test.cpp
Normal file
394
src/plugins/intel_gpu/tests/unit/test_cases/unique_gpu_test.cpp
Normal file
@ -0,0 +1,394 @@
|
||||
// Copyright (C) 2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <test_utils/test_utils.h>
|
||||
|
||||
#include <intel_gpu/primitives/unique.hpp>
|
||||
#include <vector>
|
||||
|
||||
using namespace cldnn;
|
||||
using namespace tests;
|
||||
|
||||
namespace {
|
||||
|
||||
// Formats a vector as "(a.b.c)"; an empty vector becomes "()".
template <typename vecElementType>
std::string vec2str(const std::vector<vecElementType>& vec) {
    if (vec.empty()) {
        return "()";
    }
    std::ostringstream out;
    out << '(';
    for (size_t idx = 0; idx + 1 < vec.size(); ++idx) {
        out << vec[idx] << '.';
    }
    out << vec.back() << ')';
    return out.str();
}
|
||||
|
||||
// One test case for the unique_count + unique_gather primitive pair:
// input data plus the expected values of all four Unique outputs.
template <class ElemT, class IndexT, class CountT>
struct unique_test_inputs {
    ov::Shape data_shape;
    std::vector<ElemT> input_data;
    std::vector<ElemT> expected_unique_values;   // first output: deduplicated values
    std::vector<IndexT> expected_indices;        // second output: first-occurrence indices
    std::vector<IndexT> expected_rev_indices;    // third output: input -> unique mapping
    std::vector<CountT> expected_counts;         // fourth output: occurrences per unique value
    bool flattened;                              // true: deduplicate the flattened tensor
    int64_t axis;                                // axis for slice mode (ignored when flattened)
    bool sorted;                                 // true: unique values expected in sorted order
};
|
||||
|
||||
template <class ElemT, class IndexT, class CountT>
|
||||
using unique_test_params = std::tuple<unique_test_inputs<ElemT, IndexT, CountT>, format::type>;
|
||||
|
||||
// Unit-test fixture: builds a topology of reorder -> unique_count ->
// unique_gather -> reorders back to plain layout, executes it, and verifies
// every one of the four Unique outputs element-by-element.
template <class ElemT, class IndexT, class CountT>
struct unique_gpu_test : public testing::TestWithParam<unique_test_params<ElemT, IndexT, CountT>> {
public:
    void test() {
        format::type fmt;
        unique_test_inputs<ElemT, IndexT, CountT> p;
        std::tie(p, fmt) = testing::TestWithParam<unique_test_params<ElemT, IndexT, CountT>>::GetParam();

        auto& engine = get_test_engine();
        const auto elem_data_type = type_to_data_type<ElemT>::value;
        const auto index_data_type = type_to_data_type<IndexT>::value;
        const auto count_data_type = type_to_data_type<CountT>::value;
        const auto plain_format = format::bfyx;

        const layout in_layout(p.data_shape, elem_data_type, plain_format);
        auto input = engine.allocate_memory(in_layout);
        set_values(input, p.input_data);

        // Reorder into the tested (possibly blocked) layout before the primitives,
        // and back to the plain layout afterwards so results can be compared directly.
        topology topology;
        topology.add(input_layout("input", input->get_layout()));
        topology.add(reorder("reordered_input", input_info("input"), fmt, elem_data_type));
        topology.add(unique_count("unique_count", {input_info("reordered_input")}, p.flattened, p.axis));
        topology.add(unique_gather("unique_gather",
                                   {input_info("reordered_input"), input_info("unique_count")},
                                   p.flattened,
                                   p.axis,
                                   p.sorted,
                                   elem_data_type,
                                   index_data_type,
                                   count_data_type));
        topology.add(reorder("expected_unique_values", input_info("unique_gather", 0), plain_format, elem_data_type));
        topology.add(reorder("expected_indices", input_info("unique_gather", 1), plain_format, index_data_type));
        topology.add(reorder("expected_rev_indices", input_info("unique_gather", 2), plain_format, index_data_type));
        topology.add(reorder("expected_counts", input_info("unique_gather", 3), plain_format, count_data_type));

        // unique_gather outputs have data-dependent shapes -> new shape infer required.
        auto config = get_test_default_config(engine);
        config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
        network network(engine, topology, config);
        network.set_input_data("input", input);

        const auto outputs = network.execute();

        // Output 0: unique values.
        const auto expected_unique_values = outputs.at("expected_unique_values").get_memory();
        cldnn::mem_lock<ElemT> expected_unique_values_ptr(expected_unique_values, get_test_stream());
        ASSERT_EQ(expected_unique_values_ptr.size(), p.expected_unique_values.size());
        for (auto i = 0U; i < expected_unique_values_ptr.size(); ++i) {
            ASSERT_EQ(expected_unique_values_ptr[i], p.expected_unique_values[i]);
        }

        // Output 1: first-occurrence indices.
        const auto expected_indices = outputs.at("expected_indices").get_memory();
        cldnn::mem_lock<IndexT> expected_indices_ptr(expected_indices, get_test_stream());
        ASSERT_EQ(expected_indices_ptr.size(), p.expected_indices.size());
        for (auto i = 0U; i < expected_indices_ptr.size(); ++i) {
            ASSERT_EQ(expected_indices_ptr[i], p.expected_indices[i]);
        }

        // Output 2: reverse indices (input element -> unique element).
        const auto expected_rev_indices = outputs.at("expected_rev_indices").get_memory();
        cldnn::mem_lock<IndexT> expected_rev_indices_ptr(expected_rev_indices, get_test_stream());
        ASSERT_EQ(expected_rev_indices_ptr.size(), p.expected_rev_indices.size());
        for (auto i = 0U; i < expected_rev_indices_ptr.size(); ++i) {
            ASSERT_EQ(expected_rev_indices_ptr[i], p.expected_rev_indices[i]);
        }

        // Output 3: per-unique-value occurrence counts.
        const auto expected_counts = outputs.at("expected_counts").get_memory();
        cldnn::mem_lock<CountT> expected_counts_ptr(expected_counts, get_test_stream());
        ASSERT_EQ(expected_counts_ptr.size(), p.expected_counts.size());
        for (auto i = 0U; i < expected_counts_ptr.size(); ++i) {
            ASSERT_EQ(expected_counts_ptr[i], p.expected_counts[i]);
        }
    }

    // Readable gtest parameter name: shapes, data, types, sortedness, layout.
    static std::string PrintToStringParamName(
        const testing::TestParamInfo<unique_test_params<ElemT, IndexT, CountT>>& info) {
        format::type fmt;
        unique_test_inputs<ElemT, IndexT, CountT> p;
        std::tie(p, fmt) = info.param;

        std::ostringstream result;
        result << "data_shape=" << vec2str(p.data_shape) << "; ";
        result << "input_data=" << vec2str(p.input_data) << "; ";
        result << "data_type=" << type_to_data_type<ElemT>::value << "; ";
        result << "index_type=" << type_to_data_type<IndexT>::value << "; ";
        result << "counts_type=" << type_to_data_type<CountT>::value << "; ";
        result << "sorted=" << p.sorted << "; ";
        if (!p.flattened) {
            result << "axis=" << p.axis << "; ";
        }
        result << "fmt=" << fmt_to_str(fmt) << "; ";
        return result.str();
    }
};
|
||||
|
||||
// Test-case table for the Unique-10 GPU tests. Each braced entry
// aggregate-initializes a unique_test_inputs<ElemT, IndexT, CountT> in
// declaration order:
//   { data_shape, input_data,
//     expected_unique_values, expected_indices, expected_rev_indices,
//     expected_counts, flattened, axis, sorted }
// NOTE(review): the trailing {bool, int, bool} = {flattened, axis, sorted}
// reading was inferred from the data (e.g. the {5,4,3,2,1} input keeps
// occurrence order when the last bool is false and becomes {1,2,3,4,5} when
// it is true) -- confirm against the unique_test_inputs declaration.
template <class ElemT, class IndexT, class CountT>
std::vector<unique_test_inputs<ElemT, IndexT, CountT>> getUniqueParams() {
    return {
        // Flattened, unsorted: all values distinct, occurrence order kept.
        {
            ov::Shape{5},
            std::vector<ElemT>{5, 4, 3, 2, 1},
            std::vector<ElemT>{5, 4, 3, 2, 1},
            std::vector<IndexT>{0, 1, 2, 3, 4},
            std::vector<IndexT>{0, 1, 2, 3, 4},
            std::vector<CountT>{1, 1, 1, 1, 1},
            true,
            0,
            false,
        },
        // Flattened, sorted: same input, output ascending.
        {
            ov::Shape{5},
            std::vector<ElemT>{5, 4, 3, 2, 1},
            std::vector<ElemT>{1, 2, 3, 4, 5},
            std::vector<IndexT>{4, 3, 2, 1, 0},
            std::vector<IndexT>{4, 3, 2, 1, 0},
            std::vector<CountT>{1, 1, 1, 1, 1},
            true,
            0,
            true,
        },
        // Flattened, unsorted, with duplicates (3 and 2 repeat).
        {
            ov::Shape{7},
            std::vector<ElemT>{1, 3, 5, 3, 2, 4, 2},
            std::vector<ElemT>{1, 3, 5, 2, 4},
            std::vector<IndexT>{0, 1, 2, 4, 5},
            std::vector<IndexT>{0, 1, 2, 1, 3, 4, 3},
            std::vector<CountT>{1, 2, 1, 2, 1},
            true,
            0,
            false,
        },
        // Flattened, sorted, with duplicates.
        {
            ov::Shape{7},
            std::vector<ElemT>{1, 3, 5, 3, 2, 4, 2},
            std::vector<ElemT>{1, 2, 3, 4, 5},
            std::vector<IndexT>{0, 4, 1, 5, 2},
            std::vector<IndexT>{0, 2, 4, 2, 1, 3, 1},
            std::vector<CountT>{1, 2, 2, 1, 1},
            true,
            0,
            true,
        },
        // Flattened, sorted: like above but 3 occurs before 1 in the input,
        // so the first-occurrence indices shift.
        {
            ov::Shape{7},
            std::vector<ElemT>{3, 1, 5, 3, 2, 4, 2},
            std::vector<ElemT>{1, 2, 3, 4, 5},
            std::vector<IndexT>{1, 4, 0, 5, 2},
            std::vector<IndexT>{2, 0, 4, 2, 1, 3, 1},
            std::vector<CountT>{1, 2, 2, 1, 1},
            true,
            0,
            true,
        },
        // Flattened, sorted: value 3 occurs three times.
        {
            ov::Shape{7},
            std::vector<ElemT>{3, 3, 5, 3, 2, 4, 2},
            std::vector<ElemT>{2, 3, 4, 5},
            std::vector<IndexT>{4, 0, 5, 2},
            std::vector<IndexT>{1, 1, 3, 1, 0, 2, 0},
            std::vector<CountT>{2, 3, 1, 1},
            true,
            0,
            true,
        },
        // axis=0 on 1-D data, sorted: expectations match the flattened case.
        {
            ov::Shape{7},
            std::vector<ElemT>{1, 3, 5, 3, 2, 4, 2},
            std::vector<ElemT>{1, 2, 3, 4, 5},
            std::vector<IndexT>{0, 4, 1, 5, 2},
            std::vector<IndexT>{0, 2, 4, 2, 1, 3, 1},
            std::vector<CountT>{1, 2, 2, 1, 1},
            false,
            0,
            true,
        },
        // Flattened 2-D input, unsorted.
        {
            ov::Shape{2, 6},
            std::vector<ElemT>{3, 5, 3, 2, 4, 2, 1, 2, 3, 4, 5, 6},
            std::vector<ElemT>{3, 5, 2, 4, 1, 6},
            std::vector<IndexT>{0, 1, 3, 4, 6, 11},
            std::vector<IndexT>{0, 1, 0, 2, 3, 2, 4, 2, 0, 3, 1, 5},
            std::vector<CountT>{3, 2, 3, 2, 1, 1},
            true,
            0,
            false,
        },
        // axis=0: both rows differ, nothing collapses.
        {
            ov::Shape{2, 4},
            std::vector<ElemT>{1, 2, 3, 4, 1, 2, 3, 5},
            std::vector<ElemT>{1, 2, 3, 4, 1, 2, 3, 5},
            std::vector<IndexT>{0, 1},
            std::vector<IndexT>{0, 1},
            std::vector<CountT>{1, 1},
            false,
            0,
            false,
        },
        // axis=1: all four columns distinct.
        {
            ov::Shape{2, 4},
            std::vector<ElemT>{1, 2, 3, 4, 1, 2, 3, 5},
            std::vector<ElemT>{1, 2, 3, 4, 1, 2, 3, 5},
            std::vector<IndexT>{0, 1, 2, 3},
            std::vector<IndexT>{0, 1, 2, 3},
            std::vector<CountT>{1, 1, 1, 1},
            false,
            1,
            false,
        },
        // axis=1: columns 1 and 2 are identical ({2,2}).
        {
            ov::Shape{2, 4},
            std::vector<ElemT>{1, 2, 2, 4, 1, 2, 2, 5},
            std::vector<ElemT>{1, 2, 4, 1, 2, 5},
            std::vector<IndexT>{0, 1, 3},
            std::vector<IndexT>{0, 1, 1, 2},
            std::vector<CountT>{1, 2, 1},
            false,
            1,
            false,
        },
        // axis=0: two identical 2x3 slices collapse to a single one.
        {
            ov::Shape{2, 2, 3},
            std::vector<ElemT>{1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6},
            std::vector<ElemT>{1, 2, 3, 4, 5, 6},
            std::vector<IndexT>{0},
            std::vector<IndexT>{0, 0},
            std::vector<CountT>{2},
            false,
            0,
            false,
        },
        // axis=0, sorted: both slices unique, already in sorted order.
        {
            ov::Shape{2, 3, 2},
            std::vector<ElemT>{-3, -2, -5, 4, -3, 2, 3, -4, 1, 2, -1, 4},
            std::vector<ElemT>{-3, -2, -5, 4, -3, 2, 3, -4, 1, 2, -1, 4},
            std::vector<IndexT>{0, 1},
            std::vector<IndexT>{0, 1},
            std::vector<CountT>{1, 1},
            false,
            0,
            true,
        },
        // Same input, axis=0, unsorted: identical expectations.
        {
            ov::Shape{2, 3, 2},
            std::vector<ElemT>{-3, -2, -5, 4, -3, 2, 3, -4, 1, 2, -1, 4},
            std::vector<ElemT>{-3, -2, -5, 4, -3, 2, 3, -4, 1, 2, -1, 4},
            std::vector<IndexT>{0, 1},
            std::vector<IndexT>{0, 1},
            std::vector<CountT>{1, 1},
            false,
            0,
            false,
        },
        // axis=1: both slices along axis 1 are identical.
        {
            ov::Shape{2, 2, 3},
            std::vector<ElemT>{6, 5, 4, 6, 5, 4, 3, 2, 1, 3, 2, 1},
            std::vector<ElemT>{6, 5, 4, 3, 2, 1},
            std::vector<IndexT>{0},
            std::vector<IndexT>{0, 0},
            std::vector<CountT>{2},
            false,
            1,
            false,
        },
        // axis=2: columns 0 and 2 identical.
        {
            ov::Shape{2, 2, 3},
            std::vector<ElemT>{-1, 2, -1, 5, -3, 5, 7, -8, 7, 4, 4, 4},
            std::vector<ElemT>{-1, 2, 5, -3, 7, -8, 4, 4},
            std::vector<IndexT>{0, 1},
            std::vector<IndexT>{0, 1, 0},
            std::vector<CountT>{2, 1},
            false,
            2,
            false,
        },
        // axis=2: columns 0 and 1 identical.
        {
            ov::Shape{2, 2, 3},
            std::vector<ElemT>{-1, -1, 2, 5, 5, -3, 7, 7, -8, 4, 4, 4},
            std::vector<ElemT>{-1, 2, 5, -3, 7, -8, 4, 4},
            std::vector<IndexT>{0, 2},
            std::vector<IndexT>{0, 0, 1},
            std::vector<CountT>{2, 1},
            false,
            2,
            false,
        },
        // axis=2: columns 1 and 2 identical.
        {
            ov::Shape{2, 2, 3},
            std::vector<ElemT>{2, -1, -1, -3, 5, 5, -8, 7, 7, 4, 4, 4},
            std::vector<ElemT>{2, -1, -3, 5, -8, 7, 4, 4},
            std::vector<IndexT>{0, 1},
            std::vector<IndexT>{0, 1, 1},
            std::vector<CountT>{1, 2},
            false,
            2,
            false,
        },
        // axis=2, sorted: same input as above, but the duplicated column
        // orders before the unique one.
        {
            ov::Shape{2, 2, 3},
            std::vector<ElemT>{2, -1, -1, -3, 5, 5, -8, 7, 7, 4, 4, 4},
            std::vector<ElemT>{-1, 2, 5, -3, 7, -8, 4, 4},
            std::vector<IndexT>{1, 0},
            std::vector<IndexT>{1, 0, 0},
            std::vector<CountT>{2, 1},
            false,
            2,
            true,
        },
        // axis=2, sorted: slice ordering decided past the first element
        // (both columns start with -1).
        {
            ov::Shape{2, 2, 3},
            std::vector<ElemT>{-1, -1, -1, 3, 2, 2, 6, 7, 7, 4, 4, 4},
            std::vector<ElemT>{-1, -1, 2, 3, 7, 6, 4, 4},
            std::vector<IndexT>{1, 0},
            std::vector<IndexT>{1, 0, 0},
            std::vector<CountT>{2, 1},
            false,
            2,
            true,
        },
        // Larger flattened case, sorted: 48 mixed-sign values, 35 unique.
        {
            ov::Shape{1, 3, 16},
            std::vector<ElemT>{15, -20, -11, 10, -21, 8, -15, -10, 7, 20, -19, -14, -13, -16, -7, -2,
                               -17, -4, 21, -6, 11, 8, 17, 6, 7, 20, -3, 2, -13, -16, -23, 14,
                               -1, 12, 5, -6, 11, -8, 1, -10, 23, 20, -19, 18, 3, -16, -7, 14},
            std::vector<ElemT>{-23, -21, -20, -19, -17, -16, -15, -14, -13, -11, -10, -8, -7, -6, -4, -3, -2, -1,
                               1, 2, 3, 5, 6, 7, 8, 10, 11, 12, 14, 15, 17, 18, 20, 21, 23},
            std::vector<IndexT>{30, 4, 1, 10, 16, 13, 6, 11, 12, 2, 7, 37, 14, 19, 17, 26, 15, 32,
                                38, 27, 44, 34, 23, 8, 5, 3, 20, 33, 31, 0, 22, 43, 9, 18, 40},
            std::vector<IndexT>{29, 2, 9, 25, 1, 24, 6, 10, 23, 32, 3, 7, 8, 5, 12, 16,
                                4, 14, 33, 13, 26, 24, 30, 22, 23, 32, 15, 19, 8, 5, 0, 28,
                                17, 27, 21, 13, 26, 11, 18, 10, 34, 32, 3, 31, 20, 5, 12, 28},
            std::vector<CountT>{1, 1, 1, 2, 1, 3, 1, 1, 2, 1, 2, 1, 2, 2, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 2, 2, 1, 2, 1, 2, 1, 1, 1, 3, 1, 1},
            true,
            0,
            true,
        },
    };
}
|
||||
|
||||
// Layouts under test: one plain format (bfyx) and one blocked format
// (b_fs_yx_fsv16); every parameter set is run against both.
const std::vector<format::type> layout_formats = {format::bfyx, format::b_fs_yx_fsv16};
|
||||
|
||||
// Declares and instantiates the test suite for one (element, index, count)
// type combination. The `using` alias gives the template instantiation a
// single-token name that the gtest macros can consume; TEST_P forwards to the
// fixture's test() body, and INSTANTIATE_TEST_SUITE_P crosses every
// getUniqueParams case with every entry of layout_formats.
#define INSTANTIATE_UNIQUE_TEST_SUITE(elem_type, index_type, count_type)                                            \
    using unique_gpu_test_##elem_type##index_type##count_type = unique_gpu_test<elem_type, index_type, count_type>; \
    TEST_P(unique_gpu_test_##elem_type##index_type##count_type, test) {                                             \
        ASSERT_NO_FATAL_FAILURE(test());                                                                            \
    }                                                                                                               \
    INSTANTIATE_TEST_SUITE_P(smoke_unique_##elem_type##index_type##count_type,                                      \
                             unique_gpu_test_##elem_type##index_type##count_type,                                   \
                             testing::Combine(testing::ValuesIn(getUniqueParams<elem_type, index_type, count_type>()), \
                                              testing::ValuesIn(layout_formats)),                                   \
                             unique_gpu_test_##elem_type##index_type##count_type::PrintToStringParamName);
|
||||
|
||||
// NOTE(review): only the float/int64_t/int32_t combination is instantiated
// here -- presumably enough for smoke coverage; further combinations can be
// added with additional INSTANTIATE_UNIQUE_TEST_SUITE lines.
INSTANTIATE_UNIQUE_TEST_SUITE(float, int64_t, int32_t);
|
||||
|
||||
} // namespace
|