[GPU] Unique-10 operation implementation. (#16412)

* [GPU] Unique-10 operation implementation.

* Handled flattened case.

* Created results for all outputs in single layer test.

* Save total unique count as fifth output.

* Handled axis case.

* Added unique reshape kernel.

* Moved data types to unique primitive constructor.

* Added shape agnostic Unique ref kernel.

* Added blocked layout support to Unique-10.

* Use int in bubble sort.

* Added unit tests.

* Added support for blocked layouts to flattened mode.

* Fixed usage of shape_info in kernel.

* Use correct total data size for dynamic shapes.

* Commented some functional tests.

For some reasons big shapes cause std::bad_alloc.

* Initialize out_counts with zeros.

* Implemented new approach for reducing memory footprint.

Changed first kernel to only count unique values and changed second kernel to fill all outputs.

* Revert "Commented some functional tests."

This reverts commit a7f9763c575e71e14b85ee37adf1e98f10785c15.

* Fixed calc output layouts for flattened case when rank in greater than 4.

* Added temporary fix for axis case when rank is greater than 4.

* Revert "Added temporary fix for axis case when rank is greater than 4."

This reverts commit 236640d2f0e9d5b1f8dcbbf9482763badd7fde66.

* Renamed "unique" to "unique_count" and "unique_reshape" to "unique_gather" primitives.

* Quick fix for add_intermediate_node to consider dep_idx of multiple output

* Fix bug for multiple output:
1) get_reorder was getting reorder from cache regardless of the dep_idx.
2) remove_redundant_reorder was not considering original dep_idx

* Fixed conflicts.

* Fixed win build issue.

* Fixed build issue.

* Revert "Fix bug for multiple output:"

This reverts commit d4a2c4f32eabe9108df31d4837fed8995c93bd1c.

* Revert "Quick fix for add_intermediate_node to consider dep_idx of multiple output"

This reverts commit 2dfd2aaefdf32067a7469505b35f7096632ac5f2.

* Added some tests to skip config.

---------

Co-authored-by: Taylor Yeonbok Lee <taylor.lee@intel.com>
This commit is contained in:
Mykhailo Hnap 2023-06-14 20:41:51 +03:00 committed by GitHub
parent 5993c4942a
commit bae926de22
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
19 changed files with 1826 additions and 1 deletions

View File

@ -248,6 +248,7 @@ REGISTER_FACTORY(v9, Eye);
REGISTER_FACTORY(v10, IsFinite);
REGISTER_FACTORY(v10, IsInf);
REGISTER_FACTORY(v10, IsNaN);
REGISTER_FACTORY(v10, Unique);
// --------------------------- Supported internal ops --------------------------- //
REGISTER_FACTORY(internal, NonMaxSuppressionIEInternal);

View File

@ -0,0 +1,88 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <vector>
#include "primitive.hpp"
namespace cldnn {
struct unique_count : primitive_base<unique_count> {
CLDNN_DECLARE_PRIMITIVE(unique_count)
/// @brief Constructs unique_count primitive.
/// @param id This primitive id.
/// @param input Input primitive id.
/// @param flattened If true, operator works on a flattened version of the input tensor.
/// @param axis Is used to “divide” the input tensor into slices.
unique_count(const primitive_id& id, const input_info& input, bool flattened, int64_t axis)
: primitive_base(id, {input}),
flattened(flattened),
axis(axis) {}
bool flattened;
int64_t axis;
size_t hash() const override {
size_t seed = primitive::hash();
seed = hash_combine(seed, flattened);
seed = hash_combine(seed, axis);
return seed;
}
bool operator==(const primitive& rhs) const override {
if (!compare_common_params(rhs)) {
return false;
}
auto rhs_casted = downcast<const unique_count>(rhs);
return flattened == rhs_casted.flattened && axis == rhs_casted.axis;
}
};
struct unique_gather : primitive_base<unique_gather> {
CLDNN_DECLARE_PRIMITIVE(unique_gather)
/// @brief Constructs unique_gather primitive.
/// @param id This primitive id.
/// @param inputs Input primitives ids.
/// @param flattened If true, operator works on a flattened version of the input tensor.
/// @param axis Is used to “divide” the input tensor into slices.
/// @param sorted Controls the order of the returned unique values (sorts ascending when true).
unique_gather(const primitive_id& id,
const std::vector<input_info>& inputs,
bool flattened,
int64_t axis,
bool sorted,
data_types elem_type,
data_types index_type,
data_types count_type)
: primitive_base(id, inputs, decltype(output_paddings)(4), {elem_type, index_type, index_type, count_type}, 4),
flattened(flattened),
axis(axis),
sorted(sorted) {}
bool flattened;
int64_t axis;
bool sorted;
size_t hash() const override {
size_t seed = primitive::hash();
seed = hash_combine(seed, flattened);
seed = hash_combine(seed, axis);
seed = hash_combine(seed, sorted);
return seed;
}
bool operator==(const primitive& rhs) const override {
if (!compare_common_params(rhs)) {
return false;
}
auto rhs_casted = downcast<const unique_gather>(rhs);
return flattened == rhs_casted.flattened && axis == rhs_casted.axis && sorted == rhs_casted.sorted;
}
};
} // namespace cldnn

View File

@ -94,6 +94,8 @@ void register_implementations() {
REGISTER_OCL(count_nonzero);
REGISTER_OCL(gather_nonzero);
REGISTER_OCL(eye);
REGISTER_OCL(unique_count);
REGISTER_OCL(unique_gather);
}
} // namespace ocl

View File

@ -75,6 +75,7 @@
#include "intel_gpu/primitives/tile.hpp"
#include "intel_gpu/primitives/non_zero.hpp"
#include "intel_gpu/primitives/eye.hpp"
#include "intel_gpu/primitives/unique.hpp"
namespace cldnn {
namespace ocl {
@ -174,6 +175,8 @@ REGISTER_OCL(convert_color);
REGISTER_OCL(count_nonzero);
REGISTER_OCL(gather_nonzero);
REGISTER_OCL(eye);
REGISTER_OCL(unique_count);
REGISTER_OCL(unique_gather);
#undef REGISTER_OCL

View File

@ -0,0 +1,167 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "primitive_base.hpp"
#include "unique/unique_kernel_ref.hpp"
#include "unique/unique_kernel_selector.hpp"
#include "unique_inst.hpp"
namespace cldnn {
namespace ocl {
struct unique_count_impl : typed_primitive_impl_ocl<unique_count> {
using parent = typed_primitive_impl_ocl<unique_count>;
using parent::parent;
using kernel_selector_t = kernel_selector::unique_count_kernel_selector;
using kernel_params_t =
std::pair<kernel_selector::unique_count_params, kernel_selector::unique_count_optional_params>;
DECLARE_OBJECT_TYPE_SERIALIZATION
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<unique_count_impl>(*this);
}
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
const auto& primitive = impl_param.typed_desc<unique_count>();
auto params = get_default_params<kernel_selector::unique_count_params>(impl_param, is_shape_agnostic);
auto optional_params =
get_default_optional_params<kernel_selector::unique_count_optional_params>(impl_param.get_program());
params.flattened = primitive->flattened;
params.axis = primitive->axis;
return {params, optional_params};
}
void update_dispatch_data(const kernel_impl_params& impl_param) override {
auto kernel_params = get_kernel_params(impl_param, true);
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
}
};
namespace detail {
attach_unique_count_impl::attach_unique_count_impl() {
auto types = {
data_types::u8,
data_types::i8,
data_types::f16,
data_types::f32,
data_types::i32,
data_types::i64,
};
auto formats = {
format::bfyx,
format::b_fs_yx_fsv16,
format::b_fs_yx_fsv32,
format::bs_fs_yx_bsv16_fsv16,
format::bs_fs_yx_bsv16_fsv32,
format::bs_fs_yx_bsv32_fsv16,
format::bs_fs_yx_bsv32_fsv32,
format::bfzyx,
format::b_fs_zyx_fsv16,
format::b_fs_zyx_fsv32,
format::bs_fs_zyx_bsv16_fsv16,
format::bs_fs_zyx_bsv16_fsv32,
format::bs_fs_zyx_bsv32_fsv16,
format::bs_fs_zyx_bsv32_fsv32,
format::bfwzyx,
};
implementation_map<unique_count>::add(impl_types::ocl,
shape_types::any,
typed_primitive_impl_ocl<unique_count>::create<unique_count_impl>,
types,
formats);
}
} // namespace detail
struct unique_gather_impl : typed_primitive_impl_ocl<unique_gather> {
using parent = typed_primitive_impl_ocl<unique_gather>;
using parent::parent;
using kernel_selector_t = kernel_selector::unique_gather_kernel_selector;
using kernel_params_t =
std::pair<kernel_selector::unique_gather_params, kernel_selector::unique_gather_optional_params>;
DECLARE_OBJECT_TYPE_SERIALIZATION
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<unique_gather_impl>(*this);
}
static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
const auto& primitive = impl_param.typed_desc<unique_gather>();
auto params = get_default_params<kernel_selector::unique_gather_params>(impl_param, is_shape_agnostic);
auto optional_params =
get_default_optional_params<kernel_selector::unique_gather_optional_params>(impl_param.get_program());
params.flattened = primitive->flattened;
params.axis = primitive->axis;
params.sorted = primitive->sorted;
for (auto i = 1U; i < impl_param.input_layouts.size(); ++i) {
params.inputs.push_back(convert_data_tensor(impl_param.input_layouts.at(i)));
}
for (auto i = 1U; i < impl_param.output_layouts.size(); ++i) {
params.outputs.push_back(convert_data_tensor(impl_param.output_layouts.at(i)));
}
return {params, optional_params};
}
void update_dispatch_data(const kernel_impl_params& impl_param) override {
auto kernel_params = get_kernel_params(impl_param, true);
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
}
};
namespace detail {
attach_unique_gather_impl::attach_unique_gather_impl() {
auto types = {
data_types::u8,
data_types::i8,
data_types::f16,
data_types::f32,
data_types::i32,
data_types::i64,
};
auto formats = {
format::bfyx,
format::b_fs_yx_fsv16,
format::b_fs_yx_fsv32,
format::bs_fs_yx_bsv16_fsv16,
format::bs_fs_yx_bsv16_fsv32,
format::bs_fs_yx_bsv32_fsv16,
format::bs_fs_yx_bsv32_fsv32,
format::bfzyx,
format::b_fs_zyx_fsv16,
format::b_fs_zyx_fsv32,
format::bs_fs_zyx_bsv16_fsv16,
format::bs_fs_zyx_bsv16_fsv32,
format::bs_fs_zyx_bsv32_fsv16,
format::bs_fs_zyx_bsv32_fsv32,
format::bfwzyx,
};
implementation_map<unique_gather>::add(impl_types::ocl,
shape_types::any,
typed_primitive_impl_ocl<unique_gather>::create<unique_gather_impl>,
types,
formats);
}
} // namespace detail
} // namespace ocl
} // namespace cldnn
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::unique_count_impl)
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::unique_gather_impl)

View File

@ -0,0 +1,73 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "intel_gpu/primitives/unique.hpp"
#include "primitive_inst.h"
namespace cldnn {
template <>
struct typed_program_node<unique_count> : typed_program_node_base<unique_count> {
using parent = typed_program_node_base<unique_count>;
using parent::parent;
program_node& input() const {
return get_dependency(0);
}
};
using unique_count_node = typed_program_node<unique_count>;
template <>
class typed_primitive_inst<unique_count> : public typed_primitive_inst_base<unique_count> {
public:
using parent = typed_primitive_inst_base<unique_count>;
using parent::parent;
static layout calc_output_layout(const unique_count_node& node, const kernel_impl_params& impl_param);
template <typename ShapeType>
static std::vector<layout> calc_output_layouts(const unique_count_node& node, const kernel_impl_params& impl_param);
static std::string to_string(const unique_count_node& node);
};
using unique_count_inst = typed_primitive_inst<unique_count>;
template <>
struct typed_program_node<unique_gather> : typed_program_node_base<unique_gather> {
using parent = typed_program_node_base<unique_gather>;
using parent::parent;
program_node& input() const {
return get_dependency(0);
}
bool generates_dynamic_output() const override {
return true;
}
std::vector<size_t> get_shape_infer_dependencies() const override {
return {1};
}
};
using unique_gather_node = typed_program_node<unique_gather>;
template <>
class typed_primitive_inst<unique_gather> : public typed_primitive_inst_base<unique_gather> {
public:
using parent = typed_primitive_inst_base<unique_gather>;
using parent::parent;
static layout calc_output_layout(const unique_gather_node& node, const kernel_impl_params& impl_param);
template <typename ShapeType>
static std::vector<layout> calc_output_layouts(const unique_gather_node& node,
const kernel_impl_params& impl_param);
static std::string to_string(const unique_gather_node& node);
};
using unique_gather_inst = typed_primitive_inst<unique_gather>;
} // namespace cldnn

View File

@ -65,6 +65,7 @@
#include "strided_slice_inst.h"
#include "loop_inst.h"
#include "reverse_inst.h"
#include "unique_inst.hpp"
#include "to_string_utils.h"
// TODO: Remove once we have interface for kernels cache
@ -1440,6 +1441,8 @@ void program::set_layout_optimizer_attributes(layout_optimizer& lo) {
prim.type() != cldnn::gather_tree::type_id() &&
prim.type() != cldnn::experimental_detectron_detection_output::type_id() &&
prim.type() != cldnn::convert_color::type_id() &&
prim.type() != cldnn::unique_count::type_id() &&
prim.type() != cldnn::unique_gather::type_id() &&
prim.type() != cldnn::experimental_detectron_generate_proposals_single_image::type_id()) {
can_use_fsv16 = false;
}
@ -1493,6 +1496,8 @@ void program::set_layout_optimizer_attributes(layout_optimizer& lo) {
prim.type() != cldnn::multiclass_nms::type_id() &&
prim.type() != cldnn::normalize::type_id() &&
prim.type() != cldnn::deconvolution::type_id() &&
prim.type() != cldnn::unique_count::type_id() &&
prim.type() != cldnn::unique_gather::type_id() &&
prim.type() != cldnn::experimental_detectron_generate_proposals_single_image::type_id()) {
can_use_bs_fs_yx_bsv16_fsv16 = false;
}

View File

@ -0,0 +1,138 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "ngraph/op/unique.hpp"
#include <sstream>
#include <string>
#include "intel_gpu/runtime/memory.hpp"
#include "json_object.h"
#include "primitive_type_base.h"
#include "unique_inst.hpp"
namespace cldnn {
// -----------------------------------------------
// unique_count
// -----------------------------------------------
GPU_DEFINE_PRIMITIVE_TYPE_ID(unique_count)
layout unique_count_inst::calc_output_layout(const unique_count_node& node, const kernel_impl_params& impl_param) {
OPENVINO_THROW("Only calc_output_layouts should be used!");
}
template <typename ShapeType>
std::vector<layout> unique_count_inst::calc_output_layouts(const unique_count_node& node,
const kernel_impl_params& impl_param) {
return {layout{ov::PartialShape{1}, cldnn::data_types::i64, cldnn::format::bfyx}};
}
template std::vector<layout> unique_count_inst::calc_output_layouts<ov::PartialShape>(
const unique_count_node& node,
const kernel_impl_params& impl_param);
std::string unique_count_inst::to_string(const unique_count_node& node) {
auto primitive = node.get_primitive();
json_composite unique_count_info;
unique_count_info.add("input", node.input().id());
if (!primitive->flattened) {
unique_count_info.add("axis", primitive->axis);
}
auto node_info = node.desc_to_json();
node_info->add("unique_count info", unique_count_info);
std::ostringstream primitive_description;
node_info->dump(primitive_description);
return primitive_description.str();
}
// -----------------------------------------------
// unique_gather
// -----------------------------------------------
GPU_DEFINE_PRIMITIVE_TYPE_ID(unique_gather)
layout unique_gather_inst::calc_output_layout(const unique_gather_node& node, const kernel_impl_params& impl_param) {
OPENVINO_THROW("Only calc_output_layouts should be used!");
}
template <typename ShapeType>
std::vector<layout> unique_gather_inst::calc_output_layouts(const unique_gather_node& node,
const kernel_impl_params& impl_param) {
std::vector<layout> layouts;
const auto desc = impl_param.typed_desc<unique_gather>();
const auto input_layout = impl_param.get_input_layout();
std::vector<ShapeType> output_shapes = {ShapeType(), ShapeType(), ShapeType(), ShapeType()};
if (!impl_param.memory_deps.count(1)) {
if (desc->flattened) {
output_shapes.at(0) = ov::PartialShape{ov::Dimension::dynamic()};
} else {
output_shapes.at(0) = ov::PartialShape::dynamic(input_layout.get_partial_shape().rank());
}
output_shapes.at(1) = ov::PartialShape{ov::Dimension::dynamic()};
output_shapes.at(2) = ov::PartialShape{ov::Dimension::dynamic()};
output_shapes.at(3) = ov::PartialShape{ov::Dimension::dynamic()};
} else {
const auto input_shape = input_layout.get_shape();
const size_t unique_count = read_vector<int64_t>(impl_param.memory_deps.at(1), impl_param.get_stream()).at(0);
if (desc->flattened) {
const auto input_tensor_capacity = ov::shape_size(input_shape);
output_shapes.at(0) = ov::Shape{unique_count};
output_shapes.at(1) = ov::Shape{unique_count};
output_shapes.at(2) = ov::Shape{input_tensor_capacity};
output_shapes.at(3) = ov::Shape{unique_count};
} else {
auto output_shape = input_shape;
auto& new_axis_dimension = output_shape.at(desc->axis);
const auto old_axis_dimension = new_axis_dimension;
new_axis_dimension = unique_count;
output_shapes.at(0) = output_shape;
output_shapes.at(1) = ov::Shape{new_axis_dimension};
output_shapes.at(2) = ov::Shape{old_axis_dimension};
output_shapes.at(3) = ov::Shape{new_axis_dimension};
}
}
for (auto i = 0U; i < desc->num_outputs; ++i) {
const auto& output_shape = output_shapes.at(i);
const auto output_dt = desc->output_data_types.at(i).value();
auto output_format = format::get_default_format(output_shape.size());
if (i == 0) {
if (desc->flattened) {
output_format = format::adjust_to_rank(input_layout.format, output_shape.size());
} else {
output_format = input_layout.format;
}
}
layouts.emplace_back(output_shape, output_dt, output_format);
}
return layouts;
}
template std::vector<layout> unique_gather_inst::calc_output_layouts<ov::PartialShape>(
const unique_gather_node& node,
const kernel_impl_params& impl_param);
std::string unique_gather_inst::to_string(const unique_gather_node& node) {
auto primitive = node.get_primitive();
json_composite unique_gather_info;
unique_gather_info.add("input", node.input().id());
if (!primitive->flattened) {
unique_gather_info.add("axis", primitive->axis);
}
unique_gather_info.add("sorted", primitive->sorted);
auto node_info = node.desc_to_json();
node_info->add("unique_gather info", unique_gather_info);
std::ostringstream primitive_description;
node_info->dump(primitive_description);
return primitive_description.str();
}
} // namespace cldnn

View File

@ -0,0 +1,65 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#ifdef FLATTENED
# define LENGTH TOTAL_DATA_SIZE
#else
# define LENGTH AXIS_LENGTH
#endif
#ifndef FLATTENED
inline bool FUNC(slices_are_equal)(OPTIONAL_SHAPE_INFO_ARG const __global INPUT0_TYPE* out_unique_elements,
uint lhs,
const __global INPUT0_TYPE* input,
uint rhs) {
ITERATE(if (out_unique_elements[GET_INDEX(INPUT0, lhs)] != input[GET_INDEX(INPUT0, rhs)]) { return false; })
return true;
}
inline void FUNC(assign_slice)(OPTIONAL_SHAPE_INFO_ARG __global INPUT0_TYPE* out_unique_elements,
uint lhs,
const __global INPUT0_TYPE* input,
uint rhs) {
ITERATE(out_unique_elements[GET_INDEX(INPUT0, lhs)] = input[GET_INDEX(INPUT0, rhs)];)
}
#endif
// Works on unsorted data, but has worse complexity
inline uint FUNC(unique)(OPTIONAL_SHAPE_INFO_ARG const __global INPUT0_TYPE* input,
__global INPUT0_TYPE* out_unique_elements,
uint first,
const uint last) {
uint unique_length = 0;
for (; first != last; ++first) {
bool unique = true;
for (uint unique_idx = 0; unique_idx < unique_length; ++unique_idx) {
#ifdef FLATTENED
if (out_unique_elements[unique_idx] == input[GET_INDEX(INPUT0, first)]) {
#else
if (FUNC_CALL(slices_are_equal)(OPTIONAL_SHAPE_INFO_TENSOR out_unique_elements, unique_idx, input, first)) {
#endif
unique = false;
break;
}
}
if (unique) {
#ifdef FLATTENED
out_unique_elements[unique_length] = input[GET_INDEX(INPUT0, first)];
#else
FUNC_CALL(assign_slice)(OPTIONAL_SHAPE_INFO_TENSOR out_unique_elements, unique_length, input, first);
#endif
++unique_length;
}
}
return unique_length;
}
KERNEL(unique_count_ref)
(OPTIONAL_SHAPE_INFO_ARG const __global INPUT0_TYPE* input,
__global OUTPUT_TYPE* out_total_count,
__global INPUT0_TYPE* out_unique_elements) {
out_total_count[0] = FUNC_CALL(unique)(OPTIONAL_SHAPE_INFO_TENSOR input, out_unique_elements, 0, LENGTH);
}
#undef LENGTH

View File

@ -0,0 +1,181 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#ifdef FLATTENED
# define LENGTH TOTAL_DATA_SIZE
#else
# define LENGTH AXIS_LENGTH
#endif
inline void FUNC(swap_out_unique_elements)(__global OUTPUT_TYPE* a, __global OUTPUT_TYPE* b) {
const OUTPUT_TYPE temp = *a;
*a = *b;
*b = temp;
}
inline void FUNC(swap_out_indices)(__global OUTPUT1_TYPE* a, __global OUTPUT1_TYPE* b) {
const OUTPUT1_TYPE temp = *a;
*a = *b;
*b = temp;
}
inline void FUNC(swap_out_counts)(__global OUTPUT3_TYPE* a, __global OUTPUT3_TYPE* b) {
const OUTPUT3_TYPE temp = *a;
*a = *b;
*b = temp;
}
#ifndef FLATTENED
inline bool FUNC(compare_slices_ascending)(OPTIONAL_SHAPE_INFO_ARG const __global OUTPUT_TYPE* out_unique_elements,
uint lhs,
uint rhs) {
ITERATE(
if (out_unique_elements[GET_INDEX(OUTPUT, lhs)] > out_unique_elements[GET_INDEX(OUTPUT, rhs)]) {
return true;
} else if (out_unique_elements[GET_INDEX(OUTPUT, lhs)] < out_unique_elements[GET_INDEX(OUTPUT, rhs)]) {
return false;
} else { continue; })
return false;
}
inline void FUNC(swap_slices)(OPTIONAL_SHAPE_INFO_ARG __global OUTPUT_TYPE* out_unique_elements, uint lhs, uint rhs) {
ITERATE(FUNC_CALL(swap_out_unique_elements)(&out_unique_elements[GET_INDEX(OUTPUT, lhs)],
&out_unique_elements[GET_INDEX(OUTPUT, rhs)]);)
}
inline bool FUNC(slices_are_equal)(OPTIONAL_SHAPE_INFO_ARG const __global OUTPUT_TYPE* out_unique_elements,
uint lhs,
const __global INPUT0_TYPE* input,
uint rhs) {
ITERATE(if (out_unique_elements[GET_INDEX(OUTPUT, lhs)] != input[GET_INDEX(INPUT0, rhs)]) { return false; })
return true;
}
inline void FUNC(assign_slice)(OPTIONAL_SHAPE_INFO_ARG __global OUTPUT_TYPE* out_unique_elements,
uint lhs,
const __global INPUT0_TYPE* input,
uint rhs) {
ITERATE(out_unique_elements[GET_INDEX(OUTPUT, lhs)] = input[GET_INDEX(INPUT0, rhs)];)
}
#endif
// We use bubble sort here, because we need stable sort
// TODO: Change to better stable sort algorithm
inline void FUNC(bubbleSort)(OPTIONAL_SHAPE_INFO_ARG __global OUTPUT_TYPE* out_unique_elements,
__global OUTPUT1_TYPE* out_indices,
__global OUTPUT3_TYPE* out_counts,
int l,
int h) {
for (int i = 0; i < h - l; ++i) {
bool swapped = false;
for (int j = l; j < h - i; ++j) {
#ifdef FLATTENED
int j1 = j + 1;
if ((out_unique_elements[GET_INDEX(OUTPUT, j)] > out_unique_elements[GET_INDEX(OUTPUT, j1)])) {
FUNC_CALL(swap_out_unique_elements)
(&out_unique_elements[GET_INDEX(OUTPUT, j)], &out_unique_elements[GET_INDEX(OUTPUT, j1)]);
#else
if (FUNC_CALL(compare_slices_ascending)(OPTIONAL_SHAPE_INFO_TENSOR out_unique_elements, j, j + 1)) {
FUNC_CALL(swap_slices)(OPTIONAL_SHAPE_INFO_TENSOR out_unique_elements, j, j + 1);
#endif
FUNC_CALL(swap_out_indices)(&out_indices[j], &out_indices[j + 1]);
FUNC_CALL(swap_out_counts)(&out_counts[j], &out_counts[j + 1]);
swapped = true;
}
}
if (!swapped) {
break;
}
}
}
// Works on unsorted data, but has worse complexity
inline uint FUNC(unique)(OPTIONAL_SHAPE_INFO_ARG const __global INPUT0_TYPE* input,
__global OUTPUT_TYPE* out_unique_elements,
__global OUTPUT1_TYPE* out_indices,
__global OUTPUT2_TYPE* out_rev_indices,
__global OUTPUT3_TYPE* out_counts,
uint first,
const uint last) {
uint unique_length = 0;
for (; first != last; ++first) {
bool unique = true;
for (uint unique_idx = 0; unique_idx < unique_length; ++unique_idx) {
#ifdef FLATTENED
if (out_unique_elements[GET_INDEX(OUTPUT, unique_idx)] == input[GET_INDEX(INPUT0, first)]) {
#else
if (FUNC_CALL(slices_are_equal)(OPTIONAL_SHAPE_INFO_TENSOR out_unique_elements, unique_idx, input, first)) {
#endif
unique = false;
out_rev_indices[first] = unique_idx;
++out_counts[unique_idx];
break;
}
}
if (unique) {
#ifdef FLATTENED
out_unique_elements[GET_INDEX(OUTPUT, unique_length)] = input[GET_INDEX(INPUT0, first)];
#else
FUNC_CALL(assign_slice)(OPTIONAL_SHAPE_INFO_TENSOR out_unique_elements, unique_length, input, first);
#endif
out_indices[unique_length] = first;
out_rev_indices[first] = unique_length;
++out_counts[unique_length];
++unique_length;
}
}
return unique_length;
}
inline uint FUNC(fill_out_rev_indices)(OPTIONAL_SHAPE_INFO_ARG const __global INPUT0_TYPE* input,
__global OUTPUT_TYPE* out_unique_elements,
__global OUTPUT2_TYPE* out_rev_indices,
const uint end) {
for (uint i = 0; i < LENGTH; ++i) {
for (uint j = 0; j < end; ++j) {
#ifdef FLATTENED
if (out_unique_elements[GET_INDEX(OUTPUT, j)] == input[GET_INDEX(INPUT0, i)]) {
#else
if (FUNC_CALL(slices_are_equal)(OPTIONAL_SHAPE_INFO_TENSOR out_unique_elements, j, input, i)) {
#endif
out_rev_indices[i] = j;
break;
}
}
}
}
KERNEL(unique_gather_ref)
(OPTIONAL_SHAPE_INFO_ARG const __global INPUT0_TYPE* input,
const __global INPUT1_TYPE* out_total_count,
__global OUTPUT_TYPE* out_unique_elements,
__global OUTPUT1_TYPE* out_indices,
__global OUTPUT2_TYPE* out_rev_indices,
__global OUTPUT3_TYPE* out_counts) {
// TODO: Think of better approach to initialize with zeros
for (uint i = 0; i < LENGTH; ++i) {
out_counts[i] = 0;
}
// Run unique algorithm
const uint end = FUNC_CALL(unique)(OPTIONAL_SHAPE_INFO_TENSOR input,
out_unique_elements,
out_indices,
out_rev_indices,
out_counts,
0,
LENGTH);
#ifdef SORTED
// Sort out data
FUNC_CALL(bubbleSort)(OPTIONAL_SHAPE_INFO_TENSOR out_unique_elements, out_indices, out_counts, 0, end - 1);
// After sorting all out_unique_elements will shuffle and out_rev_indices should change not only order, but their
// values (indexes).
// So, we need to fill them again...
// Another approach would be to allocate whole separate buffer as input, sort whole dataset first and then run
// deduplicate algorithm with correct filling of out_rev_indices.
FUNC_CALL(fill_out_rev_indices)
(OPTIONAL_SHAPE_INFO_TENSOR input, out_unique_elements, out_rev_indices, end);
#endif
}
#undef LENGTH

View File

@ -93,7 +93,9 @@ enum class KernelType {
PRIOR_BOX,
EYE,
GENERATE_PROPOSALS,
MULTICLASS_NMS
MULTICLASS_NMS,
UNIQUE_COUNT,
UNIQUE_GATHER,
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

View File

@ -0,0 +1,343 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "unique_kernel_ref.hpp"
#include "kernel_selector_utils.h"
namespace kernel_selector {
namespace {
JitConstants MakeAxisJitConstants(size_t rank, int64_t axis, const std::string& prefix_for_iterate) {
const std::map<char, std::string> dimensions_sizes_map = {
{'b', "_BATCH_NUM"},
{'f', "_FEATURE_NUM"},
{'w', "_SIZE_W"},
{'z', "_SIZE_Z"},
{'y', "_SIZE_Y"},
{'x', "_SIZE_X"},
};
auto dimensions = [rank]() -> std::vector<char> {
switch (rank) {
case 4:
return {'b', 'f', 'y', 'x'};
case 5:
return {'b', 'f', 'z', 'y', 'x'};
case 6:
return {'b', 'f', 'w', 'z', 'y', 'x'};
}
throw std::invalid_argument("Unsupported input rank for unique primitive");
}();
auto& axis_dimension = dimensions.at(axis);
const auto axis_length_name = "AXIS_LENGTH";
const auto axis_length_val = "INPUT0" + dimensions_sizes_map.at(axis_dimension);
// Mark axis dimension as 'i' for indexing
axis_dimension = 'i';
const auto get_index_name = "GET_INDEX(prefix, i)";
const auto get_index_val = [&dimensions]() {
std::string str = "CAT(prefix, _GET_INDEX)";
str += '(';
for (auto ch : dimensions) {
str += ch;
str += ',';
}
str.back() = ')';
return str;
}();
const auto iterate_name = "ITERATE(body)";
const auto iterate_val = [&dimensions, &dimensions_sizes_map, &prefix_for_iterate]() {
std::stringstream ss;
for (auto ch : dimensions) {
// No need to iterate through axis index
if (ch == 'i') {
continue;
}
const auto size = prefix_for_iterate + dimensions_sizes_map.at(ch);
ss << "for (uint " << ch << " = 0; " << ch << " < " << size << "; ++" << ch << ") {";
}
ss << "body";
// Note size - 1 here as we don't iterate through axis index
for (auto i = 0U; i < dimensions.size() - 1; ++i) {
ss << '}';
}
return ss.str();
}();
return {MakeJitConstant(axis_length_name, axis_length_val),
MakeJitConstant(get_index_name, get_index_val),
MakeJitConstant(iterate_name, iterate_val)};
}
JitConstants MakeFlattenedJitConstants(size_t rank, bool simple_layout) {
const auto get_index_name = "GET_INDEX(prefix, i)";
if (simple_layout) {
const auto get_index_val = "i";
return {MakeJitConstant("FLATTENED", true), MakeJitConstant(get_index_name, get_index_val)};
}
const auto dimensions = [rank]() -> std::vector<std::string> {
switch (rank) {
case 4:
return {"i / (prefix##_SIZE_X * prefix##_SIZE_Y * prefix##_FEATURE_NUM)",
"i / (prefix##_SIZE_X * prefix##_SIZE_Y) % prefix##_FEATURE_NUM",
"i / prefix##_SIZE_X % prefix##_SIZE_Y",
"i % prefix##_SIZE_X"};
case 5:
return {"i / (prefix##_SIZE_X * prefix##_SIZE_Y * prefix##_SIZE_Z * prefix##_FEATURE_NUM)",
"i / (prefix##_SIZE_X * prefix##_SIZE_Y * prefix##_SIZE_Z) % prefix##_FEATURE_NUM",
"i / (prefix##_SIZE_X * prefix##_SIZE_Y) % prefix##_SIZE_Z",
"i / prefix##_SIZE_X % prefix##_SIZE_Y",
"i % prefix##_SIZE_X"};
case 6:
return {
"i / (prefix##_SIZE_X * prefix##_SIZE_Y * prefix##_SIZE_Z * prefix##_SIZE_W * prefix##_FEATURE_NUM)",
"i / (prefix##_SIZE_X * prefix##_SIZE_Y * prefix##_SIZE_Z * prefix##_SIZE_W) % prefix##_FEATURE_NUM",
"i / (prefix##_SIZE_X * prefix##_SIZE_Y * prefix##_SIZE_Z) % prefix##_SIZE_W",
"i / (prefix##_SIZE_X * prefix##_SIZE_Y) % prefix##_SIZE_Z",
"i / prefix##_SIZE_X % prefix##_SIZE_Y",
"i % prefix##_SIZE_X"};
}
throw std::invalid_argument("Unsupported rank for unique primitive");
}();
const auto get_index_val = [&dimensions]() {
std::string str = "CAT(prefix, _GET_INDEX)";
str += '(';
for (const auto& dimension : dimensions) {
str += dimension;
str += ',';
}
str.back() = ')';
return str;
}();
return {MakeJitConstant("FLATTENED", true), MakeJitConstant(get_index_name, get_index_val)};
}
} // namespace
KernelsData UniqueCountKernelRef::GetKernelsData(const Params& params, const optional_params& options) const {
if (!Validate(params, options)) {
return {};
}
auto kernel_data = KernelData::Default<unique_count_params>(params);
const auto& kernel_params = dynamic_cast<const unique_count_params&>(*kernel_data.params);
const auto dispatch_data = SetDefault(kernel_params);
const auto entry_point = GetEntryPoint(kernelName, kernel_params.layerID, params, options);
const auto jit_constants = GetJitConstants(kernel_params);
const auto jit = CreateJit(kernelName, jit_constants, entry_point);
auto& kernel = kernel_data.kernels.front();
kernel_data.update_dispatch_data_func = [](const Params& params, KernelData& kd) {
const auto& prim_params = dynamic_cast<const unique_count_params&>(params);
auto dispatchData = SetDefault(prim_params);
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
kd.kernels[0].params.workGroups.global = dispatchData.gws;
kd.kernels[0].params.workGroups.local = dispatchData.lws;
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
// Need to adjust buffer size according to input size
kd.internalBufferSizes.front() = prim_params.inputs.front().PhysicalSizeInBytes();
kd.internalBufferDataType = prim_params.inputs.front().GetDType();
};
FillCLKernelData(kernel,
dispatch_data,
params.engineInfo,
kernelName,
jit,
entry_point,
{},
false,
false,
static_cast<int>(kernel_params.inputs.size()),
GetFusedPrimitiveInputsCount(kernel_params),
static_cast<int>(kernel_params.outputs.size()),
kernel_params.inputs.front().is_dynamic());
// Additional buffer to save intermediate algorithm results
kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0});
kernel_data.internalBufferSizes.push_back(kernel_params.inputs.front().PhysicalSizeInBytes());
kernel_data.internalBufferDataType = kernel_params.inputs.front().GetDType();
return {kernel_data};
}
ParamsKey UniqueCountKernelRef::GetSupportedKey() const {
ParamsKey key;
key.EnableAllInputDataType();
key.EnableAllOutputDataType();
key.EnableDifferentTypes();
key.EnableAllInputLayout();
key.EnableAllOutputLayout();
key.EnableTensorOffset();
key.EnableTensorPitches();
key.EnableBatching();
key.EnableDynamicShapesSupport();
return key;
}
bool UniqueCountKernelRef::Validate(const Params& params, const optional_params& options) const {
if (params.GetType() != KernelType::UNIQUE_COUNT || options.GetType() != KernelType::UNIQUE_COUNT) {
return false;
}
const auto& kernel_params = dynamic_cast<const unique_count_params&>(params);
if (kernel_params.inputs.size() != 1) {
return false;
}
if (kernel_params.outputs.size() != 1) {
return false;
}
return true;
}
JitConstants UniqueCountKernelRef::GetJitConstants(const unique_count_params& kernel_params) const {
const auto input = kernel_params.inputs.front();
auto jit_constants = MakeBaseParamsJitConstants(kernel_params);
if (kernel_params.flattened) {
jit_constants.Merge(MakeFlattenedJitConstants(input.Dimentions(), input.SimpleLayout()));
} else {
jit_constants.Merge(MakeAxisJitConstants(input.Dimentions(), kernel_params.axis, "INPUT0"));
}
if (input.is_dynamic()) {
DimensionAccessHelper dims(input);
const std::string total_data_size =
toVectorMulString({dims.x(), dims.y(), dims.z(), dims.w(), dims.f(), dims.b()});
jit_constants.AddConstant(MakeJitConstant("TOTAL_DATA_SIZE", total_data_size));
} else {
jit_constants.AddConstant(MakeJitConstant("TOTAL_DATA_SIZE", input.LogicalSize()));
}
return jit_constants;
}
CommonDispatchData UniqueCountKernelRef::SetDefault(const unique_count_params& /* kernel_params */) {
CommonDispatchData dispatch_data;
// For now we run only in one thread
// TODO: Parallelize
dispatch_data.gws = {1, 1, 1};
dispatch_data.lws = {1, 1, 1};
return dispatch_data;
}
KernelsData UniqueGatherKernelRef::GetKernelsData(const Params& params, const optional_params& options) const {
if (!Validate(params, options)) {
return {};
}
auto kernel_data = KernelData::Default<unique_gather_params>(params);
const auto& kernel_params = dynamic_cast<const unique_gather_params&>(*kernel_data.params);
const auto dispatch_data = SetDefault(kernel_params);
const auto entry_point = GetEntryPoint(kernelName, kernel_params.layerID, params, options);
const auto jit_constants = GetJitConstants(kernel_params);
const auto jit = CreateJit(kernelName, jit_constants, entry_point);
auto& kernel = kernel_data.kernels.front();
kernel_data.update_dispatch_data_func = [](const Params& params, KernelData& kd) {
const auto& prim_params = dynamic_cast<const unique_gather_params&>(params);
auto dispatchData = SetDefault(prim_params);
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
kd.kernels[0].params.workGroups.global = dispatchData.gws;
kd.kernels[0].params.workGroups.local = dispatchData.lws;
kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
};
FillCLKernelData(kernel,
dispatch_data,
params.engineInfo,
kernelName,
jit,
entry_point,
{},
false,
false,
static_cast<int>(kernel_params.inputs.size()),
GetFusedPrimitiveInputsCount(kernel_params),
static_cast<int>(kernel_params.outputs.size()),
kernel_params.outputs.front().is_dynamic());
return {kernel_data};
}
ParamsKey UniqueGatherKernelRef::GetSupportedKey() const {
ParamsKey key;
key.EnableAllInputDataType();
key.EnableAllOutputDataType();
key.EnableDifferentTypes();
key.EnableAllInputLayout();
key.EnableAllOutputLayout();
key.EnableTensorOffset();
key.EnableTensorPitches();
key.EnableBatching();
key.EnableDynamicShapesSupport();
return key;
}
bool UniqueGatherKernelRef::Validate(const Params& params, const optional_params& options) const {
if (params.GetType() != KernelType::UNIQUE_GATHER || options.GetType() != KernelType::UNIQUE_GATHER) {
return false;
}
const auto& kernel_params = dynamic_cast<const unique_gather_params&>(params);
if (kernel_params.inputs.size() != 2) {
return false;
}
if (kernel_params.outputs.size() != 4) {
return false;
}
return true;
}
JitConstants UniqueGatherKernelRef::GetJitConstants(const unique_gather_params& kernel_params) const {
const auto input = kernel_params.inputs.front();
auto jit_constants = MakeBaseParamsJitConstants(kernel_params);
if (kernel_params.sorted) {
jit_constants.AddConstant(MakeJitConstant("SORTED", true));
}
if (kernel_params.flattened) {
jit_constants.Merge(MakeFlattenedJitConstants(input.Dimentions(), input.SimpleLayout()));
} else {
jit_constants.Merge(MakeAxisJitConstants(input.Dimentions(), kernel_params.axis, "OUTPUT"));
}
if (input.is_dynamic()) {
DimensionAccessHelper dims(input);
const std::string total_data_size =
toVectorMulString({dims.x(), dims.y(), dims.z(), dims.w(), dims.f(), dims.b()});
jit_constants.AddConstant(MakeJitConstant("TOTAL_DATA_SIZE", total_data_size));
} else {
jit_constants.AddConstant(MakeJitConstant("TOTAL_DATA_SIZE", input.LogicalSize()));
}
return jit_constants;
}
CommonDispatchData UniqueGatherKernelRef::SetDefault(const unique_gather_params& /* kernel_params */) {
CommonDispatchData dispatch_data;
// For now we run only in one thread
// TODO: Parallelize
dispatch_data.gws = {1, 1, 1};
dispatch_data.lws = {1, 1, 1};
return dispatch_data;
}
} // namespace kernel_selector

View File

@ -0,0 +1,74 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "kernel_base_opencl.h"
namespace kernel_selector {
/**
* UniqueCount reference kernel parameters.
*/
struct unique_count_params : base_params {
unique_count_params() : base_params(KernelType::UNIQUE_COUNT) {}
bool flattened{};
int64_t axis{};
};
/**
* UniqueCount reference kernel optional parameters.
*/
struct unique_count_optional_params : optional_params {
unique_count_optional_params() : optional_params(KernelType::UNIQUE_COUNT) {}
};
/**
* Reference kernel for UniqueCount.
*/
class UniqueCountKernelRef : public KernelBaseOpenCL {
public:
UniqueCountKernelRef() : KernelBaseOpenCL{"unique_count_ref"} {}
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
ParamsKey GetSupportedKey() const override;
protected:
bool Validate(const Params& params, const optional_params& options) const override;
JitConstants GetJitConstants(const unique_count_params& kernel_params) const;
static CommonDispatchData SetDefault(const unique_count_params& kernel_params);
};
/**
* UniqueGather reference kernel parameters.
*/
struct unique_gather_params : base_params {
unique_gather_params() : base_params(KernelType::UNIQUE_GATHER) {}
bool flattened{};
int64_t axis{};
bool sorted{};
};
/**
* UniqueGather reference kernel optional parameters.
*/
struct unique_gather_optional_params : optional_params {
unique_gather_optional_params() : optional_params(KernelType::UNIQUE_GATHER) {}
};
/**
* Reference kernel for UniqueGather.
*/
class UniqueGatherKernelRef : public KernelBaseOpenCL {
public:
UniqueGatherKernelRef() : KernelBaseOpenCL{"unique_gather_ref"} {}
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
ParamsKey GetSupportedKey() const override;
protected:
bool Validate(const Params& params, const optional_params& options) const override;
JitConstants GetJitConstants(const unique_gather_params& kernel_params) const;
static CommonDispatchData SetDefault(const unique_gather_params& kernel_params);
};
} // namespace kernel_selector

View File

@ -0,0 +1,37 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "unique_kernel_selector.hpp"
#include "unique_kernel_ref.hpp"
namespace kernel_selector {
unique_count_kernel_selector::unique_count_kernel_selector() {
Attach<UniqueCountKernelRef>();
}
KernelsData unique_count_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const {
return GetNaiveBestKernel(params, options, KernelType::UNIQUE_COUNT);
}
unique_count_kernel_selector& unique_count_kernel_selector::Instance() {
static unique_count_kernel_selector instance;
return instance;
}
unique_gather_kernel_selector::unique_gather_kernel_selector() {
Attach<UniqueGatherKernelRef>();
}
KernelsData unique_gather_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const {
return GetNaiveBestKernel(params, options, KernelType::UNIQUE_GATHER);
}
unique_gather_kernel_selector& unique_gather_kernel_selector::Instance() {
static unique_gather_kernel_selector instance;
return instance;
}
} // namespace kernel_selector

View File

@ -0,0 +1,25 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "kernel_selector.h"
namespace kernel_selector {
class unique_count_kernel_selector : public kernel_selector_base {
public:
unique_count_kernel_selector();
KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
static unique_count_kernel_selector& Instance();
};
class unique_gather_kernel_selector : public kernel_selector_base {
public:
unique_gather_kernel_selector();
KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
static unique_gather_kernel_selector& Instance();
};
} // namespace kernel_selector

View File

@ -0,0 +1,54 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "intel_gpu/primitives/unique.hpp"
#include "intel_gpu/plugin/program.hpp"
#include "ngraph/op/unique.hpp"
namespace ov {
namespace intel_gpu {
namespace {
void CreateUniqueOp(Program& p, const std::shared_ptr<ngraph::op::v10::Unique>& op) {
validate_inputs_count(op, {1, 2});
bool flattened = true;
int64_t axis{};
if (op->get_input_size() == 2) {
auto axis_constant = std::dynamic_pointer_cast<ngraph::op::Constant>(op->get_input_node_shared_ptr(1));
if (!axis_constant) {
IE_THROW() << "Unsupported parameter nodes type in " << op->get_friendly_name() << " ("
<< op->get_type_name() << ")";
}
axis = axis_constant->cast_vector<int64_t>().at(0);
axis = ov::normalize_axis(op.get(), axis, op->get_input_partial_shape(0).rank());
flattened = false;
}
const auto input = p.GetInputInfo(op).front();
const auto layer_name = layer_type_name_ID(op);
const auto count_prim_id = layer_name + "_count";
const cldnn::unique_count unique_count_prim(count_prim_id, input, flattened, axis);
p.add_primitive(*op, unique_count_prim);
const cldnn::unique_gather unique_gather_prim(layer_name,
{input, count_prim_id},
flattened,
axis,
op->get_sorted(),
cldnn::element_type_to_data_type(op->get_input_element_type(0)),
cldnn::element_type_to_data_type(op->get_index_element_type()),
cldnn::element_type_to_data_type(op->get_count_element_type()));
p.add_primitive(*op, unique_gather_prim);
}
} // namespace
REGISTER_FACTORY_IMPL(v10, Unique);
} // namespace intel_gpu
} // namespace ov

View File

@ -126,5 +126,8 @@ std::vector<std::string> disabledTestPatterns() {
R"(.*smoke_GroupDeconv_2D_Dynamic_.*FP32/GroupDeconvolutionLayerGPUTest.CompareWithRefs.*)",
// Issue: 111440
R"(.*smoke_set1/GatherElementsGPUTest.CompareWithRefs.*)",
// For some strange reason (bug?) output format cannot have a rank greater than 4 for dynamic shape case,
// because it crashes in some random places during "reorder_inputs" pass.
R"(.*UniqueLayerDynamicGPUTest.*\(\d*\.\d*\.\d*\.\d*\.\d*\).*axis.*)",
};
}

View File

@ -0,0 +1,170 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "common_test_utils/ov_tensor_utils.hpp"
#include "ngraph_functions/builders.hpp"
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "shared_test_classes/base/ov_subgraph.hpp"
using namespace ov::test;
namespace GPULayerTestsDefinitions {
typedef std::tuple<std::vector<InputShape>, // Input shapes
std::tuple<bool, int>, // Is flattened and axis
bool, // Sorted
ElementType // Data precision
>
UniqueDynamicGPUTestParams;
class UniqueLayerDynamicGPUTest : public testing::WithParamInterface<UniqueDynamicGPUTestParams>,
virtual public SubgraphBaseTest {
public:
static std::string getTestCaseName(const testing::TestParamInfo<UniqueDynamicGPUTestParams>& obj) {
std::vector<InputShape> inputShapes;
std::tuple<bool, int> flatOrAxis;
bool sorted;
ElementType dataPrecision;
std::tie(inputShapes, flatOrAxis, sorted, dataPrecision) = obj.param;
std::ostringstream result;
result << "IS=(";
for (size_t i = 0lu; i < inputShapes.size(); i++) {
result << CommonTestUtils::partialShape2str({inputShapes[i].first})
<< (i < inputShapes.size() - 1lu ? "_" : "");
}
result << ")_TS=";
for (size_t i = 0lu; i < inputShapes.front().second.size(); i++) {
result << "{";
for (size_t j = 0lu; j < inputShapes.size(); j++) {
result << CommonTestUtils::vec2str(inputShapes[j].second[i])
<< (j < inputShapes.size() - 1lu ? "_" : "");
}
result << "}_";
}
if (!std::get<0>(flatOrAxis)) {
result << "axis=" << std::get<1>(flatOrAxis) << "_";
} else {
result << "flattened"
<< "_";
}
result << "sorted=" << (sorted ? "True" : "False") << "_";
result << "dataPrc=" << dataPrecision;
return result.str();
}
protected:
void SetUp() override {
std::vector<InputShape> inputShapes;
std::tuple<bool, int> flatOrAxis;
bool sorted, flattened;
int axis;
ElementType dataPrecision;
std::tie(inputShapes, flatOrAxis, sorted, dataPrecision) = this->GetParam();
targetDevice = CommonTestUtils::DEVICE_GPU;
init_input_shapes(inputShapes);
flattened = std::get<0>(flatOrAxis);
auto params = ngraph::builder::makeDynamicParams(dataPrecision, inputDynamicShapes);
params[0]->set_friendly_name("data");
auto paramOuts =
ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ov::op::v0::Parameter>(params));
std::shared_ptr<ov::Node> uniqueNode;
if (flattened) {
uniqueNode = std::make_shared<ov::op::v10::Unique>(paramOuts[0], sorted);
} else {
axis = std::get<1>(flatOrAxis);
uniqueNode = std::make_shared<ov::op::v10::Unique>(
paramOuts[0],
ov::op::v0::Constant::create(ov::element::i64, ov::Shape({1}), {axis}),
sorted);
}
// Need to create results for all outputs
ngraph::ResultVector results;
for (auto i = 0U; i < uniqueNode->get_output_size(); ++i) {
results.push_back(std::make_shared<ngraph::opset1::Result>(uniqueNode->output(i)));
}
function = std::make_shared<ngraph::Function>(results, params, "Unique");
}
void generate_inputs(const std::vector<ov::Shape>& targetInputStaticShapes) override {
inputs.clear();
const auto& funcInputs = function->inputs();
for (size_t i = 0; i < funcInputs.size(); ++i) {
const auto& funcInput = funcInputs[i];
ov::Tensor tensor;
if (funcInput.get_node()->get_friendly_name() == "data") {
int32_t range = std::accumulate(targetInputStaticShapes[0].begin(),
targetInputStaticShapes[0].end(),
1,
std::multiplies<size_t>());
tensor = utils::create_and_fill_tensor(funcInput.get_element_type(),
targetInputStaticShapes[0],
range,
-range / 2,
1);
}
inputs.insert({funcInput.get_node_shared_ptr(), tensor});
}
}
};
TEST_P(UniqueLayerDynamicGPUTest, CompareWithRefs) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
run();
}
namespace {
const std::vector<ElementType> dataPrecision = {
ElementType::f16,
ElementType::i32,
};
std::vector<std::tuple<bool, int>> flatOrAxis{{true, 0}, {false, 0}, {false, 1}, {false, -1}};
std::vector<bool> sorted{true, false};
std::vector<std::vector<InputShape>> getStaticShapes() {
return {
{{{}, {{7, 2, 3}}}},
{{{}, {{7, 2, 3, 5}}}},
{{{}, {{7, 2, 3, 5, 4}}}},
};
}
INSTANTIATE_TEST_SUITE_P(smoke_static,
UniqueLayerDynamicGPUTest,
::testing::Combine(::testing::ValuesIn(getStaticShapes()),
::testing::ValuesIn(flatOrAxis),
::testing::ValuesIn(sorted),
::testing::ValuesIn(dataPrecision)),
UniqueLayerDynamicGPUTest::getTestCaseName);
std::vector<std::vector<InputShape>> getDynamicShapes() {
return {
{{{ov::Dimension(2, 15), -1, -1, -1}, // Dynamic shape
{{8, 3, 3, 3}, {6, 5, 2, 5}, {4, 7, 1, 11}, {2, 9, 3, 4}}}}, // Target shapes
{{{-1, -1, -1, -1, -1}, // Dynamic shape
{{1, 2, 1, 13, 2}, {3, 4, 7, 2, 2}, {5, 6, 3, 5, 2}, {7, 8, 4, 4, 2}}}}, // Target shapes
};
}
INSTANTIATE_TEST_SUITE_P(smoke_dynamic,
UniqueLayerDynamicGPUTest,
::testing::Combine(::testing::ValuesIn(getDynamicShapes()),
::testing::ValuesIn(flatOrAxis),
::testing::ValuesIn(sorted),
::testing::ValuesIn(dataPrecision)),
UniqueLayerDynamicGPUTest::getTestCaseName);
} // namespace
} // namespace GPULayerTestsDefinitions

View File

@ -0,0 +1,394 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <test_utils/test_utils.h>
#include <intel_gpu/primitives/unique.hpp>
#include <vector>
using namespace cldnn;
using namespace tests;
namespace {
template <typename vecElementType>
std::string vec2str(const std::vector<vecElementType>& vec) {
if (!vec.empty()) {
std::ostringstream result;
result << "(";
std::copy(vec.begin(), vec.end() - 1, std::ostream_iterator<vecElementType>(result, "."));
result << vec.back() << ")";
return result.str();
}
return "()";
}
template <class ElemT, class IndexT, class CountT>
struct unique_test_inputs {
ov::Shape data_shape;
std::vector<ElemT> input_data;
std::vector<ElemT> expected_unique_values;
std::vector<IndexT> expected_indices;
std::vector<IndexT> expected_rev_indices;
std::vector<CountT> expected_counts;
bool flattened;
int64_t axis;
bool sorted;
};
template <class ElemT, class IndexT, class CountT>
using unique_test_params = std::tuple<unique_test_inputs<ElemT, IndexT, CountT>, format::type>;
template <class ElemT, class IndexT, class CountT>
struct unique_gpu_test : public testing::TestWithParam<unique_test_params<ElemT, IndexT, CountT>> {
public:
void test() {
format::type fmt;
unique_test_inputs<ElemT, IndexT, CountT> p;
std::tie(p, fmt) = testing::TestWithParam<unique_test_params<ElemT, IndexT, CountT>>::GetParam();
auto& engine = get_test_engine();
const auto elem_data_type = type_to_data_type<ElemT>::value;
const auto index_data_type = type_to_data_type<IndexT>::value;
const auto count_data_type = type_to_data_type<CountT>::value;
const auto plain_format = format::bfyx;
const layout in_layout(p.data_shape, elem_data_type, plain_format);
auto input = engine.allocate_memory(in_layout);
set_values(input, p.input_data);
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(reorder("reordered_input", input_info("input"), fmt, elem_data_type));
topology.add(unique_count("unique_count", {input_info("reordered_input")}, p.flattened, p.axis));
topology.add(unique_gather("unique_gather",
{input_info("reordered_input"), input_info("unique_count")},
p.flattened,
p.axis,
p.sorted,
elem_data_type,
index_data_type,
count_data_type));
topology.add(reorder("expected_unique_values", input_info("unique_gather", 0), plain_format, elem_data_type));
topology.add(reorder("expected_indices", input_info("unique_gather", 1), plain_format, index_data_type));
topology.add(reorder("expected_rev_indices", input_info("unique_gather", 2), plain_format, index_data_type));
topology.add(reorder("expected_counts", input_info("unique_gather", 3), plain_format, count_data_type));
auto config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
network network(engine, topology, config);
network.set_input_data("input", input);
const auto outputs = network.execute();
const auto expected_unique_values = outputs.at("expected_unique_values").get_memory();
cldnn::mem_lock<ElemT> expected_unique_values_ptr(expected_unique_values, get_test_stream());
ASSERT_EQ(expected_unique_values_ptr.size(), p.expected_unique_values.size());
for (auto i = 0U; i < expected_unique_values_ptr.size(); ++i) {
ASSERT_EQ(expected_unique_values_ptr[i], p.expected_unique_values[i]);
}
const auto expected_indices = outputs.at("expected_indices").get_memory();
cldnn::mem_lock<IndexT> expected_indices_ptr(expected_indices, get_test_stream());
ASSERT_EQ(expected_indices_ptr.size(), p.expected_indices.size());
for (auto i = 0U; i < expected_indices_ptr.size(); ++i) {
ASSERT_EQ(expected_indices_ptr[i], p.expected_indices[i]);
}
const auto expected_rev_indices = outputs.at("expected_rev_indices").get_memory();
cldnn::mem_lock<IndexT> expected_rev_indices_ptr(expected_rev_indices, get_test_stream());
ASSERT_EQ(expected_rev_indices_ptr.size(), p.expected_rev_indices.size());
for (auto i = 0U; i < expected_rev_indices_ptr.size(); ++i) {
ASSERT_EQ(expected_rev_indices_ptr[i], p.expected_rev_indices[i]);
}
const auto expected_counts = outputs.at("expected_counts").get_memory();
cldnn::mem_lock<CountT> expected_counts_ptr(expected_counts, get_test_stream());
ASSERT_EQ(expected_counts_ptr.size(), p.expected_counts.size());
for (auto i = 0U; i < expected_counts_ptr.size(); ++i) {
ASSERT_EQ(expected_counts_ptr[i], p.expected_counts[i]);
}
}
static std::string PrintToStringParamName(
const testing::TestParamInfo<unique_test_params<ElemT, IndexT, CountT>>& info) {
format::type fmt;
unique_test_inputs<ElemT, IndexT, CountT> p;
std::tie(p, fmt) = info.param;
std::ostringstream result;
result << "data_shape=" << vec2str(p.data_shape) << "; ";
result << "input_data=" << vec2str(p.input_data) << "; ";
result << "data_type=" << type_to_data_type<ElemT>::value << "; ";
result << "index_type=" << type_to_data_type<IndexT>::value << "; ";
result << "counts_type=" << type_to_data_type<CountT>::value << "; ";
result << "sorted=" << p.sorted << "; ";
if (!p.flattened) {
result << "axis=" << p.axis << "; ";
}
result << "fmt=" << fmt_to_str(fmt) << "; ";
return result.str();
}
};
template <class ElemT, class IndexT, class CountT>
std::vector<unique_test_inputs<ElemT, IndexT, CountT>> getUniqueParams() {
return {
{
ov::Shape{5},
std::vector<ElemT>{5, 4, 3, 2, 1},
std::vector<ElemT>{5, 4, 3, 2, 1},
std::vector<IndexT>{0, 1, 2, 3, 4},
std::vector<IndexT>{0, 1, 2, 3, 4},
std::vector<CountT>{1, 1, 1, 1, 1},
true,
0,
false,
},
{
ov::Shape{5},
std::vector<ElemT>{5, 4, 3, 2, 1},
std::vector<ElemT>{1, 2, 3, 4, 5},
std::vector<IndexT>{4, 3, 2, 1, 0},
std::vector<IndexT>{4, 3, 2, 1, 0},
std::vector<CountT>{1, 1, 1, 1, 1},
true,
0,
true,
},
{
ov::Shape{7},
std::vector<ElemT>{1, 3, 5, 3, 2, 4, 2},
std::vector<ElemT>{1, 3, 5, 2, 4},
std::vector<IndexT>{0, 1, 2, 4, 5},
std::vector<IndexT>{0, 1, 2, 1, 3, 4, 3},
std::vector<CountT>{1, 2, 1, 2, 1},
true,
0,
false,
},
{
ov::Shape{7},
std::vector<ElemT>{1, 3, 5, 3, 2, 4, 2},
std::vector<ElemT>{1, 2, 3, 4, 5},
std::vector<IndexT>{0, 4, 1, 5, 2},
std::vector<IndexT>{0, 2, 4, 2, 1, 3, 1},
std::vector<CountT>{1, 2, 2, 1, 1},
true,
0,
true,
},
{
ov::Shape{7},
std::vector<ElemT>{3, 1, 5, 3, 2, 4, 2},
std::vector<ElemT>{1, 2, 3, 4, 5},
std::vector<IndexT>{1, 4, 0, 5, 2},
std::vector<IndexT>{2, 0, 4, 2, 1, 3, 1},
std::vector<CountT>{1, 2, 2, 1, 1},
true,
0,
true,
},
{
ov::Shape{7},
std::vector<ElemT>{3, 3, 5, 3, 2, 4, 2},
std::vector<ElemT>{2, 3, 4, 5},
std::vector<IndexT>{4, 0, 5, 2},
std::vector<IndexT>{1, 1, 3, 1, 0, 2, 0},
std::vector<CountT>{2, 3, 1, 1},
true,
0,
true,
},
{
ov::Shape{7},
std::vector<ElemT>{1, 3, 5, 3, 2, 4, 2},
std::vector<ElemT>{1, 2, 3, 4, 5},
std::vector<IndexT>{0, 4, 1, 5, 2},
std::vector<IndexT>{0, 2, 4, 2, 1, 3, 1},
std::vector<CountT>{1, 2, 2, 1, 1},
false,
0,
true,
},
{
ov::Shape{2, 6},
std::vector<ElemT>{3, 5, 3, 2, 4, 2, 1, 2, 3, 4, 5, 6},
std::vector<ElemT>{3, 5, 2, 4, 1, 6},
std::vector<IndexT>{0, 1, 3, 4, 6, 11},
std::vector<IndexT>{0, 1, 0, 2, 3, 2, 4, 2, 0, 3, 1, 5},
std::vector<CountT>{3, 2, 3, 2, 1, 1},
true,
0,
false,
},
{
ov::Shape{2, 4},
std::vector<ElemT>{1, 2, 3, 4, 1, 2, 3, 5},
std::vector<ElemT>{1, 2, 3, 4, 1, 2, 3, 5},
std::vector<IndexT>{0, 1},
std::vector<IndexT>{0, 1},
std::vector<CountT>{1, 1},
false,
0,
false,
},
{
ov::Shape{2, 4},
std::vector<ElemT>{1, 2, 3, 4, 1, 2, 3, 5},
std::vector<ElemT>{1, 2, 3, 4, 1, 2, 3, 5},
std::vector<IndexT>{0, 1, 2, 3},
std::vector<IndexT>{0, 1, 2, 3},
std::vector<CountT>{1, 1, 1, 1},
false,
1,
false,
},
{
ov::Shape{2, 4},
std::vector<ElemT>{1, 2, 2, 4, 1, 2, 2, 5},
std::vector<ElemT>{1, 2, 4, 1, 2, 5},
std::vector<IndexT>{0, 1, 3},
std::vector<IndexT>{0, 1, 1, 2},
std::vector<CountT>{1, 2, 1},
false,
1,
false,
},
{
ov::Shape{2, 2, 3},
std::vector<ElemT>{1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6},
std::vector<ElemT>{1, 2, 3, 4, 5, 6},
std::vector<IndexT>{0},
std::vector<IndexT>{0, 0},
std::vector<CountT>{2},
false,
0,
false,
},
{
ov::Shape{2, 3, 2},
std::vector<ElemT>{-3, -2, -5, 4, -3, 2, 3, -4, 1, 2, -1, 4},
std::vector<ElemT>{-3, -2, -5, 4, -3, 2, 3, -4, 1, 2, -1, 4},
std::vector<IndexT>{0, 1},
std::vector<IndexT>{0, 1},
std::vector<CountT>{1, 1},
false,
0,
true,
},
{
ov::Shape{2, 3, 2},
std::vector<ElemT>{-3, -2, -5, 4, -3, 2, 3, -4, 1, 2, -1, 4},
std::vector<ElemT>{-3, -2, -5, 4, -3, 2, 3, -4, 1, 2, -1, 4},
std::vector<IndexT>{0, 1},
std::vector<IndexT>{0, 1},
std::vector<CountT>{1, 1},
false,
0,
false,
},
{
ov::Shape{2, 2, 3},
std::vector<ElemT>{6, 5, 4, 6, 5, 4, 3, 2, 1, 3, 2, 1},
std::vector<ElemT>{6, 5, 4, 3, 2, 1},
std::vector<IndexT>{0},
std::vector<IndexT>{0, 0},
std::vector<CountT>{2},
false,
1,
false,
},
{
ov::Shape{2, 2, 3},
std::vector<ElemT>{-1, 2, -1, 5, -3, 5, 7, -8, 7, 4, 4, 4},
std::vector<ElemT>{-1, 2, 5, -3, 7, -8, 4, 4},
std::vector<IndexT>{0, 1},
std::vector<IndexT>{0, 1, 0},
std::vector<CountT>{2, 1},
false,
2,
false,
},
{
ov::Shape{2, 2, 3},
std::vector<ElemT>{-1, -1, 2, 5, 5, -3, 7, 7, -8, 4, 4, 4},
std::vector<ElemT>{-1, 2, 5, -3, 7, -8, 4, 4},
std::vector<IndexT>{0, 2},
std::vector<IndexT>{0, 0, 1},
std::vector<CountT>{2, 1},
false,
2,
false,
},
{
ov::Shape{2, 2, 3},
std::vector<ElemT>{2, -1, -1, -3, 5, 5, -8, 7, 7, 4, 4, 4},
std::vector<ElemT>{2, -1, -3, 5, -8, 7, 4, 4},
std::vector<IndexT>{0, 1},
std::vector<IndexT>{0, 1, 1},
std::vector<CountT>{1, 2},
false,
2,
false,
},
{
ov::Shape{2, 2, 3},
std::vector<ElemT>{2, -1, -1, -3, 5, 5, -8, 7, 7, 4, 4, 4},
std::vector<ElemT>{-1, 2, 5, -3, 7, -8, 4, 4},
std::vector<IndexT>{1, 0},
std::vector<IndexT>{1, 0, 0},
std::vector<CountT>{2, 1},
false,
2,
true,
},
{
ov::Shape{2, 2, 3},
std::vector<ElemT>{-1, -1, -1, 3, 2, 2, 6, 7, 7, 4, 4, 4},
std::vector<ElemT>{-1, -1, 2, 3, 7, 6, 4, 4},
std::vector<IndexT>{1, 0},
std::vector<IndexT>{1, 0, 0},
std::vector<CountT>{2, 1},
false,
2,
true,
},
{
ov::Shape{1, 3, 16},
std::vector<ElemT>{15, -20, -11, 10, -21, 8, -15, -10, 7, 20, -19, -14, -13, -16, -7, -2,
-17, -4, 21, -6, 11, 8, 17, 6, 7, 20, -3, 2, -13, -16, -23, 14,
-1, 12, 5, -6, 11, -8, 1, -10, 23, 20, -19, 18, 3, -16, -7, 14},
std::vector<ElemT>{-23, -21, -20, -19, -17, -16, -15, -14, -13, -11, -10, -8, -7, -6, -4, -3, -2, -1,
1, 2, 3, 5, 6, 7, 8, 10, 11, 12, 14, 15, 17, 18, 20, 21, 23},
std::vector<IndexT>{30, 4, 1, 10, 16, 13, 6, 11, 12, 2, 7, 37, 14, 19, 17, 26, 15, 32,
38, 27, 44, 34, 23, 8, 5, 3, 20, 33, 31, 0, 22, 43, 9, 18, 40},
std::vector<IndexT>{29, 2, 9, 25, 1, 24, 6, 10, 23, 32, 3, 7, 8, 5, 12, 16,
4, 14, 33, 13, 26, 24, 30, 22, 23, 32, 15, 19, 8, 5, 0, 28,
17, 27, 21, 13, 26, 11, 18, 10, 34, 32, 3, 31, 20, 5, 12, 28},
std::vector<CountT>{1, 1, 1, 2, 1, 3, 1, 1, 2, 1, 2, 1, 2, 2, 1, 1, 1, 1,
1, 1, 1, 1, 1, 2, 2, 1, 2, 1, 2, 1, 1, 1, 3, 1, 1},
true,
0,
true,
},
};
}
const std::vector<format::type> layout_formats = {format::bfyx, format::b_fs_yx_fsv16};
#define INSTANTIATE_UNIQUE_TEST_SUITE(elem_type, index_type, count_type) \
using unique_gpu_test_##elem_type##index_type##count_type = unique_gpu_test<elem_type, index_type, count_type>; \
TEST_P(unique_gpu_test_##elem_type##index_type##count_type, test) { \
ASSERT_NO_FATAL_FAILURE(test()); \
} \
INSTANTIATE_TEST_SUITE_P(smoke_unique_##elem_type##index_type##count_type, \
unique_gpu_test_##elem_type##index_type##count_type, \
testing::Combine(testing::ValuesIn(getUniqueParams<elem_type, index_type, count_type>()), \
testing::ValuesIn(layout_formats)), \
unique_gpu_test_##elem_type##index_type##count_type::PrintToStringParamName);
INSTANTIATE_UNIQUE_TEST_SUITE(float, int64_t, int32_t);
} // namespace