[GPU] NMSRotated-13 (#20411)

* Add Rotation support to primitive and kernel

* Add unit tests

* Add transformation for NMSRotated

* add single-layer tests

* Fix: angle value for the same box may have its sign changed several times passing through iterations of batch and class loops.

* fix review comments
This commit is contained in:
Oleksii Khovan 2023-10-24 21:36:02 +02:00 committed by GitHub
parent eb55360f10
commit 984e4dbf35
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
19 changed files with 1164 additions and 28 deletions

View File

@ -21,6 +21,10 @@ public:
NonMaxSuppressionIEInternal() = default;
static constexpr int Rotation_None = 0;
static constexpr int Rotation_Clockwise = 1;
static constexpr int Rotation_Counterclockwise = 2;
NonMaxSuppressionIEInternal(const Output<Node>& boxes,
const Output<Node>& scores,
const Output<Node>& max_output_boxes_per_class,
@ -29,7 +33,8 @@ public:
int center_point_box,
bool sort_result_descending,
const element::Type& output_type = element::i64,
const element::Type& score_output_type = element::f32);
const element::Type& score_output_type = element::f32,
const int rotation = Rotation_None);
NonMaxSuppressionIEInternal(const Output<Node>& boxes,
const Output<Node>& scores,
@ -40,7 +45,8 @@ public:
int center_point_box,
bool sort_result_descending,
const element::Type& output_type = element::i64,
const element::Type& score_output_type = element::f32);
const element::Type& score_output_type = element::f32,
const int rotation = Rotation_None);
void validate_and_infer_types() override;
@ -52,6 +58,7 @@ public:
bool m_sort_result_descending = true;
element::Type m_output_type;
element::Type m_scores_output_type;
int m_rotation{Rotation_None};
private:
int64_t max_boxes_output_from_input() const;

View File

@ -0,0 +1,26 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <memory>
#include <utility>
#include <vector>
#include "openvino/pass/graph_rewrite.hpp"
#include "transformations_visibility.hpp"
namespace ov {
namespace pass {
class TRANSFORMATIONS_API ConvertNMSRotatedToNMSIEInternal;
} // namespace pass
} // namespace ov
/**
 * @brief Matcher pass that replaces ov::op::v13::NMSRotated with the internal
 * NonMaxSuppressionIEInternal operation, passing the rotation direction of the
 * original node to the internal op as its rotation attribute.
 */
class ov::pass::ConvertNMSRotatedToNMSIEInternal : public ov::pass::MatcherPass {
public:
OPENVINO_RTTI("ConvertNMSRotatedToNMSIEInternal", "0");
// Registers the matcher and the replacement callback.
ConvertNMSRotatedToNMSIEInternal();
};

View File

@ -20,12 +20,14 @@ op::internal::NonMaxSuppressionIEInternal::NonMaxSuppressionIEInternal(const Out
int center_point_box,
bool sort_result_descending,
const ov::element::Type& output_type,
const ov::element::Type& score_output_type)
const ov::element::Type& score_output_type,
const int rotation)
: Op({boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold}),
m_center_point_box(center_point_box),
m_sort_result_descending(sort_result_descending),
m_output_type(output_type),
m_scores_output_type(score_output_type) {
m_scores_output_type(score_output_type),
m_rotation(rotation) {
constructor_validate_and_infer_types();
}
@ -38,12 +40,14 @@ op::internal::NonMaxSuppressionIEInternal::NonMaxSuppressionIEInternal(const Out
int center_point_box,
bool sort_result_descending,
const ov::element::Type& output_type,
const ov::element::Type& score_output_type)
const ov::element::Type& score_output_type,
const int rotation)
: Op({boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold, soft_nms_sigma}),
m_center_point_box(center_point_box),
m_sort_result_descending(sort_result_descending),
m_output_type(output_type),
m_scores_output_type(score_output_type) {
m_scores_output_type(score_output_type),
m_rotation{rotation} {
constructor_validate_and_infer_types();
}
@ -59,7 +63,9 @@ std::shared_ptr<Node> op::internal::NonMaxSuppressionIEInternal::clone_with_new_
new_args.at(5),
m_center_point_box,
m_sort_result_descending,
m_output_type);
m_output_type,
m_scores_output_type,
m_rotation);
} else if (new_args.size() == 5) {
return make_shared<NonMaxSuppressionIEInternal>(new_args.at(0),
new_args.at(1),
@ -68,7 +74,9 @@ std::shared_ptr<Node> op::internal::NonMaxSuppressionIEInternal::clone_with_new_
new_args.at(4),
m_center_point_box,
m_sort_result_descending,
m_output_type);
m_output_type,
m_scores_output_type,
m_rotation);
}
OPENVINO_THROW("Unsupported number of inputs: " + std::to_string(new_args.size()));
}
@ -79,6 +87,7 @@ bool op::internal::NonMaxSuppressionIEInternal::visit_attributes(AttributeVisito
visitor.on_attribute("sort_result_descending", m_sort_result_descending);
visitor.on_attribute("output_type", m_output_type);
visitor.on_attribute("score_output_type", m_scores_output_type);
visitor.on_attribute("rotation", m_rotation);
return true;
}

View File

@ -0,0 +1,109 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "transformations/op_conversions/convert_nms_rotated_to_nms_ie_internal.hpp"
#include <memory>
#include <vector>
#include "itt.hpp"
#include "openvino/core/rt_info.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/op/convert.hpp"
#include "openvino/op/non_max_suppression.hpp"
#include "openvino/op/reshape.hpp"
#include "openvino/pass/pattern/op/wrap_type.hpp"
#include "ov_ops/nms_ie_internal.hpp"
#include "transformations/utils/utils.hpp"
/// Registers a matcher rooted at ov::op::v13::NMSRotated and replaces every match with
/// op::internal::NonMaxSuppressionIEInternal.
///
/// The replacement callback:
///  - requires exactly 5 inputs on the matched node;
///  - reshapes max_output_boxes_per_class, iou_threshold and score_threshold to shape {1};
///  - always uses the "center" box encoding and forwards the clockwise flag as the rotation attribute;
///  - creates the internal op with i32 index outputs and inserts Convert nodes on outputs 0 and 2
///    when the original node exposes a different element type there.
/// The callback returns false (no change) when the root is not NMSRotated or when the
/// transformation callback vetoes the node.
ov::pass::ConvertNMSRotatedToNMSIEInternal::ConvertNMSRotatedToNMSIEInternal() {
    MATCHER_SCOPE(ConvertNMSRotatedToNMSIEInternal);
    auto nms = ov::pass::pattern::wrap_type<ov::op::v13::NMSRotated>();

    matcher_pass_callback callback = [=](pattern::Matcher& m) {
        auto nms_rotated = std::dynamic_pointer_cast<ov::op::v13::NMSRotated>(m.get_match_root());
        if (!nms_rotated || transformation_callback(nms_rotated)) {
            return false;
        }

        const auto new_args = nms_rotated->input_values();
        const std::size_t num_of_inputs = new_args.size();
        OPENVINO_ASSERT(num_of_inputs == 5);

        const auto& max_per_class = new_args.at(2);
        const auto& iou_threshold = new_args.at(3);
        const auto& score_threshold = new_args.at(4);

        // Nodes created by this transformation; they receive the runtime info of the original node.
        NodeVector new_ops;

        // Target shape {1} for each of the three reshaped inputs.
        Output<Node> new_shape_for_max_per_class = ov::op::v0::Constant::create(ov::element::i64, Shape{1}, {1});
        Output<Node> new_shape_for_iou_threshold = ov::op::v0::Constant::create(ov::element::i64, Shape{1}, {1});
        Output<Node> new_shape_for_score_threshold = ov::op::v0::Constant::create(ov::element::i64, Shape{1}, {1});

        Output<Node> new_max_per_class =
            std::make_shared<ov::op::v1::Reshape>(max_per_class, new_shape_for_max_per_class, true);
        new_ops.emplace_back(new_max_per_class.get_node_shared_ptr());

        Output<Node> new_iou_threshold =
            std::make_shared<ov::op::v1::Reshape>(iou_threshold, new_shape_for_iou_threshold, true);
        new_ops.emplace_back(new_iou_threshold.get_node_shared_ptr());

        Output<Node> new_score_threshold =
            std::make_shared<ov::op::v1::Reshape>(score_threshold, new_shape_for_score_threshold, true);
        new_ops.emplace_back(new_score_threshold.get_node_shared_ptr());

        constexpr int BoxEncodingType_Center = 1;             // see NonMaxSuppression::BoxEncodingType
        const int center_point_box = BoxEncodingType_Center;  // for NMSRotated it is always Center

        const auto rotation = nms_rotated->get_clockwise()
                                  ? op::internal::NonMaxSuppressionIEInternal::Rotation_Clockwise
                                  : op::internal::NonMaxSuppressionIEInternal::Rotation_Counterclockwise;

        // No soft_nms_sigma input is passed: the 5-input constructor of the internal op is used.
        const auto nms_legacy =
            std::make_shared<op::internal::NonMaxSuppressionIEInternal>(new_args.at(0),
                                                                        new_args.at(1),
                                                                        new_max_per_class,
                                                                        new_iou_threshold,
                                                                        new_score_threshold,
                                                                        center_point_box,
                                                                        nms_rotated->get_sort_result_descending(),
                                                                        element::i32,
                                                                        nms_rotated->get_output_element_type(1),
                                                                        rotation);
        new_ops.push_back(nms_legacy);

        // Outputs 0 and 2 of the internal op may differ in element type from the original node;
        // insert a Convert so consumers keep seeing the original type.
        Output<Node> output_0 = nms_legacy->output(0);
        if (nms_rotated->output(0).get_element_type() != output_0.get_element_type()) {
            output_0 = std::make_shared<ov::op::v0::Convert>(output_0, nms_rotated->output(0).get_element_type());
            output_0.get_node_shared_ptr()->set_friendly_name(op::util::create_ie_output_name(nms_rotated->output(0)));
            new_ops.emplace_back(output_0.get_node_shared_ptr());
        }

        Output<Node> output_2 = nms_legacy->output(2);
        if (nms_rotated->output(2).get_element_type() != output_2.get_element_type()) {
            output_2 = std::make_shared<ov::op::v0::Convert>(output_2, nms_rotated->output(2).get_element_type());
            output_2.get_node_shared_ptr()->set_friendly_name(op::util::create_ie_output_name(nms_rotated->output(2)));
            new_ops.emplace_back(output_2.get_node_shared_ptr());
        }

        nms_legacy->set_friendly_name(nms_rotated->get_friendly_name());
        ov::copy_runtime_info(nms_rotated, new_ops);
        ov::replace_node(nms_rotated, {output_0, nms_legacy->output(1), output_2});
        return true;
    };

    auto m = std::make_shared<ov::pass::pattern::Matcher>(nms, matcher_name);
    this->register_matcher(m, callback);
}

View File

@ -127,7 +127,7 @@ void nms_rotated(const float* boxes_data,
for (int64_t batch = 0; batch < num_batches; batch++) {
const float* boxesPtr = boxes_data + batch * num_boxes * 5;
RotatedBox* r = reinterpret_cast<RotatedBox*>(const_cast<float*>(boxesPtr));
const RotatedBox* r = reinterpret_cast<const RotatedBox*>(boxesPtr);
for (int64_t class_idx = 0; class_idx < num_classes; class_idx++) {
const float* scoresPtr = scores_data + batch * (num_classes * num_boxes) + class_idx * num_boxes;
@ -137,11 +137,11 @@ void nms_rotated(const float* boxes_data,
for (int64_t box_idx = 0; box_idx < num_boxes; box_idx++) {
if (scoresPtr[box_idx] > score_threshold) {
candidate_boxes.emplace_back(r[box_idx], box_idx, scoresPtr[box_idx], 0, batch, class_idx);
// Convert counterclockwise to clockwise
if (!clockwise) {
r[box_idx].a *= -1;
candidate_boxes.back().box.a *= -1.f;
}
candidate_boxes.emplace_back(r[box_idx], box_idx, scoresPtr[box_idx], 0, batch, class_idx);
}
}

View File

@ -17,6 +17,12 @@ namespace cldnn {
struct non_max_suppression : public primitive_base<non_max_suppression> {
CLDNN_DECLARE_PRIMITIVE(non_max_suppression)
// Rotation mode of the boxes handled by the primitive.
// The `rotation` member defaults to NONE; the other two values select the
// direction in which the box angle is interpreted.
enum Rotation {
NONE,
CLOCKWISE,
COUNTERCLOCKWISE
};
non_max_suppression() : primitive_base("", {}),
selected_indices_num(0),
center_point_box(false),
@ -68,6 +74,7 @@ struct non_max_suppression : public primitive_base<non_max_suppression> {
primitive_id soft_nms_sigma;
primitive_id second_output;
primitive_id third_output;
Rotation rotation{Rotation::NONE};
size_t hash() const override {
size_t seed = primitive::hash();
@ -79,6 +86,7 @@ struct non_max_suppression : public primitive_base<non_max_suppression> {
seed = hash_combine(seed, soft_nms_sigma.empty());
seed = hash_combine(seed, second_output.empty());
seed = hash_combine(seed, third_output.empty());
seed = hash_combine(seed, rotation);
return seed;
}
@ -97,7 +105,8 @@ struct non_max_suppression : public primitive_base<non_max_suppression> {
cmp_fields(score_threshold.empty()) &&
cmp_fields(soft_nms_sigma.empty()) &&
cmp_fields(second_output.empty()) &&
cmp_fields(third_output.empty());
cmp_fields(third_output.empty()) &&
cmp_fields(rotation);
#undef cmp_fields
}
@ -130,6 +139,7 @@ struct non_max_suppression : public primitive_base<non_max_suppression> {
ob << soft_nms_sigma;
ob << second_output;
ob << third_output;
ob << make_data(&rotation, sizeof(rotation));
}
void load(BinaryInputBuffer& ib) override {
@ -143,6 +153,7 @@ struct non_max_suppression : public primitive_base<non_max_suppression> {
ib >> soft_nms_sigma;
ib >> second_output;
ib >> third_output;
ib >> make_data(&rotation, sizeof(rotation));
}
};
} // namespace cldnn

View File

@ -143,6 +143,17 @@ public:
params.sort_result_descending = primitive->sort_result_descending;
params.box_encoding = primitive->center_point_box ? kernel_selector::BoxEncodingType::BOX_ENCODING_CENTER
: kernel_selector::BoxEncodingType::BOX_ENCODING_CORNER;
switch (primitive->rotation) {
case non_max_suppression::Rotation::CLOCKWISE:
params.rotation = kernel_selector::NMSRotationType::CLOCKWISE;
break;
case non_max_suppression::Rotation::COUNTERCLOCKWISE:
params.rotation = kernel_selector::NMSRotationType::COUNTERCLOCKWISE;
break;
default:
params.rotation = kernel_selector::NMSRotationType::NONE;
}
if (impl_param.get_program().get_node(primitive->id).is_dynamic()) {
params.reuse_internal_buffer = true;
}

View File

@ -1484,17 +1484,21 @@ impl_types layout_optimizer::get_preferred_impl_type(program_node& node, format
if (blocked_formats.find(node.get_input_layout(0).format) != blocked_formats.end()) {
preferred_impl = impl_types::ocl;
} else {
auto& nms_node = node.as<non_max_suppression>();
auto scores_layout = nms_node.input_scores().get_output_layout();
if (scores_layout.is_dynamic()) {
const auto& nms_node = node.as<non_max_suppression>();
if (nms_node.get_primitive()->rotation != non_max_suppression::Rotation::NONE) {
preferred_impl = impl_types::ocl;
} else {
const size_t kBatchNum = scores_layout.batch();
const size_t kClassNum = scores_layout.feature();
const size_t kNStreams =
static_cast<size_t>(node.get_program().get_config().get_property(ov::streams::num));
const size_t kKeyValue = kBatchNum * std::min(kClassNum, static_cast<size_t>(8)) * kNStreams;
preferred_impl = (kKeyValue > 64) ? impl_types::ocl : impl_types::cpu;
const auto scores_layout = nms_node.input_scores().get_output_layout();
if (scores_layout.is_dynamic()) {
preferred_impl = impl_types::ocl;
} else {
const size_t kBatchNum = scores_layout.batch();
const size_t kClassNum = scores_layout.feature();
const size_t kNStreams =
static_cast<size_t>(node.get_program().get_config().get_property(ov::streams::num));
const size_t kKeyValue = kBatchNum * std::min(kClassNum, static_cast<size_t>(8)) * kNStreams;
preferred_impl = (kKeyValue > 64) ? impl_types::ocl : impl_types::cpu;
}
}
}
} else if (node.is_type<reorder>()) {

View File

@ -62,7 +62,7 @@ inline COORD_TYPE_4 FUNC(getBoxCoords)(const __global INPUT0_TYPE *boxes, const
boxes[INPUT0_GET_INDEX(batch, boxId, 2, 0)],
boxes[INPUT0_GET_INDEX(batch, boxId, 3, 0)]);
#if BOX_ENCODING == 0
#if !defined(ROTATION) && BOX_ENCODING == 0
const COORD_TYPE ax1 = min(coords[1], coords[3]);
const COORD_TYPE ax2 = max(coords[1], coords[3]);
const COORD_TYPE ay1 = min(coords[0], coords[2]);
@ -76,9 +76,331 @@ inline COORD_TYPE_4 FUNC(getBoxCoords)(const __global INPUT0_TYPE *boxes, const
return coords;
}
#ifdef ROTATION
typedef struct {
float x, y;
} FUNC(Point2D);
#define POINT_2D FUNC(Point2D)
// Computes the four corner points of a rotated box.
//   box   - box[0] = x_ctr, box[1] = y_ctr, box[2] = w, box[3] = h
//   angle - rotation angle of the box; its sign is flipped when ROTATION == 2
//   pts   - output array of 4 vertices written in the order
//           left-down, left-top, right-top, right-down
inline void FUNC(getRotatedVertices)(const COORD_TYPE_4 box, const INPUT0_TYPE angle, POINT_2D* pts) {
#if ROTATION == 2
    const float theta = angle * -1.0f;
#else
    const float theta = angle;
#endif

    const float half_cos = cos(theta) * 0.5f;
    const float half_sin = sin(theta) * 0.5f;

    // Axis convention: y grows top --> down, x grows left --> right.
    // Two adjacent corners are computed explicitly ...
    // left-down
    pts[0].x = box[0] - half_sin * box[3] - half_cos * box[2];
    pts[0].y = box[1] + half_cos * box[3] - half_sin * box[2];
    // left-top
    pts[1].x = box[0] + half_sin * box[3] - half_cos * box[2];
    pts[1].y = box[1] - half_cos * box[3] - half_sin * box[2];

    // ... and the remaining two are their mirror images through the box centre.
    // right-top (mirror of left-down)
    pts[2].x = 2 * box[0] - pts[0].x;
    pts[2].y = 2 * box[1] - pts[0].y;
    // right-down (mirror of left-top)
    pts[3].x = 2 * box[0] - pts[1].x;
    pts[3].y = 2 * box[1] - pts[1].y;
}
// Dot product of the 2D vectors A and B.
inline float FUNC(dot2D)(const POINT_2D A, const POINT_2D B) {
return A.x * B.x + A.y * B.y;
}
// Scalar (z-component) cross product of the 2D vectors A and B: A.x * B.y - B.x * A.y.
// The result is zero when the vectors are parallel and changes sign when the operands are swapped.
inline float FUNC(cross2D)(const POINT_2D A, const POINT_2D B) {
return A.x * B.y - B.x * A.y;
}
// Collects the candidate vertices of the overlap region of two rectangles.
//   pts1, pts2    - the 4 vertices of each rectangle; edge i runs from vertex i
//                   to vertex (i + 1) % 4
//   intersections - output array. Receives, in this order: edge/edge crossing points,
//                   vertices of rect1 lying inside rect2, vertices of rect2 lying inside rect1.
//                   Duplicates are not removed; at most 4 x 4 + 4 + 4 = 24 points are written.
// Returns the number of points written to `intersections`.
inline int FUNC(getIntersectionPoints)(const POINT_2D* pts1, const POINT_2D* pts2, POINT_2D* intersections) {
// Line vector
// A line from p1 to p2 is: p1 + (p2-p1)*t, t=[0,1]
POINT_2D vec1[4], vec2[4];
for (int i = 0; i < 4; i++) {
vec1[i].x = pts1[(i + 1) % 4].x - pts1[i].x;
vec1[i].y = pts1[(i + 1) % 4].y - pts1[i].y;
vec2[i].x = pts2[(i + 1) % 4].x - pts2[i].x;
vec2[i].y = pts2[(i + 1) % 4].y - pts2[i].y;
}
// Line test - test all line combos for intersection
int num = 0; // number of intersections
for (int i = 0; i < 4; i++) {
for (int j = 0; j < 4; j++) {
// Solve for 2x2 Ax=b
float det = FUNC_CALL(cross2D)(vec2[j], vec1[i]);
// This takes care of parallel lines
if (fabs(det) <= 1e-14f) {
continue;
}
// Vector from the start of edge i of rect1 to the start of edge j of rect2
POINT_2D vec12;
vec12.x= pts2[j].x - pts1[i].x;
vec12.y= pts2[j].y - pts1[i].y;
// t1 / t2 are the line parameters of the crossing point on edge i / edge j
float t1 = FUNC_CALL(cross2D)(vec2[j], vec12) / det;
float t2 = FUNC_CALL(cross2D)(vec1[i], vec12) / det;
// The crossing counts only if it lies within both segments
if (t1 >= 0.0f && t1 <= 1.0f && t2 >= 0.0f && t2 <= 1.0f) {
intersections[num].x = pts1[i].x + vec1[i].x * t1;
intersections[num].y = pts1[i].y + vec1[i].y * t1;
++num;
}
}
}
// Check for vertices of rect1 inside rect2
{
const POINT_2D AB = vec2[0];
// vec2[3] runs from vertex 3 (D) to vertex 0 (A), i.e. it is DA = -AD
const POINT_2D DA = vec2[3];
float ABdotAB = FUNC_CALL(dot2D)(AB, AB);
float ADdotAD = FUNC_CALL(dot2D)(DA, DA);
for (int i = 0; i < 4; i++) {
// assume ABCD is the rectangle, and P is the point to be judged
// P is inside ABCD iff. P's projection on AB lies within AB
// and P's projection on AD lies within AD
POINT_2D AP;
AP.x = pts1[i].x - pts2[0].x;
AP.y = pts1[i].y - pts2[0].y;
float APdotAB = FUNC_CALL(dot2D)(AP, AB);
// Negated because DA points opposite to AD
float APdotAD = -FUNC_CALL(dot2D)(AP, DA);
if ((APdotAB >= 0) && (APdotAD >= 0) && (APdotAB <= ABdotAB) && (APdotAD <= ADdotAD)) {
intersections[num].x = pts1[i].x;
intersections[num].y = pts1[i].y;
++num;
}
}
}
// Reverse the check - check for vertices of rect2 inside rect1
{
const POINT_2D AB = vec1[0];
const POINT_2D DA = vec1[3];
float ABdotAB = FUNC_CALL(dot2D)(AB, AB);
float ADdotAD = FUNC_CALL(dot2D)(DA, DA);
for (int i = 0; i < 4; i++) {
POINT_2D AP;
AP.x = pts2[i].x - pts1[0].x;
AP.y = pts2[i].y - pts1[0].y;
float APdotAB = FUNC_CALL(dot2D)(AP, AB);
float APdotAD = -FUNC_CALL(dot2D)(AP, DA);
if ((APdotAB >= 0) && (APdotAD >= 0) && (APdotAB <= ABdotAB) && (APdotAD <= ADdotAD)) {
intersections[num].x = pts2[i].x;
intersections[num].y = pts2[i].y;
++num;
}
}
}
return num;
}
// Exchanges the two points referenced by `a` and `b`.
inline void FUNC(swapPoints)(POINT_2D* a, POINT_2D* b)
{
    const POINT_2D saved_b = *b;
    *b = *a;
    *a = saved_b;
}
// Bubble-sorts arr[l..h] (both bounds inclusive) in place.
// Ordering of two neighbours A, B:
//   - if cross2D(A, B) is (almost) zero, the one with the smaller squared length comes first;
//   - otherwise A comes first when cross2D(A, B) is positive.
// Stops early as soon as a full pass performs no swap.
inline void FUNC(sortPoints)(POINT_2D* arr, int l, int h)
{
    const int pass_count = h - l;
    for (int pass = 0; pass < pass_count; ++pass) {
        bool any_swap = false;
        const int last = h - pass;

        for (int idx = l; idx < last; ++idx) {
            const float cross = FUNC_CALL(cross2D)(arr[idx], arr[idx + 1]);

            bool in_order;
            if (fabs(cross) < 1e-6f) {
                in_order = FUNC_CALL(dot2D)(arr[idx], arr[idx]) < FUNC_CALL(dot2D)(arr[idx + 1], arr[idx + 1]);
            } else {
                in_order = cross > 0;
            }

            if (!in_order) {
                FUNC_CALL(swapPoints)(&arr[idx], &arr[idx + 1]);
                any_swap = true;
            }
        }

        if (!any_swap) {
            return;
        }
    }
}
// Graham-scan convex hull of `num_in` points.
//   p             - input points (not modified)
//   num_in        - number of input points
//   q             - output array with room for at least `num_in` points; also used as scratch
//   shift_to_zero - when true the hull is returned relative to the starting point
//                   (enough for area computation); when false the hull is shifted back
//                   to the original coordinates
// Returns the number of hull vertices written to `q`, 1 when all points coincide,
// or -1 when fewer than 2 points are given.
inline int FUNC(convex_hull_graham)(const POINT_2D* p, const int num_in, POINT_2D* q, bool shift_to_zero) {
    if (num_in < 2) {
        return -1;
    }

    // Step 1:
    // Find point with minimum y
    // if more than 1 points have the same minimum y,
    // pick the one with the minimum x.
    int t = 0;
    for (int i = 1; i < num_in; i++) {
        if (p[i].y < p[t].y || (p[i].y == p[t].y && p[i].x < p[t].x)) {
            t = i;
        }
    }
    const POINT_2D start = p[t];  // starting point

    // Step 2:
    // Subtract starting point from every points (for sorting in the next step)
    for (int i = 0; i < num_in; i++) {
        q[i].x = p[i].x - start.x;
        q[i].y = p[i].y - start.y;
    }

    // Swap the starting point to position 0
    FUNC_CALL(swapPoints)(&q[t], &q[0]);

    // Step 3:
    // Sort point 1 ~ num_in according to their relative cross-product values
    // (essentially sorting according to angles)
    // If the angles are the same, sort according to their distance to origin
    FUNC_CALL(sortPoints)(q, 1, num_in - 1);

    // Step 4:
    // Make sure there are at least 2 points (that don't overlap with each other)
    // in the stack. The squared distance to the origin is evaluated on the sorted points.
    int k;  // index of the non-overlapped second point
    for (k = 1; k < num_in; k++) {
        if (FUNC_CALL(dot2D)(q[k], q[k]) > 1e-8f) {
            break;
        }
    }
    if (k == num_in) {
        // We reach the end, which means the convex hull is just one point
        q[0].x = p[t].x;
        q[0].y = p[t].y;
        return 1;
    }
    q[1].x = q[k].x;
    q[1].y = q[k].y;
    int m = 2;  // 2 points in the stack

    // Step 5:
    // Finally we can start the scanning process.
    // When a non-convex relationship between the 3 points is found
    // (either concave shape or duplicated points),
    // we pop the previous point from the stack
    // until the 3-point relationship is convex again, or
    // until the stack only contains two points
    for (int i = k + 1; i < num_in; i++) {
        // The turn test must be re-evaluated after every pop because it depends on the
        // current top two stack entries; `m > 1` also keeps q[m - 2] in bounds.
        while (m > 1) {
            POINT_2D diff1, diff2;
            diff1.x = q[i].x - q[m - 2].x;
            diff1.y = q[i].y - q[m - 2].y;
            diff2.x = q[m - 1].x - q[m - 2].x;
            diff2.y = q[m - 1].y - q[m - 2].y;

            if (!(FUNC_CALL(cross2D)(diff1, diff2) >= 0)) {
                break;
            }
            m--;
        }
        q[m].x = q[i].x;
        q[m].y = q[i].y;
        ++m;
    }

    // Step 6 (Optional):
    // In general sense we need the original coordinates, so we
    // need to shift the points back (reverting Step 2)
    // But if we're only interested in getting the area/perimeter of the shape
    // We can simply return.
    if (!shift_to_zero) {
        for (int i = 0; i < m; i++) {
            q[i].x += start.x;
            q[i].y += start.y;
        }
    }

    return m;
}
// Area of the polygon given by the `m` vertices in `q`.
// The polygon is split into a triangle fan anchored at q[0]; the absolute values of the
// triangle cross products are summed and halved. Returns 0 for fewer than 3 vertices.
inline float FUNC(polygon_area)(const POINT_2D* q, const int m) {
    if (m <= 2) {
        return 0.f;
    }

    float doubled_area = 0.f;
    for (int v = 1; v < m - 1; v++) {
        POINT_2D to_current;
        to_current.x = q[v].x - q[0].x;
        to_current.y = q[v].y - q[0].y;

        POINT_2D to_next;
        to_next.x = q[v + 1].x - q[0].x;
        to_next.y = q[v + 1].y - q[0].y;

        doubled_area += fabs(FUNC_CALL(cross2D)(to_current, to_next));
    }

    return doubled_area / 2.0f;
}
// Area of the overlap of two rotated boxes.
// Each box is given by its coordinates (x_ctr, y_ctr, w, h) and its angle.
// Returns 0 when the boxes share at most two candidate points (no overlap area).
inline float FUNC(rotatedBoxesIntersection)(const COORD_TYPE_4 boxA, const INPUT0_TYPE angleA,
                                            const COORD_TYPE_4 boxB, const INPUT0_TYPE angleB) {
    POINT_2D cornersA[4];
    POINT_2D cornersB[4];
    FUNC_CALL(getRotatedVertices)(boxA, angleA, cornersA);
    FUNC_CALL(getRotatedVertices)(boxB, angleB, cornersB);

    // Up to 4 x 4 + 4 + 4 = 24 candidate points (duplicates included) can be produced.
    POINT_2D candidates[24];
    const int candidate_count = FUNC_CALL(getIntersectionPoints)(cornersA, cornersB, candidates);
    if (candidate_count <= 2) {
        return 0.f;
    }

    // The convex hull orders the candidates so that the contour area can be computed;
    // the hull is kept relative to its starting point since only the area is needed.
    POINT_2D hull[24];
    const int hull_size = FUNC_CALL(convex_hull_graham)(candidates, candidate_count, hull, true);
    return FUNC_CALL(polygon_area)(hull, hull_size);
}
// Intersection-over-union of two rotated boxes.
// Box area is box[2] * box[3] (w * h). Returns 0 when either box has a non-positive area.
inline float FUNC(intersectionOverUnion)(const COORD_TYPE_4 boxA, const INPUT0_TYPE angleA,
                                         const COORD_TYPE_4 boxB, const INPUT0_TYPE angleB)
{
    const float areaA = convert_float(boxA[3]) * convert_float(boxA[2]);
    const float areaB = convert_float(boxB[3]) * convert_float(boxB[2]);

    if (areaA <= 0.0f || areaB <= 0.0f) {
        return 0.0f;
    }

    const float overlap = FUNC_CALL(rotatedBoxesIntersection)(boxA, angleA, boxB, angleB);
    const float combined = areaA + areaB - overlap;
    return overlap / combined;
}
#else
inline float FUNC(intersectionOverUnion)(const COORD_TYPE_4 boxA, const COORD_TYPE_4 boxB)
{
#if BOX_ENCODING == 0
#if !defined(ROTATION) && BOX_ENCODING == 0
/// CORNER
const float areaA = convert_float(boxA[3] - boxA[1]) * convert_float(boxA[2] - boxA[0]);
const float areaB = convert_float(boxB[3] - boxB[1]) * convert_float(boxB[2] - boxB[0]);
@ -110,6 +432,7 @@ inline float FUNC(intersectionOverUnion)(const COORD_TYPE_4 boxA, const COORD_TY
const float union_area = areaA + areaB - intersection_area;
return intersection_area / union_area;
}
#endif // ROTATION
inline float FUNC(scaleIOU)(float iou, float iou_threshold, float scale)
{
@ -240,6 +563,16 @@ inline void FUNC(swap)(__global BOX_INFO* a, __global BOX_INFO* b)
*b = temp;
}
#ifdef ROTATION
// Reverses the first `boxNum` entries of `outBoxes` in place.
inline void FUNC(reverseOutputBoxList)(__global BOX_INFO *outBoxes, int boxNum)
{
    // Walk inwards from both ends, swapping pairs until the indices meet.
    for (int front = 0, back = boxNum - 1; front < back; ++front, --back) {
        FUNC_CALL(swap)(&outBoxes[front], &outBoxes[back]);
    }
}
#else
inline void FUNC(sortOutputBoxList)(__global BOX_INFO *outSortedBoxes, int boxNum)
{
for (int i = 0; i < boxNum - 1; ++i) {
@ -261,6 +594,7 @@ inline void FUNC(sortOutputBoxList)(__global BOX_INFO *outSortedBoxes, int boxNu
break;
}
}
#endif // ROTATION
#ifdef NMS_STAGE_0
@ -427,9 +761,11 @@ KERNEL (non_max_suppression_ref_stage_2)(
const ushort classId = get_global_id(1);
float scale = 0.0f;
#ifndef ROTATION
if (SOFT_NMS_SIGMA_VAL > 0.0f) {
scale = -0.5f / SOFT_NMS_SIGMA_VAL;
}
#endif
__global SBOX_INFO *sortedBoxList = (__global SBOX_INFO*)&buffer0[(batchId * NUM_CLASSES + classId) * BUFFER_STRIDE];
const int kSortedBoxNum = buffer2[batchId * NUM_CLASSES + classId];
@ -442,12 +778,22 @@ KERNEL (non_max_suppression_ref_stage_2)(
SBOX_INFO next_candidate = sortedBoxList[i];
INPUT1_TYPE original_score = next_candidate.score;
const COORD_TYPE_4 next_candidate_coord = FUNC_CALL(getBoxCoords)(boxes, batchId, next_candidate.boxId);
#ifdef ROTATION
const INPUT0_TYPE next_candidate_angle = boxes[INPUT0_GET_INDEX(batchId, next_candidate.boxId, 4, 0)];
#endif
++i;
bool should_hard_suppress = false;
for (int j = selectedBoxNum - 1; j >= next_candidate.suppress_begin_index; --j) {
const COORD_TYPE_4 selected_box_coord = FUNC_CALL(getBoxCoords)(boxes, batchId, selectedBoxList[j].boxId);
#ifdef ROTATION
const INPUT0_TYPE selected_box_angle = boxes[INPUT0_GET_INDEX(batchId, selectedBoxList[j].boxId, 4, 0)];
const float iou = FUNC_CALL(intersectionOverUnion)(next_candidate_coord, next_candidate_angle,
selected_box_coord, selected_box_angle);
#else
const float iou = FUNC_CALL(intersectionOverUnion)(next_candidate_coord, selected_box_coord);
#endif
next_candidate.score *= FUNC_CALL(scaleIOU)(iou, IOU_THRESHOLD_VAL, scale);
if (iou >= IOU_THRESHOLD_VAL && !(SOFT_NMS_SIGMA_VAL > 0.0f)) {
@ -531,7 +877,11 @@ KERNEL (non_max_suppression_ref_stage_3)(
}
#if SORT_RESULT_DESCENDING == 1
#ifdef ROTATION
FUNC_CALL(reverseOutputBoxList)(sortedBoxList, outputIdx);
#else
FUNC_CALL(sortOutputBoxList)(sortedBoxList, outputIdx);
#endif
#endif
unroll_for (int i = 0; i < outputIdx; i++) {

View File

@ -570,6 +570,15 @@ enum class BoxEncodingType {
BOX_ENCODING_CENTER,
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// NMSRotationType
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Rotation mode of the non-max-suppression kernel.
// The enumerator values (0, 1, 2) are significant: the reference kernel receives the value
// as the ROTATION jit constant for the rotated modes and tests it with `ROTATION == 2`.
enum class NMSRotationType {
NONE,
CLOCKWISE,
COUNTERCLOCKWISE
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// ConvertColor
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

View File

@ -149,11 +149,17 @@ JitConstants NonMaxSuppressionKernelRef::GetJitConstants(const non_max_suppressi
jit.AddConstant(MakeJitConstant("SCORE_THRESHOLD_VAL", params.score_threshold));
}
if (params.soft_nms_sigma_type == base_params::ArgType::Input) {
jit.AddConstant(MakeJitConstant("SOFT_NMS_SIGMA_TYPE", GetInputTypeStr(params.GetIndexSoftNmsSigma())));
jit.AddConstant(MakeJitConstant("SOFT_NMS_SIGMA_VAL", "convert_float(soft_nms_sigma[0])"));
if (params.rotation == NMSRotationType::NONE) {
if (params.soft_nms_sigma_type == base_params::ArgType::Input) {
jit.AddConstant(MakeJitConstant("SOFT_NMS_SIGMA_TYPE", GetInputTypeStr(params.GetIndexSoftNmsSigma())));
jit.AddConstant(MakeJitConstant("SOFT_NMS_SIGMA_VAL", "convert_float(soft_nms_sigma[0])"));
} else {
jit.AddConstant(MakeJitConstant("SOFT_NMS_SIGMA_VAL", params.soft_nms_sigma));
}
} else {
jit.AddConstant(MakeJitConstant("SOFT_NMS_SIGMA_VAL", params.soft_nms_sigma));
jit.AddConstant(MakeJitConstant("ROTATION", static_cast<int>(params.rotation)));
// for NMSRotated it is always zero
jit.AddConstant(MakeJitConstant("SOFT_NMS_SIGMA_VAL", 0.0f));
}
if (params.has_second_output) {

View File

@ -36,6 +36,7 @@ struct non_max_suppression_params : public base_params {
bool has_third_output;
bool use_multiple_outputs;
bool reuse_internal_buffer = false;
NMSRotationType rotation = NMSRotationType::NONE;
uint32_t GetIndexNumSelectPerClass() const {
uint32_t input_idx = 2;

View File

@ -17,7 +17,18 @@ namespace ov {
namespace intel_gpu {
static void CreateNonMaxSuppressionIEInternalOp(ProgramBuilder& p, const std::shared_ptr<ov::op::internal::NonMaxSuppressionIEInternal>& op) {
validate_inputs_count(op, {2, 3, 4, 5, 6});
cldnn::non_max_suppression::Rotation rotation = cldnn::non_max_suppression::Rotation::NONE;
const bool is_nms_rotated = op->m_rotation != ov::op::internal::NonMaxSuppressionIEInternal::Rotation_None;
if (is_nms_rotated) {
// For NMSRotated threshold inputs are mandatory, and soft_nms_sigma input is absent
validate_inputs_count(op, {5});
rotation = op->m_rotation == ov::op::internal::NonMaxSuppressionIEInternal::Rotation_Clockwise ?
cldnn::non_max_suppression::Rotation::CLOCKWISE
: cldnn::non_max_suppression::Rotation::COUNTERCLOCKWISE;
} else {
validate_inputs_count(op, {2, 3, 4, 5, 6});
}
auto inputs = p.GetInputInfo(op);
std::vector<cldnn::input_info> reordered_inputs;
reordered_inputs.resize(inputs.size());
@ -75,6 +86,7 @@ static void CreateNonMaxSuppressionIEInternalOp(ProgramBuilder& p, const std::sh
prim.output_paddings = get_output_paddings();
prim.output_data_types = get_output_data_types();
prim.rotation = rotation;
switch (reordered_inputs.size()) {
case 6: prim.soft_nms_sigma = reordered_inputs[5].pid;
@ -142,6 +154,7 @@ static void CreateNonMaxSuppressionIEInternalOp(ProgramBuilder& p, const std::sh
"", "", "", "", "", "");
prim.output_data_types = get_output_data_types();
prim.rotation = rotation;
switch (reordered_inputs.size()) {
case 6: prim.soft_nms_sigma = reordered_inputs[5].pid;

View File

@ -88,6 +88,7 @@
#include "transformations/op_conversions/bidirectional_sequences_decomposition.hpp"
#include "transformations/op_conversions/convert_previous_nms_to_nms_9.hpp"
#include "transformations/op_conversions/convert_nms9_to_nms_ie_internal.hpp"
#include "transformations/op_conversions/convert_nms_rotated_to_nms_ie_internal.hpp"
#include "transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.hpp"
#include "transformations/op_conversions/convert_interpolate1_to_interpolate4.hpp"
#include "transformations/op_conversions/convert_gather_downgrade.hpp"
@ -272,6 +273,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
manager.register_pass<ov::pass::ConvertNMS4ToNMS9>();
manager.register_pass<ov::pass::ConvertNMS5ToNMS9>();
manager.register_pass<ov::pass::ConvertNMS9ToNMSIEInternal>();
manager.register_pass<ov::pass::ConvertNMSRotatedToNMSIEInternal>();
manager.register_pass<ov::pass::ConvertGP9ToGPIEInternal>();
manager.register_pass<ov::pass::ConvertMatrixNmsToMatrixNmsIE>();
manager.register_pass<ov::pass::ConvertGather0D>();

View File

@ -0,0 +1,40 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include "single_layer_tests/nms_rotated.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
using namespace InferenceEngine;
using namespace ngraph;
// Parameter tables for the GPU NMSRotated single-layer smoke test.
// NOTE(review): the meaning of the three InputShapeParams fields is defined by the shared
// test header and is not visible here - confirm the order before changing the values.
const std::vector<InputShapeParams> inShapeParams = {
InputShapeParams{2, 50, 50},
InputShapeParams {9, 10, 10}
};
const std::vector<int32_t> maxOutBoxPerClass = {5, 20};
// The same threshold list is passed twice to the parameter generator below.
const std::vector<float> threshold = {0.3f, 0.7f};
const std::vector<bool> sortResDesc = {true, false};
const std::vector<element::Type> outType = {element::i32, element::i64};
const std::vector<bool> clockwise = {true, false};
const std::vector<Precision> inputPrecisions = {Precision::FP32, Precision::FP16};
// Instantiates NmsRotatedLayerTest on the GPU device for every combination of the tables above.
INSTANTIATE_TEST_SUITE_P(smoke_NmsRotatedLayerTest,
NmsRotatedLayerTest,
::testing::Combine(::testing::ValuesIn(inShapeParams),
::testing::Combine(::testing::ValuesIn(inputPrecisions),
::testing::Values(Precision::I32),
::testing::Values(Precision::FP32)),
::testing::ValuesIn(maxOutBoxPerClass),
::testing::ValuesIn(threshold),
::testing::ValuesIn(threshold),
::testing::ValuesIn(sortResDesc),
::testing::ValuesIn(outType),
::testing::ValuesIn(clockwise),
::testing::Values(ov::test::utils::DEVICE_GPU)),
NmsRotatedLayerTest::getTestCaseName);

View File

@ -709,3 +709,250 @@ TYPED_TEST(non_max_suppression_basic, soft_nms_sigma_cached) {
TYPED_TEST(non_max_suppression_basic, multiple_outputs_cached) {
this->test_multiple_outputs(true);
}
namespace {
// One test case for the rotated NMS unit test.
// T is the element type of boxes/scores, T_IND is the element type of the expected indices.
template<typename T, typename T_IND>
struct NmsRotatedParams {
// Case name; used as the leading part of the generated test name.
std::string test_name;
// Dimensions used to build the input layouts: boxes are laid out as
// [num_batches, num_boxes, 5] and scores as [num_batches, num_classes, num_boxes].
int num_batches;
int num_boxes;
int num_classes;
// Flat box data, 5 values per box.
std::vector<T> boxes;
// Flat score data.
std::vector<T> scores;
// Scalar inputs fed to the primitive through constant data nodes.
int max_output_boxes_per_class;
float iou_threshold;
float score_threshold;
bool sort_result_descending;
// true selects Rotation::CLOCKWISE, false selects Rotation::COUNTERCLOCKWISE.
bool clockwise;
// Expected selected indices as flat triples (3 values per selected box).
std::vector<T_IND> expected_indices;
// Expected selected scores, indexed the same way as expected_indices.
std::vector<T> expected_scores;
};
// Absolute tolerance used when comparing scores of element type T.
template <typename T>
float getError();

// Tolerance for single-precision scores.
template <>
float getError<float>() {
    return 0.001f;
}
// Tolerance for half-precision scores (much looser than the single-precision one).
template <>
float getError<ov::float16>() {
    return 0.1f;
}
// Parameterized fixture that builds a topology with a single rotated non_max_suppression
// primitive, executes it on the test engine and checks the selected indices, the selected
// scores and the number of valid outputs against the values stored in NmsRotatedParams.
// T is the element type of boxes/scores, T_IND is the element type of the indices output.
template<typename T, typename T_IND>
struct nms_rotated_test : public ::testing::TestWithParam<NmsRotatedParams<T, T_IND>> {
public:
    // Runs the current parameter set.
    // is_caching_test is forwarded to get_network() when the network is created.
    void test(bool is_caching_test = false) {
        const NmsRotatedParams<T, T_IND> param = testing::TestWithParam<NmsRotatedParams<T, T_IND>>::GetParam();
        const auto data_type = ov::element::from<T>();

        auto& engine = tests::get_test_engine();

        // Input/output layouts. Boxes carry 5 values each; the outputs are sized for the case
        // where every (batch, class, box) combination is selected.
        const auto boxes_layout = layout(ov::PartialShape{param.num_batches, param.num_boxes, 5}, data_type,
                                         format::bfyx);
        const auto scores_layout = layout(ov::PartialShape{param.num_batches, param.num_classes, param.num_boxes},
                                          data_type, format::bfyx);

        const int selected_indices_num = param.num_batches * param.num_classes * param.num_boxes;
        const auto selected_scores_layout = layout(ov::PartialShape{selected_indices_num/*expected_indices_count*/, 3},
                                                   data_type, format::bfyx);
        const auto valid_outputs_layout = layout(ov::PartialShape{1}, cldnn::data_types::i32, format::bfyx);

        const auto boxes_mem = engine.allocate_memory(boxes_layout);
        tests::set_values(boxes_mem, param.boxes);

        const auto scores_mem = engine.allocate_memory(scores_layout);
        tests::set_values(scores_mem, param.scores);

        // Scalar inputs are all passed as single-element f32 buffers.
        const auto num_per_class_mem = engine.allocate_memory(layout(data_types::f32, format::bfyx, tensor(batch(1))));
        tests::set_values(num_per_class_mem, {1.f * param.max_output_boxes_per_class});

        const auto iou_threshold_mem = engine.allocate_memory(layout(data_types::f32, format::bfyx, tensor(batch(1))));
        tests::set_values(iou_threshold_mem, {param.iou_threshold});

        const auto score_threshold_mem = engine.allocate_memory(layout(data_types::f32, format::bfyx, tensor(batch(1))));
        tests::set_values(score_threshold_mem, {param.score_threshold});

        // Additional outputs are written by the primitive into these mutable buffers.
        const auto selected_scores_mem = engine.allocate_memory(selected_scores_layout);
        const auto valid_outputs_mem = engine.allocate_memory(valid_outputs_layout);

        topology topo;
        topo.add(input_layout("boxes", boxes_layout));
        topo.add(input_layout("scores", scores_layout));
        topo.add(data("num_per_class", num_per_class_mem));
        topo.add(data("iou_threshold", iou_threshold_mem));
        topo.add(data("score_threshold", score_threshold_mem));
        topo.add(mutable_data("selected_scores", selected_scores_mem));
        topo.add(mutable_data("valid_outputs", valid_outputs_mem));
        auto nms = non_max_suppression("nms",
                                       input_info("boxes"),
                                       input_info("scores"),
                                       selected_indices_num,
                                       false,
                                       param.sort_result_descending,
                                       "num_per_class",
                                       "iou_threshold",
                                       "score_threshold",
                                       "",
                                       "selected_scores",
                                       "valid_outputs");
        nms.rotation = param.clockwise ? non_max_suppression::Rotation::CLOCKWISE :
                                         non_max_suppression::Rotation::COUNTERCLOCKWISE;

        topo.add(nms);

        ExecutionConfig config = get_test_default_config(engine);
        config.set_property(ov::intel_gpu::optimize_data(true));

        cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
        net->set_input_data("boxes", boxes_mem);
        net->set_input_data("scores", scores_mem);
        const auto result = net->execute();
        const auto indices_mem = result.at("nms").get_memory();
        const cldnn::mem_lock<T_IND> indices_ptr(indices_mem, get_test_stream());
        const cldnn::mem_lock<T> selected_scores_ptr(selected_scores_mem, get_test_stream());
        const cldnn::mem_lock<int> valid_outputs_ptr(valid_outputs_mem, get_test_stream());

        // The loop below indexes expected_scores with the same index as expected_indices,
        // so the two expected vectors must have the same length.
        ASSERT_EQ(param.expected_scores.size(), param.expected_indices.size());

        const auto expected_valid_outputs = param.expected_indices.size() / 3;
        const size_t num_valid_outputs = static_cast<size_t>(valid_outputs_ptr[0]);

        // Fatal on purpose: num_valid_outputs bounds the accesses to the expected vectors below,
        // so continuing after a mismatch could read past their end.
        ASSERT_EQ(num_valid_outputs, expected_valid_outputs);
        ASSERT_GE(indices_ptr.size(), param.expected_indices.size());
        ASSERT_GE(selected_scores_ptr.size(), param.expected_scores.size());

        for (size_t i = 0; i < indices_ptr.size(); ++i) {
            if (i < num_valid_outputs * 3) {
                EXPECT_EQ(param.expected_indices[i], indices_ptr[i]) << "at i = " << i;
                EXPECT_NEAR(param.expected_scores[i], selected_scores_ptr[i], getError<T>()) << "at i = " << i;
            } else {
                // Entries past the valid outputs are expected to be filled with -1.
                EXPECT_EQ(indices_ptr[i], -1) << "at i = " << i;
                EXPECT_NEAR(selected_scores_ptr[i], -1, getError<T>()) << "at i = " << i;
            }
        }
    }
};
// Name generator for the parameterized suites: "<test_name>_DataType=<T>_IndexType=<T_IND>".
struct PrintToStringParamName {
    template<class T, class T_IND>
    std::string operator()(const testing::TestParamInfo<NmsRotatedParams<T, T_IND>>& info) {
        const ov::element::Type data_type(ov::element::from<T>());
        const ov::element::Type index_type(ov::element::from<T_IND>());

        std::ostringstream name;
        name << info.param.test_name << "_"
             << "DataType=" << data_type
             << "_IndexType=" << index_type;
        return name.str();
    }
};
// Concrete fixtures: float and half-precision data, both with 32-bit integer indices.
using nms_rotated_test_f32_i32 = nms_rotated_test<float, int32_t>;
using nms_rotated_test_f16_i32 = nms_rotated_test<ov::float16, int32_t>;
// test() contains fatal assertions; ASSERT_NO_FATAL_FAILURE makes them fail the TEST_P body as well.
TEST_P(nms_rotated_test_f32_i32, basic) {
ASSERT_NO_FATAL_FAILURE(test());
}
TEST_P(nms_rotated_test_f16_i32, basic) {
ASSERT_NO_FATAL_FAILURE(test());
}
// Builds the list of rotated NMS test cases for the given data type T and index type T_IND.
//
// Each entry is an aggregate-initialized NmsRotatedParams, in field order:
//   test_name,
//   num_batches, num_boxes, num_classes,
//   boxes (5 values per box), scores,
//   max_output_boxes_per_class, iou_threshold, score_threshold, sort_result_descending, clockwise,
//   expected_indices (3 values per selected box), expected_scores (same indexing as the indices).
// NOTE(review): the 5 box values are presumably center x, center y, width, height and angle
// in radians, as in the NMSRotated operation - confirm against the operation specification.
template<typename T, typename T_IND>
std::vector<NmsRotatedParams<T, T_IND>> getNmsRotatedParams() {
const std::vector<NmsRotatedParams<T, T_IND>> params = {
// Four boxes, one class, a very large per-class limit and no score filtering.
{"basic",
1, 4, 1,
std::vector<T>{
7.0, 4.0, 8.0, 7.0, 0.5,
4.0, 7.0, 9.0, 11.0, 0.6,
4.0, 8.0, 10.0, 12.0, 0.3,
2.0, 5.0, 13.0, 7.0, 0.6},
std::vector<T>{0.65, 0.7, 0.55, 0.96},
5000, 0.5f, 0.0f, false, true,
std::vector<T_IND>{0, 0, 3, 0, 0, 1, 0, 0, 0},
std::vector<T>{0.0, 0.0, 0.96, 0.0, 0.0, 0.7, 0.0, 0.0, 0.65},
},
// Same inputs as "basic", but max_output_boxes_per_class is 2.
{"max_out_2",
1, 4, 1,
std::vector<T>{
7.0, 4.0, 8.0, 7.0, 0.5,
4.0, 7.0, 9.0, 11.0, 0.6,
4.0, 8.0, 10.0, 12.0, 0.3,
2.0, 5.0, 13.0, 7.0, 0.6},
std::vector<T>{0.65, 0.7, 0.55, 0.96},
2, 0.5f, 0.0f, false, true,
std::vector<T_IND>{0, 0, 3, 0, 0, 1},
std::vector<T>{0.0, 0.0, 0.96, 0.0, 0.0, 0.7},
},
// Same inputs as "basic", but score_threshold is 0.67.
{"score_thresold",
1, 4, 1,
std::vector<T>{
7.0, 4.0, 8.0, 7.0, 0.5,
4.0, 7.0, 9.0, 11.0, 0.6,
4.0, 8.0, 10.0, 12.0, 0.3,
2.0, 5.0, 13.0, 7.0, 0.6},
std::vector<T>{0.65, 0.7, 0.55, 0.96},
5000, 0.5f, 0.67f, false, true,
std::vector<T_IND>{0, 0, 3, 0, 0, 1},
std::vector<T>{0.0, 0.0, 0.96, 0.0, 0.0, 0.7},
},
// Same inputs as "basic", but iou_threshold is 0.3.
{"iou_thresold_2",
1, 4, 1,
std::vector<T>{
7.0, 4.0, 8.0, 7.0, 0.5,
4.0, 7.0, 9.0, 11.0, 0.6,
4.0, 8.0, 10.0, 12.0, 0.3,
2.0, 5.0, 13.0, 7.0, 0.6},
std::vector<T>{0.65, 0.7, 0.55, 0.96},
5000, 0.3f, 0.0f, false, true,
std::vector<T_IND>{0, 0, 3, 0, 0, 0},
std::vector<T>{0.0, 0.0, 0.96, 0.0, 0.0, 0.65},
},
// The four cases below use the same two boxes and differ only in the sign of the first
// box's last value (-0.7854 / 0.7854) and in the clockwise flag; the expected outputs
// keep both boxes for negative+clockwise and positive+counterclockwise, and only the
// first box for the other two combinations.
{"negative_cw",
1, 2, 1,
std::vector<T>{6.0, 34.0, 4.0, 8.0, -0.7854, 9.0, 32, 2.0, 4.0, 0.0},
std::vector<T>{0.8, 0.7},
5000, 0.1f, 0.0f, false, true,
std::vector<T_IND>{0, 0, 0, 0, 0, 1},
std::vector<T>{0.0, 0.0, 0.8, 0.0, 0.0, 0.7}
},
{"negative_ccw",
1, 2, 1,
std::vector<T>{6.0, 34.0, 4.0, 8.0, -0.7854, 9.0, 32, 2.0, 4.0, 0.0},
std::vector<T>{0.8, 0.7},
5000, 0.1f, 0.0f, false, false,
std::vector<T_IND>{0, 0, 0},
std::vector<T>{0.0, 0.0, 0.8}
},
{"positive_ccw",
1, 2, 1,
std::vector<T>{6.0, 34.0, 4.0, 8.0, 0.7854, 9.0, 32, 2.0, 4.0, 0.0},
std::vector<T>{0.8, 0.7},
5000, 0.1f, 0.0f, false, false,
std::vector<T_IND>{0, 0, 0, 0, 0, 1},
std::vector<T>{0.0, 0.0, 0.8, 0.0, 0.0, 0.7}
},
{"positive_cw",
1, 2, 1,
std::vector<T>{6.0, 34.0, 4.0, 8.0, 0.7854, 9.0, 32, 2.0, 4.0, 0.0},
std::vector<T>{0.8, 0.7},
5000, 0.1f, 0.0f, false, true,
std::vector<T_IND>{0, 0, 0},
std::vector<T>{0.0, 0.0, 0.8}
}
};
return params;
}
// Instantiate the rotated NMS cases for float and half-precision data.
// NOTE(review): the instantiation prefix "multiclass_nms_gpu_test" looks copied from another
// test file; confirm whether a rotated-NMS-specific prefix was intended before renaming,
// since the prefix is part of the generated test names.
INSTANTIATE_TEST_SUITE_P(multiclass_nms_gpu_test,
nms_rotated_test_f32_i32,
::testing::ValuesIn(getNmsRotatedParams<float, int32_t>()),
PrintToStringParamName());
INSTANTIATE_TEST_SUITE_P(multiclass_nms_gpu_test,
nms_rotated_test_f16_i32,
::testing::ValuesIn(getNmsRotatedParams<ov::float16, int32_t>()),
PrintToStringParamName());
} // namespace

View File

@ -0,0 +1,15 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "shared_test_classes/single_layer/nms_rotated.hpp"
namespace LayerTestsDefinitions {
// Shared test body: executes the fixture's Run() for every instantiated parameter set.
// NOTE(review): Run() is inherited from the base fixture; presumably it builds the model,
// infers on the target device and invokes Compare() - confirm in LayerTestsCommon.
TEST_P(NmsRotatedLayerTest, CompareWithRefs) {
    Run();
}
} // namespace LayerTestsDefinitions

View File

@ -0,0 +1,46 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <string>
#include <tuple>
#include "shared_test_classes/base/layer_test_utils.hpp"
namespace LayerTestsDefinitions {
// Dimensions used to build the boxes and scores inputs.
using InputShapeParams = std::tuple<size_t, // Number of batches
size_t, // Number of boxes
size_t>; // Number of classes
// Precisions of the operation inputs.
using InputPrecisions =
std::tuple<InferenceEngine::Precision, // boxes and scores precisions
InferenceEngine::Precision, // max_output_boxes_per_class precision
InferenceEngine::Precision>; // iou_threshold, score_threshold precisions
// Full parameter set of the NmsRotatedLayerTest suite.
using NmsRotatedParams = std::tuple<InputShapeParams, // Params using to create 1st and 2nd inputs
InputPrecisions, // Input precisions
int32_t, // Max output boxes per class
float, // IOU threshold
float, // Score threshold
bool, // Sort result descending
ov::element::Type, // Output type
bool, // Clockwise
std::string>; // Device name
// Shared single-layer test fixture for the NMSRotated operation, parameterized by NmsRotatedParams.
class NmsRotatedLayerTest : public testing::WithParamInterface<NmsRotatedParams>, virtual public LayerTestsUtils::LayerTestsCommon {
public:
// Builds a readable test name from the parameter tuple.
static std::string getTestCaseName(const testing::TestParamInfo<NmsRotatedParams>& obj);
// Fills the network inputs; overrides the base implementation.
void GenerateInputs() override;
// Compares reference outputs with plugin outputs; overrides the base implementation.
void Compare(const std::vector<std::pair<ngraph::element::Type, std::vector<std::uint8_t>>>& expectedOutputs,
const std::vector<InferenceEngine::Blob::Ptr>& actualOutputs) override;
protected:
// Unpacks the test parameters and builds the model under test.
void SetUp() override;
// Shape parameters of the current test case, stored by SetUp().
InputShapeParams inShapeParams;
};
} // namespace LayerTestsDefinitions

View File

@ -0,0 +1,230 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "shared_test_classes/single_layer/nms_rotated.hpp"
#include "openvino/op/nms_rotated.hpp"
#include <vector>
namespace LayerTestsDefinitions {
using namespace InferenceEngine;
using namespace FuncTestUtils::PrecisionUtils;
// Produces the test-case name by printing every field of the parameter tuple as
// "<key>=<value>" pairs joined with underscores.
std::string NmsRotatedLayerTest::getTestCaseName(const testing::TestParamInfo<NmsRotatedParams>& obj) {
    const auto& testParams = obj.param;
    const auto& shapes = std::get<0>(testParams);      // {batches, boxes, classes}
    const auto& precisions = std::get<1>(testParams);  // {inputs, max boxes, thresholds}

    std::ostringstream name;
    name << "numBatches=" << std::get<0>(shapes)
         << "_numBoxes=" << std::get<1>(shapes)
         << "_numClasses=" << std::get<2>(shapes) << "_";
    name << "inputPrec=" << std::get<0>(precisions)
         << "_maxBoxPrec=" << std::get<1>(precisions)
         << "_thrPrec=" << std::get<2>(precisions) << "_";
    name << "maxOutBoxesPerClass=" << std::get<2>(testParams) << "_";
    name << "iouThr=" << std::get<3>(testParams)
         << "_scoreThr=" << std::get<4>(testParams) << "_";
    name << "sortResDescend=" << std::get<5>(testParams)
         << "_outType=" << std::get<6>(testParams) << "_";
    name << "clockwise=" << std::get<7>(testParams) << "_";
    name << "TargetDevice=" << std::get<8>(testParams);
    return name.str();
}
void NmsRotatedLayerTest::GenerateInputs() {
size_t it = 0;
for (const auto& input : cnnNetwork.getInputsInfo()) {
const auto& info = input.second;
Blob::Ptr blob;
if (it == 1) {
blob = make_blob_with_precision(info->getTensorDesc());
blob->allocate();
if (info->getTensorDesc().getPrecision() == Precision::FP32) {
ov::test::utils::fill_data_random_float<InferenceEngine::Precision::FP32>(blob, 1, 0, 1000);
} else {
ov::test::utils::fill_data_random_float<InferenceEngine::Precision::FP16>(blob, 1, 0, 1000);
}
} else {
blob = GenerateInput(*info);
}
inputs.push_back(blob);
it++;
}
}
void NmsRotatedLayerTest::Compare(
const std::vector<std::pair<ov::element::Type, std::vector<std::uint8_t>>>& expectedOutputs,
const std::vector<InferenceEngine::Blob::Ptr>& actualOutputs) {
size_t num_batches, num_boxes, num_classes;
std::tie(num_batches, num_boxes, num_classes) = inShapeParams;
struct OutBox {
OutBox() = default;
OutBox(int32_t batchId, int32_t classId, int32_t boxId, float score) {
this->batchId = batchId;
this->classId = classId;
this->boxId = boxId;
this->score = score;
}
bool operator==(const OutBox& rhs) const {
return batchId == rhs.batchId && classId == rhs.classId && boxId == rhs.boxId;
}
int32_t batchId;
int32_t classId;
int32_t boxId;
float score;
};
std::vector<OutBox> expected;
{
const auto selected_indices_size = expectedOutputs[0].second.size() / expectedOutputs[0].first.size();
const auto selected_scores_size = expectedOutputs[1].second.size() / expectedOutputs[1].first.size();
ASSERT_EQ(selected_indices_size, selected_scores_size);
const auto boxes_count = selected_indices_size / 3;
expected.resize(boxes_count);
if (expectedOutputs[0].first.size() == 4) {
auto selected_indices_data = reinterpret_cast<const int32_t*>(expectedOutputs[0].second.data());
for (size_t i = 0; i < selected_indices_size; i += 3) {
expected[i / 3].batchId = selected_indices_data[i + 0];
expected[i / 3].classId = selected_indices_data[i + 1];
expected[i / 3].boxId = selected_indices_data[i + 2];
}
} else {
auto selected_indices_data = reinterpret_cast<const int64_t*>(expectedOutputs[0].second.data());
for (size_t i = 0; i < selected_indices_size; i += 3) {
expected[i / 3].batchId = static_cast<int32_t>(selected_indices_data[i + 0]);
expected[i / 3].classId = static_cast<int32_t>(selected_indices_data[i + 1]);
expected[i / 3].boxId = static_cast<int32_t>(selected_indices_data[i + 2]);
}
}
if (expectedOutputs[1].first.size() == 4) {
auto selected_scores_data = reinterpret_cast<const float*>(expectedOutputs[1].second.data());
for (size_t i = 0; i < selected_scores_size; i += 3) {
expected[i / 3].score = selected_scores_data[i + 2];
}
} else {
auto selected_scores_data = reinterpret_cast<const double*>(expectedOutputs[1].second.data());
for (size_t i = 0; i < selected_scores_size; i += 3) {
expected[i / 3].score = static_cast<float>(selected_scores_data[i + 2]);
}
}
}
std::vector<OutBox> actual;
{
const auto selected_indices_size = actualOutputs[0]->byteSize() / sizeof(float);
const auto selected_indices_memory = as<MemoryBlob>(actualOutputs[0]);
IE_ASSERT(selected_indices_memory);
const auto selected_indices_lockedMemory = selected_indices_memory->rmap();
const auto selected_indices_data = selected_indices_lockedMemory.as<const int32_t*>();
const auto selected_scores_memory = as<MemoryBlob>(actualOutputs[1]);
IE_ASSERT(selected_scores_memory);
const auto selected_scores_lockedMemory = selected_scores_memory->rmap();
const auto selected_scores_data = selected_scores_lockedMemory.as<const float*>();
for (size_t i = 0; i < selected_indices_size; i += 3) {
const int32_t batchId = selected_indices_data[i + 0];
const int32_t classId = selected_indices_data[i + 1];
const int32_t boxId = selected_indices_data[i + 2];
const float score = selected_scores_data[i + 2];
if (batchId == -1 || classId == -1 || boxId == -1)
break;
actual.emplace_back(batchId, classId, boxId, score);
}
}
ASSERT_EQ(expected.size(), actual.size());
for (size_t i = 0; i < expected.size(); ++i) {
ASSERT_EQ(expected[i], actual[i]) << ", i=" << i;
ASSERT_NEAR(expected[i].score, actual[i].score, abs_threshold) << ", i=" << i;
}
}
void NmsRotatedLayerTest::SetUp() {
InputPrecisions inPrecisions;
size_t maxOutBoxesPerClass;
float iouThr, scoreThr;
bool sortResDescend, clockwise;
ov::element::Type outType;
std::tie(inShapeParams,
inPrecisions,
maxOutBoxesPerClass,
iouThr,
scoreThr,
sortResDescend,
outType,
clockwise,
targetDevice) = this->GetParam();
size_t numBatches, numBoxes, numClasses;
std::tie(numBatches, numBoxes, numClasses) = inShapeParams;
Precision inputPrec, maxBoxPrec, thrPrec;
std::tie(inputPrec, maxBoxPrec, thrPrec) = inPrecisions;
if (inputPrec == Precision::FP16) {
abs_threshold = 0.1;
} else {
abs_threshold = std::numeric_limits<float>::epsilon();
}
ov::ParameterVector params;
const std::vector<size_t> boxesShape{numBatches, numBoxes, 5}, scoresShape{numBatches, numClasses, numBoxes};
const auto ngPrc = convertIE2nGraphPrc(inputPrec);
const auto boxesNode = std::make_shared<ov::op::v0::Parameter>(ngPrc, ov::Shape(boxesShape));
params.push_back(boxesNode);
const auto scoresNode = std::make_shared<ov::op::v0::Parameter>(ngPrc, ov::Shape(scoresShape));
params.push_back(scoresNode);
const auto maxOutputBoxesPerClassNode = std::make_shared<ov::op::v0::Constant>(ov::element::Type_t::u32,
ov::Shape{},
std::vector<size_t>{maxOutBoxesPerClass});
const auto iouThresholdNode = std::make_shared<ov::op::v0::Constant>(ov::element::Type_t::f32,
ov::Shape{},
std::vector<float>{iouThr});
const auto scoreTresholdNode = std::make_shared<ov::op::v0::Constant>(ov::element::Type_t::f32,
ov::Shape{},
std::vector<float>{scoreThr});
const auto nmsNode = std::make_shared<ov::op::v13::NMSRotated>(params[0],
params[1],
maxOutputBoxesPerClassNode,
iouThresholdNode,
scoreTresholdNode,
sortResDescend,
outType,
clockwise);
function = std::make_shared<ov::Model>(nmsNode, params, "NMS");
}
} // namespace LayerTestsDefinitions