[CPU] NMSRotated operation implementation. (#20410)

This commit is contained in:
Nikolay Shchegolev 2023-10-31 16:10:52 +04:00 committed by GitHub
parent 3077bad26f
commit 57571d36e6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 1906 additions and 1065 deletions

View File

@ -27,13 +27,13 @@ The general algorithm is described below:
Here ``func(rotated_iou(b_i, b)) = 1 if rotated_iou(b_i, b) <= iou_threshold else 0``.
Having two bouding boxes ``B1`` and ``B2`` the following steps are performed to calculate ``rotated_iou(B1, B2)``:
Having two bounding boxes ``B1`` and ``B2`` the following steps are performed to calculate ``rotated_iou(B1, B2)``:
1. Calculate rotated vertices, (x, y) coordinates of the 4 corners of each box transformed by the corresponding angle in radians according to the direction specified by the *clockwise* attribute.
2. Find all intersection points between edges of ``B1`` and ``B2``. Add them to the ``intersection_points``.
3. Find all corners of ``B1`` within area of ``B2``, and all corners of ``B2`` within area of ``B1``. Add them to the ``intersection_points``.
4. Calculate ``intersection_area`` of the polygon described by ``intersection_points`` (see Shoelace formula).
5. Calculate ``union_area`` (the common area of ``B1`` and ``B2``), `union_area = (B1_area + B2_area) - intersection_area`.
5. Calculate ``union_area`` (the common area of ``B1`` and ``B2``), `union_area = B1_area + B2_area`.
6. Return intersection over union ``rotated_iou = intersection_area / (union_area - intersection_area)``.

View File

@ -49,6 +49,7 @@ bool fuse_type_to_nms3(const std::shared_ptr<ov::Node>& node, const precisions_m
bool fuse_type_to_nms4(const std::shared_ptr<ov::Node>& node, const precisions_map& precisions);
bool fuse_type_to_nms5(const std::shared_ptr<ov::Node>& node, const precisions_map& precisions);
bool fuse_type_to_nms9(const std::shared_ptr<ov::Node>& node, const precisions_map& precisions);
bool fuse_type_to_nms_rotated(const std::shared_ptr<ov::Node>& node, const precisions_map& precisions);
bool fuse_type_to_matrix_nms(const std::shared_ptr<ov::Node>& node, const precisions_map& precisions);
bool fuse_type_to_multiclass_nms(const std::shared_ptr<ov::Node>& node, const precisions_map& precisions);
bool fuse_type_to_generate_proposals(const std::shared_ptr<ov::Node>& node, const precisions_map& precisions);
@ -383,6 +384,7 @@ bool ov::pass::ConvertPrecision::run_on_model(const std::shared_ptr<ov::Model>&
{opset4::NonMaxSuppression::get_type_info_static(), fuse_type_to_nms4},
{opset5::NonMaxSuppression::get_type_info_static(), fuse_type_to_nms5},
{opset9::NonMaxSuppression::get_type_info_static(), fuse_type_to_nms9},
{op::v13::NMSRotated::get_type_info_static(), fuse_type_to_nms_rotated},
{opset8::MatrixNms::get_type_info_static(), fuse_type_to_matrix_nms},
{opset8::MulticlassNms::get_type_info_static(), fuse_type_to_multiclass_nms},
{opset9::MulticlassNms::get_type_info_static(), fuse_type_to_multiclass_nms},
@ -691,6 +693,51 @@ bool fuse_type_to_nms9(const std::shared_ptr<ov::Node>& node, const precisions_m
return res;
}
// Applies the requested precision conversion to the outputs of an NMSRotated (opset13) node.
//
// If the element type of output 0 is mapped to i32 or i64, the conversion is done natively through the
// node's output type attribute. When the element type of output 1 has no mapping, nothing else is needed
// and the function returns right away. Otherwise the output types are overridden through the TypeRelaxed
// mechanism: either on the node itself (when it is already type-relaxed) or on a TypeRelaxed copy that
// replaces the node.
//
// Returns false when the node is not an NMSRotated, true when the node (or graph) was modified.
bool fuse_type_to_nms_rotated(const std::shared_ptr<ov::Node>& node, const precisions_map& precisions) {
    const auto nms_rotated = ov::as_type_ptr<op::v13::NMSRotated>(node);
    if (nms_rotated == nullptr) {
        return false;
    }

    bool changed = false;

    // Output 0: try the native attribute first.
    const auto indices_it = precisions.find(node->get_output_element_type(0));
    if (indices_it != precisions.end()) {
        const auto& target = indices_it->second;
        const bool is_index_type = (target == ov::element::i32) || (target == ov::element::i64);
        if (is_index_type) {
            nms_rotated->set_output_type_attr(target);
            changed = true;
            // Nothing is requested for the type of output 1 - the attribute update is sufficient.
            if (precisions.find(node->get_output_element_type(1)) == precisions.end()) {
                return changed;
            }
        }
    }

    // Collect the resulting type for every output, overriding in place when the node is already relaxed.
    const auto relaxed = std::dynamic_pointer_cast<ov::op::TypeRelaxedBase>(node);
    const size_t outputs_count = node->get_output_size();
    ov::element::TypeVector new_types;
    for (size_t port = 0; port < outputs_count; ++port) {
        const auto& current = node->get_output_element_type(port);
        const auto found = precisions.find(current);
        if (found != precisions.end()) {
            if (relaxed) {
                relaxed->set_overridden_output_type(found->second, port);
                changed = true;
            }
            new_types.push_back(found->second);
        } else {
            new_types.push_back(current);
        }
    }

    if (relaxed) {
        return changed;
    }

    // The node is not type-relaxed yet: substitute it with a relaxed copy carrying the new output types.
    const auto replacement =
        std::make_shared<ov::op::TypeRelaxed<op::v13::NMSRotated>>(*nms_rotated, ov::element::TypeVector{}, new_types);
    replace_node(node, replacement);
    return true;
}
namespace {
bool update_type(size_t idx,

View File

@ -201,6 +201,7 @@ static const TypeToNameMap& get_type_to_name_tbl() {
{ "ExtractImagePatches", Type::ExtractImagePatches},
{ "NonMaxSuppression", Type::NonMaxSuppression},
{ "NonMaxSuppressionIEInternal", Type::NonMaxSuppression},
{ "NMSRotated", Type::NonMaxSuppression},
{ "MatrixNms", Type::MatrixNms},
{ "MulticlassNms", Type::MulticlassNms},
{ "MulticlassNmsIEInternal", Type::MulticlassNms},

View File

@ -615,26 +615,31 @@ bool Node::outputShapeDataDependency() const {
void Node::redefineOutputMemory(const std::vector<VectorDims> &newOutputShapes) {
if (newOutputShapes.size() != outputShapes.size()) {
IE_THROW() << "Number shapes mismatch with real outputs number for node with name: " << getName();
THROW_CPU_NODE_ERR("has shapes number mismatch with real outputs number.");
}
for (size_t i = 0; i < outputShapes.size(); i++) {
const auto edges = getChildEdgesAtPort(i);
for (size_t i = 0lu; i < outputShapes.size(); i++) {
redefineOutputMemory(i, newOutputShapes[i]);
}
}
void Node::redefineOutputMemory(const size_t port, const VectorDims& new_output_shape) {
const auto edges = getChildEdgesAtPort(port);
// avoid 0D shape incompatible
auto newOutputShape = newOutputShapes[i];
if (newOutputShape.empty()) {
newOutputShape.push_back(1);
auto new_shape = new_output_shape;
if (new_shape.empty()) {
new_shape.push_back(1);
}
const auto &currDesc = edges[0]->getMemory().getDesc();
if (currDesc.getShape().isStatic() && currDesc.getShape().getStaticDims() == newOutputShape)
continue;
const bool hasZeroDims = std::count(std::begin(newOutputShape), std::end(newOutputShape), 0) > 0;
const auto memDesc = getBaseMemDescAtOutputPort(i)->cloneWithNewDims(newOutputShape, hasZeroDims);
for (size_t j = 0; j < edges.size(); j++) {
edges[j]->getMemoryPtr()->redefineDesc(memDesc);
const auto& curr_desc = edges[0]->getMemory().getDesc();
if (curr_desc.getShape().isStatic() && curr_desc.getShape().getStaticDims() == new_shape) {
return;
}
const bool has_zero_dims = std::count(std::begin(new_shape), std::end(new_shape), 0lu) > 0;
const auto mem_desc = getBaseMemDescAtOutputPort(port)->cloneWithNewDims(new_shape, has_zero_dims);
for (size_t j = 0lu; j < edges.size(); j++) {
edges[j]->getMemoryPtr()->redefineDesc(mem_desc);
}
}

View File

@ -366,6 +366,7 @@ public:
void updateDynamicParams();
void executeDynamic(dnnl::stream strm);
virtual void redefineOutputMemory(const std::vector<VectorDims> &newShapes);
void redefineOutputMemory(const size_t port, const VectorDims& new_output_shape);
bool outputShapeDataDependency() const;
virtual void initSupportedPrimitiveDescriptors();

View File

@ -0,0 +1,465 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "non_max_suppression.hpp"
#include "utils/general_utils.h"
using namespace InferenceEngine;
using namespace dnnl::impl::cpu;
#define GET_OFF(field) offsetof(NmsCallArgs, field)
namespace ov {
namespace intel_cpu {
namespace kernel {
// Emits the whole NMS kernel body: loads the call arguments, normalizes the candidate box,
// dispatches to the soft or the hard suppression loop depending on the scale value,
// and finally emits the constant tables used by the emitters, the kernel itself and the exp injector.
template <x64::cpu_isa_t isa>
void NonMaxSuppression<isa>::generate() {
// Load emitters for a full vector (vector_step elements) and for a single element (scalar_step),
// plus the eltwise injector used to compute exp() in the soft-NMS path.
load_vector_emitter.reset(new jit_load_emitter(this, isa, Precision::FP32, Precision::FP32, vector_step));
load_scalar_emitter.reset(new jit_load_emitter(this, isa, Precision::FP32, Precision::FP32, scalar_step));
exp_injector.reset(new x64::jit_uni_eltwise_injector_f32<isa>(this, dnnl::impl::alg_kind::eltwise_exp, 0.f, 0.f, 1.f));
this->preamble();
// vmm_zero stays all-zero for the whole kernel: it clamps negative intersection sizes in iou()
// and is the source of the zero value written to candidate_status on suppression.
uni_vpxor(vmm_zero, vmm_zero, vmm_zero);
// Auxiliary register pools handed to the load/store emitters.
load_pool_gpr_idxs = {static_cast<size_t>(reg_load_store_mask.getIdx()), static_cast<size_t>(reg_load_table.getIdx())};
store_pool_gpr_idxs = {static_cast<size_t>(reg_load_store_mask.getIdx())};
store_pool_vec_idxs = {static_cast<size_t>(vmm_zero.getIdx())};
// Fetch the four per-coordinate array pointers of the already selected boxes from NmsCallArgs.
mov(reg_boxes_coord0, ptr[reg_params + GET_OFF(selected_boxes_coord[0])]);
mov(reg_boxes_coord1, ptr[reg_params + GET_OFF(selected_boxes_coord[0]) + 1 * sizeof(size_t)]);
mov(reg_boxes_coord2, ptr[reg_params + GET_OFF(selected_boxes_coord[0]) + 2 * sizeof(size_t)]);
mov(reg_boxes_coord3, ptr[reg_params + GET_OFF(selected_boxes_coord[0]) + 3 * sizeof(size_t)]);
mov(reg_candidate_box, ptr[reg_params + GET_OFF(candidate_box)]);
mov(reg_candidate_status, ptr[reg_params + GET_OFF(candidate_status)]);
mov(reg_boxes_num, ptr[reg_params + GET_OFF(selected_boxes_num)]);
mov(reg_iou_threshold, ptr[reg_params + GET_OFF(iou_threshold)]);
// soft
mov(reg_score_threshold, ptr[reg_params + GET_OFF(score_threshold)]);
mov(reg_score, ptr[reg_params + GET_OFF(score)]);
mov(reg_scale, ptr[reg_params + GET_OFF(scale)]);
// could use rcx(reg_table) and rdi(reg_temp) now as abi parse finished
mov(reg_table, l_table_constant);
// The word located vlen bytes into the table is the 0x0001 emitted by prepare_table():
// a mask that keeps only lane 0.
if (x64::mayiuse(x64::avx512_core)) {
kmovw(k_mask_one, word[reg_table + vlen]);
}
// Broadcast the thresholds and the four candidate coordinates to every lane.
uni_vbroadcastss(vmm_iou_threshold, ptr[reg_iou_threshold]);
uni_vbroadcastss(vmm_score_threshold, ptr[reg_score_threshold]);
uni_vbroadcastss(vmm_candidate_coord0, ptr[reg_candidate_box]);
uni_vbroadcastss(vmm_candidate_coord1, ptr[reg_candidate_box + 1 * sizeof(float)]);
uni_vbroadcastss(vmm_candidate_coord2, ptr[reg_candidate_box + 2 * sizeof(float)]);
uni_vbroadcastss(vmm_candidate_coord3, ptr[reg_candidate_box + 3 * sizeof(float)]);
// Normalize the candidate box so that coord0/coord1 hold the minimums and coord2/coord3 the maximums.
if (m_jcp.box_encode_type == NMSBoxEncodeType::CORNER) {
// box format: y1, x1, y2, x2
uni_vminps(vmm_temp1, vmm_candidate_coord0, vmm_candidate_coord2);
uni_vmaxps(vmm_temp2, vmm_candidate_coord0, vmm_candidate_coord2);
uni_vmovups(vmm_candidate_coord0, vmm_temp1);
uni_vmovups(vmm_candidate_coord2, vmm_temp2);
uni_vminps(vmm_temp1, vmm_candidate_coord1, vmm_candidate_coord3);
uni_vmaxps(vmm_temp2, vmm_candidate_coord1, vmm_candidate_coord3);
uni_vmovups(vmm_candidate_coord1, vmm_temp1);
uni_vmovups(vmm_candidate_coord3, vmm_temp2);
} else {
// box format: x_center, y_center, width, height --> y1, x1, y2, x2
// ptr[reg_table] points at the broadcast 0.5f constant.
uni_vmulps(vmm_temp1, vmm_candidate_coord2, ptr[reg_table]); // width/2
uni_vmulps(vmm_temp2, vmm_candidate_coord3, ptr[reg_table]); // height/2
uni_vaddps(vmm_temp3, vmm_candidate_coord0, vmm_temp1); // x_center + width/2
uni_vmovups(vmm_candidate_coord3, vmm_temp3);
uni_vaddps(vmm_temp3, vmm_candidate_coord1, vmm_temp2); // y_center + height/2
uni_vmovups(vmm_candidate_coord2, vmm_temp3);
uni_vsubps(vmm_temp3, vmm_candidate_coord0, vmm_temp1); // x_center - width/2
uni_vsubps(vmm_temp4, vmm_candidate_coord1, vmm_temp2); // y_center - height/2
uni_vmovups(vmm_candidate_coord1, vmm_temp3);
uni_vmovups(vmm_candidate_coord0, vmm_temp4);
}
// check from last to first
// Move every coordinate pointer past the last selected box; the loops step backwards from there.
imul(reg_temp_64, reg_boxes_num, sizeof(float));
add(reg_boxes_coord0, reg_temp_64); // y1
add(reg_boxes_coord1, reg_temp_64); // x1
add(reg_boxes_coord2, reg_temp_64); // y2
add(reg_boxes_coord3, reg_temp_64); // x2
Xbyak::Label hard_nms_label;
Xbyak::Label nms_end_label;
// Runtime dispatch: an all-zero bit pattern of the scale value selects hard NMS, anything else soft NMS.
mov(reg_temp_32, ptr[reg_scale]);
test(reg_temp_32, reg_temp_32);
jz(hard_nms_label, T_NEAR);
soft_nms();
jmp(nms_end_label, T_NEAR);
L(hard_nms_label);
hard_nms();
L(nms_end_label);
this->postamble();
// Data sections are emitted after the code: emitter tables, the kernel constants, and the exp injector table.
load_vector_emitter->emit_data();
load_scalar_emitter->emit_data();
prepare_table();
exp_injector->prepare_table();
}
// Emits the hard-NMS loops: the candidate is compared against the selected boxes from last to first,
// first in chunks of vector_step boxes and then one box at a time for the tail.
// As soon as any IoU reaches the threshold, zero is written to candidate_status and the loops terminate.
template <x64::cpu_isa_t isa>
void NonMaxSuppression<isa>::hard_nms() {
Xbyak::Label main_loop_label_hard;
Xbyak::Label main_loop_end_label_hard;
Xbyak::Label tail_loop_label_hard;
Xbyak::Label terminate_label_hard;
L(main_loop_label_hard);
{
// Leave the vector loop when fewer than vector_step boxes remain.
cmp(reg_boxes_num, vector_step);
jl(main_loop_end_label_hard, T_NEAR);
// Step the coordinate pointers back by one vector before loading.
sub(reg_boxes_coord0, vector_step * sizeof(float));
sub(reg_boxes_coord1, vector_step * sizeof(float));
sub(reg_boxes_coord2, vector_step * sizeof(float));
sub(reg_boxes_coord3, vector_step * sizeof(float));
// iou result is in vmm_temp3
iou(vector_step);
sub(reg_boxes_num, vector_step);
// The flags tested by jz below are the ones produced inside suppressed_by_iou(), not by the sub above.
suppressed_by_iou(false);
// if zero continue, else set result to suppressed and terminate
jz(main_loop_label_hard, T_NEAR);
// Store the first dword of the all-zero register to candidate_status.
uni_vpextrd(ptr[reg_candidate_status], Xbyak::Xmm(vmm_zero.getIdx()), 0);
jmp(terminate_label_hard, T_NEAR);
}
L(main_loop_end_label_hard);
L(tail_loop_label_hard);
{
// Tail: process the remaining boxes one by one.
cmp(reg_boxes_num, 1);
jl(terminate_label_hard, T_NEAR);
sub(reg_boxes_coord0, scalar_step * sizeof(float));
sub(reg_boxes_coord1, scalar_step * sizeof(float));
sub(reg_boxes_coord2, scalar_step * sizeof(float));
sub(reg_boxes_coord3, scalar_step * sizeof(float));
// iou result is in vmm_temp3
iou(scalar_step);
sub(reg_boxes_num, scalar_step);
// Scalar mode: only lane 0 of the comparison result is taken into account.
suppressed_by_iou(true);
jz(tail_loop_label_hard, T_NEAR);
uni_vpextrd(ptr[reg_candidate_status], Xbyak::Xmm(vmm_zero.getIdx()), 0);
jmp(terminate_label_hard, T_NEAR);
}
L(terminate_label_hard);
}
// Emits the soft-NMS loops. For every selected box (last to first, vector chunks then scalar tail)
// the candidate score stored at reg_score is multiplied by exp(scale * iou * iou) and written back.
// The candidate is marked as suppressed (zero written to candidate_status) when the updated score
// is less or equal than score_threshold, or - if is_soft_suppressed_by_iou is set - when an IoU
// reaches iou_threshold.
template <x64::cpu_isa_t isa>
void NonMaxSuppression<isa>::soft_nms() {
uni_vbroadcastss(vmm_scale, ptr[reg_scale]);
Xbyak::Label main_loop_label;
Xbyak::Label main_loop_end_label;
Xbyak::Label tail_loop_label;
Xbyak::Label terminate_label;
Xbyak::Label main_loop_label_soft;
Xbyak::Label tail_loop_label_soft;
L(main_loop_label);
{
// Leave the vector loop when fewer than vector_step boxes remain.
cmp(reg_boxes_num, vector_step);
jl(main_loop_end_label, T_NEAR);
sub(reg_boxes_coord0, vector_step * sizeof(float));
sub(reg_boxes_coord1, vector_step * sizeof(float));
sub(reg_boxes_coord2, vector_step * sizeof(float));
sub(reg_boxes_coord3, vector_step * sizeof(float));
// result(iou and weight) is in vmm_temp3
iou(vector_step);
sub(reg_boxes_num, vector_step);
// soft suppressed by iou_threshold
if (m_jcp.is_soft_suppressed_by_iou) {
suppressed_by_iou(false);
// if zero continue soft suppression, else set result to suppressed and terminate
jz(main_loop_label_soft, T_NEAR);
uni_vpextrd(ptr[reg_candidate_status], Xbyak::Xmm(vmm_zero.getIdx()), 0);
jmp(terminate_label, T_NEAR);
L(main_loop_label_soft);
}
// weight: std::exp(scale * iou * iou)
soft_coeff();
// vector weights multiply
horizontal_mul();
uni_vbroadcastss(vmm_temp1, ptr[reg_score]);
// new score in vmm3[0]
uni_vmulps(vmm_temp3, vmm_temp3, vmm_temp1);
// store new score
uni_vmovss(ptr[reg_score], vmm_temp3);
// cmpps(_CMP_LE_OS) if new score is less or equal than score_threshold
suppressed_by_score();
jz(main_loop_label, T_NEAR);
uni_vpextrd(ptr[reg_candidate_status], Xbyak::Xmm(vmm_zero.getIdx()), 0);
jmp(terminate_label, T_NEAR);
}
L(main_loop_end_label);
L(tail_loop_label);
{
// Tail: process the remaining boxes one by one.
cmp(reg_boxes_num, 1);
jl(terminate_label, T_NEAR);
sub(reg_boxes_coord0, scalar_step * sizeof(float));
sub(reg_boxes_coord1, scalar_step * sizeof(float));
sub(reg_boxes_coord2, scalar_step * sizeof(float));
sub(reg_boxes_coord3, scalar_step * sizeof(float));
iou(scalar_step);
sub(reg_boxes_num, scalar_step);
// soft suppressed by iou_threshold
if (m_jcp.is_soft_suppressed_by_iou) {
suppressed_by_iou(true);
jz(tail_loop_label_soft, T_NEAR);
uni_vpextrd(ptr[reg_candidate_status], Xbyak::Xmm(vmm_zero.getIdx()), 0);
jmp(terminate_label, T_NEAR);
L(tail_loop_label_soft);
}
soft_coeff();
uni_vbroadcastss(vmm_temp1, ptr[reg_score]);
// vmm3[0] is valid, no horizontal mul is needed.
uni_vmulps(vmm_temp3, vmm_temp3, vmm_temp1);
uni_vmovss(ptr[reg_score], vmm_temp3);
// cmpps(_CMP_LE_OS) if new score is less or equal than score_threshold
suppressed_by_score();
jz(tail_loop_label, T_NEAR);
uni_vpextrd(ptr[reg_candidate_status], Xbyak::Xmm(vmm_zero.getIdx()), 0);
jmp(terminate_label, T_NEAR);
}
L(terminate_label);
}
// Emits the comparison of the IoU values in vmm_temp3 against vmm_iou_threshold (iou >= threshold).
// The result is left in the CPU flags: callers follow this code with jz, which is taken
// when no checked lane reached the threshold.
// is_scalar - when true only lane 0 is checked (single-box tail iteration), otherwise all lanes.
// vmm_temp3 is preserved on every path because soft suppression may use it afterwards.
template <x64::cpu_isa_t isa>
void NonMaxSuppression<isa>::suppressed_by_iou(bool is_scalar) {
if (x64::mayiuse(x64::avx512_core)) {
vcmpps(k_mask, vmm_temp3, vmm_iou_threshold, 0x0D); // _CMP_GE_OS. vcmpps w/ kmask only on V5
// Keep only lane 0 of the mask in scalar mode.
if (is_scalar)
kandw(k_mask, k_mask, k_mask_one);
kortestw(k_mask, k_mask); // bitwise check if all zero
} else if (x64::mayiuse(x64::avx)) {
// vex instructions with xmm on avx and ymm on avx2
vcmpps(vmm_temp4, vmm_temp3, vmm_iou_threshold, 0x0D); // xmm and ymm only on V1.
if (is_scalar) {
// Test only the first dword of the comparison result.
uni_vpextrd(reg_temp_32, Xbyak::Xmm(vmm_temp4.getIdx()), 0);
test(reg_temp_32, reg_temp_32);
} else {
uni_vtestps(vmm_temp4, vmm_temp4); // vtestps: sign bit check if all zeros, ymm and xmm only on V1, N/A on V5
}
} else {
// pure sse path: make sure not to spoil vmm_temp3, which may be used afterwards by soft suppression
// The ordered "greater or equal" is composed from two compares: (both operands ordered) AND (not less than).
uni_vmovups(vmm_temp4, vmm_temp3);
cmpps(vmm_temp4, vmm_iou_threshold, 0x07); // order compare, 0 for at least one is NaN
uni_vmovups(vmm_temp2, vmm_temp3);
cmpps(vmm_temp2, vmm_iou_threshold, 0x05); // _CMP_GE_US on sse, no direct _CMP_GE_OS supported.
uni_vandps(vmm_temp4, vmm_temp4, vmm_temp2);
if (is_scalar) {
uni_vpextrd(reg_temp_32, Xbyak::Xmm(vmm_temp4.getIdx()), 0);
test(reg_temp_32, reg_temp_32);
} else {
uni_vtestps(vmm_temp4, vmm_temp4); // ptest: bitwise check if all zeros, on sse41
}
}
}
// Emits the comparison of the updated score in lane 0 of vmm_temp3 against vmm_score_threshold
// (score <= threshold). The result is left in the CPU flags: callers follow this code with jz,
// which is taken when the score is still above the threshold.
// Only lane 0 is checked on every path. On the pure SSE path vmm_temp3 is overwritten by the compare.
template <x64::cpu_isa_t isa>
void NonMaxSuppression<isa>::suppressed_by_score() {
if (x64::mayiuse(x64::avx512_core)) {
vcmpps(k_mask, vmm_temp3, vmm_score_threshold, 0x02); // vcmpps w/ kmask only on V5, w/o kmask version N/A on V5
kandw(k_mask, k_mask, k_mask_one);
kortestw(k_mask, k_mask); // bitwise check if all zero
} else if (x64::mayiuse(x64::avx)) {
vcmpps(vmm_temp4, vmm_temp3, vmm_score_threshold, 0x02);
uni_vpextrd(reg_temp_32, Xbyak::Xmm(vmm_temp4.getIdx()), 0);
test(reg_temp_32, reg_temp_32);
} else {
cmpps(vmm_temp3, vmm_score_threshold, 0x02); // _CMP_LE_OS on sse
uni_vpextrd(reg_temp_32, Xbyak::Xmm(vmm_temp3.getIdx()), 0);
test(reg_temp_32, reg_temp_32);
}
}
// Emits the computation of the intersection-over-union between the candidate box (vmm_candidate_coord*)
// and ele_num selected boxes loaded from the current reg_boxes_coord* pointers.
// ele_num must be scalar_step or vector_step; anything else throws at code-generation time.
// The IoU values are left in vmm_temp3; vmm_temp1, vmm_temp2, vmm_temp4 and vmm_boxes_coord* are clobbered.
template <x64::cpu_isa_t isa>
void NonMaxSuppression<isa>::iou(int ele_num) {
// Loads ele_num floats from the address in reg_src into vmm_dst with the matching load emitter.
auto load = [&](Xbyak::Reg64 reg_src, Vmm vmm_dst) {
if (ele_num != scalar_step && ele_num != vector_step)
OPENVINO_THROW("NMS JIT implementation supports load emitter with only element count scalar_step or vector_step! Get: ", ele_num);
const auto& load_emitter = ele_num == 1 ? load_scalar_emitter : load_vector_emitter;
load_emitter->emit_code({static_cast<size_t>(reg_src.getIdx())}, {static_cast<size_t>(vmm_dst.getIdx())},
{}, {load_pool_gpr_idxs});
};
load(reg_boxes_coord0, vmm_boxes_coord0);
load(reg_boxes_coord1, vmm_boxes_coord1);
load(reg_boxes_coord2, vmm_boxes_coord2);
load(reg_boxes_coord3, vmm_boxes_coord3);
// Normalize the loaded boxes the same way generate() normalizes the candidate:
// coord0/coord1 hold the minimums, coord2/coord3 the maximums.
if (m_jcp.box_encode_type == NMSBoxEncodeType::CORNER) {
// box format: y1, x1, y2, x2
uni_vminps(vmm_temp1, vmm_boxes_coord0, vmm_boxes_coord2);
uni_vmaxps(vmm_temp2, vmm_boxes_coord0, vmm_boxes_coord2);
uni_vmovups(vmm_boxes_coord0, vmm_temp1);
uni_vmovups(vmm_boxes_coord2, vmm_temp2);
uni_vminps(vmm_temp1, vmm_boxes_coord1, vmm_boxes_coord3);
uni_vmaxps(vmm_temp2, vmm_boxes_coord1, vmm_boxes_coord3);
uni_vmovups(vmm_boxes_coord1, vmm_temp1);
uni_vmovups(vmm_boxes_coord3, vmm_temp2);
} else {
// box format: x_center, y_center, width, height --> y1, x1, y2, x2
// ptr[reg_table] points at the broadcast 0.5f constant.
uni_vmulps(vmm_temp1, vmm_boxes_coord2, ptr[reg_table]); // width/2
uni_vmulps(vmm_temp2, vmm_boxes_coord3, ptr[reg_table]); // height/2
uni_vaddps(vmm_temp3, vmm_boxes_coord0, vmm_temp1); // x_center + width/2
uni_vmovups(vmm_boxes_coord3, vmm_temp3);
uni_vaddps(vmm_temp3, vmm_boxes_coord1, vmm_temp2); // y_center + height/2
uni_vmovups(vmm_boxes_coord2, vmm_temp3);
uni_vsubps(vmm_temp3, vmm_boxes_coord0, vmm_temp1); // x_center - width/2
uni_vsubps(vmm_temp4, vmm_boxes_coord1, vmm_temp2); // y_center - height/2
uni_vmovups(vmm_boxes_coord1, vmm_temp3);
uni_vmovups(vmm_boxes_coord0, vmm_temp4);
}
// Areas: (y2 - y1) * (x2 - x1) for the loaded boxes and for the candidate.
uni_vsubps(vmm_temp1, vmm_boxes_coord2, vmm_boxes_coord0);
uni_vsubps(vmm_temp2, vmm_boxes_coord3, vmm_boxes_coord1);
uni_vmulps(vmm_temp1, vmm_temp1, vmm_temp2); // boxes area
uni_vsubps(vmm_temp2, vmm_candidate_coord2, vmm_candidate_coord0);
uni_vsubps(vmm_temp3, vmm_candidate_coord3, vmm_candidate_coord1);
uni_vmulps(vmm_temp2, vmm_temp2, vmm_temp3); // candidate(bc) area // candidate area calculate once and check if 0
uni_vaddps(vmm_temp1, vmm_temp1, vmm_temp2); // areaI + areaJ to free vmm_temp2
// y of intersection
uni_vminps(vmm_temp3, vmm_boxes_coord2, vmm_candidate_coord2); // min(Ymax)
uni_vmaxps(vmm_temp4, vmm_boxes_coord0, vmm_candidate_coord0); // max(Ymin)
uni_vsubps(vmm_temp3, vmm_temp3, vmm_temp4); // min(Ymax) - max(Ymin)
// Clamp to zero: a negative extent means the boxes do not overlap along this axis.
uni_vmaxps(vmm_temp3, vmm_temp3, vmm_zero);
// x of intersection
uni_vminps(vmm_temp4, vmm_boxes_coord3, vmm_candidate_coord3); // min(Xmax)
uni_vmaxps(vmm_temp2, vmm_boxes_coord1, vmm_candidate_coord1); // max(Xmin)
uni_vsubps(vmm_temp4, vmm_temp4, vmm_temp2); // min(Xmax) - max(Xmin)
uni_vmaxps(vmm_temp4, vmm_temp4, vmm_zero);
// intersection_area
uni_vmulps(vmm_temp3, vmm_temp3, vmm_temp4);
// iou: intersection_area / (areaI + areaJ - intersection_area);
uni_vsubps(vmm_temp1, vmm_temp1, vmm_temp3);
uni_vdivps(vmm_temp3, vmm_temp3, vmm_temp1);
}
// Emits the soft-NMS weight computation: std::exp(scale * iou * iou).
// Input: IoU values in vmm_temp3, scale broadcast in vmm_scale. Output: weights in vmm_temp3 (in place).
template <x64::cpu_isa_t isa>
void NonMaxSuppression<isa>::soft_coeff() {
uni_vmulps(vmm_temp3, vmm_temp3, vmm_temp3);
uni_vmulps(vmm_temp3, vmm_temp3, vmm_scale);
// Apply exp() to the single register vmm_temp3 (the range is [idx, idx + 1)).
exp_injector->compute_vector_range(vmm_temp3.getIdx(), vmm_temp3.getIdx() + 1);
}
// Emits the product of the four floats held in xmm_weight; the result ends up in lane 0 of xmm_weight.
// xmm_aux is used as scratch. The lane listings in the comments below are 1-based.
template <x64::cpu_isa_t isa>
void NonMaxSuppression<isa>::horizontal_mul_xmm(const Xbyak::Xmm &xmm_weight, const Xbyak::Xmm &xmm_aux) {
uni_vmovshdup(xmm_aux, xmm_weight); // weight:1,2,3,4; aux:2,2,4,4
uni_vmulps(xmm_weight, xmm_weight, xmm_aux); // weight:1*2,2*2,3*4,4*4
uni_vmovhlps(xmm_aux, xmm_aux, xmm_weight); // aux:3*4,4*4,4,4
uni_vmulps(xmm_weight, xmm_weight, xmm_aux); // weight:1*2*3*4,...
}
// Emits the horizontal product of all lanes of vmm_weight (vmm_temp3); vmm_temp1 and vmm_temp2 are used as aux.
// The wider registers are first folded down to a single 128-bit part by multiplying their 128-bit parts
// together, then horizontal_mul_xmm() leaves the final product in lane 0 of vmm_temp3.
template <x64::cpu_isa_t isa>
inline void NonMaxSuppression<isa>::horizontal_mul() {
Xbyak::Xmm xmm_weight = Xbyak::Xmm(vmm_temp3.getIdx());
Xbyak::Xmm xmm_temp1 = Xbyak::Xmm(vmm_temp1.getIdx());
Xbyak::Xmm xmm_temp2 = Xbyak::Xmm(vmm_temp2.getIdx());
if (isa == x64::sse41) {
horizontal_mul_xmm(xmm_weight, xmm_temp1);
} else if (isa == x64::avx2) {
// Multiply the low and the high 128-bit halves of the ymm register.
Xbyak::Ymm ymm_weight = Xbyak::Ymm(vmm_temp3.getIdx());
vextractf128(xmm_temp1, ymm_weight, 0);
vextractf128(xmm_temp2, ymm_weight, 1);
uni_vmulps(xmm_weight, xmm_temp1, xmm_temp2);
horizontal_mul_xmm(xmm_weight, xmm_temp1);
} else {
// Multiply the four 128-bit parts of the zmm register: (part0 * part1) * (part3 * part2).
// Part 3 is extracted last because it overwrites the low part of the weight register itself.
Xbyak::Zmm zmm_weight = Xbyak::Zmm(vmm_temp3.getIdx());
vextractf32x4(xmm_temp1, zmm_weight, 0);
vextractf32x4(xmm_temp2, zmm_weight, 1);
uni_vmulps(xmm_temp1, xmm_temp1, xmm_temp2);
vextractf32x4(xmm_temp2, zmm_weight, 2);
vextractf32x4(xmm_weight, zmm_weight, 3);
uni_vmulps(xmm_weight, xmm_weight, xmm_temp2);
uni_vmulps(xmm_weight, xmm_weight, xmm_temp1);
horizontal_mul_xmm(xmm_weight, xmm_temp1);
}
}
template class NonMaxSuppression<x64::avx512_core>;
template class NonMaxSuppression<x64::avx2>;
template class NonMaxSuppression<x64::sse41>;
} // namespace kernel
} // namespace intel_cpu
} // namespace ov

View File

@ -0,0 +1,152 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "jit_kernel_base.hpp"
#if defined(OPENVINO_ARCH_X86_64)
#include "emitters/x64/jit_load_store_emitters.hpp"
#include "cpu/x64/injectors/jit_uni_eltwise_injector.hpp"
#endif // OPENVINO_ARCH_X86_64
namespace ov {
namespace intel_cpu {
// Layout of the four coordinates describing a box.
enum class NMSBoxEncodeType {
// Two opposite corners: y1, x1, y2, x2.
CORNER,
// Center point and size: x_center, y_center, width, height.
CENTER
};
#if defined(OPENVINO_ARCH_X86_64)
namespace kernel {
// Parameters fixed at kernel generation time.
struct NmsCompileParams {
// Coordinate layout of the input boxes; selects the normalization code emitted by the kernel.
NMSBoxEncodeType box_encode_type;
// When true, the soft-NMS loops additionally suppress the candidate once an IoU reaches iou_threshold.
bool is_soft_suppressed_by_iou;
};
// Runtime arguments of a single kernel call. The kernel reads the fields by offset (offsetof),
// so the layout must stay in sync with the generated code.
struct NmsCallArgs {
// Four arrays, one per coordinate, holding the already selected boxes.
const void* selected_boxes_coord[4];
// Number of selected boxes stored in each of the arrays above.
size_t selected_boxes_num;
// Four consecutive floats with the coordinates of the candidate box.
const void* candidate_box;
// Points to the float IoU threshold.
const void* iou_threshold;
// Receives a 32-bit zero when the candidate gets suppressed; not written otherwise.
void* candidate_status;
// for soft suppression, score *= exp(scale * iou * iou);
const void* score_threshold;
// Points to the float scale; an all-zero bit pattern selects hard NMS.
const void* scale;
// Candidate score, read and updated in place by soft NMS.
void* score;
};
// JIT kernel that checks one candidate box against the set of already selected boxes.
// Depending on the runtime scale value it performs hard NMS (IoU threshold only) or soft NMS
// (score decay by exp(scale * iou * iou)). The template parameter selects the vector register width.
template <dnnl::impl::cpu::x64::cpu_isa_t isa>
class NonMaxSuppression : public JitKernel<NmsCompileParams, NmsCallArgs> {
public:
DECLARE_CPU_JIT_AUX_FUNCTIONS(NonMaxSuppression)
explicit NonMaxSuppression(const NmsCompileParams& jcp) : JitKernel(jit_name(), jcp, isa) {}
// Emits the kernel code.
void generate() override;
private:
// Vector register type matching the ISA: Zmm for avx512_core, Ymm for avx2, Xmm otherwise.
using Vmm = typename dnnl::impl::utils::conditional3<isa == dnnl::impl::cpu::x64::avx512_core, Xbyak::Zmm,
isa == dnnl::impl::cpu::x64::avx2, Xbyak::Ymm,
Xbyak::Xmm>::type;
// Vector length in bytes and the number of boxes processed per vector / scalar iteration.
uint32_t vlen = dnnl::impl::cpu::x64::cpu_isa_traits<isa>::vlen;
const int vector_step = vlen / sizeof(float);
const int scalar_step = 1;
// Pointers into the per-coordinate arrays of the selected boxes.
Xbyak::Reg64 reg_boxes_coord0 = r8;
Xbyak::Reg64 reg_boxes_coord1 = r9;
Xbyak::Reg64 reg_boxes_coord2 = r10;
Xbyak::Reg64 reg_boxes_coord3 = r11;
Xbyak::Reg64 reg_candidate_box = r12;
Xbyak::Reg64 reg_candidate_status = r13;
// Number of selected boxes still to be checked.
Xbyak::Reg64 reg_boxes_num = r14;
Xbyak::Reg64 reg_iou_threshold = r15;
// more for soft
Xbyak::Reg64 reg_score_threshold = rdx;
Xbyak::Reg64 reg_score = rbp;
Xbyak::Reg64 reg_scale = rsi;
// Auxiliary registers passed to the load/store emitters.
Xbyak::Reg64 reg_load_table = rax;
Xbyak::Reg64 reg_load_store_mask = rbx;
// reuse
// NOTE(review): rcx and rdi may double as ABI argument registers; generate() assigns them only
// after all call arguments are loaded - confirm before reordering.
Xbyak::Label l_table_constant;
Xbyak::Reg64 reg_table = rcx;
Xbyak::Reg64 reg_temp_64 = rdi;
Xbyak::Reg32 reg_temp_32 = edi;
// First ABI argument register: pointer to NmsCallArgs.
const Xbyak::Reg64 reg_params = Xbyak::Reg64(dnnl::impl::cpu::x64::abi_param_regs[0]);
std::unique_ptr<jit_load_emitter> load_vector_emitter = nullptr;
std::unique_ptr<jit_load_emitter> load_scalar_emitter = nullptr;
std::vector<size_t> store_pool_gpr_idxs;
std::vector<size_t> store_pool_vec_idxs;
std::vector<size_t> load_pool_gpr_idxs;
// Coordinates of the selected boxes loaded in the current iteration.
Vmm vmm_boxes_coord0 = Vmm(1);
Vmm vmm_boxes_coord1 = Vmm(2);
Vmm vmm_boxes_coord2 = Vmm(3);
Vmm vmm_boxes_coord3 = Vmm(4);
// Candidate coordinates broadcast to all lanes.
Vmm vmm_candidate_coord0 = Vmm(5);
Vmm vmm_candidate_coord1 = Vmm(6);
Vmm vmm_candidate_coord2 = Vmm(7);
Vmm vmm_candidate_coord3 = Vmm(8);
// Scratch registers; vmm_temp3 carries the IoU / weight / new score between the helpers.
Vmm vmm_temp1 = Vmm(9);
Vmm vmm_temp2 = Vmm(10);
Vmm vmm_temp3 = Vmm(11);
Vmm vmm_temp4 = Vmm(12);
Vmm vmm_iou_threshold = Vmm(13);
// Kept all-zero for the whole kernel.
Vmm vmm_zero = Vmm(15);
// soft
Vmm vmm_score_threshold = Vmm(14);
Vmm vmm_scale = Vmm(0);
// avx512 only: comparison result mask and the constant mask selecting lane 0.
Xbyak::Opmask k_mask = Xbyak::Opmask(7);
Xbyak::Opmask k_mask_one = Xbyak::Opmask(6);
std::shared_ptr<dnnl::impl::cpu::x64::jit_uni_eltwise_injector_f32<isa>> exp_injector;
// Code emission helpers, see the definitions for the register contracts.
inline void hard_nms();
inline void soft_nms();
inline void suppressed_by_iou(bool is_scalar);
inline void suppressed_by_score();
inline void iou(int ele_num);
inline void soft_coeff();
inline void horizontal_mul_xmm(const Xbyak::Xmm& xmm_weight, const Xbyak::Xmm& xmm_aux);
inline void horizontal_mul();
// Emits the constant table addressed through l_table_constant:
// vlen bytes filled with 0.5f followed by the 16-bit value 0x0001.
inline void prepare_table() {
// Emits val once per 32-bit lane of a vector register.
auto broadcast_d = [&](int val) {
for (size_t d = 0; d < vlen / sizeof(int); ++d) {
dd(val);
}
};
align(64);
L(l_table_constant);
broadcast_d(0x3f000000); // 0.5f
// Read at offset vlen by generate() into k_mask_one on avx512.
dw(0x0001);
}
};
} // namespace kernel
#endif // OPENVINO_ARCH_X86_64
} // namespace intel_cpu
} // namespace ov

File diff suppressed because it is too large Load Diff

View File

@ -4,82 +4,43 @@
#pragma once
#include <ie_common.h>
#include <node.h>
#include <string>
#include <memory>
#include <vector>
#include "node.h"
#include "kernels/x64/non_max_suppression.hpp"
#define BOX_COORD_NUM 4
using namespace InferenceEngine;
namespace ov {
namespace intel_cpu {
namespace node {
enum class NMSBoxEncodeType {
CORNER,
CENTER
};
enum NMSCandidateStatus {
SUPPRESSED = 0,
SELECTED = 1,
UPDATED = 2
};
struct jit_nms_config_params {
NMSBoxEncodeType box_encode_type;
bool is_soft_suppressed_by_iou;
};
struct jit_nms_args {
const void* selected_boxes_coord[BOX_COORD_NUM];
size_t selected_boxes_num;
const void* candidate_box;
const void* iou_threshold;
void* candidate_status;
// for soft suppression, score *= scale * iou * iou;
const void* score_threshold;
const void* scale;
void* score;
};
struct jit_uni_nms_kernel {
void (*ker_)(const jit_nms_args *);
void operator()(const jit_nms_args *args) {
assert(ker_);
ker_(args);
}
explicit jit_uni_nms_kernel(jit_nms_config_params jcp_) : ker_(nullptr), jcp(jcp_) {}
virtual ~jit_uni_nms_kernel() {}
virtual void create_ker() = 0;
jit_nms_config_params jcp;
};
class NonMaxSuppression : public Node {
public:
NonMaxSuppression(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context);
NonMaxSuppression(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr& context);
void getSupportedDescriptors() override {};
void initSupportedPrimitiveDescriptors() override;
void execute(dnnl::stream strm) override;
void executeDynamicImpl(dnnl::stream strm) override;
bool created() const override;
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
static bool isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept;
struct filteredBoxes {
struct FilteredBox {
float score;
int batch_index;
int class_index;
int box_index;
filteredBoxes() = default;
filteredBoxes(float _score, int _batch_index, int _class_index, int _box_index) :
FilteredBox() = default;
FilteredBox(float _score, int _batch_index, int _class_index, int _box_index) :
score(_score), batch_index(_batch_index), class_index(_class_index), box_index(_box_index) {}
};
@ -89,66 +50,101 @@ public:
int suppress_begin_index;
};
float intersectionOverUnion(const float *boxesI, const float *boxesJ);
void nmsWithSoftSigma(const float *boxes, const float *scores, const SizeVector &boxesStrides,
const SizeVector &scoresStrides, std::vector<filteredBoxes> &filtBoxes);
void nmsWithoutSoftSigma(const float *boxes, const float *scores, const SizeVector &boxesStrides,
const SizeVector &scoresStrides, std::vector<filteredBoxes> &filtBoxes);
void executeDynamicImpl(dnnl::stream strm) override;
bool isExecutable() const override;
bool needShapeInfer() const override { return false; }
void prepareParams() override;
struct Point2D {
float x, y;
Point2D(const float px = 0.f, const float py = 0.f) : x(px), y(py) {}
Point2D operator+(const Point2D& p) const {
return Point2D(x + p.x, y + p.y);
}
Point2D& operator+=(const Point2D& p) {
x += p.x;
y += p.y;
return *this;
}
Point2D operator-(const Point2D& p) const {
return Point2D(x - p.x, y - p.y);
}
Point2D operator*(const float coeff) const {
return Point2D(x * coeff, y * coeff);
}
};
private:
// input
enum {
NMS_BOXES,
NMS_SCORES,
NMS_MAXOUTPUTBOXESPERCLASS,
NMS_IOUTHRESHOLD,
NMS_SCORETHRESHOLD,
NMS_SOFTNMSSIGMA,
NMS_MAX_OUTPUT_BOXES_PER_CLASS,
NMS_IOU_THRESHOLD,
NMS_SCORE_THRESHOLD,
NMS_SOFT_NMS_SIGMA,
};
// output
enum {
NMS_SELECTEDINDICES,
NMS_SELECTEDSCORES,
NMS_VALIDOUTPUTS
NMS_SELECTED_INDICES,
NMS_SELECTED_SCORES,
NMS_VALID_OUTPUTS
};
NMSBoxEncodeType boxEncodingType = NMSBoxEncodeType::CORNER;
bool sortResultDescending = true;
float intersectionOverUnion(const float *boxesI, const float *boxesJ);
size_t numBatches = 0;
size_t numBoxes = 0;
size_t numClasses = 0;
float rotatedIntersectionOverUnion(const Point2D (&vertices_0)[4], const float area_0, const float* box_1);
size_t maxOutputBoxesPerClass = 0lu;
float iouThreshold = 0.0f;
float scoreThreshold = 0.0f;
float softNMSSigma = 0.0f;
float scale = 1.f;
// control placeholder for NMS in new opset.
bool isSoftSuppressedByIOU = false;
void nmsWithSoftSigma(const float *boxes, const float *scores, const InferenceEngine::SizeVector &boxesStrides,
const InferenceEngine::SizeVector &scoresStrides, std::vector<FilteredBox> &filtBoxes);
bool m_outStaticShape = false;
void nmsWithoutSoftSigma(const float *boxes, const float *scores, const InferenceEngine::SizeVector &boxesStrides,
const InferenceEngine::SizeVector &scoresStrides, std::vector<FilteredBox> &filtBoxes);
std::string errorPrefix;
void nmsRotated(const float *boxes, const float *scores, const InferenceEngine::SizeVector &boxesStrides,
const InferenceEngine::SizeVector &scoresStrides, std::vector<FilteredBox> &filtBoxes);
std::vector<std::vector<size_t>> numFiltBox;
const std::string inType = "input", outType = "output";
void check1DInput(const Shape& shape,
const std::string& name,
const size_t port);
void checkPrecision(const Precision& prec, const std::vector<Precision>& precList, const std::string& name, const std::string& type);
void check1DInput(const Shape& shape, const std::vector<Precision>& precList, const std::string& name, const size_t port);
void checkOutput(const Shape& shape, const std::vector<Precision>& precList, const std::string& name, const size_t port);
void checkOutput(const Shape& shape,
const std::string& name,
const size_t port);
void createJitKernel();
std::shared_ptr<jit_uni_nms_kernel> nms_kernel = nullptr;
NMSBoxEncodeType boxEncodingType = NMSBoxEncodeType::CORNER;
bool m_sort_result_descending = true;
bool m_clockwise = false;
bool m_rotated_boxes = false;
size_t m_coord_num = 1lu;
size_t m_batches_num = 0lu;
size_t m_boxes_num = 0lu;
size_t m_classes_num = 0lu;
size_t m_max_output_boxes_per_class = 0lu; // Original value of input NMS_MAX_OUTPUT_BOXES_PER_CLASS
size_t m_output_boxes_per_class = 0lu; // Actual number of output boxes
float m_iou_threshold = 0.f;
float m_score_threshold = 0.f;
float m_soft_nms_sigma = 0.f;
float m_scale = 0.f;
// control placeholder for NMS in new opset.
bool m_is_soft_suppressed_by_iou = false;
bool m_out_static_shape = false;
std::vector<std::vector<size_t>> m_num_filtered_boxes;
const std::string inType = "input";
const std::string outType = "output";
bool m_defined_outputs[NMS_VALID_OUTPUTS + 1] = { false, false, false };
std::vector<FilteredBox> m_filtered_boxes;
std::shared_ptr<kernel::JitKernelBase> m_jit_kernel;
};
} // namespace node

View File

@ -197,6 +197,8 @@ std::vector<std::string> disabledTestPatterns() {
R"(.*RDFTLayerTest.*SignalSize=().*)",
// Issue: 123815 (Tests are sensitive to available thread count on testing machines)
R"(.*smoke_Snippets_MHA_.?D_SplitDimensionM.*)",
// Issue: 122356
R"(.*NmsRotatedOpTest.*(SortDesc=True|Clockwise=False).*)",
};
#if defined(OPENVINO_ARCH_X86)

View File

@ -0,0 +1,95 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "single_op_tests/nms_rotated.hpp"
using namespace LayerTestsDefinitions;
using namespace ov::test;
// Input shape sets used by the smoke instantiation below.
// Every set holds two InputShape entries: {partial shape, {target static shapes...}}.
// An empty partial shape `{}` is used for the static cases; the last set is dynamic and
// lists three target shapes per input.
// NOTE(review): the first entry is presumably the Boxes input [batches, boxes, 5] and the
// second the Scores input [batches, classes, boxes] (see the NMSRotated spec) - confirm.
static const std::vector<std::vector<InputShape>> input_shapes = {
// Static shapes
{
{ {}, {{1, 5, 5}} },
{ {}, {{1, 7, 5}} }
},
{
{ {}, {{2, 9, 5}} },
{ {}, {{2, 15, 9}} }
},
{
{ {}, {{5, 17, 5}} },
{ {}, {{5, 7, 17}} }
},
{
{ {}, {{9, 75, 5}} },
{ {}, {{9, 55, 75}} }
},
// Dynamic shapes
{
{ {-1, -1, 5}, {{5, 20, 5}, {3, 50, 5}, {2, 99, 5}} },
{ {-1, -1, -1}, {{5, 30, 20}, {3, 100, 50}, {2, 133, 99}} }
}
};
// Input shape sets used by the nightly instantiation below.
// Same layout as the smoke table: two InputShape entries per set, each being
// {partial shape, {target static shapes...}}; `{}` is used for the static cases.
// In the dynamic set the first input keeps the same target shape for all three
// iterations while only the middle dimension of the second input changes.
static const std::vector<std::vector<InputShape>> input_shapes_nightly = {
// Static shapes
{
{ {}, {{3, 11, 5}} },
{ {}, {{3, 15, 11}} }
},
{
{ {}, {{15, 29, 5}} },
{ {}, {{15, 31, 29}} }
},
{
{ {}, {{21, 64, 5}} },
{ {}, {{21, 32, 64}} }
},
// Dynamic shapes
{
{ {-1, -1, 5}, {{7, 35, 5}, {7, 35, 5}, {7, 35, 5}} },
{ {-1, -1, -1}, {{7, 30, 35}, {7, 100, 35}, {7, 133, 35}} }
}
};
// Empty plugin configuration: passed as the "additional plugin configuration"
// parameter of both test instantiations in this file.
const ov::AnyMap empty_plugin_config{};
// Smoke suite: f32 data / i32 indices only, all five inputs are model parameters (no constants).
// Combines 5 shape sets with 2 values each for max boxes, IOU threshold, score threshold,
// sort order and clockwise flag (5 * 2^5 = 160 cases).
INSTANTIATE_TEST_SUITE_P(smoke_, NmsRotatedOpTest,
::testing::Combine(
::testing::ValuesIn(input_shapes), // Input shapes
::testing::Values(ElementType::f32), // Boxes and scores input precisions
::testing::Values(ElementType::i32), // Max output boxes input precisions
::testing::Values(ElementType::f32), // Thresholds precisions
::testing::Values(ElementType::i32), // Output type
::testing::Values(5, 20), // Max output boxes per class
::testing::Values(0.3f, 0.7f), // IOU threshold
::testing::Values(0.3f, 0.7f), // Score threshold
::testing::Values(true, false), // Sort result descending
::testing::Values(true, false), // Clockwise
::testing::Values(false), // Is 1st input constant
::testing::Values(false), // Is 2nd input constant
::testing::Values(false), // Is 3rd input constant
::testing::Values(false), // Is 4th input constant
::testing::Values(false), // Is 5th input constant
::testing::Values(empty_plugin_config), // Additional plugin configuration
::testing::Values(utils::DEVICE_CPU)), // Device name
NmsRotatedOpTest::getTestCaseName);
// Nightly suite: reduced-precision data (f16/bf16) with i64 indices, and every input
// toggled between model parameter and constant.
// The argument order mirrors the smoke instantiation above.
INSTANTIATE_TEST_SUITE_P(nightly_, NmsRotatedOpTest,
::testing::Combine(
::testing::ValuesIn(input_shapes_nightly), // Input shapes
::testing::Values(ElementType::f16, ElementType::bf16), // Boxes and scores input precisions
::testing::Values(ElementType::i64), // Max output boxes input precisions
::testing::Values(ElementType::f16, ElementType::bf16), // Thresholds precisions
::testing::Values(ElementType::i64), // Output type
::testing::Values(10), // Max output boxes per class
::testing::Values(0.5f), // IOU threshold
::testing::Values(0.4f), // Score threshold
::testing::Values(true, false), // Sort result descending
::testing::Values(true, false), // Clockwise
::testing::Values(true, false), // Is 1st input constant
::testing::Values(true, false), // Is 2nd input constant
::testing::Values(true, false), // Is 3rd input constant
::testing::Values(true, false), // Is 4th input constant
::testing::Values(true, false), // Is 5th input constant
::testing::Values(empty_plugin_config), // Additional plugin configuration
::testing::Values(utils::DEVICE_CPU)), // Device name
NmsRotatedOpTest::getTestCaseName);

View File

@ -43,9 +43,9 @@ using NmsParams = std::tuple<InputShapeParams,
int32_t, // Max output boxes per class
ThresholdValues, // IOU, Score, Soft NMS sigma
ngraph::helpers::InputLayerType, // max_output_boxes_per_class input type
ngraph::op::v9::NonMaxSuppression::BoxEncodingType, // Box encoding
ov::op::v9::NonMaxSuppression::BoxEncodingType, // Box encoding
bool, // Sort result descending
ngraph::element::Type, // Output type
ElementType, // Output type
std::string>; // Device name
class NmsLayerCPUTest : public testing::WithParamInterface<NmsParams>, virtual public SubgraphBaseTest, public CPUTestsBase {
@ -57,9 +57,9 @@ public:
ngraph::helpers::InputLayerType maxOutBoxesType;
ThresholdValues thrValues;
float iouThr, scoreThr, softNmsSigma;
op::v9::NonMaxSuppression::BoxEncodingType boxEncoding;
ov::op::v9::NonMaxSuppression::BoxEncodingType boxEncoding;
bool sortResDescend;
element::Type outType;
ElementType outType;
std::string targetDevice;
std::tie(inShapeParams, inPrecisions, maxOutBoxesPerClass, thrValues, maxOutBoxesType, boxEncoding, sortResDescend, outType, targetDevice) = obj.param;
@ -115,12 +115,12 @@ protected:
ThresholdValues thrValues;
ngraph::helpers::InputLayerType maxOutBoxesType;
float iouThr, scoreThr, softNmsSigma;
op::v9::NonMaxSuppression::BoxEncodingType boxEncoding;
ov::op::v9::NonMaxSuppression::BoxEncodingType boxEncoding;
bool sortResDescend;
element::Type outType;
ElementType outType;
std::tie(inShapeParams, inPrecisions, maxOutBoxesPerClass, thrValues, maxOutBoxesType, boxEncoding, sortResDescend, outType,
targetDevice) = this->GetParam();
element::Type paramsPrec, maxBoxPrec, thrPrec;
ElementType paramsPrec, maxBoxPrec, thrPrec;
std::tie(paramsPrec, maxBoxPrec, thrPrec) = inPrecisions;
std::tie(iouThr, scoreThr, softNmsSigma) = thrValues;
@ -156,7 +156,7 @@ protected:
if (maxOutBoxesType == ngraph::helpers::InputLayerType::PARAMETER) {
inputDynamicShapes.push_back(ngraph::PartialShape{1});
params.push_back(std::make_shared<ngraph::opset1::Parameter>(element::Type_t::i32, inputDynamicShapes.back()));
params.push_back(std::make_shared<ngraph::opset1::Parameter>(ElementType::i32, inputDynamicShapes.back()));
params[1]->set_friendly_name("param_3");
maxOutBoxesPerClassNode = params.back();
} else {
@ -166,7 +166,7 @@ protected:
auto iouThrNode = builder::makeConstant(thrPrec, ngraph::Shape{}, std::vector<float>{iouThr})->output(0);
auto scoreThrNode = builder::makeConstant(thrPrec, ngraph::Shape{}, std::vector<float>{scoreThr})->output(0);
auto softNmsSigmaNode = builder::makeConstant(thrPrec, ngraph::Shape{}, std::vector<float>{softNmsSigma})->output(0);
auto nms = std::make_shared<ngraph::op::v9::NonMaxSuppression>(params[0], params[1], maxOutBoxesPerClassNode, iouThrNode, scoreThrNode,
auto nms = std::make_shared<ov::op::v9::NonMaxSuppression>(params[0], params[1], maxOutBoxesPerClassNode, iouThrNode, scoreThrNode,
softNmsSigmaNode, boxEncoding, sortResDescend, outType);
function = makeNgraphFunction(paramsPrec, params, nms, "NMS");
@ -276,7 +276,7 @@ private:
expectedList.resize(selected_indices_size);
if (indeces_iter->get_element_type() == ov::element::i32) {
if (indeces_iter->get_element_type() == ElementType::i32) {
auto selected_indices_data = indeces_iter->data<int32_t>();
for (size_t i = 0; i < selected_indices_size; i += 3) {
@ -296,7 +296,7 @@ private:
}
}
if (scores_iter->get_element_type() == ov::element::f32) {
if (scores_iter->get_element_type() == ElementType::f32) {
auto selected_scores_data = scores_iter->data<float>();
for (size_t i = 0; i < selected_scores_size; i += 3) {
expectedList[i/3].score = selected_scores_data[i+2];
@ -319,7 +319,7 @@ private:
size_t selected_indices_size = indeces_iter->get_size();
const auto selected_scores_data = scores_iter->data<float>();
if (indeces_iter->get_element_type() == ov::element::i32) {
if (indeces_iter->get_element_type() == ElementType::i32) {
const auto selected_indices_data = indeces_iter->data<int32_t>();
for (size_t i = 0; i < selected_indices_size; i += 3) {
const int32_t batchId = selected_indices_data[i+0];
@ -415,10 +415,10 @@ const std::vector<InputShapeParams> inShapeParams = {
const std::vector<int32_t> maxOutBoxPerClass = {5, 20};
const std::vector<float> threshold = {0.3f, 0.7f};
const std::vector<float> sigmaThreshold = {0.0f, 0.5f};
const std::vector<op::v9::NonMaxSuppression::BoxEncodingType> encodType = {op::v9::NonMaxSuppression::BoxEncodingType::CENTER,
op::v9::NonMaxSuppression::BoxEncodingType::CORNER};
const std::vector<ov::op::v9::NonMaxSuppression::BoxEncodingType> encodType = {ov::op::v9::NonMaxSuppression::BoxEncodingType::CENTER,
ov::op::v9::NonMaxSuppression::BoxEncodingType::CORNER};
const std::vector<bool> sortResDesc = {true, false};
const std::vector<element::Type> outType = {element::i32, element::i64};
const std::vector<ElementType> outType = {ElementType::i32, ElementType::i64};
const std::vector<ngraph::helpers::InputLayerType> maxBoxInputTypes = {ngraph::helpers::InputLayerType::PARAMETER, ngraph::helpers::InputLayerType::CONSTANT};
const auto nmsParams = ::testing::Combine(::testing::ValuesIn(inShapeParams),

View File

@ -0,0 +1,15 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "shared_test_classes/single_op/nms_rotated.hpp"
namespace LayerTestsDefinitions {
// Value-parameterized test body shared by every NmsRotatedOpTest instantiation.
// All work is delegated to run(), which is not declared by NmsRotatedOpTest itself
// (presumably inherited from ov::test::SubgraphBaseTest, where it infers the model
// and compares against the reference - confirm).
TEST_P(NmsRotatedOpTest, CompareWithRefs) {
    run();
}
} // namespace LayerTestsDefinitions

View File

@ -0,0 +1,47 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "shared_test_classes/base/ov_subgraph.hpp"
namespace LayerTestsDefinitions {
// Parameter tuple of NmsRotatedOpTest.
// The element order is part of the contract: getTestCaseName() and SetUp() read the
// fields positionally via std::get<N>, so new fields must be appended consistently in
// all three places.
using NmsRotatedParams = std::tuple<
    std::vector<ov::test::InputShape>,  // [0]  Input shapes
    ov::test::ElementType,              // [1]  Boxes and scores input precisions
    ov::test::ElementType,              // [2]  Max output boxes input precisions
    ov::test::ElementType,              // [3]  Thresholds precisions
    ov::test::ElementType,              // [4]  Output type
    int64_t,                            // [5]  Max output boxes per class
    float,                              // [6]  IOU threshold
    float,                              // [7]  Score threshold
    bool,                               // [8]  Sort result descending
    bool,                               // [9]  Clockwise
    bool,                               // [10] Is 1st input constant
    bool,                               // [11] Is 2nd input constant
    bool,                               // [12] Is 3rd input constant
    bool,                               // [13] Is 4th input constant
    bool,                               // [14] Is 5th input constant
    ov::AnyMap,                         // [15] Additional configuration
    std::string                         // [16] Device name
>;
/// Shared single-operation test fixture parameterized by NmsRotatedParams.
///
/// SetUp() builds the tested model from the parameter tuple and generate_inputs()
/// fills the model parameters for each set of target static shapes.
class NmsRotatedOpTest : public testing::WithParamInterface<NmsRotatedParams>,
                         public ov::test::SubgraphBaseTest {
public:
    /// Builds a test-case name out of all fields of the parameter tuple.
    static std::string getTestCaseName(const testing::TestParamInfo<NmsRotatedParams>& obj);

protected:
    void SetUp() override;
    void generate_inputs(const std::vector<ov::Shape>& target_shapes) override;

private:
    // Scalar test parameters cached by SetUp() so that generate_inputs() can write them
    // into the corresponding input tensors. Value-initialized so that the fixture never
    // carries indeterminate values before SetUp() runs.
    int64_t m_max_out_boxes_per_class = 0;
    float m_iou_threshold = 0.f;
    float m_score_threshold = 0.f;
};
} // namespace LayerTestsDefinitions

View File

@ -0,0 +1,207 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "shared_test_classes/single_op/nms_rotated.hpp"
#include "ov_models/builders.hpp"
#include "common_test_utils/data_utils.hpp"
#include "openvino/op/nms_rotated.hpp"
using namespace ov::test;
namespace LayerTestsDefinitions {
/// Composes the test-case name from every field of the parameter tuple.
///
/// Layout: IS=(<partial shapes>)_TS={<target shapes of iteration 0>}_{...}_ followed by
/// the precisions, scalar attributes, const-input flags, optional plugin config and device.
/// The number of target-shape iterations is taken from the first input.
std::string NmsRotatedOpTest::getTestCaseName(const testing::TestParamInfo<NmsRotatedParams>& obj) {
    const auto& params = obj.param;
    const auto& shapes = std::get<0>(params);
    const size_t inputs_num = shapes.size();

    std::ostringstream name;

    // Partial (possibly dynamic) shapes of all inputs, '_' separated.
    name << "IS=(";
    for (size_t in_idx = 0lu; in_idx < inputs_num; ++in_idx) {
        if (in_idx != 0lu) {
            name << "_";
        }
        name << utils::partialShape2str({shapes[in_idx].first});
    }
    name << ")_TS=";

    // One {...}_ group per target-shape iteration, each listing the static shape of every input.
    const size_t iterations_num = shapes.front().second.size();
    for (size_t iter = 0lu; iter < iterations_num; ++iter) {
        name << "{";
        for (size_t in_idx = 0lu; in_idx < inputs_num; ++in_idx) {
            if (in_idx != 0lu) {
                name << "_";
            }
            name << utils::vec2str(shapes[in_idx].second[iter]);
        }
        name << "}_";
    }

    name << "_BoxPrc=" << std::get<1>(params)
         << "_MaxPrc=" << std::get<2>(params)
         << "_ThrPrc=" << std::get<3>(params)
         << "_OutPrc=" << std::get<4>(params)
         << "_MaxBox=" << std::get<5>(params)
         << "_IouThr=" << std::get<6>(params)
         << "_ScoreThr=" << std::get<7>(params)
         << "_SortDesc=" << utils::bool2str(std::get<8>(params))
         << "_Clockwise=" << utils::bool2str(std::get<9>(params));

    name << "_ConstIn={"
         << utils::bool2str(std::get<10>(params)) << ","
         << utils::bool2str(std::get<11>(params)) << ","
         << utils::bool2str(std::get<12>(params)) << ","
         << utils::bool2str(std::get<13>(params)) << ","
         << utils::bool2str(std::get<14>(params)) << "}";

    // The plugin configuration is only printed when it is not empty.
    const auto& plugin_config = std::get<15>(params);
    if (!plugin_config.empty()) {
        name << "_Config={";
        for (const auto& property : plugin_config) {
            name << "_" << property.first << "=";
            property.second.print(name);
        }
        name << "}";
    }

    name << "_Device=" << std::get<16>(params);

    return name.str();
}
// Builds the tested model: unpacks the parameter tuple, creates the five NMSRotated inputs
// (each either a Constant or a named Parameter), and wraps every output of the operation
// into a Result.
void NmsRotatedOpTest::SetUp() {
const auto& params = this->GetParam();
const auto& in_shapes = std::get<0>(params);
const auto& boxes_prc = std::get<1>(params);
const auto& max_boxes_prc = std::get<2>(params);
const auto& thresholds_prc = std::get<3>(params);
const auto& out_prc = std::get<4>(params);
// The scalar attributes are kept in members because generate_inputs() writes them
// into the corresponding input tensors.
m_max_out_boxes_per_class = std::get<5>(params);
m_iou_threshold = std::get<6>(params);
m_score_threshold = std::get<7>(params);
const auto& sort_descending = std::get<8>(params);
const auto& clockwise = std::get<9>(params);
const auto& is_0_in_const = std::get<10>(params);
const auto& is_1_in_const = std::get<11>(params);
const auto& is_2_in_const = std::get<12>(params);
const auto& is_3_in_const = std::get<13>(params);
const auto& is_4_in_const = std::get<14>(params);
configuration = std::get<15>(params);
targetDevice = std::get<16>(params);
// Shapes of the non-constant inputs only, in the same order as in_params.
std::vector<InputShape> actual_shapes;
ov::ParameterVector in_params;
// NOTE: this local holds the operation's input nodes; it is distinct from the `inputs`
// member that generate_inputs() fills with tensors.
std::vector<std::shared_ptr<ov::Node>> inputs;
// Static shape {1} used for the three scalar-like inputs (max boxes and both thresholds).
const auto in_shape_1d = InputShape{{1}, {{1}}};
// CONST_CASE: one switch case that appends a randomly filled Constant of precision P and
// shape S to `inputs`. H and L are forwarded as the two trailing makeConstant arguments
// (presumably the upper and lower bound of the generated values - confirm in ov_models/builders.hpp).
#define CONST_CASE(P, S, H, L) \
case P: \
inputs.push_back(ngraph::builder::makeConstant(P, S, std::vector<ov::element_type_traits<P>::value_type>{}, true, \
ov::element_type_traits<P>::value_type(H), ov::element_type_traits<P>::value_type(L))); \
break;
// CREATE_INPUT: when C is true the input becomes a Constant built from the first target
// shape of S; otherwise S is recorded in actual_shapes and a Parameter with friendly name N
// is created. The Parameter uses the first target shape when the partial shape of S has
// rank 0 (the `{}` placeholder) and the partial shape itself otherwise.
#define CREATE_INPUT(C, P, S, N, H, L) \
if (C) { \
switch (P) { \
CONST_CASE(ElementType::f32, S.second[0], H, L) \
CONST_CASE(ElementType::f16, S.second[0], H, L) \
CONST_CASE(ElementType::bf16, S.second[0], H, L) \
CONST_CASE(ElementType::i32, S.second[0], H, L) \
CONST_CASE(ElementType::i64, S.second[0], H, L) \
default: OPENVINO_THROW("NmsRotated does not support precision ", P, " for the ", N, " input."); \
} \
} else { \
actual_shapes.push_back(S); \
if (S.first.rank() == 0) { \
in_params.push_back(std::make_shared<ov::op::v0::Parameter>(P, S.second.front())); \
} else { \
in_params.push_back(std::make_shared<ov::op::v0::Parameter>(P, S.first)); \
} \
in_params.back()->set_friendly_name(N); \
inputs.push_back(in_params.back()); \
}
// The friendly names given here are the keys generate_inputs() dispatches on.
// For the three scalar inputs H == L, i.e. both bounds are the exact test value.
CREATE_INPUT(is_0_in_const, boxes_prc, in_shapes[0], "Boxes", 30, 10)
CREATE_INPUT(is_1_in_const, boxes_prc, in_shapes[1], "Scores", 1, 0)
CREATE_INPUT(is_2_in_const, max_boxes_prc, in_shape_1d, "MaxOutputBoxesPerClass", m_max_out_boxes_per_class, m_max_out_boxes_per_class)
CREATE_INPUT(is_3_in_const, thresholds_prc, in_shape_1d, "IouThreshold", m_iou_threshold, m_iou_threshold)
CREATE_INPUT(is_4_in_const, thresholds_prc, in_shape_1d, "ScoreThreshold", m_score_threshold, m_score_threshold)
#undef CONST_CASE
#undef CREATE_INPUT
// Only the shapes of Parameter inputs are registered; Constant inputs carry their own shape.
init_input_shapes(actual_shapes);
const auto nms_op = std::make_shared<ov::op::v13::NMSRotated>(inputs[0], inputs[1], inputs[2], inputs[3], inputs[4],
sort_descending, out_prc, clockwise);
// Expose every output of the operation as a model Result.
ov::ResultVector results;
for (size_t i = 0lu; i < nms_op->get_output_size(); i++) {
results.push_back(std::make_shared<ov::op::v0::Result>(nms_op->output(i)));
}
function = std::make_shared<ov::Model>(results, in_params, "NMSRotated");
}
/// Copies `len` elements from `src` to `dst`, converting each one with static_cast<TD>.
/// @param dst Destination buffer with room for at least `len` elements.
/// @param src Source buffer holding at least `len` elements.
/// @param len Number of elements to convert; zero leaves `dst` untouched.
template<typename TD, typename TS>
void fill_data(TD* dst, const TS* src, size_t len) {
    const TS* const src_end = src + len;
    while (src != src_end) {
        *dst++ = static_cast<TD>(*src++);
    }
}
/// Creates and fills one tensor per model input for the given target static shapes.
///
/// Inputs are recognised by the friendly name assigned in SetUp():
///  - "Boxes" and "Scores" are filled with random data;
///  - "MaxOutputBoxesPerClass", "IouThreshold" and "ScoreThreshold" receive the single value
///    cached in the fixture members, converted to the input precision.
/// An input with any other name gets a tensor whose content is left as allocated.
///
/// @param targetInputStaticShapes Static shape of every model input, indexed like function->inputs().
/// @throws ov::Exception (via OPENVINO_THROW) when a known input has an unsupported precision.
void NmsRotatedOpTest::generate_inputs(const std::vector<ov::Shape>& targetInputStaticShapes) {
    inputs.clear();
    const auto& func_inputs = function->inputs();

    for (size_t i = 0llu; i < func_inputs.size(); ++i) {
        const auto& func_input = func_inputs[i];
        const auto& name = func_input.get_node()->get_friendly_name();
        const auto& in_prc = func_input.get_element_type();
        auto tensor = ov::Tensor(in_prc, targetInputStaticShapes[i]);

// FILL_DATA: switch case converting L elements starting at S into the tensor of precision P.
#define FILL_DATA(P, S, L) \
case P : \
    fill_data(tensor.data<ov::element_type_traits<P>::value_type>(), S, L); break;

// GEN_DATA: switch case filling the whole tensor of precision P with random data; R, S, K are
// forwarded unchanged to utils::fill_data_random (presumably range, start value and
// resolution - confirm in common_test_utils/data_utils.hpp).
#define GEN_DATA(P, R, S, K) \
case P : \
    utils::fill_data_random(tensor.data<ov::element_type_traits<P>::value_type>(), shape_size(targetInputStaticShapes[i]), R, S, K); break;

        if (name == "Boxes") {
            switch (in_prc) {
                GEN_DATA(ElementType::f32, 30, 20, 1)
                GEN_DATA(ElementType::f16, 30, 20, 1)
                GEN_DATA(ElementType::bf16, 30, 20, 1)
                default:
                    // Fixed: the message used to name the Scores input here.
                    OPENVINO_THROW("NmsRotated does not support precision ", in_prc, " for the Boxes input.");
            }
        } else if (name == "Scores") {
            switch (in_prc) {
                GEN_DATA(ElementType::f32, 1, 0, 100)
                GEN_DATA(ElementType::f16, 1, 0, 100)
                GEN_DATA(ElementType::bf16, 1, 0, 100)
                default:
                    OPENVINO_THROW("NmsRotated does not support precision ", in_prc, " for the Scores input.");
            }
        } else if (name == "MaxOutputBoxesPerClass") {
            switch (in_prc) {
                FILL_DATA(ElementType::i64, &m_max_out_boxes_per_class, 1)
                FILL_DATA(ElementType::i32, &m_max_out_boxes_per_class, 1)
                default:
                    OPENVINO_THROW("NmsRotated does not support precision ", in_prc, " for the MaxOutputBoxesPerClass input.");
            }
        } else if (name == "IouThreshold") {
            switch (in_prc) {
                FILL_DATA(ElementType::f32, &m_iou_threshold, 1)
                FILL_DATA(ElementType::f16, &m_iou_threshold, 1)
                FILL_DATA(ElementType::bf16, &m_iou_threshold, 1)
                default:
                    OPENVINO_THROW("NmsRotated does not support precision ", in_prc, " for the IouThreshold input.");
            }
        } else if (name == "ScoreThreshold") {
            switch (in_prc) {
                FILL_DATA(ElementType::f32, &m_score_threshold, 1)
                FILL_DATA(ElementType::f16, &m_score_threshold, 1)
                FILL_DATA(ElementType::bf16, &m_score_threshold, 1)
                default:
                    OPENVINO_THROW("NmsRotated does not support precision ", in_prc, " for the ScoreThreshold input.");
            }
        }

#undef GEN_DATA
#undef FILL_DATA

        inputs.insert({func_input.get_node_shared_ptr(), tensor});
    }
}
} // namespace LayerTestsDefinitions

View File

@ -1131,5 +1131,4 @@ conformance_RegionYolo/ReadIRTest.ImportExport/Op=RegionYolo.1_Type=f32_IR=Regio
conformance_Add/ReadIRTest.ImportExport/Op=Add.1_Type=i32_IR=28f23780d4ca0d40671caf79d5cd9223ad8f6dc2fa5ade2521f3d99586eeeb7f_Device=CPU_Shape=static_Config=(),9.72615e-07
conformance_Convolution/ReadIRTest.Inference/Op=Convolution.1_Type=f32_IR=c301804445f273eef62f41f02204711d9d6e571da28c76ab447d7d90983b0032_Device=CPU_Shape=dynamic_Config=(),0.000113281
conformance/OpImplCheckTest.checkPluginImplementation/Function=Multinomial_opset13_Device=CPU_Config=(),1
conformance/OpImplCheckTest.checkPluginImplementation/Function=NMSRotated_opset13_Device=CPU_Config=(),1
conformance/OpImplCheckTest.checkPluginImplementation/Function=LSTMSequence_opset1_Device=CPU_Config=(),1

1 Test Name Fix Priority
1131 conformance_Add/ReadIRTest.ImportExport/Op=Add.1_Type=i32_IR=28f23780d4ca0d40671caf79d5cd9223ad8f6dc2fa5ade2521f3d99586eeeb7f_Device=CPU_Shape=static_Config=() 9.72615e-07
1132 conformance_Convolution/ReadIRTest.Inference/Op=Convolution.1_Type=f32_IR=c301804445f273eef62f41f02204711d9d6e571da28c76ab447d7d90983b0032_Device=CPU_Shape=dynamic_Config=() 0.000113281
1133 conformance/OpImplCheckTest.checkPluginImplementation/Function=Multinomial_opset13_Device=CPU_Config=() 1
conformance/OpImplCheckTest.checkPluginImplementation/Function=NMSRotated_opset13_Device=CPU_Config=() 1
1134 conformance/OpImplCheckTest.checkPluginImplementation/Function=LSTMSequence_opset1_Device=CPU_Config=() 1