[CPU] NMSRotated operation implementation. (#20410)
This commit is contained in:
parent
3077bad26f
commit
57571d36e6
@ -27,13 +27,13 @@ The general algorithm is described below:
|
|||||||
|
|
||||||
Here ``func(rotated_iou(b_i, b)) = 1 if rotated_iou(b_i, b) <= iou_threshold else 0``.
|
Here ``func(rotated_iou(b_i, b)) = 1 if rotated_iou(b_i, b) <= iou_threshold else 0``.
|
||||||
|
|
||||||
Having two bouding boxes ``B1`` and ``B2`` the following steps are performed to calculate ``rotated_iou(B1, B2)``:
|
Having two bounding boxes ``B1`` and ``B2`` the following steps are performed to calculate ``rotated_iou(B1, B2)``:
|
||||||
|
|
||||||
1. Calculate rotated vertices, (x, y) coordinates of the 4 corners of each box transformed by the corresponding angle in radians according to the direction specified by the *clockwise* attribute.
|
1. Calculate rotated vertices, (x, y) coordinates of the 4 corners of each box transformed by the corresponding angle in radians according to the direction specified by the *clockwise* attribute.
|
||||||
2. Find all intersection points between edges of ``B1`` and ``B2``. Add them to the ``intersection_points``.
|
2. Find all intersection points between edges of ``B1`` and ``B2``. Add them to the ``intersection_points``.
|
||||||
3. Find all corners of ``B1`` within area of ``B2``, and all corners of ``B2`` within area of ``B1``. Add them to the ``intersection_points``.
|
3. Find all corners of ``B1`` within area of ``B2``, and all corners of ``B2`` within area of ``B1``. Add them to the ``intersection_points``.
|
||||||
4. Calculate ``intersection_area`` of the polygon described by ``intersection_points`` (see Shoelace formula).
|
4. Calculate ``intersection_area`` of the polygon described by ``intersection_points`` (see Shoelace formula).
|
||||||
5. Calculate ``union_area`` (the common area of ``B1`` and ``B2``), `union_area = (B1_area + B2_area) - intersection_area`.
|
5. Calculate ``union_area`` (the sum of the areas of ``B1`` and ``B2``; the overlap is counted twice here and is subtracted once in the next step), ``union_area = B1_area + B2_area``.
|
||||||
6. Return intersection over union ``rotated_iou = intersection_area / (union_area - intersection_area)``.
|
6. Return intersection over union ``rotated_iou = intersection_area / (union_area - intersection_area)``.
|
||||||
|
|
||||||
|
|
||||||
|
@ -49,6 +49,7 @@ bool fuse_type_to_nms3(const std::shared_ptr<ov::Node>& node, const precisions_m
|
|||||||
bool fuse_type_to_nms4(const std::shared_ptr<ov::Node>& node, const precisions_map& precisions);
|
bool fuse_type_to_nms4(const std::shared_ptr<ov::Node>& node, const precisions_map& precisions);
|
||||||
bool fuse_type_to_nms5(const std::shared_ptr<ov::Node>& node, const precisions_map& precisions);
|
bool fuse_type_to_nms5(const std::shared_ptr<ov::Node>& node, const precisions_map& precisions);
|
||||||
bool fuse_type_to_nms9(const std::shared_ptr<ov::Node>& node, const precisions_map& precisions);
|
bool fuse_type_to_nms9(const std::shared_ptr<ov::Node>& node, const precisions_map& precisions);
|
||||||
|
bool fuse_type_to_nms_rotated(const std::shared_ptr<ov::Node>& node, const precisions_map& precisions);
|
||||||
bool fuse_type_to_matrix_nms(const std::shared_ptr<ov::Node>& node, const precisions_map& precisions);
|
bool fuse_type_to_matrix_nms(const std::shared_ptr<ov::Node>& node, const precisions_map& precisions);
|
||||||
bool fuse_type_to_multiclass_nms(const std::shared_ptr<ov::Node>& node, const precisions_map& precisions);
|
bool fuse_type_to_multiclass_nms(const std::shared_ptr<ov::Node>& node, const precisions_map& precisions);
|
||||||
bool fuse_type_to_generate_proposals(const std::shared_ptr<ov::Node>& node, const precisions_map& precisions);
|
bool fuse_type_to_generate_proposals(const std::shared_ptr<ov::Node>& node, const precisions_map& precisions);
|
||||||
@ -383,6 +384,7 @@ bool ov::pass::ConvertPrecision::run_on_model(const std::shared_ptr<ov::Model>&
|
|||||||
{opset4::NonMaxSuppression::get_type_info_static(), fuse_type_to_nms4},
|
{opset4::NonMaxSuppression::get_type_info_static(), fuse_type_to_nms4},
|
||||||
{opset5::NonMaxSuppression::get_type_info_static(), fuse_type_to_nms5},
|
{opset5::NonMaxSuppression::get_type_info_static(), fuse_type_to_nms5},
|
||||||
{opset9::NonMaxSuppression::get_type_info_static(), fuse_type_to_nms9},
|
{opset9::NonMaxSuppression::get_type_info_static(), fuse_type_to_nms9},
|
||||||
|
{op::v13::NMSRotated::get_type_info_static(), fuse_type_to_nms_rotated},
|
||||||
{opset8::MatrixNms::get_type_info_static(), fuse_type_to_matrix_nms},
|
{opset8::MatrixNms::get_type_info_static(), fuse_type_to_matrix_nms},
|
||||||
{opset8::MulticlassNms::get_type_info_static(), fuse_type_to_multiclass_nms},
|
{opset8::MulticlassNms::get_type_info_static(), fuse_type_to_multiclass_nms},
|
||||||
{opset9::MulticlassNms::get_type_info_static(), fuse_type_to_multiclass_nms},
|
{opset9::MulticlassNms::get_type_info_static(), fuse_type_to_multiclass_nms},
|
||||||
@ -691,6 +693,51 @@ bool fuse_type_to_nms9(const std::shared_ptr<ov::Node>& node, const precisions_m
|
|||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Retargets the output element types of an op::v13::NMSRotated node according to `precisions`.
//
// node        - node to process; the function returns false when it is not an NMSRotated.
// precisions  - lookup from an output's current element type to the requested one.
//
// Returns true when the node's output type attribute was set, when an override was recorded
// on an already type-relaxed node, or when the node was replaced by a TypeRelaxed wrapper.
bool fuse_type_to_nms_rotated(const std::shared_ptr<ov::Node>& node, const precisions_map& precisions) {
    const auto nms_rotated = ov::as_type_ptr<op::v13::NMSRotated>(node);
    if (nms_rotated == nullptr) {
        return false;
    }

    bool changed = false;

    // Output 0 can be switched through the op's own attribute when the target is i32 or i64.
    auto requested = precisions.find(node->get_output_element_type(0));
    if (requested != precisions.end()) {
        const auto& target = requested->second;
        if (target == ov::element::i32 || target == ov::element::i64) {
            nms_rotated->set_output_type_attr(target);
            changed = true;
            // Nothing else to do when the type of output 1 has no entry in the map.
            const bool second_output_untouched = precisions.count(node->get_output_element_type(1)) == 0;
            if (second_output_untouched) {
                return changed;
            }
        }
    }

    // Remaining conversions go through the TypeRelaxed mechanism: collect the desired type
    // of every output, overriding in place when the node is already type-relaxed.
    const auto relaxed = std::dynamic_pointer_cast<ov::op::TypeRelaxedBase>(node);
    ov::element::TypeVector new_output_types;
    for (size_t port = 0; port < node->get_output_size(); ++port) {
        const auto current_type = node->get_output_element_type(port);
        requested = precisions.find(current_type);
        if (requested != precisions.end()) {
            const auto& target = requested->second;
            if (relaxed) {
                relaxed->set_overridden_output_type(target, port);
                changed = true;
            }
            new_output_types.push_back(target);
        } else {
            new_output_types.push_back(current_type);
        }
    }

    // A plain NMSRotated is swapped for a type-relaxed copy carrying the collected output types.
    if (!relaxed) {
        const auto replacement =
            std::make_shared<ov::op::TypeRelaxed<op::v13::NMSRotated>>(*nms_rotated, ov::element::TypeVector{}, new_output_types);
        replace_node(node, replacement);
        changed = true;
    }

    return changed;
}
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
bool update_type(size_t idx,
|
bool update_type(size_t idx,
|
||||||
|
@ -201,6 +201,7 @@ static const TypeToNameMap& get_type_to_name_tbl() {
|
|||||||
{ "ExtractImagePatches", Type::ExtractImagePatches},
|
{ "ExtractImagePatches", Type::ExtractImagePatches},
|
||||||
{ "NonMaxSuppression", Type::NonMaxSuppression},
|
{ "NonMaxSuppression", Type::NonMaxSuppression},
|
||||||
{ "NonMaxSuppressionIEInternal", Type::NonMaxSuppression},
|
{ "NonMaxSuppressionIEInternal", Type::NonMaxSuppression},
|
||||||
|
{ "NMSRotated", Type::NonMaxSuppression},
|
||||||
{ "MatrixNms", Type::MatrixNms},
|
{ "MatrixNms", Type::MatrixNms},
|
||||||
{ "MulticlassNms", Type::MulticlassNms},
|
{ "MulticlassNms", Type::MulticlassNms},
|
||||||
{ "MulticlassNmsIEInternal", Type::MulticlassNms},
|
{ "MulticlassNmsIEInternal", Type::MulticlassNms},
|
||||||
|
@ -615,26 +615,31 @@ bool Node::outputShapeDataDependency() const {
|
|||||||
|
|
||||||
void Node::redefineOutputMemory(const std::vector<VectorDims> &newOutputShapes) {
|
void Node::redefineOutputMemory(const std::vector<VectorDims> &newOutputShapes) {
|
||||||
if (newOutputShapes.size() != outputShapes.size()) {
|
if (newOutputShapes.size() != outputShapes.size()) {
|
||||||
IE_THROW() << "Number shapes mismatch with real outputs number for node with name: " << getName();
|
THROW_CPU_NODE_ERR("has shapes number mismatch with real outputs number.");
|
||||||
}
|
}
|
||||||
for (size_t i = 0; i < outputShapes.size(); i++) {
|
for (size_t i = 0lu; i < outputShapes.size(); i++) {
|
||||||
const auto edges = getChildEdgesAtPort(i);
|
redefineOutputMemory(i, newOutputShapes[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// avoid 0D shape incompatible
|
void Node::redefineOutputMemory(const size_t port, const VectorDims& new_output_shape) {
|
||||||
auto newOutputShape = newOutputShapes[i];
|
const auto edges = getChildEdgesAtPort(port);
|
||||||
if (newOutputShape.empty()) {
|
|
||||||
newOutputShape.push_back(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
const auto &currDesc = edges[0]->getMemory().getDesc();
|
// avoid 0D shape incompatible
|
||||||
if (currDesc.getShape().isStatic() && currDesc.getShape().getStaticDims() == newOutputShape)
|
auto new_shape = new_output_shape;
|
||||||
continue;
|
if (new_shape.empty()) {
|
||||||
|
new_shape.push_back(1);
|
||||||
|
}
|
||||||
|
|
||||||
const bool hasZeroDims = std::count(std::begin(newOutputShape), std::end(newOutputShape), 0) > 0;
|
const auto& curr_desc = edges[0]->getMemory().getDesc();
|
||||||
const auto memDesc = getBaseMemDescAtOutputPort(i)->cloneWithNewDims(newOutputShape, hasZeroDims);
|
if (curr_desc.getShape().isStatic() && curr_desc.getShape().getStaticDims() == new_shape) {
|
||||||
for (size_t j = 0; j < edges.size(); j++) {
|
return;
|
||||||
edges[j]->getMemoryPtr()->redefineDesc(memDesc);
|
}
|
||||||
}
|
|
||||||
|
const bool has_zero_dims = std::count(std::begin(new_shape), std::end(new_shape), 0lu) > 0;
|
||||||
|
const auto mem_desc = getBaseMemDescAtOutputPort(port)->cloneWithNewDims(new_shape, has_zero_dims);
|
||||||
|
for (size_t j = 0lu; j < edges.size(); j++) {
|
||||||
|
edges[j]->getMemoryPtr()->redefineDesc(mem_desc);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -366,6 +366,7 @@ public:
|
|||||||
void updateDynamicParams();
|
void updateDynamicParams();
|
||||||
void executeDynamic(dnnl::stream strm);
|
void executeDynamic(dnnl::stream strm);
|
||||||
virtual void redefineOutputMemory(const std::vector<VectorDims> &newShapes);
|
virtual void redefineOutputMemory(const std::vector<VectorDims> &newShapes);
|
||||||
|
void redefineOutputMemory(const size_t port, const VectorDims& new_output_shape);
|
||||||
bool outputShapeDataDependency() const;
|
bool outputShapeDataDependency() const;
|
||||||
|
|
||||||
virtual void initSupportedPrimitiveDescriptors();
|
virtual void initSupportedPrimitiveDescriptors();
|
||||||
|
@ -0,0 +1,465 @@
|
|||||||
|
// Copyright (C) 2018-2023 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "non_max_suppression.hpp"
|
||||||
|
#include "utils/general_utils.h"
|
||||||
|
|
||||||
|
using namespace InferenceEngine;
|
||||||
|
using namespace dnnl::impl::cpu;
|
||||||
|
|
||||||
|
#define GET_OFF(field) offsetof(NmsCallArgs, field)
|
||||||
|
|
||||||
|
namespace ov {
|
||||||
|
namespace intel_cpu {
|
||||||
|
namespace kernel {
|
||||||
|
|
||||||
|
template <x64::cpu_isa_t isa>
|
||||||
|
void NonMaxSuppression<isa>::generate() {
|
||||||
|
load_vector_emitter.reset(new jit_load_emitter(this, isa, Precision::FP32, Precision::FP32, vector_step));
|
||||||
|
load_scalar_emitter.reset(new jit_load_emitter(this, isa, Precision::FP32, Precision::FP32, scalar_step));
|
||||||
|
|
||||||
|
exp_injector.reset(new x64::jit_uni_eltwise_injector_f32<isa>(this, dnnl::impl::alg_kind::eltwise_exp, 0.f, 0.f, 1.f));
|
||||||
|
|
||||||
|
this->preamble();
|
||||||
|
|
||||||
|
uni_vpxor(vmm_zero, vmm_zero, vmm_zero);
|
||||||
|
|
||||||
|
load_pool_gpr_idxs = {static_cast<size_t>(reg_load_store_mask.getIdx()), static_cast<size_t>(reg_load_table.getIdx())};
|
||||||
|
store_pool_gpr_idxs = {static_cast<size_t>(reg_load_store_mask.getIdx())};
|
||||||
|
store_pool_vec_idxs = {static_cast<size_t>(vmm_zero.getIdx())};
|
||||||
|
|
||||||
|
mov(reg_boxes_coord0, ptr[reg_params + GET_OFF(selected_boxes_coord[0])]);
|
||||||
|
mov(reg_boxes_coord1, ptr[reg_params + GET_OFF(selected_boxes_coord[0]) + 1 * sizeof(size_t)]);
|
||||||
|
mov(reg_boxes_coord2, ptr[reg_params + GET_OFF(selected_boxes_coord[0]) + 2 * sizeof(size_t)]);
|
||||||
|
mov(reg_boxes_coord3, ptr[reg_params + GET_OFF(selected_boxes_coord[0]) + 3 * sizeof(size_t)]);
|
||||||
|
mov(reg_candidate_box, ptr[reg_params + GET_OFF(candidate_box)]);
|
||||||
|
mov(reg_candidate_status, ptr[reg_params + GET_OFF(candidate_status)]);
|
||||||
|
mov(reg_boxes_num, ptr[reg_params + GET_OFF(selected_boxes_num)]);
|
||||||
|
mov(reg_iou_threshold, ptr[reg_params + GET_OFF(iou_threshold)]);
|
||||||
|
// soft
|
||||||
|
mov(reg_score_threshold, ptr[reg_params + GET_OFF(score_threshold)]);
|
||||||
|
mov(reg_score, ptr[reg_params + GET_OFF(score)]);
|
||||||
|
mov(reg_scale, ptr[reg_params + GET_OFF(scale)]);
|
||||||
|
|
||||||
|
// could use rcx(reg_table) and rdi(reg_temp) now as abi parse finished
|
||||||
|
mov(reg_table, l_table_constant);
|
||||||
|
if (x64::mayiuse(x64::avx512_core)) {
|
||||||
|
kmovw(k_mask_one, word[reg_table + vlen]);
|
||||||
|
}
|
||||||
|
uni_vbroadcastss(vmm_iou_threshold, ptr[reg_iou_threshold]);
|
||||||
|
uni_vbroadcastss(vmm_score_threshold, ptr[reg_score_threshold]);
|
||||||
|
|
||||||
|
uni_vbroadcastss(vmm_candidate_coord0, ptr[reg_candidate_box]);
|
||||||
|
uni_vbroadcastss(vmm_candidate_coord1, ptr[reg_candidate_box + 1 * sizeof(float)]);
|
||||||
|
uni_vbroadcastss(vmm_candidate_coord2, ptr[reg_candidate_box + 2 * sizeof(float)]);
|
||||||
|
uni_vbroadcastss(vmm_candidate_coord3, ptr[reg_candidate_box + 3 * sizeof(float)]);
|
||||||
|
|
||||||
|
if (m_jcp.box_encode_type == NMSBoxEncodeType::CORNER) {
|
||||||
|
// box format: y1, x1, y2, x2
|
||||||
|
uni_vminps(vmm_temp1, vmm_candidate_coord0, vmm_candidate_coord2);
|
||||||
|
uni_vmaxps(vmm_temp2, vmm_candidate_coord0, vmm_candidate_coord2);
|
||||||
|
uni_vmovups(vmm_candidate_coord0, vmm_temp1);
|
||||||
|
uni_vmovups(vmm_candidate_coord2, vmm_temp2);
|
||||||
|
|
||||||
|
uni_vminps(vmm_temp1, vmm_candidate_coord1, vmm_candidate_coord3);
|
||||||
|
uni_vmaxps(vmm_temp2, vmm_candidate_coord1, vmm_candidate_coord3);
|
||||||
|
uni_vmovups(vmm_candidate_coord1, vmm_temp1);
|
||||||
|
uni_vmovups(vmm_candidate_coord3, vmm_temp2);
|
||||||
|
} else {
|
||||||
|
// box format: x_center, y_center, width, height --> y1, x1, y2, x2
|
||||||
|
uni_vmulps(vmm_temp1, vmm_candidate_coord2, ptr[reg_table]); // width/2
|
||||||
|
uni_vmulps(vmm_temp2, vmm_candidate_coord3, ptr[reg_table]); // height/2
|
||||||
|
|
||||||
|
uni_vaddps(vmm_temp3, vmm_candidate_coord0, vmm_temp1); // x_center + width/2
|
||||||
|
uni_vmovups(vmm_candidate_coord3, vmm_temp3);
|
||||||
|
|
||||||
|
uni_vaddps(vmm_temp3, vmm_candidate_coord1, vmm_temp2); // y_center + height/2
|
||||||
|
uni_vmovups(vmm_candidate_coord2, vmm_temp3);
|
||||||
|
|
||||||
|
uni_vsubps(vmm_temp3, vmm_candidate_coord0, vmm_temp1); // x_center - width/2
|
||||||
|
uni_vsubps(vmm_temp4, vmm_candidate_coord1, vmm_temp2); // y_center - height/2
|
||||||
|
|
||||||
|
uni_vmovups(vmm_candidate_coord1, vmm_temp3);
|
||||||
|
uni_vmovups(vmm_candidate_coord0, vmm_temp4);
|
||||||
|
}
|
||||||
|
|
||||||
|
// check from last to first
|
||||||
|
imul(reg_temp_64, reg_boxes_num, sizeof(float));
|
||||||
|
add(reg_boxes_coord0, reg_temp_64); // y1
|
||||||
|
add(reg_boxes_coord1, reg_temp_64); // x1
|
||||||
|
add(reg_boxes_coord2, reg_temp_64); // y2
|
||||||
|
add(reg_boxes_coord3, reg_temp_64); // x2
|
||||||
|
|
||||||
|
Xbyak::Label hard_nms_label;
|
||||||
|
Xbyak::Label nms_end_label;
|
||||||
|
|
||||||
|
mov(reg_temp_32, ptr[reg_scale]);
|
||||||
|
test(reg_temp_32, reg_temp_32);
|
||||||
|
jz(hard_nms_label, T_NEAR);
|
||||||
|
|
||||||
|
soft_nms();
|
||||||
|
|
||||||
|
jmp(nms_end_label, T_NEAR);
|
||||||
|
|
||||||
|
L(hard_nms_label);
|
||||||
|
|
||||||
|
hard_nms();
|
||||||
|
|
||||||
|
L(nms_end_label);
|
||||||
|
|
||||||
|
this->postamble();
|
||||||
|
|
||||||
|
load_vector_emitter->emit_data();
|
||||||
|
load_scalar_emitter->emit_data();
|
||||||
|
|
||||||
|
prepare_table();
|
||||||
|
exp_injector->prepare_table();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template <x64::cpu_isa_t isa>
|
||||||
|
void NonMaxSuppression<isa>::hard_nms() {
|
||||||
|
Xbyak::Label main_loop_label_hard;
|
||||||
|
Xbyak::Label main_loop_end_label_hard;
|
||||||
|
Xbyak::Label tail_loop_label_hard;
|
||||||
|
Xbyak::Label terminate_label_hard;
|
||||||
|
L(main_loop_label_hard);
|
||||||
|
{
|
||||||
|
cmp(reg_boxes_num, vector_step);
|
||||||
|
jl(main_loop_end_label_hard, T_NEAR);
|
||||||
|
|
||||||
|
sub(reg_boxes_coord0, vector_step * sizeof(float));
|
||||||
|
sub(reg_boxes_coord1, vector_step * sizeof(float));
|
||||||
|
sub(reg_boxes_coord2, vector_step * sizeof(float));
|
||||||
|
sub(reg_boxes_coord3, vector_step * sizeof(float));
|
||||||
|
|
||||||
|
// iou result is in vmm_temp3
|
||||||
|
iou(vector_step);
|
||||||
|
|
||||||
|
sub(reg_boxes_num, vector_step);
|
||||||
|
|
||||||
|
suppressed_by_iou(false);
|
||||||
|
|
||||||
|
// if zero continue, else set result to suppressed and terminate
|
||||||
|
jz(main_loop_label_hard, T_NEAR);
|
||||||
|
|
||||||
|
uni_vpextrd(ptr[reg_candidate_status], Xbyak::Xmm(vmm_zero.getIdx()), 0);
|
||||||
|
|
||||||
|
jmp(terminate_label_hard, T_NEAR);
|
||||||
|
}
|
||||||
|
L(main_loop_end_label_hard);
|
||||||
|
|
||||||
|
L(tail_loop_label_hard);
|
||||||
|
{
|
||||||
|
cmp(reg_boxes_num, 1);
|
||||||
|
jl(terminate_label_hard, T_NEAR);
|
||||||
|
|
||||||
|
sub(reg_boxes_coord0, scalar_step * sizeof(float));
|
||||||
|
sub(reg_boxes_coord1, scalar_step * sizeof(float));
|
||||||
|
sub(reg_boxes_coord2, scalar_step * sizeof(float));
|
||||||
|
sub(reg_boxes_coord3, scalar_step * sizeof(float));
|
||||||
|
|
||||||
|
// iou result is in vmm_temp3
|
||||||
|
iou(scalar_step);
|
||||||
|
|
||||||
|
sub(reg_boxes_num, scalar_step);
|
||||||
|
|
||||||
|
suppressed_by_iou(true);
|
||||||
|
|
||||||
|
jz(tail_loop_label_hard, T_NEAR);
|
||||||
|
|
||||||
|
uni_vpextrd(ptr[reg_candidate_status], Xbyak::Xmm(vmm_zero.getIdx()), 0);
|
||||||
|
|
||||||
|
jmp(terminate_label_hard, T_NEAR);
|
||||||
|
}
|
||||||
|
|
||||||
|
L(terminate_label_hard);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <x64::cpu_isa_t isa>
|
||||||
|
void NonMaxSuppression<isa>::soft_nms() {
|
||||||
|
uni_vbroadcastss(vmm_scale, ptr[reg_scale]);
|
||||||
|
|
||||||
|
Xbyak::Label main_loop_label;
|
||||||
|
Xbyak::Label main_loop_end_label;
|
||||||
|
Xbyak::Label tail_loop_label;
|
||||||
|
Xbyak::Label terminate_label;
|
||||||
|
|
||||||
|
Xbyak::Label main_loop_label_soft;
|
||||||
|
Xbyak::Label tail_loop_label_soft;
|
||||||
|
L(main_loop_label);
|
||||||
|
{
|
||||||
|
cmp(reg_boxes_num, vector_step);
|
||||||
|
jl(main_loop_end_label, T_NEAR);
|
||||||
|
|
||||||
|
sub(reg_boxes_coord0, vector_step * sizeof(float));
|
||||||
|
sub(reg_boxes_coord1, vector_step * sizeof(float));
|
||||||
|
sub(reg_boxes_coord2, vector_step * sizeof(float));
|
||||||
|
sub(reg_boxes_coord3, vector_step * sizeof(float));
|
||||||
|
|
||||||
|
// result(iou and weight) is in vmm_temp3
|
||||||
|
iou(vector_step);
|
||||||
|
sub(reg_boxes_num, vector_step);
|
||||||
|
|
||||||
|
// soft suppressed by iou_threshold
|
||||||
|
if (m_jcp.is_soft_suppressed_by_iou) {
|
||||||
|
suppressed_by_iou(false);
|
||||||
|
|
||||||
|
// if zero continue soft suppression, else set result to suppressed and terminate
|
||||||
|
jz(main_loop_label_soft, T_NEAR);
|
||||||
|
|
||||||
|
uni_vpextrd(ptr[reg_candidate_status], Xbyak::Xmm(vmm_zero.getIdx()), 0);
|
||||||
|
|
||||||
|
jmp(terminate_label, T_NEAR);
|
||||||
|
|
||||||
|
L(main_loop_label_soft);
|
||||||
|
}
|
||||||
|
|
||||||
|
// weight: std::exp(scale * iou * iou)
|
||||||
|
soft_coeff();
|
||||||
|
|
||||||
|
// vector weights multiply
|
||||||
|
horizontal_mul();
|
||||||
|
|
||||||
|
uni_vbroadcastss(vmm_temp1, ptr[reg_score]);
|
||||||
|
|
||||||
|
// new score in vmm3[0]
|
||||||
|
uni_vmulps(vmm_temp3, vmm_temp3, vmm_temp1);
|
||||||
|
// store new score
|
||||||
|
uni_vmovss(ptr[reg_score], vmm_temp3);
|
||||||
|
|
||||||
|
// cmpps(_CMP_LE_OS) if new score is less or equal than score_threshold
|
||||||
|
suppressed_by_score();
|
||||||
|
|
||||||
|
jz(main_loop_label, T_NEAR);
|
||||||
|
|
||||||
|
uni_vpextrd(ptr[reg_candidate_status], Xbyak::Xmm(vmm_zero.getIdx()), 0);
|
||||||
|
|
||||||
|
jmp(terminate_label, T_NEAR);
|
||||||
|
}
|
||||||
|
L(main_loop_end_label);
|
||||||
|
|
||||||
|
L(tail_loop_label);
|
||||||
|
{
|
||||||
|
cmp(reg_boxes_num, 1);
|
||||||
|
jl(terminate_label, T_NEAR);
|
||||||
|
|
||||||
|
sub(reg_boxes_coord0, scalar_step * sizeof(float));
|
||||||
|
sub(reg_boxes_coord1, scalar_step * sizeof(float));
|
||||||
|
sub(reg_boxes_coord2, scalar_step * sizeof(float));
|
||||||
|
sub(reg_boxes_coord3, scalar_step * sizeof(float));
|
||||||
|
|
||||||
|
iou(scalar_step);
|
||||||
|
sub(reg_boxes_num, scalar_step);
|
||||||
|
|
||||||
|
// soft suppressed by iou_threshold
|
||||||
|
if (m_jcp.is_soft_suppressed_by_iou) {
|
||||||
|
suppressed_by_iou(true);
|
||||||
|
|
||||||
|
jz(tail_loop_label_soft, T_NEAR);
|
||||||
|
|
||||||
|
uni_vpextrd(ptr[reg_candidate_status], Xbyak::Xmm(vmm_zero.getIdx()), 0);
|
||||||
|
|
||||||
|
jmp(terminate_label, T_NEAR);
|
||||||
|
|
||||||
|
L(tail_loop_label_soft);
|
||||||
|
}
|
||||||
|
|
||||||
|
soft_coeff();
|
||||||
|
|
||||||
|
uni_vbroadcastss(vmm_temp1, ptr[reg_score]);
|
||||||
|
|
||||||
|
// vmm3[0] is valide, no need horizontal mul.
|
||||||
|
uni_vmulps(vmm_temp3, vmm_temp3, vmm_temp1);
|
||||||
|
|
||||||
|
uni_vmovss(ptr[reg_score], vmm_temp3);
|
||||||
|
|
||||||
|
// cmpps(_CMP_LE_OS) if new score is less or equal than score_threshold
|
||||||
|
suppressed_by_score();
|
||||||
|
|
||||||
|
jz(tail_loop_label, T_NEAR);
|
||||||
|
|
||||||
|
uni_vpextrd(ptr[reg_candidate_status], Xbyak::Xmm(vmm_zero.getIdx()), 0);
|
||||||
|
|
||||||
|
jmp(terminate_label, T_NEAR);
|
||||||
|
}
|
||||||
|
|
||||||
|
L(terminate_label);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <x64::cpu_isa_t isa>
|
||||||
|
void NonMaxSuppression<isa>::suppressed_by_iou(bool is_scalar) {
|
||||||
|
if (x64::mayiuse(x64::avx512_core)) {
|
||||||
|
vcmpps(k_mask, vmm_temp3, vmm_iou_threshold, 0x0D); // _CMP_GE_OS. vcmpps w/ kmask only on V5
|
||||||
|
if (is_scalar)
|
||||||
|
kandw(k_mask, k_mask, k_mask_one);
|
||||||
|
kortestw(k_mask, k_mask); // bitwise check if all zero
|
||||||
|
} else if (x64::mayiuse(x64::avx)) {
|
||||||
|
// vex instructions with xmm on avx and ymm on avx2
|
||||||
|
vcmpps(vmm_temp4, vmm_temp3, vmm_iou_threshold, 0x0D); // xmm and ymm only on V1.
|
||||||
|
if (is_scalar) {
|
||||||
|
uni_vpextrd(reg_temp_32, Xbyak::Xmm(vmm_temp4.getIdx()), 0);
|
||||||
|
test(reg_temp_32, reg_temp_32);
|
||||||
|
} else {
|
||||||
|
uni_vtestps(vmm_temp4, vmm_temp4); // vtestps: sign bit check if all zeros, ymm and xmm only on V1, N/A on V5
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// pure sse path, make sure don't spoil vmm_temp3, which may used in after soft-suppression
|
||||||
|
uni_vmovups(vmm_temp4, vmm_temp3);
|
||||||
|
cmpps(vmm_temp4, vmm_iou_threshold, 0x07); // order compare, 0 for at least one is NaN
|
||||||
|
|
||||||
|
uni_vmovups(vmm_temp2, vmm_temp3);
|
||||||
|
cmpps(vmm_temp2, vmm_iou_threshold, 0x05); // _CMP_GE_US on sse, no direct _CMP_GE_OS supported.
|
||||||
|
|
||||||
|
uni_vandps(vmm_temp4, vmm_temp4, vmm_temp2);
|
||||||
|
if (is_scalar) {
|
||||||
|
uni_vpextrd(reg_temp_32, Xbyak::Xmm(vmm_temp4.getIdx()), 0);
|
||||||
|
test(reg_temp_32, reg_temp_32);
|
||||||
|
} else {
|
||||||
|
uni_vtestps(vmm_temp4, vmm_temp4); // ptest: bitwise check if all zeros, on sse41
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <x64::cpu_isa_t isa>
|
||||||
|
void NonMaxSuppression<isa>::suppressed_by_score() {
|
||||||
|
if (x64::mayiuse(x64::avx512_core)) {
|
||||||
|
vcmpps(k_mask, vmm_temp3, vmm_score_threshold, 0x02); // vcmpps w/ kmask only on V5, w/o kmask version N/A on V5
|
||||||
|
kandw(k_mask, k_mask, k_mask_one);
|
||||||
|
kortestw(k_mask, k_mask); // bitwise check if all zero
|
||||||
|
} else if (x64::mayiuse(x64::avx)) {
|
||||||
|
vcmpps(vmm_temp4, vmm_temp3, vmm_score_threshold, 0x02);
|
||||||
|
uni_vpextrd(reg_temp_32, Xbyak::Xmm(vmm_temp4.getIdx()), 0);
|
||||||
|
test(reg_temp_32, reg_temp_32);
|
||||||
|
} else {
|
||||||
|
cmpps(vmm_temp3, vmm_score_threshold, 0x02); // _CMP_LE_OS on sse
|
||||||
|
uni_vpextrd(reg_temp_32, Xbyak::Xmm(vmm_temp3.getIdx()), 0);
|
||||||
|
test(reg_temp_32, reg_temp_32);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <x64::cpu_isa_t isa>
|
||||||
|
void NonMaxSuppression<isa>::iou(int ele_num) {
|
||||||
|
auto load = [&](Xbyak::Reg64 reg_src, Vmm vmm_dst) {
|
||||||
|
if (ele_num != scalar_step && ele_num != vector_step)
|
||||||
|
OPENVINO_THROW("NMS JIT implementation supports load emitter with only element count scalar_step or vector_step! Get: ", ele_num);
|
||||||
|
|
||||||
|
const auto& load_emitter = ele_num == 1 ? load_scalar_emitter : load_vector_emitter;
|
||||||
|
load_emitter->emit_code({static_cast<size_t>(reg_src.getIdx())}, {static_cast<size_t>(vmm_dst.getIdx())},
|
||||||
|
{}, {load_pool_gpr_idxs});
|
||||||
|
};
|
||||||
|
load(reg_boxes_coord0, vmm_boxes_coord0);
|
||||||
|
load(reg_boxes_coord1, vmm_boxes_coord1);
|
||||||
|
load(reg_boxes_coord2, vmm_boxes_coord2);
|
||||||
|
load(reg_boxes_coord3, vmm_boxes_coord3);
|
||||||
|
|
||||||
|
if (m_jcp.box_encode_type == NMSBoxEncodeType::CORNER) {
|
||||||
|
// box format: y1, x1, y2, x2
|
||||||
|
uni_vminps(vmm_temp1, vmm_boxes_coord0, vmm_boxes_coord2);
|
||||||
|
uni_vmaxps(vmm_temp2, vmm_boxes_coord0, vmm_boxes_coord2);
|
||||||
|
uni_vmovups(vmm_boxes_coord0, vmm_temp1);
|
||||||
|
uni_vmovups(vmm_boxes_coord2, vmm_temp2);
|
||||||
|
|
||||||
|
uni_vminps(vmm_temp1, vmm_boxes_coord1, vmm_boxes_coord3);
|
||||||
|
uni_vmaxps(vmm_temp2, vmm_boxes_coord1, vmm_boxes_coord3);
|
||||||
|
uni_vmovups(vmm_boxes_coord1, vmm_temp1);
|
||||||
|
uni_vmovups(vmm_boxes_coord3, vmm_temp2);
|
||||||
|
} else {
|
||||||
|
// box format: x_center, y_center, width, height --> y1, x1, y2, x2
|
||||||
|
uni_vmulps(vmm_temp1, vmm_boxes_coord2, ptr[reg_table]); // width/2
|
||||||
|
uni_vmulps(vmm_temp2, vmm_boxes_coord3, ptr[reg_table]); // height/2
|
||||||
|
|
||||||
|
uni_vaddps(vmm_temp3, vmm_boxes_coord0, vmm_temp1); // x_center + width/2
|
||||||
|
uni_vmovups(vmm_boxes_coord3, vmm_temp3);
|
||||||
|
|
||||||
|
uni_vaddps(vmm_temp3, vmm_boxes_coord1, vmm_temp2); // y_center + height/2
|
||||||
|
uni_vmovups(vmm_boxes_coord2, vmm_temp3);
|
||||||
|
|
||||||
|
uni_vsubps(vmm_temp3, vmm_boxes_coord0, vmm_temp1); // x_center - width/2
|
||||||
|
uni_vsubps(vmm_temp4, vmm_boxes_coord1, vmm_temp2); // y_center - height/2
|
||||||
|
|
||||||
|
uni_vmovups(vmm_boxes_coord1, vmm_temp3);
|
||||||
|
uni_vmovups(vmm_boxes_coord0, vmm_temp4);
|
||||||
|
}
|
||||||
|
|
||||||
|
uni_vsubps(vmm_temp1, vmm_boxes_coord2, vmm_boxes_coord0);
|
||||||
|
uni_vsubps(vmm_temp2, vmm_boxes_coord3, vmm_boxes_coord1);
|
||||||
|
uni_vmulps(vmm_temp1, vmm_temp1, vmm_temp2); // boxes area
|
||||||
|
|
||||||
|
uni_vsubps(vmm_temp2, vmm_candidate_coord2, vmm_candidate_coord0);
|
||||||
|
uni_vsubps(vmm_temp3, vmm_candidate_coord3, vmm_candidate_coord1);
|
||||||
|
uni_vmulps(vmm_temp2, vmm_temp2, vmm_temp3); // candidate(bc) area // candidate area calculate once and check if 0
|
||||||
|
|
||||||
|
uni_vaddps(vmm_temp1, vmm_temp1, vmm_temp2); // areaI + areaJ to free vmm_temp2
|
||||||
|
|
||||||
|
// y of intersection
|
||||||
|
uni_vminps(vmm_temp3, vmm_boxes_coord2, vmm_candidate_coord2); // min(Ymax)
|
||||||
|
uni_vmaxps(vmm_temp4, vmm_boxes_coord0, vmm_candidate_coord0); // max(Ymin)
|
||||||
|
uni_vsubps(vmm_temp3, vmm_temp3, vmm_temp4); // min(Ymax) - max(Ymin)
|
||||||
|
uni_vmaxps(vmm_temp3, vmm_temp3, vmm_zero);
|
||||||
|
|
||||||
|
// x of intersection
|
||||||
|
uni_vminps(vmm_temp4, vmm_boxes_coord3, vmm_candidate_coord3); // min(Xmax)
|
||||||
|
uni_vmaxps(vmm_temp2, vmm_boxes_coord1, vmm_candidate_coord1); // max(Xmin)
|
||||||
|
uni_vsubps(vmm_temp4, vmm_temp4, vmm_temp2); // min(Xmax) - max(Xmin)
|
||||||
|
uni_vmaxps(vmm_temp4, vmm_temp4, vmm_zero);
|
||||||
|
|
||||||
|
// intersection_area
|
||||||
|
uni_vmulps(vmm_temp3, vmm_temp3, vmm_temp4);
|
||||||
|
|
||||||
|
// iou: intersection_area / (areaI + areaJ - intersection_area);
|
||||||
|
uni_vsubps(vmm_temp1, vmm_temp1, vmm_temp3);
|
||||||
|
uni_vdivps(vmm_temp3, vmm_temp3, vmm_temp1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// std::exp(scale * iou * iou)
|
||||||
|
template <x64::cpu_isa_t isa>
|
||||||
|
void NonMaxSuppression<isa>::soft_coeff() {
|
||||||
|
uni_vmulps(vmm_temp3, vmm_temp3, vmm_temp3);
|
||||||
|
uni_vmulps(vmm_temp3, vmm_temp3, vmm_scale);
|
||||||
|
exp_injector->compute_vector_range(vmm_temp3.getIdx(), vmm_temp3.getIdx() + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <x64::cpu_isa_t isa>
|
||||||
|
void NonMaxSuppression<isa>::horizontal_mul_xmm(const Xbyak::Xmm &xmm_weight, const Xbyak::Xmm &xmm_aux) {
|
||||||
|
uni_vmovshdup(xmm_aux, xmm_weight); // weight:1,2,3,4; aux:2,2,4,4
|
||||||
|
uni_vmulps(xmm_weight, xmm_weight, xmm_aux); // weight:1*2,2*2,3*4,4*4
|
||||||
|
uni_vmovhlps(xmm_aux, xmm_aux, xmm_weight); // aux:3*4,4*4,4,4
|
||||||
|
uni_vmulps(xmm_weight, xmm_weight, xmm_aux); // weight:1*2*3*4,...
|
||||||
|
}
|
||||||
|
|
||||||
|
// horizontal mul for vmm_weight(Vmm(3)), temp1 and temp2 as aux
|
||||||
|
template <x64::cpu_isa_t isa>
|
||||||
|
inline void NonMaxSuppression<isa>::horizontal_mul() {
|
||||||
|
Xbyak::Xmm xmm_weight = Xbyak::Xmm(vmm_temp3.getIdx());
|
||||||
|
Xbyak::Xmm xmm_temp1 = Xbyak::Xmm(vmm_temp1.getIdx());
|
||||||
|
Xbyak::Xmm xmm_temp2 = Xbyak::Xmm(vmm_temp2.getIdx());
|
||||||
|
if (isa == x64::sse41) {
|
||||||
|
horizontal_mul_xmm(xmm_weight, xmm_temp1);
|
||||||
|
} else if (isa == x64::avx2) {
|
||||||
|
Xbyak::Ymm ymm_weight = Xbyak::Ymm(vmm_temp3.getIdx());
|
||||||
|
vextractf128(xmm_temp1, ymm_weight, 0);
|
||||||
|
vextractf128(xmm_temp2, ymm_weight, 1);
|
||||||
|
uni_vmulps(xmm_weight, xmm_temp1, xmm_temp2);
|
||||||
|
horizontal_mul_xmm(xmm_weight, xmm_temp1);
|
||||||
|
} else {
|
||||||
|
Xbyak::Zmm zmm_weight = Xbyak::Zmm(vmm_temp3.getIdx());
|
||||||
|
vextractf32x4(xmm_temp1, zmm_weight, 0);
|
||||||
|
vextractf32x4(xmm_temp2, zmm_weight, 1);
|
||||||
|
uni_vmulps(xmm_temp1, xmm_temp1, xmm_temp2);
|
||||||
|
vextractf32x4(xmm_temp2, zmm_weight, 2);
|
||||||
|
vextractf32x4(xmm_weight, zmm_weight, 3);
|
||||||
|
uni_vmulps(xmm_weight, xmm_weight, xmm_temp2);
|
||||||
|
uni_vmulps(xmm_weight, xmm_weight, xmm_temp1);
|
||||||
|
horizontal_mul_xmm(xmm_weight, xmm_temp1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template class NonMaxSuppression<x64::avx512_core>;
|
||||||
|
template class NonMaxSuppression<x64::avx2>;
|
||||||
|
template class NonMaxSuppression<x64::sse41>;
|
||||||
|
|
||||||
|
} // namespace kernel
|
||||||
|
} // namespace intel_cpu
|
||||||
|
} // namespace ov
|
@ -0,0 +1,152 @@
|
|||||||
|
// Copyright (C) 2018-2023 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "jit_kernel_base.hpp"
|
||||||
|
|
||||||
|
#if defined(OPENVINO_ARCH_X86_64)
|
||||||
|
#include "emitters/x64/jit_load_store_emitters.hpp"
|
||||||
|
#include "cpu/x64/injectors/jit_uni_eltwise_injector.hpp"
|
||||||
|
#endif // OPENVINO_ARCH_X86_64
|
||||||
|
|
||||||
|
namespace ov {
|
||||||
|
namespace intel_cpu {
|
||||||
|
|
||||||
|
// Box encoding selector used by the NMS implementation.
// NOTE(review): presumably CORNER means boxes given by two opposite corners and
// CENTER means center point plus width/height — confirm against the operation spec.
enum class NMSBoxEncodeType {
    CORNER,
    CENTER
};
|
||||||
|
|
||||||
|
#if defined(OPENVINO_ARCH_X86_64)
|
||||||
|
|
||||||
|
namespace kernel {
|
||||||
|
|
||||||
|
struct NmsCompileParams {
|
||||||
|
NMSBoxEncodeType box_encode_type;
|
||||||
|
bool is_soft_suppressed_by_iou;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Run-time arguments handed to the generated NMS kernel on every call.
// Field order is part of the kernel ABI: do not reorder.
struct NmsCallArgs {
    // One pointer per box coordinate for the already selected boxes.
    const void* selected_boxes_coord[4];
    // Number of already selected boxes.
    size_t selected_boxes_num;
    // Candidate box to be checked against the selected ones.
    const void* candidate_box;
    const void* iou_threshold;
    // Output status of the candidate.
    void* candidate_status;
    // for soft suppression, score *= scale * iou * iou;
    const void* score_threshold;
    const void* scale;
    void* score;
};
|
||||||
|
|
||||||
|
|
||||||
|
template <dnnl::impl::cpu::x64::cpu_isa_t isa>
|
||||||
|
class NonMaxSuppression : public JitKernel<NmsCompileParams, NmsCallArgs> {
|
||||||
|
public:
|
||||||
|
DECLARE_CPU_JIT_AUX_FUNCTIONS(NonMaxSuppression)
|
||||||
|
|
||||||
|
explicit NonMaxSuppression(const NmsCompileParams& jcp) : JitKernel(jit_name(), jcp, isa) {}
|
||||||
|
|
||||||
|
void generate() override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
using Vmm = typename dnnl::impl::utils::conditional3<isa == dnnl::impl::cpu::x64::avx512_core, Xbyak::Zmm,
|
||||||
|
isa == dnnl::impl::cpu::x64::avx2, Xbyak::Ymm,
|
||||||
|
Xbyak::Xmm>::type;
|
||||||
|
uint32_t vlen = dnnl::impl::cpu::x64::cpu_isa_traits<isa>::vlen;
|
||||||
|
const int vector_step = vlen / sizeof(float);
|
||||||
|
const int scalar_step = 1;
|
||||||
|
|
||||||
|
Xbyak::Reg64 reg_boxes_coord0 = r8;
|
||||||
|
Xbyak::Reg64 reg_boxes_coord1 = r9;
|
||||||
|
Xbyak::Reg64 reg_boxes_coord2 = r10;
|
||||||
|
Xbyak::Reg64 reg_boxes_coord3 = r11;
|
||||||
|
Xbyak::Reg64 reg_candidate_box = r12;
|
||||||
|
Xbyak::Reg64 reg_candidate_status = r13;
|
||||||
|
Xbyak::Reg64 reg_boxes_num = r14;
|
||||||
|
Xbyak::Reg64 reg_iou_threshold = r15;
|
||||||
|
// more for soft
|
||||||
|
Xbyak::Reg64 reg_score_threshold = rdx;
|
||||||
|
Xbyak::Reg64 reg_score = rbp;
|
||||||
|
Xbyak::Reg64 reg_scale = rsi;
|
||||||
|
|
||||||
|
Xbyak::Reg64 reg_load_table = rax;
|
||||||
|
Xbyak::Reg64 reg_load_store_mask = rbx;
|
||||||
|
|
||||||
|
// reuse
|
||||||
|
Xbyak::Label l_table_constant;
|
||||||
|
Xbyak::Reg64 reg_table = rcx;
|
||||||
|
Xbyak::Reg64 reg_temp_64 = rdi;
|
||||||
|
Xbyak::Reg32 reg_temp_32 = edi;
|
||||||
|
|
||||||
|
const Xbyak::Reg64 reg_params = Xbyak::Reg64(dnnl::impl::cpu::x64::abi_param_regs[0]);
|
||||||
|
|
||||||
|
std::unique_ptr<jit_load_emitter> load_vector_emitter = nullptr;
|
||||||
|
std::unique_ptr<jit_load_emitter> load_scalar_emitter = nullptr;
|
||||||
|
|
||||||
|
std::vector<size_t> store_pool_gpr_idxs;
|
||||||
|
std::vector<size_t> store_pool_vec_idxs;
|
||||||
|
std::vector<size_t> load_pool_gpr_idxs;
|
||||||
|
|
||||||
|
Vmm vmm_boxes_coord0 = Vmm(1);
|
||||||
|
Vmm vmm_boxes_coord1 = Vmm(2);
|
||||||
|
Vmm vmm_boxes_coord2 = Vmm(3);
|
||||||
|
Vmm vmm_boxes_coord3 = Vmm(4);
|
||||||
|
Vmm vmm_candidate_coord0 = Vmm(5);
|
||||||
|
Vmm vmm_candidate_coord1 = Vmm(6);
|
||||||
|
Vmm vmm_candidate_coord2 = Vmm(7);
|
||||||
|
Vmm vmm_candidate_coord3 = Vmm(8);
|
||||||
|
Vmm vmm_temp1 = Vmm(9);
|
||||||
|
Vmm vmm_temp2 = Vmm(10);
|
||||||
|
Vmm vmm_temp3 = Vmm(11);
|
||||||
|
Vmm vmm_temp4 = Vmm(12);
|
||||||
|
|
||||||
|
Vmm vmm_iou_threshold = Vmm(13);
|
||||||
|
Vmm vmm_zero = Vmm(15);
|
||||||
|
|
||||||
|
// soft
|
||||||
|
Vmm vmm_score_threshold = Vmm(14);
|
||||||
|
Vmm vmm_scale = Vmm(0);
|
||||||
|
|
||||||
|
Xbyak::Opmask k_mask = Xbyak::Opmask(7);
|
||||||
|
Xbyak::Opmask k_mask_one = Xbyak::Opmask(6);
|
||||||
|
|
||||||
|
std::shared_ptr<dnnl::impl::cpu::x64::jit_uni_eltwise_injector_f32<isa>> exp_injector;
|
||||||
|
|
||||||
|
inline void hard_nms();
|
||||||
|
|
||||||
|
inline void soft_nms();
|
||||||
|
|
||||||
|
inline void suppressed_by_iou(bool is_scalar);
|
||||||
|
|
||||||
|
inline void suppressed_by_score();
|
||||||
|
|
||||||
|
inline void iou(int ele_num);
|
||||||
|
|
||||||
|
inline void soft_coeff();
|
||||||
|
|
||||||
|
inline void horizontal_mul_xmm(const Xbyak::Xmm& xmm_weight, const Xbyak::Xmm& xmm_aux);
|
||||||
|
|
||||||
|
inline void horizontal_mul();
|
||||||
|
|
||||||
|
inline void prepare_table() {
|
||||||
|
auto broadcast_d = [&](int val) {
|
||||||
|
for (size_t d = 0; d < vlen / sizeof(int); ++d) {
|
||||||
|
dd(val);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
align(64);
|
||||||
|
L(l_table_constant);
|
||||||
|
broadcast_d(0x3f000000); // 0.5f
|
||||||
|
dw(0x0001);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace kernel
|
||||||
|
|
||||||
|
#endif // OPENVINO_ARCH_X86_64
|
||||||
|
|
||||||
|
} // namespace intel_cpu
|
||||||
|
} // namespace ov
|
File diff suppressed because it is too large
Load Diff
@ -4,82 +4,43 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <ie_common.h>
|
#include "node.h"
|
||||||
#include <node.h>
|
#include "kernels/x64/non_max_suppression.hpp"
|
||||||
#include <string>
|
|
||||||
#include <memory>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
#define BOX_COORD_NUM 4
|
|
||||||
|
|
||||||
using namespace InferenceEngine;
|
|
||||||
|
|
||||||
namespace ov {
|
namespace ov {
|
||||||
namespace intel_cpu {
|
namespace intel_cpu {
|
||||||
namespace node {
|
namespace node {
|
||||||
|
|
||||||
enum class NMSBoxEncodeType {
|
|
||||||
CORNER,
|
|
||||||
CENTER
|
|
||||||
};
|
|
||||||
|
|
||||||
enum NMSCandidateStatus {
|
enum NMSCandidateStatus {
|
||||||
SUPPRESSED = 0,
|
SUPPRESSED = 0,
|
||||||
SELECTED = 1,
|
SELECTED = 1,
|
||||||
UPDATED = 2
|
UPDATED = 2
|
||||||
};
|
};
|
||||||
|
|
||||||
struct jit_nms_config_params {
|
|
||||||
NMSBoxEncodeType box_encode_type;
|
|
||||||
bool is_soft_suppressed_by_iou;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct jit_nms_args {
|
|
||||||
const void* selected_boxes_coord[BOX_COORD_NUM];
|
|
||||||
size_t selected_boxes_num;
|
|
||||||
const void* candidate_box;
|
|
||||||
const void* iou_threshold;
|
|
||||||
void* candidate_status;
|
|
||||||
// for soft suppression, score *= scale * iou * iou;
|
|
||||||
const void* score_threshold;
|
|
||||||
const void* scale;
|
|
||||||
void* score;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct jit_uni_nms_kernel {
|
|
||||||
void (*ker_)(const jit_nms_args *);
|
|
||||||
|
|
||||||
void operator()(const jit_nms_args *args) {
|
|
||||||
assert(ker_);
|
|
||||||
ker_(args);
|
|
||||||
}
|
|
||||||
|
|
||||||
explicit jit_uni_nms_kernel(jit_nms_config_params jcp_) : ker_(nullptr), jcp(jcp_) {}
|
|
||||||
virtual ~jit_uni_nms_kernel() {}
|
|
||||||
|
|
||||||
virtual void create_ker() = 0;
|
|
||||||
|
|
||||||
jit_nms_config_params jcp;
|
|
||||||
};
|
|
||||||
|
|
||||||
class NonMaxSuppression : public Node {
|
class NonMaxSuppression : public Node {
|
||||||
public:
|
public:
|
||||||
NonMaxSuppression(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context);
|
NonMaxSuppression(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr& context);
|
||||||
|
|
||||||
void getSupportedDescriptors() override {};
|
void getSupportedDescriptors() override {};
|
||||||
|
|
||||||
void initSupportedPrimitiveDescriptors() override;
|
void initSupportedPrimitiveDescriptors() override;
|
||||||
|
|
||||||
void execute(dnnl::stream strm) override;
|
void execute(dnnl::stream strm) override;
|
||||||
|
|
||||||
|
void executeDynamicImpl(dnnl::stream strm) override;
|
||||||
|
|
||||||
bool created() const override;
|
bool created() const override;
|
||||||
|
|
||||||
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
|
static bool isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept;
|
||||||
|
|
||||||
struct filteredBoxes {
|
struct FilteredBox {
|
||||||
float score;
|
float score;
|
||||||
int batch_index;
|
int batch_index;
|
||||||
int class_index;
|
int class_index;
|
||||||
int box_index;
|
int box_index;
|
||||||
filteredBoxes() = default;
|
FilteredBox() = default;
|
||||||
filteredBoxes(float _score, int _batch_index, int _class_index, int _box_index) :
|
FilteredBox(float _score, int _batch_index, int _class_index, int _box_index) :
|
||||||
score(_score), batch_index(_batch_index), class_index(_class_index), box_index(_box_index) {}
|
score(_score), batch_index(_batch_index), class_index(_class_index), box_index(_box_index) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -89,66 +50,101 @@ public:
|
|||||||
int suppress_begin_index;
|
int suppress_begin_index;
|
||||||
};
|
};
|
||||||
|
|
||||||
float intersectionOverUnion(const float *boxesI, const float *boxesJ);
|
|
||||||
|
|
||||||
void nmsWithSoftSigma(const float *boxes, const float *scores, const SizeVector &boxesStrides,
|
|
||||||
const SizeVector &scoresStrides, std::vector<filteredBoxes> &filtBoxes);
|
|
||||||
|
|
||||||
void nmsWithoutSoftSigma(const float *boxes, const float *scores, const SizeVector &boxesStrides,
|
|
||||||
const SizeVector &scoresStrides, std::vector<filteredBoxes> &filtBoxes);
|
|
||||||
|
|
||||||
void executeDynamicImpl(dnnl::stream strm) override;
|
|
||||||
|
|
||||||
bool isExecutable() const override;
|
bool isExecutable() const override;
|
||||||
|
|
||||||
bool needShapeInfer() const override { return false; }
|
bool needShapeInfer() const override { return false; }
|
||||||
|
|
||||||
void prepareParams() override;
|
void prepareParams() override;
|
||||||
|
|
||||||
|
struct Point2D {
|
||||||
|
float x, y;
|
||||||
|
Point2D(const float px = 0.f, const float py = 0.f) : x(px), y(py) {}
|
||||||
|
Point2D operator+(const Point2D& p) const {
|
||||||
|
return Point2D(x + p.x, y + p.y);
|
||||||
|
}
|
||||||
|
Point2D& operator+=(const Point2D& p) {
|
||||||
|
x += p.x;
|
||||||
|
y += p.y;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
Point2D operator-(const Point2D& p) const {
|
||||||
|
return Point2D(x - p.x, y - p.y);
|
||||||
|
}
|
||||||
|
Point2D operator*(const float coeff) const {
|
||||||
|
return Point2D(x * coeff, y * coeff);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// input
|
// input
|
||||||
enum {
|
enum {
|
||||||
NMS_BOXES,
|
NMS_BOXES,
|
||||||
NMS_SCORES,
|
NMS_SCORES,
|
||||||
NMS_MAXOUTPUTBOXESPERCLASS,
|
NMS_MAX_OUTPUT_BOXES_PER_CLASS,
|
||||||
NMS_IOUTHRESHOLD,
|
NMS_IOU_THRESHOLD,
|
||||||
NMS_SCORETHRESHOLD,
|
NMS_SCORE_THRESHOLD,
|
||||||
NMS_SOFTNMSSIGMA,
|
NMS_SOFT_NMS_SIGMA,
|
||||||
};
|
};
|
||||||
|
|
||||||
// output
|
// output
|
||||||
enum {
|
enum {
|
||||||
NMS_SELECTEDINDICES,
|
NMS_SELECTED_INDICES,
|
||||||
NMS_SELECTEDSCORES,
|
NMS_SELECTED_SCORES,
|
||||||
NMS_VALIDOUTPUTS
|
NMS_VALID_OUTPUTS
|
||||||
};
|
};
|
||||||
|
|
||||||
NMSBoxEncodeType boxEncodingType = NMSBoxEncodeType::CORNER;
|
float intersectionOverUnion(const float *boxesI, const float *boxesJ);
|
||||||
bool sortResultDescending = true;
|
|
||||||
|
|
||||||
size_t numBatches = 0;
|
float rotatedIntersectionOverUnion(const Point2D (&vertices_0)[4], const float area_0, const float* box_1);
|
||||||
size_t numBoxes = 0;
|
|
||||||
size_t numClasses = 0;
|
|
||||||
|
|
||||||
size_t maxOutputBoxesPerClass = 0lu;
|
void nmsWithSoftSigma(const float *boxes, const float *scores, const InferenceEngine::SizeVector &boxesStrides,
|
||||||
float iouThreshold = 0.0f;
|
const InferenceEngine::SizeVector &scoresStrides, std::vector<FilteredBox> &filtBoxes);
|
||||||
float scoreThreshold = 0.0f;
|
|
||||||
float softNMSSigma = 0.0f;
|
|
||||||
float scale = 1.f;
|
|
||||||
// control placeholder for NMS in new opset.
|
|
||||||
bool isSoftSuppressedByIOU = false;
|
|
||||||
|
|
||||||
bool m_outStaticShape = false;
|
void nmsWithoutSoftSigma(const float *boxes, const float *scores, const InferenceEngine::SizeVector &boxesStrides,
|
||||||
|
const InferenceEngine::SizeVector &scoresStrides, std::vector<FilteredBox> &filtBoxes);
|
||||||
|
|
||||||
std::string errorPrefix;
|
void nmsRotated(const float *boxes, const float *scores, const InferenceEngine::SizeVector &boxesStrides,
|
||||||
|
const InferenceEngine::SizeVector &scoresStrides, std::vector<FilteredBox> &filtBoxes);
|
||||||
|
|
||||||
std::vector<std::vector<size_t>> numFiltBox;
|
void check1DInput(const Shape& shape,
|
||||||
const std::string inType = "input", outType = "output";
|
const std::string& name,
|
||||||
|
const size_t port);
|
||||||
|
|
||||||
void checkPrecision(const Precision& prec, const std::vector<Precision>& precList, const std::string& name, const std::string& type);
|
void checkOutput(const Shape& shape,
|
||||||
void check1DInput(const Shape& shape, const std::vector<Precision>& precList, const std::string& name, const size_t port);
|
const std::string& name,
|
||||||
void checkOutput(const Shape& shape, const std::vector<Precision>& precList, const std::string& name, const size_t port);
|
const size_t port);
|
||||||
|
|
||||||
void createJitKernel();
|
void createJitKernel();
|
||||||
std::shared_ptr<jit_uni_nms_kernel> nms_kernel = nullptr;
|
|
||||||
|
|
||||||
|
NMSBoxEncodeType boxEncodingType = NMSBoxEncodeType::CORNER;
|
||||||
|
bool m_sort_result_descending = true;
|
||||||
|
bool m_clockwise = false;
|
||||||
|
bool m_rotated_boxes = false;
|
||||||
|
size_t m_coord_num = 1lu;
|
||||||
|
|
||||||
|
size_t m_batches_num = 0lu;
|
||||||
|
size_t m_boxes_num = 0lu;
|
||||||
|
size_t m_classes_num = 0lu;
|
||||||
|
|
||||||
|
size_t m_max_output_boxes_per_class = 0lu; // Original value of input NMS_MAX_OUTPUT_BOXES_PER_CLASS
|
||||||
|
size_t m_output_boxes_per_class = 0lu; // Actual number of output boxes
|
||||||
|
float m_iou_threshold = 0.f;
|
||||||
|
float m_score_threshold = 0.f;
|
||||||
|
float m_soft_nms_sigma = 0.f;
|
||||||
|
float m_scale = 0.f;
|
||||||
|
// control placeholder for NMS in new opset.
|
||||||
|
bool m_is_soft_suppressed_by_iou = false;
|
||||||
|
|
||||||
|
bool m_out_static_shape = false;
|
||||||
|
|
||||||
|
std::vector<std::vector<size_t>> m_num_filtered_boxes;
|
||||||
|
const std::string inType = "input";
|
||||||
|
const std::string outType = "output";
|
||||||
|
bool m_defined_outputs[NMS_VALID_OUTPUTS + 1] = { false, false, false };
|
||||||
|
std::vector<FilteredBox> m_filtered_boxes;
|
||||||
|
|
||||||
|
std::shared_ptr<kernel::JitKernelBase> m_jit_kernel;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace node
|
} // namespace node
|
||||||
|
@ -197,6 +197,8 @@ std::vector<std::string> disabledTestPatterns() {
|
|||||||
R"(.*RDFTLayerTest.*SignalSize=().*)",
|
R"(.*RDFTLayerTest.*SignalSize=().*)",
|
||||||
// Issue: 123815 (Tests are sensitive to available thread count on testing machines)
|
// Issue: 123815 (Tests are sensitive to available thread count on testing machines)
|
||||||
R"(.*smoke_Snippets_MHA_.?D_SplitDimensionM.*)",
|
R"(.*smoke_Snippets_MHA_.?D_SplitDimensionM.*)",
|
||||||
|
// Issue: 122356
|
||||||
|
R"(.*NmsRotatedOpTest.*(SortDesc=True|Clockwise=False).*)",
|
||||||
};
|
};
|
||||||
|
|
||||||
#if defined(OPENVINO_ARCH_X86)
|
#if defined(OPENVINO_ARCH_X86)
|
||||||
|
@ -0,0 +1,95 @@
|
|||||||
|
// Copyright (C) 2018-2023 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "single_op_tests/nms_rotated.hpp"
|
||||||
|
|
||||||
|
using namespace LayerTestsDefinitions;
|
||||||
|
using namespace ov::test;
|
||||||
|
|
||||||
|
|
||||||
|
static const std::vector<std::vector<InputShape>> input_shapes = {
|
||||||
|
{
|
||||||
|
{ {}, {{1, 5, 5}} },
|
||||||
|
{ {}, {{1, 7, 5}} }
|
||||||
|
},
|
||||||
|
{
|
||||||
|
{ {}, {{2, 9, 5}} },
|
||||||
|
{ {}, {{2, 15, 9}} }
|
||||||
|
},
|
||||||
|
{
|
||||||
|
{ {}, {{5, 17, 5}} },
|
||||||
|
{ {}, {{5, 7, 17}} }
|
||||||
|
},
|
||||||
|
{
|
||||||
|
{ {}, {{9, 75, 5}} },
|
||||||
|
{ {}, {{9, 55, 75}} }
|
||||||
|
},
|
||||||
|
{
|
||||||
|
{ {-1, -1, 5}, {{5, 20, 5}, {3, 50, 5}, {2, 99, 5}} },
|
||||||
|
{ {-1, -1, -1}, {{5, 30, 20}, {3, 100, 50}, {2, 133, 99}} }
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
static const std::vector<std::vector<InputShape>> input_shapes_nightly = {
|
||||||
|
{
|
||||||
|
{ {}, {{3, 11, 5}} },
|
||||||
|
{ {}, {{3, 15, 11}} }
|
||||||
|
},
|
||||||
|
{
|
||||||
|
{ {}, {{15, 29, 5}} },
|
||||||
|
{ {}, {{15, 31, 29}} }
|
||||||
|
},
|
||||||
|
{
|
||||||
|
{ {}, {{21, 64, 5}} },
|
||||||
|
{ {}, {{21, 32, 64}} }
|
||||||
|
},
|
||||||
|
{
|
||||||
|
{ {-1, -1, 5}, {{7, 35, 5}, {7, 35, 5}, {7, 35, 5}} },
|
||||||
|
{ {-1, -1, -1}, {{7, 30, 35}, {7, 100, 35}, {7, 133, 35}} }
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
const ov::AnyMap empty_plugin_config{};
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_SUITE_P(smoke_, NmsRotatedOpTest,
|
||||||
|
::testing::Combine(
|
||||||
|
::testing::ValuesIn(input_shapes), // Input shapes
|
||||||
|
::testing::Values(ElementType::f32), // Boxes and scores input precisions
|
||||||
|
::testing::Values(ElementType::i32), // Max output boxes input precisions
|
||||||
|
::testing::Values(ElementType::f32), // Thresholds precisions
|
||||||
|
::testing::Values(ElementType::i32), // Output type
|
||||||
|
::testing::Values(5, 20), // Max output boxes per class
|
||||||
|
::testing::Values(0.3f, 0.7f), // IOU threshold
|
||||||
|
::testing::Values(0.3f, 0.7f), // Score threshold
|
||||||
|
::testing::Values(true, false), // Sort result descending
|
||||||
|
::testing::Values(true, false), // Clockwise
|
||||||
|
::testing::Values(false), // Is 1st input constant
|
||||||
|
::testing::Values(false), // Is 2nd input constant
|
||||||
|
::testing::Values(false), // Is 3rd input constant
|
||||||
|
::testing::Values(false), // Is 4th input constant
|
||||||
|
::testing::Values(false), // Is 5th input constant
|
||||||
|
::testing::Values(empty_plugin_config), // Additional plugin configuration
|
||||||
|
::testing::Values(utils::DEVICE_CPU)), // Device name
|
||||||
|
NmsRotatedOpTest::getTestCaseName);
|
||||||
|
|
||||||
|
// Nightly suite: reduced-precision inputs, i64 indices, every constant/non-constant input combination.
INSTANTIATE_TEST_SUITE_P(nightly_, NmsRotatedOpTest,
        ::testing::Combine(
                ::testing::ValuesIn(input_shapes_nightly),                  // Input shapes
                ::testing::Values(ElementType::f16, ElementType::bf16),     // Boxes and scores input precisions
                ::testing::Values(ElementType::i64),                        // Max output boxes input precisions
                ::testing::Values(ElementType::f16, ElementType::bf16),     // Thresholds precisions
                ::testing::Values(ElementType::i64),                        // Output type
                ::testing::Values(10),                                      // Max output boxes per class
                ::testing::Values(0.5f),                                    // IOU threshold
                ::testing::Values(0.4f),                                    // Score threshold
                ::testing::Values(true, false),                             // Sort result descending
                ::testing::Values(true, false),                             // Clockwise
                ::testing::Values(true, false),                             // Is 1st input constant
                ::testing::Values(true, false),                             // Is 2nd input constant
                ::testing::Values(true, false),                             // Is 3rd input constant
                ::testing::Values(true, false),                             // Is 4th input constant
                ::testing::Values(true, false),                             // Is 5th input constant
                ::testing::Values(empty_plugin_config),                     // Additional plugin configuration
                ::testing::Values(utils::DEVICE_CPU)),                      // Device name
        NmsRotatedOpTest::getTestCaseName);
|
@ -43,9 +43,9 @@ using NmsParams = std::tuple<InputShapeParams,
|
|||||||
int32_t, // Max output boxes per class
|
int32_t, // Max output boxes per class
|
||||||
ThresholdValues, // IOU, Score, Soft NMS sigma
|
ThresholdValues, // IOU, Score, Soft NMS sigma
|
||||||
ngraph::helpers::InputLayerType, // max_output_boxes_per_class input type
|
ngraph::helpers::InputLayerType, // max_output_boxes_per_class input type
|
||||||
ngraph::op::v9::NonMaxSuppression::BoxEncodingType, // Box encoding
|
ov::op::v9::NonMaxSuppression::BoxEncodingType, // Box encoding
|
||||||
bool, // Sort result descending
|
bool, // Sort result descending
|
||||||
ngraph::element::Type, // Output type
|
ElementType, // Output type
|
||||||
std::string>; // Device name
|
std::string>; // Device name
|
||||||
|
|
||||||
class NmsLayerCPUTest : public testing::WithParamInterface<NmsParams>, virtual public SubgraphBaseTest, public CPUTestsBase {
|
class NmsLayerCPUTest : public testing::WithParamInterface<NmsParams>, virtual public SubgraphBaseTest, public CPUTestsBase {
|
||||||
@ -57,9 +57,9 @@ public:
|
|||||||
ngraph::helpers::InputLayerType maxOutBoxesType;
|
ngraph::helpers::InputLayerType maxOutBoxesType;
|
||||||
ThresholdValues thrValues;
|
ThresholdValues thrValues;
|
||||||
float iouThr, scoreThr, softNmsSigma;
|
float iouThr, scoreThr, softNmsSigma;
|
||||||
op::v9::NonMaxSuppression::BoxEncodingType boxEncoding;
|
ov::op::v9::NonMaxSuppression::BoxEncodingType boxEncoding;
|
||||||
bool sortResDescend;
|
bool sortResDescend;
|
||||||
element::Type outType;
|
ElementType outType;
|
||||||
std::string targetDevice;
|
std::string targetDevice;
|
||||||
std::tie(inShapeParams, inPrecisions, maxOutBoxesPerClass, thrValues, maxOutBoxesType, boxEncoding, sortResDescend, outType, targetDevice) = obj.param;
|
std::tie(inShapeParams, inPrecisions, maxOutBoxesPerClass, thrValues, maxOutBoxesType, boxEncoding, sortResDescend, outType, targetDevice) = obj.param;
|
||||||
|
|
||||||
@ -115,12 +115,12 @@ protected:
|
|||||||
ThresholdValues thrValues;
|
ThresholdValues thrValues;
|
||||||
ngraph::helpers::InputLayerType maxOutBoxesType;
|
ngraph::helpers::InputLayerType maxOutBoxesType;
|
||||||
float iouThr, scoreThr, softNmsSigma;
|
float iouThr, scoreThr, softNmsSigma;
|
||||||
op::v9::NonMaxSuppression::BoxEncodingType boxEncoding;
|
ov::op::v9::NonMaxSuppression::BoxEncodingType boxEncoding;
|
||||||
bool sortResDescend;
|
bool sortResDescend;
|
||||||
element::Type outType;
|
ElementType outType;
|
||||||
std::tie(inShapeParams, inPrecisions, maxOutBoxesPerClass, thrValues, maxOutBoxesType, boxEncoding, sortResDescend, outType,
|
std::tie(inShapeParams, inPrecisions, maxOutBoxesPerClass, thrValues, maxOutBoxesType, boxEncoding, sortResDescend, outType,
|
||||||
targetDevice) = this->GetParam();
|
targetDevice) = this->GetParam();
|
||||||
element::Type paramsPrec, maxBoxPrec, thrPrec;
|
ElementType paramsPrec, maxBoxPrec, thrPrec;
|
||||||
std::tie(paramsPrec, maxBoxPrec, thrPrec) = inPrecisions;
|
std::tie(paramsPrec, maxBoxPrec, thrPrec) = inPrecisions;
|
||||||
|
|
||||||
std::tie(iouThr, scoreThr, softNmsSigma) = thrValues;
|
std::tie(iouThr, scoreThr, softNmsSigma) = thrValues;
|
||||||
@ -156,7 +156,7 @@ protected:
|
|||||||
|
|
||||||
if (maxOutBoxesType == ngraph::helpers::InputLayerType::PARAMETER) {
|
if (maxOutBoxesType == ngraph::helpers::InputLayerType::PARAMETER) {
|
||||||
inputDynamicShapes.push_back(ngraph::PartialShape{1});
|
inputDynamicShapes.push_back(ngraph::PartialShape{1});
|
||||||
params.push_back(std::make_shared<ngraph::opset1::Parameter>(element::Type_t::i32, inputDynamicShapes.back()));
|
params.push_back(std::make_shared<ngraph::opset1::Parameter>(ElementType::i32, inputDynamicShapes.back()));
|
||||||
params[1]->set_friendly_name("param_3");
|
params[1]->set_friendly_name("param_3");
|
||||||
maxOutBoxesPerClassNode = params.back();
|
maxOutBoxesPerClassNode = params.back();
|
||||||
} else {
|
} else {
|
||||||
@ -166,7 +166,7 @@ protected:
|
|||||||
auto iouThrNode = builder::makeConstant(thrPrec, ngraph::Shape{}, std::vector<float>{iouThr})->output(0);
|
auto iouThrNode = builder::makeConstant(thrPrec, ngraph::Shape{}, std::vector<float>{iouThr})->output(0);
|
||||||
auto scoreThrNode = builder::makeConstant(thrPrec, ngraph::Shape{}, std::vector<float>{scoreThr})->output(0);
|
auto scoreThrNode = builder::makeConstant(thrPrec, ngraph::Shape{}, std::vector<float>{scoreThr})->output(0);
|
||||||
auto softNmsSigmaNode = builder::makeConstant(thrPrec, ngraph::Shape{}, std::vector<float>{softNmsSigma})->output(0);
|
auto softNmsSigmaNode = builder::makeConstant(thrPrec, ngraph::Shape{}, std::vector<float>{softNmsSigma})->output(0);
|
||||||
auto nms = std::make_shared<ngraph::op::v9::NonMaxSuppression>(params[0], params[1], maxOutBoxesPerClassNode, iouThrNode, scoreThrNode,
|
auto nms = std::make_shared<ov::op::v9::NonMaxSuppression>(params[0], params[1], maxOutBoxesPerClassNode, iouThrNode, scoreThrNode,
|
||||||
softNmsSigmaNode, boxEncoding, sortResDescend, outType);
|
softNmsSigmaNode, boxEncoding, sortResDescend, outType);
|
||||||
|
|
||||||
function = makeNgraphFunction(paramsPrec, params, nms, "NMS");
|
function = makeNgraphFunction(paramsPrec, params, nms, "NMS");
|
||||||
@ -276,7 +276,7 @@ private:
|
|||||||
|
|
||||||
expectedList.resize(selected_indices_size);
|
expectedList.resize(selected_indices_size);
|
||||||
|
|
||||||
if (indeces_iter->get_element_type() == ov::element::i32) {
|
if (indeces_iter->get_element_type() == ElementType::i32) {
|
||||||
auto selected_indices_data = indeces_iter->data<int32_t>();
|
auto selected_indices_data = indeces_iter->data<int32_t>();
|
||||||
|
|
||||||
for (size_t i = 0; i < selected_indices_size; i += 3) {
|
for (size_t i = 0; i < selected_indices_size; i += 3) {
|
||||||
@ -296,7 +296,7 @@ private:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (scores_iter->get_element_type() == ov::element::f32) {
|
if (scores_iter->get_element_type() == ElementType::f32) {
|
||||||
auto selected_scores_data = scores_iter->data<float>();
|
auto selected_scores_data = scores_iter->data<float>();
|
||||||
for (size_t i = 0; i < selected_scores_size; i += 3) {
|
for (size_t i = 0; i < selected_scores_size; i += 3) {
|
||||||
expectedList[i/3].score = selected_scores_data[i+2];
|
expectedList[i/3].score = selected_scores_data[i+2];
|
||||||
@ -319,7 +319,7 @@ private:
|
|||||||
size_t selected_indices_size = indeces_iter->get_size();
|
size_t selected_indices_size = indeces_iter->get_size();
|
||||||
const auto selected_scores_data = scores_iter->data<float>();
|
const auto selected_scores_data = scores_iter->data<float>();
|
||||||
|
|
||||||
if (indeces_iter->get_element_type() == ov::element::i32) {
|
if (indeces_iter->get_element_type() == ElementType::i32) {
|
||||||
const auto selected_indices_data = indeces_iter->data<int32_t>();
|
const auto selected_indices_data = indeces_iter->data<int32_t>();
|
||||||
for (size_t i = 0; i < selected_indices_size; i += 3) {
|
for (size_t i = 0; i < selected_indices_size; i += 3) {
|
||||||
const int32_t batchId = selected_indices_data[i+0];
|
const int32_t batchId = selected_indices_data[i+0];
|
||||||
@ -415,10 +415,10 @@ const std::vector<InputShapeParams> inShapeParams = {
|
|||||||
const std::vector<int32_t> maxOutBoxPerClass = {5, 20};
|
const std::vector<int32_t> maxOutBoxPerClass = {5, 20};
|
||||||
const std::vector<float> threshold = {0.3f, 0.7f};
|
const std::vector<float> threshold = {0.3f, 0.7f};
|
||||||
const std::vector<float> sigmaThreshold = {0.0f, 0.5f};
|
const std::vector<float> sigmaThreshold = {0.0f, 0.5f};
|
||||||
const std::vector<op::v9::NonMaxSuppression::BoxEncodingType> encodType = {op::v9::NonMaxSuppression::BoxEncodingType::CENTER,
|
const std::vector<ov::op::v9::NonMaxSuppression::BoxEncodingType> encodType = {ov::op::v9::NonMaxSuppression::BoxEncodingType::CENTER,
|
||||||
op::v9::NonMaxSuppression::BoxEncodingType::CORNER};
|
ov::op::v9::NonMaxSuppression::BoxEncodingType::CORNER};
|
||||||
const std::vector<bool> sortResDesc = {true, false};
|
const std::vector<bool> sortResDesc = {true, false};
|
||||||
const std::vector<element::Type> outType = {element::i32, element::i64};
|
const std::vector<ElementType> outType = {ElementType::i32, ElementType::i64};
|
||||||
const std::vector<ngraph::helpers::InputLayerType> maxBoxInputTypes = {ngraph::helpers::InputLayerType::PARAMETER, ngraph::helpers::InputLayerType::CONSTANT};
|
const std::vector<ngraph::helpers::InputLayerType> maxBoxInputTypes = {ngraph::helpers::InputLayerType::PARAMETER, ngraph::helpers::InputLayerType::CONSTANT};
|
||||||
|
|
||||||
const auto nmsParams = ::testing::Combine(::testing::ValuesIn(inShapeParams),
|
const auto nmsParams = ::testing::Combine(::testing::ValuesIn(inShapeParams),
|
||||||
|
@ -0,0 +1,15 @@
|
|||||||
|
// Copyright (C) 2018-2023 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "shared_test_classes/single_op/nms_rotated.hpp"
|
||||||
|
|
||||||
|
namespace LayerTestsDefinitions {
|
||||||
|
|
||||||
|
// Runs the test's run() method for every instantiated parameter set.
TEST_P(NmsRotatedOpTest, CompareWithRefs) {
    run();
}
|
||||||
|
|
||||||
|
} // namespace LayerTestsDefinitions
|
@ -0,0 +1,47 @@
|
|||||||
|
// Copyright (C) 2018-2023 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "shared_test_classes/base/ov_subgraph.hpp"
|
||||||
|
|
||||||
|
namespace LayerTestsDefinitions {
|
||||||
|
|
||||||
|
typedef std::tuple<
|
||||||
|
std::vector<ov::test::InputShape>, // Input shapes
|
||||||
|
ov::test::ElementType, // Boxes and scores input precisions
|
||||||
|
ov::test::ElementType, // Max output boxes input precisions
|
||||||
|
ov::test::ElementType, // Thresholds precisions
|
||||||
|
ov::test::ElementType, // Output type
|
||||||
|
int64_t, // Max output boxes per class
|
||||||
|
float, // IOU threshold
|
||||||
|
float, // Score threshold
|
||||||
|
bool, // Sort result descending
|
||||||
|
bool, // Clockwise
|
||||||
|
bool, // Is 1st input constant
|
||||||
|
bool, // Is 2nd input constant
|
||||||
|
bool, // Is 3rd input constant
|
||||||
|
bool, // Is 4th input constant
|
||||||
|
bool, // Is 5th input constant
|
||||||
|
ov::AnyMap, // Additional configuration
|
||||||
|
std::string // Device name
|
||||||
|
> NmsRotatedParams;
|
||||||
|
|
||||||
|
// Parameterized single-op test fixture for the NMSRotated operation.
class NmsRotatedOpTest : public testing::WithParamInterface<NmsRotatedParams>,
                         public ov::test::SubgraphBaseTest {
public:
    // Builds the test-case name from the parameter tuple.
    static std::string getTestCaseName(const testing::TestParamInfo<NmsRotatedParams>& obj);

protected:
    void SetUp() override;

    void generate_inputs(const std::vector<ov::Shape>& target_shapes) override;

private:
    // Assigned from the parameter tuple in SetUp(); value-initialized so they are never read indeterminate.
    int64_t m_max_out_boxes_per_class = 0;
    float m_iou_threshold = 0.f;
    float m_score_threshold = 0.f;
};
|
||||||
|
|
||||||
|
} // namespace LayerTestsDefinitions
|
@ -0,0 +1,207 @@
|
|||||||
|
// Copyright (C) 2018-2023 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "shared_test_classes/single_op/nms_rotated.hpp"
|
||||||
|
#include "ov_models/builders.hpp"
|
||||||
|
#include "common_test_utils/data_utils.hpp"
|
||||||
|
#include "openvino/op/nms_rotated.hpp"
|
||||||
|
|
||||||
|
using namespace ov::test;
|
||||||
|
|
||||||
|
namespace LayerTestsDefinitions {
|
||||||
|
|
||||||
|
// Builds the test-case name from the parameter tuple.
// Layout: IS=(<dynamic shapes>)_TS={<static shapes>}_..._BoxPrc=.._MaxPrc=.._ThrPrc=.._OutPrc=..
//         _MaxBox=.._IouThr=.._ScoreThr=.._SortDesc=.._Clockwise=.._ConstIn={..}[_Config={..}]_Device=..
// Skip patterns match on this string (e.g. "SortDesc=" / "Clockwise="), so its format must stay stable.
std::string NmsRotatedOpTest::getTestCaseName(const testing::TestParamInfo<NmsRotatedParams>& obj) {
    const auto& in_shapes = std::get<0>(obj.param);

    std::ostringstream result;

    // Dynamic (partial) shapes of every input.
    result << "IS=(";
    for (size_t i = 0lu; i < in_shapes.size(); i++) {
        result << utils::partialShape2str({in_shapes[i].first}) << (i < in_shapes.size() - 1lu ? "_" : "");
    }
    // Static target shapes, grouped per iteration; the first input defines the number of iterations.
    result << ")_TS=";
    for (size_t i = 0lu; i < in_shapes.front().second.size(); i++) {
        result << "{";
        for (size_t j = 0lu; j < in_shapes.size(); j++) {
            result << utils::vec2str(in_shapes[j].second[i]) << (j < in_shapes.size() - 1lu ? "_" : "");
        }
        result << "}_";
    }
    result << "_BoxPrc=" << std::get<1>(obj.param);
    result << "_MaxPrc=" << std::get<2>(obj.param);
    result << "_ThrPrc=" << std::get<3>(obj.param);
    result << "_OutPrc=" << std::get<4>(obj.param);
    result << "_MaxBox=" << std::get<5>(obj.param);
    result << "_IouThr=" << std::get<6>(obj.param);
    result << "_ScoreThr=" << std::get<7>(obj.param);
    result << "_SortDesc=" << utils::bool2str(std::get<8>(obj.param));
    result << "_Clockwise=" << utils::bool2str(std::get<9>(obj.param));
    result << "_ConstIn={" << utils::bool2str(std::get<10>(obj.param)) << ","
                           << utils::bool2str(std::get<11>(obj.param)) << ","
                           << utils::bool2str(std::get<12>(obj.param)) << ","
                           << utils::bool2str(std::get<13>(obj.param)) << ","
                           << utils::bool2str(std::get<14>(obj.param)) << "}";

    // The plugin configuration is appended only when it is not empty.
    const auto& config = std::get<15>(obj.param);
    if (!config.empty()) {
        result << "_Config={";
        for (const auto& conf_item : config) {
            result << "_" << conf_item.first << "=";
            conf_item.second.print(result);
        }
        result << "}";
    }

    result << "_Device=" << std::get<16>(obj.param);

    return result.str();
}
|
||||||
|
|
||||||
|
void NmsRotatedOpTest::SetUp() {
|
||||||
|
const auto& params = this->GetParam();
|
||||||
|
const auto& in_shapes = std::get<0>(params);
|
||||||
|
const auto& boxes_prc = std::get<1>(params);
|
||||||
|
const auto& max_boxes_prc = std::get<2>(params);
|
||||||
|
const auto& thresholds_prc = std::get<3>(params);
|
||||||
|
const auto& out_prc = std::get<4>(params);
|
||||||
|
m_max_out_boxes_per_class = std::get<5>(params);
|
||||||
|
m_iou_threshold = std::get<6>(params);
|
||||||
|
m_score_threshold = std::get<7>(params);
|
||||||
|
const auto& sort_descending = std::get<8>(params);
|
||||||
|
const auto& clockwise = std::get<9>(params);
|
||||||
|
const auto& is_0_in_const = std::get<10>(params);
|
||||||
|
const auto& is_1_in_const = std::get<11>(params);
|
||||||
|
const auto& is_2_in_const = std::get<12>(params);
|
||||||
|
const auto& is_3_in_const = std::get<13>(params);
|
||||||
|
const auto& is_4_in_const = std::get<14>(params);
|
||||||
|
configuration = std::get<15>(params);
|
||||||
|
targetDevice = std::get<16>(params);
|
||||||
|
|
||||||
|
std::vector<InputShape> actual_shapes;
|
||||||
|
ov::ParameterVector in_params;
|
||||||
|
std::vector<std::shared_ptr<ov::Node>> inputs;
|
||||||
|
const auto in_shape_1d = InputShape{{1}, {{1}}};
|
||||||
|
|
||||||
|
#define CONST_CASE(P, S, H, L) \
|
||||||
|
case P: \
|
||||||
|
inputs.push_back(ngraph::builder::makeConstant(P, S, std::vector<ov::element_type_traits<P>::value_type>{}, true, \
|
||||||
|
ov::element_type_traits<P>::value_type(H), ov::element_type_traits<P>::value_type(L))); \
|
||||||
|
break;
|
||||||
|
|
||||||
|
#define CREATE_INPUT(C, P, S, N, H, L) \
|
||||||
|
if (C) { \
|
||||||
|
switch (P) { \
|
||||||
|
CONST_CASE(ElementType::f32, S.second[0], H, L) \
|
||||||
|
CONST_CASE(ElementType::f16, S.second[0], H, L) \
|
||||||
|
CONST_CASE(ElementType::bf16, S.second[0], H, L) \
|
||||||
|
CONST_CASE(ElementType::i32, S.second[0], H, L) \
|
||||||
|
CONST_CASE(ElementType::i64, S.second[0], H, L) \
|
||||||
|
default: OPENVINO_THROW("NmsRotated does not support precision ", P, " for the ", N, " input."); \
|
||||||
|
} \
|
||||||
|
} else { \
|
||||||
|
actual_shapes.push_back(S); \
|
||||||
|
if (S.first.rank() == 0) { \
|
||||||
|
in_params.push_back(std::make_shared<ov::op::v0::Parameter>(P, S.second.front())); \
|
||||||
|
} else { \
|
||||||
|
in_params.push_back(std::make_shared<ov::op::v0::Parameter>(P, S.first)); \
|
||||||
|
} \
|
||||||
|
in_params.back()->set_friendly_name(N); \
|
||||||
|
inputs.push_back(in_params.back()); \
|
||||||
|
}
|
||||||
|
|
||||||
|
CREATE_INPUT(is_0_in_const, boxes_prc, in_shapes[0], "Boxes", 30, 10)
|
||||||
|
CREATE_INPUT(is_1_in_const, boxes_prc, in_shapes[1], "Scores", 1, 0)
|
||||||
|
CREATE_INPUT(is_2_in_const, max_boxes_prc, in_shape_1d, "MaxOutputBoxesPerClass", m_max_out_boxes_per_class, m_max_out_boxes_per_class)
|
||||||
|
CREATE_INPUT(is_3_in_const, thresholds_prc, in_shape_1d, "IouThreshold", m_iou_threshold, m_iou_threshold)
|
||||||
|
CREATE_INPUT(is_4_in_const, thresholds_prc, in_shape_1d, "ScoreThreshold", m_score_threshold, m_score_threshold)
|
||||||
|
|
||||||
|
#undef CONST_CASE
|
||||||
|
#undef CREATE_INPUT
|
||||||
|
|
||||||
|
init_input_shapes(actual_shapes);
|
||||||
|
|
||||||
|
const auto nms_op = std::make_shared<ov::op::v13::NMSRotated>(inputs[0], inputs[1], inputs[2], inputs[3], inputs[4],
|
||||||
|
sort_descending, out_prc, clockwise);
|
||||||
|
ov::ResultVector results;
|
||||||
|
for (size_t i = 0lu; i < nms_op->get_output_size(); i++) {
|
||||||
|
results.push_back(std::make_shared<ov::op::v0::Result>(nms_op->output(i)));
|
||||||
|
}
|
||||||
|
|
||||||
|
function = std::make_shared<ov::Model>(results, in_params, "NMSRotated");
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Copies `len` elements from `src` to `dst`, converting each one to the destination type
/// with static_cast.
template <typename TD, typename TS>
void fill_data(TD* dst, const TS* src, size_t len) {
    TD* out = dst;
    const TS* in = src;
    for (size_t remaining = len; remaining != 0; --remaining) {
        *out++ = static_cast<TD>(*in++);
    }
}
|
||||||
|
|
||||||
|
/// Creates one tensor per model input and stores it in `inputs`.
///
/// The input is recognised by the friendly name of its node: "Boxes" and "Scores" are filled with
/// random data, while "MaxOutputBoxesPerClass", "IouThreshold" and "ScoreThreshold" receive the
/// single value stored in the fixture, converted to the input's element type.
///
/// @param targetInputStaticShapes Static shapes of the model inputs, indexed like function->inputs().
/// @throws ov::Exception (via OPENVINO_THROW) when an input has an element type that is not
///         handled for that particular input.
void NmsRotatedOpTest::generate_inputs(const std::vector<ov::Shape>& targetInputStaticShapes) {
    inputs.clear();
    const auto& func_inputs = function->inputs();

    for (size_t i = 0llu; i < func_inputs.size(); ++i) {
        const auto& func_input = func_inputs[i];
        const auto& name = func_input.get_node()->get_friendly_name();
        const auto& in_prc = func_input.get_element_type();
        auto tensor = ov::Tensor(in_prc, targetInputStaticShapes[i]);

// Copies L elements starting at S into the tensor, converting them to the value type of P.
#define FILL_DATA(P, S, L) \
case P : \
    fill_data(tensor.data<ov::element_type_traits<P>::value_type>(), S, L); break;

// Fills the whole tensor with random values of the value type of P; R, S and K are forwarded
// unchanged to utils::fill_data_random.
#define GEN_DATA(P, R, S, K) \
case P : \
    utils::fill_data_random(tensor.data<ov::element_type_traits<P>::value_type>(), shape_size(targetInputStaticShapes[i]), R, S, K); break;

        if (name == "Boxes") {
            switch (in_prc) {
                GEN_DATA(ElementType::f32, 30, 20, 1)
                GEN_DATA(ElementType::f16, 30, 20, 1)
                GEN_DATA(ElementType::bf16, 30, 20, 1)
                default:
                    // The message names this input; it previously said "Scores" (copy-paste slip).
                    OPENVINO_THROW("NmsRotated does not support precision ", in_prc, " for the Boxes input.");
            }
        } else if (name == "Scores") {
            switch (in_prc) {
                GEN_DATA(ElementType::f32, 1, 0, 100)
                GEN_DATA(ElementType::f16, 1, 0, 100)
                GEN_DATA(ElementType::bf16, 1, 0, 100)
                default:
                    OPENVINO_THROW("NmsRotated does not support precision ", in_prc, " for the Scores input.");
            }
        } else if (name == "MaxOutputBoxesPerClass") {
            switch (in_prc) {
                FILL_DATA(ElementType::i64, &m_max_out_boxes_per_class, 1)
                FILL_DATA(ElementType::i32, &m_max_out_boxes_per_class, 1)
                default:
                    OPENVINO_THROW("NmsRotated does not support precision ", in_prc, " for the MaxOutputBoxesPerClass input.");
            }
        } else if (name == "IouThreshold") {
            switch (in_prc) {
                FILL_DATA(ElementType::f32, &m_iou_threshold, 1)
                FILL_DATA(ElementType::f16, &m_iou_threshold, 1)
                FILL_DATA(ElementType::bf16, &m_iou_threshold, 1)
                default:
                    OPENVINO_THROW("NmsRotated does not support precision ", in_prc, " for the IouThreshold input.");
            }
        } else if (name == "ScoreThreshold") {
            switch (in_prc) {
                FILL_DATA(ElementType::f32, &m_score_threshold, 1)
                FILL_DATA(ElementType::f16, &m_score_threshold, 1)
                FILL_DATA(ElementType::bf16, &m_score_threshold, 1)
                default:
                    OPENVINO_THROW("NmsRotated does not support precision ", in_prc, " for the ScoreThreshold input.");
            }
        }
        // An input with any other name keeps its tensor contents as allocated.

#undef GEN_DATA
#undef FILL_DATA

        inputs.insert({func_input.get_node_shared_ptr(), tensor});
    }
}
|
||||||
|
|
||||||
|
} // namespace LayerTestsDefinitions
|
@ -1131,5 +1131,4 @@ conformance_RegionYolo/ReadIRTest.ImportExport/Op=RegionYolo.1_Type=f32_IR=Regio
|
|||||||
conformance_Add/ReadIRTest.ImportExport/Op=Add.1_Type=i32_IR=28f23780d4ca0d40671caf79d5cd9223ad8f6dc2fa5ade2521f3d99586eeeb7f_Device=CPU_Shape=static_Config=(),9.72615e-07
|
conformance_Add/ReadIRTest.ImportExport/Op=Add.1_Type=i32_IR=28f23780d4ca0d40671caf79d5cd9223ad8f6dc2fa5ade2521f3d99586eeeb7f_Device=CPU_Shape=static_Config=(),9.72615e-07
|
||||||
conformance_Convolution/ReadIRTest.Inference/Op=Convolution.1_Type=f32_IR=c301804445f273eef62f41f02204711d9d6e571da28c76ab447d7d90983b0032_Device=CPU_Shape=dynamic_Config=(),0.000113281
|
conformance_Convolution/ReadIRTest.Inference/Op=Convolution.1_Type=f32_IR=c301804445f273eef62f41f02204711d9d6e571da28c76ab447d7d90983b0032_Device=CPU_Shape=dynamic_Config=(),0.000113281
|
||||||
conformance/OpImplCheckTest.checkPluginImplementation/Function=Multinomial_opset13_Device=CPU_Config=(),1
|
conformance/OpImplCheckTest.checkPluginImplementation/Function=Multinomial_opset13_Device=CPU_Config=(),1
|
||||||
conformance/OpImplCheckTest.checkPluginImplementation/Function=NMSRotated_opset13_Device=CPU_Config=(),1
|
|
||||||
conformance/OpImplCheckTest.checkPluginImplementation/Function=LSTMSequence_opset1_Device=CPU_Config=(),1
|
conformance/OpImplCheckTest.checkPluginImplementation/Function=LSTMSequence_opset1_Device=CPU_Config=(),1
|
||||||
|
|
Loading…
Reference in New Issue
Block a user