[GPU] Matrix NMS (#13137)

This commit is contained in:
Konstantin Beluchenko 2022-11-02 16:16:57 +02:00 committed by GitHub
parent f8b12b4f1a
commit c6528ee4ea
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
21 changed files with 1858 additions and 3 deletions

View File

@ -14,6 +14,7 @@
#include "itt.hpp"
#include "ngraph_ops/nms_static_shape_ie.hpp"
#include "transformations/utils/utils.hpp"
ngraph::pass::ConvertMatrixNmsToMatrixNmsIE::ConvertMatrixNmsToMatrixNmsIE(bool force_i32_output_type) {
MATCHER_SCOPE(ConvertMatrixNmsToMatrixNmsIE);
@ -46,13 +47,13 @@ ngraph::pass::ConvertMatrixNmsToMatrixNmsIE::ConvertMatrixNmsToMatrixNmsIE(bool
if (nms->output(1).get_element_type() != output_1.get_element_type()) {
output_1 = std::make_shared<opset1::Convert>(output_1, nms->output(1).get_element_type());
output_1.get_node_shared_ptr()->set_friendly_name(nms->get_friendly_name() + "/convert.1");
output_1.get_node_shared_ptr()->set_friendly_name(op::util::create_ie_output_name(nms->output(1)));
new_ops.emplace_back(output_1.get_node_shared_ptr());
}
if (nms->output(2).get_element_type() != output_2.get_element_type()) {
output_2 = std::make_shared<opset1::Convert>(output_2, nms->output(2).get_element_type());
output_2.get_node_shared_ptr()->set_friendly_name(nms->get_friendly_name() + "/convert.2");
output_2.get_node_shared_ptr()->set_friendly_name(op::util::create_ie_output_name(nms->output(2)));
new_ops.emplace_back(output_2.get_node_shared_ptr());
}

View File

@ -247,3 +247,4 @@ REGISTER_FACTORY(v9, Eye);
// --------------------------- Supported internal ops --------------------------- //
REGISTER_FACTORY(internal, NonMaxSuppressionIEInternal);
REGISTER_FACTORY(internal, GenerateProposalsIEInternal);
REGISTER_FACTORY(internal, NmsStaticShapeIE8);

View File

@ -0,0 +1,154 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include <vector>
#include "ngraph/op/matrix_nms.hpp"
#include "primitive.hpp"
namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{
/// @addtogroup cpp_topology Network Topology
/// @{
/// @addtogroup cpp_primitives Primitives
/// @{
/// @brief Performs matrix nms of input boxes and returns indices of selected boxes.
struct matrix_nms : public primitive_base<matrix_nms> {
CLDNN_DECLARE_PRIMITIVE(matrix_nms)
enum decay_function { gaussian, linear };
enum sort_result_type {
class_id, // sort selected boxes by class id (ascending) in each batch element
score, // sort selected boxes by score (descending) in each batch element
none // do not guarantee the order in each batch element
};
/// \brief Structure that specifies attributes of the operation
struct attributes {
// specifies order of output elements
sort_result_type sort_type = sort_result_type::none;
// specifies whenever it is necessary to sort selected boxes across batches or not
bool sort_result_across_batch = false;
// specifies minimum score to consider box for the processing
float score_threshold = 0.0f;
// specifies maximum number of boxes to be selected per class, -1 meaning to
// keep all boxes
int nms_top_k = -1;
// specifies maximum number of boxes to be selected per batch element, -1
// meaning to keep all boxes
int keep_top_k = -1;
// specifies the background class id, -1 meaning to keep all classes
int background_class = -1;
// specifies decay function used to decay scores
decay_function decay = decay_function::linear;
// specifies gaussian_sigma parameter for gaussian decay_function
float gaussian_sigma = 2.0f;
// specifies threshold to filter out boxes with low confidence score after
// decaying
float post_threshold = 0.0f;
// specifies whether boxes are normalized or not
bool normalized = true;
attributes() {}
attributes(const ngraph::op::v8::MatrixNms::Attributes& attrs)
: attributes(from(attrs.sort_result_type),
attrs.sort_result_across_batch,
attrs.score_threshold,
attrs.nms_top_k,
attrs.keep_top_k,
attrs.background_class,
from(attrs.decay_function),
attrs.gaussian_sigma,
attrs.post_threshold,
attrs.normalized) {}
attributes(sort_result_type sort_type,
bool sort_result_across_batch,
float score_threshold,
int nms_top_k,
int keep_top_k,
int background_class,
decay_function decay,
float gaussian_sigma,
float post_threshold,
bool normalized)
: sort_type(sort_type),
sort_result_across_batch(sort_result_across_batch),
score_threshold(score_threshold),
nms_top_k(nms_top_k),
keep_top_k(keep_top_k),
background_class(background_class),
decay(decay),
gaussian_sigma(gaussian_sigma),
post_threshold(post_threshold),
normalized(normalized) {}
};
/// @brief Constructs matrix_nms primitive.
/// @param id This primitive id.
/// @param boxes primitive id.
/// @param scores primitive id.
/// @param second_output primitive id.
/// @param third_output primitive id.
/// @param attrs attributes.
matrix_nms(const primitive_id& id,
const primitive_id& boxes,
const primitive_id& scores,
const primitive_id& second_output,
const primitive_id& third_output,
const matrix_nms::attributes& attrs)
: primitive_base(id, {boxes, scores, second_output, third_output}),
attribs(attrs) {}
/// @brief Constructs matrix_nms primitive.
/// @param id This primitive id.
/// @param boxes primitive id.
/// @param scores primitive id.
/// @param second_output primitive id.
/// @param third_output primitive id.
/// @param attrs operation attributes.
matrix_nms(const primitive_id& id,
const primitive_id& boxes,
const primitive_id& scores,
const primitive_id& second_output,
const primitive_id& third_output,
const ngraph::op::v8::MatrixNms::Attributes& attrs)
: primitive_base(id, {boxes, scores, second_output, third_output}),
attribs(attrs) {}
attributes attribs;
private:
static cldnn::matrix_nms::decay_function from(ngraph::op::v8::MatrixNms::DecayFunction decay) {
switch (decay) {
case ngraph::op::v8::MatrixNms::DecayFunction::GAUSSIAN:
return cldnn::matrix_nms::decay_function::gaussian;
case ngraph::op::v8::MatrixNms::DecayFunction::LINEAR:
default:
return cldnn::matrix_nms::decay_function::linear;
}
}
static cldnn::matrix_nms::sort_result_type from(ngraph::op::v8::MatrixNms::SortResultType type) {
switch (type) {
case ngraph::op::v8::MatrixNms::SortResultType::CLASSID:
return cldnn::matrix_nms::sort_result_type::class_id;
case ngraph::op::v8::MatrixNms::SortResultType::SCORE:
return cldnn::matrix_nms::sort_result_type::score;
case ngraph::op::v8::MatrixNms::SortResultType::NONE:
default:
return cldnn::matrix_nms::sort_result_type::none;
}
}
};
/// @}
/// @}
/// @}
} // namespace cldnn

View File

@ -0,0 +1,117 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "data_inst.h"
#include "impls/implementation_map.hpp"
#include "kernel_selector_helper.h"
#include "matrix_nms/matrix_nms_kernel_ref.h"
#include "matrix_nms/matrix_nms_kernel_selector.h"
#include "matrix_nms_inst.h"
#include "primitive_base.hpp"
namespace cldnn {
namespace ocl {
namespace {
kernel_selector::matrix_nms_params::decay_function from(matrix_nms::decay_function decay) {
switch (decay) {
case matrix_nms::decay_function::gaussian:
return kernel_selector::matrix_nms_params::decay_function::GAUSSIAN;
default:
case matrix_nms::decay_function::linear:
return kernel_selector::matrix_nms_params::decay_function::LINEAR;
}
}
kernel_selector::matrix_nms_params::sort_result_type from(matrix_nms::sort_result_type type) {
switch (type) {
case matrix_nms::sort_result_type::class_id:
return kernel_selector::matrix_nms_params::sort_result_type::CLASS_ID;
case matrix_nms::sort_result_type::score:
return kernel_selector::matrix_nms_params::sort_result_type::SCORE;
default:
case matrix_nms::sort_result_type::none:
return kernel_selector::matrix_nms_params::sort_result_type::NONE;
}
}
} // namespace
struct matrix_nms_impl : typed_primitive_impl_ocl<matrix_nms> {
using parent = typed_primitive_impl_ocl<matrix_nms>;
using parent::parent;
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<matrix_nms_impl>(*this);
}
protected:
kernel_arguments_data get_arguments(matrix_nms_inst& instance, int32_t) const override {
kernel_arguments_data args;
args.inputs = {instance.input_boxes_mem(),
instance.input_scores_mem(),
instance.input_selected_boxes_mem(),
instance.input_valid_outputs_mem()};
args.outputs = {instance.output_memory_ptr()};
return args;
}
public:
static primitive_impl* create(const matrix_nms_node& node, const kernel_impl_params& impl_param) {
auto params = get_default_params<kernel_selector::matrix_nms_params>(impl_param);
auto optional_params =
get_default_optional_params<kernel_selector::matrix_nms_optional_params>(node.get_program());
const auto& scores_layout = impl_param.get_input_layout(1);
const auto& second_output_layout = impl_param.get_input_layout(2);
const auto& third_output_layout = impl_param.get_input_layout(3);
params.inputs.push_back(convert_data_tensor(scores_layout));
params.inputs.push_back(convert_data_tensor(second_output_layout));
params.inputs.push_back(convert_data_tensor(third_output_layout));
const auto& primitive = node.get_primitive();
params.sort_type = from(primitive->attribs.sort_type);
params.sort_result_across_batch = primitive->attribs.sort_result_across_batch;
params.score_threshold = primitive->attribs.score_threshold;
params.nms_top_k = primitive->attribs.nms_top_k;
params.keep_top_k = primitive->attribs.keep_top_k;
params.background_class = primitive->attribs.background_class;
params.decay = from(primitive->attribs.decay);
params.gaussian_sigma = primitive->attribs.gaussian_sigma;
params.post_threshold = primitive->attribs.post_threshold;
params.normalized = primitive->attribs.normalized;
auto& kernel_selector = kernel_selector::matrix_nms_kernel_selector::Instance();
auto best_kernels = kernel_selector.GetBestKernels(params, optional_params);
CLDNN_ERROR_BOOL(node.id(),
"Best_kernel.empty()",
best_kernels.empty(),
"Cannot find a proper kernel with this nodeuments");
auto matrix_nms_node = new matrix_nms_impl(node, best_kernels[0]);
return matrix_nms_node;
}
};
namespace detail {
attach_matrix_nms_impl::attach_matrix_nms_impl() {
auto types = {data_types::f16, data_types::f32, data_types::i32};
auto formats = {format::bfyx,
format::b_fs_yx_fsv16,
format::b_fs_yx_fsv32,
format::bs_fs_yx_bsv16_fsv16,
format::bs_fs_yx_bsv32_fsv16,
format::bs_fs_yx_bsv32_fsv32};
implementation_map<matrix_nms>::add(impl_types::ocl, matrix_nms_impl::create, types, formats);
}
} // namespace detail
} // namespace ocl
} // namespace cldnn

View File

@ -50,6 +50,7 @@ void register_implementations() {
REGISTER_OCL(mutable_data);
REGISTER_OCL(mvn);
REGISTER_OCL(non_max_suppression);
REGISTER_OCL(matrix_nms);
REGISTER_OCL(normalize);
REGISTER_OCL(one_hot);
REGISTER_OCL(permute);

View File

@ -131,6 +131,7 @@ REGISTER_OCL(max_unpooling);
REGISTER_OCL(mutable_data);
REGISTER_OCL(mvn);
REGISTER_OCL(non_max_suppression);
REGISTER_OCL(matrix_nms);
REGISTER_OCL(normalize);
REGISTER_OCL(one_hot);
REGISTER_OCL(permute);

View File

@ -0,0 +1,43 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include <memory>
#include <string>
#include "intel_gpu/primitives/matrix_nms.hpp"
#include "primitive_inst.h"
namespace cldnn {
using matrix_nms_node = typed_program_node<matrix_nms>;
template <>
class typed_primitive_inst<matrix_nms> : public typed_primitive_inst_base<matrix_nms> {
using parent = typed_primitive_inst_base<matrix_nms>;
public:
typed_primitive_inst(network& network, const matrix_nms_node& node) : parent(network, node) {}
static layout calc_output_layout(const matrix_nms_node& node, const kernel_impl_params& impl_param);
static std::string to_string(const matrix_nms_node& node);
memory::ptr input_boxes_mem() const {
return dep_memory_ptr(0);
}
memory::ptr input_scores_mem() const {
return dep_memory_ptr(1);
}
memory::ptr input_selected_boxes_mem() const {
return dep_memory_ptr(2);
}
memory::ptr input_valid_outputs_mem() const {
return dep_memory_ptr(3);
}
};
using matrix_nms_inst = typed_primitive_inst<matrix_nms>;
} // namespace cldnn

View File

@ -0,0 +1,88 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include <json_object.h>
#include <sstream>
#include <string>
#include "matrix_nms_inst.h"
#include "openvino/core/enum_names.hpp"
#include "primitive_type_base.h"
namespace cldnn {
primitive_type_id matrix_nms::type_id() {
static primitive_type_base<matrix_nms> instance;
return &instance;
}
layout matrix_nms_inst::calc_output_layout(const matrix_nms_node& node, const kernel_impl_params& impl_param) {
const auto primitive = impl_param.typed_desc<matrix_nms>();
const auto boxes_layout = impl_param.get_input_layout(0);
const auto scores_layout = impl_param.get_input_layout(1);
const auto batches_num = boxes_layout.batch();
auto classes_num = scores_layout.feature();
const auto boxes_num = boxes_layout.feature();
if (primitive->attribs.background_class >= 0 && primitive->attribs.background_class < classes_num)
classes_num = std::max(1, classes_num - 1);
int max_output_boxes_per_class{boxes_num};
if (primitive->attribs.nms_top_k >= 0)
max_output_boxes_per_class = std::min(max_output_boxes_per_class, primitive->attribs.nms_top_k);
auto max_output_boxes_per_batch = max_output_boxes_per_class * classes_num;
if (primitive->attribs.keep_top_k >= 0)
max_output_boxes_per_batch = std::min(max_output_boxes_per_batch, primitive->attribs.keep_top_k);
auto output_num = max_output_boxes_per_batch * batches_num;
// BOX_DATA: class_id, box_score, xmin, ymin, xmax, ymax
constexpr size_t BOX_DATA{6};
return layout(boxes_layout.data_type, boxes_layout.format, {output_num, BOX_DATA, 1, 1});
}
std::string matrix_nms_inst::to_string(const matrix_nms_node& node) {
json_composite matrix_nms_info;
matrix_nms_info.add("boxes id", node.input().id());
matrix_nms_info.add("scores id", node.get_dependency(1).id());
matrix_nms_info.add("sort_result_type", ov::as_string(node.get_primitive()->attribs.sort_type));
matrix_nms_info.add("decay_function", ov::as_string(node.get_primitive()->attribs.decay));
matrix_nms_info.add("sort_result_across_batch", node.get_primitive()->attribs.sort_result_across_batch);
matrix_nms_info.add("score_threshold", node.get_primitive()->attribs.score_threshold);
matrix_nms_info.add("nms_top_k", node.get_primitive()->attribs.nms_top_k);
matrix_nms_info.add("keep_top_k", node.get_primitive()->attribs.keep_top_k);
matrix_nms_info.add("background_class", node.get_primitive()->attribs.background_class);
matrix_nms_info.add("gaussian_sigma", node.get_primitive()->attribs.gaussian_sigma);
matrix_nms_info.add("post_threshold", node.get_primitive()->attribs.post_threshold);
matrix_nms_info.add("normalized", node.get_primitive()->attribs.normalized);
auto node_info = node.desc_to_json();
node_info->add("matrix_nms info", matrix_nms_info);
std::stringstream primitive_description;
node_info->dump(primitive_description);
return primitive_description.str();
}
} // namespace cldnn
namespace ov {
template <>
EnumNames<matrix_nms::decay_function>& EnumNames<matrix_nms::decay_function>::get() {
static auto enum_names = EnumNames<matrix_nms::decay_function>(
"decay_function",
{{"gaussian", matrix_nms::decay_function::gaussian}, {"linear", matrix_nms::decay_function::linear}});
return enum_names;
}
template <>
EnumNames<matrix_nms::sort_result_type>& EnumNames<matrix_nms::sort_result_type>::get() {
static auto enum_names =
EnumNames<matrix_nms::sort_result_type>("sort_result_type",
{{"class_id", matrix_nms::sort_result_type::class_id},
{"score", matrix_nms::sort_result_type::score},
{"none", matrix_nms::sort_result_type::none}});
return enum_names;
}
} // namespace ov

View File

@ -20,6 +20,7 @@
#include "sliding_window_utils.hpp"
#include "program_helpers.h"
#include "matrix_nms_inst.h"
#include "roi_pooling_inst.h"
#include "reorg_yolo_inst.h"
#include "eltwise_inst.h"
@ -1442,6 +1443,7 @@ void program::set_layout_optimizer_attributes(layout_optimizer& lo) {
prim.type() != cldnn::ctc_loss::type_id() &&
prim.type() != cldnn::non_max_suppression::type_id() &&
prim.type() != cldnn::roi_align::type_id() &&
prim.type() != cldnn::matrix_nms::type_id() &&
prim.type() != cldnn::adaptive_pooling::type_id() &&
prim.type() != cldnn::bucketize::type_id() &&
prim.type() != cldnn::roll::type_id() &&
@ -1487,6 +1489,7 @@ void program::set_layout_optimizer_attributes(layout_optimizer& lo) {
prim.type() != cldnn::ctc_loss::type_id() &&
prim.type() != cldnn::non_max_suppression::type_id() &&
prim.type() != cldnn::roi_align::type_id() &&
prim.type() != cldnn::matrix_nms::type_id() &&
prim.type() != cldnn::adaptive_pooling::type_id() &&
prim.type() != cldnn::bucketize::type_id() &&
prim.type() != cldnn::roll::type_id() &&

View File

@ -0,0 +1,333 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "include/batch_headers/common.cl"
#include "include/batch_headers/data_types.cl"
#define NUM_BATCHES INPUT0_BATCH_NUM
#define NUM_BOXES INPUT0_FEATURE_NUM
#define NUM_CLASSES INPUT1_FEATURE_NUM
typedef struct {
int batch_idx;
int class_idx;
int box_idx;
INPUT1_TYPE score;
} FUNC(BoxInfo);
#define BOX_INFO FUNC(BoxInfo)
#define unroll_for __attribute__((opencl_unroll_hint)) for
inline INPUT1_TYPE FUNC(decay_gaussian)(INPUT1_TYPE iou, INPUT1_TYPE max_iou) {
return exp((max_iou * max_iou - iou * iou) * GAUSSIAN_SIGMA);
}
inline INPUT1_TYPE FUNC(decay_linear)(INPUT1_TYPE iou, INPUT1_TYPE max_iou) {
return (INPUT1_VAL_ONE - iou) / (INPUT1_VAL_ONE - max_iou + TINY);
}
inline void FUNC(swap)(int* a, int* b) {
int temp = *a;
*a = *b;
*b = temp;
}
inline void FUNC(sortIterative)(const __global INPUT1_TYPE* scores,
const int batchId,
const int classId,
int* indices,
const int size) {
for (int i = 1; i <= size; i++) {
bool swapped = false;
for (int j = 0; j < size - i; j++) {
const INPUT1_TYPE score_curr = scores[INPUT1_GET_INDEX(batchId, classId, 0, indices[j])];
const INPUT1_TYPE score_next = scores[INPUT1_GET_INDEX(batchId, classId, 0, indices[j + 1])];
if (score_curr < score_next) {
FUNC_CALL(swap)(&indices[j], &indices[j + 1]);
swapped = true;
}
}
if (!swapped)
break;
}
}
inline void FUNC(swap_boxes)(__global BOX_INFO* a, __global BOX_INFO* b) {
BOX_INFO temp = *a;
*a = *b;
*b = temp;
}
inline void FUNC(sortIterativeBoxes)(__global BOX_INFO* boxes, int l, int h) {
for (int i = 1; i < h - l; i++) {
bool swapped = false;
for (int j = l; j < h - i; j++) {
if ((boxes[j].score < boxes[j + 1].score) ||
(boxes[j].score == boxes[j + 1].score && boxes[j].class_idx > boxes[j + 1].class_idx) ||
(boxes[j].score == boxes[j + 1].score && boxes[j].class_idx == boxes[j + 1].class_idx &&
boxes[j].box_idx > boxes[j + 1].box_idx)) {
FUNC_CALL(swap_boxes)(&boxes[j], &boxes[j + 1]);
swapped = true;
}
}
if (!swapped)
break;
}
}
inline void FUNC(sortIterativeBoxesAcrossBatches)(__global BOX_INFO* boxes) {
const int size = NUM_BATCHES * NUM_CLASSES * MAX_BOXES_PER_CLASS;
for (int i = 1; i < size; i++) {
bool swapped = false;
for (int j = 0; j < size - i; j++) {
__global BOX_INFO* l = boxes + j;
__global BOX_INFO* r = boxes + j + 1;
// sort by score
#if SORT_TYPE == 1
if ((l->score < r->score) || (l->score == r->score && l->batch_idx > r->batch_idx) ||
(l->score == r->score && l->batch_idx == r->batch_idx && l->class_idx > r->class_idx) ||
(l->score == r->score && l->batch_idx == r->batch_idx && l->class_idx == r->class_idx &&
l->box_idx > r->box_idx)) {
FUNC_CALL(swap_boxes)(l, r);
swapped = true;
}
// sort by class id
#elif SORT_TYPE == 0
if (r->score != INPUT1_VAL_ZERO &&
((l->score == INPUT1_VAL_ZERO) || // case with empty buffer
(l->class_idx > r->class_idx) || (l->class_idx == r->class_idx && l->batch_idx > r->batch_idx) ||
(l->class_idx == r->class_idx && l->batch_idx == r->batch_idx && l->score < r->score) ||
(l->class_idx == r->class_idx && l->batch_idx == r->batch_idx && l->score == r->score &&
l->box_idx > r->box_idx))) {
FUNC_CALL(swap_boxes)(l, r);
swapped = true;
}
#endif
}
if (!swapped)
break;
}
}
inline COORD_TYPE_4 FUNC(getBoxCoords)(const __global INPUT0_TYPE* boxes, const short batch, const ushort box_idx) {
COORD_TYPE_4 coords = (COORD_TYPE_4)(boxes[INPUT0_GET_INDEX(batch, box_idx, 0, 0)],
boxes[INPUT0_GET_INDEX(batch, box_idx, 0, 1)],
boxes[INPUT0_GET_INDEX(batch, box_idx, 0, 2)],
boxes[INPUT0_GET_INDEX(batch, box_idx, 0, 3)]);
// uncomment when flipped coordinates will be fixed in reference impl
/*
const INPUT0_TYPE x1 = min(coords[0], coords[2]);
const INPUT0_TYPE x2 = max(coords[0], coords[2]);
const INPUT0_TYPE y1 = min(coords[1], coords[3]);
const INPUT0_TYPE y2 = max(coords[1], coords[3]);
coords[0] = x1;
coords[1] = y1;
coords[2] = x2;
coords[3] = y2;
*/
return coords;
}
inline INPUT0_TYPE FUNC(area)(const INPUT0_TYPE w, const INPUT0_TYPE h) {
return (w + NORM) * (h + NORM);
}
inline INPUT0_TYPE FUNC(areaBox)(const COORD_TYPE_4 box) {
if (box[2] < box[0] || box[3] < box[1])
return INPUT0_VAL_ZERO;
return FUNC_CALL(area)(box[3] - box[1], box[2] - box[0]);
}
inline INPUT0_TYPE FUNC(intersectionOverUnion)(const COORD_TYPE_4 box1, const COORD_TYPE_4 box2) {
if (box2[0] > box1[2] || box2[2] < box1[0] || box2[1] > box1[3] || box2[3] < box1[1])
return INPUT0_VAL_ZERO;
const INPUT0_TYPE area = FUNC_CALL(areaBox)(box1);
const INPUT0_TYPE areaBox = FUNC_CALL(areaBox)(box2);
const INPUT0_TYPE intersection_xmin = max(box1[0], box2[0]);
const INPUT0_TYPE intersection_ymin = max(box1[1], box2[1]);
const INPUT0_TYPE intersection_xmax = min(box1[2], box2[2]);
const INPUT0_TYPE intersection_ymax = min(box1[3], box2[3]);
const INPUT0_TYPE intersection_area =
FUNC_CALL(area)(intersection_xmax - intersection_xmin, intersection_ymax - intersection_ymin);
const INPUT0_TYPE union_area = area + areaBox - intersection_area;
return intersection_area / union_area;
}
#ifdef MATRIX_NMS_STAGE_0
KERNEL(matrix_nms_ref_stage_0)
(const __global INPUT0_TYPE* input_boxes,
const __global INPUT1_TYPE* input_scores,
__global uchar* buffer0,
__global int* selected_boxes_num) {
const int batchId = get_global_id(0);
const int classId = get_global_id(1);
if (classId == BACKGROUND_CLASS)
return;
int sorted_score_indices[NUM_BOXES];
for (int i = 0; i < NUM_BOXES; ++i)
sorted_score_indices[i] = i;
int valid_boxes_num = 0;
for (int i = 0; i < NUM_BOXES; i++) {
if (input_scores[INPUT1_GET_INDEX(batchId, classId, 0, i)] > SCORE_THRESHOLD)
++valid_boxes_num;
}
// TODO: consider faster sorting algorithm
FUNC_CALL(sortIterative)(input_scores, batchId, classId, sorted_score_indices, NUM_BOXES);
valid_boxes_num = min(valid_boxes_num, MAX_BOXES_PER_CLASS);
const int matrix_size = MAX_BOXES_PER_CLASS < 3 ? 1 : (MAX_BOXES_PER_CLASS * (MAX_BOXES_PER_CLASS - 1)) >> 1;
INPUT1_TYPE iou_matrix[matrix_size];
INPUT1_TYPE iou_max[MAX_BOXES_PER_CLASS];
iou_max[0] = INPUT1_VAL_ZERO;
for (int i = 1; i < valid_boxes_num; ++i) {
INPUT1_TYPE max_iou = INPUT1_VAL_ZERO;
const COORD_TYPE_4 box_i = FUNC_CALL(getBoxCoords)(input_boxes, batchId, sorted_score_indices[i]);
for (int j = 0; j < i; ++j) {
const COORD_TYPE_4 box_j = FUNC_CALL(getBoxCoords)(input_boxes, batchId, sorted_score_indices[j]);
const INPUT1_TYPE iou = FUNC_CALL(intersectionOverUnion)(box_i, box_j);
max_iou = max(iou, max_iou);
iou_matrix[i * (i - 1) / 2 + j] = iou;
}
iou_max[i] = max_iou;
}
const INPUT1_TYPE first_score = input_scores[INPUT1_GET_INDEX(batchId, classId, 0, sorted_score_indices[0])];
__global BOX_INFO* box_info = (__global BOX_INFO*)buffer0;
box_info = &box_info[batchId * NUM_CLASSES * MAX_BOXES_PER_CLASS + classId * MAX_BOXES_PER_CLASS];
int box_info_counter = 0;
if (first_score > POST_THRESHOLD && valid_boxes_num > 0) {
box_info[box_info_counter].class_idx = classId;
box_info[box_info_counter].score = first_score;
box_info[box_info_counter].box_idx = sorted_score_indices[0];
box_info[box_info_counter].batch_idx = batchId;
++box_info_counter;
}
for (int i = 1; i < valid_boxes_num; ++i) {
INPUT1_TYPE min_decay = INPUT1_VAL_ONE;
for (int j = 0; j < i; ++j) {
INPUT1_TYPE iou = iou_matrix[i * (i - 1) / 2 + j];
INPUT1_TYPE decay =
DECAY_FUNC == 0 ? FUNC_CALL(decay_gaussian)(iou, iou_max[j]) : FUNC_CALL(decay_linear)(iou, iou_max[j]);
min_decay = min(min_decay, decay);
}
INPUT1_TYPE ds = min_decay * input_scores[INPUT1_GET_INDEX(batchId, classId, 0, sorted_score_indices[i])];
if (ds <= POST_THRESHOLD)
continue;
box_info[box_info_counter].batch_idx = batchId;
box_info[box_info_counter].class_idx = classId;
box_info[box_info_counter].box_idx = sorted_score_indices[i];
box_info[box_info_counter].score = ds;
++box_info_counter;
}
selected_boxes_num[batchId * NUM_CLASSES + classId] = box_info_counter;
}
#endif /* MATRIX_NMS_STAGE_0 */
#ifdef MATRIX_NMS_STAGE_1
KERNEL(matrix_nms_ref_stage_1)
(__global INPUT3_TYPE* valid_outputs, __global uchar* buffer0, __global int* selected_boxes_num) {
const int batchId = get_global_id(0);
__global BOX_INFO* box_info = (__global BOX_INFO*)buffer0;
const int first_idx = batchId * NUM_CLASSES * MAX_BOXES_PER_CLASS;
const int last_idx = first_idx + NUM_CLASSES * MAX_BOXES_PER_CLASS;
// TODO: consider faster sorting algorithm
FUNC_CALL(sortIterativeBoxes)(box_info, first_idx, last_idx);
for (int i = 0; i < NUM_CLASSES; ++i) {
if (i == BACKGROUND_CLASS)
continue;
valid_outputs[INPUT3_GET_INDEX(batchId, 0, 0, 0)] += selected_boxes_num[batchId * NUM_CLASSES + i];
}
}
#endif /* MATRIX_NMS_STAGE_1 */
#ifdef MATRIX_NMS_STAGE_2
KERNEL(matrix_nms_ref_stage_2)
(const __global INPUT0_TYPE* input_boxes,
__global OUTPUT_TYPE* output,
__global INPUT2_TYPE* selected_indices,
__global INPUT3_TYPE* valid_outputs,
__global uchar* buffer0) {
__global BOX_INFO* box_info = (__global BOX_INFO*)buffer0;
// TODO: consider faster sorting algorithm
// and index sorting instead of data sorting
#if SORT_RESULT_ACROSS_BATCH == 1 && SORT_TYPE != 2
FUNC_CALL(sortIterativeBoxesAcrossBatches)(box_info);
#endif
int output_idx = 0;
int box_info_idx = 0;
for (int i = 0; i < NUM_BATCHES; ++i) {
if (KEEP_TOP_K != -1 && KEEP_TOP_K < valid_outputs[INPUT3_GET_INDEX(i, 0, 0, 0)])
valid_outputs[INPUT3_GET_INDEX(i, 0, 0, 0)] = KEEP_TOP_K;
#if SORT_RESULT_ACROSS_BATCH == 0
box_info_idx = i * NUM_CLASSES * MAX_BOXES_PER_CLASS;
#endif
unroll_for(int j = 0; j < valid_outputs[INPUT3_GET_INDEX(i, 0, 0, 0)]; ++j) {
output[OUTPUT_GET_INDEX(output_idx, 0, 0, 0)] = box_info[box_info_idx].class_idx;
output[OUTPUT_GET_INDEX(output_idx, 1, 0, 0)] = box_info[box_info_idx].score;
output[OUTPUT_GET_INDEX(output_idx, 2, 0, 0)] =
input_boxes[INPUT0_GET_INDEX(box_info[box_info_idx].batch_idx, box_info[box_info_idx].box_idx, 0, 0)];
output[OUTPUT_GET_INDEX(output_idx, 3, 0, 0)] =
input_boxes[INPUT0_GET_INDEX(box_info[box_info_idx].batch_idx, box_info[box_info_idx].box_idx, 0, 1)];
output[OUTPUT_GET_INDEX(output_idx, 4, 0, 0)] =
input_boxes[INPUT0_GET_INDEX(box_info[box_info_idx].batch_idx, box_info[box_info_idx].box_idx, 0, 2)];
output[OUTPUT_GET_INDEX(output_idx, 5, 0, 0)] =
input_boxes[INPUT0_GET_INDEX(box_info[box_info_idx].batch_idx, box_info[box_info_idx].box_idx, 0, 3)];
selected_indices[INPUT2_GET_INDEX(output_idx, 0, 0, 0)] =
box_info[box_info_idx].batch_idx * NUM_BOXES + box_info[box_info_idx].box_idx;
++output_idx;
++box_info_idx;
}
// Paddings
while (output_idx < (i + 1) * MAX_BOXES_PER_BATCH) {
unroll_for(int j = 0; j < 6; ++j) {
output[OUTPUT_GET_INDEX(output_idx, j, 0, 0)] = -OUTPUT_VAL_ONE;
}
selected_indices[INPUT2_GET_INDEX(output_idx, 0, 0, 0)] = -INPUT2_VAL_ONE;
++output_idx;
}
}
}
#endif /* MATRIX_NMS_STAGE_2 */
#undef NUM_BATCHES
#undef NUM_BOXES
#undef NUM_CLASSES
#undef BOX_INFO

View File

@ -90,6 +90,7 @@ enum class KernelType {
CONVERT_COLOR,
RANDOM_UNIFORM,
ADAPTIVE_POOLING,
MATRIX_NMS,
REVERSE,
PRIOR_BOX,
EYE,

View File

@ -175,6 +175,7 @@ std::string toString(KernelType kt) {
return "BINARY_CONVOLUTION";
case KernelType::NON_MAX_SUPPRESSION:
return "NON_MAX_SUPPRESSION";
case KernelType::MATRIX_NMS: return "MATRIX_NMS";
default: return "";
}
}

View File

@ -0,0 +1,196 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "matrix_nms_kernel_ref.h"
#include <vector>
#include "kernel_selector_utils.h"
namespace kernel_selector {
ParamsKey MatrixNmsKernelRef::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::F16);
k.EnableInputDataType(Datatype::F32);
k.EnableInputDataType(Datatype::INT32);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::F32);
k.EnableDifferentTypes();
k.EnableAllInputLayout();
k.EnableAllOutputLayout();
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableBatching();
return k;
}
namespace {
MatrixNmsKernelRef::DispatchData SetDefault(const matrix_nms_params& params, size_t idx) {
MatrixNmsKernelRef::DispatchData dispatch_data;
const auto& input_scores = params.inputs[1];
if (idx == 0) {
dispatch_data.gws = {input_scores.Batch().v, input_scores.Feature().v, 1};
dispatch_data.lws = GetOptimalLocalWorkGroupSizes(dispatch_data.gws, params.engineInfo);
} else if (idx == 1) {
dispatch_data.gws = {input_scores.Batch().v, 1, 1};
dispatch_data.lws = GetOptimalLocalWorkGroupSizes(dispatch_data.gws, params.engineInfo);
} else {
dispatch_data.gws = {1, 1, 1};
dispatch_data.lws = {1, 1, 1};
}
return dispatch_data;
}
std::tuple<int, int> GetMaxBoxes(const matrix_nms_params& params) {
const int classes_num = params.inputs[1].Feature().v;
const int boxes_num = params.inputs[0].Feature().v;
int max_boxes_per_class{boxes_num};
if (params.nms_top_k >= 0)
max_boxes_per_class = std::min(max_boxes_per_class, params.nms_top_k);
auto classes_num_adj = classes_num;
if (params.background_class >= 0 && params.background_class < classes_num)
classes_num_adj = std::max(1, classes_num - 1);
auto max_boxes_per_batch = max_boxes_per_class * classes_num_adj;
if (params.keep_top_k >= 0)
max_boxes_per_batch = std::min(max_boxes_per_batch, params.keep_top_k);
return {max_boxes_per_class, max_boxes_per_batch};
}
} // anonymous namespace
KernelsData MatrixNmsKernelRef::GetKernelsData(const Params& params, const optional_params& options) const {
if (!Validate(params, options)) {
return {};
}
constexpr size_t kernels_num{3};
KernelData kernel_data = KernelData::Default<matrix_nms_params>(params, kernels_num);
const matrix_nms_params& new_params = dynamic_cast<const matrix_nms_params&>(*kernel_data.params.get());
constexpr size_t BOX_INFO_SIZE{16};
const int batches_num = new_params.inputs[1].Batch().v;
const int classes_num = new_params.inputs[1].Feature().v;
int max_boxes_per_class, max_boxes_per_batch;
std::tie(max_boxes_per_class, max_boxes_per_batch) = GetMaxBoxes(new_params);
const size_t box_info_num = batches_num * classes_num * max_boxes_per_class;
const size_t box_info_buffer_size = box_info_num * BOX_INFO_SIZE;
const size_t sel_boxes_num_buffer_size = batches_num * classes_num * sizeof(int);
kernel_data.internalBufferSizes.push_back(box_info_buffer_size);
kernel_data.internalBufferSizes.push_back(sel_boxes_num_buffer_size);
kernel_data.internalBufferDataType = Datatype::F32;
for (size_t i{}; i < kernels_num; ++i) {
auto entry_point = GetEntryPoint(kernelName, new_params.layerID, params, options, i);
auto jit_constants = GetJitConstants(new_params);
jit_constants.AddConstant(MakeJitConstant("MATRIX_NMS_STAGE_" + std::to_string(i), "true"));
jit_constants.AddConstant(MakeJitConstant("MAX_BOXES_PER_CLASS", max_boxes_per_class));
jit_constants.AddConstant(MakeJitConstant("MAX_BOXES_PER_BATCH", max_boxes_per_batch));
auto jit = CreateJit(kernelName, jit_constants, entry_point);
DispatchData dispatch_data = SetDefault(new_params, i);
auto& kernel = kernel_data.kernels[i];
KernelBase::CheckDispatchData(kernelName, dispatch_data, params.engineInfo.maxWorkGroupSize);
kernel.params.workGroups.global = dispatch_data.gws;
kernel.params.workGroups.local = dispatch_data.lws;
kernel.code.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo);
SetKernelArguments(new_params, kernel, i);
}
return {kernel_data};
}
float MatrixNmsKernelRef::GetKernelsPriority(const Params& params, const optional_params& options) const {
return FORCE_PRIORITY_9;
}
bool MatrixNmsKernelRef::Validate(const Params& p, const optional_params& o) const {
if (p.GetType() != KernelType::MATRIX_NMS || o.GetType() != KernelType::MATRIX_NMS) {
return false;
}
const matrix_nms_params& params = static_cast<const matrix_nms_params&>(p);
// inputs: boxes, scores, second output, third output
if (params.inputs.size() != 4)
return false;
return true;
}
JitConstants MatrixNmsKernelRef::GetJitConstants(const matrix_nms_params& params) const {
JitConstants jit = MakeBaseParamsJitConstants(params);
const auto& boxes = params.inputs[0];
switch (boxes.GetDType()) {
case Datatype::F32:
jit.AddConstant(MakeJitConstant("COORD_TYPE_4", "float4"));
jit.AddConstant(MakeJitConstant("TINY", "1e-10f"));
break;
case Datatype::F16:
jit.AddConstant(MakeJitConstant("COORD_TYPE_4", "half4"));
jit.AddConstant(MakeJitConstant("TINY", "1e-7h"));
break;
break;
default:
throw std::invalid_argument("Matrix NMS boxes type should be one of F32 or F16.");
}
jit.AddConstant(MakeJitConstant("SORT_TYPE", params.sort_type));
jit.AddConstant(MakeJitConstant("SORT_RESULT_ACROSS_BATCH", params.sort_result_across_batch));
jit.AddConstant(MakeJitConstant("SCORE_THRESHOLD", params.score_threshold));
jit.AddConstant(MakeJitConstant("KEEP_TOP_K", params.keep_top_k));
jit.AddConstant(MakeJitConstant("BACKGROUND_CLASS", params.background_class));
jit.AddConstant(MakeJitConstant("DECAY_FUNC", params.decay));
jit.AddConstant(MakeJitConstant("GAUSSIAN_SIGMA", params.gaussian_sigma));
jit.AddConstant(MakeJitConstant("POST_THRESHOLD", params.post_threshold));
jit.AddConstant(MakeJitConstant("NORM", params.normalized ? "INPUT0_VAL_ZERO" : "INPUT0_VAL_ONE"));
return jit;
}
void MatrixNmsKernelRef::SetKernelArguments(const matrix_nms_params& params, clKernelData& kernel, size_t idx) const {
switch (idx) {
case 0:
kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, 0});
kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, 1});
kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0});
kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 1});
break;
case 1:
kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, 3});
kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0});
kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 1});
break;
case 2:
kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, 0});
kernel.params.arguments.push_back({ArgumentDescriptor::Types::OUTPUT, 0});
kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, 2});
kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, 3});
kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0});
break;
default:
throw std::invalid_argument("Matrix NMS has 3 kernels. valid index is 0 ~ 2.");
}
}
} // namespace kernel_selector

View File

@ -0,0 +1,67 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "kernel_base_opencl.h"
#include "kernel_selector_params.h"
namespace kernel_selector {
struct matrix_nms_params : public base_params {
matrix_nms_params() : base_params(KernelType::MATRIX_NMS) {}
enum decay_function { GAUSSIAN, LINEAR };
enum sort_result_type {
CLASS_ID, // sort selected boxes by class id (ascending) in each batch element
SCORE, // sort selected boxes by score (descending) in each batch element
NONE // do not guarantee the order in each batch element
};
// specifies order of output elements
sort_result_type sort_type = sort_result_type::NONE;
// specifies whenever it is necessary to sort selected boxes across batches or not
bool sort_result_across_batch = false;
// specifies minimum score to consider box for the processing
float score_threshold = 0.0f;
// specifies maximum number of boxes to be selected per class, -1 meaning to
// keep all boxes
int nms_top_k = -1;
// specifies maximum number of boxes to be selected per batch element, -1
// meaning to keep all boxes
int keep_top_k = -1;
// specifies the background class id, -1 meaning to keep all classes
int background_class = -1;
// specifies decay function used to decay scores
decay_function decay = decay_function::LINEAR;
// specifies gaussian_sigma parameter for gaussian decay_function
float gaussian_sigma = 2.0f;
// specifies threshold to filter out boxes with low confidence score after
// decaying
float post_threshold = 0.0f;
// specifies whether boxes are normalized or not
bool normalized = true;
};
struct matrix_nms_optional_params : optional_params {
matrix_nms_optional_params() : optional_params(KernelType::MATRIX_NMS) {}
};
class MatrixNmsKernelRef : public KernelBaseOpenCL {
public:
MatrixNmsKernelRef() : KernelBaseOpenCL("matrix_nms_ref") {}
using DispatchData = CommonDispatchData;
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
KernelsPriority GetKernelsPriority(const Params& params, const optional_params& options) const override;
ParamsKey GetSupportedKey() const override;
protected:
JitConstants GetJitConstants(const matrix_nms_params& params) const;
bool Validate(const Params& p, const optional_params& o) const override;
void SetKernelArguments(const matrix_nms_params& params, clKernelData& kernel, size_t idx) const;
};
} // namespace kernel_selector

View File

@ -0,0 +1,18 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "matrix_nms_kernel_selector.h"
#include "matrix_nms_kernel_ref.h"
namespace kernel_selector {
matrix_nms_kernel_selector::matrix_nms_kernel_selector() {
Attach<MatrixNmsKernelRef>();
}
KernelsData matrix_nms_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const {
return GetNaiveBestKernel(params, options, KernelType::MATRIX_NMS);
}
} // namespace kernel_selector

View File

@ -0,0 +1,21 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "kernel_selector.h"
namespace kernel_selector {
class matrix_nms_kernel_selector : public kernel_selector_base {
public:
static matrix_nms_kernel_selector& Instance() {
static matrix_nms_kernel_selector instance_;
return instance_;
}
matrix_nms_kernel_selector();
KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
};
} // namespace kernel_selector

View File

@ -0,0 +1,83 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "intel_gpu/primitives/matrix_nms.hpp"
#include <memory>
#include <ngraph/opsets/opset8.hpp>
#include "intel_gpu/plugin/common_utils.hpp"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/primitives/mutable_data.hpp"
#include "ngraph_ops/nms_static_shape_ie.hpp"
namespace ngraph {
namespace op {
namespace internal {
using NmsStaticShapeIE8 = ngraph::op::internal::NmsStaticShapeIE<ngraph::opset8::MatrixNms>;
}
} // namespace op
} // namespace ngraph
namespace ov {
namespace intel_gpu {
namespace {
void CreateNmsStaticShapeIE8Op(Program& p, const std::shared_ptr<ngraph::op::internal::NmsStaticShapeIE8>& op) {
validate_inputs_count(op, {2});
auto inputPrimitives = p.GetInputPrimitiveIDs(op);
std::vector<cldnn::memory::ptr> shared_memory;
auto outputIndices = op->get_output_shape(0)[0];
cldnn::layout mutableLayoutFirst = cldnn::layout(cldnn::element_type_to_data_type(ngraph::element::i32),
cldnn::format::bfyx,
cldnn::tensor(static_cast<int32_t>(outputIndices), 1, 1, 1));
shared_memory.emplace_back(p.GetEngine().allocate_memory(mutableLayoutFirst));
cldnn::primitive_id matrix_nms_mutable_id_w_first = layer_type_name_ID(op) + "_md_write_first";
auto matrix_nms_mutable_prim_first = cldnn::mutable_data(matrix_nms_mutable_id_w_first, shared_memory.back());
p.add_primitive(*op, matrix_nms_mutable_prim_first);
inputPrimitives.push_back(matrix_nms_mutable_id_w_first);
auto batches_num = op->get_output_shape(2)[0];
cldnn::layout mutableLayoutSecond = cldnn::layout(cldnn::element_type_to_data_type(ngraph::element::i32),
cldnn::format::bfyx,
cldnn::tensor(static_cast<int32_t>(batches_num), 1, 1, 1));
shared_memory.emplace_back(p.GetEngine().allocate_memory(mutableLayoutSecond));
cldnn::primitive_id matrix_nms_mutable_id_w_second = layer_type_name_ID(op) + "_md_write_second";
auto matrix_nms_mutable_prim_second = cldnn::mutable_data(matrix_nms_mutable_id_w_second, shared_memory.back());
p.add_primitive(*op, matrix_nms_mutable_prim_second);
inputPrimitives.push_back(matrix_nms_mutable_id_w_second);
auto matrixNmsLayerName = layer_type_name_ID(op) + ".out0";
auto prim = cldnn::matrix_nms(matrixNmsLayerName,
inputPrimitives[0],
inputPrimitives[1],
inputPrimitives[inputPrimitives.size() - 2],
inputPrimitives[inputPrimitives.size() - 1],
op->get_attrs());
p.add_primitive(*op, prim);
cldnn::primitive_id matrix_nms_id_r_first = layer_type_name_ID(op) + ".out1";
auto matrix_nms_mutable_prim_r_first =
cldnn::mutable_data(matrix_nms_id_r_first, {matrixNmsLayerName}, shared_memory.front());
p.add_primitive(*op, matrix_nms_mutable_prim_r_first);
cldnn::primitive_id matrix_nms_id_r_second = layer_type_name_ID(op) + ".out2";
auto matrix_nms_mutable_prim_r_second =
cldnn::mutable_data(matrix_nms_id_r_second, {matrixNmsLayerName}, shared_memory.back());
p.add_primitive(*op, matrix_nms_mutable_prim_r_second);
}
} // anonymous namespace
REGISTER_FACTORY_IMPL(internal, NmsStaticShapeIE8);
} // namespace intel_gpu
} // namespace ov

View File

@ -70,6 +70,7 @@
#include <transformations/op_conversions/bidirectional_sequences_decomposition.hpp>
#include <transformations/op_conversions/convert_previous_nms_to_nms_9.hpp>
#include <transformations/op_conversions/convert_nms9_to_nms_ie_internal.hpp>
#include <transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.hpp>
#include <transformations/op_conversions/convert_interpolate1_to_interpolate4.hpp>
#include <transformations/op_conversions/convert_gather_downgrade.hpp>
#include <transformations/op_conversions/convert_gather_0d.hpp>
@ -165,6 +166,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
manager.register_pass<ngraph::pass::ConvertNMS5ToNMS9>();
manager.register_pass<ngraph::pass::ConvertNMS9ToNMSIEInternal>();
manager.register_pass<ngraph::pass::ConvertGP9ToGPIEInternal>();
manager.register_pass<ngraph::pass::ConvertMatrixNmsToMatrixNmsIE>();
manager.register_pass<ngraph::pass::ConvertGather0D>();
manager.register_pass<ngraph::pass::ConvertPriorBox8To0, false>();

View File

@ -0,0 +1,673 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <intel_gpu/graph/network.hpp>
#include <intel_gpu/graph/topology.hpp>
#include <intel_gpu/primitives/input_layout.hpp>
#include <intel_gpu/primitives/matrix_nms.hpp>
#include <intel_gpu/primitives/mutable_data.hpp>
#include <intel_gpu/runtime/memory.hpp>
#include "test_utils.h"
using namespace cldnn;
using namespace tests;
namespace {
#define PAD -1.0
#define PADI -1
#define THRESHOLD 1e-3f
template <class T>
std::vector<T> convert(const std::vector<float>& v) {
return {v.begin(), v.end()};
}
struct matrix_nms_test_inputs {
int num_butches;
int num_boxes;
int num_classes;
int num_selected_boxes;
bool sort_result_across_batch;
float score_threshold;
int nms_top_k;
int keep_top_k;
int background_class;
float gaussian_sigma;
float post_threshold;
bool normalized;
std::vector<float> boxes_values;
std::vector<float> scores_values;
std::vector<float> expected_output;
std::vector<int> expected_selected_boxes;
std::vector<int> expected_valid_outputs;
matrix_nms::sort_result_type sort_result_type;
matrix_nms::decay_function decay_function;
std::string test_name;
};
using matrix_nms_test_params = std::tuple<matrix_nms_test_inputs, format::type>;
template <class T>
struct matrix_nms_gpu_test : public testing::TestWithParam<matrix_nms_test_params> {
public:
void test() {
format::type blocked_format;
matrix_nms_test_inputs test_inputs;
std::tie(test_inputs, blocked_format) = testing::TestWithParam<matrix_nms_test_params>::GetParam();
const auto data_type = type_to_data_type<T>::value;
const auto plain_format = format::bfyx;
auto& engine = get_test_engine();
auto boxes = engine.allocate_memory(
{data_type, plain_format, tensor{test_inputs.num_butches, test_inputs.num_boxes, 1, 4}});
auto scores = engine.allocate_memory(
{data_type,
plain_format,
tensor{test_inputs.num_butches, test_inputs.num_classes, 1, test_inputs.num_boxes}});
auto selected_boxes =
engine.allocate_memory({data_types::i32, plain_format, tensor{test_inputs.num_selected_boxes, 1, 1, 1}});
auto valid_outputs =
engine.allocate_memory({data_types::i32, plain_format, tensor{test_inputs.num_butches, 1, 1, 1}});
set_values(boxes, convert<T>(test_inputs.boxes_values));
set_values(scores, convert<T>(test_inputs.scores_values));
const matrix_nms::attributes attrs(test_inputs.sort_result_type,
test_inputs.sort_result_across_batch,
test_inputs.score_threshold,
test_inputs.nms_top_k,
test_inputs.keep_top_k,
test_inputs.background_class,
test_inputs.decay_function,
test_inputs.gaussian_sigma,
test_inputs.post_threshold,
test_inputs.normalized);
topology topology;
topology.add(input_layout("boxes", boxes->get_layout()));
topology.add(input_layout("scores", scores->get_layout()));
topology.add(mutable_data("selected_boxes", selected_boxes));
topology.add(mutable_data("valid_outputs", valid_outputs));
topology.add(reorder("reordered_boxes", "boxes", blocked_format, data_type));
topology.add(reorder("reordered_scores", "scores", blocked_format, data_type));
topology.add(matrix_nms("reordered_matrix_nms",
"reordered_boxes",
"reordered_scores",
"selected_boxes",
"valid_outputs",
attrs));
topology.add(reorder("matrix_nms", "reordered_matrix_nms", plain_format, data_type));
network network(engine, topology);
network.set_input_data("boxes", boxes);
network.set_input_data("scores", scores);
auto outputs = network.execute();
auto output = outputs.at("matrix_nms").get_memory();
cldnn::mem_lock<T> output_ptr(output, get_test_stream());
cldnn::mem_lock<int> selected_boxes_ptr(selected_boxes, get_test_stream());
cldnn::mem_lock<int> valid_outputs_ptr(valid_outputs, get_test_stream());
const auto expected_output = convert<T>(test_inputs.expected_output);
ASSERT_EQ(expected_output.size(), output_ptr.size());
for (size_t i = 0; i < expected_output.size(); ++i) {
EXPECT_NEAR(expected_output[i], output_ptr[i], THRESHOLD);
}
ASSERT_EQ(test_inputs.expected_selected_boxes.size(), selected_boxes_ptr.size());
for (size_t i = 0; i < test_inputs.expected_selected_boxes.size(); ++i) {
EXPECT_EQ(test_inputs.expected_selected_boxes[i], selected_boxes_ptr[i]);
}
ASSERT_EQ(test_inputs.expected_valid_outputs.size(), valid_outputs_ptr.size());
for (size_t i = 0; i < test_inputs.expected_valid_outputs.size(); ++i) {
EXPECT_EQ(test_inputs.expected_valid_outputs[i], valid_outputs_ptr[i]);
}
}
static std::string PrintToStringParamName(const testing::TestParamInfo<matrix_nms_test_params>& info) {
auto& test_inputs = std::get<0>(info.param);
std::ostringstream result;
auto sort_res_type_str =
test_inputs.sort_result_type == matrix_nms::sort_result_type::score
? "score"
: test_inputs.sort_result_type == matrix_nms::sort_result_type::class_id ? "class_id" : "none";
auto decay_function_str =
test_inputs.decay_function == matrix_nms::decay_function::linear
? "linear"
: test_inputs.decay_function == matrix_nms::decay_function::gaussian ? "gaussian" : "none";
result << "SortResultAcrossBatch=" << bool_to_str(test_inputs.sort_result_across_batch) << "_";
result << "ScoreThreshold=" << test_inputs.score_threshold << "_";
result << "NmsTopK=" << test_inputs.nms_top_k << "_";
result << "KeepTopK=" << test_inputs.keep_top_k << "_";
result << "BackgroundClass=" << test_inputs.background_class << "_";
result << "GaussianSigma=" << test_inputs.gaussian_sigma << "_";
result << "PostThreshold=" << test_inputs.post_threshold << "_";
result << "Normalized=" << bool_to_str(test_inputs.normalized) << "_";
result << "sort_result_type=" << sort_res_type_str << "_";
result << "decay_function=" << decay_function_str << "_";
result << "Format=" << fmt_to_str(std::get<1>(info.param));
if (!test_inputs.test_name.empty())
result << "_TN=" << test_inputs.test_name;
return result.str();
}
};
matrix_nms_test_inputs get_matrix_nms_smoke_inputs() {
return {1, // num_butches
6, // num_boxes
2, // num_classes
3, // num_selected_boxes
false, // sort_result_across_bch
0.0f, // score_threshold
3, // nms_top_k
-1, // keep_top_k
0, // background_class
2.0f, // gaussian_sigma
0.0f, // post_threshold
true, // normalized
std::vector<float>{0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9, // boxes
0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0},
std::vector<float>{0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3}, // scores
std::vector<float>{1.00, // expected_output
0.95,
0.00,
0.00,
1.00,
1.00,
1.00,
0.8,
0.00,
10.00,
1.00,
11.00,
1.00,
0.13636364,
0.0,
0.1,
1.0,
1.1},
std::vector<int>{0, 3, 1}, // expected_selected_boxes
std::vector<int>{3}, // expected_valid_output
matrix_nms::sort_result_type::score, // sort_result_type
matrix_nms::decay_function::linear, // decay_function
"smoke"};
}
matrix_nms_test_inputs get_matrix_nms_gaussian_inputs() {
return {1, // num_butches
6, // num_boxes
2, // num_classes
3, // num_selected_boxes
false, // sort_result_across_bch
0.0f, // score_threshold
3, // nms_top_k
-1, // keep_top_k
0, // background_class
2.0f, // gaussian_sigma
0.0f, // post_threshold
true, // normalized
std::vector<float>{0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9, // boxes
0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0},
std::vector<float>{0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3}, // scores
std::vector<float>{1.00, // expected_output
0.95,
0.00,
0.00,
1.00,
1.00,
1.00,
0.8,
0.00,
10.00,
1.00,
11.00,
1.00,
0.1966116,
0.0,
0.1,
1.0,
1.1},
std::vector<int>{0, 3, 1}, // expected_selected_boxes
std::vector<int>{3}, // expected_valid_output
matrix_nms::sort_result_type::score, // sort_result_type
matrix_nms::decay_function::gaussian, // decay_function
"gaussian"};
}
matrix_nms_test_inputs get_matrix_nms_two_batches_two_classes_inputs() {
return {2, // num_butches
6, // num_boxes
2, // num_classes
6, // num_selected_boxes
false, // sort_result_across_bch
0.0f, // score_threshold
3, // nms_top_k
-1, // keep_top_k
0, // background_class
2.0f, // gaussian_sigma
0.0f, // post_threshold
true, // normalized
std::vector<float>{0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9, // boxes
0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0,
0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0},
std::vector<float>{0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3, // scores
0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3},
std::vector<float>{1.00, 0.95, 0.00, 0.00, 1.00, 1.00, // expected_output
1.00, 0.8, 0.00, 10.00, 1.00, 11.00, 1.00, 0.13636364, 0.0, 0.1,
1.0, 1.1, 1.00, 0.95, 0.00, 0.00, 1.00, 1.00, 1.00, 0.8,
0.00, 10.00, 1.00, 11.00, 1.00, 0.13636364, 0.0, 0.1, 1.0, 1.1},
std::vector<int>{0, 3, 1, 6, 9, 7}, // expected_selected_boxes
std::vector<int>{3, 3}, // expected_valid_output
matrix_nms::sort_result_type::score, // sort_result_type
matrix_nms::decay_function::linear, // decay_function
"two_batches_two_classes"};
}
matrix_nms_test_inputs get_matrix_nms_two_batches_two_classes_by_score_cross_batch_inputs() {
return {2, // num_butches
6, // num_boxes
2, // num_classes
12, // num_selected_boxes
true, // sort_result_across_bch
0.0f, // score_threshold
3, // nms_top_k
-1, // keep_top_k
-1, // background_class
2.0f, // gaussian_sigma
0.5f, // post_threshold
true, // normalized
std::vector<float>{0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9, // boxes
0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0,
0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0},
std::vector<float>{0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3, // scores
0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3},
std::vector<float>{0.00, 0.95, 0.00, 10.00, 1.00, 11.00, // expected_output
1.00, 0.95, 0.00, 0.00, 1.00, 1.00, 0.00, 0.95, 0.00, 10.00, 1.00, 11.00, 1.00, 0.95,
0.00, 0.00, 1.00, 1.00, PAD, PAD, PAD, PAD, PAD, PAD, PAD, PAD, PAD, PAD,
PAD, PAD, 0.00, 0.90, 0.00, 0.00, 1.00, 1.00, 0.00, 0.90, 0.00, 0.00, 1.00, 1.00,
1.00, 0.80, 0.00, 10.00, 1.00, 11.00, 1.00, 0.80, 0.00, 10.00, 1.00, 11.00, PAD, PAD,
PAD, PAD, PAD, PAD, PAD, PAD, PAD, PAD, PAD, PAD},
std::vector<int>{3, 0, 9, 6, PADI, PADI, 0, 6, 3, 9, PADI, PADI}, // expected_selected_boxes
std::vector<int>{4, 4}, // expected_valid_output
matrix_nms::sort_result_type::score, // sort_result_type
matrix_nms::decay_function::linear, // decay_function
"two_batches_two_classes_by_score_cross_batch"};
}
matrix_nms_test_inputs get_matrix_nms_two_batches_two_classes_by_classid_cross_batch_inputs() {
return {2, // num_butches
6, // num_boxes
2, // num_classes
12, // num_selected_boxes
true, // sort_result_across_bch
0.0f, // score_threshold
3, // nms_top_k
-1, // keep_top_k
-1, // background_class
2.0f, // gaussian_sigma
0.5f, // post_threshold
true, // normalized
std::vector<float>{0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9, // boxes
0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0,
0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0},
std::vector<float>{0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3, // scores
0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3},
std::vector<float>{0.00, 0.95, 0.00, 10.00, 1.00, 11.00, // expected_output
0.00, 0.90, 0.00, 0.00, 1.00, 1.00, 0.00, 0.95, 0.00, 10.00, 1.00, 11.00, 0.00, 0.90,
0.00, 0.00, 1.00, 1.00, PAD, PAD, PAD, PAD, PAD, PAD, PAD, PAD, PAD, PAD,
PAD, PAD, 1.00, 0.95, 0.00, 0.00, 1.00, 1.00, 1.00, 0.80, 0.00, 10.00, 1.00, 11.00,
1.00, 0.95, 0.00, 0.00, 1.00, 1.00, 1.00, 0.80, 0.00, 10.00, 1.00, 11.00, PAD, PAD,
PAD, PAD, PAD, PAD, PAD, PAD, PAD, PAD, PAD, PAD},
std::vector<int>{3, 0, 9, 6, PADI, PADI, 0, 3, 6, 9, PADI, PADI}, // expected_selected_boxes
std::vector<int>{4, 4}, // expected_valid_output
matrix_nms::sort_result_type::class_id, // sort_result_type
matrix_nms::decay_function::linear, // decay_function
"matrix_nms_two_batches_two_classes_by_classid_cross_batch"};
}
matrix_nms_test_inputs get_matrix_nms_by_keep_top_k_inputs() {
return {2, // num_butches
6, // num_boxes
2, // num_classes
6, // num_selected_boxes
false, // sort_result_across_bch
0.0f, // score_threshold
3, // nms_top_k
3, // keep_top_k
0, // background_class
2.0f, // gaussian_sigma
0.0f, // post_threshold
true, // normalized
std::vector<float>{0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9, // boxes
0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0,
0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0},
std::vector<float>{0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3, // scores
0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3},
std::vector<float>{1.00, 0.95, 0.00, 0.00, 1.00, 1.00, // expected_output
1.00, 0.8, 0.00, 10.00, 1.00, 11.00, 1.00, 0.13636364, 0.0, 0.1,
1.0, 1.1, 1.00, 0.95, 0.00, 0.00, 1.00, 1.00, 1.00, 0.8,
0.00, 10.00, 1.00, 11.00, 1.00, 0.13636364, 0.0, 0.1, 1.0, 1.1},
std::vector<int>{0, 3, 1, 6, 9, 7}, // expected_selected_boxes
std::vector<int>{3, 3}, // expected_valid_output
matrix_nms::sort_result_type::class_id, // sort_result_type
matrix_nms::decay_function::linear, // decay_function
"matrix_nms_by_keep_top_k"};
}
matrix_nms_test_inputs get_matrix_nms_background_inputs() {
return {1, // num_butches
6, // num_boxes
2, // num_classes
6, // num_selected_boxes
false, // sort_result_across_bch
0.0f, // score_threshold
3, // nms_top_k
-1, // keep_top_k
-1, // background_class
2.0f, // gaussian_sigma
0.0f, // post_threshold
true, // normalized
std::vector<float>{0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9, // boxes
0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0},
std::vector<float>{0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3}, // scores
std::vector<float>{0.00, 0.95, 0.0, 10.0, 1.0, 11.0, // expected_output
1.00, 0.95, 0.0, 0.0, 1.0, 1.0, 0.00, 0.9, 0.0, 0.0,
1.0, 1.0, 1.00, 0.8, 0.0, 10.0, 1.0, 11.0, 0.00, 0.13636364,
0.0, 0.1, 1.0, 1.1, 1.00, 0.13636364, 0.0, 0.1, 1.0, 1.1},
std::vector<int>{3, 0, 0, 3, 1, 1}, // expected_selected_boxes
std::vector<int>{6}, // expected_valid_output
matrix_nms::sort_result_type::score, // sort_result_type
matrix_nms::decay_function::linear, // decay_function
"matrix_nms_background"};
}
matrix_nms_test_inputs get_matrix_nms_flipped_coordinates_inputs() {
return {1, // num_butches
6, // num_boxes
1, // num_classes
3, // num_selected_boxes
false, // sort_result_across_bch
0.0f, // score_threshold
3, // nms_top_k
-1, // keep_top_k
-1, // background_class
2.0f, // gaussian_sigma
0.0f, // post_threshold
true, // normalized
std::vector<float>{1.0, 1.0, 0.0, 0.0, 0.0, 0.1, 1.0, 1.1, 0.0, 0.9, 1.0, -0.1, // boxes
0.0, 10.0, 1.0, 11.0, 1.0, 10.1, 0.0, 11.1, 1.0, 101.0, 0.0, 100.0},
std::vector<float>{0.9, 0.75, 0.6, 0.95, 0.5, 0.3}, // scores
std::vector<float>{0.00,
0.95,
0.0,
10.0,
1.0,
11.0, // expected_output
0.00,
0.9,
1.0,
1.0,
0.0,
0.0,
0.00,
0.75,
0.0,
0.1,
1.0,
1.1},
std::vector<int>{3, 0, 1}, // expected_selected_boxes
std::vector<int>{3}, // expected_valid_output
matrix_nms::sort_result_type::score, // sort_result_type
matrix_nms::decay_function::linear, // decay_function
"flipped_coordinates"};
}
matrix_nms_test_inputs get_matrix_nms_post_threshold_inputs() {
return {1, // num_butches
6, // num_boxes
1, // num_classes
3, // num_selected_boxes
false, // sort_result_across_bch
0.0f, // score_threshold
3, // nms_top_k
-1, // keep_top_k
-1, // background_class
2.0f, // gaussian_sigma
0.8f, // post_threshold
true, // normalized
std::vector<float>{0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9, // boxes
0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0},
std::vector<float>{0.9, 0.75, 0.6, 0.95, 0.5, 0.3}, // scores
std::vector<float>{0.00,
0.95,
0.00,
10.00,
1.00,
11.00, // expected_output
0.00,
0.9,
0.00,
0.00,
1.00,
1.00,
PAD,
PAD,
PAD,
PAD,
PAD,
PAD},
std::vector<int>{3, 0, PADI}, // expected_selected_boxes
std::vector<int>{2}, // expected_valid_output
matrix_nms::sort_result_type::score, // sort_result_type
matrix_nms::decay_function::linear, // decay_function
"post_threshold"};
}
matrix_nms_test_inputs get_matrix_nms_identical_boxes_inputs() {
return {1, // num_butches
10, // num_boxes
1, // num_classes
3, // num_selected_boxes
false, // sort_result_across_bch
0.0f, // score_threshold
3, // nms_top_k
-1, // keep_top_k
-1, // background_class
2.0f, // gaussian_sigma
0.3f, // post_threshold
true, // normalized
std::vector<float>{0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, // boxes
1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0,
0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0},
std::vector<float>{0.4, 0.01, 0.2, 0.09, 0.15, 0.05, 0.02, 0.03, 0.05, 0.0}, // scores
std::vector<float>{0.00,
0.40,
0.00,
0.00,
1.00,
1.00, // expected_output
PAD,
PAD,
PAD,
PAD,
PAD,
PAD,
PAD,
PAD,
PAD,
PAD,
PAD,
PAD},
std::vector<int>{0, PADI, PADI}, // expected_selected_boxes
std::vector<int>{1}, // expected_valid_output
matrix_nms::sort_result_type::score, // sort_result_type
matrix_nms::decay_function::linear, // decay_function
"identical_boxes"};
};
matrix_nms_test_inputs get_matrix_nms_top_k_inputs() {
return {1, // num_butches
6, // num_boxes
1, // num_classes
2, // num_selected_boxes
false, // sort_result_across_bch
0.0f, // score_threshold
2, // nms_top_k
-1, // keep_top_k
-1, // background_class
2.0f, // gaussian_sigma
0.0f, // post_threshold
true, // normalized
std::vector<float>{0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9, // boxes
0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0},
std::vector<float>{0.9, 0.75, 0.6, 0.95, 0.5, 0.3}, // scores
std::vector<float>{0.00,
0.95,
0.00,
10.00,
1.00,
11.00, // expected_output
0.00,
0.90,
0.00,
0.00,
1.00,
1.00},
std::vector<int>{3, 0}, // expected_selected_boxes
std::vector<int>{2}, // expected_valid_output
matrix_nms::sort_result_type::score, // sort_result_type
matrix_nms::decay_function::linear, // decay_function
"matrix_nms_nms_top_k"};
}
matrix_nms_test_inputs get_matrix_nms_single_box_inputs() {
return {1, // num_butches
1, // num_boxes
1, // num_classes
1, // num_selected_boxes
false, // sort_result_across_bch
0.0f, // score_threshold
3, // nms_top_k
-1, // keep_top_k
-1, // background_class
2.0f, // gaussian_sigma
0.0f, // post_threshold
true, // normalized
std::vector<float>{0.0, 0.0, 1.0, 1.0}, // boxes
std::vector<float>{0.9}, // scores
std::vector<float>{0.00, 0.90, 0.00, 0.00, 1.00, 1.00}, // expected_output
std::vector<int>{0}, // expected_selected_boxes
std::vector<int>{1}, // expected_valid_output
matrix_nms::sort_result_type::score, // sort_result_type
matrix_nms::decay_function::linear, // decay_function
"matrix_nms_single_box"};
}
matrix_nms_test_inputs get_matrix_nms_no_output_inputs() {
return {1, // num_butches
6, // num_boxes
1, // num_classes
3, // num_selected_boxes
false, // sort_result_across_bch
2.0f, // score_threshold
3, // nms_top_k
-1, // keep_top_k
-1, // background_class
2.0f, // gaussian_sigma
0.0f, // post_threshold
true, // normalized
std::vector<float>{0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9, // boxes
0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0},
std::vector<float>{0.9, 0.75, 0.6, 0.95, 0.5, 0.3}, // scores
std::vector<float>{PAD,
PAD,
PAD,
PAD,
PAD,
PAD, // expected_output
PAD,
PAD,
PAD,
PAD,
PAD,
PAD,
PAD,
PAD,
PAD,
PAD,
PAD,
PAD},
std::vector<int>{PADI, PADI, PADI}, // expected_selected_boxes
std::vector<int>{0}, // expected_valid_output
matrix_nms::sort_result_type::score, // sort_result_type
matrix_nms::decay_function::linear, // decay_function
"matrix_nms_no_output"};
}
const std::vector<format::type> layout_formats = {format::bfyx,
format::b_fs_yx_fsv16,
format::b_fs_yx_fsv32,
format::bs_fs_yx_bsv16_fsv16,
format::bs_fs_yx_bsv32_fsv32,
format::bs_fs_yx_bsv32_fsv16};
#define INSTANTIATE_MATRIX_NMS_TEST_SUITE(input_type, func) \
using matrix_nms_gpu_test_##input_type##func = matrix_nms_gpu_test<input_type>; \
TEST_P(matrix_nms_gpu_test_##input_type##func, test) { \
test(); \
} \
INSTANTIATE_TEST_SUITE_P(matrix_nms_test_##input_type##func, \
matrix_nms_gpu_test_##input_type##func, \
testing::Combine(testing::Values(func()), testing::ValuesIn(layout_formats)), \
matrix_nms_gpu_test_##input_type##func::PrintToStringParamName);
INSTANTIATE_MATRIX_NMS_TEST_SUITE(float, get_matrix_nms_smoke_inputs)
INSTANTIATE_MATRIX_NMS_TEST_SUITE(float, get_matrix_nms_gaussian_inputs)
INSTANTIATE_MATRIX_NMS_TEST_SUITE(float, get_matrix_nms_two_batches_two_classes_inputs)
INSTANTIATE_MATRIX_NMS_TEST_SUITE(float, get_matrix_nms_two_batches_two_classes_by_classid_cross_batch_inputs)
INSTANTIATE_MATRIX_NMS_TEST_SUITE(float, get_matrix_nms_two_batches_two_classes_by_score_cross_batch_inputs)
INSTANTIATE_MATRIX_NMS_TEST_SUITE(float, get_matrix_nms_by_keep_top_k_inputs)
INSTANTIATE_MATRIX_NMS_TEST_SUITE(float, get_matrix_nms_background_inputs)
INSTANTIATE_MATRIX_NMS_TEST_SUITE(float, get_matrix_nms_flipped_coordinates_inputs)
INSTANTIATE_MATRIX_NMS_TEST_SUITE(float, get_matrix_nms_post_threshold_inputs)
INSTANTIATE_MATRIX_NMS_TEST_SUITE(float, get_matrix_nms_identical_boxes_inputs)
INSTANTIATE_MATRIX_NMS_TEST_SUITE(float, get_matrix_nms_top_k_inputs)
INSTANTIATE_MATRIX_NMS_TEST_SUITE(float, get_matrix_nms_single_box_inputs)
INSTANTIATE_MATRIX_NMS_TEST_SUITE(float, get_matrix_nms_no_output_inputs)
INSTANTIATE_MATRIX_NMS_TEST_SUITE(FLOAT16, get_matrix_nms_smoke_inputs)
INSTANTIATE_MATRIX_NMS_TEST_SUITE(FLOAT16, get_matrix_nms_gaussian_inputs)
INSTANTIATE_MATRIX_NMS_TEST_SUITE(FLOAT16, get_matrix_nms_two_batches_two_classes_inputs)
INSTANTIATE_MATRIX_NMS_TEST_SUITE(FLOAT16, get_matrix_nms_by_keep_top_k_inputs)
INSTANTIATE_MATRIX_NMS_TEST_SUITE(FLOAT16, get_matrix_nms_two_batches_two_classes_by_classid_cross_batch_inputs)
INSTANTIATE_MATRIX_NMS_TEST_SUITE(FLOAT16, get_matrix_nms_two_batches_two_classes_by_score_cross_batch_inputs)
INSTANTIATE_MATRIX_NMS_TEST_SUITE(FLOAT16, get_matrix_nms_background_inputs)
INSTANTIATE_MATRIX_NMS_TEST_SUITE(FLOAT16, get_matrix_nms_flipped_coordinates_inputs)
INSTANTIATE_MATRIX_NMS_TEST_SUITE(FLOAT16, get_matrix_nms_post_threshold_inputs)
INSTANTIATE_MATRIX_NMS_TEST_SUITE(FLOAT16, get_matrix_nms_identical_boxes_inputs)
INSTANTIATE_MATRIX_NMS_TEST_SUITE(FLOAT16, get_matrix_nms_top_k_inputs)
INSTANTIATE_MATRIX_NMS_TEST_SUITE(FLOAT16, get_matrix_nms_single_box_inputs)
INSTANTIATE_MATRIX_NMS_TEST_SUITE(FLOAT16, get_matrix_nms_no_output_inputs)
#undef INSTANTIATE_MATRIX_NMS_TEST_SUITE
} // namespace

View File

@ -0,0 +1,48 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "single_layer_tests/matrix_nms.hpp"
#include <tuple>
#include <vector>
#include "common_test_utils/test_constants.hpp"
using namespace ov::test::subgraph;
using namespace InferenceEngine;
using namespace ngraph;
const std::vector<std::vector<ov::Shape>> inStaticShapeParams = {{{3, 100, 4}, {3, 1, 100}},
{{1, 10, 4}, {1, 100, 10}}};
const auto inputPrecisions = InputPrecisions{ov::element::f32, ov::element::i32, ov::element::f32};
const std::vector<op::v8::MatrixNms::SortResultType> sortResultType = {op::v8::MatrixNms::SortResultType::CLASSID,
op::v8::MatrixNms::SortResultType::SCORE,
op::v8::MatrixNms::SortResultType::NONE};
const std::vector<element::Type> outType = {element::i32, element::i64};
const std::vector<TopKParams> topKParams = {TopKParams{-1, 5}, TopKParams{100, -1}};
const std::vector<ThresholdParams> thresholdParams = {ThresholdParams{0.0f, 2.0f, 0.0f},
ThresholdParams{0.1f, 1.5f, 0.2f}};
const std::vector<int> backgroudClass = {-1, 1};
const std::vector<bool> normalized = {true, false};
const std::vector<op::v8::MatrixNms::DecayFunction> decayFunction = {op::v8::MatrixNms::DecayFunction::GAUSSIAN,
op::v8::MatrixNms::DecayFunction::LINEAR};
const auto nmsParamsStatic =
::testing::Combine(::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inStaticShapeParams)),
::testing::Values(inputPrecisions),
::testing::ValuesIn(sortResultType),
::testing::ValuesIn(outType),
::testing::ValuesIn(topKParams),
::testing::ValuesIn(thresholdParams),
::testing::ValuesIn(backgroudClass),
::testing::ValuesIn(normalized),
::testing::ValuesIn(decayFunction),
::testing::Values(CommonTestUtils::DEVICE_GPU));
INSTANTIATE_TEST_SUITE_P(smoke_MatrixNmsLayerTest_static,
MatrixNmsLayerTest,
nmsParamsStatic,
MatrixNmsLayerTest::getTestCaseName);

View File

@ -325,7 +325,10 @@ void MatrixNmsLayerTest::SetUp() {
const auto paramOuts =
ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
auto nms = std::make_shared<opset8::MatrixNms>(paramOuts[0], paramOuts[1], m_attrs);
if (!m_outStaticShape) {
if (targetDevice == CommonTestUtils::DEVICE_GPU) {
function = std::make_shared<Function>(nms, params, "MatrixNMS");
} else if (!m_outStaticShape) {
auto result = std::make_shared<opset5::Result>(nms);
function = std::make_shared<Function>(result, params, "MatrixNMS");
} else {