[GPU] Matrix NMS (#13137)
This commit is contained in:
parent
f8b12b4f1a
commit
c6528ee4ea
@ -14,6 +14,7 @@
|
||||
|
||||
#include "itt.hpp"
|
||||
#include "ngraph_ops/nms_static_shape_ie.hpp"
|
||||
#include "transformations/utils/utils.hpp"
|
||||
|
||||
ngraph::pass::ConvertMatrixNmsToMatrixNmsIE::ConvertMatrixNmsToMatrixNmsIE(bool force_i32_output_type) {
|
||||
MATCHER_SCOPE(ConvertMatrixNmsToMatrixNmsIE);
|
||||
@ -46,13 +47,13 @@ ngraph::pass::ConvertMatrixNmsToMatrixNmsIE::ConvertMatrixNmsToMatrixNmsIE(bool
|
||||
|
||||
if (nms->output(1).get_element_type() != output_1.get_element_type()) {
|
||||
output_1 = std::make_shared<opset1::Convert>(output_1, nms->output(1).get_element_type());
|
||||
output_1.get_node_shared_ptr()->set_friendly_name(nms->get_friendly_name() + "/convert.1");
|
||||
output_1.get_node_shared_ptr()->set_friendly_name(op::util::create_ie_output_name(nms->output(1)));
|
||||
new_ops.emplace_back(output_1.get_node_shared_ptr());
|
||||
}
|
||||
|
||||
if (nms->output(2).get_element_type() != output_2.get_element_type()) {
|
||||
output_2 = std::make_shared<opset1::Convert>(output_2, nms->output(2).get_element_type());
|
||||
output_2.get_node_shared_ptr()->set_friendly_name(nms->get_friendly_name() + "/convert.2");
|
||||
output_2.get_node_shared_ptr()->set_friendly_name(op::util::create_ie_output_name(nms->output(2)));
|
||||
new_ops.emplace_back(output_2.get_node_shared_ptr());
|
||||
}
|
||||
|
||||
|
@ -247,3 +247,4 @@ REGISTER_FACTORY(v9, Eye);
|
||||
// --------------------------- Supported internal ops --------------------------- //
|
||||
REGISTER_FACTORY(internal, NonMaxSuppressionIEInternal);
|
||||
REGISTER_FACTORY(internal, GenerateProposalsIEInternal);
|
||||
REGISTER_FACTORY(internal, NmsStaticShapeIE8);
|
||||
|
@ -0,0 +1,154 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#pragma once
|
||||
#include <vector>
|
||||
|
||||
#include "ngraph/op/matrix_nms.hpp"
|
||||
#include "primitive.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
/// @addtogroup cpp_api C++ API
|
||||
/// @{
|
||||
/// @addtogroup cpp_topology Network Topology
|
||||
/// @{
|
||||
/// @addtogroup cpp_primitives Primitives
|
||||
/// @{
|
||||
|
||||
/// @brief Performs matrix nms of input boxes and returns indices of selected boxes.
|
||||
struct matrix_nms : public primitive_base<matrix_nms> {
|
||||
CLDNN_DECLARE_PRIMITIVE(matrix_nms)
|
||||
|
||||
enum decay_function { gaussian, linear };
|
||||
|
||||
enum sort_result_type {
|
||||
class_id, // sort selected boxes by class id (ascending) in each batch element
|
||||
score, // sort selected boxes by score (descending) in each batch element
|
||||
none // do not guarantee the order in each batch element
|
||||
};
|
||||
|
||||
/// \brief Structure that specifies attributes of the operation
|
||||
struct attributes {
|
||||
// specifies order of output elements
|
||||
sort_result_type sort_type = sort_result_type::none;
|
||||
// specifies whenever it is necessary to sort selected boxes across batches or not
|
||||
bool sort_result_across_batch = false;
|
||||
// specifies minimum score to consider box for the processing
|
||||
float score_threshold = 0.0f;
|
||||
// specifies maximum number of boxes to be selected per class, -1 meaning to
|
||||
// keep all boxes
|
||||
int nms_top_k = -1;
|
||||
// specifies maximum number of boxes to be selected per batch element, -1
|
||||
// meaning to keep all boxes
|
||||
int keep_top_k = -1;
|
||||
// specifies the background class id, -1 meaning to keep all classes
|
||||
int background_class = -1;
|
||||
// specifies decay function used to decay scores
|
||||
decay_function decay = decay_function::linear;
|
||||
// specifies gaussian_sigma parameter for gaussian decay_function
|
||||
float gaussian_sigma = 2.0f;
|
||||
// specifies threshold to filter out boxes with low confidence score after
|
||||
// decaying
|
||||
float post_threshold = 0.0f;
|
||||
// specifies whether boxes are normalized or not
|
||||
bool normalized = true;
|
||||
|
||||
attributes() {}
|
||||
|
||||
attributes(const ngraph::op::v8::MatrixNms::Attributes& attrs)
|
||||
: attributes(from(attrs.sort_result_type),
|
||||
attrs.sort_result_across_batch,
|
||||
attrs.score_threshold,
|
||||
attrs.nms_top_k,
|
||||
attrs.keep_top_k,
|
||||
attrs.background_class,
|
||||
from(attrs.decay_function),
|
||||
attrs.gaussian_sigma,
|
||||
attrs.post_threshold,
|
||||
attrs.normalized) {}
|
||||
|
||||
attributes(sort_result_type sort_type,
|
||||
bool sort_result_across_batch,
|
||||
float score_threshold,
|
||||
int nms_top_k,
|
||||
int keep_top_k,
|
||||
int background_class,
|
||||
decay_function decay,
|
||||
float gaussian_sigma,
|
||||
float post_threshold,
|
||||
bool normalized)
|
||||
: sort_type(sort_type),
|
||||
sort_result_across_batch(sort_result_across_batch),
|
||||
score_threshold(score_threshold),
|
||||
nms_top_k(nms_top_k),
|
||||
keep_top_k(keep_top_k),
|
||||
background_class(background_class),
|
||||
decay(decay),
|
||||
gaussian_sigma(gaussian_sigma),
|
||||
post_threshold(post_threshold),
|
||||
normalized(normalized) {}
|
||||
};
|
||||
|
||||
/// @brief Constructs matrix_nms primitive.
|
||||
/// @param id This primitive id.
|
||||
/// @param boxes primitive id.
|
||||
/// @param scores primitive id.
|
||||
/// @param second_output primitive id.
|
||||
/// @param third_output primitive id.
|
||||
/// @param attrs attributes.
|
||||
matrix_nms(const primitive_id& id,
|
||||
const primitive_id& boxes,
|
||||
const primitive_id& scores,
|
||||
const primitive_id& second_output,
|
||||
const primitive_id& third_output,
|
||||
const matrix_nms::attributes& attrs)
|
||||
: primitive_base(id, {boxes, scores, second_output, third_output}),
|
||||
attribs(attrs) {}
|
||||
|
||||
/// @brief Constructs matrix_nms primitive.
|
||||
/// @param id This primitive id.
|
||||
/// @param boxes primitive id.
|
||||
/// @param scores primitive id.
|
||||
/// @param second_output primitive id.
|
||||
/// @param third_output primitive id.
|
||||
/// @param attrs operation attributes.
|
||||
matrix_nms(const primitive_id& id,
|
||||
const primitive_id& boxes,
|
||||
const primitive_id& scores,
|
||||
const primitive_id& second_output,
|
||||
const primitive_id& third_output,
|
||||
const ngraph::op::v8::MatrixNms::Attributes& attrs)
|
||||
: primitive_base(id, {boxes, scores, second_output, third_output}),
|
||||
attribs(attrs) {}
|
||||
|
||||
attributes attribs;
|
||||
|
||||
private:
|
||||
static cldnn::matrix_nms::decay_function from(ngraph::op::v8::MatrixNms::DecayFunction decay) {
|
||||
switch (decay) {
|
||||
case ngraph::op::v8::MatrixNms::DecayFunction::GAUSSIAN:
|
||||
return cldnn::matrix_nms::decay_function::gaussian;
|
||||
case ngraph::op::v8::MatrixNms::DecayFunction::LINEAR:
|
||||
default:
|
||||
return cldnn::matrix_nms::decay_function::linear;
|
||||
}
|
||||
}
|
||||
|
||||
static cldnn::matrix_nms::sort_result_type from(ngraph::op::v8::MatrixNms::SortResultType type) {
|
||||
switch (type) {
|
||||
case ngraph::op::v8::MatrixNms::SortResultType::CLASSID:
|
||||
return cldnn::matrix_nms::sort_result_type::class_id;
|
||||
case ngraph::op::v8::MatrixNms::SortResultType::SCORE:
|
||||
return cldnn::matrix_nms::sort_result_type::score;
|
||||
case ngraph::op::v8::MatrixNms::SortResultType::NONE:
|
||||
default:
|
||||
return cldnn::matrix_nms::sort_result_type::none;
|
||||
}
|
||||
}
|
||||
};
|
||||
/// @}
|
||||
/// @}
|
||||
/// @}
|
||||
} // namespace cldnn
|
117
src/plugins/intel_gpu/src/graph/impls/ocl/matrix_nms.cpp
Normal file
117
src/plugins/intel_gpu/src/graph/impls/ocl/matrix_nms.cpp
Normal file
@ -0,0 +1,117 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "data_inst.h"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "matrix_nms/matrix_nms_kernel_ref.h"
|
||||
#include "matrix_nms/matrix_nms_kernel_selector.h"
|
||||
#include "matrix_nms_inst.h"
|
||||
#include "primitive_base.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
namespace ocl {
|
||||
|
||||
namespace {
|
||||
kernel_selector::matrix_nms_params::decay_function from(matrix_nms::decay_function decay) {
|
||||
switch (decay) {
|
||||
case matrix_nms::decay_function::gaussian:
|
||||
return kernel_selector::matrix_nms_params::decay_function::GAUSSIAN;
|
||||
default:
|
||||
case matrix_nms::decay_function::linear:
|
||||
return kernel_selector::matrix_nms_params::decay_function::LINEAR;
|
||||
}
|
||||
}
|
||||
|
||||
kernel_selector::matrix_nms_params::sort_result_type from(matrix_nms::sort_result_type type) {
|
||||
switch (type) {
|
||||
case matrix_nms::sort_result_type::class_id:
|
||||
return kernel_selector::matrix_nms_params::sort_result_type::CLASS_ID;
|
||||
case matrix_nms::sort_result_type::score:
|
||||
return kernel_selector::matrix_nms_params::sort_result_type::SCORE;
|
||||
default:
|
||||
case matrix_nms::sort_result_type::none:
|
||||
return kernel_selector::matrix_nms_params::sort_result_type::NONE;
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
struct matrix_nms_impl : typed_primitive_impl_ocl<matrix_nms> {
|
||||
using parent = typed_primitive_impl_ocl<matrix_nms>;
|
||||
using parent::parent;
|
||||
|
||||
std::unique_ptr<primitive_impl> clone() const override {
|
||||
return make_unique<matrix_nms_impl>(*this);
|
||||
}
|
||||
|
||||
protected:
|
||||
kernel_arguments_data get_arguments(matrix_nms_inst& instance, int32_t) const override {
|
||||
kernel_arguments_data args;
|
||||
args.inputs = {instance.input_boxes_mem(),
|
||||
instance.input_scores_mem(),
|
||||
instance.input_selected_boxes_mem(),
|
||||
instance.input_valid_outputs_mem()};
|
||||
args.outputs = {instance.output_memory_ptr()};
|
||||
|
||||
return args;
|
||||
}
|
||||
|
||||
public:
|
||||
static primitive_impl* create(const matrix_nms_node& node, const kernel_impl_params& impl_param) {
|
||||
auto params = get_default_params<kernel_selector::matrix_nms_params>(impl_param);
|
||||
auto optional_params =
|
||||
get_default_optional_params<kernel_selector::matrix_nms_optional_params>(node.get_program());
|
||||
|
||||
const auto& scores_layout = impl_param.get_input_layout(1);
|
||||
const auto& second_output_layout = impl_param.get_input_layout(2);
|
||||
const auto& third_output_layout = impl_param.get_input_layout(3);
|
||||
|
||||
params.inputs.push_back(convert_data_tensor(scores_layout));
|
||||
params.inputs.push_back(convert_data_tensor(second_output_layout));
|
||||
params.inputs.push_back(convert_data_tensor(third_output_layout));
|
||||
|
||||
const auto& primitive = node.get_primitive();
|
||||
params.sort_type = from(primitive->attribs.sort_type);
|
||||
params.sort_result_across_batch = primitive->attribs.sort_result_across_batch;
|
||||
params.score_threshold = primitive->attribs.score_threshold;
|
||||
params.nms_top_k = primitive->attribs.nms_top_k;
|
||||
params.keep_top_k = primitive->attribs.keep_top_k;
|
||||
params.background_class = primitive->attribs.background_class;
|
||||
params.decay = from(primitive->attribs.decay);
|
||||
params.gaussian_sigma = primitive->attribs.gaussian_sigma;
|
||||
params.post_threshold = primitive->attribs.post_threshold;
|
||||
params.normalized = primitive->attribs.normalized;
|
||||
|
||||
auto& kernel_selector = kernel_selector::matrix_nms_kernel_selector::Instance();
|
||||
auto best_kernels = kernel_selector.GetBestKernels(params, optional_params);
|
||||
|
||||
CLDNN_ERROR_BOOL(node.id(),
|
||||
"Best_kernel.empty()",
|
||||
best_kernels.empty(),
|
||||
"Cannot find a proper kernel with this nodeuments");
|
||||
|
||||
auto matrix_nms_node = new matrix_nms_impl(node, best_kernels[0]);
|
||||
|
||||
return matrix_nms_node;
|
||||
}
|
||||
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
attach_matrix_nms_impl::attach_matrix_nms_impl() {
|
||||
auto types = {data_types::f16, data_types::f32, data_types::i32};
|
||||
|
||||
auto formats = {format::bfyx,
|
||||
format::b_fs_yx_fsv16,
|
||||
format::b_fs_yx_fsv32,
|
||||
format::bs_fs_yx_bsv16_fsv16,
|
||||
format::bs_fs_yx_bsv32_fsv16,
|
||||
format::bs_fs_yx_bsv32_fsv32};
|
||||
|
||||
implementation_map<matrix_nms>::add(impl_types::ocl, matrix_nms_impl::create, types, formats);
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
@ -50,6 +50,7 @@ void register_implementations() {
|
||||
REGISTER_OCL(mutable_data);
|
||||
REGISTER_OCL(mvn);
|
||||
REGISTER_OCL(non_max_suppression);
|
||||
REGISTER_OCL(matrix_nms);
|
||||
REGISTER_OCL(normalize);
|
||||
REGISTER_OCL(one_hot);
|
||||
REGISTER_OCL(permute);
|
||||
|
@ -131,6 +131,7 @@ REGISTER_OCL(max_unpooling);
|
||||
REGISTER_OCL(mutable_data);
|
||||
REGISTER_OCL(mvn);
|
||||
REGISTER_OCL(non_max_suppression);
|
||||
REGISTER_OCL(matrix_nms);
|
||||
REGISTER_OCL(normalize);
|
||||
REGISTER_OCL(one_hot);
|
||||
REGISTER_OCL(permute);
|
||||
|
43
src/plugins/intel_gpu/src/graph/include/matrix_nms_inst.h
Normal file
43
src/plugins/intel_gpu/src/graph/include/matrix_nms_inst.h
Normal file
@ -0,0 +1,43 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#pragma once
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "intel_gpu/primitives/matrix_nms.hpp"
|
||||
#include "primitive_inst.h"
|
||||
|
||||
namespace cldnn {
|
||||
|
||||
using matrix_nms_node = typed_program_node<matrix_nms>;
|
||||
|
||||
template <>
|
||||
class typed_primitive_inst<matrix_nms> : public typed_primitive_inst_base<matrix_nms> {
|
||||
using parent = typed_primitive_inst_base<matrix_nms>;
|
||||
|
||||
public:
|
||||
typed_primitive_inst(network& network, const matrix_nms_node& node) : parent(network, node) {}
|
||||
|
||||
static layout calc_output_layout(const matrix_nms_node& node, const kernel_impl_params& impl_param);
|
||||
static std::string to_string(const matrix_nms_node& node);
|
||||
|
||||
memory::ptr input_boxes_mem() const {
|
||||
return dep_memory_ptr(0);
|
||||
}
|
||||
memory::ptr input_scores_mem() const {
|
||||
return dep_memory_ptr(1);
|
||||
}
|
||||
memory::ptr input_selected_boxes_mem() const {
|
||||
return dep_memory_ptr(2);
|
||||
}
|
||||
memory::ptr input_valid_outputs_mem() const {
|
||||
return dep_memory_ptr(3);
|
||||
}
|
||||
};
|
||||
|
||||
using matrix_nms_inst = typed_primitive_inst<matrix_nms>;
|
||||
|
||||
} // namespace cldnn
|
88
src/plugins/intel_gpu/src/graph/matrix_nms.cpp
Normal file
88
src/plugins/intel_gpu/src/graph/matrix_nms.cpp
Normal file
@ -0,0 +1,88 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include <json_object.h>
|
||||
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
#include "matrix_nms_inst.h"
|
||||
#include "openvino/core/enum_names.hpp"
|
||||
#include "primitive_type_base.h"
|
||||
|
||||
namespace cldnn {
|
||||
primitive_type_id matrix_nms::type_id() {
|
||||
static primitive_type_base<matrix_nms> instance;
|
||||
return &instance;
|
||||
}
|
||||
|
||||
layout matrix_nms_inst::calc_output_layout(const matrix_nms_node& node, const kernel_impl_params& impl_param) {
|
||||
const auto primitive = impl_param.typed_desc<matrix_nms>();
|
||||
const auto boxes_layout = impl_param.get_input_layout(0);
|
||||
const auto scores_layout = impl_param.get_input_layout(1);
|
||||
|
||||
const auto batches_num = boxes_layout.batch();
|
||||
auto classes_num = scores_layout.feature();
|
||||
const auto boxes_num = boxes_layout.feature();
|
||||
|
||||
if (primitive->attribs.background_class >= 0 && primitive->attribs.background_class < classes_num)
|
||||
classes_num = std::max(1, classes_num - 1);
|
||||
|
||||
int max_output_boxes_per_class{boxes_num};
|
||||
if (primitive->attribs.nms_top_k >= 0)
|
||||
max_output_boxes_per_class = std::min(max_output_boxes_per_class, primitive->attribs.nms_top_k);
|
||||
|
||||
auto max_output_boxes_per_batch = max_output_boxes_per_class * classes_num;
|
||||
if (primitive->attribs.keep_top_k >= 0)
|
||||
max_output_boxes_per_batch = std::min(max_output_boxes_per_batch, primitive->attribs.keep_top_k);
|
||||
|
||||
auto output_num = max_output_boxes_per_batch * batches_num;
|
||||
|
||||
// BOX_DATA: class_id, box_score, xmin, ymin, xmax, ymax
|
||||
constexpr size_t BOX_DATA{6};
|
||||
return layout(boxes_layout.data_type, boxes_layout.format, {output_num, BOX_DATA, 1, 1});
|
||||
}
|
||||
|
||||
std::string matrix_nms_inst::to_string(const matrix_nms_node& node) {
|
||||
json_composite matrix_nms_info;
|
||||
matrix_nms_info.add("boxes id", node.input().id());
|
||||
matrix_nms_info.add("scores id", node.get_dependency(1).id());
|
||||
matrix_nms_info.add("sort_result_type", ov::as_string(node.get_primitive()->attribs.sort_type));
|
||||
matrix_nms_info.add("decay_function", ov::as_string(node.get_primitive()->attribs.decay));
|
||||
matrix_nms_info.add("sort_result_across_batch", node.get_primitive()->attribs.sort_result_across_batch);
|
||||
matrix_nms_info.add("score_threshold", node.get_primitive()->attribs.score_threshold);
|
||||
matrix_nms_info.add("nms_top_k", node.get_primitive()->attribs.nms_top_k);
|
||||
matrix_nms_info.add("keep_top_k", node.get_primitive()->attribs.keep_top_k);
|
||||
matrix_nms_info.add("background_class", node.get_primitive()->attribs.background_class);
|
||||
matrix_nms_info.add("gaussian_sigma", node.get_primitive()->attribs.gaussian_sigma);
|
||||
matrix_nms_info.add("post_threshold", node.get_primitive()->attribs.post_threshold);
|
||||
matrix_nms_info.add("normalized", node.get_primitive()->attribs.normalized);
|
||||
|
||||
auto node_info = node.desc_to_json();
|
||||
node_info->add("matrix_nms info", matrix_nms_info);
|
||||
std::stringstream primitive_description;
|
||||
node_info->dump(primitive_description);
|
||||
return primitive_description.str();
|
||||
}
|
||||
|
||||
} // namespace cldnn
|
||||
|
||||
namespace ov {
|
||||
template <>
|
||||
EnumNames<matrix_nms::decay_function>& EnumNames<matrix_nms::decay_function>::get() {
|
||||
static auto enum_names = EnumNames<matrix_nms::decay_function>(
|
||||
"decay_function",
|
||||
{{"gaussian", matrix_nms::decay_function::gaussian}, {"linear", matrix_nms::decay_function::linear}});
|
||||
return enum_names;
|
||||
}
|
||||
|
||||
template <>
|
||||
EnumNames<matrix_nms::sort_result_type>& EnumNames<matrix_nms::sort_result_type>::get() {
|
||||
static auto enum_names =
|
||||
EnumNames<matrix_nms::sort_result_type>("sort_result_type",
|
||||
{{"class_id", matrix_nms::sort_result_type::class_id},
|
||||
{"score", matrix_nms::sort_result_type::score},
|
||||
{"none", matrix_nms::sort_result_type::none}});
|
||||
return enum_names;
|
||||
}
|
||||
} // namespace ov
|
@ -20,6 +20,7 @@
|
||||
#include "sliding_window_utils.hpp"
|
||||
#include "program_helpers.h"
|
||||
|
||||
#include "matrix_nms_inst.h"
|
||||
#include "roi_pooling_inst.h"
|
||||
#include "reorg_yolo_inst.h"
|
||||
#include "eltwise_inst.h"
|
||||
@ -1442,6 +1443,7 @@ void program::set_layout_optimizer_attributes(layout_optimizer& lo) {
|
||||
prim.type() != cldnn::ctc_loss::type_id() &&
|
||||
prim.type() != cldnn::non_max_suppression::type_id() &&
|
||||
prim.type() != cldnn::roi_align::type_id() &&
|
||||
prim.type() != cldnn::matrix_nms::type_id() &&
|
||||
prim.type() != cldnn::adaptive_pooling::type_id() &&
|
||||
prim.type() != cldnn::bucketize::type_id() &&
|
||||
prim.type() != cldnn::roll::type_id() &&
|
||||
@ -1487,6 +1489,7 @@ void program::set_layout_optimizer_attributes(layout_optimizer& lo) {
|
||||
prim.type() != cldnn::ctc_loss::type_id() &&
|
||||
prim.type() != cldnn::non_max_suppression::type_id() &&
|
||||
prim.type() != cldnn::roi_align::type_id() &&
|
||||
prim.type() != cldnn::matrix_nms::type_id() &&
|
||||
prim.type() != cldnn::adaptive_pooling::type_id() &&
|
||||
prim.type() != cldnn::bucketize::type_id() &&
|
||||
prim.type() != cldnn::roll::type_id() &&
|
||||
|
@ -0,0 +1,333 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/batch_headers/common.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
|
||||
#define NUM_BATCHES INPUT0_BATCH_NUM
|
||||
#define NUM_BOXES INPUT0_FEATURE_NUM
|
||||
#define NUM_CLASSES INPUT1_FEATURE_NUM
|
||||
|
||||
typedef struct {
|
||||
int batch_idx;
|
||||
int class_idx;
|
||||
int box_idx;
|
||||
INPUT1_TYPE score;
|
||||
} FUNC(BoxInfo);
|
||||
|
||||
#define BOX_INFO FUNC(BoxInfo)
|
||||
|
||||
#define unroll_for __attribute__((opencl_unroll_hint)) for
|
||||
|
||||
inline INPUT1_TYPE FUNC(decay_gaussian)(INPUT1_TYPE iou, INPUT1_TYPE max_iou) {
|
||||
return exp((max_iou * max_iou - iou * iou) * GAUSSIAN_SIGMA);
|
||||
}
|
||||
|
||||
inline INPUT1_TYPE FUNC(decay_linear)(INPUT1_TYPE iou, INPUT1_TYPE max_iou) {
|
||||
return (INPUT1_VAL_ONE - iou) / (INPUT1_VAL_ONE - max_iou + TINY);
|
||||
}
|
||||
|
||||
inline void FUNC(swap)(int* a, int* b) {
|
||||
int temp = *a;
|
||||
*a = *b;
|
||||
*b = temp;
|
||||
}
|
||||
|
||||
inline void FUNC(sortIterative)(const __global INPUT1_TYPE* scores,
|
||||
const int batchId,
|
||||
const int classId,
|
||||
int* indices,
|
||||
const int size) {
|
||||
for (int i = 1; i <= size; i++) {
|
||||
bool swapped = false;
|
||||
for (int j = 0; j < size - i; j++) {
|
||||
const INPUT1_TYPE score_curr = scores[INPUT1_GET_INDEX(batchId, classId, 0, indices[j])];
|
||||
const INPUT1_TYPE score_next = scores[INPUT1_GET_INDEX(batchId, classId, 0, indices[j + 1])];
|
||||
if (score_curr < score_next) {
|
||||
FUNC_CALL(swap)(&indices[j], &indices[j + 1]);
|
||||
swapped = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!swapped)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
inline void FUNC(swap_boxes)(__global BOX_INFO* a, __global BOX_INFO* b) {
|
||||
BOX_INFO temp = *a;
|
||||
*a = *b;
|
||||
*b = temp;
|
||||
}
|
||||
|
||||
inline void FUNC(sortIterativeBoxes)(__global BOX_INFO* boxes, int l, int h) {
|
||||
for (int i = 1; i < h - l; i++) {
|
||||
bool swapped = false;
|
||||
for (int j = l; j < h - i; j++) {
|
||||
if ((boxes[j].score < boxes[j + 1].score) ||
|
||||
(boxes[j].score == boxes[j + 1].score && boxes[j].class_idx > boxes[j + 1].class_idx) ||
|
||||
(boxes[j].score == boxes[j + 1].score && boxes[j].class_idx == boxes[j + 1].class_idx &&
|
||||
boxes[j].box_idx > boxes[j + 1].box_idx)) {
|
||||
FUNC_CALL(swap_boxes)(&boxes[j], &boxes[j + 1]);
|
||||
swapped = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!swapped)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
inline void FUNC(sortIterativeBoxesAcrossBatches)(__global BOX_INFO* boxes) {
|
||||
const int size = NUM_BATCHES * NUM_CLASSES * MAX_BOXES_PER_CLASS;
|
||||
for (int i = 1; i < size; i++) {
|
||||
bool swapped = false;
|
||||
for (int j = 0; j < size - i; j++) {
|
||||
__global BOX_INFO* l = boxes + j;
|
||||
__global BOX_INFO* r = boxes + j + 1;
|
||||
// sort by score
|
||||
#if SORT_TYPE == 1
|
||||
if ((l->score < r->score) || (l->score == r->score && l->batch_idx > r->batch_idx) ||
|
||||
(l->score == r->score && l->batch_idx == r->batch_idx && l->class_idx > r->class_idx) ||
|
||||
(l->score == r->score && l->batch_idx == r->batch_idx && l->class_idx == r->class_idx &&
|
||||
l->box_idx > r->box_idx)) {
|
||||
FUNC_CALL(swap_boxes)(l, r);
|
||||
swapped = true;
|
||||
}
|
||||
// sort by class id
|
||||
#elif SORT_TYPE == 0
|
||||
if (r->score != INPUT1_VAL_ZERO &&
|
||||
((l->score == INPUT1_VAL_ZERO) || // case with empty buffer
|
||||
(l->class_idx > r->class_idx) || (l->class_idx == r->class_idx && l->batch_idx > r->batch_idx) ||
|
||||
(l->class_idx == r->class_idx && l->batch_idx == r->batch_idx && l->score < r->score) ||
|
||||
(l->class_idx == r->class_idx && l->batch_idx == r->batch_idx && l->score == r->score &&
|
||||
l->box_idx > r->box_idx))) {
|
||||
FUNC_CALL(swap_boxes)(l, r);
|
||||
swapped = true;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
if (!swapped)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
inline COORD_TYPE_4 FUNC(getBoxCoords)(const __global INPUT0_TYPE* boxes, const short batch, const ushort box_idx) {
|
||||
COORD_TYPE_4 coords = (COORD_TYPE_4)(boxes[INPUT0_GET_INDEX(batch, box_idx, 0, 0)],
|
||||
boxes[INPUT0_GET_INDEX(batch, box_idx, 0, 1)],
|
||||
boxes[INPUT0_GET_INDEX(batch, box_idx, 0, 2)],
|
||||
boxes[INPUT0_GET_INDEX(batch, box_idx, 0, 3)]);
|
||||
|
||||
// uncomment when flipped coordinates will be fixed in reference impl
|
||||
/*
|
||||
const INPUT0_TYPE x1 = min(coords[0], coords[2]);
|
||||
const INPUT0_TYPE x2 = max(coords[0], coords[2]);
|
||||
const INPUT0_TYPE y1 = min(coords[1], coords[3]);
|
||||
const INPUT0_TYPE y2 = max(coords[1], coords[3]);
|
||||
coords[0] = x1;
|
||||
coords[1] = y1;
|
||||
coords[2] = x2;
|
||||
coords[3] = y2;
|
||||
*/
|
||||
|
||||
return coords;
|
||||
}
|
||||
|
||||
inline INPUT0_TYPE FUNC(area)(const INPUT0_TYPE w, const INPUT0_TYPE h) {
|
||||
return (w + NORM) * (h + NORM);
|
||||
}
|
||||
|
||||
inline INPUT0_TYPE FUNC(areaBox)(const COORD_TYPE_4 box) {
|
||||
if (box[2] < box[0] || box[3] < box[1])
|
||||
return INPUT0_VAL_ZERO;
|
||||
return FUNC_CALL(area)(box[3] - box[1], box[2] - box[0]);
|
||||
}
|
||||
|
||||
inline INPUT0_TYPE FUNC(intersectionOverUnion)(const COORD_TYPE_4 box1, const COORD_TYPE_4 box2) {
|
||||
if (box2[0] > box1[2] || box2[2] < box1[0] || box2[1] > box1[3] || box2[3] < box1[1])
|
||||
return INPUT0_VAL_ZERO;
|
||||
|
||||
const INPUT0_TYPE area = FUNC_CALL(areaBox)(box1);
|
||||
const INPUT0_TYPE areaBox = FUNC_CALL(areaBox)(box2);
|
||||
|
||||
const INPUT0_TYPE intersection_xmin = max(box1[0], box2[0]);
|
||||
const INPUT0_TYPE intersection_ymin = max(box1[1], box2[1]);
|
||||
const INPUT0_TYPE intersection_xmax = min(box1[2], box2[2]);
|
||||
const INPUT0_TYPE intersection_ymax = min(box1[3], box2[3]);
|
||||
|
||||
const INPUT0_TYPE intersection_area =
|
||||
FUNC_CALL(area)(intersection_xmax - intersection_xmin, intersection_ymax - intersection_ymin);
|
||||
const INPUT0_TYPE union_area = area + areaBox - intersection_area;
|
||||
|
||||
return intersection_area / union_area;
|
||||
}
|
||||
|
||||
#ifdef MATRIX_NMS_STAGE_0
|
||||
KERNEL(matrix_nms_ref_stage_0)
|
||||
(const __global INPUT0_TYPE* input_boxes,
|
||||
const __global INPUT1_TYPE* input_scores,
|
||||
__global uchar* buffer0,
|
||||
__global int* selected_boxes_num) {
|
||||
const int batchId = get_global_id(0);
|
||||
const int classId = get_global_id(1);
|
||||
|
||||
if (classId == BACKGROUND_CLASS)
|
||||
return;
|
||||
|
||||
int sorted_score_indices[NUM_BOXES];
|
||||
|
||||
for (int i = 0; i < NUM_BOXES; ++i)
|
||||
sorted_score_indices[i] = i;
|
||||
|
||||
int valid_boxes_num = 0;
|
||||
for (int i = 0; i < NUM_BOXES; i++) {
|
||||
if (input_scores[INPUT1_GET_INDEX(batchId, classId, 0, i)] > SCORE_THRESHOLD)
|
||||
++valid_boxes_num;
|
||||
}
|
||||
|
||||
// TODO: consider faster sorting algorithm
|
||||
FUNC_CALL(sortIterative)(input_scores, batchId, classId, sorted_score_indices, NUM_BOXES);
|
||||
|
||||
valid_boxes_num = min(valid_boxes_num, MAX_BOXES_PER_CLASS);
|
||||
|
||||
const int matrix_size = MAX_BOXES_PER_CLASS < 3 ? 1 : (MAX_BOXES_PER_CLASS * (MAX_BOXES_PER_CLASS - 1)) >> 1;
|
||||
INPUT1_TYPE iou_matrix[matrix_size];
|
||||
INPUT1_TYPE iou_max[MAX_BOXES_PER_CLASS];
|
||||
|
||||
iou_max[0] = INPUT1_VAL_ZERO;
|
||||
for (int i = 1; i < valid_boxes_num; ++i) {
|
||||
INPUT1_TYPE max_iou = INPUT1_VAL_ZERO;
|
||||
const COORD_TYPE_4 box_i = FUNC_CALL(getBoxCoords)(input_boxes, batchId, sorted_score_indices[i]);
|
||||
for (int j = 0; j < i; ++j) {
|
||||
const COORD_TYPE_4 box_j = FUNC_CALL(getBoxCoords)(input_boxes, batchId, sorted_score_indices[j]);
|
||||
const INPUT1_TYPE iou = FUNC_CALL(intersectionOverUnion)(box_i, box_j);
|
||||
|
||||
max_iou = max(iou, max_iou);
|
||||
iou_matrix[i * (i - 1) / 2 + j] = iou;
|
||||
}
|
||||
iou_max[i] = max_iou;
|
||||
}
|
||||
|
||||
const INPUT1_TYPE first_score = input_scores[INPUT1_GET_INDEX(batchId, classId, 0, sorted_score_indices[0])];
|
||||
|
||||
__global BOX_INFO* box_info = (__global BOX_INFO*)buffer0;
|
||||
box_info = &box_info[batchId * NUM_CLASSES * MAX_BOXES_PER_CLASS + classId * MAX_BOXES_PER_CLASS];
|
||||
|
||||
int box_info_counter = 0;
|
||||
if (first_score > POST_THRESHOLD && valid_boxes_num > 0) {
|
||||
box_info[box_info_counter].class_idx = classId;
|
||||
box_info[box_info_counter].score = first_score;
|
||||
box_info[box_info_counter].box_idx = sorted_score_indices[0];
|
||||
box_info[box_info_counter].batch_idx = batchId;
|
||||
++box_info_counter;
|
||||
}
|
||||
|
||||
for (int i = 1; i < valid_boxes_num; ++i) {
|
||||
INPUT1_TYPE min_decay = INPUT1_VAL_ONE;
|
||||
for (int j = 0; j < i; ++j) {
|
||||
INPUT1_TYPE iou = iou_matrix[i * (i - 1) / 2 + j];
|
||||
INPUT1_TYPE decay =
|
||||
DECAY_FUNC == 0 ? FUNC_CALL(decay_gaussian)(iou, iou_max[j]) : FUNC_CALL(decay_linear)(iou, iou_max[j]);
|
||||
min_decay = min(min_decay, decay);
|
||||
}
|
||||
|
||||
INPUT1_TYPE ds = min_decay * input_scores[INPUT1_GET_INDEX(batchId, classId, 0, sorted_score_indices[i])];
|
||||
|
||||
if (ds <= POST_THRESHOLD)
|
||||
continue;
|
||||
|
||||
box_info[box_info_counter].batch_idx = batchId;
|
||||
box_info[box_info_counter].class_idx = classId;
|
||||
box_info[box_info_counter].box_idx = sorted_score_indices[i];
|
||||
box_info[box_info_counter].score = ds;
|
||||
++box_info_counter;
|
||||
}
|
||||
|
||||
selected_boxes_num[batchId * NUM_CLASSES + classId] = box_info_counter;
|
||||
}
|
||||
#endif /* MATRIX_NMS_STAGE_0 */
|
||||
|
||||
#ifdef MATRIX_NMS_STAGE_1
|
||||
KERNEL(matrix_nms_ref_stage_1)
|
||||
(__global INPUT3_TYPE* valid_outputs, __global uchar* buffer0, __global int* selected_boxes_num) {
|
||||
const int batchId = get_global_id(0);
|
||||
|
||||
__global BOX_INFO* box_info = (__global BOX_INFO*)buffer0;
|
||||
|
||||
const int first_idx = batchId * NUM_CLASSES * MAX_BOXES_PER_CLASS;
|
||||
const int last_idx = first_idx + NUM_CLASSES * MAX_BOXES_PER_CLASS;
|
||||
|
||||
// TODO: consider faster sorting algorithm
|
||||
FUNC_CALL(sortIterativeBoxes)(box_info, first_idx, last_idx);
|
||||
|
||||
for (int i = 0; i < NUM_CLASSES; ++i) {
|
||||
if (i == BACKGROUND_CLASS)
|
||||
continue;
|
||||
|
||||
valid_outputs[INPUT3_GET_INDEX(batchId, 0, 0, 0)] += selected_boxes_num[batchId * NUM_CLASSES + i];
|
||||
}
|
||||
}
|
||||
#endif /* MATRIX_NMS_STAGE_1 */
|
||||
|
||||
#ifdef MATRIX_NMS_STAGE_2
|
||||
KERNEL(matrix_nms_ref_stage_2)
|
||||
(const __global INPUT0_TYPE* input_boxes,
|
||||
__global OUTPUT_TYPE* output,
|
||||
__global INPUT2_TYPE* selected_indices,
|
||||
__global INPUT3_TYPE* valid_outputs,
|
||||
__global uchar* buffer0) {
|
||||
__global BOX_INFO* box_info = (__global BOX_INFO*)buffer0;
|
||||
|
||||
// TODO: consider faster sorting algorithm
|
||||
// and index sorting instead of data sorting
|
||||
#if SORT_RESULT_ACROSS_BATCH == 1 && SORT_TYPE != 2
|
||||
FUNC_CALL(sortIterativeBoxesAcrossBatches)(box_info);
|
||||
#endif
|
||||
|
||||
int output_idx = 0;
|
||||
int box_info_idx = 0;
|
||||
for (int i = 0; i < NUM_BATCHES; ++i) {
|
||||
if (KEEP_TOP_K != -1 && KEEP_TOP_K < valid_outputs[INPUT3_GET_INDEX(i, 0, 0, 0)])
|
||||
valid_outputs[INPUT3_GET_INDEX(i, 0, 0, 0)] = KEEP_TOP_K;
|
||||
|
||||
#if SORT_RESULT_ACROSS_BATCH == 0
|
||||
box_info_idx = i * NUM_CLASSES * MAX_BOXES_PER_CLASS;
|
||||
#endif
|
||||
|
||||
unroll_for(int j = 0; j < valid_outputs[INPUT3_GET_INDEX(i, 0, 0, 0)]; ++j) {
|
||||
output[OUTPUT_GET_INDEX(output_idx, 0, 0, 0)] = box_info[box_info_idx].class_idx;
|
||||
output[OUTPUT_GET_INDEX(output_idx, 1, 0, 0)] = box_info[box_info_idx].score;
|
||||
output[OUTPUT_GET_INDEX(output_idx, 2, 0, 0)] =
|
||||
input_boxes[INPUT0_GET_INDEX(box_info[box_info_idx].batch_idx, box_info[box_info_idx].box_idx, 0, 0)];
|
||||
output[OUTPUT_GET_INDEX(output_idx, 3, 0, 0)] =
|
||||
input_boxes[INPUT0_GET_INDEX(box_info[box_info_idx].batch_idx, box_info[box_info_idx].box_idx, 0, 1)];
|
||||
output[OUTPUT_GET_INDEX(output_idx, 4, 0, 0)] =
|
||||
input_boxes[INPUT0_GET_INDEX(box_info[box_info_idx].batch_idx, box_info[box_info_idx].box_idx, 0, 2)];
|
||||
output[OUTPUT_GET_INDEX(output_idx, 5, 0, 0)] =
|
||||
input_boxes[INPUT0_GET_INDEX(box_info[box_info_idx].batch_idx, box_info[box_info_idx].box_idx, 0, 3)];
|
||||
|
||||
selected_indices[INPUT2_GET_INDEX(output_idx, 0, 0, 0)] =
|
||||
box_info[box_info_idx].batch_idx * NUM_BOXES + box_info[box_info_idx].box_idx;
|
||||
|
||||
++output_idx;
|
||||
++box_info_idx;
|
||||
}
|
||||
|
||||
// Paddings
|
||||
while (output_idx < (i + 1) * MAX_BOXES_PER_BATCH) {
|
||||
unroll_for(int j = 0; j < 6; ++j) {
|
||||
output[OUTPUT_GET_INDEX(output_idx, j, 0, 0)] = -OUTPUT_VAL_ONE;
|
||||
}
|
||||
selected_indices[INPUT2_GET_INDEX(output_idx, 0, 0, 0)] = -INPUT2_VAL_ONE;
|
||||
++output_idx;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif /* MATRIX_NMS_STAGE_2 */
|
||||
|
||||
#undef NUM_BATCHES
|
||||
#undef NUM_BOXES
|
||||
#undef NUM_CLASSES
|
||||
#undef BOX_INFO
|
@ -90,6 +90,7 @@ enum class KernelType {
|
||||
CONVERT_COLOR,
|
||||
RANDOM_UNIFORM,
|
||||
ADAPTIVE_POOLING,
|
||||
MATRIX_NMS,
|
||||
REVERSE,
|
||||
PRIOR_BOX,
|
||||
EYE,
|
||||
|
@ -175,6 +175,7 @@ std::string toString(KernelType kt) {
|
||||
return "BINARY_CONVOLUTION";
|
||||
case KernelType::NON_MAX_SUPPRESSION:
|
||||
return "NON_MAX_SUPPRESSION";
|
||||
case KernelType::MATRIX_NMS: return "MATRIX_NMS";
|
||||
default: return "";
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,196 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "matrix_nms_kernel_ref.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "kernel_selector_utils.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
ParamsKey MatrixNmsKernelRef::GetSupportedKey() const {
|
||||
ParamsKey k;
|
||||
k.EnableInputDataType(Datatype::F16);
|
||||
k.EnableInputDataType(Datatype::F32);
|
||||
k.EnableInputDataType(Datatype::INT32);
|
||||
|
||||
k.EnableOutputDataType(Datatype::F16);
|
||||
k.EnableOutputDataType(Datatype::F32);
|
||||
|
||||
k.EnableDifferentTypes();
|
||||
k.EnableAllInputLayout();
|
||||
k.EnableAllOutputLayout();
|
||||
k.EnableTensorOffset();
|
||||
k.EnableTensorPitches();
|
||||
k.EnableBatching();
|
||||
return k;
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
MatrixNmsKernelRef::DispatchData SetDefault(const matrix_nms_params& params, size_t idx) {
|
||||
MatrixNmsKernelRef::DispatchData dispatch_data;
|
||||
|
||||
const auto& input_scores = params.inputs[1];
|
||||
if (idx == 0) {
|
||||
dispatch_data.gws = {input_scores.Batch().v, input_scores.Feature().v, 1};
|
||||
dispatch_data.lws = GetOptimalLocalWorkGroupSizes(dispatch_data.gws, params.engineInfo);
|
||||
} else if (idx == 1) {
|
||||
dispatch_data.gws = {input_scores.Batch().v, 1, 1};
|
||||
dispatch_data.lws = GetOptimalLocalWorkGroupSizes(dispatch_data.gws, params.engineInfo);
|
||||
} else {
|
||||
dispatch_data.gws = {1, 1, 1};
|
||||
dispatch_data.lws = {1, 1, 1};
|
||||
}
|
||||
|
||||
return dispatch_data;
|
||||
}
|
||||
|
||||
std::tuple<int, int> GetMaxBoxes(const matrix_nms_params& params) {
|
||||
const int classes_num = params.inputs[1].Feature().v;
|
||||
const int boxes_num = params.inputs[0].Feature().v;
|
||||
|
||||
int max_boxes_per_class{boxes_num};
|
||||
if (params.nms_top_k >= 0)
|
||||
max_boxes_per_class = std::min(max_boxes_per_class, params.nms_top_k);
|
||||
|
||||
auto classes_num_adj = classes_num;
|
||||
if (params.background_class >= 0 && params.background_class < classes_num)
|
||||
classes_num_adj = std::max(1, classes_num - 1);
|
||||
|
||||
auto max_boxes_per_batch = max_boxes_per_class * classes_num_adj;
|
||||
if (params.keep_top_k >= 0)
|
||||
max_boxes_per_batch = std::min(max_boxes_per_batch, params.keep_top_k);
|
||||
|
||||
return {max_boxes_per_class, max_boxes_per_batch};
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
KernelsData MatrixNmsKernelRef::GetKernelsData(const Params& params, const optional_params& options) const {
|
||||
if (!Validate(params, options)) {
|
||||
return {};
|
||||
}
|
||||
|
||||
constexpr size_t kernels_num{3};
|
||||
KernelData kernel_data = KernelData::Default<matrix_nms_params>(params, kernels_num);
|
||||
const matrix_nms_params& new_params = dynamic_cast<const matrix_nms_params&>(*kernel_data.params.get());
|
||||
|
||||
constexpr size_t BOX_INFO_SIZE{16};
|
||||
|
||||
const int batches_num = new_params.inputs[1].Batch().v;
|
||||
const int classes_num = new_params.inputs[1].Feature().v;
|
||||
|
||||
int max_boxes_per_class, max_boxes_per_batch;
|
||||
std::tie(max_boxes_per_class, max_boxes_per_batch) = GetMaxBoxes(new_params);
|
||||
|
||||
const size_t box_info_num = batches_num * classes_num * max_boxes_per_class;
|
||||
|
||||
const size_t box_info_buffer_size = box_info_num * BOX_INFO_SIZE;
|
||||
const size_t sel_boxes_num_buffer_size = batches_num * classes_num * sizeof(int);
|
||||
|
||||
kernel_data.internalBufferSizes.push_back(box_info_buffer_size);
|
||||
kernel_data.internalBufferSizes.push_back(sel_boxes_num_buffer_size);
|
||||
kernel_data.internalBufferDataType = Datatype::F32;
|
||||
|
||||
for (size_t i{}; i < kernels_num; ++i) {
|
||||
auto entry_point = GetEntryPoint(kernelName, new_params.layerID, params, options, i);
|
||||
auto jit_constants = GetJitConstants(new_params);
|
||||
jit_constants.AddConstant(MakeJitConstant("MATRIX_NMS_STAGE_" + std::to_string(i), "true"));
|
||||
|
||||
jit_constants.AddConstant(MakeJitConstant("MAX_BOXES_PER_CLASS", max_boxes_per_class));
|
||||
jit_constants.AddConstant(MakeJitConstant("MAX_BOXES_PER_BATCH", max_boxes_per_batch));
|
||||
auto jit = CreateJit(kernelName, jit_constants, entry_point);
|
||||
|
||||
DispatchData dispatch_data = SetDefault(new_params, i);
|
||||
auto& kernel = kernel_data.kernels[i];
|
||||
KernelBase::CheckDispatchData(kernelName, dispatch_data, params.engineInfo.maxWorkGroupSize);
|
||||
kernel.params.workGroups.global = dispatch_data.gws;
|
||||
kernel.params.workGroups.local = dispatch_data.lws;
|
||||
kernel.code.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo);
|
||||
|
||||
SetKernelArguments(new_params, kernel, i);
|
||||
}
|
||||
|
||||
return {kernel_data};
|
||||
}
|
||||
|
||||
float MatrixNmsKernelRef::GetKernelsPriority(const Params& params, const optional_params& options) const {
|
||||
return FORCE_PRIORITY_9;
|
||||
}
|
||||
|
||||
bool MatrixNmsKernelRef::Validate(const Params& p, const optional_params& o) const {
|
||||
if (p.GetType() != KernelType::MATRIX_NMS || o.GetType() != KernelType::MATRIX_NMS) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const matrix_nms_params& params = static_cast<const matrix_nms_params&>(p);
|
||||
// inputs: boxes, scores, second output, third output
|
||||
if (params.inputs.size() != 4)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
JitConstants MatrixNmsKernelRef::GetJitConstants(const matrix_nms_params& params) const {
|
||||
JitConstants jit = MakeBaseParamsJitConstants(params);
|
||||
|
||||
const auto& boxes = params.inputs[0];
|
||||
switch (boxes.GetDType()) {
|
||||
case Datatype::F32:
|
||||
jit.AddConstant(MakeJitConstant("COORD_TYPE_4", "float4"));
|
||||
jit.AddConstant(MakeJitConstant("TINY", "1e-10f"));
|
||||
break;
|
||||
|
||||
case Datatype::F16:
|
||||
jit.AddConstant(MakeJitConstant("COORD_TYPE_4", "half4"));
|
||||
jit.AddConstant(MakeJitConstant("TINY", "1e-7h"));
|
||||
break;
|
||||
break;
|
||||
|
||||
default:
|
||||
throw std::invalid_argument("Matrix NMS boxes type should be one of F32 or F16.");
|
||||
}
|
||||
|
||||
jit.AddConstant(MakeJitConstant("SORT_TYPE", params.sort_type));
|
||||
jit.AddConstant(MakeJitConstant("SORT_RESULT_ACROSS_BATCH", params.sort_result_across_batch));
|
||||
jit.AddConstant(MakeJitConstant("SCORE_THRESHOLD", params.score_threshold));
|
||||
jit.AddConstant(MakeJitConstant("KEEP_TOP_K", params.keep_top_k));
|
||||
jit.AddConstant(MakeJitConstant("BACKGROUND_CLASS", params.background_class));
|
||||
jit.AddConstant(MakeJitConstant("DECAY_FUNC", params.decay));
|
||||
jit.AddConstant(MakeJitConstant("GAUSSIAN_SIGMA", params.gaussian_sigma));
|
||||
jit.AddConstant(MakeJitConstant("POST_THRESHOLD", params.post_threshold));
|
||||
jit.AddConstant(MakeJitConstant("NORM", params.normalized ? "INPUT0_VAL_ZERO" : "INPUT0_VAL_ONE"));
|
||||
return jit;
|
||||
}
|
||||
|
||||
void MatrixNmsKernelRef::SetKernelArguments(const matrix_nms_params& params, clKernelData& kernel, size_t idx) const {
|
||||
switch (idx) {
|
||||
case 0:
|
||||
kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, 0});
|
||||
kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, 1});
|
||||
kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0});
|
||||
kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 1});
|
||||
break;
|
||||
|
||||
case 1:
|
||||
kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, 3});
|
||||
kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0});
|
||||
kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 1});
|
||||
break;
|
||||
|
||||
case 2:
|
||||
kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, 0});
|
||||
kernel.params.arguments.push_back({ArgumentDescriptor::Types::OUTPUT, 0});
|
||||
kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, 2});
|
||||
kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, 3});
|
||||
kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0});
|
||||
break;
|
||||
|
||||
default:
|
||||
throw std::invalid_argument("Matrix NMS has 3 kernels. valid index is 0 ~ 2.");
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace kernel_selector
|
@ -0,0 +1,67 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "kernel_base_opencl.h"
|
||||
#include "kernel_selector_params.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
struct matrix_nms_params : public base_params {
|
||||
matrix_nms_params() : base_params(KernelType::MATRIX_NMS) {}
|
||||
|
||||
enum decay_function { GAUSSIAN, LINEAR };
|
||||
|
||||
enum sort_result_type {
|
||||
CLASS_ID, // sort selected boxes by class id (ascending) in each batch element
|
||||
SCORE, // sort selected boxes by score (descending) in each batch element
|
||||
NONE // do not guarantee the order in each batch element
|
||||
};
|
||||
|
||||
// specifies order of output elements
|
||||
sort_result_type sort_type = sort_result_type::NONE;
|
||||
// specifies whenever it is necessary to sort selected boxes across batches or not
|
||||
bool sort_result_across_batch = false;
|
||||
// specifies minimum score to consider box for the processing
|
||||
float score_threshold = 0.0f;
|
||||
// specifies maximum number of boxes to be selected per class, -1 meaning to
|
||||
// keep all boxes
|
||||
int nms_top_k = -1;
|
||||
// specifies maximum number of boxes to be selected per batch element, -1
|
||||
// meaning to keep all boxes
|
||||
int keep_top_k = -1;
|
||||
// specifies the background class id, -1 meaning to keep all classes
|
||||
int background_class = -1;
|
||||
// specifies decay function used to decay scores
|
||||
decay_function decay = decay_function::LINEAR;
|
||||
// specifies gaussian_sigma parameter for gaussian decay_function
|
||||
float gaussian_sigma = 2.0f;
|
||||
// specifies threshold to filter out boxes with low confidence score after
|
||||
// decaying
|
||||
float post_threshold = 0.0f;
|
||||
// specifies whether boxes are normalized or not
|
||||
bool normalized = true;
|
||||
};
|
||||
|
||||
struct matrix_nms_optional_params : optional_params {
|
||||
matrix_nms_optional_params() : optional_params(KernelType::MATRIX_NMS) {}
|
||||
};
|
||||
|
||||
class MatrixNmsKernelRef : public KernelBaseOpenCL {
|
||||
public:
|
||||
MatrixNmsKernelRef() : KernelBaseOpenCL("matrix_nms_ref") {}
|
||||
|
||||
using DispatchData = CommonDispatchData;
|
||||
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
|
||||
KernelsPriority GetKernelsPriority(const Params& params, const optional_params& options) const override;
|
||||
ParamsKey GetSupportedKey() const override;
|
||||
|
||||
protected:
|
||||
JitConstants GetJitConstants(const matrix_nms_params& params) const;
|
||||
bool Validate(const Params& p, const optional_params& o) const override;
|
||||
void SetKernelArguments(const matrix_nms_params& params, clKernelData& kernel, size_t idx) const;
|
||||
};
|
||||
|
||||
} // namespace kernel_selector
|
@ -0,0 +1,18 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "matrix_nms_kernel_selector.h"
|
||||
|
||||
#include "matrix_nms_kernel_ref.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
matrix_nms_kernel_selector::matrix_nms_kernel_selector() {
|
||||
Attach<MatrixNmsKernelRef>();
|
||||
}
|
||||
|
||||
KernelsData matrix_nms_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const {
|
||||
return GetNaiveBestKernel(params, options, KernelType::MATRIX_NMS);
|
||||
}
|
||||
} // namespace kernel_selector
|
@ -0,0 +1,21 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "kernel_selector.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
class matrix_nms_kernel_selector : public kernel_selector_base {
|
||||
public:
|
||||
static matrix_nms_kernel_selector& Instance() {
|
||||
static matrix_nms_kernel_selector instance_;
|
||||
return instance_;
|
||||
}
|
||||
|
||||
matrix_nms_kernel_selector();
|
||||
|
||||
KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
|
||||
};
|
||||
} // namespace kernel_selector
|
83
src/plugins/intel_gpu/src/plugin/ops/matrix_nms.cpp
Normal file
83
src/plugins/intel_gpu/src/plugin/ops/matrix_nms.cpp
Normal file
@ -0,0 +1,83 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
#include "intel_gpu/primitives/matrix_nms.hpp"
|
||||
|
||||
#include <memory>
|
||||
#include <ngraph/opsets/opset8.hpp>
|
||||
|
||||
#include "intel_gpu/plugin/common_utils.hpp"
|
||||
#include "intel_gpu/plugin/program.hpp"
|
||||
#include "intel_gpu/primitives/mutable_data.hpp"
|
||||
#include "ngraph_ops/nms_static_shape_ie.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace op {
|
||||
namespace internal {
|
||||
using NmsStaticShapeIE8 = ngraph::op::internal::NmsStaticShapeIE<ngraph::opset8::MatrixNms>;
|
||||
}
|
||||
} // namespace op
|
||||
} // namespace ngraph
|
||||
|
||||
namespace ov {
|
||||
namespace intel_gpu {
|
||||
|
||||
namespace {
|
||||
void CreateNmsStaticShapeIE8Op(Program& p, const std::shared_ptr<ngraph::op::internal::NmsStaticShapeIE8>& op) {
|
||||
validate_inputs_count(op, {2});
|
||||
auto inputPrimitives = p.GetInputPrimitiveIDs(op);
|
||||
|
||||
std::vector<cldnn::memory::ptr> shared_memory;
|
||||
|
||||
auto outputIndices = op->get_output_shape(0)[0];
|
||||
cldnn::layout mutableLayoutFirst = cldnn::layout(cldnn::element_type_to_data_type(ngraph::element::i32),
|
||||
cldnn::format::bfyx,
|
||||
cldnn::tensor(static_cast<int32_t>(outputIndices), 1, 1, 1));
|
||||
|
||||
shared_memory.emplace_back(p.GetEngine().allocate_memory(mutableLayoutFirst));
|
||||
|
||||
cldnn::primitive_id matrix_nms_mutable_id_w_first = layer_type_name_ID(op) + "_md_write_first";
|
||||
auto matrix_nms_mutable_prim_first = cldnn::mutable_data(matrix_nms_mutable_id_w_first, shared_memory.back());
|
||||
p.add_primitive(*op, matrix_nms_mutable_prim_first);
|
||||
inputPrimitives.push_back(matrix_nms_mutable_id_w_first);
|
||||
|
||||
auto batches_num = op->get_output_shape(2)[0];
|
||||
cldnn::layout mutableLayoutSecond = cldnn::layout(cldnn::element_type_to_data_type(ngraph::element::i32),
|
||||
cldnn::format::bfyx,
|
||||
cldnn::tensor(static_cast<int32_t>(batches_num), 1, 1, 1));
|
||||
|
||||
shared_memory.emplace_back(p.GetEngine().allocate_memory(mutableLayoutSecond));
|
||||
|
||||
cldnn::primitive_id matrix_nms_mutable_id_w_second = layer_type_name_ID(op) + "_md_write_second";
|
||||
auto matrix_nms_mutable_prim_second = cldnn::mutable_data(matrix_nms_mutable_id_w_second, shared_memory.back());
|
||||
p.add_primitive(*op, matrix_nms_mutable_prim_second);
|
||||
inputPrimitives.push_back(matrix_nms_mutable_id_w_second);
|
||||
|
||||
auto matrixNmsLayerName = layer_type_name_ID(op) + ".out0";
|
||||
|
||||
auto prim = cldnn::matrix_nms(matrixNmsLayerName,
|
||||
inputPrimitives[0],
|
||||
inputPrimitives[1],
|
||||
inputPrimitives[inputPrimitives.size() - 2],
|
||||
inputPrimitives[inputPrimitives.size() - 1],
|
||||
op->get_attrs());
|
||||
|
||||
p.add_primitive(*op, prim);
|
||||
|
||||
cldnn::primitive_id matrix_nms_id_r_first = layer_type_name_ID(op) + ".out1";
|
||||
auto matrix_nms_mutable_prim_r_first =
|
||||
cldnn::mutable_data(matrix_nms_id_r_first, {matrixNmsLayerName}, shared_memory.front());
|
||||
p.add_primitive(*op, matrix_nms_mutable_prim_r_first);
|
||||
|
||||
cldnn::primitive_id matrix_nms_id_r_second = layer_type_name_ID(op) + ".out2";
|
||||
auto matrix_nms_mutable_prim_r_second =
|
||||
cldnn::mutable_data(matrix_nms_id_r_second, {matrixNmsLayerName}, shared_memory.back());
|
||||
p.add_primitive(*op, matrix_nms_mutable_prim_r_second);
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
REGISTER_FACTORY_IMPL(internal, NmsStaticShapeIE8);
|
||||
|
||||
} // namespace intel_gpu
|
||||
} // namespace ov
|
@ -70,6 +70,7 @@
|
||||
#include <transformations/op_conversions/bidirectional_sequences_decomposition.hpp>
|
||||
#include <transformations/op_conversions/convert_previous_nms_to_nms_9.hpp>
|
||||
#include <transformations/op_conversions/convert_nms9_to_nms_ie_internal.hpp>
|
||||
#include <transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.hpp>
|
||||
#include <transformations/op_conversions/convert_interpolate1_to_interpolate4.hpp>
|
||||
#include <transformations/op_conversions/convert_gather_downgrade.hpp>
|
||||
#include <transformations/op_conversions/convert_gather_0d.hpp>
|
||||
@ -165,6 +166,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
|
||||
manager.register_pass<ngraph::pass::ConvertNMS5ToNMS9>();
|
||||
manager.register_pass<ngraph::pass::ConvertNMS9ToNMSIEInternal>();
|
||||
manager.register_pass<ngraph::pass::ConvertGP9ToGPIEInternal>();
|
||||
manager.register_pass<ngraph::pass::ConvertMatrixNmsToMatrixNmsIE>();
|
||||
manager.register_pass<ngraph::pass::ConvertGather0D>();
|
||||
manager.register_pass<ngraph::pass::ConvertPriorBox8To0, false>();
|
||||
|
||||
|
673
src/plugins/intel_gpu/tests/test_cases/matrix_nms_gpu_test.cpp
Normal file
673
src/plugins/intel_gpu/tests/test_cases/matrix_nms_gpu_test.cpp
Normal file
@ -0,0 +1,673 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <intel_gpu/graph/network.hpp>
|
||||
#include <intel_gpu/graph/topology.hpp>
|
||||
#include <intel_gpu/primitives/input_layout.hpp>
|
||||
#include <intel_gpu/primitives/matrix_nms.hpp>
|
||||
#include <intel_gpu/primitives/mutable_data.hpp>
|
||||
#include <intel_gpu/runtime/memory.hpp>
|
||||
|
||||
#include "test_utils.h"
|
||||
|
||||
using namespace cldnn;
|
||||
using namespace tests;
|
||||
|
||||
namespace {
|
||||
|
||||
#define PAD -1.0
|
||||
#define PADI -1
|
||||
#define THRESHOLD 1e-3f
|
||||
|
||||
template <class T>
|
||||
std::vector<T> convert(const std::vector<float>& v) {
|
||||
return {v.begin(), v.end()};
|
||||
}
|
||||
|
||||
struct matrix_nms_test_inputs {
|
||||
int num_butches;
|
||||
int num_boxes;
|
||||
int num_classes;
|
||||
int num_selected_boxes;
|
||||
bool sort_result_across_batch;
|
||||
float score_threshold;
|
||||
int nms_top_k;
|
||||
int keep_top_k;
|
||||
int background_class;
|
||||
float gaussian_sigma;
|
||||
float post_threshold;
|
||||
bool normalized;
|
||||
std::vector<float> boxes_values;
|
||||
std::vector<float> scores_values;
|
||||
std::vector<float> expected_output;
|
||||
std::vector<int> expected_selected_boxes;
|
||||
std::vector<int> expected_valid_outputs;
|
||||
matrix_nms::sort_result_type sort_result_type;
|
||||
matrix_nms::decay_function decay_function;
|
||||
std::string test_name;
|
||||
};
|
||||
|
||||
using matrix_nms_test_params = std::tuple<matrix_nms_test_inputs, format::type>;
|
||||
|
||||
template <class T>
|
||||
struct matrix_nms_gpu_test : public testing::TestWithParam<matrix_nms_test_params> {
|
||||
public:
|
||||
void test() {
|
||||
format::type blocked_format;
|
||||
matrix_nms_test_inputs test_inputs;
|
||||
std::tie(test_inputs, blocked_format) = testing::TestWithParam<matrix_nms_test_params>::GetParam();
|
||||
|
||||
const auto data_type = type_to_data_type<T>::value;
|
||||
const auto plain_format = format::bfyx;
|
||||
|
||||
auto& engine = get_test_engine();
|
||||
|
||||
auto boxes = engine.allocate_memory(
|
||||
{data_type, plain_format, tensor{test_inputs.num_butches, test_inputs.num_boxes, 1, 4}});
|
||||
auto scores = engine.allocate_memory(
|
||||
{data_type,
|
||||
plain_format,
|
||||
tensor{test_inputs.num_butches, test_inputs.num_classes, 1, test_inputs.num_boxes}});
|
||||
|
||||
auto selected_boxes =
|
||||
engine.allocate_memory({data_types::i32, plain_format, tensor{test_inputs.num_selected_boxes, 1, 1, 1}});
|
||||
auto valid_outputs =
|
||||
engine.allocate_memory({data_types::i32, plain_format, tensor{test_inputs.num_butches, 1, 1, 1}});
|
||||
|
||||
set_values(boxes, convert<T>(test_inputs.boxes_values));
|
||||
set_values(scores, convert<T>(test_inputs.scores_values));
|
||||
|
||||
const matrix_nms::attributes attrs(test_inputs.sort_result_type,
|
||||
test_inputs.sort_result_across_batch,
|
||||
test_inputs.score_threshold,
|
||||
test_inputs.nms_top_k,
|
||||
test_inputs.keep_top_k,
|
||||
test_inputs.background_class,
|
||||
test_inputs.decay_function,
|
||||
test_inputs.gaussian_sigma,
|
||||
test_inputs.post_threshold,
|
||||
test_inputs.normalized);
|
||||
|
||||
topology topology;
|
||||
topology.add(input_layout("boxes", boxes->get_layout()));
|
||||
topology.add(input_layout("scores", scores->get_layout()));
|
||||
topology.add(mutable_data("selected_boxes", selected_boxes));
|
||||
topology.add(mutable_data("valid_outputs", valid_outputs));
|
||||
|
||||
topology.add(reorder("reordered_boxes", "boxes", blocked_format, data_type));
|
||||
topology.add(reorder("reordered_scores", "scores", blocked_format, data_type));
|
||||
|
||||
topology.add(matrix_nms("reordered_matrix_nms",
|
||||
"reordered_boxes",
|
||||
"reordered_scores",
|
||||
"selected_boxes",
|
||||
"valid_outputs",
|
||||
attrs));
|
||||
topology.add(reorder("matrix_nms", "reordered_matrix_nms", plain_format, data_type));
|
||||
|
||||
network network(engine, topology);
|
||||
network.set_input_data("boxes", boxes);
|
||||
network.set_input_data("scores", scores);
|
||||
|
||||
auto outputs = network.execute();
|
||||
|
||||
auto output = outputs.at("matrix_nms").get_memory();
|
||||
cldnn::mem_lock<T> output_ptr(output, get_test_stream());
|
||||
|
||||
cldnn::mem_lock<int> selected_boxes_ptr(selected_boxes, get_test_stream());
|
||||
cldnn::mem_lock<int> valid_outputs_ptr(valid_outputs, get_test_stream());
|
||||
|
||||
const auto expected_output = convert<T>(test_inputs.expected_output);
|
||||
ASSERT_EQ(expected_output.size(), output_ptr.size());
|
||||
for (size_t i = 0; i < expected_output.size(); ++i) {
|
||||
EXPECT_NEAR(expected_output[i], output_ptr[i], THRESHOLD);
|
||||
}
|
||||
|
||||
ASSERT_EQ(test_inputs.expected_selected_boxes.size(), selected_boxes_ptr.size());
|
||||
for (size_t i = 0; i < test_inputs.expected_selected_boxes.size(); ++i) {
|
||||
EXPECT_EQ(test_inputs.expected_selected_boxes[i], selected_boxes_ptr[i]);
|
||||
}
|
||||
|
||||
ASSERT_EQ(test_inputs.expected_valid_outputs.size(), valid_outputs_ptr.size());
|
||||
for (size_t i = 0; i < test_inputs.expected_valid_outputs.size(); ++i) {
|
||||
EXPECT_EQ(test_inputs.expected_valid_outputs[i], valid_outputs_ptr[i]);
|
||||
}
|
||||
}
|
||||
|
||||
static std::string PrintToStringParamName(const testing::TestParamInfo<matrix_nms_test_params>& info) {
|
||||
auto& test_inputs = std::get<0>(info.param);
|
||||
std::ostringstream result;
|
||||
|
||||
auto sort_res_type_str =
|
||||
test_inputs.sort_result_type == matrix_nms::sort_result_type::score
|
||||
? "score"
|
||||
: test_inputs.sort_result_type == matrix_nms::sort_result_type::class_id ? "class_id" : "none";
|
||||
auto decay_function_str =
|
||||
test_inputs.decay_function == matrix_nms::decay_function::linear
|
||||
? "linear"
|
||||
: test_inputs.decay_function == matrix_nms::decay_function::gaussian ? "gaussian" : "none";
|
||||
|
||||
result << "SortResultAcrossBatch=" << bool_to_str(test_inputs.sort_result_across_batch) << "_";
|
||||
result << "ScoreThreshold=" << test_inputs.score_threshold << "_";
|
||||
result << "NmsTopK=" << test_inputs.nms_top_k << "_";
|
||||
result << "KeepTopK=" << test_inputs.keep_top_k << "_";
|
||||
result << "BackgroundClass=" << test_inputs.background_class << "_";
|
||||
result << "GaussianSigma=" << test_inputs.gaussian_sigma << "_";
|
||||
result << "PostThreshold=" << test_inputs.post_threshold << "_";
|
||||
result << "Normalized=" << bool_to_str(test_inputs.normalized) << "_";
|
||||
result << "sort_result_type=" << sort_res_type_str << "_";
|
||||
result << "decay_function=" << decay_function_str << "_";
|
||||
result << "Format=" << fmt_to_str(std::get<1>(info.param));
|
||||
|
||||
if (!test_inputs.test_name.empty())
|
||||
result << "_TN=" << test_inputs.test_name;
|
||||
|
||||
return result.str();
|
||||
}
|
||||
};
|
||||
|
||||
matrix_nms_test_inputs get_matrix_nms_smoke_inputs() {
|
||||
return {1, // num_butches
|
||||
6, // num_boxes
|
||||
2, // num_classes
|
||||
3, // num_selected_boxes
|
||||
false, // sort_result_across_bch
|
||||
0.0f, // score_threshold
|
||||
3, // nms_top_k
|
||||
-1, // keep_top_k
|
||||
0, // background_class
|
||||
2.0f, // gaussian_sigma
|
||||
0.0f, // post_threshold
|
||||
true, // normalized
|
||||
std::vector<float>{0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9, // boxes
|
||||
0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0},
|
||||
std::vector<float>{0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3}, // scores
|
||||
std::vector<float>{1.00, // expected_output
|
||||
0.95,
|
||||
0.00,
|
||||
0.00,
|
||||
1.00,
|
||||
1.00,
|
||||
1.00,
|
||||
0.8,
|
||||
0.00,
|
||||
10.00,
|
||||
1.00,
|
||||
11.00,
|
||||
1.00,
|
||||
0.13636364,
|
||||
0.0,
|
||||
0.1,
|
||||
1.0,
|
||||
1.1},
|
||||
std::vector<int>{0, 3, 1}, // expected_selected_boxes
|
||||
std::vector<int>{3}, // expected_valid_output
|
||||
matrix_nms::sort_result_type::score, // sort_result_type
|
||||
matrix_nms::decay_function::linear, // decay_function
|
||||
"smoke"};
|
||||
}
|
||||
|
||||
matrix_nms_test_inputs get_matrix_nms_gaussian_inputs() {
|
||||
return {1, // num_butches
|
||||
6, // num_boxes
|
||||
2, // num_classes
|
||||
3, // num_selected_boxes
|
||||
false, // sort_result_across_bch
|
||||
0.0f, // score_threshold
|
||||
3, // nms_top_k
|
||||
-1, // keep_top_k
|
||||
0, // background_class
|
||||
2.0f, // gaussian_sigma
|
||||
0.0f, // post_threshold
|
||||
true, // normalized
|
||||
std::vector<float>{0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9, // boxes
|
||||
0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0},
|
||||
std::vector<float>{0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3}, // scores
|
||||
std::vector<float>{1.00, // expected_output
|
||||
0.95,
|
||||
0.00,
|
||||
0.00,
|
||||
1.00,
|
||||
1.00,
|
||||
1.00,
|
||||
0.8,
|
||||
0.00,
|
||||
10.00,
|
||||
1.00,
|
||||
11.00,
|
||||
1.00,
|
||||
0.1966116,
|
||||
0.0,
|
||||
0.1,
|
||||
1.0,
|
||||
1.1},
|
||||
std::vector<int>{0, 3, 1}, // expected_selected_boxes
|
||||
std::vector<int>{3}, // expected_valid_output
|
||||
matrix_nms::sort_result_type::score, // sort_result_type
|
||||
matrix_nms::decay_function::gaussian, // decay_function
|
||||
"gaussian"};
|
||||
}
|
||||
|
||||
matrix_nms_test_inputs get_matrix_nms_two_batches_two_classes_inputs() {
|
||||
return {2, // num_butches
|
||||
6, // num_boxes
|
||||
2, // num_classes
|
||||
6, // num_selected_boxes
|
||||
false, // sort_result_across_bch
|
||||
0.0f, // score_threshold
|
||||
3, // nms_top_k
|
||||
-1, // keep_top_k
|
||||
0, // background_class
|
||||
2.0f, // gaussian_sigma
|
||||
0.0f, // post_threshold
|
||||
true, // normalized
|
||||
std::vector<float>{0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9, // boxes
|
||||
0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0,
|
||||
0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
|
||||
0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0},
|
||||
std::vector<float>{0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3, // scores
|
||||
0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3},
|
||||
std::vector<float>{1.00, 0.95, 0.00, 0.00, 1.00, 1.00, // expected_output
|
||||
1.00, 0.8, 0.00, 10.00, 1.00, 11.00, 1.00, 0.13636364, 0.0, 0.1,
|
||||
1.0, 1.1, 1.00, 0.95, 0.00, 0.00, 1.00, 1.00, 1.00, 0.8,
|
||||
0.00, 10.00, 1.00, 11.00, 1.00, 0.13636364, 0.0, 0.1, 1.0, 1.1},
|
||||
std::vector<int>{0, 3, 1, 6, 9, 7}, // expected_selected_boxes
|
||||
std::vector<int>{3, 3}, // expected_valid_output
|
||||
matrix_nms::sort_result_type::score, // sort_result_type
|
||||
matrix_nms::decay_function::linear, // decay_function
|
||||
"two_batches_two_classes"};
|
||||
}
|
||||
|
||||
matrix_nms_test_inputs get_matrix_nms_two_batches_two_classes_by_score_cross_batch_inputs() {
|
||||
return {2, // num_butches
|
||||
6, // num_boxes
|
||||
2, // num_classes
|
||||
12, // num_selected_boxes
|
||||
true, // sort_result_across_bch
|
||||
0.0f, // score_threshold
|
||||
3, // nms_top_k
|
||||
-1, // keep_top_k
|
||||
-1, // background_class
|
||||
2.0f, // gaussian_sigma
|
||||
0.5f, // post_threshold
|
||||
true, // normalized
|
||||
std::vector<float>{0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9, // boxes
|
||||
0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0,
|
||||
0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
|
||||
0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0},
|
||||
std::vector<float>{0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3, // scores
|
||||
0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3},
|
||||
std::vector<float>{0.00, 0.95, 0.00, 10.00, 1.00, 11.00, // expected_output
|
||||
1.00, 0.95, 0.00, 0.00, 1.00, 1.00, 0.00, 0.95, 0.00, 10.00, 1.00, 11.00, 1.00, 0.95,
|
||||
0.00, 0.00, 1.00, 1.00, PAD, PAD, PAD, PAD, PAD, PAD, PAD, PAD, PAD, PAD,
|
||||
PAD, PAD, 0.00, 0.90, 0.00, 0.00, 1.00, 1.00, 0.00, 0.90, 0.00, 0.00, 1.00, 1.00,
|
||||
1.00, 0.80, 0.00, 10.00, 1.00, 11.00, 1.00, 0.80, 0.00, 10.00, 1.00, 11.00, PAD, PAD,
|
||||
PAD, PAD, PAD, PAD, PAD, PAD, PAD, PAD, PAD, PAD},
|
||||
std::vector<int>{3, 0, 9, 6, PADI, PADI, 0, 6, 3, 9, PADI, PADI}, // expected_selected_boxes
|
||||
std::vector<int>{4, 4}, // expected_valid_output
|
||||
matrix_nms::sort_result_type::score, // sort_result_type
|
||||
matrix_nms::decay_function::linear, // decay_function
|
||||
"two_batches_two_classes_by_score_cross_batch"};
|
||||
}
|
||||
|
||||
matrix_nms_test_inputs get_matrix_nms_two_batches_two_classes_by_classid_cross_batch_inputs() {
|
||||
return {2, // num_butches
|
||||
6, // num_boxes
|
||||
2, // num_classes
|
||||
12, // num_selected_boxes
|
||||
true, // sort_result_across_bch
|
||||
0.0f, // score_threshold
|
||||
3, // nms_top_k
|
||||
-1, // keep_top_k
|
||||
-1, // background_class
|
||||
2.0f, // gaussian_sigma
|
||||
0.5f, // post_threshold
|
||||
true, // normalized
|
||||
std::vector<float>{0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9, // boxes
|
||||
0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0,
|
||||
0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
|
||||
0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0},
|
||||
std::vector<float>{0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3, // scores
|
||||
0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3},
|
||||
std::vector<float>{0.00, 0.95, 0.00, 10.00, 1.00, 11.00, // expected_output
|
||||
0.00, 0.90, 0.00, 0.00, 1.00, 1.00, 0.00, 0.95, 0.00, 10.00, 1.00, 11.00, 0.00, 0.90,
|
||||
0.00, 0.00, 1.00, 1.00, PAD, PAD, PAD, PAD, PAD, PAD, PAD, PAD, PAD, PAD,
|
||||
PAD, PAD, 1.00, 0.95, 0.00, 0.00, 1.00, 1.00, 1.00, 0.80, 0.00, 10.00, 1.00, 11.00,
|
||||
1.00, 0.95, 0.00, 0.00, 1.00, 1.00, 1.00, 0.80, 0.00, 10.00, 1.00, 11.00, PAD, PAD,
|
||||
PAD, PAD, PAD, PAD, PAD, PAD, PAD, PAD, PAD, PAD},
|
||||
std::vector<int>{3, 0, 9, 6, PADI, PADI, 0, 3, 6, 9, PADI, PADI}, // expected_selected_boxes
|
||||
std::vector<int>{4, 4}, // expected_valid_output
|
||||
matrix_nms::sort_result_type::class_id, // sort_result_type
|
||||
matrix_nms::decay_function::linear, // decay_function
|
||||
"matrix_nms_two_batches_two_classes_by_classid_cross_batch"};
|
||||
}
|
||||
|
||||
matrix_nms_test_inputs get_matrix_nms_by_keep_top_k_inputs() {
|
||||
return {2, // num_butches
|
||||
6, // num_boxes
|
||||
2, // num_classes
|
||||
6, // num_selected_boxes
|
||||
false, // sort_result_across_bch
|
||||
0.0f, // score_threshold
|
||||
3, // nms_top_k
|
||||
3, // keep_top_k
|
||||
0, // background_class
|
||||
2.0f, // gaussian_sigma
|
||||
0.0f, // post_threshold
|
||||
true, // normalized
|
||||
std::vector<float>{0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9, // boxes
|
||||
0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0,
|
||||
0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
|
||||
0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0},
|
||||
std::vector<float>{0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3, // scores
|
||||
0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3},
|
||||
std::vector<float>{1.00, 0.95, 0.00, 0.00, 1.00, 1.00, // expected_output
|
||||
1.00, 0.8, 0.00, 10.00, 1.00, 11.00, 1.00, 0.13636364, 0.0, 0.1,
|
||||
1.0, 1.1, 1.00, 0.95, 0.00, 0.00, 1.00, 1.00, 1.00, 0.8,
|
||||
0.00, 10.00, 1.00, 11.00, 1.00, 0.13636364, 0.0, 0.1, 1.0, 1.1},
|
||||
std::vector<int>{0, 3, 1, 6, 9, 7}, // expected_selected_boxes
|
||||
std::vector<int>{3, 3}, // expected_valid_output
|
||||
matrix_nms::sort_result_type::class_id, // sort_result_type
|
||||
matrix_nms::decay_function::linear, // decay_function
|
||||
"matrix_nms_by_keep_top_k"};
|
||||
}
|
||||
|
||||
matrix_nms_test_inputs get_matrix_nms_background_inputs() {
|
||||
return {1, // num_butches
|
||||
6, // num_boxes
|
||||
2, // num_classes
|
||||
6, // num_selected_boxes
|
||||
false, // sort_result_across_bch
|
||||
0.0f, // score_threshold
|
||||
3, // nms_top_k
|
||||
-1, // keep_top_k
|
||||
-1, // background_class
|
||||
2.0f, // gaussian_sigma
|
||||
0.0f, // post_threshold
|
||||
true, // normalized
|
||||
std::vector<float>{0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9, // boxes
|
||||
0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0},
|
||||
std::vector<float>{0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3}, // scores
|
||||
std::vector<float>{0.00, 0.95, 0.0, 10.0, 1.0, 11.0, // expected_output
|
||||
1.00, 0.95, 0.0, 0.0, 1.0, 1.0, 0.00, 0.9, 0.0, 0.0,
|
||||
1.0, 1.0, 1.00, 0.8, 0.0, 10.0, 1.0, 11.0, 0.00, 0.13636364,
|
||||
0.0, 0.1, 1.0, 1.1, 1.00, 0.13636364, 0.0, 0.1, 1.0, 1.1},
|
||||
std::vector<int>{3, 0, 0, 3, 1, 1}, // expected_selected_boxes
|
||||
std::vector<int>{6}, // expected_valid_output
|
||||
matrix_nms::sort_result_type::score, // sort_result_type
|
||||
matrix_nms::decay_function::linear, // decay_function
|
||||
"matrix_nms_background"};
|
||||
}
|
||||
|
||||
matrix_nms_test_inputs get_matrix_nms_flipped_coordinates_inputs() {
|
||||
return {1, // num_butches
|
||||
6, // num_boxes
|
||||
1, // num_classes
|
||||
3, // num_selected_boxes
|
||||
false, // sort_result_across_bch
|
||||
0.0f, // score_threshold
|
||||
3, // nms_top_k
|
||||
-1, // keep_top_k
|
||||
-1, // background_class
|
||||
2.0f, // gaussian_sigma
|
||||
0.0f, // post_threshold
|
||||
true, // normalized
|
||||
std::vector<float>{1.0, 1.0, 0.0, 0.0, 0.0, 0.1, 1.0, 1.1, 0.0, 0.9, 1.0, -0.1, // boxes
|
||||
0.0, 10.0, 1.0, 11.0, 1.0, 10.1, 0.0, 11.1, 1.0, 101.0, 0.0, 100.0},
|
||||
std::vector<float>{0.9, 0.75, 0.6, 0.95, 0.5, 0.3}, // scores
|
||||
std::vector<float>{0.00,
|
||||
0.95,
|
||||
0.0,
|
||||
10.0,
|
||||
1.0,
|
||||
11.0, // expected_output
|
||||
0.00,
|
||||
0.9,
|
||||
1.0,
|
||||
1.0,
|
||||
0.0,
|
||||
0.0,
|
||||
0.00,
|
||||
0.75,
|
||||
0.0,
|
||||
0.1,
|
||||
1.0,
|
||||
1.1},
|
||||
std::vector<int>{3, 0, 1}, // expected_selected_boxes
|
||||
std::vector<int>{3}, // expected_valid_output
|
||||
matrix_nms::sort_result_type::score, // sort_result_type
|
||||
matrix_nms::decay_function::linear, // decay_function
|
||||
"flipped_coordinates"};
|
||||
}
|
||||
|
||||
matrix_nms_test_inputs get_matrix_nms_post_threshold_inputs() {
|
||||
return {1, // num_butches
|
||||
6, // num_boxes
|
||||
1, // num_classes
|
||||
3, // num_selected_boxes
|
||||
false, // sort_result_across_bch
|
||||
0.0f, // score_threshold
|
||||
3, // nms_top_k
|
||||
-1, // keep_top_k
|
||||
-1, // background_class
|
||||
2.0f, // gaussian_sigma
|
||||
0.8f, // post_threshold
|
||||
true, // normalized
|
||||
std::vector<float>{0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9, // boxes
|
||||
0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0},
|
||||
std::vector<float>{0.9, 0.75, 0.6, 0.95, 0.5, 0.3}, // scores
|
||||
std::vector<float>{0.00,
|
||||
0.95,
|
||||
0.00,
|
||||
10.00,
|
||||
1.00,
|
||||
11.00, // expected_output
|
||||
0.00,
|
||||
0.9,
|
||||
0.00,
|
||||
0.00,
|
||||
1.00,
|
||||
1.00,
|
||||
PAD,
|
||||
PAD,
|
||||
PAD,
|
||||
PAD,
|
||||
PAD,
|
||||
PAD},
|
||||
std::vector<int>{3, 0, PADI}, // expected_selected_boxes
|
||||
std::vector<int>{2}, // expected_valid_output
|
||||
matrix_nms::sort_result_type::score, // sort_result_type
|
||||
matrix_nms::decay_function::linear, // decay_function
|
||||
"post_threshold"};
|
||||
}
|
||||
|
||||
matrix_nms_test_inputs get_matrix_nms_identical_boxes_inputs() {
|
||||
return {1, // num_butches
|
||||
10, // num_boxes
|
||||
1, // num_classes
|
||||
3, // num_selected_boxes
|
||||
false, // sort_result_across_bch
|
||||
0.0f, // score_threshold
|
||||
3, // nms_top_k
|
||||
-1, // keep_top_k
|
||||
-1, // background_class
|
||||
2.0f, // gaussian_sigma
|
||||
0.3f, // post_threshold
|
||||
true, // normalized
|
||||
std::vector<float>{0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, // boxes
|
||||
1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0,
|
||||
0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0},
|
||||
std::vector<float>{0.4, 0.01, 0.2, 0.09, 0.15, 0.05, 0.02, 0.03, 0.05, 0.0}, // scores
|
||||
std::vector<float>{0.00,
|
||||
0.40,
|
||||
0.00,
|
||||
0.00,
|
||||
1.00,
|
||||
1.00, // expected_output
|
||||
PAD,
|
||||
PAD,
|
||||
PAD,
|
||||
PAD,
|
||||
PAD,
|
||||
PAD,
|
||||
PAD,
|
||||
PAD,
|
||||
PAD,
|
||||
PAD,
|
||||
PAD,
|
||||
PAD},
|
||||
std::vector<int>{0, PADI, PADI}, // expected_selected_boxes
|
||||
std::vector<int>{1}, // expected_valid_output
|
||||
matrix_nms::sort_result_type::score, // sort_result_type
|
||||
matrix_nms::decay_function::linear, // decay_function
|
||||
"identical_boxes"};
|
||||
};
|
||||
|
||||
matrix_nms_test_inputs get_matrix_nms_top_k_inputs() {
|
||||
return {1, // num_butches
|
||||
6, // num_boxes
|
||||
1, // num_classes
|
||||
2, // num_selected_boxes
|
||||
false, // sort_result_across_bch
|
||||
0.0f, // score_threshold
|
||||
2, // nms_top_k
|
||||
-1, // keep_top_k
|
||||
-1, // background_class
|
||||
2.0f, // gaussian_sigma
|
||||
0.0f, // post_threshold
|
||||
true, // normalized
|
||||
std::vector<float>{0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9, // boxes
|
||||
0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0},
|
||||
std::vector<float>{0.9, 0.75, 0.6, 0.95, 0.5, 0.3}, // scores
|
||||
std::vector<float>{0.00,
|
||||
0.95,
|
||||
0.00,
|
||||
10.00,
|
||||
1.00,
|
||||
11.00, // expected_output
|
||||
0.00,
|
||||
0.90,
|
||||
0.00,
|
||||
0.00,
|
||||
1.00,
|
||||
1.00},
|
||||
std::vector<int>{3, 0}, // expected_selected_boxes
|
||||
std::vector<int>{2}, // expected_valid_output
|
||||
matrix_nms::sort_result_type::score, // sort_result_type
|
||||
matrix_nms::decay_function::linear, // decay_function
|
||||
"matrix_nms_nms_top_k"};
|
||||
}
|
||||
|
||||
matrix_nms_test_inputs get_matrix_nms_single_box_inputs() {
|
||||
return {1, // num_butches
|
||||
1, // num_boxes
|
||||
1, // num_classes
|
||||
1, // num_selected_boxes
|
||||
false, // sort_result_across_bch
|
||||
0.0f, // score_threshold
|
||||
3, // nms_top_k
|
||||
-1, // keep_top_k
|
||||
-1, // background_class
|
||||
2.0f, // gaussian_sigma
|
||||
0.0f, // post_threshold
|
||||
true, // normalized
|
||||
std::vector<float>{0.0, 0.0, 1.0, 1.0}, // boxes
|
||||
std::vector<float>{0.9}, // scores
|
||||
std::vector<float>{0.00, 0.90, 0.00, 0.00, 1.00, 1.00}, // expected_output
|
||||
std::vector<int>{0}, // expected_selected_boxes
|
||||
std::vector<int>{1}, // expected_valid_output
|
||||
matrix_nms::sort_result_type::score, // sort_result_type
|
||||
matrix_nms::decay_function::linear, // decay_function
|
||||
"matrix_nms_single_box"};
|
||||
}
|
||||
|
||||
matrix_nms_test_inputs get_matrix_nms_no_output_inputs() {
|
||||
return {1, // num_butches
|
||||
6, // num_boxes
|
||||
1, // num_classes
|
||||
3, // num_selected_boxes
|
||||
false, // sort_result_across_bch
|
||||
2.0f, // score_threshold
|
||||
3, // nms_top_k
|
||||
-1, // keep_top_k
|
||||
-1, // background_class
|
||||
2.0f, // gaussian_sigma
|
||||
0.0f, // post_threshold
|
||||
true, // normalized
|
||||
std::vector<float>{0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9, // boxes
|
||||
0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0},
|
||||
std::vector<float>{0.9, 0.75, 0.6, 0.95, 0.5, 0.3}, // scores
|
||||
std::vector<float>{PAD,
|
||||
PAD,
|
||||
PAD,
|
||||
PAD,
|
||||
PAD,
|
||||
PAD, // expected_output
|
||||
PAD,
|
||||
PAD,
|
||||
PAD,
|
||||
PAD,
|
||||
PAD,
|
||||
PAD,
|
||||
PAD,
|
||||
PAD,
|
||||
PAD,
|
||||
PAD,
|
||||
PAD,
|
||||
PAD},
|
||||
std::vector<int>{PADI, PADI, PADI}, // expected_selected_boxes
|
||||
std::vector<int>{0}, // expected_valid_output
|
||||
matrix_nms::sort_result_type::score, // sort_result_type
|
||||
matrix_nms::decay_function::linear, // decay_function
|
||||
"matrix_nms_no_output"};
|
||||
}
|
||||
|
||||
const std::vector<format::type> layout_formats = {format::bfyx,
|
||||
format::b_fs_yx_fsv16,
|
||||
format::b_fs_yx_fsv32,
|
||||
format::bs_fs_yx_bsv16_fsv16,
|
||||
format::bs_fs_yx_bsv32_fsv32,
|
||||
format::bs_fs_yx_bsv32_fsv16};
|
||||
|
||||
#define INSTANTIATE_MATRIX_NMS_TEST_SUITE(input_type, func) \
|
||||
using matrix_nms_gpu_test_##input_type##func = matrix_nms_gpu_test<input_type>; \
|
||||
TEST_P(matrix_nms_gpu_test_##input_type##func, test) { \
|
||||
test(); \
|
||||
} \
|
||||
INSTANTIATE_TEST_SUITE_P(matrix_nms_test_##input_type##func, \
|
||||
matrix_nms_gpu_test_##input_type##func, \
|
||||
testing::Combine(testing::Values(func()), testing::ValuesIn(layout_formats)), \
|
||||
matrix_nms_gpu_test_##input_type##func::PrintToStringParamName);
|
||||
|
||||
INSTANTIATE_MATRIX_NMS_TEST_SUITE(float, get_matrix_nms_smoke_inputs)
|
||||
INSTANTIATE_MATRIX_NMS_TEST_SUITE(float, get_matrix_nms_gaussian_inputs)
|
||||
INSTANTIATE_MATRIX_NMS_TEST_SUITE(float, get_matrix_nms_two_batches_two_classes_inputs)
|
||||
INSTANTIATE_MATRIX_NMS_TEST_SUITE(float, get_matrix_nms_two_batches_two_classes_by_classid_cross_batch_inputs)
|
||||
INSTANTIATE_MATRIX_NMS_TEST_SUITE(float, get_matrix_nms_two_batches_two_classes_by_score_cross_batch_inputs)
|
||||
INSTANTIATE_MATRIX_NMS_TEST_SUITE(float, get_matrix_nms_by_keep_top_k_inputs)
|
||||
INSTANTIATE_MATRIX_NMS_TEST_SUITE(float, get_matrix_nms_background_inputs)
|
||||
INSTANTIATE_MATRIX_NMS_TEST_SUITE(float, get_matrix_nms_flipped_coordinates_inputs)
|
||||
INSTANTIATE_MATRIX_NMS_TEST_SUITE(float, get_matrix_nms_post_threshold_inputs)
|
||||
INSTANTIATE_MATRIX_NMS_TEST_SUITE(float, get_matrix_nms_identical_boxes_inputs)
|
||||
INSTANTIATE_MATRIX_NMS_TEST_SUITE(float, get_matrix_nms_top_k_inputs)
|
||||
INSTANTIATE_MATRIX_NMS_TEST_SUITE(float, get_matrix_nms_single_box_inputs)
|
||||
INSTANTIATE_MATRIX_NMS_TEST_SUITE(float, get_matrix_nms_no_output_inputs)
|
||||
|
||||
INSTANTIATE_MATRIX_NMS_TEST_SUITE(FLOAT16, get_matrix_nms_smoke_inputs)
|
||||
INSTANTIATE_MATRIX_NMS_TEST_SUITE(FLOAT16, get_matrix_nms_gaussian_inputs)
|
||||
INSTANTIATE_MATRIX_NMS_TEST_SUITE(FLOAT16, get_matrix_nms_two_batches_two_classes_inputs)
|
||||
INSTANTIATE_MATRIX_NMS_TEST_SUITE(FLOAT16, get_matrix_nms_by_keep_top_k_inputs)
|
||||
INSTANTIATE_MATRIX_NMS_TEST_SUITE(FLOAT16, get_matrix_nms_two_batches_two_classes_by_classid_cross_batch_inputs)
|
||||
INSTANTIATE_MATRIX_NMS_TEST_SUITE(FLOAT16, get_matrix_nms_two_batches_two_classes_by_score_cross_batch_inputs)
|
||||
INSTANTIATE_MATRIX_NMS_TEST_SUITE(FLOAT16, get_matrix_nms_background_inputs)
|
||||
INSTANTIATE_MATRIX_NMS_TEST_SUITE(FLOAT16, get_matrix_nms_flipped_coordinates_inputs)
|
||||
INSTANTIATE_MATRIX_NMS_TEST_SUITE(FLOAT16, get_matrix_nms_post_threshold_inputs)
|
||||
INSTANTIATE_MATRIX_NMS_TEST_SUITE(FLOAT16, get_matrix_nms_identical_boxes_inputs)
|
||||
INSTANTIATE_MATRIX_NMS_TEST_SUITE(FLOAT16, get_matrix_nms_top_k_inputs)
|
||||
INSTANTIATE_MATRIX_NMS_TEST_SUITE(FLOAT16, get_matrix_nms_single_box_inputs)
|
||||
INSTANTIATE_MATRIX_NMS_TEST_SUITE(FLOAT16, get_matrix_nms_no_output_inputs)
|
||||
|
||||
#undef INSTANTIATE_MATRIX_NMS_TEST_SUITE
|
||||
|
||||
} // namespace
|
@ -0,0 +1,48 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "single_layer_tests/matrix_nms.hpp"
|
||||
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
|
||||
#include "common_test_utils/test_constants.hpp"
|
||||
|
||||
using namespace ov::test::subgraph;
|
||||
using namespace InferenceEngine;
|
||||
using namespace ngraph;
|
||||
|
||||
const std::vector<std::vector<ov::Shape>> inStaticShapeParams = {{{3, 100, 4}, {3, 1, 100}},
|
||||
{{1, 10, 4}, {1, 100, 10}}};
|
||||
|
||||
const auto inputPrecisions = InputPrecisions{ov::element::f32, ov::element::i32, ov::element::f32};
|
||||
|
||||
const std::vector<op::v8::MatrixNms::SortResultType> sortResultType = {op::v8::MatrixNms::SortResultType::CLASSID,
|
||||
op::v8::MatrixNms::SortResultType::SCORE,
|
||||
op::v8::MatrixNms::SortResultType::NONE};
|
||||
const std::vector<element::Type> outType = {element::i32, element::i64};
|
||||
const std::vector<TopKParams> topKParams = {TopKParams{-1, 5}, TopKParams{100, -1}};
|
||||
const std::vector<ThresholdParams> thresholdParams = {ThresholdParams{0.0f, 2.0f, 0.0f},
|
||||
ThresholdParams{0.1f, 1.5f, 0.2f}};
|
||||
const std::vector<int> backgroudClass = {-1, 1};
|
||||
const std::vector<bool> normalized = {true, false};
|
||||
const std::vector<op::v8::MatrixNms::DecayFunction> decayFunction = {op::v8::MatrixNms::DecayFunction::GAUSSIAN,
|
||||
op::v8::MatrixNms::DecayFunction::LINEAR};
|
||||
|
||||
const auto nmsParamsStatic =
|
||||
::testing::Combine(::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inStaticShapeParams)),
|
||||
::testing::Values(inputPrecisions),
|
||||
::testing::ValuesIn(sortResultType),
|
||||
::testing::ValuesIn(outType),
|
||||
::testing::ValuesIn(topKParams),
|
||||
::testing::ValuesIn(thresholdParams),
|
||||
::testing::ValuesIn(backgroudClass),
|
||||
::testing::ValuesIn(normalized),
|
||||
::testing::ValuesIn(decayFunction),
|
||||
::testing::Values(CommonTestUtils::DEVICE_GPU));
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_MatrixNmsLayerTest_static,
|
||||
MatrixNmsLayerTest,
|
||||
nmsParamsStatic,
|
||||
MatrixNmsLayerTest::getTestCaseName);
|
@ -325,7 +325,10 @@ void MatrixNmsLayerTest::SetUp() {
|
||||
const auto paramOuts =
|
||||
ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
|
||||
auto nms = std::make_shared<opset8::MatrixNms>(paramOuts[0], paramOuts[1], m_attrs);
|
||||
if (!m_outStaticShape) {
|
||||
|
||||
if (targetDevice == CommonTestUtils::DEVICE_GPU) {
|
||||
function = std::make_shared<Function>(nms, params, "MatrixNMS");
|
||||
} else if (!m_outStaticShape) {
|
||||
auto result = std::make_shared<opset5::Result>(nms);
|
||||
function = std::make_shared<Function>(result, params, "MatrixNMS");
|
||||
} else {
|
||||
|
Loading…
Reference in New Issue
Block a user