[GPU] GenerateProposals-9 (#11994)

* add static_output attribute to enable GPU implementation of GenerateProposals.
This change may be reverted when dynamic shape support is implemented
in the GPU plugin.

* - add GPU single layer test;
- add normalized attribute to SLT.

* add GPU primitive for GenerateProposals

* add kernel for GenerateProposals

* add unit test for GenerateProposals

* add blocked layouts support

* tidy up

* support blocked layouts also for 2nd and 3d outputs

* Transformation to deal with dynamic output in GPU
  - revert changes in GenerateProposals interface and shape inference;
  - add transformation;
  - custom compare method in single-layer test.

* address review comments

* fix after rebase

* fix after rebase

* review comment: added nms_eta to testcase name generation

* - added input types combination to functional tests;
- fix float16 comparison.

* fix after rebase

* use vector for input ids

* fix after rebase
This commit is contained in:
Oleksii Khovan 2022-10-11 14:10:32 +02:00 committed by GitHub
parent b2e35592f7
commit 66b5f9acf2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
26 changed files with 1853 additions and 5 deletions

View File

@ -0,0 +1,41 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <memory>
#include <string>
#include <transformations_visibility.hpp>
#include "ngraph/op/generate_proposals.hpp"
#include "ngraph/op/op.hpp"
namespace ngraph {
namespace op {
namespace internal {
// GPU-internal variant of GenerateProposals-9. Unlike the base v9 op, it
// computes fully static output shapes (see validate_and_infer_types in the
// .cpp), so plugins without dynamic-shape support can preallocate outputs.
class TRANSFORMATIONS_API GenerateProposalsIEInternal : public op::v9::GenerateProposals {
    using Base = op::v9::GenerateProposals;

public:
    OPENVINO_OP("GenerateProposalsIEInternal", "ie_internal_opset");
    BWDCMP_RTTI_DECLARATION;

    GenerateProposalsIEInternal() = default;

    // Same inputs/attributes as op::v9::GenerateProposals:
    //   im_info  - image size info (batch dim must be static)
    //   anchors  - anchor boxes
    //   deltas   - box deltas per anchor
    //   scores   - proposal scores
    //   attrs    - GenerateProposals attributes (min_size, nms params, ...)
    //   roi_num_type - element type of the 3rd output (ROI counts)
    GenerateProposalsIEInternal(const Output<Node>& im_info,
                                const Output<Node>& anchors,
                                const Output<Node>& deltas,
                                const Output<Node>& scores,
                                const Attributes& attrs,
                                const element::Type& roi_num_type = element::i64);

    void validate_and_infer_types() override;

    std::shared_ptr<Node> clone_with_new_inputs(const ngraph::OutputVector& new_args) const override;
};
} // namespace internal
} // namespace op
} // namespace ngraph

View File

@ -0,0 +1,22 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ngraph/pass/graph_rewrite.hpp>
#include <transformations_visibility.hpp>
namespace ngraph {
namespace pass {
class TRANSFORMATIONS_API ConvertGP9ToGPIEInternal;
} // namespace pass
} // namespace ngraph
// Matcher pass that replaces opset9::GenerateProposals with the internal
// GenerateProposalsIEInternal op (which infers static output shapes).
// Nodes with any dynamic input shape are left untouched.
class ngraph::pass::ConvertGP9ToGPIEInternal : public ngraph::pass::MatcherPass {
public:
    OPENVINO_RTTI("ConvertGP9ToGPIEInternal", "0");
    ConvertGP9ToGPIEInternal();
};

View File

@ -0,0 +1,58 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "ngraph_ops/generate_proposals_ie_internal.hpp"
#include <memory>
#include <ngraph/opsets/opset9.hpp>
#include "itt.hpp"
using namespace std;
using namespace ngraph;
BWDCMP_RTTI_DEFINITION(op::internal::GenerateProposalsIEInternal);
// Delegates to the base v9 constructor, then re-runs shape inference so the
// static output shapes computed by this class (see validate_and_infer_types
// below) replace the ones set by the base op.
op::internal::GenerateProposalsIEInternal::GenerateProposalsIEInternal(const Output<Node>& im_info,
                                                                       const Output<Node>& anchors,
                                                                       const Output<Node>& deltas,
                                                                       const Output<Node>& scores,
                                                                       const Attributes& attrs,
                                                                       const element::Type& roi_num_type)
    : Base(im_info, anchors, deltas, scores, attrs, roi_num_type) {
    validate_and_infer_types();
}
// Creates a copy of this node wired to the given replacement inputs,
// preserving the attributes and the ROI-count element type.
std::shared_ptr<Node> op::internal::GenerateProposalsIEInternal::clone_with_new_inputs(
    const ngraph::OutputVector& new_args) const {
    INTERNAL_OP_SCOPE(internal_GenerateProposalsIEInternal_clone_with_new_inputs);
    check_new_args_count(this, new_args);
    const auto& im_info = new_args.at(0);
    const auto& anchors = new_args.at(1);
    const auto& deltas = new_args.at(2);
    const auto& scores = new_args.at(3);
    return make_shared<op::internal::GenerateProposalsIEInternal>(im_info,
                                                                  anchors,
                                                                  deltas,
                                                                  scores,
                                                                  get_attrs(),
                                                                  get_roi_num_type());
}
// Infers static output shapes from the im_info batch dimension and the
// post_nms_count attribute:
//   output 0 (rois):    [num_batches * post_nms_count, 4]
//   output 1 (scores):  [num_batches * post_nms_count]
//   output 2 (roisnum): [num_batches]
// Requires the batch dimension of im_info (input 0) to be static.
void op::internal::GenerateProposalsIEInternal::validate_and_infer_types() {
    INTERNAL_OP_SCOPE(internal_GenerateProposalsIEInternal_validate_and_infer_types);
    // Run the base op's validation first.
    Base::validate_and_infer_types();

    const auto im_info_shape = get_input_partial_shape(0);
    const auto num_batches = im_info_shape[0];
    NODE_VALIDATION_CHECK(this, num_batches.is_static(), "Number of batches must be static");

    // Upper bound: each batch contributes at most post_nms_count ROIs.
    const Dimension post_nms_count{get_attrs().post_nms_count};
    const auto first_dim_shape = num_batches * post_nms_count;

    const auto rois_shape = ov::PartialShape({first_dim_shape, 4});
    const auto scores_shape = ov::PartialShape({first_dim_shape});
    const auto roisnum_shape = ov::PartialShape({num_batches});

    // ROIs and scores share the element type of input 0 (im_info);
    // ROI counts use the configured roi_num_type.
    const auto input_type = get_input_element_type(0);
    set_output_type(0, input_type, rois_shape);
    set_output_type(1, input_type, scores_shape);
    set_output_type(2, get_roi_num_type(), roisnum_shape);
}

View File

@ -0,0 +1,57 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "transformations/op_conversions/convert_gp9_to_gp_ie_internal.hpp"
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/opsets/opset9.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include <ngraph/rt_info.hpp>
#include "itt.hpp"
#include "ngraph_ops/generate_proposals_ie_internal.hpp"
#include "transformations/utils/utils.hpp"
// Registers a matcher that rewrites opset9::GenerateProposals into the
// internal static-shape variant. Only nodes whose inputs are all statically
// shaped are converted, because the internal op derives static output shapes
// from its inputs.
ngraph::pass::ConvertGP9ToGPIEInternal::ConvertGP9ToGPIEInternal() {
    // NOTE: `this` is not needed inside the callback, so nothing is captured.
    ngraph::matcher_pass_callback callback = [](ngraph::pattern::Matcher& m) {
        const auto root = m.get_match_root();
        const auto old_node = std::dynamic_pointer_cast<ngraph::opset9::GenerateProposals>(root);
        if (!old_node) {
            return false;
        }
        for (const auto& i : old_node->inputs()) {
            if (i.get_partial_shape().is_dynamic()) {
                return false;
            }
        }

        const auto new_node =
            std::make_shared<ngraph::op::internal::GenerateProposalsIEInternal>(old_node->input_value(0),
                                                                                old_node->input_value(1),
                                                                                old_node->input_value(2),
                                                                                old_node->input_value(3),
                                                                                old_node->get_attrs(),
                                                                                old_node->get_roi_num_type());

        new_node->set_friendly_name(old_node->get_friendly_name());
        // All three outputs originate from the same node, so copying the
        // runtime info once is sufficient (the previous code pushed the same
        // node into a NodeVector four times).
        copy_runtime_info(old_node, new_node);
        // Replaces all outputs of old_node with the matching outputs of
        // new_node (both have 3 outputs).
        replace_node(old_node, new_node);
        return true;
    };

    const auto generate_proposals = ngraph::pattern::wrap_type<ngraph::opset9::GenerateProposals>();
    const auto matcher = std::make_shared<ngraph::pattern::Matcher>(generate_proposals, "ConvertGP9ToGPIEInternal");
    register_matcher(matcher, callback);
}

View File

@ -38,6 +38,7 @@
#include <transformations/common_optimizations/remove_multi_subgraph_op_dangling_params.hpp>
#include <transformations/disable_decompression_convert_constant_folding.hpp>
#include <transformations/low_precision/disable_convert_constant_folding_on_const_path.hpp>
#include <transformations/op_conversions/convert_gp9_to_gp_ie_internal.hpp>
#include <transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.hpp>
#include <transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.hpp>
#include <transformations/op_conversions/convert_nms9_to_nms_ie_internal.hpp>
@ -458,6 +459,7 @@ void CNNNetworkNGraphImpl::reshape(const std::map<std::string, ngraph::PartialSh
manager.register_pass<ngraph::pass::ConvertMulticlassNmsToMulticlassNmsIE>(false);
manager.register_pass<::ngraph::pass::ConvertMatrixNmsToMatrixNmsIE>(false);
manager.register_pass<::ngraph::pass::ConvertNMS9ToNMSIEInternal>();
manager.register_pass<::ngraph::pass::ConvertGP9ToGPIEInternal>();
manager.register_pass<::ngraph::pass::DisableConvertConstantFoldingOnConstPath>();
manager.register_pass<::ov::pass::DisableDecompressionConvertConstantFolding>();
manager.register_pass<::ngraph::pass::ConstantFolding>();

View File

@ -226,6 +226,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::ValuesIn(nms_threshold),
::testing::ValuesIn(post_nms_count),
::testing::ValuesIn(pre_nms_count),
::testing::ValuesIn({true, false}),
::testing::ValuesIn(inputTensors),
::testing::Values(ov::element::Type_t::f32),
::testing::Values(ov::element::Type_t::i32),

View File

@ -245,3 +245,4 @@ REGISTER_FACTORY(v9, Eye);
// --------------------------- Supported internal ops --------------------------- //
REGISTER_FACTORY(internal, NonMaxSuppressionIEInternal);
REGISTER_FACTORY(internal, GenerateProposalsIEInternal);

View File

@ -0,0 +1,82 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "primitive.hpp"
#include <vector>
namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{
/// @addtogroup cpp_topology Network Topology
/// @{
/// @addtogroup cpp_primitives Primitives
/// @{
/// @brief generate proposals
struct generate_proposals
        : public primitive_base<generate_proposals> {
    CLDNN_DECLARE_PRIMITIVE(generate_proposals)

    /// @brief Constructs generate_proposals primitive
    /// @param id This primitive id
    /// @param inputs primitive ids in the following fixed order:
    ///        [0] image size info,
    ///        [1] anchors,
    ///        [2] deltas for anchors,
    ///        [3] proposal scores,
    ///        [4] buffer for the 2nd output (ROIs scores),
    ///        [5] buffer for the 3rd output (number of proposed ROIs per image)
    /// @param min_size minimum box width and height
    /// @param nms_threshold threshold to be used in NonMaxSuppression stage
    /// @param pre_nms_count number of top-n proposals before NMS
    /// @param post_nms_count number of top-n proposals after NMS
    /// @param normalized indicates whether proposal bboxes are normalized
    /// @param nms_eta eta parameter for adaptive NMS
    /// @param roi_num_type type of 3rd output elements
    /// @param output_padding optional output padding
    generate_proposals(const primitive_id& id,
               const std::vector<primitive_id>& inputs,
               float min_size,
               float nms_threshold,
               int64_t pre_nms_count,
               int64_t post_nms_count,
               bool normalized,
               float nms_eta,
               const data_types roi_num_type,
               const padding& output_padding = {}) :
            primitive_base{id, inputs, output_padding},
            output_rois_scores{inputs[4]},
            output_rois_num{inputs[5]},
            min_size{min_size},
            nms_threshold{nms_threshold},
            pre_nms_count{pre_nms_count},
            post_nms_count{post_nms_count},
            normalized{normalized},
            nms_eta{nms_eta},
            roi_num_type{roi_num_type} {}

    primitive_id output_rois_scores;  // id of the buffer receiving the 2nd output (scores)
    primitive_id output_rois_num;     // id of the buffer receiving the 3rd output (ROI counts)
    float min_size;                   // minimum box width and height
    float nms_threshold;              // IoU threshold for the NMS stage
    int64_t pre_nms_count;            // top-n proposals kept before NMS
    int64_t post_nms_count;           // top-n proposals kept after NMS
    bool normalized;                  // whether proposal bboxes are normalized
    float nms_eta;                    // eta parameter for adaptive NMS
    data_types roi_num_type;          // element type of the 3rd output

protected:
    // The extra output buffers are modeled as dependencies so the graph keeps
    // them alive and schedules them with this primitive.
    std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
        std::vector<std::reference_wrapper<const primitive_id>> ret;
        if (!output_rois_scores.empty())
            ret.push_back(output_rois_scores);
        if (!output_rois_num.empty())
            ret.push_back(output_rois_num);
        return ret;
    }
};
/// @}
/// @}
/// @}
} // namespace cldnn

View File

@ -0,0 +1,43 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "generate_proposals_inst.h"
#include "primitive_type_base.h"
#include "intel_gpu/runtime/error_handler.hpp"
#include "json_object.h"
#include <string>
namespace cldnn {
// Returns the singleton type descriptor used to register and dispatch this
// primitive within cldnn.
primitive_type_id generate_proposals::type_id() {
    static primitive_type_base<generate_proposals> instance;
    return &instance;
}
// Computes the layout of the first (ROIs) output:
// [num_batches * post_nms_count, 4, 1, 1], reusing the data type and format
// of the first input. The 2nd and 3rd op outputs are written into dedicated
// input buffers (see the generate_proposals primitive), so only the ROIs
// layout is computed here.
layout generate_proposals_inst::calc_output_layout(const generate_proposals_node& node, kernel_impl_params const& impl_param) {
    const layout data_layout = impl_param.get_input_layout();
    const auto num_batches = data_layout.batch();
    const auto desc = impl_param.typed_desc<generate_proposals>();
    return layout(data_layout.data_type, data_layout.format, {static_cast<int>(num_batches * desc->post_nms_count), 4, 1, 1});
}
std::string generate_proposals_inst::to_string(const generate_proposals_node& node) {
auto desc = node.get_primitive();
std::stringstream primitive_description;
json_composite info;
info.add("min_size", desc->min_size);
info.add("nms_threshold", desc->nms_threshold);
info.add("pre_nms_count", desc->pre_nms_count);
info.add("post_nms_count", desc->post_nms_count);
info.add("normalized", desc->normalized);
info.add("nms_eta", desc->nms_eta);
auto node_info = node.desc_to_json();
node_info->add("generate_proposals_info", info);
node_info->dump(primitive_description);
return primitive_description.str();
}
} // namespace cldnn

View File

@ -0,0 +1,89 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "generate_proposals_inst.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "kernel_selector_helper.h"
#include "generate_proposals/generate_proposals_kernel_selector.h"
#include "generate_proposals/generate_proposals_kernel_ref.h"
namespace cldnn {
namespace ocl {
// OCL implementation wrapper: translates the cldnn primitive into
// kernel_selector parameters and selects the best available kernel.
struct generate_proposals_impl
        : public typed_primitive_impl_ocl<generate_proposals> {
    using parent = typed_primitive_impl_ocl<generate_proposals>;
    using parent::parent;

    std::unique_ptr<primitive_impl> clone() const override {
        return make_unique<generate_proposals_impl>(*this);
    }

protected:
    // The 2nd and 3rd op outputs are backed by extra dependency buffers
    // (deps 4 and 5), which the kernel receives as additional inputs.
    kernel_arguments_data get_arguments(typed_primitive_inst<generate_proposals>& instance, int32_t) const override {
        auto args = parent::get_arguments(instance, 0);
        args.inputs.push_back(instance.output_rois_scores_memory());
        args.inputs.push_back(instance.output_rois_nums_memory());
        return args;
    }

public:
    static primitive_impl* create(const generate_proposals_node& arg, const kernel_impl_params& impl_param) {
        // get_default_params picks up input 0 (im_info) and the output;
        // the remaining tensors are appended explicitly below.
        auto params = get_default_params<kernel_selector::generate_proposals_params>(impl_param);
        auto optional_params = get_default_optional_params<
                kernel_selector::generate_proposals_optional_params>(arg.get_program());

        const auto& primitive = arg.get_primitive();

        params.min_size = primitive->min_size;
        params.nms_threshold  = primitive->nms_threshold;
        params.pre_nms_count = primitive->pre_nms_count;
        params.post_nms_count = primitive->post_nms_count;
        params.normalized = primitive->normalized;
        params.nms_eta = primitive->nms_eta;
        // Kernel selector only distinguishes i32 vs i64 for the ROI-count output.
        params.roi_num_type = primitive->roi_num_type == cldnn::data_types::i32 ?
                kernel_selector::Datatype::INT32 : kernel_selector::Datatype::INT64;

        params.inputs.push_back(convert_data_tensor(arg.anchors().get_output_layout()));
        params.inputs.push_back(convert_data_tensor(arg.deltas().get_output_layout()));
        params.inputs.push_back(convert_data_tensor(arg.scores().get_output_layout()));
        params.inputs.push_back(convert_data_tensor(arg.output_rois_scores_node().get_output_layout()));
        params.inputs.push_back(convert_data_tensor(arg.output_rois_nums_node().get_output_layout()));

        const auto& kernel_selector = kernel_selector::generate_proposals_kernel_selector::Instance();
        const auto best_kernels = kernel_selector.GetBestKernels(params, optional_params);

        CLDNN_ERROR_BOOL(arg.id(),
                         "best_kernels.empty()",
                         best_kernels.empty(),
                         "Cannot find a proper kernel with this arguments");

        return new generate_proposals_impl(arg, best_kernels[0]);
    }
};
namespace detail {
// Registers the OCL implementation for every supported (data type, layout)
// combination: f16/f32 in planar bfyx plus the blocked fsv/bsv layouts.
attach_generate_proposals_impl::attach_generate_proposals_impl() {
    implementation_map<generate_proposals>::add(impl_types::ocl,
                                                generate_proposals_impl::create, {
            std::make_tuple(data_types::f16, format::bfyx),
            std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
            std::make_tuple(data_types::f16, format::b_fs_yx_fsv32),
            std::make_tuple(data_types::f16, format::bs_fs_yx_bsv16_fsv16),
            std::make_tuple(data_types::f16, format::bs_fs_yx_bsv32_fsv16),
            std::make_tuple(data_types::f16, format::bs_fs_yx_bsv32_fsv32),
            std::make_tuple(data_types::f32, format::bfyx),
            std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
            std::make_tuple(data_types::f32, format::b_fs_yx_fsv32),
            std::make_tuple(data_types::f32, format::bs_fs_yx_bsv16_fsv16),
            std::make_tuple(data_types::f32, format::bs_fs_yx_bsv32_fsv16),
            std::make_tuple(data_types::f32, format::bs_fs_yx_bsv32_fsv32)});
}
} // namespace detail
} // namespace ocl
} // namespace cldnn

View File

@ -41,6 +41,7 @@ void register_implementations() {
REGISTER_OCL(gather_elements);
REGISTER_OCL(gather_nd);
REGISTER_OCL(gemm);
REGISTER_OCL(generate_proposals);
REGISTER_OCL(lrn);
REGISTER_OCL(lstm_gemm);
REGISTER_OCL(lstm_elt);

View File

@ -119,6 +119,7 @@ REGISTER_OCL(gather);
REGISTER_OCL(gather_nd);
REGISTER_OCL(gather_elements);
REGISTER_OCL(gemm);
REGISTER_OCL(generate_proposals);
REGISTER_OCL(lrn);
REGISTER_OCL(lstm_gemm);
REGISTER_OCL(lstm_elt);

View File

@ -0,0 +1,50 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "intel_gpu/primitives/generate_proposals.hpp"
#include "primitive_inst.h"
namespace cldnn {
// Program-node specialization: names the six dependencies of the primitive
// (4 real inputs + 2 buffers that receive the 2nd and 3rd op outputs).
template <>
struct typed_program_node<generate_proposals>
        : public typed_program_node_base<generate_proposals> {
    using parent = typed_program_node_base<generate_proposals>;

public:
    using parent::parent;

    program_node& input() const { return get_dependency(0); }   // im_info
    program_node& anchors() const { return get_dependency(1); }
    program_node& deltas() const { return get_dependency(2); }
    program_node& scores() const { return get_dependency(3); }
    program_node& output_rois_scores_node() const { return get_dependency(4); }
    program_node& output_rois_nums_node() const { return get_dependency(5); }
};
using generate_proposals_node = typed_program_node<generate_proposals>;
// Instance specialization: exposes the memories of the extra output buffers
// (dependencies 4 and 5) so the impl can pass them to the kernel.
template <>
class typed_primitive_inst<generate_proposals>
        : public typed_primitive_inst_base<generate_proposals> {
    using parent = typed_primitive_inst_base<generate_proposals>;

public:
    static layout calc_output_layout(const generate_proposals_node& node, kernel_impl_params const& impl_param);
    static std::string to_string(const generate_proposals_node& node);

    typed_primitive_inst(network& network, const generate_proposals_node& node)
            : parent(network, node)
    {}

    memory::ptr output_rois_scores_memory() const { return dep_memory_ptr(4); }
    memory::ptr output_rois_nums_memory() const { return dep_memory_ptr(5); }
};
using generate_proposals_inst = typed_primitive_inst<generate_proposals>;
} // namespace cldnn

View File

@ -38,6 +38,7 @@
#include "data_inst.h"
#include "deconvolution_inst.h"
#include "detection_output_inst.h"
#include "generate_proposals_inst.h"
#include "input_layout_inst.h"
#include "shuffle_channels_inst.h"
#include "arg_max_min_inst.h"
@ -1432,7 +1433,8 @@ void program::set_layout_optimizer_attributes(layout_optimizer& lo) {
prim.type() != cldnn::roll::type_id() &&
prim.type() != cldnn::prior_box::type_id() &&
prim.type() != cldnn::resample::type_id() &&
prim.type() != cldnn::eye::type_id()) {
prim.type() != cldnn::eye::type_id() &&
prim.type() != cldnn::generate_proposals::type_id()) {
can_use_fsv16 = false;
}
@ -1468,7 +1470,8 @@ void program::set_layout_optimizer_attributes(layout_optimizer& lo) {
prim.type() != cldnn::roll::type_id() &&
prim.type() != cldnn::resample::type_id() &&
prim.type() != cldnn::prior_box::type_id() &&
prim.type() != cldnn::eye::type_id()) {
prim.type() != cldnn::eye::type_id() &&
prim.type() != cldnn::generate_proposals::type_id()) {
can_use_bs_fs_yx_bsv16_fsv16 = false;
}
}

View File

@ -91,6 +91,7 @@ enum class KernelType {
REVERSE,
PRIOR_BOX,
EYE,
GENERATE_PROPOSALS
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

View File

@ -0,0 +1,196 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "generate_proposals_kernel_ref.h"
#include "kernel_selector_utils.h"
namespace kernel_selector {
// Declares the tensor data types and layouts this kernel supports.
// The same four data types are enabled for both inputs and outputs.
ParamsKey GenerateProposalsRef::GetSupportedKey() const {
    ParamsKey key;
    for (const auto dt : {Datatype::F16, Datatype::F32, Datatype::INT32, Datatype::INT64}) {
        key.EnableInputDataType(dt);
        key.EnableOutputDataType(dt);
    }
    key.EnableAllInputLayout();
    key.EnableAllOutputLayout();
    key.EnableBatching();
    key.EnableDifferentTypes();
    key.EnableTensorPitches();
    return key;
}
// Reference implementation: prefer any specialized kernel over this one.
KernelsPriority GenerateProposalsRef::GetKernelsPriority(const Params&, const optional_params&) const {
    return DONT_USE_IF_HAVE_SOMETHING_ELSE;
}
// Accepts only parameter/option bundles created for this kernel type.
bool GenerateProposalsRef::Validate(const Params& p, const optional_params& o) const {
    return p.GetType() == KernelType::GENERATE_PROPOSALS &&
           o.GetType() == KernelType::GENERATE_PROPOSALS;
}
namespace {
// Fixed positions of tensors in params.inputs. The op's 2nd and 3rd outputs
// are passed to the kernel as extra inputs (indices 4 and 5).
constexpr size_t kImInfoInputIdx = 0;
constexpr size_t kAnchorsInputIdx = 1;
constexpr size_t kDeltasInputIdx = 2;
constexpr size_t kScoresInputIdx = 3;
constexpr size_t kRoisScoresOutputIdx = 4;
constexpr size_t kRoisNumsOutputIdx = 5;

// Computes global/local work sizes for each of the four kernel stages:
//   0: refine anchors          - one item per (h, w, batch*anchor)
//   1: sort proposals by score - one item per batch
//   2: NMS                     - one item per batch
//   3: emit rois/scores        - single work item
GenerateProposalsRef::DispatchData SetDefault(const generate_proposals_params& params, size_t idx) {
    GenerateProposalsRef::DispatchData dispatch_data;

    const auto& inputs = params.inputs;
    const auto num_batches = inputs[kScoresInputIdx].Batch().v;
    const auto anchors_num = inputs[kScoresInputIdx].Feature().v;
    const auto bottom_H = inputs[kDeltasInputIdx].Y().v;
    const auto bottom_W = inputs[kDeltasInputIdx].X().v;

    if (idx == 0) {
        dispatch_data.gws = {bottom_H, bottom_W, num_batches * anchors_num};
    } else if (idx == 1 || idx == 2) {
        dispatch_data.gws = {num_batches, 1, 1};
    } else if (idx == 3) {
        dispatch_data.gws = {1, 1, 1};
    }

    dispatch_data.lws = GetOptimalLocalWorkGroupSizes(dispatch_data.gws, params.engineInfo);

    return dispatch_data;
}
}  // namespace
// Binds the argument list for each of the four kernel stages. The order of
// push_backs must match the parameter order of the corresponding KERNEL()
// entry points in generate_proposals_ref.cl exactly.
void GenerateProposalsRef::SetKernelArguments(
        const generate_proposals_params& params,
        size_t idx, cldnn::arguments_desc& arguments) const {
    switch (idx) {
        case 0: {  // refine anchors
            arguments.push_back({ArgumentDescriptor::Types::INPUT, kImInfoInputIdx});
            arguments.push_back({ArgumentDescriptor::Types::INPUT, kAnchorsInputIdx});
            arguments.push_back({ArgumentDescriptor::Types::INPUT, kDeltasInputIdx});
            arguments.push_back({ArgumentDescriptor::Types::INPUT, kScoresInputIdx});
            arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0});  // proposals
            break;
        }
        case 1: {  // sort proposals by score
            arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0});  // proposals
            break;
        }
        case 2: {  // NMS
            arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0});  // proposals
            arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 1});  // nms_out_indices
            // ROI counts are written through an extra "input" buffer.
            arguments.push_back({ArgumentDescriptor::Types::INPUT, kRoisNumsOutputIdx});  // rois num
            break;
        }
        case 3: {  // convert proposals to rois and roi scores
            arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0});  // proposals
            arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 1});  // nms_out_indices
            arguments.push_back({ArgumentDescriptor::Types::INPUT, kRoisNumsOutputIdx});   // rois num
            arguments.push_back({ArgumentDescriptor::Types::OUTPUT, 0});                   // rois
            arguments.push_back({ArgumentDescriptor::Types::INPUT, kRoisScoresOutputIdx});  // roi scores
            break;
        }
        default:
            throw std::invalid_argument("generate_proposals has 4 kernels. valid index is 0 ~ 3.");
    }
}
// Builds the KernelData for the four-stage GenerateProposals pipeline:
// per-stage dispatch sizes, jit constants and argument bindings, plus the
// two shared internal buffers (proposals and NMS output indices).
KernelsData GenerateProposalsRef::GetKernelsData(const Params& params, const optional_params& options) const {
    if (!Validate(params, options)) {
        return {};
    }

    constexpr size_t kKernelsNum = 4;
    KernelData kd = KernelData::Default<generate_proposals_params>(params, kKernelsNum);
    const generate_proposals_params& new_params = static_cast<const generate_proposals_params&>(params);

    const auto& inputs = new_params.inputs;
    const auto anchors_num = inputs[kScoresInputIdx].Feature().v;
    const auto bottom_H = inputs[kDeltasInputIdx].Y().v;
    const auto bottom_W = inputs[kDeltasInputIdx].X().v;
    // im_info has 3 channels (H, W, scale) or 4 (H, W, scale_H, scale_W);
    // pick where the width scale lives.
    const auto scale_w_index = inputs[kImInfoInputIdx].Feature().v == 3 ? 2 : 3;
    const auto num_proposals = anchors_num * bottom_H * bottom_W;
    const auto pre_nms_topn = std::min(num_proposals, new_params.pre_nms_count);
    const auto max_delta_log_wh = static_cast<float>(std::log(1000.0 / 16.0));

    kd.internalBufferDataType = Datatype::F32;

    const auto num_batches = inputs[kScoresInputIdx].Batch().v;
    constexpr size_t kProposalBoxSize = 6;  // 6 values: {x0, y0, x1, y1, score, keep}
    const auto proposals_buffer_size = num_batches * num_proposals * sizeof(float) * kProposalBoxSize;
    kd.internalBufferSizes.push_back(proposals_buffer_size);
    // NOTE(review): the NMS index buffer is sized with sizeof(float); this
    // matches internalBufferDataType == F32, but confirm the element type the
    // NMS stage actually stores there.
    const auto out_indices_size = num_batches * new_params.post_nms_count * sizeof(float);
    kd.internalBufferSizes.push_back(out_indices_size);

    for (size_t i = 0; i < kKernelsNum; ++i) {
        const auto dispatchData = SetDefault(new_params, i);
        const auto entry_point = GetEntryPoint(kernelName, new_params.layerID, params, options, i);
        auto cldnn_jit = MakeBaseParamsJitConstants(new_params);
        // Selects which KERNEL() body is compiled from the common .cl file.
        cldnn_jit.AddConstant(MakeJitConstant("GENERATE_PROPOSALS_STAGE_" + std::to_string(i), "true"));
        cldnn_jit.AddConstant(MakeJitConstant("PROPOSAL_SIZE", kProposalBoxSize));
        cldnn_jit.Merge(MakeTypeJitConstants(new_params.roi_num_type, "ROI_NUM"));
        if (new_params.normalized) {
            cldnn_jit.AddConstant(MakeJitConstant("NORMALIZED", 1));
        }

        switch (i) {
            case 0: {  // refine anchors
                cldnn_jit.AddConstants({MakeJitConstant("MIN_SIZE", new_params.min_size),
                                        MakeJitConstant("ANCHORS_NUM", anchors_num),
                                        MakeJitConstant("NUM_PROPOSALS", num_proposals),
                                        MakeJitConstant("BOTTOM_H", bottom_H),
                                        MakeJitConstant("BOTTOM_W", bottom_W),
                                        MakeJitConstant("BOTTOM_AREA", bottom_H * bottom_W),
                                        MakeJitConstant("SCALE_W_INDEX", scale_w_index),
                                        MakeJitConstant("MAX_DELTA_LOG_WH", max_delta_log_wh)
                                       });
                break;
            }
            case 1: {  // sort by score
                cldnn_jit.AddConstants({MakeJitConstant("NUM_PROPOSALS", num_proposals)});
                break;
            }
            case 2: {  // NMS
                cldnn_jit.AddConstants({MakeJitConstant("NUM_PROPOSALS", num_proposals),
                                        MakeJitConstant("PRE_NMS_TOPN", pre_nms_topn),
                                        MakeJitConstant("POST_NMS_COUNT", new_params.post_nms_count),
                                        MakeJitConstant("NMS_THRESHOLD", new_params.nms_threshold),
                                       });
                break;
            }
            case 3: {  // emit rois/scores
                cldnn_jit.AddConstants({MakeJitConstant("POST_NMS_COUNT", new_params.post_nms_count),
                                        MakeJitConstant("NUM_PROPOSALS", num_proposals)
                                       });
                break;
            }
            default:
                throw std::invalid_argument("GENERATE_PROPOSALS has 4 kernels. valid index is 0 ~ 3.");
        }

        const auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
        auto& kernel = kd.kernels[i];

        KernelBase::CheckDispatchData(kernelName, dispatchData, params.engineInfo.maxWorkGroupSize);
        kernel.params.workGroups.global = dispatchData.gws;
        kernel.params.workGroups.local = dispatchData.lws;
        kernel.code.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo);
        SetKernelArguments(new_params, i, kernel.params.arguments);
    }

    return {kd};
}
} // namespace kernel_selector

View File

@ -0,0 +1,45 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "kernel_base_opencl.h"
namespace kernel_selector {
// Parameters of the GenerateProposals kernel; mirrors the attributes of the
// cldnn generate_proposals primitive.
struct generate_proposals_params : public base_params {
    generate_proposals_params()
            : base_params(KernelType::GENERATE_PROPOSALS) {}

    float min_size{0.0f};         // minimum box width and height
    float nms_threshold{0.0f};    // IoU threshold for the NMS stage
    size_t pre_nms_count{0};      // top-n proposals kept before NMS
    size_t post_nms_count{0};     // top-n proposals kept after NMS
    bool normalized{true};        // whether proposal bboxes are normalized
    float nms_eta{1.0f};          // eta parameter for adaptive NMS
    Datatype roi_num_type = Datatype::INT64;  // element type of the ROI-count output
};
// Optional parameters for the GenerateProposals kernel (none beyond the base).
struct generate_proposals_optional_params : public optional_params {
    generate_proposals_optional_params()
            : optional_params(KernelType::GENERATE_PROPOSALS) {}
};
// Reference OpenCL kernel for GenerateProposals, implemented as a four-stage
// pipeline (refine anchors, sort, NMS, emit rois) in generate_proposals_ref.cl.
class GenerateProposalsRef : public KernelBaseOpenCL {
public:
    GenerateProposalsRef()
            : KernelBaseOpenCL("generate_proposals_ref") {}

    ~GenerateProposalsRef() = default;

    using DispatchData = CommonDispatchData;

    KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
    KernelsPriority GetKernelsPriority(const Params& params, const optional_params& options) const override;
    ParamsKey GetSupportedKey() const override;

protected:
    bool Validate(const Params& p, const optional_params& o) const override;
    // Binds the per-stage kernel argument lists (see the .cpp for stage layout).
    void SetKernelArguments(const generate_proposals_params& params,
                            size_t idx, cldnn::arguments_desc& kernel) const;
};
} // namespace kernel_selector

View File

@ -0,0 +1,21 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "generate_proposals_kernel_selector.h"
#include "generate_proposals_kernel_ref.h"
namespace kernel_selector {
// Registers all available GenerateProposals kernels (currently only the
// reference implementation).
generate_proposals_kernel_selector::generate_proposals_kernel_selector() {
    Attach<GenerateProposalsRef>();
}

// Singleton accessor used by the GPU plugin implementation.
generate_proposals_kernel_selector& generate_proposals_kernel_selector::Instance() {
    static generate_proposals_kernel_selector instance_;
    return instance_;
}

// Picks the best kernel for the given parameters among the attached ones.
KernelsData generate_proposals_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const {
    return GetNaiveBestKernel(params, options, KernelType::GENERATE_PROPOSALS);
}
} // namespace kernel_selector

View File

@ -0,0 +1,18 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "kernel_selector.h"
namespace kernel_selector {
// Kernel selector for the GenerateProposals operation (singleton).
class generate_proposals_kernel_selector : public kernel_selector_base {
public:
    static generate_proposals_kernel_selector& Instance();
    generate_proposals_kernel_selector();
    KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
};
} // namespace kernel_selector

View File

@ -0,0 +1,306 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
// Literal 0.5 in the input element type: half literal for f16 inputs,
// float literal otherwise.
#if INPUT0_TYPE_SIZE == 2 //f16
#define HALF_ONE 0.5h
#else
#define HALF_ONE 0.5f
#endif

#define ZERO INPUT0_VAL_ZERO

// For normalized boxes width/height is x1-x0; for pixel coordinates an extra
// +1 is added (offset of 1 in that case, 0 otherwise).
#ifdef NORMALIZED
#define COORDINATES_OFFSET INPUT0_VAL_ZERO
#else
#define COORDINATES_OFFSET INPUT0_VAL_ONE
#endif
#ifdef GENERATE_PROPOSALS_STAGE_0
// 0. Refine anchors
// Stage 0: refine anchors.
// GWS mapping: (0) -> h, (1) -> w, (2) -> batch * anchor (see SetDefault).
// For each (batch, anchor, h, w) applies the predicted deltas to the anchor
// box, clips it to the image, and writes a PROPOSAL_SIZE-element record
// {x0, y0, x1, y1, score, keep} into the intermediate proposals buffer.
KERNEL(generate_proposals_ref_stage_0)
(const __global INPUT0_TYPE* im_info,
 const __global INPUT1_TYPE* anchors,
 const __global INPUT2_TYPE* deltas,
 const __global INPUT3_TYPE* scores,
 __global OUTPUT_TYPE* proposals) {
    const uint h = get_global_id(0);
    const uint w = get_global_id(1);
    const uint ba = (uint)get_global_id(2);
    // NOTE(review): the dispatch packs gws[2] as num_batches * anchors_num
    // (anchors_num taken from the scores tensor), but the decomposition below
    // divides by INPUT0_FEATURE_NUM, i.e. the im_info channel count (3 or 4).
    // Verify these two agree for all supported configurations.
    const uint batch = ba / INPUT0_FEATURE_NUM;
    const uint anchor = ba % INPUT0_FEATURE_NUM;

    const INPUT0_TYPE img_H = im_info[INPUT0_GET_INDEX(batch, 0, 0, 0)];
    const INPUT0_TYPE img_W = im_info[INPUT0_GET_INDEX(batch, 1, 0, 0)];
    const INPUT0_TYPE scale_H = im_info[INPUT0_GET_INDEX(batch, 2, 0, 0)];
    const INPUT0_TYPE scale_W = im_info[INPUT0_GET_INDEX(batch, SCALE_W_INDEX, 0, 0)];

    const float min_box_H = MIN_SIZE * scale_H;
    const float min_box_W = MIN_SIZE * scale_W;

    // Anchor tensor is addressed as [H, W, anchor, 4] through the bfyx macro.
    INPUT0_TYPE x0 = anchors[INPUT1_GET_INDEX(h, w, anchor, 0)];
    INPUT0_TYPE y0 = anchors[INPUT1_GET_INDEX(h, w, anchor, 1)];
    INPUT0_TYPE x1 = anchors[INPUT1_GET_INDEX(h, w, anchor, 2)];
    INPUT0_TYPE y1 = anchors[INPUT1_GET_INDEX(h, w, anchor, 3)];

    // Deltas are packed 4-per-anchor along the feature axis.
    const INPUT0_TYPE dx = deltas[INPUT2_GET_INDEX(batch, anchor * 4 + 0 , h, w)];
    const INPUT0_TYPE dy = deltas[INPUT2_GET_INDEX(batch, anchor * 4 + 1 , h , w)];
    const INPUT0_TYPE d_log_w = deltas[INPUT2_GET_INDEX(batch, anchor * 4 + 2 , h, w)];
    const INPUT0_TYPE d_log_h = deltas[INPUT2_GET_INDEX(batch, anchor * 4 + 3 , h, w)];

    const INPUT0_TYPE score = scores[INPUT3_GET_INDEX(batch, anchor, h, w)];

    // width & height of box
    const INPUT0_TYPE ww = x1 - x0 + COORDINATES_OFFSET;
    const INPUT0_TYPE hh = y1 - y0 + COORDINATES_OFFSET;
    // center location of box
    const INPUT0_TYPE ctr_x = x0 + HALF_ONE * ww;
    const INPUT0_TYPE ctr_y = y0 + HALF_ONE * hh;

    // new center location according to deltas (dx, dy)
    const INPUT0_TYPE pred_ctr_x = dx * ww + ctr_x;
    const INPUT0_TYPE pred_ctr_y = dy * hh + ctr_y;
    // new width & height according to deltas d(log w), d(log h),
    // capped at MAX_DELTA_LOG_WH to avoid exp() overflow
    const INPUT0_TYPE pred_w = exp(min(d_log_w, TO_INPUT0_TYPE(MAX_DELTA_LOG_WH))) * ww;
    const INPUT0_TYPE pred_h = exp(min(d_log_h, TO_INPUT0_TYPE(MAX_DELTA_LOG_WH))) * hh;

    // update upper-left corner location
    x0 = pred_ctr_x - HALF_ONE * pred_w;
    y0 = pred_ctr_y - HALF_ONE * pred_h;
    // update lower-right corner location
    x1 = pred_ctr_x + HALF_ONE * pred_w - COORDINATES_OFFSET;
    y1 = pred_ctr_y + HALF_ONE * pred_h - COORDINATES_OFFSET;

    // adjust new corner locations to be within the image region
    x0 = max(ZERO, min(x0, img_W - COORDINATES_OFFSET));
    y0 = max(ZERO, min(y0, img_H - COORDINATES_OFFSET));
    x1 = max(ZERO, min(x1, img_W - COORDINATES_OFFSET));
    y1 = max(ZERO, min(y1, img_H - COORDINATES_OFFSET));

    // recompute new width & height
    const INPUT0_TYPE box_w = x1 - x0 + COORDINATES_OFFSET;
    const INPUT0_TYPE box_h = y1 - y0 + COORDINATES_OFFSET;

    // Linear offset of this proposal's record in the flat proposals buffer.
    const uint batch_offset = batch * NUM_PROPOSALS * PROPOSAL_SIZE;
    const uint offset = h * BOTTOM_W + w;
    const uint proposal_idx = batch_offset + (offset * ANCHORS_NUM + anchor) * PROPOSAL_SIZE;

    proposals[proposal_idx + 0] = x0;
    proposals[proposal_idx + 1] = y0;
    proposals[proposal_idx + 2] = x1;
    proposals[proposal_idx + 3] = y1;
    proposals[proposal_idx + 4] = score;
    // keep flag: box must meet the scaled minimum size in both dimensions
    proposals[proposal_idx + 5] = ((min_box_W <= box_w) && (min_box_H <= box_h)) ? 1 : 0;
}
#endif /* GENERATE_PROPOSALS_STAGE_0 */
#ifdef GENERATE_PROPOSALS_STAGE_1
// One proposal record as written by stage 0: corner coordinates, objectness
// score and a keep-flag (0 when the box failed the min-size filter).
// The packed attribute guarantees the 6 fields are laid out contiguously so the
// struct can alias the raw PROPOSAL_SIZE-strided buffer produced by stage 0.
typedef struct __attribute__((__packed__)) {
    INPUT0_TYPE x0;     // top-left x
    INPUT0_TYPE y0;     // top-left y
    INPUT0_TYPE x1;     // bottom-right x
    INPUT0_TYPE y1;     // bottom-right y
    INPUT0_TYPE score;  // objectness score (sort/NMS key)
    INPUT0_TYPE keep;   // 1 if the box passed the min-size filter, else 0
} Box;
// Exchanges two proposals in global memory (helper for the sorting routines).
inline void FUNC(swap_box)(__global Box* a, __global Box* b) {
    const Box temp = *a;
    *a = *b;
    *b = temp;
}
// Lomuto partition step for the iterative quicksort below.
// Uses arr[h].score as the pivot and moves every box with a strictly greater
// score in front of it, so the overall sort orders boxes by descending score.
// Returns the pivot's final position.
inline int FUNC(partition)(__global Box* arr, int l, int h) {
    const INPUT0_TYPE pivot_score = arr[h].score;
    int boundary = l - 1;
    for (int idx = l; idx < h; ++idx) {
        if (arr[idx].score > pivot_score) {
            ++boundary;
            FUNC_CALL(swap_box)(&arr[boundary], &arr[idx]);
        }
    }
    FUNC_CALL(swap_box)(&arr[boundary + 1], &arr[h]);
    return boundary + 1;
}
// Bubble sort of arr[l..h] (inclusive) by DESCENDING score, with early exit
// once a pass performs no swap. Used by quickSortIterative as a fallback when
// its explicit stack would overflow, so it must produce the same ordering as
// the quicksort partition above (which places higher scores first).
inline void FUNC(bubbleSortIterative)(__global Box* arr, int l, int h) {
    for (int i = 0; i < h - l; i++) {
        bool swapped = false;
        for (int j = l; j < h - i; j++) {
            // Fix: the original compared with '>' which sorts ascending and
            // contradicts the descending order produced by partition().
            if (arr[j].score < arr[j + 1].score) {
                FUNC_CALL(swap_box)(&arr[j], &arr[j + 1]);
                swapped = true;
            }
        }
        if (!swapped)
            break;
    }
}
// Iterative quicksort of arr[l..h] (inclusive) by descending score.
// Uses an explicit fixed-size stack of sub-range bounds instead of recursion
// (recursion is unavailable in OpenCL C); when the stack is about to overflow,
// the affected sub-range is sorted with the bubble-sort fallback instead.
inline void FUNC(quickSortIterative)(__global Box* arr, int l, int h) {
    // Create an auxiliary stack
    const int kStackSize = 100;
    int stack[kStackSize];

    // initialize top of stack
    int top = -1;

    // push initial values of l and h to stack
    stack[++top] = l;
    stack[++top] = h;

    // Keep popping from stack while is not empty
    while (top >= 0) {
        // Pop h and l
        h = stack[top--];
        l = stack[top--];

        // Set pivot element at its correct position
        // in sorted array
        int p = FUNC_CALL(partition)(arr, l, h);

        // If there are elements on left side of pivot,
        // then push left side to stack
        if (p - 1 > l) {
            // Not enough room for another range: sort it in place instead.
            if (top >= (kStackSize - 1)) {
                FUNC_CALL(bubbleSortIterative)(arr, l, p - 1);
            } else {
                stack[++top] = l;
                stack[++top] = p - 1;
            }
        }

        // If there are elements on right side of pivot,
        // then push right side to stack
        if (p + 1 < h) {
            // Not enough room for another range: sort it in place instead.
            if (top >= (kStackSize - 1)) {
                FUNC_CALL(bubbleSortIterative)(arr, p + 1, h);
            } else {
                stack[++top] = p + 1;
                stack[++top] = h;
            }
        }
    }
}
// 1. Sort boxes by scores
// One work-item per batch: reinterprets that batch's slice of the proposals
// buffer as Box records and sorts them by descending score.
KERNEL(generate_proposals_ref_stage_1)(__global OUTPUT_TYPE* proposals) {
    const uint batch = get_global_id(0);
    __global Box* boxes = (__global Box*)(proposals + batch * NUM_PROPOSALS * PROPOSAL_SIZE);
    FUNC_CALL(quickSortIterative)(boxes, 0, NUM_PROPOSALS-1);
}
#endif /* GENERATE_PROPOSALS_STAGE_1 */
#ifdef GENERATE_PROPOSALS_STAGE_2
// 2. NMS
// Greedily keeps up to POST_NMS_COUNT boxes per batch from the score-sorted
// proposals (stage 1), suppressing boxes whose IoU with an already-kept box
// exceeds NMS_THRESHOLD. Writes the kept indices and the per-batch count.
KERNEL(generate_proposals_ref_stage_2)
(const __global INPUT0_TYPE* boxes,
 __global size_t* out_indices,         // kept proposal indices, POST_NMS_COUNT slots per batch
 __global ROI_NUM_TYPE* num_outputs) { // number of kept boxes per batch
    // One work-item per batch.
    const uint batch = get_global_id(0);
    const uint batch_offset = batch * NUM_PROPOSALS * PROPOSAL_SIZE;
    uint count = 0;

    // NOTE(review): these __local arrays are indexed across ALL batches, which
    // assumes every batch's work-item runs in the same work-group - confirm
    // against the dispatch configuration.
    __local uint index_out[INPUT0_BATCH_NUM * POST_NMS_COUNT];
    __local bool is_dead[INPUT0_BATCH_NUM * PRE_NMS_TOPN];

    // Slot 5 of each proposal is the stage-0 min-size keep-flag (0 == rejected).
    for (uint box = 0; box < PRE_NMS_TOPN; ++box) {
        is_dead[batch * PRE_NMS_TOPN + box] = boxes[batch_offset + PROPOSAL_SIZE * box + 5] == 0.0f;
    }

    for (uint box = 0; box < PRE_NMS_TOPN; ++box) {
        if (is_dead[batch * PRE_NMS_TOPN + box])
            continue;

        // Keep this box; boxes arrive score-sorted, so earlier == higher score.
        index_out[batch * POST_NMS_COUNT + count++] = box;
        if (count == POST_NMS_COUNT)
            break;

        const uint box_offset = batch_offset + box * PROPOSAL_SIZE;
        const INPUT0_TYPE x0i = boxes[box_offset + 0];
        const INPUT0_TYPE y0i = boxes[box_offset + 1];
        const INPUT0_TYPE x1i = boxes[box_offset + 2];
        const INPUT0_TYPE y1i = boxes[box_offset + 3];

        const INPUT0_TYPE a_width = x1i - x0i;
        const INPUT0_TYPE a_height = y1i - y0i;
        const INPUT0_TYPE a_area = (a_width + COORDINATES_OFFSET) * (a_height + COORDINATES_OFFSET);

        // Suppress every lower-scored box that overlaps this one too much.
        for (uint tail = box + 1; tail < PRE_NMS_TOPN; ++tail) {
            const uint tail_offset = batch_offset + tail * PROPOSAL_SIZE;
            const INPUT0_TYPE x0j = boxes[tail_offset + 0];
            const INPUT0_TYPE y0j = boxes[tail_offset + 1];
            const INPUT0_TYPE x1j = boxes[tail_offset + 2];
            const INPUT0_TYPE y1j = boxes[tail_offset + 3];

            // Intersection rectangle of boxes i and j.
            const INPUT0_TYPE x0 = max(x0i, x0j);
            const INPUT0_TYPE y0 = max(y0i, y0j);
            const INPUT0_TYPE x1 = min(x1i, x1j);
            const INPUT0_TYPE y1 = min(y1i, y1j);

            const INPUT0_TYPE width = x1 - x0 + COORDINATES_OFFSET;
            const INPUT0_TYPE height = y1 - y0 + COORDINATES_OFFSET;
            const INPUT0_TYPE area = max(ZERO, width) * max(ZERO, height);

            const INPUT0_TYPE b_width = x1j - x0j;
            const INPUT0_TYPE b_height = y1j - y0j;
            const INPUT0_TYPE b_area = (b_width + COORDINATES_OFFSET) * (b_height + COORDINATES_OFFSET);

            // Despite the name, this is the intersection-over-union (IoU) ratio.
            const INPUT0_TYPE intersection_area = area / (a_area + b_area - area);

            // The extra corner comparisons require a genuine overlap before suppressing.
            if ( (NMS_THRESHOLD < intersection_area) && (x0i <= x1j) && (y0i <= y1j) && (x0j <= x1i) && (y0j <= y1i) ) {
                is_dead[batch * PRE_NMS_TOPN + tail] = true;
            }
        }
    }

    num_outputs[INPUT5_GET_INDEX(batch, 0, 0, 0)] = count;
    for (uint i = 0; i < count; ++i) {
        out_indices[batch * POST_NMS_COUNT + i] = index_out[batch * POST_NMS_COUNT + i];
    }
}
#endif /* GENERATE_PROPOSALS_STAGE_2 */
#ifdef GENERATE_PROPOSALS_STAGE_3
// 3. Convert proposals to rois and roi_scores
// Gathers the NMS-surviving proposals (per-batch selected indices) into the
// final ROI and score outputs, packed contiguously across batches, then
// zero-fills the unused tail of both outputs.
KERNEL(generate_proposals_ref_stage_3)
(const __global INPUT0_TYPE* boxes,
 const __global size_t* out_indices,
 const __global ROI_NUM_TYPE* num_outputs,
 __global OUTPUT_TYPE* rois,
 __global INPUT4_TYPE* roi_scores) {
    uint roi_index = 0;
    for (uint batch = 0; batch < INPUT0_BATCH_NUM; ++batch) {
        for (uint i = 0; i < num_outputs[INPUT5_GET_INDEX(batch, 0, 0, 0)]; ++i) {
            const uint box_index = (batch * NUM_PROPOSALS + out_indices[batch * POST_NMS_COUNT + i]) * PROPOSAL_SIZE;
            rois[OUTPUT_GET_INDEX(roi_index, 0, 0, 0)] = boxes[box_index + 0];
            rois[OUTPUT_GET_INDEX(roi_index, 1, 0, 0)] = boxes[box_index + 1];
            rois[OUTPUT_GET_INDEX(roi_index, 2, 0, 0)] = boxes[box_index + 2];
            rois[OUTPUT_GET_INDEX(roi_index, 3, 0, 0)] = boxes[box_index + 3];
            roi_scores[INPUT4_GET_INDEX(roi_index, 0, 0, 0)] = boxes[box_index + 4];
            ++roi_index;
        }
    }
    // fill the rest of outputs with zeros
    while(roi_index < INPUT0_BATCH_NUM * POST_NMS_COUNT) {
        rois[OUTPUT_GET_INDEX(roi_index, 0, 0, 0)] = 0.0f;
        rois[OUTPUT_GET_INDEX(roi_index, 1, 0, 0)] = 0.0f;
        rois[OUTPUT_GET_INDEX(roi_index, 2, 0, 0)] = 0.0f;
        rois[OUTPUT_GET_INDEX(roi_index, 3, 0, 0)] = 0.0f;
        // Fix: use the layout-aware index macro here as well - the original
        // plain linear index writes the wrong element for blocked (fsv/bsv)
        // layouts, inconsistently with the gather loop above.
        roi_scores[INPUT4_GET_INDEX(roi_index, 0, 0, 0)] = 0.0f;
        ++roi_index;
    }
}
#endif /* GENERATE_PROPOSALS_STAGE_3 */
#undef HALF_ONE
#undef ZERO
#undef COORDINATES_OFFSET

View File

@ -0,0 +1,79 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "intel_gpu/primitives/generate_proposals.hpp"

#include <string>

#include <ngraph_ops/generate_proposals_ie_internal.hpp>

#include "intel_gpu/plugin/common_utils.hpp"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/primitives/mutable_data.hpp"
namespace ov {
namespace intel_gpu {
// Maps the internal GenerateProposals op (3 outputs) onto GPU primitives.
// The kernel itself produces only output 0 (rois); outputs 1 (scores) and
// 2 (roisNum) are exchanged through shared mutable_data buffers.
static void CreateGenerateProposalsIEInternalOp(
        Program& p,
        const std::shared_ptr<ngraph::op::internal::GenerateProposalsIEInternal>& op) {
    validate_inputs_count(op, {4});
    if (op->get_output_size() != 3) {
        IE_THROW() << "GenerateProposals requires 3 outputs";
    }

    auto inputs = p.GetInputPrimitiveIDs(op);
    const auto& attrs = op->get_attrs();

    const auto layer_type_name = layer_type_name_ID(op);
    const auto layer_name = layer_type_name + ".out0";

    // For an extra output: allocates a shared buffer, registers its "write"
    // mutable_data primitive and appends it to the kernel's inputs.
    // The matching "read" primitives are added after the kernel below.
    const auto add_mutable_output = [&](size_t output_idx) {
        const auto precision = op->get_output_element_type(output_idx);
        const auto shape = op->get_output_shape(output_idx);
        const cldnn::layout buffer_layout{cldnn::element_type_to_data_type(precision),
                                          cldnn::format::get_default_format(shape.size()),
                                          tensor_from_dims(shape)};
        cldnn::memory::ptr shared_memory{p.GetEngine().allocate_memory(buffer_layout)};

        const auto mutable_id_w = layer_type_name + "_md_write." + std::to_string(output_idx);
        const cldnn::mutable_data mutable_prim_w{mutable_id_w, shared_memory};
        p.add_primitive(*op, mutable_prim_w);
        inputs.push_back(mutable_id_w);
        return shared_memory;
    };

    const auto shared_memory_1 = add_mutable_output(1);  // output 1 - scores
    const auto shared_memory_2 = add_mutable_output(2);  // output 2 - roisNum

    const cldnn::generate_proposals prim{layer_name,
                                         inputs,
                                         attrs.min_size,
                                         attrs.nms_threshold,
                                         attrs.pre_nms_count,
                                         attrs.post_nms_count,
                                         attrs.normalized,
                                         attrs.nms_eta,
                                         cldnn::element_type_to_data_type(op->get_roi_num_type())};
    p.add_primitive(*op, prim);

    // "Read" ends of the shared buffers; they depend on the main primitive so
    // they expose the data only after the kernel has executed.
    const auto mutable_id_r_1 = layer_type_name + ".out1";
    const cldnn::mutable_data mutable_prim_r_1{mutable_id_r_1, {layer_name}, shared_memory_1};
    p.add_primitive(*op, mutable_prim_r_1);

    const auto mutable_id_r_2 = layer_type_name + ".out2";
    const cldnn::mutable_data mutable_prim_r_2{mutable_id_r_2, {layer_name}, shared_memory_2};
    p.add_primitive(*op, mutable_prim_r_2);
}
REGISTER_FACTORY_IMPL(internal, GenerateProposalsIEInternal);
} // namespace intel_gpu
} // namespace ov

View File

@ -74,6 +74,7 @@
#include <transformations/op_conversions/convert_gather_downgrade.hpp>
#include <transformations/op_conversions/convert_gather_0d.hpp>
#include <transformations/op_conversions/convert_deformable_conv_v8_to_v1.hpp>
#include <transformations/op_conversions/convert_gp9_to_gp_ie_internal.hpp>
#include <transformations/op_conversions/simplify_ctc_greedy_decoder_seq_len.hpp>
#include "transformations/op_conversions/softmax_decomposition.hpp"
#include <transformations/op_conversions/gelu7_downgrade.hpp>
@ -163,6 +164,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
manager.register_pass<ngraph::pass::ConvertNMS4ToNMS9>();
manager.register_pass<ngraph::pass::ConvertNMS5ToNMS9>();
manager.register_pass<ngraph::pass::ConvertNMS9ToNMSIEInternal>();
manager.register_pass<ngraph::pass::ConvertGP9ToGPIEInternal>();
manager.register_pass<ngraph::pass::ConvertGather0D>();
manager.register_pass<ngraph::pass::ConvertPriorBox8To0, false>();

View File

@ -0,0 +1,461 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "test_utils.h"
#include <intel_gpu/primitives/generate_proposals.hpp>
#include <intel_gpu/primitives/input_layout.hpp>
#include <intel_gpu/primitives/mutable_data.hpp>
using namespace cldnn;
using namespace ::tests;
namespace {
// Test-case descriptor: GenerateProposals attributes plus the expected outputs
// for the fixed input data defined below.
template <typename T>
struct GenerateProposalsParams {
    float min_size;          // boxes smaller than this (scaled) size are filtered out
    float nms_threshold;     // IoU threshold used by NMS
    int64_t pre_nms_count;   // number of top-scored proposals taken before NMS
    int64_t post_nms_count;  // maximum number of proposals kept after NMS
    bool normalized;         // specifies whether boxes are normalized
    float nms_eta;           // adaptive NMS eta parameter
    std::vector<T> expected_rois;        // expected ROI coordinates (x0, y0, x1, y1 per box)
    std::vector<T> expected_roi_scores;  // expected scores, one per kept ROI
    std::vector<size_t> expected_rois_num;  // expected number of ROIs per batch
};

// Each test instance is a (parameters, memory format) combination.
template <typename T>
using GenerateProposalsParamsWithLayout = std::tuple<GenerateProposalsParams<T>, format::type>;
// Fixed input data shared by all parameterized cases. Both batches receive
// identical inputs, so expected outputs simply repeat per batch.
constexpr size_t num_batches = 2;
constexpr size_t image_height = 200;
constexpr size_t image_width = 200;
constexpr float image_scale = 4.0f;
constexpr size_t height = 2;             // feature map height
constexpr size_t width = 6;              // feature map width
constexpr size_t number_of_channels = 3;
constexpr size_t number_of_anchors = 3;

// Per-batch image info: {height, width, scale}.
const std::vector<float> im_info{image_height, image_width, image_scale, image_height, image_width, image_scale};

// Anchors, shape {height, width, number_of_anchors, 4}: a simple 0..143 ramp.
const std::vector<float> anchors{
        0.0f, 1.0f, 2.0f, 3.0f,
        4.0f, 5.0f, 6.0f, 7.0f,
        8.0f, 9.0f, 10.0f, 11.0f,
        12.0f, 13.0f, 14.0f, 15.0f,
        16.0f, 17.0f, 18.0f, 19.0f,
        20.0f, 21.0f, 22.0f, 23.0f,
        24.0f, 25.0f, 26.0f, 27.0f,
        28.0f, 29.0f, 30.0f, 31.0f,
        32.0f, 33.0f, 34.0f, 35.0f,
        36.0f, 37.0f, 38.0f, 39.0f,
        40.0f, 41.0f, 42.0f, 43.0f,
        44.0f, 45.0f, 46.0f, 47.0f,
        48.0f, 49.0f, 50.0f, 51.0f,
        52.0f, 53.0f, 54.0f, 55.0f,
        56.0f, 57.0f, 58.0f, 59.0f,
        60.0f, 61.0f, 62.0f, 63.0f,
        64.0f, 65.0f, 66.0f, 67.0f,
        68.0f, 69.0f, 70.0f, 71.0f,
        72.0f, 73.0f, 74.0f, 75.0f,
        76.0f, 77.0f, 78.0f, 79.0f,
        80.0f, 81.0f, 82.0f, 83.0f,
        84.0f, 85.0f, 86.0f, 87.0f,
        88.0f, 89.0f, 90.0f, 91.0f,
        92.0f, 93.0f, 94.0f, 95.0f,
        96.0f, 97.0f, 98.0f, 99.0f,
        100.0f, 101.0f, 102.0f, 103.0f,
        104.0f, 105.0f, 106.0f, 107.0f,
        108.0f, 109.0f, 110.0f, 111.0f,
        112.0f, 113.0f, 114.0f, 115.0f,
        116.0f, 117.0f, 118.0f, 119.0f,
        120.0f, 121.0f, 122.0f, 123.0f,
        124.0f, 125.0f, 126.0f, 127.0f,
        128.0f, 129.0f, 130.0f, 131.0f,
        132.0f, 133.0f, 134.0f, 135.0f,
        136.0f, 137.0f, 138.0f, 139.0f,
        140.0f, 141.0f, 142.0f, 143.0f};

// Box deltas, shape {num_batches, number_of_anchors * 4, height, width}.
// The second batch (second half) duplicates the first.
const std::vector<float> deltas{
        0.5337073, 0.86607957, 0.55151343, 0.21626699, 0.4462629, 0.03985678,
        0.5157072, 0.9932138, 0.7565954, 0.43803605, 0.802818, 0.14834064,
        0.53932905, 0.14314, 0.3817048, 0.95075196, 0.05516243, 0.2567484,
        0.25508744, 0.77438325, 0.43561, 0.2094628, 0.8299043, 0.44982538,
        0.95615596, 0.5651084, 0.11801951, 0.05352486, 0.9774733, 0.14439464,
        0.62644225, 0.14370479, 0.54161614, 0.557915, 0.53102225, 0.0840179,
        0.7249888, 0.9843559, 0.5490522, 0.53788143, 0.822474, 0.3278008,
        0.39688024, 0.3286012, 0.5117038, 0.04743988, 0.9408995, 0.29885054,
        0.81039643, 0.85277915, 0.06807619, 0.86430097, 0.36225632, 0.16606331,
        0.5401001, 0.7541649, 0.11998601, 0.5131829, 0.40606487, 0.327888,
        0.27721855, 0.6378373, 0.22795396, 0.4961256, 0.3215895, 0.15607187,
        0.14782153, 0.8908137, 0.8835288, 0.834191, 0.29907143, 0.7983525,
        0.755875, 0.30837986, 0.0839176, 0.26624718, 0.04371626, 0.09472824,
        0.20689541, 0.37622106, 0.1083321, 0.1342548, 0.05815459, 0.7676379,
        0.8105144, 0.92348766, 0.26761323, 0.7183306, 0.8947588, 0.19020908,
        0.42731014, 0.7473663, 0.85775334, 0.9340091, 0.3278848, 0.755993,
        0.05307213, 0.39705503, 0.21003333, 0.5625373, 0.66188884, 0.80521655,
        0.6125863, 0.44678232, 0.97802377, 0.0204936, 0.02686367, 0.7390654,
        0.74631, 0.58399844, 0.5988792, 0.37413648, 0.5946692, 0.6955776,
        0.36377597, 0.7891322, 0.40900692, 0.99139464, 0.50169915, 0.41435778,
        0.17142445, 0.26761186, 0.31591868, 0.14249913, 0.12919712, 0.5418711,
        0.6523203, 0.50259084, 0.7379765, 0.01171071, 0.94423133, 0.00841132,
        0.97486794, 0.2921785, 0.7633071, 0.88477814, 0.03563205, 0.50833166,
        0.01354555, 0.535081, 0.41366324, 0.0694767, 0.9944055, 0.9981207,
        0.5337073, 0.86607957, 0.55151343, 0.21626699, 0.4462629, 0.03985678,
        0.5157072, 0.9932138, 0.7565954, 0.43803605, 0.802818, 0.14834064,
        0.53932905, 0.14314, 0.3817048, 0.95075196, 0.05516243, 0.2567484,
        0.25508744, 0.77438325, 0.43561, 0.2094628, 0.8299043, 0.44982538,
        0.95615596, 0.5651084, 0.11801951, 0.05352486, 0.9774733, 0.14439464,
        0.62644225, 0.14370479, 0.54161614, 0.557915, 0.53102225, 0.0840179,
        0.7249888, 0.9843559, 0.5490522, 0.53788143, 0.822474, 0.3278008,
        0.39688024, 0.3286012, 0.5117038, 0.04743988, 0.9408995, 0.29885054,
        0.81039643, 0.85277915, 0.06807619, 0.86430097, 0.36225632, 0.16606331,
        0.5401001, 0.7541649, 0.11998601, 0.5131829, 0.40606487, 0.327888,
        0.27721855, 0.6378373, 0.22795396, 0.4961256, 0.3215895, 0.15607187,
        0.14782153, 0.8908137, 0.8835288, 0.834191, 0.29907143, 0.7983525,
        0.755875, 0.30837986, 0.0839176, 0.26624718, 0.04371626, 0.09472824,
        0.20689541, 0.37622106, 0.1083321, 0.1342548, 0.05815459, 0.7676379,
        0.8105144, 0.92348766, 0.26761323, 0.7183306, 0.8947588, 0.19020908,
        0.42731014, 0.7473663, 0.85775334, 0.9340091, 0.3278848, 0.755993,
        0.05307213, 0.39705503, 0.21003333, 0.5625373, 0.66188884, 0.80521655,
        0.6125863, 0.44678232, 0.97802377, 0.0204936, 0.02686367, 0.7390654,
        0.74631, 0.58399844, 0.5988792, 0.37413648, 0.5946692, 0.6955776,
        0.36377597, 0.7891322, 0.40900692, 0.99139464, 0.50169915, 0.41435778,
        0.17142445, 0.26761186, 0.31591868, 0.14249913, 0.12919712, 0.5418711,
        0.6523203, 0.50259084, 0.7379765, 0.01171071, 0.94423133, 0.00841132,
        0.97486794, 0.2921785, 0.7633071, 0.88477814, 0.03563205, 0.50833166,
        0.01354555, 0.535081, 0.41366324, 0.0694767, 0.9944055, 0.9981207};

// Objectness scores, shape {num_batches, number_of_anchors, height, width}.
// The second batch (second half) duplicates the first.
const std::vector<float> scores{
        0.56637216, 0.90457034, 0.69827306, 0.4353543, 0.47985056, 0.42658508,
        0.14516132, 0.08081771, 0.1799732, 0.9229515, 0.42420176, 0.50857586,
        0.82664067, 0.4972319, 0.3752427, 0.56731623, 0.18241242, 0.33252355,
        0.30608943, 0.6572437, 0.69185436, 0.88646156, 0.36985755, 0.5590753,
        0.5256446, 0.03342898, 0.1344396, 0.68642473, 0.37953874, 0.32575172,
        0.21108444, 0.5661886, 0.45378175, 0.62126315, 0.26799858, 0.37272978,
        0.56637216, 0.90457034, 0.69827306, 0.4353543, 0.47985056, 0.42658508,
        0.14516132, 0.08081771, 0.1799732, 0.9229515, 0.42420176, 0.50857586,
        0.82664067, 0.4972319, 0.3752427, 0.56731623, 0.18241242, 0.33252355,
        0.30608943, 0.6572437, 0.69185436, 0.88646156, 0.36985755, 0.5590753,
        0.5256446, 0.03342898, 0.1344396, 0.68642473, 0.37953874, 0.32575172,
        0.21108444, 0.5661886, 0.45378175, 0.62126315, 0.26799858, 0.37272978};

// Memory formats to exercise: plane format plus blocked fsv/bsv layouts.
const std::vector<format::type> layouts{
        format::bfyx,
        format::b_fs_yx_fsv16,
        format::b_fs_yx_fsv32,
        format::bs_fs_yx_bsv16_fsv16,
        format::bs_fs_yx_bsv32_fsv16,
        format::bs_fs_yx_bsv32_fsv32};
// Converts the float reference values into the precision under test.
template <typename T>
std::vector<T> getValues(const std::vector<float>& values) {
    std::vector<T> converted;
    converted.reserve(values.size());
    for (const auto value : values) {
        converted.emplace_back(value);
    }
    return converted;
}
// Absolute comparison tolerance for the tested precision.
template <typename T> float getError();

// f32: tight tolerance. Use a float literal to avoid an implicit
// double -> float narrowing conversion.
template<>
float getError<float>() {
    return 0.001f;
}

// f16: the short half-precision mantissa requires a much looser bound.
template<>
float getError<half_t>() {
    return 0.2f;
}
// Reference test cases: {min_size, nms_threshold, pre_nms_count, post_nms_count,
// normalized, nms_eta} plus the expected ROIs / scores / per-batch counts.
// Inputs are identical for both batches, so expectations repeat per batch.
template <typename T>
std::vector<GenerateProposalsParams<T>> getGenerateProposalsParams() {
    std::vector<GenerateProposalsParams<T>> params = {
        {
            // min_size filtering + small pre_nms_count: only 3 boxes survive per batch
            1.0f, 0.7f, 14, 6, true, 1.0,
            getValues<T>({4.49132, 4.30537, 8.75027, 8.8035,
                          0, 1.01395, 4.66909, 5.14337,
                          135.501, 137.467, 139.81, 141.726,
                          4.49132, 4.30537, 8.75027, 8.8035,
                          0, 1.01395, 4.66909, 5.14337,
                          135.501, 137.467, 139.81, 141.726}),
            getValues<T>({0.826641, 0.566372, 0.559075,
                          0.826641, 0.566372, 0.559075}),
            {3, 3}
        },
        {
            // same min_size with a large pre_nms_count: 5 boxes survive per batch
            1.0f, 0.7f, 1000, 6, true, 1.0,
            getValues<T>({4.49132, 4.30537, 8.75027, 8.8035,
                          0, 1.01395, 4.66909, 5.14337,
                          135.501, 137.467, 139.81, 141.726,
                          47.2348, 47.8342, 52.5503, 52.3864,
                          126.483, 128.3, 131.625, 133.707,
                          4.49132, 4.30537, 8.75027, 8.8035,
                          0, 1.01395, 4.66909, 5.14337,
                          135.501, 137.467, 139.81, 141.726,
                          47.2348, 47.8342, 52.5503, 52.3864,
                          126.483, 128.3, 131.625, 133.707}),
            getValues<T>({0.826641, 0.566372, 0.559075, 0.479851, 0.267999,
                          0.826641, 0.566372, 0.559075, 0.479851, 0.267999}),
            {5, 5}
        },
        {
            // min_size disabled: post_nms_count (6) boxes kept per batch
            0.0f, 0.7f, 14, 6, true, 1.0,
            getValues<T>({108.129, 109.37, 111.623, 111.468,
                          12.9725, 11.6102, 16.4918, 16.9624,
                          112.883, 113.124, 115.17, 118.213,
                          4.49132, 4.30537, 8.75027, 8.8035,
                          24.9778, 25.0318, 27.2283, 28.495,
                          100.126, 101.409, 102.354, 106.125,
                          108.129, 109.37, 111.623, 111.468,
                          12.9725, 11.6102, 16.4918, 16.9624,
                          112.883, 113.124, 115.17, 118.213,
                          4.49132, 4.30537, 8.75027, 8.8035,
                          24.9778, 25.0318, 27.2283, 28.495,
                          100.126, 101.409, 102.354, 106.125}),
            getValues<T>({0.922952, 0.90457, 0.886462, 0.826641, 0.698273, 0.691854,
                          0.922952, 0.90457, 0.886462, 0.826641, 0.698273, 0.691854}),
            {6, 6}
        },
        {
            // tiny min_size with large pre_nms_count: same result as above
            0.1f, 0.7f, 1000, 6, true, 1.0,
            getValues<T>({108.129, 109.37, 111.623, 111.468,
                          12.9725, 11.6102, 16.4918, 16.9624,
                          112.883, 113.124, 115.17, 118.213,
                          4.49132, 4.30537, 8.75027, 8.8035,
                          24.9778, 25.0318, 27.2283, 28.495,
                          100.126, 101.409, 102.354, 106.125,
                          108.129, 109.37, 111.623, 111.468,
                          12.9725, 11.6102, 16.4918, 16.9624,
                          112.883, 113.124, 115.17, 118.213,
                          4.49132, 4.30537, 8.75027, 8.8035,
                          24.9778, 25.0318, 27.2283, 28.495,
                          100.126, 101.409, 102.354, 106.125}),
            getValues<T>({0.922952, 0.90457, 0.886462, 0.826641, 0.698273, 0.691854,
                          0.922952, 0.90457, 0.886462, 0.826641, 0.698273, 0.691854}),
            {6, 6}
        },
        {
            // non-normalized boxes (+1 coordinate offset) with min_size filtering
            1.0f, 0.7f, 14, 6, false, 1.0,
            getValues<T>({13.4588, 10.9153, 17.7377, 17.9436,
                          4.73698, 3.95806, 10.1254, 9.70525,
                          89.5773, 90.0053, 92.9476, 95.3396,
                          0, 1.02093, 6.00364, 6.21505,
                          92.3608, 94.306, 96.3198, 98.4288,
                          135.252, 137.7, 140.716, 143.09,
                          13.4588, 10.9153, 17.7377, 17.9436,
                          4.73698, 3.95806, 10.1254, 9.70525,
                          89.5773, 90.0053, 92.9476, 95.3396,
                          0, 1.02093, 6.00364, 6.21505,
                          92.3608, 94.306, 96.3198, 98.4288,
                          135.252, 137.7, 140.716, 143.09}),
            getValues<T>({0.90457, 0.826641, 0.657244, 0.566372, 0.566189, 0.559075,
                          0.90457, 0.826641, 0.657244, 0.566372, 0.566189, 0.559075}),
            {6, 6}
        },
        {
            // non-normalized boxes without min_size filtering
            0.0f, 0.7f, 1000, 6, false, 1.0,
            getValues<T>({108.194, 109.556, 112.435, 111.701,
                          13.4588, 10.9153, 17.7377, 17.9436,
                          113.324, 113.186, 115.755, 119.82,
                          4.73698, 3.95806, 10.1254, 9.70525,
                          25.4666, 25.0477, 27.8424, 29.2425,
                          100.188, 101.614, 102.532, 107.687,
                          108.194, 109.556, 112.435, 111.701,
                          13.4588, 10.9153, 17.7377, 17.9436,
                          113.324, 113.186, 115.755, 119.82,
                          4.73698, 3.95806, 10.1254, 9.70525,
                          25.4666, 25.0477, 27.8424, 29.2425,
                          100.188, 101.614, 102.532, 107.687}),
            getValues<T>({0.922952, 0.90457, 0.886462, 0.826641, 0.698273, 0.691854,
                          0.922952, 0.90457, 0.886462, 0.826641, 0.698273, 0.691854}),
            {6, 6}
        }
    };
    return params;
}
}; // namespace
// Parameterized GPU test fixture for the generate_proposals primitive.
// T - data precision under test; ROIS_NUM_T - precision of the rois-num output.
template <typename T, typename ROIS_NUM_T>
struct generate_proposals_test
        : public ::testing::TestWithParam<GenerateProposalsParamsWithLayout<T> > {
public:
    void test() {
        GenerateProposalsParams<T> param;
        format::type data_layout;
        std::tie(param, data_layout) = this->GetParam();
        // Non-plane formats are fed through reorder primitives below.
        const bool need_reorder = data_layout != format::bfyx;

        const auto data_type = type_to_data_type<T>::value;
        const auto rois_num_type = type_to_data_type<ROIS_NUM_T>::value;

        auto& engine = get_test_engine();

        // --- Allocate and fill the four inputs in plane (bfyx) format ---
        const primitive_id input_im_info_id = "InputImInfo";
        const auto input_im_info = engine.allocate_memory({data_type, format::bfyx, tensor{batch(num_batches), feature(3)}});
        set_values(input_im_info, getValues<T>(im_info));

        const primitive_id input_anchors_id = "InputAnchors";
        auto input_anchors = engine.allocate_memory(
                {data_type, format::bfyx, tensor{batch(height), feature(width), spatial(4, number_of_anchors)}});
        set_values(input_anchors, getValues<T>(anchors));

        const primitive_id input_deltas_id = "InputDeltas";
        auto input_deltas = engine.allocate_memory(
                {data_type, format::bfyx,
                 tensor{batch(num_batches), feature(number_of_anchors * 4), spatial(width, height)}});
        set_values(input_deltas, getValues<T>(deltas));

        const primitive_id input_scores_id = "InputScores";
        auto input_scores = engine.allocate_memory(
                {data_type, format::bfyx, tensor{batch(num_batches), feature(number_of_anchors), spatial(width, height)}});
        set_values(input_scores, getValues<T>(scores));

        // --- Mutable buffers for the 2nd (scores) and 3rd (rois num) outputs ---
        const primitive_id output_roi_scores_id = "OutputRoiScores";
        const layout rois_scores_layout{data_type, data_layout, tensor{batch(num_batches * param.post_nms_count)}};
        auto output_roi_scores = engine.allocate_memory(rois_scores_layout);

        const primitive_id output_rois_num_id = "OutputRoisNum";
        const layout rois_num_layout{rois_num_type, data_layout, tensor{batch(num_batches)}};
        auto output_rois_num = engine.allocate_memory(rois_num_layout);

        const primitive_id reorder_im_info_id = input_im_info_id + "Reordered";
        const primitive_id reorder_anchors_id = input_anchors_id + "Reordered";
        const primitive_id reorder_deltas_id = input_deltas_id + "Reordered";
        const primitive_id reorder_scores_id = input_scores_id + "Reordered";

        // --- Build the topology: inputs -> reorders -> primitive -> reorder back ---
        topology topology;
        topology.add(input_layout{input_im_info_id, input_im_info->get_layout()});
        topology.add(input_layout{input_anchors_id, input_anchors->get_layout()});
        topology.add(input_layout{input_deltas_id, input_deltas->get_layout()});
        topology.add(input_layout{input_scores_id, input_scores->get_layout()});
        topology.add(mutable_data{output_roi_scores_id, output_roi_scores});
        topology.add(mutable_data{output_rois_num_id, output_rois_num});
        topology.add(reorder(reorder_im_info_id, input_im_info_id, data_layout, data_type));
        topology.add(reorder(reorder_anchors_id, input_anchors_id, data_layout, data_type));
        topology.add(reorder(reorder_deltas_id, input_deltas_id, data_layout, data_type));
        topology.add(reorder(reorder_scores_id, input_scores_id, data_layout, data_type));

        const primitive_id generate_proposals_id = "generate_proposals";
        const std::vector<primitive_id> inputs{ reorder_im_info_id, reorder_anchors_id, reorder_deltas_id,
                                                reorder_scores_id, output_roi_scores_id, output_rois_num_id};
        const auto generate_proposals_primitive = generate_proposals{
                generate_proposals_id,
                inputs,
                param.min_size,
                param.nms_threshold,
                param.pre_nms_count,
                param.post_nms_count,
                param.normalized,
                param.nms_eta,
                rois_num_type};

        topology.add(generate_proposals_primitive);
        // Reorder the ROI output back to plane format for comparison.
        const primitive_id reorder_result_id = generate_proposals_id + "Reordered";
        topology.add(reorder(reorder_result_id, generate_proposals_id, format::bfyx, data_type));

        network network{engine, topology};

        network.set_input_data(input_im_info_id, input_im_info);
        network.set_input_data(input_anchors_id, input_anchors);
        network.set_input_data(input_deltas_id, input_deltas);
        network.set_input_data(input_scores_id, input_scores);

        const auto outputs = network.execute();

        const auto rois = outputs.at(reorder_result_id).get_memory();

        const cldnn::mem_lock<T> rois_ptr(rois, get_test_stream());
        ASSERT_EQ(rois_ptr.size(), num_batches * param.post_nms_count * 4);

        // Reorders a blocked-layout buffer back to bfyx so it can be read linearly.
        const auto get_plane_data = [&](const memory::ptr& mem, const data_types data_type, const layout& from_layout) {
            if (!need_reorder) {
                return mem;
            }
            cldnn::topology reorder_topology;
            reorder_topology.add(input_layout("data", from_layout));
            reorder_topology.add(reorder("plane_data", "data", format::bfyx, data_type));
            cldnn::network reorder_net{engine, reorder_topology};
            reorder_net.set_input_data("data", mem);
            const auto second_output_result = reorder_net.execute();
            const auto plane_data_mem = second_output_result.at("plane_data").get_memory();
            return plane_data_mem;
        };

        const cldnn::mem_lock<T> roi_scores_ptr(
                get_plane_data(output_roi_scores, data_type, rois_scores_layout), get_test_stream());
        ASSERT_EQ(roi_scores_ptr.size(), num_batches * param.post_nms_count);

        const cldnn::mem_lock<ROIS_NUM_T> rois_num_ptr(
                get_plane_data(output_rois_num, rois_num_type, rois_num_layout), get_test_stream());
        ASSERT_EQ(rois_num_ptr.size(), num_batches);

        const auto& expected_rois = param.expected_rois;
        const auto& expected_roi_scores = param.expected_roi_scores;
        const auto& expected_rois_num = param.expected_rois_num;

        for (size_t j = 0; j < expected_rois_num.size(); ++j) {
            EXPECT_EQ(expected_rois_num[j], rois_num_ptr[j]) << "j=" << j;
        }

        // ROI coordinates are only meaningful where the score is non-zero
        // (zero-score slots are the kernel's zero-filled padding).
        for (auto i = 0; i < param.post_nms_count; ++i) {
            EXPECT_NEAR(expected_roi_scores[i], roi_scores_ptr[i], getError<T>()) << "i=" << i;
            if (static_cast<float>(expected_roi_scores[i]) != 0.0f) {
                for (size_t coord = 0; coord < 4; ++coord) {
                    const auto roi_idx = i * 4 + coord;
                    EXPECT_NEAR(expected_rois[roi_idx], rois_ptr[roi_idx], getError<T>()) << "i=" << i << ", coord=" << coord;
                }
            }
        }
    }
};
// Instantiate the fixture for every (data precision, rois-num precision)
// combination, each over the full set of tested memory layouts.
using f32_i32 = generate_proposals_test<float, int32_t>;
TEST_P(f32_i32, f32_i32) {
    test();
}

INSTANTIATE_TEST_SUITE_P(
        generate_proposals_gpu_test,
        f32_i32,
        ::testing::Combine(
            ::testing::ValuesIn(getGenerateProposalsParams<float>()),
            ::testing::ValuesIn(layouts)
        ));

using f32_i64 = generate_proposals_test<float, int64_t>;
TEST_P(f32_i64, f32_i64) {
    test();
}

INSTANTIATE_TEST_SUITE_P(
        generate_proposals_gpu_test,
        f32_i64,
        ::testing::Combine(
            ::testing::ValuesIn(getGenerateProposalsParams<float>()),
            ::testing::ValuesIn(layouts)
        ));

using f16_i32 = generate_proposals_test<half_t, int32_t>;
TEST_P(f16_i32, f16_i32) {
    test();
}

INSTANTIATE_TEST_SUITE_P(
        generate_proposals_gpu_test,
        f16_i32,
        ::testing::Combine(
            ::testing::ValuesIn(getGenerateProposalsParams<half_t>()),
            ::testing::ValuesIn(layouts)
        ));

using f16_i64 = generate_proposals_test<half_t, int64_t>;
TEST_P(f16_i64, f16_i64) {
    test();
}

INSTANTIATE_TEST_SUITE_P(
        generate_proposals_gpu_test,
        f16_i64,
        ::testing::Combine(
            ::testing::ValuesIn(getGenerateProposalsParams<half_t>()),
            ::testing::ValuesIn(layouts)
        ));

View File

@ -0,0 +1,197 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include "single_layer_tests/generate_proposals.hpp"
#include "common_test_utils/ov_tensor_utils.hpp"
using namespace ov::test;
using namespace ov::test::subgraph;
namespace {
// Attribute value sweeps combined by the parameterized suites below.
const std::vector<float> min_size = { 1.0f, 0.0f };
const std::vector<float> nms_threshold = { 0.7f };
const std::vector<int64_t> post_nms_count = { 6 };
const std::vector<int64_t> pre_nms_count = { 14, 1000 };
// Named input-tensor sets (im_info, anchors, deltas, scores) fed to the
// single-layer tests: "empty" is a degenerate mostly-ones case, "filled2"
// mirrors the reference data used by the GPU unit tests (batch 2 == batch 1).
template <typename T>
const std::vector<std::pair<std::string, std::vector<ov::Tensor>>> getInputTensors() {
    const std::vector<std::pair<std::string, std::vector<ov::Tensor>>> input_tensors = {
        {
            "empty",
            {
                // 3
                ov::test::utils::create_tensor<T>(ov::element::from<T>(), ov::Shape{2, 3}, {1.0f, 1.0f, 0.0f, 1.0f, 1.0f, 0.0f}),
                // 2 x 6 x 3 x 4 = 144
                ov::test::utils::create_tensor<T>(ov::element::from<T>(), ov::Shape{2, 6, 3, 4}, std::vector<T>(144, 1.0f)),
                // 2 x 12 x 2 x 6 = 144 * 2
                ov::test::utils::create_tensor<T>(ov::element::from<T>(), ov::Shape{2, 12, 2, 6}, std::vector<T>(288, 1.0f)),
                // {2 x 3 x 2 x 6} = 36 * 2
                ov::test::utils::create_tensor<T>(ov::element::from<T>(), ov::Shape{2, 3, 2, 6}, {
                        5.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
                        1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 4.0f, 1.0f, 1.0f, 1.0f,
                        1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 8.0f, 1.0f,
                        5.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
                        1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 4.0f, 1.0f, 1.0f, 1.0f,
                        1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 8.0f, 1.0f})
            }
        },
        {
            "filled2",
            {
                // im_info: {height, width, scale} per batch
                ov::test::utils::create_tensor<T>(ov::element::from<T>(), ov::Shape{2, 3}, {200.0, 200.0, 4.0, 200.0, 200.0, 4.0}),
                // anchors: simple 0..143 ramp
                ov::test::utils::create_tensor<T>(ov::element::from<T>(), ov::Shape{2, 6, 3, 4}, {0.0f, 1.0f, 2.0f, 3.0f,
                        4.0f, 5.0f, 6.0f, 7.0f,
                        8.0f, 9.0f, 10.0f, 11.0f,
                        12.0f, 13.0f, 14.0f, 15.0f,
                        16.0f, 17.0f, 18.0f, 19.0f,
                        20.0f, 21.0f, 22.0f, 23.0f,
                        24.0f, 25.0f, 26.0f, 27.0f,
                        28.0f, 29.0f, 30.0f, 31.0f,
                        32.0f, 33.0f, 34.0f, 35.0f,
                        36.0f, 37.0f, 38.0f, 39.0f,
                        40.0f, 41.0f, 42.0f, 43.0f,
                        44.0f, 45.0f, 46.0f, 47.0f,
                        48.0f, 49.0f, 50.0f, 51.0f,
                        52.0f, 53.0f, 54.0f, 55.0f,
                        56.0f, 57.0f, 58.0f, 59.0f,
                        60.0f, 61.0f, 62.0f, 63.0f,
                        64.0f, 65.0f, 66.0f, 67.0f,
                        68.0f, 69.0f, 70.0f, 71.0f,
                        72.0f, 73.0f, 74.0f, 75.0f,
                        76.0f, 77.0f, 78.0f, 79.0f,
                        80.0f, 81.0f, 82.0f, 83.0f,
                        84.0f, 85.0f, 86.0f, 87.0f,
                        88.0f, 89.0f, 90.0f, 91.0f,
                        92.0f, 93.0f, 94.0f, 95.0f,
                        96.0f, 97.0f, 98.0f, 99.0f,
                        100.0f, 101.0f, 102.0f, 103.0f,
                        104.0f, 105.0f, 106.0f, 107.0f,
                        108.0f, 109.0f, 110.0f, 111.0f,
                        112.0f, 113.0f, 114.0f, 115.0f,
                        116.0f, 117.0f, 118.0f, 119.0f,
                        120.0f, 121.0f, 122.0f, 123.0f,
                        124.0f, 125.0f, 126.0f, 127.0f,
                        128.0f, 129.0f, 130.0f, 131.0f,
                        132.0f, 133.0f, 134.0f, 135.0f,
                        136.0f, 137.0f, 138.0f, 139.0f,
                        140.0f, 141.0f, 142.0f, 143.0f}),
                // box deltas: second batch (second half) duplicates the first
                ov::test::utils::create_tensor<T>(ov::element::from<T>(), ov::Shape{2, 12, 2, 6}, {
                        0.5337073, 0.86607957, 0.55151343, 0.21626699, 0.4462629, 0.03985678,
                        0.5157072, 0.9932138, 0.7565954, 0.43803605, 0.802818, 0.14834064,
                        0.53932905, 0.14314, 0.3817048, 0.95075196, 0.05516243, 0.2567484,
                        0.25508744, 0.77438325, 0.43561, 0.2094628, 0.8299043, 0.44982538,
                        0.95615596, 0.5651084, 0.11801951, 0.05352486, 0.9774733, 0.14439464,
                        0.62644225, 0.14370479, 0.54161614, 0.557915, 0.53102225, 0.0840179,
                        0.7249888, 0.9843559, 0.5490522, 0.53788143, 0.822474, 0.3278008,
                        0.39688024, 0.3286012, 0.5117038, 0.04743988, 0.9408995, 0.29885054,
                        0.81039643, 0.85277915, 0.06807619, 0.86430097, 0.36225632, 0.16606331,
                        0.5401001, 0.7541649, 0.11998601, 0.5131829, 0.40606487, 0.327888,
                        0.27721855, 0.6378373, 0.22795396, 0.4961256, 0.3215895, 0.15607187,
                        0.14782153, 0.8908137, 0.8835288, 0.834191, 0.29907143, 0.7983525,
                        0.755875, 0.30837986, 0.0839176, 0.26624718, 0.04371626, 0.09472824,
                        0.20689541, 0.37622106, 0.1083321, 0.1342548, 0.05815459, 0.7676379,
                        0.8105144, 0.92348766, 0.26761323, 0.7183306, 0.8947588, 0.19020908,
                        0.42731014, 0.7473663, 0.85775334, 0.9340091, 0.3278848, 0.755993,
                        0.05307213, 0.39705503, 0.21003333, 0.5625373, 0.66188884, 0.80521655,
                        0.6125863, 0.44678232, 0.97802377, 0.0204936, 0.02686367, 0.7390654,
                        0.74631, 0.58399844, 0.5988792, 0.37413648, 0.5946692, 0.6955776,
                        0.36377597, 0.7891322, 0.40900692, 0.99139464, 0.50169915, 0.41435778,
                        0.17142445, 0.26761186, 0.31591868, 0.14249913, 0.12919712, 0.5418711,
                        0.6523203, 0.50259084, 0.7379765, 0.01171071, 0.94423133, 0.00841132,
                        0.97486794, 0.2921785, 0.7633071, 0.88477814, 0.03563205, 0.50833166,
                        0.01354555, 0.535081, 0.41366324, 0.0694767, 0.9944055, 0.9981207,
                        0.5337073, 0.86607957, 0.55151343, 0.21626699, 0.4462629, 0.03985678,
                        0.5157072, 0.9932138, 0.7565954, 0.43803605, 0.802818, 0.14834064,
                        0.53932905, 0.14314, 0.3817048, 0.95075196, 0.05516243, 0.2567484,
                        0.25508744, 0.77438325, 0.43561, 0.2094628, 0.8299043, 0.44982538,
                        0.95615596, 0.5651084, 0.11801951, 0.05352486, 0.9774733, 0.14439464,
                        0.62644225, 0.14370479, 0.54161614, 0.557915, 0.53102225, 0.0840179,
                        0.7249888, 0.9843559, 0.5490522, 0.53788143, 0.822474, 0.3278008,
                        0.39688024, 0.3286012, 0.5117038, 0.04743988, 0.9408995, 0.29885054,
                        0.81039643, 0.85277915, 0.06807619, 0.86430097, 0.36225632, 0.16606331,
                        0.5401001, 0.7541649, 0.11998601, 0.5131829, 0.40606487, 0.327888,
                        0.27721855, 0.6378373, 0.22795396, 0.4961256, 0.3215895, 0.15607187,
                        0.14782153, 0.8908137, 0.8835288, 0.834191, 0.29907143, 0.7983525,
                        0.755875, 0.30837986, 0.0839176, 0.26624718, 0.04371626, 0.09472824,
                        0.20689541, 0.37622106, 0.1083321, 0.1342548, 0.05815459, 0.7676379,
                        0.8105144, 0.92348766, 0.26761323, 0.7183306, 0.8947588, 0.19020908,
                        0.42731014, 0.7473663, 0.85775334, 0.9340091, 0.3278848, 0.755993,
                        0.05307213, 0.39705503, 0.21003333, 0.5625373, 0.66188884, 0.80521655,
                        0.6125863, 0.44678232, 0.97802377, 0.0204936, 0.02686367, 0.7390654,
                        0.74631, 0.58399844, 0.5988792, 0.37413648, 0.5946692, 0.6955776,
                        0.36377597, 0.7891322, 0.40900692, 0.99139464, 0.50169915, 0.41435778,
                        0.17142445, 0.26761186, 0.31591868, 0.14249913, 0.12919712, 0.5418711,
                        0.6523203, 0.50259084, 0.7379765, 0.01171071, 0.94423133, 0.00841132,
                        0.97486794, 0.2921785, 0.7633071, 0.88477814, 0.03563205, 0.50833166,
                        0.01354555, 0.535081, 0.41366324, 0.0694767, 0.9944055, 0.9981207}),
                // scores: second batch (second half) duplicates the first
                ov::test::utils::create_tensor<T>(ov::element::from<T>(), ov::Shape{2, 3, 2, 6}, {
                        0.56637216, 0.90457034, 0.69827306, 0.4353543, 0.47985056, 0.42658508,
                        0.14516132, 0.08081771, 0.1799732, 0.9229515, 0.42420176, 0.50857586,
                        0.82664067, 0.4972319, 0.3752427, 0.56731623, 0.18241242, 0.33252355,
                        0.30608943, 0.6572437, 0.69185436, 0.88646156, 0.36985755, 0.5590753,
                        0.5256446, 0.03342898, 0.1344396, 0.68642473, 0.37953874, 0.32575172,
                        0.21108444, 0.5661886, 0.45378175, 0.62126315, 0.26799858, 0.37272978,
                        0.56637216, 0.90457034, 0.69827306, 0.4353543, 0.47985056, 0.42658508,
                        0.14516132, 0.08081771, 0.1799732, 0.9229515, 0.42420176, 0.50857586,
                        0.82664067, 0.4972319, 0.3752427, 0.56731623, 0.18241242, 0.33252355,
                        0.30608943, 0.6572437, 0.69185436, 0.88646156, 0.36985755, 0.5590753,
                        0.5256446, 0.03342898, 0.1344396, 0.68642473, 0.37953874, 0.32575172,
                        0.21108444, 0.5661886, 0.45378175, 0.62126315, 0.26799858, 0.37272978}),
            }
        }
    };
    return input_tensors;
}
// Static input shapes for the four operation inputs.
constexpr size_t num_batches = 2;
constexpr size_t height = 2;
constexpr size_t width = 6;
constexpr size_t number_of_anchors = 3;

const std::vector<std::vector<InputShape>> input_shape = {
        // im_info / anchors / boxes deltas / scores
        static_shapes_to_test_representation({{num_batches, 3},
                                              {height, width, number_of_anchors, 4},
                                              {num_batches, number_of_anchors * 4, height, width},
                                              {num_batches, number_of_anchors, height, width}}),
};
// GPU smoke test for GenerateProposals-9 with f16 network precision and
// normalized boxes. Fixed single values use ::testing::Values(...) rather
// than ValuesIn on a temporary braced list — Values is the documented form
// for enumerated literals and avoids the initializer_list indirection.
INSTANTIATE_TEST_SUITE_P(
    smoke_GenerateProposalsLayerTest_f16,
    GenerateProposalsLayerTest,
    ::testing::Combine(
        ::testing::ValuesIn(input_shape),                     // im_info/anchors/deltas/scores shapes
        ::testing::ValuesIn(min_size),                        // min_size attribute
        ::testing::ValuesIn(nms_threshold),                   // nms_threshold attribute
        ::testing::ValuesIn(post_nms_count),                  // post_nms_count attribute
        ::testing::ValuesIn(pre_nms_count),                   // pre_nms_count attribute
        ::testing::Values(true),                              // normalized attribute
        ::testing::ValuesIn(getInputTensors<ov::float16>()),  // input tensors
        ::testing::Values(ov::element::Type_t::f16),          // network precision
        ::testing::Values(ov::element::Type_t::i32,
                          ov::element::Type_t::i64),          // roi_num precision
        ::testing::Values(CommonTestUtils::DEVICE_GPU)),      // target device
    GenerateProposalsLayerTest::getTestCaseName);
// GPU smoke test for GenerateProposals-9 with f32 network precision and
// non-normalized boxes. Fixed single values use ::testing::Values(...)
// rather than ValuesIn on a temporary braced list — Values is the
// documented form for enumerated literals.
INSTANTIATE_TEST_SUITE_P(
    smoke_GenerateProposalsLayerTest_f32,
    GenerateProposalsLayerTest,
    ::testing::Combine(
        ::testing::ValuesIn(input_shape),                 // im_info/anchors/deltas/scores shapes
        ::testing::ValuesIn(min_size),                    // min_size attribute
        ::testing::ValuesIn(nms_threshold),               // nms_threshold attribute
        ::testing::ValuesIn(post_nms_count),              // post_nms_count attribute
        ::testing::ValuesIn(pre_nms_count),               // pre_nms_count attribute
        ::testing::Values(false),                         // normalized attribute
        ::testing::ValuesIn(getInputTensors<float>()),    // input tensors
        ::testing::Values(ov::element::Type_t::f32),      // network precision
        ::testing::Values(ov::element::Type_t::i32,
                          ov::element::Type_t::i64),      // roi_num precision
        ::testing::Values(CommonTestUtils::DEVICE_GPU)),  // target device
    GenerateProposalsLayerTest::getTestCaseName);
} // namespace

View File

@ -16,6 +16,7 @@ typedef std::tuple<
float, // nms_threshold: specifies NMS threshold
int64_t, // post_nms_count: number of top-n proposals after NMS
int64_t, // pre_nms_count: number of top-n proposals before NMS
bool, // normalized: specifies whether box is normalized or not
std::pair<std::string, std::vector<ov::Tensor>>, // input tensors
ElementType, // Network precision
ElementType, // roi_num precision
@ -28,6 +29,7 @@ class GenerateProposalsLayerTest :
protected:
void SetUp() override;
void generate_inputs(const std::vector<ngraph::Shape>& targetInputStaticShapes) override;
void compare(const std::vector<ov::Tensor>& expected, const std::vector<ov::Tensor>& actual) override;
public:
static std::string getTestCaseName(const testing::TestParamInfo<GenerateProposalsTestParams>& obj);

View File

@ -3,6 +3,7 @@
//
#include "shared_test_classes/single_layer/generate_proposals.hpp"
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "ngraph_functions/builders.hpp"
#include "common_test_utils/ov_tensor_utils.hpp"
@ -16,8 +17,10 @@ std::ostream& operator <<(
const ov::op::v9::GenerateProposals::Attributes& attributes) {
ss << "score_threshold=" << attributes.min_size << "_";
ss << "nms_threshold=" << attributes.nms_threshold << "_";
ss << "max_delta_log_wh=" << attributes.post_nms_count << "_";
ss << "num_classes=" << attributes.pre_nms_count;
ss << "post_nms_count=" << attributes.post_nms_count << "_";
ss << "pre_nms_count=" << attributes.pre_nms_count;
ss << "normalized=" << attributes.normalized;
ss << "nms_eta=" << attributes.nms_eta;
return ss;
}
} // namespace
@ -36,6 +39,7 @@ std::string GenerateProposalsLayerTest::getTestCaseName(
attributes.nms_threshold,
attributes.post_nms_count,
attributes.pre_nms_count,
attributes.normalized,
inputTensors,
netPrecision,
roiNumPrecision,
@ -70,6 +74,7 @@ void GenerateProposalsLayerTest::SetUp() {
attributes.nms_threshold,
attributes.post_nms_count,
attributes.pre_nms_count,
attributes.normalized,
inputTensors,
netPrecision,
roiNumPrecision,
@ -77,6 +82,13 @@ void GenerateProposalsLayerTest::SetUp() {
inType = outType = netPrecision;
targetDevice = targetName;
if (targetDevice == CommonTestUtils::DEVICE_GPU) {
if (netPrecision == element::Type_t::f16) {
abs_threshold = 0.2;
} else {
abs_threshold = 0.00009;
}
}
init_input_shapes(inputShapes);
@ -97,7 +109,7 @@ void GenerateProposalsLayerTest::SetUp() {
}
void GenerateProposalsLayerTest::generate_inputs(const std::vector<ngraph::Shape>& targetInputStaticShapes) {
auto inputTensors = std::get<5>(GetParam());
auto inputTensors = std::get<6>(GetParam());
inputs.clear();
const auto& funcInputs = function->inputs();
@ -110,6 +122,63 @@ void GenerateProposalsLayerTest::generate_inputs(const std::vector<ngraph::Shape
}
}
// GPU-specific comparison for GenerateProposals outputs.
// The GPU implementation produces statically shaped outputs, so the `rois`
// (output 0) and `roi_scores` (output 1) tensors may be longer than the
// reference result and padded with zeros past the valid region. This
// override compares only the valid prefix element-wise and then verifies
// the padding region is all zeros; non-GPU devices use the base comparison.
void GenerateProposalsLayerTest::compare(const std::vector<ov::Tensor>& expected,
const std::vector<ov::Tensor>& actual) {
if (targetDevice != CommonTestUtils::DEVICE_GPU) {
// Non-GPU path: default exact-shape comparison.
SubgraphBaseTest::compare(expected, actual);
return;
}
// GenerateProposals has exactly three outputs: rois, roi_scores, rois_num.
const auto outputsNum = expected.size();
ASSERT_EQ(outputsNum, 3);
ASSERT_EQ(outputsNum, actual.size());
ASSERT_EQ(outputsNum, function->get_results().size());
// actual outputs 0 (rois) and 1 (roi_scores) may be padded with zeros
for (size_t i = 0; i < 2; ++i) {
const auto expectedNumRois = expected[i].get_shape()[0];
const auto actualNumRois = actual[i].get_shape()[0];
// The actual output may only be equal to or larger than the reference.
ASSERT_LE(expectedNumRois, actualNumRois);
const auto actualBuffer = static_cast<uint8_t*>(actual[i].data());
const auto expectedBuffer = static_cast<uint8_t*>(expected[i].data());
// Elements per ROI: 4 box coordinates for output 0, 1 score for output 1.
const auto outputSize = i == 0 ? 4 : 1;
// Compare only the valid prefix (expectedNumRois entries) with the
// thresholds configured in SetUp().
if (outType == element::Type_t::f32) {
LayerTestsUtils::LayerTestsCommon::Compare(reinterpret_cast<const float*>(expectedBuffer),
reinterpret_cast<const float*>(actualBuffer),
expectedNumRois * outputSize,
rel_threshold,
abs_threshold);
} else {
// NOTE(review): this branch assumes any non-f32 output is f16 —
// confirm no other output precisions reach this comparison.
LayerTestsUtils::LayerTestsCommon::Compare(reinterpret_cast<const float16*>(expectedBuffer),
reinterpret_cast<const float16*>(actualBuffer),
expectedNumRois * outputSize,
rel_threshold,
abs_threshold);
}
// Everything past the valid prefix must be zero padding.
if (expectedNumRois < actualNumRois) {
if (outType == element::Type_t::f32) {
const auto fBuffer = static_cast<const float*>(actual[i].data());
for (size_t j = expectedNumRois * outputSize; j < actualNumRois * outputSize; ++j) {
ASSERT_TRUE(fBuffer[j] == 0.0f)
<< "Expected 0.0, actual: " << fBuffer[j] << " at index: " << j << ", output: " << i;
}
} else {
const float16 zero{0};
const auto fBuffer = static_cast<const float16*>(actual[i].data());
for (size_t j = expectedNumRois * outputSize; j < actualNumRois * outputSize; ++j) {
ASSERT_TRUE(fBuffer[j] == zero)
<< "Expected 0.0, actual: " << fBuffer[j] << " at index: " << j << ", output: " << i;
}
}
}
}
// output 2 - rois_num
ov::test::utils::compare(expected[2], actual[2], abs_threshold, rel_threshold);
}
} // namespace subgraph
} // namespace test
} // namespace ov