[GPU] GenerateProposals-9 (#11994)
* Add static_output attribute to enable the GPU implementation of GenerateProposals. This change may be reverted once dynamic shape support is implemented in the GPU plugin.
* Add GPU single-layer test; add normalized attribute to the SLT.
* Add GPU primitive for GenerateProposals.
* Add kernel for GenerateProposals.
* Add unit test for GenerateProposals.
* Add blocked layouts support.
* Tidy up.
* Support blocked layouts also for the 2nd and 3rd outputs.
* Transformation to deal with dynamic output in GPU: revert changes in the GenerateProposals interface and shape inference; add transformation; custom compare method in the single-layer test.
* Address review comments.
* Fix after rebase.
* Fix after rebase.
* Review comment: add nms_eta to test-case name generation.
* Add input type combinations to functional tests; fix float16 comparison.
* Fix after rebase.
* Use vector for input ids.
* Fix after rebase.
This commit is contained in: parent b2e35592f7, commit 66b5f9acf2
@@ -0,0 +1,41 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <memory>
#include <string>
#include <transformations_visibility.hpp>

#include "ngraph/op/generate_proposals.hpp"
#include "ngraph/op/op.hpp"

namespace ngraph {
namespace op {
namespace internal {

class TRANSFORMATIONS_API GenerateProposalsIEInternal : public op::v9::GenerateProposals {
    using Base = op::v9::GenerateProposals;

public:
    OPENVINO_OP("GenerateProposalsIEInternal", "ie_internal_opset");
    BWDCMP_RTTI_DECLARATION;

    GenerateProposalsIEInternal() = default;

    GenerateProposalsIEInternal(const Output<Node>& im_info,
                                const Output<Node>& anchors,
                                const Output<Node>& deltas,
                                const Output<Node>& scores,
                                const Attributes& attrs,
                                const element::Type& roi_num_type = element::i64);

    void validate_and_infer_types() override;

    std::shared_ptr<Node> clone_with_new_inputs(const ngraph::OutputVector& new_args) const override;
};

}  // namespace internal
}  // namespace op
}  // namespace ngraph
@@ -0,0 +1,22 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <ngraph/pass/graph_rewrite.hpp>
#include <transformations_visibility.hpp>

namespace ngraph {
namespace pass {

class TRANSFORMATIONS_API ConvertGP9ToGPIEInternal;

}  // namespace pass
}  // namespace ngraph

class ngraph::pass::ConvertGP9ToGPIEInternal : public ngraph::pass::MatcherPass {
public:
    OPENVINO_RTTI("ConvertGP9ToGPIEInternal", "0");
    ConvertGP9ToGPIEInternal();
};
@@ -0,0 +1,58 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "ngraph_ops/generate_proposals_ie_internal.hpp"

#include <memory>
#include <ngraph/opsets/opset9.hpp>

#include "itt.hpp"

using namespace std;
using namespace ngraph;

BWDCMP_RTTI_DEFINITION(op::internal::GenerateProposalsIEInternal);

op::internal::GenerateProposalsIEInternal::GenerateProposalsIEInternal(const Output<Node>& im_info,
                                                                       const Output<Node>& anchors,
                                                                       const Output<Node>& deltas,
                                                                       const Output<Node>& scores,
                                                                       const Attributes& attrs,
                                                                       const element::Type& roi_num_type)
    : Base(im_info, anchors, deltas, scores, attrs, roi_num_type) {
    validate_and_infer_types();
}

std::shared_ptr<Node> op::internal::GenerateProposalsIEInternal::clone_with_new_inputs(
    const ngraph::OutputVector& new_args) const {
    INTERNAL_OP_SCOPE(internal_GenerateProposalsIEInternal_clone_with_new_inputs);
    check_new_args_count(this, new_args);
    return make_shared<op::internal::GenerateProposalsIEInternal>(new_args.at(0),
                                                                  new_args.at(1),
                                                                  new_args.at(2),
                                                                  new_args.at(3),
                                                                  get_attrs(),
                                                                  get_roi_num_type());
}

void op::internal::GenerateProposalsIEInternal::validate_and_infer_types() {
    INTERNAL_OP_SCOPE(internal_GenerateProposalsIEInternal_validate_and_infer_types);
    Base::validate_and_infer_types();

    const auto im_info_shape = get_input_partial_shape(0);
    const auto num_batches = im_info_shape[0];
    NODE_VALIDATION_CHECK(this, num_batches.is_static(), "Number of batches must be static");

    const Dimension post_nms_count{get_attrs().post_nms_count};
    const auto first_dim_shape = num_batches * post_nms_count;

    const auto rois_shape = ov::PartialShape({first_dim_shape, 4});
    const auto scores_shape = ov::PartialShape({first_dim_shape});
    const auto roisnum_shape = ov::PartialShape({num_batches});

    const auto input_type = get_input_element_type(0);
    set_output_type(0, input_type, rois_shape);
    set_output_type(1, input_type, scores_shape);
    set_output_type(2, get_roi_num_type(), roisnum_shape);
}
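To make the shape inference above concrete: all three outputs become fully static as soon as the im_info batch dimension is static. A hedged worked example, using the batch size and post_nms_count from the GPU unit test at the end of this PR (the numbers are illustrative and not part of this file):

    // num_batches = 2, attrs.post_nms_count = 6
    // output 0 (rois):    {num_batches * post_nms_count, 4}  -> {12, 4}
    // output 1 (scores):  {num_batches * post_nms_count}     -> {12}
    // output 2 (roisnum): {num_batches}                       -> {2}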
@@ -0,0 +1,57 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "transformations/op_conversions/convert_gp9_to_gp_ie_internal.hpp"

#include <ngraph/opsets/opset1.hpp>
#include <ngraph/opsets/opset9.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include <ngraph/rt_info.hpp>

#include "itt.hpp"
#include "ngraph_ops/generate_proposals_ie_internal.hpp"
#include "transformations/utils/utils.hpp"

ngraph::pass::ConvertGP9ToGPIEInternal::ConvertGP9ToGPIEInternal() {
    ngraph::matcher_pass_callback callback = [this](ngraph::pattern::Matcher& m) {
        const auto root = m.get_match_root();
        const auto old_node = std::dynamic_pointer_cast<ngraph::opset9::GenerateProposals>(root);
        if (!old_node) {
            return false;
        }

        for (const auto& i : old_node->inputs()) {
            if (i.get_partial_shape().is_dynamic()) {
                return false;
            }
        }

        NodeVector new_ops;

        auto new_node =
            std::make_shared<ngraph::op::internal::GenerateProposalsIEInternal>(old_node->input_value(0),
                                                                                old_node->input_value(1),
                                                                                old_node->input_value(2),
                                                                                old_node->input_value(3),
                                                                                old_node->get_attrs(),
                                                                                old_node->get_roi_num_type());

        new_ops.push_back(new_node);
        Output<ngraph::Node> output_0 = new_node->output(0);
        new_ops.emplace_back(output_0.get_node_shared_ptr());
        Output<ngraph::Node> output_1 = new_node->output(1);
        new_ops.emplace_back(output_1.get_node_shared_ptr());
        Output<ngraph::Node> output_2 = new_node->output(2);
        new_ops.emplace_back(output_2.get_node_shared_ptr());

        new_node->set_friendly_name(old_node->get_friendly_name());
        copy_runtime_info(old_node, new_ops);
        replace_node(old_node, {output_0, output_1, output_2});
        return true;
    };

    const auto generate_proposals = ngraph::pattern::wrap_type<ngraph::opset9::GenerateProposals>();
    const auto matcher = std::make_shared<ngraph::pattern::Matcher>(generate_proposals, "ConvertGP9ToGPIEInternal");
    register_matcher(matcher, callback);
}
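The hunks below wire this pass into the existing transformation pipelines (cnn_network_ngraph_impl.cpp and the GPU plugin's TransformationsPipeline). Outside of those pipelines it can also be run on a model directly through a pass manager; a minimal sketch, assuming a function `f` that contains an opset9 GenerateProposals node with static input shapes (the helper name run_conversion is illustrative, not from the diff):

    #include <ngraph/pass/manager.hpp>
    #include "transformations/op_conversions/convert_gp9_to_gp_ie_internal.hpp"

    void run_conversion(const std::shared_ptr<ngraph::Function>& f) {
        ngraph::pass::Manager manager;
        // Replaces opset9::GenerateProposals with GenerateProposalsIEInternal,
        // whose three outputs report static shapes.
        manager.register_pass<ngraph::pass::ConvertGP9ToGPIEInternal>();
        manager.run_passes(f);
    }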
@@ -38,6 +38,7 @@
#include <transformations/common_optimizations/remove_multi_subgraph_op_dangling_params.hpp>
#include <transformations/disable_decompression_convert_constant_folding.hpp>
#include <transformations/low_precision/disable_convert_constant_folding_on_const_path.hpp>
#include <transformations/op_conversions/convert_gp9_to_gp_ie_internal.hpp>
#include <transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.hpp>
#include <transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.hpp>
#include <transformations/op_conversions/convert_nms9_to_nms_ie_internal.hpp>
@@ -458,6 +459,7 @@ void CNNNetworkNGraphImpl::reshape(const std::map<std::string, ngraph::PartialSh
        manager.register_pass<ngraph::pass::ConvertMulticlassNmsToMulticlassNmsIE>(false);
        manager.register_pass<::ngraph::pass::ConvertMatrixNmsToMatrixNmsIE>(false);
        manager.register_pass<::ngraph::pass::ConvertNMS9ToNMSIEInternal>();
        manager.register_pass<::ngraph::pass::ConvertGP9ToGPIEInternal>();
        manager.register_pass<::ngraph::pass::DisableConvertConstantFoldingOnConstPath>();
        manager.register_pass<::ov::pass::DisableDecompressionConvertConstantFolding>();
        manager.register_pass<::ngraph::pass::ConstantFolding>();
@@ -226,6 +226,7 @@ INSTANTIATE_TEST_SUITE_P(
        ::testing::ValuesIn(nms_threshold),
        ::testing::ValuesIn(post_nms_count),
        ::testing::ValuesIn(pre_nms_count),
        ::testing::ValuesIn({true, false}),
        ::testing::ValuesIn(inputTensors),
        ::testing::Values(ov::element::Type_t::f32),
        ::testing::Values(ov::element::Type_t::i32),
@@ -245,3 +245,4 @@ REGISTER_FACTORY(v9, Eye);

// --------------------------- Supported internal ops --------------------------- //
REGISTER_FACTORY(internal, NonMaxSuppressionIEInternal);
REGISTER_FACTORY(internal, GenerateProposalsIEInternal);
@@ -0,0 +1,82 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "primitive.hpp"
#include <vector>

namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{
/// @addtogroup cpp_topology Network Topology
/// @{
/// @addtogroup cpp_primitives Primitives
/// @{

/// @brief generate proposals
struct generate_proposals
        : public primitive_base<generate_proposals> {
    CLDNN_DECLARE_PRIMITIVE(generate_proposals)

    /// @brief Constructs generate_proposals primitive
    /// @param id This primitive id
    /// @param inputs Input primitive ids, in order: image size info (im_info), anchors, deltas for anchors,
    ///               proposal scores, ROIs scores output buffer, number of proposed ROIs output buffer
    /// @param min_size minimum box width and height
    /// @param nms_threshold threshold to be used in NonMaxSuppression stage
    /// @param pre_nms_count number of top-n proposals before NMS
    /// @param post_nms_count number of top-n proposals after NMS
    /// @param normalized indicates whether proposal bboxes are normalized
    /// @param nms_eta eta parameter for adaptive NMS
    /// @param roi_num_type type of 3rd output elements
    generate_proposals(const primitive_id& id,
                       const std::vector<primitive_id>& inputs,
                       float min_size,
                       float nms_threshold,
                       int64_t pre_nms_count,
                       int64_t post_nms_count,
                       bool normalized,
                       float nms_eta,
                       const data_types roi_num_type,
                       const padding& output_padding = {}) :
            primitive_base{id, inputs, output_padding},
            output_rois_scores{inputs[4]},
            output_rois_num{inputs[5]},
            min_size{min_size},
            nms_threshold{nms_threshold},
            pre_nms_count{pre_nms_count},
            post_nms_count{post_nms_count},
            normalized{normalized},
            nms_eta{nms_eta},
            roi_num_type{roi_num_type} {}

    primitive_id output_rois_scores;
    primitive_id output_rois_num;
    float min_size;
    float nms_threshold;
    int64_t pre_nms_count;
    int64_t post_nms_count;
    bool normalized;
    float nms_eta;
    data_types roi_num_type;

protected:
    std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
        std::vector<std::reference_wrapper<const primitive_id>> ret;
        if (!output_rois_scores.empty())
            ret.push_back(output_rois_scores);
        if (!output_rois_num.empty())
            ret.push_back(output_rois_num);
        return ret;
    }
};
/// @}
/// @}
/// @}
}  // namespace cldnn
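For orientation, this is roughly how the GPU plugin (see ops/generate_proposals.cpp further down) constructs the primitive. The ids and attribute values below are illustrative, not taken from the diff; the input order follows the constructor above:

    // Hedged sketch: six input ids, the last two being mutable_data buffers for outputs 1 and 2.
    std::vector<cldnn::primitive_id> inputs = {
        "im_info", "anchors", "deltas", "scores",     // operation inputs
        "rois_scores_buffer", "rois_num_buffer"       // output buffers for scores and roisNum
    };
    cldnn::generate_proposals prim{"generate_proposals.out0",
                                   inputs,
                                   0.0f,   // min_size
                                   0.7f,   // nms_threshold
                                   1000,   // pre_nms_count
                                   6,      // post_nms_count
                                   true,   // normalized
                                   1.0f,   // nms_eta
                                   cldnn::data_types::i32};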
src/plugins/intel_gpu/src/graph/generate_proposals.cpp  (new file, 43 lines)
@@ -0,0 +1,43 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "generate_proposals_inst.h"
#include "primitive_type_base.h"
#include "intel_gpu/runtime/error_handler.hpp"
#include "json_object.h"
#include <string>

namespace cldnn {
primitive_type_id generate_proposals::type_id() {
    static primitive_type_base<generate_proposals> instance;
    return &instance;
}

layout generate_proposals_inst::calc_output_layout(const generate_proposals_node& node, kernel_impl_params const& impl_param) {
    const layout data_layout = impl_param.get_input_layout();
    const auto num_batches = data_layout.batch();
    const auto desc = impl_param.typed_desc<generate_proposals>();
    return layout(data_layout.data_type, data_layout.format, {static_cast<int>(num_batches * desc->post_nms_count), 4, 1, 1});
}

std::string generate_proposals_inst::to_string(const generate_proposals_node& node) {
    auto desc = node.get_primitive();

    std::stringstream primitive_description;

    json_composite info;
    info.add("min_size", desc->min_size);
    info.add("nms_threshold", desc->nms_threshold);
    info.add("pre_nms_count", desc->pre_nms_count);
    info.add("post_nms_count", desc->post_nms_count);
    info.add("normalized", desc->normalized);
    info.add("nms_eta", desc->nms_eta);

    auto node_info = node.desc_to_json();
    node_info->add("generate_proposals_info", info);
    node_info->dump(primitive_description);

    return primitive_description.str();
}
}  // namespace cldnn
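Note that calc_output_layout describes only the first output (the ROIs); the scores and roisNum results travel through the mutable_data buffers wired in as dependencies 4 and 5 (see the plugin code below). As a hedged illustration, for the unit-test configuration further down (batch 2, post_nms_count 6, f32, bfyx) this would evaluate to:

    // Illustrative only: num_batches = 2, post_nms_count = 6
    // layout(data_types::f32, format::bfyx, {2 * 6, 4, 1, 1})  -> i.e. {12, 4, 1, 1}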
@@ -0,0 +1,89 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "generate_proposals_inst.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "kernel_selector_helper.h"
#include "generate_proposals/generate_proposals_kernel_selector.h"
#include "generate_proposals/generate_proposals_kernel_ref.h"

namespace cldnn {
namespace ocl {
struct generate_proposals_impl
        : public typed_primitive_impl_ocl<generate_proposals> {
    using parent = typed_primitive_impl_ocl<generate_proposals>;
    using parent::parent;

    std::unique_ptr<primitive_impl> clone() const override {
        return make_unique<generate_proposals_impl>(*this);
    }

protected:
    kernel_arguments_data get_arguments(typed_primitive_inst<generate_proposals>& instance, int32_t) const override {
        auto args = parent::get_arguments(instance, 0);
        args.inputs.push_back(instance.output_rois_scores_memory());
        args.inputs.push_back(instance.output_rois_nums_memory());
        return args;
    }

public:
    static primitive_impl* create(const generate_proposals_node& arg, const kernel_impl_params& impl_param) {
        auto params = get_default_params<kernel_selector::generate_proposals_params>(impl_param);
        auto optional_params = get_default_optional_params<
            kernel_selector::generate_proposals_optional_params>(arg.get_program());

        const auto& primitive = arg.get_primitive();

        params.min_size = primitive->min_size;
        params.nms_threshold = primitive->nms_threshold;
        params.pre_nms_count = primitive->pre_nms_count;
        params.post_nms_count = primitive->post_nms_count;
        params.normalized = primitive->normalized;
        params.nms_eta = primitive->nms_eta;
        params.roi_num_type = primitive->roi_num_type == cldnn::data_types::i32 ?
            kernel_selector::Datatype::INT32 : kernel_selector::Datatype::INT64;

        params.inputs.push_back(convert_data_tensor(arg.anchors().get_output_layout()));
        params.inputs.push_back(convert_data_tensor(arg.deltas().get_output_layout()));
        params.inputs.push_back(convert_data_tensor(arg.scores().get_output_layout()));

        params.inputs.push_back(convert_data_tensor(arg.output_rois_scores_node().get_output_layout()));
        params.inputs.push_back(convert_data_tensor(arg.output_rois_nums_node().get_output_layout()));

        const auto& kernel_selector = kernel_selector::generate_proposals_kernel_selector::Instance();
        const auto best_kernels = kernel_selector.GetBestKernels(params, optional_params);

        CLDNN_ERROR_BOOL(arg.id(),
                         "best_kernels.empty()",
                         best_kernels.empty(),
                         "Cannot find a proper kernel with this arguments");

        return new generate_proposals_impl(arg, best_kernels[0]);
    }
};

namespace detail {
attach_generate_proposals_impl::attach_generate_proposals_impl() {
    implementation_map<generate_proposals>::add(impl_types::ocl,
                                                generate_proposals_impl::create, {
        std::make_tuple(data_types::f16, format::bfyx),
        std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
        std::make_tuple(data_types::f16, format::b_fs_yx_fsv32),
        std::make_tuple(data_types::f16, format::bs_fs_yx_bsv16_fsv16),
        std::make_tuple(data_types::f16, format::bs_fs_yx_bsv32_fsv16),
        std::make_tuple(data_types::f16, format::bs_fs_yx_bsv32_fsv32),

        std::make_tuple(data_types::f32, format::bfyx),
        std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
        std::make_tuple(data_types::f32, format::b_fs_yx_fsv32),
        std::make_tuple(data_types::f32, format::bs_fs_yx_bsv16_fsv16),
        std::make_tuple(data_types::f32, format::bs_fs_yx_bsv32_fsv16),
        std::make_tuple(data_types::f32, format::bs_fs_yx_bsv32_fsv32)});
}
}  // namespace detail
}  // namespace ocl
}  // namespace cldnn
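The input ordering here matters: get_default_params appears to seed params.inputs with the first input tensor (im_info), and the pushes above append the remaining tensors, so the kernel selector sees six inputs whose indices line up with the constants in the reference kernel below. A hedged summary of that mapping (index names come from generate_proposals_kernel_ref.cpp later in this PR):

    // params.inputs index -> tensor
    // 0: im_info                     (kImInfoInputIdx)
    // 1: anchors                     (kAnchorsInputIdx)
    // 2: deltas                      (kDeltasInputIdx)
    // 3: scores                      (kScoresInputIdx)
    // 4: ROIs scores output buffer   (kRoisScoresOutputIdx)
    // 5: ROIs num output buffer      (kRoisNumsOutputIdx)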
@@ -41,6 +41,7 @@ void register_implementations() {
    REGISTER_OCL(gather_elements);
    REGISTER_OCL(gather_nd);
    REGISTER_OCL(gemm);
    REGISTER_OCL(generate_proposals);
    REGISTER_OCL(lrn);
    REGISTER_OCL(lstm_gemm);
    REGISTER_OCL(lstm_elt);
@@ -119,6 +119,7 @@ REGISTER_OCL(gather);
REGISTER_OCL(gather_nd);
REGISTER_OCL(gather_elements);
REGISTER_OCL(gemm);
REGISTER_OCL(generate_proposals);
REGISTER_OCL(lrn);
REGISTER_OCL(lstm_gemm);
REGISTER_OCL(lstm_elt);
@@ -0,0 +1,50 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

///////////////////////////////////////////////////////////////////////////////////////////////////

#pragma once
#include "intel_gpu/primitives/generate_proposals.hpp"
#include "primitive_inst.h"

namespace cldnn {
template <>
struct typed_program_node<generate_proposals>
        : public typed_program_node_base<generate_proposals> {
    using parent = typed_program_node_base<generate_proposals>;

public:
    using parent::parent;

    program_node& input() const { return get_dependency(0); }

    program_node& anchors() const { return get_dependency(1); }
    program_node& deltas() const { return get_dependency(2); }
    program_node& scores() const { return get_dependency(3); }

    program_node& output_rois_scores_node() const { return get_dependency(4); }
    program_node& output_rois_nums_node() const { return get_dependency(5); }
};

using generate_proposals_node = typed_program_node<generate_proposals>;

template <>
class typed_primitive_inst<generate_proposals>
        : public typed_primitive_inst_base<generate_proposals> {
    using parent = typed_primitive_inst_base<generate_proposals>;

public:
    static layout calc_output_layout(const generate_proposals_node& node, kernel_impl_params const& impl_param);
    static std::string to_string(const generate_proposals_node& node);

    typed_primitive_inst(network& network, const generate_proposals_node& node)
        : parent(network, node)
    {}

    memory::ptr output_rois_scores_memory() const { return dep_memory_ptr(4); }
    memory::ptr output_rois_nums_memory() const { return dep_memory_ptr(5); }
};

using generate_proposals_inst = typed_primitive_inst<generate_proposals>;
}  // namespace cldnn
@@ -38,6 +38,7 @@
#include "data_inst.h"
#include "deconvolution_inst.h"
#include "detection_output_inst.h"
#include "generate_proposals_inst.h"
#include "input_layout_inst.h"
#include "shuffle_channels_inst.h"
#include "arg_max_min_inst.h"
@@ -1432,7 +1433,8 @@ void program::set_layout_optimizer_attributes(layout_optimizer& lo) {
            prim.type() != cldnn::roll::type_id() &&
            prim.type() != cldnn::prior_box::type_id() &&
            prim.type() != cldnn::resample::type_id() &&
-           prim.type() != cldnn::eye::type_id()) {
+           prim.type() != cldnn::eye::type_id() &&
+           prim.type() != cldnn::generate_proposals::type_id()) {
            can_use_fsv16 = false;
        }
@@ -1468,7 +1470,8 @@ void program::set_layout_optimizer_attributes(layout_optimizer& lo) {
            prim.type() != cldnn::roll::type_id() &&
            prim.type() != cldnn::resample::type_id() &&
            prim.type() != cldnn::prior_box::type_id() &&
-           prim.type() != cldnn::eye::type_id()) {
+           prim.type() != cldnn::eye::type_id() &&
+           prim.type() != cldnn::generate_proposals::type_id()) {
            can_use_bs_fs_yx_bsv16_fsv16 = false;
        }
    }
@@ -91,6 +91,7 @@ enum class KernelType {
    REVERSE,
    PRIOR_BOX,
    EYE,
    GENERATE_PROPOSALS
};

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -0,0 +1,196 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "generate_proposals_kernel_ref.h"
#include "kernel_selector_utils.h"

namespace kernel_selector {

ParamsKey GenerateProposalsRef::GetSupportedKey() const {
    ParamsKey k;

    k.EnableInputDataType(Datatype::INT32);
    k.EnableInputDataType(Datatype::INT64);
    k.EnableOutputDataType(Datatype::INT32);
    k.EnableOutputDataType(Datatype::INT64);
    k.EnableInputDataType(Datatype::F16);
    k.EnableInputDataType(Datatype::F32);
    k.EnableOutputDataType(Datatype::F16);
    k.EnableOutputDataType(Datatype::F32);
    k.EnableAllInputLayout();
    k.EnableAllOutputLayout();
    k.EnableBatching();
    k.EnableDifferentTypes();
    k.EnableTensorPitches();

    return k;
}

KernelsPriority GenerateProposalsRef::GetKernelsPriority(const Params&, const optional_params&) const {
    return DONT_USE_IF_HAVE_SOMETHING_ELSE;
}

bool GenerateProposalsRef::Validate(const Params& p, const optional_params& o) const {
    if (p.GetType() != KernelType::GENERATE_PROPOSALS
        || o.GetType() != KernelType::GENERATE_PROPOSALS) {
        return false;
    }

    return true;
}

namespace {
constexpr size_t kImInfoInputIdx = 0;
constexpr size_t kAnchorsInputIdx = 1;
constexpr size_t kDeltasInputIdx = 2;
constexpr size_t kScoresInputIdx = 3;
constexpr size_t kRoisScoresOutputIdx = 4;
constexpr size_t kRoisNumsOutputIdx = 5;

GenerateProposalsRef::DispatchData SetDefault(const generate_proposals_params& params, size_t idx) {
    GenerateProposalsRef::DispatchData dispatch_data;

    const auto& inputs = params.inputs;
    const auto num_batches = inputs[kScoresInputIdx].Batch().v;
    const auto anchors_num = inputs[kScoresInputIdx].Feature().v;
    const auto bottom_H = inputs[kDeltasInputIdx].Y().v;
    const auto bottom_W = inputs[kDeltasInputIdx].X().v;

    if (idx == 0) {
        dispatch_data.gws = {bottom_H, bottom_W, num_batches * anchors_num};
    } else if (idx == 1 || idx == 2) {
        dispatch_data.gws = {num_batches, 1, 1};
    } else if (idx == 3) {
        dispatch_data.gws = {1, 1, 1};
    }

    dispatch_data.lws = GetOptimalLocalWorkGroupSizes(dispatch_data.gws, params.engineInfo);

    return dispatch_data;
}
}  // namespace

void GenerateProposalsRef::SetKernelArguments(
        const generate_proposals_params& params,
        size_t idx, cldnn::arguments_desc& arguments) const {
    switch (idx) {
        case 0: {  // refine anchors
            arguments.push_back({ArgumentDescriptor::Types::INPUT, kImInfoInputIdx});
            arguments.push_back({ArgumentDescriptor::Types::INPUT, kAnchorsInputIdx});
            arguments.push_back({ArgumentDescriptor::Types::INPUT, kDeltasInputIdx});
            arguments.push_back({ArgumentDescriptor::Types::INPUT, kScoresInputIdx});
            arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0});  // proposals
            break;
        }
        case 1: {  // sort proposals by score
            arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0});  // proposals
            break;
        }
        case 2: {  // NMS
            arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0});  // proposals
            arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 1});  // nms_out_indices
            arguments.push_back({ArgumentDescriptor::Types::INPUT, kRoisNumsOutputIdx});  // rois num
            break;
        }
        case 3: {  // convert proposals to rois and roi scores
            arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0});  // proposals
            arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 1});  // nms_out_indices
            arguments.push_back({ArgumentDescriptor::Types::INPUT, kRoisNumsOutputIdx});  // rois num
            arguments.push_back({ArgumentDescriptor::Types::OUTPUT, 0});  // rois
            arguments.push_back({ArgumentDescriptor::Types::INPUT, kRoisScoresOutputIdx});  // roi scores
            break;
        }
        default:
            throw std::invalid_argument("generate_proposals has 4 kernels. valid index is 0 ~ 3.");
    }
}

KernelsData GenerateProposalsRef::GetKernelsData(const Params& params, const optional_params& options) const {
    if (!Validate(params, options)) {
        return {};
    }

    constexpr size_t kKernelsNum = 4;
    KernelData kd = KernelData::Default<generate_proposals_params>(params, kKernelsNum);
    const generate_proposals_params& new_params = static_cast<const generate_proposals_params&>(params);
    const auto& inputs = new_params.inputs;

    const auto anchors_num = inputs[kScoresInputIdx].Feature().v;
    const auto bottom_H = inputs[kDeltasInputIdx].Y().v;
    const auto bottom_W = inputs[kDeltasInputIdx].X().v;
    const auto scale_w_index = inputs[kImInfoInputIdx].Feature().v == 3 ? 2 : 3;
    const auto num_proposals = anchors_num * bottom_H * bottom_W;
    const auto pre_nms_topn = std::min(num_proposals, new_params.pre_nms_count);
    const auto max_delta_log_wh = static_cast<float>(std::log(1000.0 / 16.0));

    kd.internalBufferDataType = Datatype::F32;

    const auto num_batches = inputs[kScoresInputIdx].Batch().v;
    constexpr size_t kProposalBoxSize = 6;  // 6 values: {x0, y0, x1, y1, score, keep}
    const auto proposals_buffer_size = num_batches * num_proposals * sizeof(float) * kProposalBoxSize;
    kd.internalBufferSizes.push_back(proposals_buffer_size);

    const auto out_indices_size = num_batches * new_params.post_nms_count * sizeof(float);
    kd.internalBufferSizes.push_back(out_indices_size);

    for (size_t i = 0; i < kKernelsNum; ++i) {
        const auto dispatchData = SetDefault(new_params, i);
        const auto entry_point = GetEntryPoint(kernelName, new_params.layerID, params, options, i);
        auto cldnn_jit = MakeBaseParamsJitConstants(new_params);

        cldnn_jit.AddConstant(MakeJitConstant("GENERATE_PROPOSALS_STAGE_" + std::to_string(i), "true"));
        cldnn_jit.AddConstant(MakeJitConstant("PROPOSAL_SIZE", kProposalBoxSize));
        cldnn_jit.Merge(MakeTypeJitConstants(new_params.roi_num_type, "ROI_NUM"));
        if (new_params.normalized) {
            cldnn_jit.AddConstant(MakeJitConstant("NORMALIZED", 1));
        }

        switch (i) {
            case 0: {
                cldnn_jit.AddConstants({MakeJitConstant("MIN_SIZE", new_params.min_size),
                                        MakeJitConstant("ANCHORS_NUM", anchors_num),
                                        MakeJitConstant("NUM_PROPOSALS", num_proposals),
                                        MakeJitConstant("BOTTOM_H", bottom_H),
                                        MakeJitConstant("BOTTOM_W", bottom_W),
                                        MakeJitConstant("BOTTOM_AREA", bottom_H * bottom_W),
                                        MakeJitConstant("SCALE_W_INDEX", scale_w_index),
                                        MakeJitConstant("MAX_DELTA_LOG_WH", max_delta_log_wh)
                                       });
                break;
            }
            case 1: {
                cldnn_jit.AddConstants({MakeJitConstant("NUM_PROPOSALS", num_proposals)});
                break;
            }
            case 2: {
                cldnn_jit.AddConstants({MakeJitConstant("NUM_PROPOSALS", num_proposals),
                                        MakeJitConstant("PRE_NMS_TOPN", pre_nms_topn),
                                        MakeJitConstant("POST_NMS_COUNT", new_params.post_nms_count),
                                        MakeJitConstant("NMS_THRESHOLD", new_params.nms_threshold),
                                       });
                break;
            }
            case 3: {
                cldnn_jit.AddConstants({MakeJitConstant("POST_NMS_COUNT", new_params.post_nms_count),
                                        MakeJitConstant("NUM_PROPOSALS", num_proposals)
                                       });
                break;
            }
            default:
                throw std::invalid_argument("GENERATE_PROPOSALS has 4 kernels. valid index is 0 ~ 3.");
        }

        const auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
        auto& kernel = kd.kernels[i];

        KernelBase::CheckDispatchData(kernelName, dispatchData, params.engineInfo.maxWorkGroupSize);
        kernel.params.workGroups.global = dispatchData.gws;
        kernel.params.workGroups.local = dispatchData.lws;
        kernel.code.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo);
        SetKernelArguments(new_params, i, kernel.params.arguments);
    }

    return {kd};
}
}  // namespace kernel_selector
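To make the internal-buffer sizing above concrete: for the GPU unit test at the end of this PR (2 batches, 3 anchors, a 2x6 deltas grid, post_nms_count 6) the two buffers come out as follows. The numbers are illustrative, derived from the formulas above rather than stated anywhere in the diff:

    // num_proposals    = anchors_num * bottom_H * bottom_W        = 3 * 2 * 6      = 36
    // proposals buffer = num_batches * num_proposals * 4 bytes * 6 = 2 * 36 * 4 * 6 = 1728 bytes
    // nms_out_indices  = num_batches * post_nms_count * 4 bytes    = 2 * 6 * 4      =   48 bytes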
@@ -0,0 +1,45 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "kernel_base_opencl.h"

namespace kernel_selector {
struct generate_proposals_params : public base_params {
    generate_proposals_params()
            : base_params(KernelType::GENERATE_PROPOSALS) {}

    float min_size{0.0f};
    float nms_threshold{0.0f};
    size_t pre_nms_count{0};
    size_t post_nms_count{0};
    bool normalized{true};
    float nms_eta{1.0f};
    Datatype roi_num_type = Datatype::INT64;
};

struct generate_proposals_optional_params : public optional_params {
    generate_proposals_optional_params()
            : optional_params(KernelType::GENERATE_PROPOSALS) {}
};

class GenerateProposalsRef : public KernelBaseOpenCL {
public:
    GenerateProposalsRef()
            : KernelBaseOpenCL("generate_proposals_ref") {}

    ~GenerateProposalsRef() = default;

    using DispatchData = CommonDispatchData;

    KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
    KernelsPriority GetKernelsPriority(const Params& params, const optional_params& options) const override;
    ParamsKey GetSupportedKey() const override;

protected:
    bool Validate(const Params& p, const optional_params& o) const override;
    void SetKernelArguments(const generate_proposals_params& params,
                            size_t idx, cldnn::arguments_desc& kernel) const;
};
}  // namespace kernel_selector
@@ -0,0 +1,21 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "generate_proposals_kernel_selector.h"
#include "generate_proposals_kernel_ref.h"

namespace kernel_selector {
generate_proposals_kernel_selector::generate_proposals_kernel_selector() {
    Attach<GenerateProposalsRef>();
}

generate_proposals_kernel_selector& generate_proposals_kernel_selector::Instance() {
    static generate_proposals_kernel_selector instance_;
    return instance_;
}

KernelsData generate_proposals_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const {
    return GetNaiveBestKernel(params, options, KernelType::GENERATE_PROPOSALS);
}
}  // namespace kernel_selector
@@ -0,0 +1,18 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "kernel_selector.h"

namespace kernel_selector {
class generate_proposals_kernel_selector : public kernel_selector_base {
public:
    static generate_proposals_kernel_selector& Instance();

    generate_proposals_kernel_selector();

    KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
};
}  // namespace kernel_selector
@@ -0,0 +1,306 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#if INPUT0_TYPE_SIZE == 2 //f16
#define HALF_ONE 0.5h
#else
#define HALF_ONE 0.5f
#endif

#define ZERO INPUT0_VAL_ZERO

#ifdef NORMALIZED
#define COORDINATES_OFFSET INPUT0_VAL_ZERO
#else
#define COORDINATES_OFFSET INPUT0_VAL_ONE
#endif

#ifdef GENERATE_PROPOSALS_STAGE_0

// 0. Refine anchors
KERNEL(generate_proposals_ref_stage_0)
(const __global INPUT0_TYPE* im_info,
 const __global INPUT1_TYPE* anchors,
 const __global INPUT2_TYPE* deltas,
 const __global INPUT3_TYPE* scores,
 __global OUTPUT_TYPE* proposals) {
    const uint h = get_global_id(0);
    const uint w = get_global_id(1);
    const uint ba = (uint)get_global_id(2);
    const uint batch = ba / INPUT0_FEATURE_NUM;
    const uint anchor = ba % INPUT0_FEATURE_NUM;

    const INPUT0_TYPE img_H = im_info[INPUT0_GET_INDEX(batch, 0, 0, 0)];
    const INPUT0_TYPE img_W = im_info[INPUT0_GET_INDEX(batch, 1, 0, 0)];
    const INPUT0_TYPE scale_H = im_info[INPUT0_GET_INDEX(batch, 2, 0, 0)];
    const INPUT0_TYPE scale_W = im_info[INPUT0_GET_INDEX(batch, SCALE_W_INDEX, 0, 0)];
    const float min_box_H = MIN_SIZE * scale_H;
    const float min_box_W = MIN_SIZE * scale_W;

    INPUT0_TYPE x0 = anchors[INPUT1_GET_INDEX(h, w, anchor, 0)];
    INPUT0_TYPE y0 = anchors[INPUT1_GET_INDEX(h, w, anchor, 1)];
    INPUT0_TYPE x1 = anchors[INPUT1_GET_INDEX(h, w, anchor, 2)];
    INPUT0_TYPE y1 = anchors[INPUT1_GET_INDEX(h, w, anchor, 3)];

    const INPUT0_TYPE dx = deltas[INPUT2_GET_INDEX(batch, anchor * 4 + 0, h, w)];
    const INPUT0_TYPE dy = deltas[INPUT2_GET_INDEX(batch, anchor * 4 + 1, h, w)];
    const INPUT0_TYPE d_log_w = deltas[INPUT2_GET_INDEX(batch, anchor * 4 + 2, h, w)];
    const INPUT0_TYPE d_log_h = deltas[INPUT2_GET_INDEX(batch, anchor * 4 + 3, h, w)];

    const INPUT0_TYPE score = scores[INPUT3_GET_INDEX(batch, anchor, h, w)];

    // width & height of box
    const INPUT0_TYPE ww = x1 - x0 + COORDINATES_OFFSET;
    const INPUT0_TYPE hh = y1 - y0 + COORDINATES_OFFSET;
    // center location of box
    const INPUT0_TYPE ctr_x = x0 + HALF_ONE * ww;
    const INPUT0_TYPE ctr_y = y0 + HALF_ONE * hh;
    // new center location according to deltas (dx, dy)
    const INPUT0_TYPE pred_ctr_x = dx * ww + ctr_x;
    const INPUT0_TYPE pred_ctr_y = dy * hh + ctr_y;
    // new width & height according to deltas d(log w), d(log h)
    const INPUT0_TYPE pred_w = exp(min(d_log_w, TO_INPUT0_TYPE(MAX_DELTA_LOG_WH))) * ww;
    const INPUT0_TYPE pred_h = exp(min(d_log_h, TO_INPUT0_TYPE(MAX_DELTA_LOG_WH))) * hh;
    // update upper-left corner location
    x0 = pred_ctr_x - HALF_ONE * pred_w;
    y0 = pred_ctr_y - HALF_ONE * pred_h;
    // update lower-right corner location
    x1 = pred_ctr_x + HALF_ONE * pred_w - COORDINATES_OFFSET;
    y1 = pred_ctr_y + HALF_ONE * pred_h - COORDINATES_OFFSET;
    // adjust new corner locations to be within the image region
    x0 = max(ZERO, min(x0, img_W - COORDINATES_OFFSET));
    y0 = max(ZERO, min(y0, img_H - COORDINATES_OFFSET));
    x1 = max(ZERO, min(x1, img_W - COORDINATES_OFFSET));
    y1 = max(ZERO, min(y1, img_H - COORDINATES_OFFSET));
    // recompute new width & height
    const INPUT0_TYPE box_w = x1 - x0 + COORDINATES_OFFSET;
    const INPUT0_TYPE box_h = y1 - y0 + COORDINATES_OFFSET;

    const uint batch_offset = batch * NUM_PROPOSALS * PROPOSAL_SIZE;
    const uint offset = h * BOTTOM_W + w;
    const uint proposal_idx = batch_offset + (offset * ANCHORS_NUM + anchor) * PROPOSAL_SIZE;

    proposals[proposal_idx + 0] = x0;
    proposals[proposal_idx + 1] = y0;
    proposals[proposal_idx + 2] = x1;
    proposals[proposal_idx + 3] = y1;
    proposals[proposal_idx + 4] = score;
    proposals[proposal_idx + 5] = ((min_box_W <= box_w) && (min_box_H <= box_h)) ? 1 : 0;
}

#endif /* GENERATE_PROPOSALS_STAGE_0 */

#ifdef GENERATE_PROPOSALS_STAGE_1

typedef struct __attribute__((__packed__)) {
    INPUT0_TYPE x0;
    INPUT0_TYPE y0;
    INPUT0_TYPE x1;
    INPUT0_TYPE y1;
    INPUT0_TYPE score;
    INPUT0_TYPE keep;
} Box;

inline void FUNC(swap_box)(__global Box* a, __global Box* b) {
    const Box temp = *a;
    *a = *b;
    *b = temp;
}

inline int FUNC(partition)(__global Box* arr, int l, int h) {
    INPUT0_TYPE pivotScore = arr[h].score;
    int i = (l - 1);
    for (int j = l; j <= h - 1; j++) {
        if (arr[j].score > pivotScore) {
            i++;
            FUNC_CALL(swap_box)(&arr[i], &arr[j]);
        }
    }
    FUNC_CALL(swap_box)(&arr[i + 1], &arr[h]);
    return (i + 1);
}

inline void FUNC(bubbleSortIterative)(__global Box* arr, int l, int h) {
    for (int i = 0; i < h - l; i++) {
        bool swapped = false;
        for (int j = l; j < h - i; j++) {
            if ((arr[j].score > arr[j + 1].score)) {
                FUNC_CALL(swap_box)(&arr[j], &arr[j + 1]);
                swapped = true;
            }
        }

        if (!swapped)
            break;
    }
}

inline void FUNC(quickSortIterative)(__global Box* arr, int l, int h) {
    // Create an auxiliary stack
    const int kStackSize = 100;
    int stack[kStackSize];

    // initialize top of stack
    int top = -1;

    // push initial values of l and h to stack
    stack[++top] = l;
    stack[++top] = h;

    // Keep popping from stack while is not empty
    while (top >= 0) {
        // Pop h and l
        h = stack[top--];
        l = stack[top--];

        // Set pivot element at its correct position
        // in sorted array
        int p = FUNC_CALL(partition)(arr, l, h);

        // If there are elements on left side of pivot,
        // then push left side to stack
        if (p - 1 > l) {
            if (top >= (kStackSize - 1)) {
                FUNC_CALL(bubbleSortIterative)(arr, l, p - 1);
            } else {
                stack[++top] = l;
                stack[++top] = p - 1;
            }
        }

        // If there are elements on right side of pivot,
        // then push right side to stack
        if (p + 1 < h) {
            if (top >= (kStackSize - 1)) {
                FUNC_CALL(bubbleSortIterative)(arr, p + 1, h);
            } else {
                stack[++top] = p + 1;
                stack[++top] = h;
            }
        }
    }
}

// 1. Sort boxes by scores
KERNEL(generate_proposals_ref_stage_1)(__global OUTPUT_TYPE* proposals) {
    const uint batch = get_global_id(0);

    __global Box* boxes = (__global Box*)(proposals + batch * NUM_PROPOSALS * PROPOSAL_SIZE);

    FUNC_CALL(quickSortIterative)(boxes, 0, NUM_PROPOSALS-1);
}
#endif /* GENERATE_PROPOSALS_STAGE_1 */

#ifdef GENERATE_PROPOSALS_STAGE_2

// 2. NMS
KERNEL(generate_proposals_ref_stage_2)
(const __global INPUT0_TYPE* boxes,
 __global size_t* out_indices,
 __global ROI_NUM_TYPE* num_outputs) {
    const uint batch = get_global_id(0);
    const uint batch_offset = batch * NUM_PROPOSALS * PROPOSAL_SIZE;

    uint count = 0;
    __local uint index_out[INPUT0_BATCH_NUM * POST_NMS_COUNT];
    __local bool is_dead[INPUT0_BATCH_NUM * PRE_NMS_TOPN];

    for (uint box = 0; box < PRE_NMS_TOPN; ++box) {
        is_dead[batch * PRE_NMS_TOPN + box] = boxes[batch_offset + PROPOSAL_SIZE * box + 5] == 0.0f;
    }

    for (uint box = 0; box < PRE_NMS_TOPN; ++box) {
        if (is_dead[batch * PRE_NMS_TOPN + box])
            continue;

        index_out[batch * POST_NMS_COUNT + count++] = box;
        if (count == POST_NMS_COUNT)
            break;

        const uint box_offset = batch_offset + box * PROPOSAL_SIZE;
        const INPUT0_TYPE x0i = boxes[box_offset + 0];
        const INPUT0_TYPE y0i = boxes[box_offset + 1];
        const INPUT0_TYPE x1i = boxes[box_offset + 2];
        const INPUT0_TYPE y1i = boxes[box_offset + 3];

        const INPUT0_TYPE a_width = x1i - x0i;
        const INPUT0_TYPE a_height = y1i - y0i;
        const INPUT0_TYPE a_area = (a_width + COORDINATES_OFFSET) * (a_height + COORDINATES_OFFSET);

        for (uint tail = box + 1; tail < PRE_NMS_TOPN; ++tail) {
            const uint tail_offset = batch_offset + tail * PROPOSAL_SIZE;
            const INPUT0_TYPE x0j = boxes[tail_offset + 0];
            const INPUT0_TYPE y0j = boxes[tail_offset + 1];
            const INPUT0_TYPE x1j = boxes[tail_offset + 2];
            const INPUT0_TYPE y1j = boxes[tail_offset + 3];

            const INPUT0_TYPE x0 = max(x0i, x0j);
            const INPUT0_TYPE y0 = max(y0i, y0j);
            const INPUT0_TYPE x1 = min(x1i, x1j);
            const INPUT0_TYPE y1 = min(y1i, y1j);

            const INPUT0_TYPE width = x1 - x0 + COORDINATES_OFFSET;
            const INPUT0_TYPE height = y1 - y0 + COORDINATES_OFFSET;
            const INPUT0_TYPE area = max(ZERO, width) * max(ZERO, height);

            const INPUT0_TYPE b_width = x1j - x0j;
            const INPUT0_TYPE b_height = y1j - y0j;
            const INPUT0_TYPE b_area = (b_width + COORDINATES_OFFSET) * (b_height + COORDINATES_OFFSET);

            const INPUT0_TYPE intersection_area = area / (a_area + b_area - area);

            if ( (NMS_THRESHOLD < intersection_area) && (x0i <= x1j) && (y0i <= y1j) && (x0j <= x1i) && (y0j <= y1i) ) {
                is_dead[batch * PRE_NMS_TOPN + tail] = true;
            }
        }
    }

    num_outputs[INPUT5_GET_INDEX(batch, 0, 0, 0)] = count;

    for (uint i = 0; i < count; ++i) {
        out_indices[batch * POST_NMS_COUNT + i] = index_out[batch * POST_NMS_COUNT + i];
    }
}
#endif /* GENERATE_PROPOSALS_STAGE_2 */

#ifdef GENERATE_PROPOSALS_STAGE_3

// 3. Convert proposals to rois and roi_scores
KERNEL(generate_proposals_ref_stage_3)
(const __global INPUT0_TYPE* boxes,
 const __global size_t* out_indices,
 const __global ROI_NUM_TYPE* num_outputs,
 __global OUTPUT_TYPE* rois,
 __global INPUT4_TYPE* roi_scores) {

    uint roi_index = 0;
    for (uint batch = 0; batch < INPUT0_BATCH_NUM; ++batch) {
        for (uint i = 0; i < num_outputs[INPUT5_GET_INDEX(batch, 0, 0, 0)]; ++i) {
            const uint box_index = (batch * NUM_PROPOSALS + out_indices[batch * POST_NMS_COUNT + i]) * PROPOSAL_SIZE;

            rois[OUTPUT_GET_INDEX(roi_index, 0, 0, 0)] = boxes[box_index + 0];
            rois[OUTPUT_GET_INDEX(roi_index, 1, 0, 0)] = boxes[box_index + 1];
            rois[OUTPUT_GET_INDEX(roi_index, 2, 0, 0)] = boxes[box_index + 2];
            rois[OUTPUT_GET_INDEX(roi_index, 3, 0, 0)] = boxes[box_index + 3];
            roi_scores[INPUT4_GET_INDEX(roi_index, 0, 0, 0)] = boxes[box_index + 4];
            ++roi_index;
        }
    }

    // fill the rest of outputs with zeros
    while (roi_index < INPUT0_BATCH_NUM * POST_NMS_COUNT) {
        rois[OUTPUT_GET_INDEX(roi_index, 0, 0, 0)] = 0.0f;
        rois[OUTPUT_GET_INDEX(roi_index, 1, 0, 0)] = 0.0f;
        rois[OUTPUT_GET_INDEX(roi_index, 2, 0, 0)] = 0.0f;
        rois[OUTPUT_GET_INDEX(roi_index, 3, 0, 0)] = 0.0f;

        roi_scores[roi_index] = 0.0f;
        ++roi_index;
    }
}
#endif /* GENERATE_PROPOSALS_STAGE_3 */

#undef HALF_ONE
#undef ZERO
#undef COORDINATES_OFFSET
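One detail worth spelling out in the NMS stage above: despite its name, the variable intersection_area already holds the intersection-over-union ratio, since the raw intersection (area) is divided by the union before being compared against NMS_THRESHOLD. In the usual notation:

    IoU(A, B) = |A ∩ B| / (|A| + |B| - |A ∩ B|)   which in the kernel is   area / (a_area + b_area - area)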
src/plugins/intel_gpu/src/plugin/ops/generate_proposals.cpp  (new file, 79 lines)
@@ -0,0 +1,79 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "intel_gpu/primitives/generate_proposals.hpp"

#include <ngraph_ops/generate_proposals_ie_internal.hpp>

#include "intel_gpu/plugin/common_utils.hpp"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/primitives/mutable_data.hpp"

namespace ov {
namespace intel_gpu {

static void CreateGenerateProposalsIEInternalOp(
        Program& p,
        const std::shared_ptr<ngraph::op::internal::GenerateProposalsIEInternal>& op) {
    validate_inputs_count(op, {4});
    if (op->get_output_size() != 3) {
        IE_THROW() << "GenerateProposals requires 3 outputs";
    }

    auto inputs = p.GetInputPrimitiveIDs(op);
    const auto& attrs = op->get_attrs();
    const auto layer_type_name = layer_type_name_ID(op);
    const auto layer_name = layer_type_name + ".out0";

    // output 2 - scores
    const auto mutable_precision_1 = op->get_output_element_type(1);
    const auto output_shape_1 = op->get_output_shape(1);
    const cldnn::layout mutable_layout_1{cldnn::element_type_to_data_type(mutable_precision_1),
                                         cldnn::format::get_default_format(output_shape_1.size()),
                                         tensor_from_dims(output_shape_1)};
    cldnn::memory::ptr shared_memory_1{p.GetEngine().allocate_memory(mutable_layout_1)};

    const auto mutable_id_w_1 = layer_type_name + "_md_write.1";
    const cldnn::mutable_data mutable_prim_w_1{mutable_id_w_1, shared_memory_1};
    p.add_primitive(*op, mutable_prim_w_1);
    inputs.push_back(mutable_id_w_1);

    // output 3 - roisNum
    const auto output_shape_2 = op->get_output_shape(2);
    const auto mutable_precision_2 = op->get_output_element_type(2);
    const cldnn::layout mutable_layout_2{cldnn::element_type_to_data_type(mutable_precision_2),
                                         cldnn::format::get_default_format(output_shape_2.size()),
                                         tensor_from_dims(output_shape_2)};
    cldnn::memory::ptr shared_memory_2{p.GetEngine().allocate_memory(mutable_layout_2)};

    const auto mutable_id_w_2 = layer_type_name + "_md_write.2";
    const cldnn::mutable_data mutable_prim_w_2{mutable_id_w_2, shared_memory_2};
    p.add_primitive(*op, mutable_prim_w_2);
    inputs.push_back(mutable_id_w_2);

    const cldnn::generate_proposals prim{layer_name,
                                         inputs,
                                         attrs.min_size,
                                         attrs.nms_threshold,
                                         attrs.pre_nms_count,
                                         attrs.post_nms_count,
                                         attrs.normalized,
                                         attrs.nms_eta,
                                         cldnn::element_type_to_data_type(op->get_roi_num_type())};

    p.add_primitive(*op, prim);

    const auto mutable_id_r_1 = layer_type_name + ".out1";
    const cldnn::mutable_data mutable_prim_r_1{mutable_id_r_1, {layer_name}, shared_memory_1};
    p.add_primitive(*op, mutable_prim_r_1);

    const auto mutable_id_r_2 = layer_type_name + ".out2";
    const cldnn::mutable_data mutable_prim_r_2{mutable_id_r_2, {layer_name}, shared_memory_2};
    p.add_primitive(*op, mutable_prim_r_2);
}

REGISTER_FACTORY_IMPL(internal, GenerateProposalsIEInternal);

}  // namespace intel_gpu
}  // namespace ov
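This is the plugin's existing pattern for exposing a multi-output operation through single-output primitives: each extra result gets a pre-allocated buffer wrapped in mutable_data, written through the "_md_write.N" id that is fed to the kernel and read back through the ".outN" id that downstream primitives consume. A hedged summary of the resulting primitive ids (the names come from the code above; the suffix scheme itself is inherited from similar ops such as NonMaxSuppressionIEInternal):

    // <layer>.out0        generate_proposals primitive -> ROIs               (output 0)
    // <layer>_md_write.1  mutable_data buffer the kernel writes scores into  (output 1)
    // <layer>.out1        mutable_data that republishes the scores buffer
    // <layer>_md_write.2  mutable_data buffer for roisNum                    (output 2)
    // <layer>.out2        mutable_data that republishes the roisNum buffer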
@@ -74,6 +74,7 @@
#include <transformations/op_conversions/convert_gather_downgrade.hpp>
#include <transformations/op_conversions/convert_gather_0d.hpp>
#include <transformations/op_conversions/convert_deformable_conv_v8_to_v1.hpp>
#include <transformations/op_conversions/convert_gp9_to_gp_ie_internal.hpp>
#include <transformations/op_conversions/simplify_ctc_greedy_decoder_seq_len.hpp>
#include "transformations/op_conversions/softmax_decomposition.hpp"
#include <transformations/op_conversions/gelu7_downgrade.hpp>
@@ -163,6 +164,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
        manager.register_pass<ngraph::pass::ConvertNMS4ToNMS9>();
        manager.register_pass<ngraph::pass::ConvertNMS5ToNMS9>();
        manager.register_pass<ngraph::pass::ConvertNMS9ToNMSIEInternal>();
        manager.register_pass<ngraph::pass::ConvertGP9ToGPIEInternal>();
        manager.register_pass<ngraph::pass::ConvertGather0D>();
        manager.register_pass<ngraph::pass::ConvertPriorBox8To0, false>();
@ -0,0 +1,461 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "test_utils.h"
|
||||
|
||||
#include <intel_gpu/primitives/generate_proposals.hpp>
|
||||
#include <intel_gpu/primitives/input_layout.hpp>
|
||||
#include <intel_gpu/primitives/mutable_data.hpp>
|
||||
|
||||
using namespace cldnn;
|
||||
using namespace ::tests;
|
||||
|
||||
namespace {
|
||||
template <typename T>
|
||||
struct GenerateProposalsParams {
|
||||
float min_size;
|
||||
float nms_threshold;
|
||||
int64_t pre_nms_count;
|
||||
int64_t post_nms_count;
|
||||
bool normalized;
|
||||
float nms_eta;
|
||||
std::vector<T> expected_rois;
|
||||
std::vector<T> expected_roi_scores;
|
||||
std::vector<size_t> expected_rois_num;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
using GenerateProposalsParamsWithLayout = std::tuple<GenerateProposalsParams<T>, format::type>;
|
||||
|
||||
constexpr size_t num_batches = 2;
|
||||
constexpr size_t image_height = 200;
|
||||
constexpr size_t image_width = 200;
|
||||
constexpr float image_scale = 4.0f;
|
||||
constexpr size_t height = 2;
|
||||
constexpr size_t width = 6;
|
||||
constexpr size_t number_of_channels = 3;
|
||||
constexpr size_t number_of_anchors = 3;
|
||||
|
||||
const std::vector<float> im_info{image_height, image_width, image_scale, image_height, image_width, image_scale};
|
||||
|
||||
const std::vector<float> anchors{
|
||||
0.0f, 1.0f, 2.0f, 3.0f,
|
||||
4.0f, 5.0f, 6.0f, 7.0f,
|
||||
8.0f, 9.0f, 10.0f, 11.0f,
|
||||
12.0f, 13.0f, 14.0f, 15.0f,
|
||||
16.0f, 17.0f, 18.0f, 19.0f,
|
||||
20.0f, 21.0f, 22.0f, 23.0f,
|
||||
24.0f, 25.0f, 26.0f, 27.0f,
|
||||
28.0f, 29.0f, 30.0f, 31.0f,
|
||||
32.0f, 33.0f, 34.0f, 35.0f,
|
||||
36.0f, 37.0f, 38.0f, 39.0f,
|
||||
40.0f, 41.0f, 42.0f, 43.0f,
|
||||
44.0f, 45.0f, 46.0f, 47.0f,
|
||||
48.0f, 49.0f, 50.0f, 51.0f,
|
||||
52.0f, 53.0f, 54.0f, 55.0f,
|
||||
56.0f, 57.0f, 58.0f, 59.0f,
|
||||
60.0f, 61.0f, 62.0f, 63.0f,
|
||||
64.0f, 65.0f, 66.0f, 67.0f,
|
||||
68.0f, 69.0f, 70.0f, 71.0f,
|
||||
72.0f, 73.0f, 74.0f, 75.0f,
|
||||
76.0f, 77.0f, 78.0f, 79.0f,
|
||||
80.0f, 81.0f, 82.0f, 83.0f,
|
||||
84.0f, 85.0f, 86.0f, 87.0f,
|
||||
88.0f, 89.0f, 90.0f, 91.0f,
|
||||
92.0f, 93.0f, 94.0f, 95.0f,
|
||||
96.0f, 97.0f, 98.0f, 99.0f,
|
||||
100.0f, 101.0f, 102.0f, 103.0f,
|
||||
104.0f, 105.0f, 106.0f, 107.0f,
|
||||
108.0f, 109.0f, 110.0f, 111.0f,
|
||||
112.0f, 113.0f, 114.0f, 115.0f,
|
||||
116.0f, 117.0f, 118.0f, 119.0f,
|
||||
120.0f, 121.0f, 122.0f, 123.0f,
|
||||
124.0f, 125.0f, 126.0f, 127.0f,
|
||||
128.0f, 129.0f, 130.0f, 131.0f,
|
||||
        132.0f, 133.0f, 134.0f, 135.0f,
        136.0f, 137.0f, 138.0f, 139.0f,
        140.0f, 141.0f, 142.0f, 143.0f};

const std::vector<float> deltas{
        0.5337073, 0.86607957, 0.55151343, 0.21626699, 0.4462629, 0.03985678,
        0.5157072, 0.9932138, 0.7565954, 0.43803605, 0.802818, 0.14834064,
        0.53932905, 0.14314, 0.3817048, 0.95075196, 0.05516243, 0.2567484,
        0.25508744, 0.77438325, 0.43561, 0.2094628, 0.8299043, 0.44982538,
        0.95615596, 0.5651084, 0.11801951, 0.05352486, 0.9774733, 0.14439464,
        0.62644225, 0.14370479, 0.54161614, 0.557915, 0.53102225, 0.0840179,
        0.7249888, 0.9843559, 0.5490522, 0.53788143, 0.822474, 0.3278008,
        0.39688024, 0.3286012, 0.5117038, 0.04743988, 0.9408995, 0.29885054,
        0.81039643, 0.85277915, 0.06807619, 0.86430097, 0.36225632, 0.16606331,
        0.5401001, 0.7541649, 0.11998601, 0.5131829, 0.40606487, 0.327888,
        0.27721855, 0.6378373, 0.22795396, 0.4961256, 0.3215895, 0.15607187,
        0.14782153, 0.8908137, 0.8835288, 0.834191, 0.29907143, 0.7983525,
        0.755875, 0.30837986, 0.0839176, 0.26624718, 0.04371626, 0.09472824,
        0.20689541, 0.37622106, 0.1083321, 0.1342548, 0.05815459, 0.7676379,
        0.8105144, 0.92348766, 0.26761323, 0.7183306, 0.8947588, 0.19020908,
        0.42731014, 0.7473663, 0.85775334, 0.9340091, 0.3278848, 0.755993,
        0.05307213, 0.39705503, 0.21003333, 0.5625373, 0.66188884, 0.80521655,
        0.6125863, 0.44678232, 0.97802377, 0.0204936, 0.02686367, 0.7390654,
        0.74631, 0.58399844, 0.5988792, 0.37413648, 0.5946692, 0.6955776,
        0.36377597, 0.7891322, 0.40900692, 0.99139464, 0.50169915, 0.41435778,
        0.17142445, 0.26761186, 0.31591868, 0.14249913, 0.12919712, 0.5418711,
        0.6523203, 0.50259084, 0.7379765, 0.01171071, 0.94423133, 0.00841132,
        0.97486794, 0.2921785, 0.7633071, 0.88477814, 0.03563205, 0.50833166,
        0.01354555, 0.535081, 0.41366324, 0.0694767, 0.9944055, 0.9981207,
        0.5337073, 0.86607957, 0.55151343, 0.21626699, 0.4462629, 0.03985678,
        0.5157072, 0.9932138, 0.7565954, 0.43803605, 0.802818, 0.14834064,
        0.53932905, 0.14314, 0.3817048, 0.95075196, 0.05516243, 0.2567484,
        0.25508744, 0.77438325, 0.43561, 0.2094628, 0.8299043, 0.44982538,
        0.95615596, 0.5651084, 0.11801951, 0.05352486, 0.9774733, 0.14439464,
        0.62644225, 0.14370479, 0.54161614, 0.557915, 0.53102225, 0.0840179,
        0.7249888, 0.9843559, 0.5490522, 0.53788143, 0.822474, 0.3278008,
        0.39688024, 0.3286012, 0.5117038, 0.04743988, 0.9408995, 0.29885054,
        0.81039643, 0.85277915, 0.06807619, 0.86430097, 0.36225632, 0.16606331,
        0.5401001, 0.7541649, 0.11998601, 0.5131829, 0.40606487, 0.327888,
        0.27721855, 0.6378373, 0.22795396, 0.4961256, 0.3215895, 0.15607187,
        0.14782153, 0.8908137, 0.8835288, 0.834191, 0.29907143, 0.7983525,
        0.755875, 0.30837986, 0.0839176, 0.26624718, 0.04371626, 0.09472824,
        0.20689541, 0.37622106, 0.1083321, 0.1342548, 0.05815459, 0.7676379,
        0.8105144, 0.92348766, 0.26761323, 0.7183306, 0.8947588, 0.19020908,
        0.42731014, 0.7473663, 0.85775334, 0.9340091, 0.3278848, 0.755993,
        0.05307213, 0.39705503, 0.21003333, 0.5625373, 0.66188884, 0.80521655,
        0.6125863, 0.44678232, 0.97802377, 0.0204936, 0.02686367, 0.7390654,
        0.74631, 0.58399844, 0.5988792, 0.37413648, 0.5946692, 0.6955776,
        0.36377597, 0.7891322, 0.40900692, 0.99139464, 0.50169915, 0.41435778,
        0.17142445, 0.26761186, 0.31591868, 0.14249913, 0.12919712, 0.5418711,
        0.6523203, 0.50259084, 0.7379765, 0.01171071, 0.94423133, 0.00841132,
        0.97486794, 0.2921785, 0.7633071, 0.88477814, 0.03563205, 0.50833166,
        0.01354555, 0.535081, 0.41366324, 0.0694767, 0.9944055, 0.9981207};

const std::vector<float> scores{
        0.56637216, 0.90457034, 0.69827306, 0.4353543, 0.47985056, 0.42658508,
        0.14516132, 0.08081771, 0.1799732, 0.9229515, 0.42420176, 0.50857586,
        0.82664067, 0.4972319, 0.3752427, 0.56731623, 0.18241242, 0.33252355,
        0.30608943, 0.6572437, 0.69185436, 0.88646156, 0.36985755, 0.5590753,
        0.5256446, 0.03342898, 0.1344396, 0.68642473, 0.37953874, 0.32575172,
        0.21108444, 0.5661886, 0.45378175, 0.62126315, 0.26799858, 0.37272978,
        0.56637216, 0.90457034, 0.69827306, 0.4353543, 0.47985056, 0.42658508,
        0.14516132, 0.08081771, 0.1799732, 0.9229515, 0.42420176, 0.50857586,
        0.82664067, 0.4972319, 0.3752427, 0.56731623, 0.18241242, 0.33252355,
        0.30608943, 0.6572437, 0.69185436, 0.88646156, 0.36985755, 0.5590753,
        0.5256446, 0.03342898, 0.1344396, 0.68642473, 0.37953874, 0.32575172,
        0.21108444, 0.5661886, 0.45378175, 0.62126315, 0.26799858, 0.37272978};

const std::vector<format::type> layouts{
        format::bfyx,
        format::b_fs_yx_fsv16,
        format::b_fs_yx_fsv32,
        format::bs_fs_yx_bsv16_fsv16,
        format::bs_fs_yx_bsv32_fsv16,
        format::bs_fs_yx_bsv32_fsv32};

template <typename T>
std::vector<T> getValues(const std::vector<float>& values) {
    std::vector<T> result(values.begin(), values.end());
    return result;
}

template <typename T> float getError();

template<>
float getError<float>() {
    return 0.001;
}

template<>
float getError<half_t>() {
    return 0.2;
}

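// getValues<T> casts the shared float reference data above to the precision under test, and
// getError<T> supplies the matching absolute tolerance for EXPECT_NEAR: 0.001 for f32, 0.2 for
// f16. Illustrative sketch only (names reuse the vectors defined above):
//     const std::vector<half_t> fp16_scores = getValues<half_t>(scores);
//     EXPECT_NEAR(scores[0], static_cast<float>(fp16_scores[0]), getError<half_t>());
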
template <typename T>
std::vector<GenerateProposalsParams<T>> getGenerateProposalsParams() {
    std::vector<GenerateProposalsParams<T>> params = {
        {
            1.0f, 0.7f, 14, 6, true, 1.0,
            getValues<T>({4.49132, 4.30537, 8.75027, 8.8035,
                          0, 1.01395, 4.66909, 5.14337,
                          135.501, 137.467, 139.81, 141.726,
                          4.49132, 4.30537, 8.75027, 8.8035,
                          0, 1.01395, 4.66909, 5.14337,
                          135.501, 137.467, 139.81, 141.726}),
            getValues<T>({0.826641, 0.566372, 0.559075,
                          0.826641, 0.566372, 0.559075}),
            {3, 3}
        },
        {
            1.0f, 0.7f, 1000, 6, true, 1.0,
            getValues<T>({4.49132, 4.30537, 8.75027, 8.8035,
                          0, 1.01395, 4.66909, 5.14337,
                          135.501, 137.467, 139.81, 141.726,
                          47.2348, 47.8342, 52.5503, 52.3864,
                          126.483, 128.3, 131.625, 133.707,
                          4.49132, 4.30537, 8.75027, 8.8035,
                          0, 1.01395, 4.66909, 5.14337,
                          135.501, 137.467, 139.81, 141.726,
                          47.2348, 47.8342, 52.5503, 52.3864,
                          126.483, 128.3, 131.625, 133.707}),
            getValues<T>({0.826641, 0.566372, 0.559075, 0.479851, 0.267999,
                          0.826641, 0.566372, 0.559075, 0.479851, 0.267999}),
            {5, 5}
        },
        {
            0.0f, 0.7f, 14, 6, true, 1.0,
            getValues<T>({108.129, 109.37, 111.623, 111.468,
                          12.9725, 11.6102, 16.4918, 16.9624,
                          112.883, 113.124, 115.17, 118.213,
                          4.49132, 4.30537, 8.75027, 8.8035,
                          24.9778, 25.0318, 27.2283, 28.495,
                          100.126, 101.409, 102.354, 106.125,
                          108.129, 109.37, 111.623, 111.468,
                          12.9725, 11.6102, 16.4918, 16.9624,
                          112.883, 113.124, 115.17, 118.213,
                          4.49132, 4.30537, 8.75027, 8.8035,
                          24.9778, 25.0318, 27.2283, 28.495,
                          100.126, 101.409, 102.354, 106.125}),
            getValues<T>({0.922952, 0.90457, 0.886462, 0.826641, 0.698273, 0.691854,
                          0.922952, 0.90457, 0.886462, 0.826641, 0.698273, 0.691854}),
            {6, 6}
        },

        {
            0.1f, 0.7f, 1000, 6, true, 1.0,
            getValues<T>({108.129, 109.37, 111.623, 111.468,
                          12.9725, 11.6102, 16.4918, 16.9624,
                          112.883, 113.124, 115.17, 118.213,
                          4.49132, 4.30537, 8.75027, 8.8035,
                          24.9778, 25.0318, 27.2283, 28.495,
                          100.126, 101.409, 102.354, 106.125,
                          108.129, 109.37, 111.623, 111.468,
                          12.9725, 11.6102, 16.4918, 16.9624,
                          112.883, 113.124, 115.17, 118.213,
                          4.49132, 4.30537, 8.75027, 8.8035,
                          24.9778, 25.0318, 27.2283, 28.495,
                          100.126, 101.409, 102.354, 106.125}),
            getValues<T>({0.922952, 0.90457, 0.886462, 0.826641, 0.698273, 0.691854,
                          0.922952, 0.90457, 0.886462, 0.826641, 0.698273, 0.691854}),
            {6, 6}
        },
        {
            1.0f, 0.7f, 14, 6, false, 1.0,
            getValues<T>({13.4588, 10.9153, 17.7377, 17.9436,
                          4.73698, 3.95806, 10.1254, 9.70525,
                          89.5773, 90.0053, 92.9476, 95.3396,
                          0, 1.02093, 6.00364, 6.21505,
                          92.3608, 94.306, 96.3198, 98.4288,
                          135.252, 137.7, 140.716, 143.09,
                          13.4588, 10.9153, 17.7377, 17.9436,
                          4.73698, 3.95806, 10.1254, 9.70525,
                          89.5773, 90.0053, 92.9476, 95.3396,
                          0, 1.02093, 6.00364, 6.21505,
                          92.3608, 94.306, 96.3198, 98.4288,
                          135.252, 137.7, 140.716, 143.09}),
            getValues<T>({0.90457, 0.826641, 0.657244, 0.566372, 0.566189, 0.559075,
                          0.90457, 0.826641, 0.657244, 0.566372, 0.566189, 0.559075}),
            {6, 6}
        },
        {
            0.0f, 0.7f, 1000, 6, false, 1.0,
            getValues<T>({108.194, 109.556, 112.435, 111.701,
                          13.4588, 10.9153, 17.7377, 17.9436,
                          113.324, 113.186, 115.755, 119.82,
                          4.73698, 3.95806, 10.1254, 9.70525,
                          25.4666, 25.0477, 27.8424, 29.2425,
                          100.188, 101.614, 102.532, 107.687,
                          108.194, 109.556, 112.435, 111.701,
                          13.4588, 10.9153, 17.7377, 17.9436,
                          113.324, 113.186, 115.755, 119.82,
                          4.73698, 3.95806, 10.1254, 9.70525,
                          25.4666, 25.0477, 27.8424, 29.2425,
                          100.188, 101.614, 102.532, 107.687}),
            getValues<T>({0.922952, 0.90457, 0.886462, 0.826641, 0.698273, 0.691854,
                          0.922952, 0.90457, 0.886462, 0.826641, 0.698273, 0.691854}),
            {6, 6}
        }
    };
    return params;
}
}; // namespace

template <typename T, typename ROIS_NUM_T>
struct generate_proposals_test
        : public ::testing::TestWithParam<GenerateProposalsParamsWithLayout<T> > {
public:
    void test() {
        GenerateProposalsParams<T> param;
        format::type data_layout;
        std::tie(param, data_layout) = this->GetParam();
        const bool need_reorder = data_layout != format::bfyx;

        const auto data_type = type_to_data_type<T>::value;
        const auto rois_num_type = type_to_data_type<ROIS_NUM_T>::value;

        auto& engine = get_test_engine();

        const primitive_id input_im_info_id = "InputImInfo";
        const auto input_im_info = engine.allocate_memory({data_type, format::bfyx, tensor{batch(num_batches), feature(3)}});
        set_values(input_im_info, getValues<T>(im_info));

        const primitive_id input_anchors_id = "InputAnchors";
        auto input_anchors = engine.allocate_memory(
            {data_type, format::bfyx, tensor{batch(height), feature(width), spatial(4, number_of_anchors)}});
        set_values(input_anchors, getValues<T>(anchors));

        const primitive_id input_deltas_id = "InputDeltas";
        auto input_deltas = engine.allocate_memory(
            {data_type, format::bfyx,
             tensor{batch(num_batches), feature(number_of_anchors * 4), spatial(width, height)}});
        set_values(input_deltas, getValues<T>(deltas));

        const primitive_id input_scores_id = "InputScores";
        auto input_scores = engine.allocate_memory(
            {data_type, format::bfyx, tensor{batch(num_batches), feature(number_of_anchors), spatial(width, height)}});
        set_values(input_scores, getValues<T>(scores));

        const primitive_id output_roi_scores_id = "OutputRoiScores";
        const layout rois_scores_layout{data_type, data_layout, tensor{batch(num_batches * param.post_nms_count)}};
        auto output_roi_scores = engine.allocate_memory(rois_scores_layout);

        const primitive_id output_rois_num_id = "OutputRoisNum";
        const layout rois_num_layout{rois_num_type, data_layout, tensor{batch(num_batches)}};
        auto output_rois_num = engine.allocate_memory(rois_num_layout);

        const primitive_id reorder_im_info_id = input_im_info_id + "Reordered";
        const primitive_id reorder_anchors_id = input_anchors_id + "Reordered";
        const primitive_id reorder_deltas_id = input_deltas_id + "Reordered";
        const primitive_id reorder_scores_id = input_scores_id + "Reordered";

        topology topology;

        topology.add(input_layout{input_im_info_id, input_im_info->get_layout()});
        topology.add(input_layout{input_anchors_id, input_anchors->get_layout()});
        topology.add(input_layout{input_deltas_id, input_deltas->get_layout()});
        topology.add(input_layout{input_scores_id, input_scores->get_layout()});
        topology.add(mutable_data{output_roi_scores_id, output_roi_scores});
        topology.add(mutable_data{output_rois_num_id, output_rois_num});

        topology.add(reorder(reorder_im_info_id, input_im_info_id, data_layout, data_type));
        topology.add(reorder(reorder_anchors_id, input_anchors_id, data_layout, data_type));
        topology.add(reorder(reorder_deltas_id, input_deltas_id, data_layout, data_type));
        topology.add(reorder(reorder_scores_id, input_scores_id, data_layout, data_type));

        const primitive_id generate_proposals_id = "generate_proposals";
        const std::vector<primitive_id> inputs{ reorder_im_info_id, reorder_anchors_id, reorder_deltas_id,
                                                reorder_scores_id, output_roi_scores_id, output_rois_num_id};
        const auto generate_proposals_primitive = generate_proposals{
            generate_proposals_id,
            inputs,
            param.min_size,
            param.nms_threshold,
            param.pre_nms_count,
            param.post_nms_count,
            param.normalized,
            param.nms_eta,
            rois_num_type};

        topology.add(generate_proposals_primitive);
        const primitive_id reorder_result_id = generate_proposals_id + "Reordered";
        topology.add(reorder(reorder_result_id, generate_proposals_id, format::bfyx, data_type));

        network network{engine, topology};

        network.set_input_data(input_im_info_id, input_im_info);
        network.set_input_data(input_anchors_id, input_anchors);
        network.set_input_data(input_deltas_id, input_deltas);
        network.set_input_data(input_scores_id, input_scores);

        const auto outputs = network.execute();

        const auto rois = outputs.at(reorder_result_id).get_memory();

        const cldnn::mem_lock<T> rois_ptr(rois, get_test_stream());
        ASSERT_EQ(rois_ptr.size(), num_batches * param.post_nms_count * 4);

        const auto get_plane_data = [&](const memory::ptr& mem, const data_types data_type, const layout& from_layout) {
            if (!need_reorder) {
                return mem;
            }
            cldnn::topology reorder_topology;
            reorder_topology.add(input_layout("data", from_layout));
            reorder_topology.add(reorder("plane_data", "data", format::bfyx, data_type));
            cldnn::network reorder_net{engine, reorder_topology};
            reorder_net.set_input_data("data", mem);
            const auto second_output_result = reorder_net.execute();
            const auto plane_data_mem = second_output_result.at("plane_data").get_memory();
            return plane_data_mem;
        };

        const cldnn::mem_lock<T> roi_scores_ptr(
            get_plane_data(output_roi_scores, data_type, rois_scores_layout), get_test_stream());
        ASSERT_EQ(roi_scores_ptr.size(), num_batches * param.post_nms_count);

        const cldnn::mem_lock<ROIS_NUM_T> rois_num_ptr(
            get_plane_data(output_rois_num, rois_num_type, rois_num_layout), get_test_stream());
        ASSERT_EQ(rois_num_ptr.size(), num_batches);

        const auto& expected_rois = param.expected_rois;
        const auto& expected_roi_scores = param.expected_roi_scores;
        const auto& expected_rois_num = param.expected_rois_num;

        for (size_t j = 0; j < expected_rois_num.size(); ++j) {
            EXPECT_EQ(expected_rois_num[j], rois_num_ptr[j]) << "j=" << j;
        }

        for (auto i = 0; i < param.post_nms_count; ++i) {
            EXPECT_NEAR(expected_roi_scores[i], roi_scores_ptr[i], getError<T>()) << "i=" << i;

            if (static_cast<float>(expected_roi_scores[i]) != 0.0f) {
                for (size_t coord = 0; coord < 4; ++coord) {
                    const auto roi_idx = i * 4 + coord;
                    EXPECT_NEAR(expected_rois[roi_idx], rois_ptr[roi_idx], getError<T>()) << "i=" << i << ", coord=" << coord;
                }
            }
        }
    }
};

using f32_i32 = generate_proposals_test<float, int32_t>;
TEST_P(f32_i32, f32_i32) {
    test();
}
INSTANTIATE_TEST_SUITE_P(
    generate_proposals_gpu_test,
    f32_i32,
    ::testing::Combine(
        ::testing::ValuesIn(getGenerateProposalsParams<float>()),
        ::testing::ValuesIn(layouts)
    ));

using f32_i64 = generate_proposals_test<float, int64_t>;
TEST_P(f32_i64, f32_i64) {
    test();
}
INSTANTIATE_TEST_SUITE_P(
    generate_proposals_gpu_test,
    f32_i64,
    ::testing::Combine(
        ::testing::ValuesIn(getGenerateProposalsParams<float>()),
        ::testing::ValuesIn(layouts)
    ));

using f16_i32 = generate_proposals_test<half_t, int32_t>;
TEST_P(f16_i32, f16_i32) {
    test();
}
INSTANTIATE_TEST_SUITE_P(
    generate_proposals_gpu_test,
    f16_i32,
    ::testing::Combine(
        ::testing::ValuesIn(getGenerateProposalsParams<half_t>()),
        ::testing::ValuesIn(layouts)
    ));

using f16_i64 = generate_proposals_test<half_t, int64_t>;
TEST_P(f16_i64, f16_i64) {
    test();
}
INSTANTIATE_TEST_SUITE_P(
    generate_proposals_gpu_test,
    f16_i64,
    ::testing::Combine(
        ::testing::ValuesIn(getGenerateProposalsParams<half_t>()),
        ::testing::ValuesIn(layouts)
    ));

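The size assertions in the test above follow from the static-output design of the GPU primitive: every batch is padded to exactly post_nms_count proposals, so the output buffer sizes are fixed regardless of how many boxes survive NMS. A minimal sketch of that bookkeeping, assuming the num_batches = 2 / post_nms_count = 6 configuration used by the test data (not part of the committed sources):

#include <cstddef>

// Output element counts asserted by generate_proposals_test::test():
//   rois       -> num_batches * post_nms_count * 4   (x0, y0, x1, y1 per proposal)
//   roi_scores -> num_batches * post_nms_count
//   rois_num   -> num_batches                         (valid proposals per batch)
constexpr std::size_t num_batches = 2;
constexpr std::size_t post_nms_count = 6;
static_assert(num_batches * post_nms_count * 4 == 48, "rois elements for this configuration");
static_assert(num_batches * post_nms_count == 12, "roi_scores elements for this configuration");
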
@ -0,0 +1,197 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <vector>

#include "single_layer_tests/generate_proposals.hpp"
#include "common_test_utils/ov_tensor_utils.hpp"

using namespace ov::test;
using namespace ov::test::subgraph;

namespace {

const std::vector<float> min_size = { 1.0f, 0.0f };
const std::vector<float> nms_threshold = { 0.7f };
const std::vector<int64_t> post_nms_count = { 6 };
const std::vector<int64_t> pre_nms_count = { 14, 1000 };

template <typename T>
const std::vector<std::pair<std::string, std::vector<ov::Tensor>>> getInputTensors() {
    const std::vector<std::pair<std::string, std::vector<ov::Tensor>>> input_tensors = {
        {
            "empty",
            {
                // 3
                ov::test::utils::create_tensor<T>(ov::element::from<T>(), ov::Shape{2, 3}, {1.0f, 1.0f, 0.0f, 1.0f, 1.0f, 0.0f}),
                // 2 x 6 x 3 x 4 = 144
                ov::test::utils::create_tensor<T>(ov::element::from<T>(), ov::Shape{2, 6, 3, 4}, std::vector<T>(144, 1.0f)),
                // 2 x 12 x 2 x 6 = 144 * 2
                ov::test::utils::create_tensor<T>(ov::element::from<T>(), ov::Shape{2, 12, 2, 6}, std::vector<T>(288, 1.0f)),
                // {2 x 3 x 2 x 6} = 36 * 2
                ov::test::utils::create_tensor<T>(ov::element::from<T>(), ov::Shape{2, 3, 2, 6}, {
                    5.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
                    1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 4.0f, 1.0f, 1.0f, 1.0f,
                    1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 8.0f, 1.0f,
                    5.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
                    1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 4.0f, 1.0f, 1.0f, 1.0f,
                    1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 8.0f, 1.0f})
            }
        },
        {
            "filled2",
            {
                ov::test::utils::create_tensor<T>(ov::element::from<T>(), ov::Shape{2, 3}, {200.0, 200.0, 4.0, 200.0, 200.0, 4.0}),
                ov::test::utils::create_tensor<T>(ov::element::from<T>(), ov::Shape{2, 6, 3, 4}, {0.0f, 1.0f, 2.0f, 3.0f,
                    4.0f, 5.0f, 6.0f, 7.0f,
                    8.0f, 9.0f, 10.0f, 11.0f,
                    12.0f, 13.0f, 14.0f, 15.0f,
                    16.0f, 17.0f, 18.0f, 19.0f,
                    20.0f, 21.0f, 22.0f, 23.0f,
                    24.0f, 25.0f, 26.0f, 27.0f,
                    28.0f, 29.0f, 30.0f, 31.0f,
                    32.0f, 33.0f, 34.0f, 35.0f,
                    36.0f, 37.0f, 38.0f, 39.0f,
                    40.0f, 41.0f, 42.0f, 43.0f,
                    44.0f, 45.0f, 46.0f, 47.0f,
                    48.0f, 49.0f, 50.0f, 51.0f,
                    52.0f, 53.0f, 54.0f, 55.0f,
                    56.0f, 57.0f, 58.0f, 59.0f,
                    60.0f, 61.0f, 62.0f, 63.0f,
                    64.0f, 65.0f, 66.0f, 67.0f,
                    68.0f, 69.0f, 70.0f, 71.0f,
                    72.0f, 73.0f, 74.0f, 75.0f,
                    76.0f, 77.0f, 78.0f, 79.0f,
                    80.0f, 81.0f, 82.0f, 83.0f,
                    84.0f, 85.0f, 86.0f, 87.0f,
                    88.0f, 89.0f, 90.0f, 91.0f,
                    92.0f, 93.0f, 94.0f, 95.0f,
                    96.0f, 97.0f, 98.0f, 99.0f,
                    100.0f, 101.0f, 102.0f, 103.0f,
                    104.0f, 105.0f, 106.0f, 107.0f,
                    108.0f, 109.0f, 110.0f, 111.0f,
                    112.0f, 113.0f, 114.0f, 115.0f,
                    116.0f, 117.0f, 118.0f, 119.0f,
                    120.0f, 121.0f, 122.0f, 123.0f,
                    124.0f, 125.0f, 126.0f, 127.0f,
                    128.0f, 129.0f, 130.0f, 131.0f,
                    132.0f, 133.0f, 134.0f, 135.0f,
                    136.0f, 137.0f, 138.0f, 139.0f,
                    140.0f, 141.0f, 142.0f, 143.0f}),
                ov::test::utils::create_tensor<T>(ov::element::from<T>(), ov::Shape{2, 12, 2, 6}, {
                    0.5337073, 0.86607957, 0.55151343, 0.21626699, 0.4462629, 0.03985678,
                    0.5157072, 0.9932138, 0.7565954, 0.43803605, 0.802818, 0.14834064,
                    0.53932905, 0.14314, 0.3817048, 0.95075196, 0.05516243, 0.2567484,
                    0.25508744, 0.77438325, 0.43561, 0.2094628, 0.8299043, 0.44982538,
                    0.95615596, 0.5651084, 0.11801951, 0.05352486, 0.9774733, 0.14439464,
                    0.62644225, 0.14370479, 0.54161614, 0.557915, 0.53102225, 0.0840179,
                    0.7249888, 0.9843559, 0.5490522, 0.53788143, 0.822474, 0.3278008,
                    0.39688024, 0.3286012, 0.5117038, 0.04743988, 0.9408995, 0.29885054,
                    0.81039643, 0.85277915, 0.06807619, 0.86430097, 0.36225632, 0.16606331,
                    0.5401001, 0.7541649, 0.11998601, 0.5131829, 0.40606487, 0.327888,
                    0.27721855, 0.6378373, 0.22795396, 0.4961256, 0.3215895, 0.15607187,
                    0.14782153, 0.8908137, 0.8835288, 0.834191, 0.29907143, 0.7983525,
                    0.755875, 0.30837986, 0.0839176, 0.26624718, 0.04371626, 0.09472824,
                    0.20689541, 0.37622106, 0.1083321, 0.1342548, 0.05815459, 0.7676379,
                    0.8105144, 0.92348766, 0.26761323, 0.7183306, 0.8947588, 0.19020908,
                    0.42731014, 0.7473663, 0.85775334, 0.9340091, 0.3278848, 0.755993,
                    0.05307213, 0.39705503, 0.21003333, 0.5625373, 0.66188884, 0.80521655,
                    0.6125863, 0.44678232, 0.97802377, 0.0204936, 0.02686367, 0.7390654,
                    0.74631, 0.58399844, 0.5988792, 0.37413648, 0.5946692, 0.6955776,
                    0.36377597, 0.7891322, 0.40900692, 0.99139464, 0.50169915, 0.41435778,
                    0.17142445, 0.26761186, 0.31591868, 0.14249913, 0.12919712, 0.5418711,
                    0.6523203, 0.50259084, 0.7379765, 0.01171071, 0.94423133, 0.00841132,
                    0.97486794, 0.2921785, 0.7633071, 0.88477814, 0.03563205, 0.50833166,
                    0.01354555, 0.535081, 0.41366324, 0.0694767, 0.9944055, 0.9981207,
                    0.5337073, 0.86607957, 0.55151343, 0.21626699, 0.4462629, 0.03985678,
                    0.5157072, 0.9932138, 0.7565954, 0.43803605, 0.802818, 0.14834064,
                    0.53932905, 0.14314, 0.3817048, 0.95075196, 0.05516243, 0.2567484,
                    0.25508744, 0.77438325, 0.43561, 0.2094628, 0.8299043, 0.44982538,
                    0.95615596, 0.5651084, 0.11801951, 0.05352486, 0.9774733, 0.14439464,
                    0.62644225, 0.14370479, 0.54161614, 0.557915, 0.53102225, 0.0840179,
                    0.7249888, 0.9843559, 0.5490522, 0.53788143, 0.822474, 0.3278008,
                    0.39688024, 0.3286012, 0.5117038, 0.04743988, 0.9408995, 0.29885054,
                    0.81039643, 0.85277915, 0.06807619, 0.86430097, 0.36225632, 0.16606331,
                    0.5401001, 0.7541649, 0.11998601, 0.5131829, 0.40606487, 0.327888,
                    0.27721855, 0.6378373, 0.22795396, 0.4961256, 0.3215895, 0.15607187,
                    0.14782153, 0.8908137, 0.8835288, 0.834191, 0.29907143, 0.7983525,
                    0.755875, 0.30837986, 0.0839176, 0.26624718, 0.04371626, 0.09472824,
                    0.20689541, 0.37622106, 0.1083321, 0.1342548, 0.05815459, 0.7676379,
                    0.8105144, 0.92348766, 0.26761323, 0.7183306, 0.8947588, 0.19020908,
                    0.42731014, 0.7473663, 0.85775334, 0.9340091, 0.3278848, 0.755993,
                    0.05307213, 0.39705503, 0.21003333, 0.5625373, 0.66188884, 0.80521655,
                    0.6125863, 0.44678232, 0.97802377, 0.0204936, 0.02686367, 0.7390654,
                    0.74631, 0.58399844, 0.5988792, 0.37413648, 0.5946692, 0.6955776,
                    0.36377597, 0.7891322, 0.40900692, 0.99139464, 0.50169915, 0.41435778,
                    0.17142445, 0.26761186, 0.31591868, 0.14249913, 0.12919712, 0.5418711,
                    0.6523203, 0.50259084, 0.7379765, 0.01171071, 0.94423133, 0.00841132,
                    0.97486794, 0.2921785, 0.7633071, 0.88477814, 0.03563205, 0.50833166,
                    0.01354555, 0.535081, 0.41366324, 0.0694767, 0.9944055, 0.9981207}),
                ov::test::utils::create_tensor<T>(ov::element::from<T>(), ov::Shape{2, 3, 2, 6}, {
                    0.56637216, 0.90457034, 0.69827306, 0.4353543, 0.47985056, 0.42658508,
                    0.14516132, 0.08081771, 0.1799732, 0.9229515, 0.42420176, 0.50857586,
                    0.82664067, 0.4972319, 0.3752427, 0.56731623, 0.18241242, 0.33252355,
                    0.30608943, 0.6572437, 0.69185436, 0.88646156, 0.36985755, 0.5590753,
                    0.5256446, 0.03342898, 0.1344396, 0.68642473, 0.37953874, 0.32575172,
                    0.21108444, 0.5661886, 0.45378175, 0.62126315, 0.26799858, 0.37272978,
                    0.56637216, 0.90457034, 0.69827306, 0.4353543, 0.47985056, 0.42658508,
                    0.14516132, 0.08081771, 0.1799732, 0.9229515, 0.42420176, 0.50857586,
                    0.82664067, 0.4972319, 0.3752427, 0.56731623, 0.18241242, 0.33252355,
                    0.30608943, 0.6572437, 0.69185436, 0.88646156, 0.36985755, 0.5590753,
                    0.5256446, 0.03342898, 0.1344396, 0.68642473, 0.37953874, 0.32575172,
                    0.21108444, 0.5661886, 0.45378175, 0.62126315, 0.26799858, 0.37272978}),
            }
        }
    };
    return input_tensors;
}

constexpr size_t num_batches = 2;
constexpr size_t height = 2;
constexpr size_t width = 6;
constexpr size_t number_of_anchors = 3;

const std::vector<std::vector<InputShape>> input_shape = {
    // im_info / anchors / boxes deltas / scores
    static_shapes_to_test_representation({{num_batches, 3},
                                          {height, width, number_of_anchors, 4},
                                          {num_batches, number_of_anchors * 4, height, width},
                                          {num_batches, number_of_anchors, height, width}}),
};

INSTANTIATE_TEST_SUITE_P(
    smoke_GenerateProposalsLayerTest_f16,
    GenerateProposalsLayerTest,
    ::testing::Combine(
        ::testing::ValuesIn(input_shape),
        ::testing::ValuesIn(min_size),
        ::testing::ValuesIn(nms_threshold),
        ::testing::ValuesIn(post_nms_count),
        ::testing::ValuesIn(pre_nms_count),
        ::testing::ValuesIn({true}),
        ::testing::ValuesIn(getInputTensors<ov::float16>()),
        ::testing::ValuesIn({ov::element::Type_t::f16}),
        ::testing::ValuesIn({ov::element::Type_t::i32, ov::element::Type_t::i64}),
        ::testing::Values(CommonTestUtils::DEVICE_GPU)),
    GenerateProposalsLayerTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(
    smoke_GenerateProposalsLayerTest_f32,
    GenerateProposalsLayerTest,
    ::testing::Combine(
        ::testing::ValuesIn(input_shape),
        ::testing::ValuesIn(min_size),
        ::testing::ValuesIn(nms_threshold),
        ::testing::ValuesIn(post_nms_count),
        ::testing::ValuesIn(pre_nms_count),
        ::testing::ValuesIn({false}),
        ::testing::ValuesIn(getInputTensors<float>()),
        ::testing::ValuesIn({ov::element::Type_t::f32}),
        ::testing::ValuesIn({ov::element::Type_t::i32, ov::element::Type_t::i64}),
        ::testing::Values(CommonTestUtils::DEVICE_GPU)),
    GenerateProposalsLayerTest::getTestCaseName);

} // namespace

@ -16,6 +16,7 @@ typedef std::tuple<
        float, // nms_threshold: specifies NMS threshold
        int64_t, // post_nms_count: number of top-n proposals after NMS
        int64_t, // pre_nms_count: number of top-n proposals before NMS
        bool, // normalized: specifies whether box is normalized or not
        std::pair<std::string, std::vector<ov::Tensor>>, // input tensors
        ElementType, // Network precision
        ElementType, // roi_num precision
@ -28,6 +29,7 @@ class GenerateProposalsLayerTest :
protected:
    void SetUp() override;
    void generate_inputs(const std::vector<ngraph::Shape>& targetInputStaticShapes) override;
    void compare(const std::vector<ov::Tensor>& expected, const std::vector<ov::Tensor>& actual) override;

public:
    static std::string getTestCaseName(const testing::TestParamInfo<GenerateProposalsTestParams>& obj);

@ -3,6 +3,7 @@
//

#include "shared_test_classes/single_layer/generate_proposals.hpp"
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "ngraph_functions/builders.hpp"
#include "common_test_utils/ov_tensor_utils.hpp"

@ -16,8 +17,10 @@ std::ostream& operator <<(
        const ov::op::v9::GenerateProposals::Attributes& attributes) {
    ss << "score_threshold=" << attributes.min_size << "_";
    ss << "nms_threshold=" << attributes.nms_threshold << "_";
    ss << "max_delta_log_wh=" << attributes.post_nms_count << "_";
    ss << "num_classes=" << attributes.pre_nms_count;
    ss << "post_nms_count=" << attributes.post_nms_count << "_";
    ss << "pre_nms_count=" << attributes.pre_nms_count;
    ss << "normalized=" << attributes.normalized;
    ss << "nms_eta=" << attributes.nms_eta;
    return ss;
}
} // namespace
@ -36,6 +39,7 @@ std::string GenerateProposalsLayerTest::getTestCaseName(
            attributes.nms_threshold,
            attributes.post_nms_count,
            attributes.pre_nms_count,
            attributes.normalized,
            inputTensors,
            netPrecision,
            roiNumPrecision,
@ -70,6 +74,7 @@ void GenerateProposalsLayerTest::SetUp() {
            attributes.nms_threshold,
            attributes.post_nms_count,
            attributes.pre_nms_count,
            attributes.normalized,
            inputTensors,
            netPrecision,
            roiNumPrecision,
@ -77,6 +82,13 @@ void GenerateProposalsLayerTest::SetUp() {

    inType = outType = netPrecision;
    targetDevice = targetName;
    if (targetDevice == CommonTestUtils::DEVICE_GPU) {
        if (netPrecision == element::Type_t::f16) {
            abs_threshold = 0.2;
        } else {
            abs_threshold = 0.00009;
        }
    }

    init_input_shapes(inputShapes);

@ -97,7 +109,7 @@ void GenerateProposalsLayerTest::SetUp() {
}

void GenerateProposalsLayerTest::generate_inputs(const std::vector<ngraph::Shape>& targetInputStaticShapes) {
    auto inputTensors = std::get<5>(GetParam());
    auto inputTensors = std::get<6>(GetParam());

    inputs.clear();
    const auto& funcInputs = function->inputs();
@ -110,6 +122,63 @@ void GenerateProposalsLayerTest::generate_inputs(const std::vector<ngraph::Shape
    }
}

void GenerateProposalsLayerTest::compare(const std::vector<ov::Tensor>& expected,
                                         const std::vector<ov::Tensor>& actual) {
    if (targetDevice != CommonTestUtils::DEVICE_GPU) {
        SubgraphBaseTest::compare(expected, actual);
        return;
    }

    const auto outputsNum = expected.size();
    ASSERT_EQ(outputsNum, 3);
    ASSERT_EQ(outputsNum, actual.size());
    ASSERT_EQ(outputsNum, function->get_results().size());

    // actual outputs 0 (rois) and 1 (roi_scores) may be padded with zeros
    for (size_t i = 0; i < 2; ++i) {
        const auto expectedNumRois = expected[i].get_shape()[0];
        const auto actualNumRois = actual[i].get_shape()[0];
        ASSERT_LE(expectedNumRois, actualNumRois);

        const auto actualBuffer = static_cast<uint8_t*>(actual[i].data());
        const auto expectedBuffer = static_cast<uint8_t*>(expected[i].data());
        const auto outputSize = i == 0 ? 4 : 1;

        if (outType == element::Type_t::f32) {
            LayerTestsUtils::LayerTestsCommon::Compare(reinterpret_cast<const float*>(expectedBuffer),
                                                       reinterpret_cast<const float*>(actualBuffer),
                                                       expectedNumRois * outputSize,
                                                       rel_threshold,
                                                       abs_threshold);
        } else {
            LayerTestsUtils::LayerTestsCommon::Compare(reinterpret_cast<const float16*>(expectedBuffer),
                                                       reinterpret_cast<const float16*>(actualBuffer),
                                                       expectedNumRois * outputSize,
                                                       rel_threshold,
                                                       abs_threshold);
        }

        if (expectedNumRois < actualNumRois) {
            if (outType == element::Type_t::f32) {
                const auto fBuffer = static_cast<const float*>(actual[i].data());
                for (size_t j = expectedNumRois * outputSize; j < actualNumRois * outputSize; ++j) {
                    ASSERT_TRUE(fBuffer[j] == 0.0f)
                        << "Expected 0.0, actual: " << fBuffer[j] << " at index: " << j << ", output: " << i;
                }
            } else {
                const float16 zero{0};
                const auto fBuffer = static_cast<const float16*>(actual[i].data());
                for (size_t j = expectedNumRois * outputSize; j < actualNumRois * outputSize; ++j) {
                    ASSERT_TRUE(fBuffer[j] == zero)
                        << "Expected 0.0, actual: " << fBuffer[j] << " at index: " << j << ", output: " << i;
                }
            }
        }
    }

    // output 2 - rois_num
    ov::test::utils::compare(expected[2], actual[2], abs_threshold, rel_threshold);
}
} // namespace subgraph
} // namespace test
} // namespace ov
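The custom compare() above is needed because the GPU plugin returns statically shaped outputs: rois and roi_scores are padded with zeros past the number of proposals actually kept for each batch. A condensed sketch of the check it performs, simplified to float and using hypothetical names (not part of the committed sources):

#include <cmath>
#include <cstddef>

// Values up to the expected ROI count must match the reference within abs_threshold;
// everything after it is padding and must be exactly zero.
bool check_padded_output(const float* expected, const float* actual,
                         std::size_t expected_count, std::size_t actual_count,
                         float abs_threshold) {
    for (std::size_t i = 0; i < expected_count; ++i) {
        if (std::abs(expected[i] - actual[i]) > abs_threshold) {
            return false;  // mismatch in the populated region
        }
    }
    for (std::size_t i = expected_count; i < actual_count; ++i) {
        if (actual[i] != 0.0f) {
            return false;  // padding region must stay zero
        }
    }
    return true;
}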