diff --git a/src/common/transformations/include/ngraph_ops/generate_proposals_ie_internal.hpp b/src/common/transformations/include/ngraph_ops/generate_proposals_ie_internal.hpp new file mode 100644 index 00000000000..0d0b3f2bf2f --- /dev/null +++ b/src/common/transformations/include/ngraph_ops/generate_proposals_ie_internal.hpp @@ -0,0 +1,41 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +#include "ngraph/op/generate_proposals.hpp" +#include "ngraph/op/op.hpp" + +namespace ngraph { +namespace op { +namespace internal { + +class TRANSFORMATIONS_API GenerateProposalsIEInternal : public op::v9::GenerateProposals { + using Base = op::v9::GenerateProposals; + +public: + OPENVINO_OP("GenerateProposalsIEInternal", "ie_internal_opset"); + BWDCMP_RTTI_DECLARATION; + + GenerateProposalsIEInternal() = default; + + GenerateProposalsIEInternal(const Output& im_info, + const Output& anchors, + const Output& deltas, + const Output& scores, + const Attributes& attrs, + const element::Type& roi_num_type = element::i64); + + void validate_and_infer_types() override; + + std::shared_ptr clone_with_new_inputs(const ngraph::OutputVector& new_args) const override; +}; + +} // namespace internal +} // namespace op +} // namespace ngraph diff --git a/src/common/transformations/include/transformations/op_conversions/convert_gp9_to_gp_ie_internal.hpp b/src/common/transformations/include/transformations/op_conversions/convert_gp9_to_gp_ie_internal.hpp new file mode 100644 index 00000000000..bf1d0afe58d --- /dev/null +++ b/src/common/transformations/include/transformations/op_conversions/convert_gp9_to_gp_ie_internal.hpp @@ -0,0 +1,22 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +namespace ngraph { +namespace pass { + +class TRANSFORMATIONS_API ConvertGP9ToGPIEInternal; + +} // namespace pass +} // namespace ngraph + +class ngraph::pass::ConvertGP9ToGPIEInternal : public ngraph::pass::MatcherPass { +public: + OPENVINO_RTTI("ConvertGP9ToGPIEInternal", "0"); + ConvertGP9ToGPIEInternal(); +}; diff --git a/src/common/transformations/src/ngraph_ops/generate_proposals_ie_internal.cpp b/src/common/transformations/src/ngraph_ops/generate_proposals_ie_internal.cpp new file mode 100644 index 00000000000..78883c5ce92 --- /dev/null +++ b/src/common/transformations/src/ngraph_ops/generate_proposals_ie_internal.cpp @@ -0,0 +1,58 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ngraph_ops/generate_proposals_ie_internal.hpp" + +#include +#include + +#include "itt.hpp" + +using namespace std; +using namespace ngraph; + +BWDCMP_RTTI_DEFINITION(op::internal::GenerateProposalsIEInternal); + +op::internal::GenerateProposalsIEInternal::GenerateProposalsIEInternal(const Output& im_info, + const Output& anchors, + const Output& deltas, + const Output& scores, + const Attributes& attrs, + const element::Type& roi_num_type) + : Base(im_info, anchors, deltas, scores, attrs, roi_num_type) { + validate_and_infer_types(); +} + +std::shared_ptr op::internal::GenerateProposalsIEInternal::clone_with_new_inputs( + const ngraph::OutputVector& new_args) const { + INTERNAL_OP_SCOPE(internal_GenerateProposalsIEInternal_clone_with_new_inputs); + check_new_args_count(this, new_args); + return make_shared(new_args.at(0), + new_args.at(1), + new_args.at(2), + new_args.at(3), + get_attrs(), + get_roi_num_type()); +} + +void 
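+// Unlike opset9 GenerateProposals, whose first output dimension is dynamic,
+// the internal op below infers fully static output shapes:
+// rois [num_batches * post_nms_count, 4], scores [num_batches * post_nms_count],
+// rois_num [num_batches]. This is why a static batch is required; presumably it
+// lets the plugins pre-allocate the output buffers.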
op::internal::GenerateProposalsIEInternal::validate_and_infer_types() { + INTERNAL_OP_SCOPE(internal_GenerateProposalsIEInternal_validate_and_infer_types); + Base::validate_and_infer_types(); + + const auto im_info_shape = get_input_partial_shape(0); + const auto num_batches = im_info_shape[0]; + NODE_VALIDATION_CHECK(this, num_batches.is_static(), "Number of batches must be static"); + + const Dimension post_nms_count{get_attrs().post_nms_count}; + const auto first_dim_shape = num_batches * post_nms_count; + + const auto rois_shape = ov::PartialShape({first_dim_shape, 4}); + const auto scores_shape = ov::PartialShape({first_dim_shape}); + const auto roisnum_shape = ov::PartialShape({num_batches}); + + const auto input_type = get_input_element_type(0); + set_output_type(0, input_type, rois_shape); + set_output_type(1, input_type, scores_shape); + set_output_type(2, get_roi_num_type(), roisnum_shape); +} diff --git a/src/common/transformations/src/transformations/op_conversions/convert_gp9_to_gp_ie_internal.cpp b/src/common/transformations/src/transformations/op_conversions/convert_gp9_to_gp_ie_internal.cpp new file mode 100644 index 00000000000..9daf2aead63 --- /dev/null +++ b/src/common/transformations/src/transformations/op_conversions/convert_gp9_to_gp_ie_internal.cpp @@ -0,0 +1,57 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/op_conversions/convert_gp9_to_gp_ie_internal.hpp" + +#include +#include +#include +#include + +#include "itt.hpp" +#include "ngraph_ops/generate_proposals_ie_internal.hpp" +#include "transformations/utils/utils.hpp" + +ngraph::pass::ConvertGP9ToGPIEInternal::ConvertGP9ToGPIEInternal() { + ngraph::matcher_pass_callback callback = [this](ngraph::pattern::Matcher& m) { + const auto root = m.get_match_root(); + const auto old_node = std::dynamic_pointer_cast(root); + if (!old_node) { + return false; + } + + for (const auto& i : old_node->inputs()) { + if (i.get_partial_shape().is_dynamic()) { + return false; + } + } + + NodeVector new_ops; + + auto new_node = + std::make_shared(old_node->input_value(0), + old_node->input_value(1), + old_node->input_value(2), + old_node->input_value(3), + old_node->get_attrs(), + old_node->get_roi_num_type()); + + new_ops.push_back(new_node); + Output output_0 = new_node->output(0); + new_ops.emplace_back(output_0.get_node_shared_ptr()); + Output output_1 = new_node->output(1); + new_ops.emplace_back(output_1.get_node_shared_ptr()); + Output output_2 = new_node->output(2); + new_ops.emplace_back(output_2.get_node_shared_ptr()); + + new_node->set_friendly_name(old_node->get_friendly_name()); + copy_runtime_info(old_node, new_ops); + replace_node(old_node, {output_0, output_1, output_2}); + return true; + }; + + const auto generate_proposals = ngraph::pattern::wrap_type(); + const auto matcher = std::make_shared(generate_proposals, "ConvertGP9ToGPIEInternal"); + register_matcher(matcher, callback); +} diff --git a/src/inference/src/cnn_network_ngraph_impl.cpp b/src/inference/src/cnn_network_ngraph_impl.cpp index c11768bd143..4984e70d2b8 100644 --- a/src/inference/src/cnn_network_ngraph_impl.cpp +++ b/src/inference/src/cnn_network_ngraph_impl.cpp @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -458,6 +459,7 @@ void CNNNetworkNGraphImpl::reshape(const std::map(false); manager.register_pass<::ngraph::pass::ConvertMatrixNmsToMatrixNmsIE>(false); manager.register_pass<::ngraph::pass::ConvertNMS9ToNMSIEInternal>(); + 
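+    // Replace opset9 GenerateProposals with the internal variant that infers
+    // static output shapes (the pass itself skips nodes with dynamic inputs).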
manager.register_pass<::ngraph::pass::ConvertGP9ToGPIEInternal>(); manager.register_pass<::ngraph::pass::DisableConvertConstantFoldingOnConstPath>(); manager.register_pass<::ov::pass::DisableDecompressionConvertConstantFolding>(); manager.register_pass<::ngraph::pass::ConstantFolding>(); diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/generate_proposals.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/generate_proposals.cpp index ab3a091cbe8..87a1519de93 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/generate_proposals.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/generate_proposals.cpp @@ -226,6 +226,7 @@ INSTANTIATE_TEST_SUITE_P( ::testing::ValuesIn(nms_threshold), ::testing::ValuesIn(post_nms_count), ::testing::ValuesIn(pre_nms_count), + ::testing::ValuesIn({true, false}), ::testing::ValuesIn(inputTensors), ::testing::Values(ov::element::Type_t::f32), ::testing::Values(ov::element::Type_t::i32), diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp index 0d3c225a4a9..294f21a6aca 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp @@ -245,3 +245,4 @@ REGISTER_FACTORY(v9, Eye); // --------------------------- Supported internal ops --------------------------- // REGISTER_FACTORY(internal, NonMaxSuppressionIEInternal); +REGISTER_FACTORY(internal, GenerateProposalsIEInternal); diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/generate_proposals.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/generate_proposals.hpp new file mode 100644 index 00000000000..03be590c55e --- /dev/null +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/generate_proposals.hpp @@ -0,0 +1,82 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +/////////////////////////////////////////////////////////////////////////////////////////////////// +#pragma once +#include "primitive.hpp" +#include + +namespace cldnn { +/// @addtogroup cpp_api C++ API +/// @{ +/// @addtogroup cpp_topology Network Topology +/// @{ +/// @addtogroup cpp_primitives Primitives +/// @{ + +/// @brief generate proposals +struct generate_proposals + : public primitive_base { + CLDNN_DECLARE_PRIMITIVE(generate_proposals) + + /// @brief Constructs generate_proposals primitive + /// @param id This primitive id + /// @param input_im_info image size info + /// @param input_anchors anchors + /// @param input_deltas deltas for anchors + /// @param input_scores proposal scores + /// @param output_rois_scores ROIs scores + /// @param output_rois_num number of proposed ROIs + /// @param min_size minimum box width and height + /// @param nms_threshold threshold to be used in NonMaxSuppression stage + /// @param pre_nms_count number of top-n proposals before NMS + /// @param post_nms_count number of top-n proposals after NMS + /// @param normalized indicates whether proposal bboxes are normalized + /// @param nms_eta eta parameter for adaptive NMS + /// @param roi_num_type type of 3rd output elements + generate_proposals(const primitive_id& id, + const std::vector& inputs, + float min_size, + float nms_threshold, + int64_t pre_nms_count, + int64_t post_nms_count, + bool normalized, + float nms_eta, + const data_types roi_num_type, + 
const padding& output_padding = {}) : + primitive_base{id, inputs, output_padding}, + output_rois_scores{inputs[4]}, + output_rois_num{inputs[5]}, + min_size{min_size}, + nms_threshold{nms_threshold}, + pre_nms_count{pre_nms_count}, + post_nms_count{post_nms_count}, + normalized{normalized}, + nms_eta{nms_eta}, + roi_num_type{roi_num_type} {} + + primitive_id output_rois_scores; + primitive_id output_rois_num; + float min_size; + float nms_threshold; + int64_t pre_nms_count; + int64_t post_nms_count; + bool normalized; + float nms_eta; + data_types roi_num_type; + +protected: + std::vector> get_dependencies() const override { + std::vector> ret; + if (!output_rois_scores.empty()) + ret.push_back(output_rois_scores); + if (!output_rois_num.empty()) + ret.push_back(output_rois_num); + return ret; + } +}; +/// @} +/// @} +/// @} +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/generate_proposals.cpp b/src/plugins/intel_gpu/src/graph/generate_proposals.cpp new file mode 100644 index 00000000000..e27505884ca --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/generate_proposals.cpp @@ -0,0 +1,43 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "generate_proposals_inst.h" +#include "primitive_type_base.h" +#include "intel_gpu/runtime/error_handler.hpp" +#include "json_object.h" +#include + +namespace cldnn { +primitive_type_id generate_proposals::type_id() { + static primitive_type_base instance; + return &instance; +} + +layout generate_proposals_inst::calc_output_layout(const generate_proposals_node& node, kernel_impl_params const& impl_param) { + const layout data_layout = impl_param.get_input_layout(); + const auto num_batches = data_layout.batch(); + const auto desc = impl_param.typed_desc(); + return layout(data_layout.data_type, data_layout.format, {static_cast(num_batches * desc->post_nms_count), 4, 1, 1}); +} + +std::string generate_proposals_inst::to_string(const generate_proposals_node& node) { + auto desc = node.get_primitive(); + + std::stringstream primitive_description; + + json_composite info; + info.add("min_size", desc->min_size); + info.add("nms_threshold", desc->nms_threshold); + info.add("pre_nms_count", desc->pre_nms_count); + info.add("post_nms_count", desc->post_nms_count); + info.add("normalized", desc->normalized); + info.add("nms_eta", desc->nms_eta); + + auto node_info = node.desc_to_json(); + node_info->add("generate_proposals_info", info); + node_info->dump(primitive_description); + + return primitive_description.str(); +} +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/generate_proposals.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/generate_proposals.cpp new file mode 100644 index 00000000000..23abd87e31a --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/generate_proposals.cpp @@ -0,0 +1,89 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "generate_proposals_inst.h" +#include "primitive_base.hpp" +#include "impls/implementation_map.hpp" +#include "kernel_selector_helper.h" +#include "generate_proposals/generate_proposals_kernel_selector.h" +#include "generate_proposals/generate_proposals_kernel_ref.h" + + + +namespace cldnn { +namespace ocl { +struct generate_proposals_impl + : public typed_primitive_impl_ocl { + using parent = typed_primitive_impl_ocl; + using parent::parent; + + std::unique_ptr clone() const override { + return make_unique(*this); + } + +protected: + kernel_arguments_data 
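+    // The 2nd and 3rd op outputs are modelled as mutable_data dependencies
+    // (see generate_proposals_inst.h), so they are appended here as extra kernel
+    // "inputs" in the order the kernel selector expects: roi scores, then rois num.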
get_arguments(typed_primitive_inst& instance, int32_t) const override { + auto args = parent::get_arguments(instance, 0); + args.inputs.push_back(instance.output_rois_scores_memory()); + args.inputs.push_back(instance.output_rois_nums_memory()); + return args; + } + +public: + static primitive_impl* create(const generate_proposals_node& arg, const kernel_impl_params& impl_param) { + auto params = get_default_params(impl_param); + auto optional_params = get_default_optional_params< + kernel_selector::generate_proposals_optional_params>(arg.get_program()); + + const auto& primitive = arg.get_primitive(); + + params.min_size = primitive->min_size; + params.nms_threshold = primitive->nms_threshold; + params.pre_nms_count = primitive->pre_nms_count; + params.post_nms_count = primitive->post_nms_count; + params.normalized = primitive->normalized; + params.nms_eta = primitive->nms_eta; + params.roi_num_type = primitive->roi_num_type == cldnn::data_types::i32 ? + kernel_selector::Datatype::INT32 : kernel_selector::Datatype::INT64; + + params.inputs.push_back(convert_data_tensor(arg.anchors().get_output_layout())); + params.inputs.push_back(convert_data_tensor(arg.deltas().get_output_layout())); + params.inputs.push_back(convert_data_tensor(arg.scores().get_output_layout())); + + params.inputs.push_back(convert_data_tensor(arg.output_rois_scores_node().get_output_layout())); + params.inputs.push_back(convert_data_tensor(arg.output_rois_nums_node().get_output_layout())); + + const auto& kernel_selector = kernel_selector::generate_proposals_kernel_selector::Instance(); + const auto best_kernels = kernel_selector.GetBestKernels(params, optional_params); + + CLDNN_ERROR_BOOL(arg.id(), + "best_kernels.empty()", + best_kernels.empty(), + "Cannot find a proper kernel with this arguments"); + + return new generate_proposals_impl(arg, best_kernels[0]); + } +}; + +namespace detail { + attach_generate_proposals_impl::attach_generate_proposals_impl() { + implementation_map::add(impl_types::ocl, + generate_proposals_impl::create, { + std::make_tuple(data_types::f16, format::bfyx), + std::make_tuple(data_types::f16, format::b_fs_yx_fsv16), + std::make_tuple(data_types::f16, format::b_fs_yx_fsv32), + std::make_tuple(data_types::f16, format::bs_fs_yx_bsv16_fsv16), + std::make_tuple(data_types::f16, format::bs_fs_yx_bsv32_fsv16), + std::make_tuple(data_types::f16, format::bs_fs_yx_bsv32_fsv32), + + std::make_tuple(data_types::f32, format::bfyx), + std::make_tuple(data_types::f32, format::b_fs_yx_fsv16), + std::make_tuple(data_types::f32, format::b_fs_yx_fsv32), + std::make_tuple(data_types::f32, format::bs_fs_yx_bsv16_fsv16), + std::make_tuple(data_types::f32, format::bs_fs_yx_bsv32_fsv16), + std::make_tuple(data_types::f32, format::bs_fs_yx_bsv32_fsv32)}); + } +} // namespace detail +} // namespace ocl +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/register.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/register.cpp index 9ea4b73f2b1..e8ef5315b35 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/register.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/register.cpp @@ -41,6 +41,7 @@ void register_implementations() { REGISTER_OCL(gather_elements); REGISTER_OCL(gather_nd); REGISTER_OCL(gemm); + REGISTER_OCL(generate_proposals); REGISTER_OCL(lrn); REGISTER_OCL(lstm_gemm); REGISTER_OCL(lstm_elt); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/register.hpp b/src/plugins/intel_gpu/src/graph/impls/ocl/register.hpp index efc6d9bbabd..50ab1eb3dcf 100644 --- 
a/src/plugins/intel_gpu/src/graph/impls/ocl/register.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/register.hpp @@ -119,6 +119,7 @@ REGISTER_OCL(gather); REGISTER_OCL(gather_nd); REGISTER_OCL(gather_elements); REGISTER_OCL(gemm); +REGISTER_OCL(generate_proposals); REGISTER_OCL(lrn); REGISTER_OCL(lstm_gemm); REGISTER_OCL(lstm_elt); diff --git a/src/plugins/intel_gpu/src/graph/include/generate_proposals_inst.h b/src/plugins/intel_gpu/src/graph/include/generate_proposals_inst.h new file mode 100644 index 00000000000..46789b1fad8 --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/include/generate_proposals_inst.h @@ -0,0 +1,50 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +#pragma once +#include "intel_gpu/primitives/generate_proposals.hpp" +#include "primitive_inst.h" + +namespace cldnn { +template <> +struct typed_program_node + : public typed_program_node_base { + using parent = typed_program_node_base; + +public: + using parent::parent; + + program_node& input() const { return get_dependency(0); } + + program_node& anchors() const { return get_dependency(1); } + program_node& deltas() const { return get_dependency(2); } + program_node& scores() const { return get_dependency(3); } + + program_node& output_rois_scores_node() const { return get_dependency(4); } + program_node& output_rois_nums_node() const { return get_dependency(5); } +}; + +using generate_proposals_node = typed_program_node; + +template <> +class typed_primitive_inst + : public typed_primitive_inst_base { + using parent = typed_primitive_inst_base; + +public: + static layout calc_output_layout(const generate_proposals_node& node, kernel_impl_params const& impl_param); + static std::string to_string(const generate_proposals_node& node); + + typed_primitive_inst(network& network, const generate_proposals_node& node) + : parent(network, node) + {} + + memory::ptr output_rois_scores_memory() const { return dep_memory_ptr(4); } + memory::ptr output_rois_nums_memory() const { return dep_memory_ptr(5); } +}; + +using generate_proposals_inst = typed_primitive_inst; +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index cdc03e89e29..d40bab55c9c 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -38,6 +38,7 @@ #include "data_inst.h" #include "deconvolution_inst.h" #include "detection_output_inst.h" +#include "generate_proposals_inst.h" #include "input_layout_inst.h" #include "shuffle_channels_inst.h" #include "arg_max_min_inst.h" @@ -1432,7 +1433,8 @@ void program::set_layout_optimizer_attributes(layout_optimizer& lo) { prim.type() != cldnn::roll::type_id() && prim.type() != cldnn::prior_box::type_id() && prim.type() != cldnn::resample::type_id() && - prim.type() != cldnn::eye::type_id()) { + prim.type() != cldnn::eye::type_id() && + prim.type() != cldnn::generate_proposals::type_id()) { can_use_fsv16 = false; } @@ -1468,7 +1470,8 @@ void program::set_layout_optimizer_attributes(layout_optimizer& lo) { prim.type() != cldnn::roll::type_id() && prim.type() != cldnn::resample::type_id() && prim.type() != cldnn::prior_box::type_id() && - prim.type() != cldnn::eye::type_id()) { + prim.type() != cldnn::eye::type_id() && + prim.type() != cldnn::generate_proposals::type_id()) { can_use_bs_fs_yx_bsv16_fsv16 = false; } } diff --git 
a/src/plugins/intel_gpu/src/kernel_selector/common/common_types.h b/src/plugins/intel_gpu/src/kernel_selector/common/common_types.h index c85754256e3..513cf344a94 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/common/common_types.h +++ b/src/plugins/intel_gpu/src/kernel_selector/common/common_types.h @@ -91,6 +91,7 @@ enum class KernelType { REVERSE, PRIOR_BOX, EYE, + GENERATE_PROPOSALS }; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/generate_proposals/generate_proposals_kernel_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/generate_proposals/generate_proposals_kernel_ref.cpp new file mode 100644 index 00000000000..d63f4618a10 --- /dev/null +++ b/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/generate_proposals/generate_proposals_kernel_ref.cpp @@ -0,0 +1,196 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "generate_proposals_kernel_ref.h" +#include "kernel_selector_utils.h" + +namespace kernel_selector { + +ParamsKey GenerateProposalsRef::GetSupportedKey() const { + ParamsKey k; + + k.EnableInputDataType(Datatype::INT32); + k.EnableInputDataType(Datatype::INT64); + k.EnableOutputDataType(Datatype::INT32); + k.EnableOutputDataType(Datatype::INT64); + k.EnableInputDataType(Datatype::F16); + k.EnableInputDataType(Datatype::F32); + k.EnableOutputDataType(Datatype::F16); + k.EnableOutputDataType(Datatype::F32); + k.EnableAllInputLayout(); + k.EnableAllOutputLayout(); + k.EnableBatching(); + k.EnableDifferentTypes(); + k.EnableTensorPitches(); + + return k; +} + +KernelsPriority GenerateProposalsRef::GetKernelsPriority(const Params&, const optional_params&) const { + return DONT_USE_IF_HAVE_SOMETHING_ELSE; +} + +bool GenerateProposalsRef::Validate(const Params& p, const optional_params& o) const { + if (p.GetType() != KernelType::GENERATE_PROPOSALS + || o.GetType() != KernelType::GENERATE_PROPOSALS) { + return false; + } + + return true; +} + +namespace { +constexpr size_t kImInfoInputIdx = 0; +constexpr size_t kAnchorsInputIdx = 1; +constexpr size_t kDeltasInputIdx = 2; +constexpr size_t kScoresInputIdx = 3; +constexpr size_t kRoisScoresOutputIdx = 4; +constexpr size_t kRoisNumsOutputIdx = 5; + +GenerateProposalsRef::DispatchData SetDefault(const generate_proposals_params& params, size_t idx) { + GenerateProposalsRef::DispatchData dispatch_data; + + const auto& inputs = params.inputs; + const auto num_batches = inputs[kScoresInputIdx].Batch().v; + const auto anchors_num = inputs[kScoresInputIdx].Feature().v; + const auto bottom_H = inputs[kDeltasInputIdx].Y().v; + const auto bottom_W = inputs[kDeltasInputIdx].X().v; + + if (idx == 0) { + dispatch_data.gws = {bottom_H, bottom_W, num_batches * anchors_num}; + } else if (idx == 1 || idx == 2) { + dispatch_data.gws = {num_batches, 1, 1}; + } else if (idx == 3) { + dispatch_data.gws = {1, 1, 1}; + } + + dispatch_data.lws = GetOptimalLocalWorkGroupSizes(dispatch_data.gws, params.engineInfo); + + return dispatch_data; +} +} // namespace + +void GenerateProposalsRef::SetKernelArguments( + const generate_proposals_params& params, + size_t idx, cldnn::arguments_desc& arguments) const { + switch (idx) { + case 0: { // refine anchors + arguments.push_back({ArgumentDescriptor::Types::INPUT, kImInfoInputIdx}); + arguments.push_back({ArgumentDescriptor::Types::INPUT, kAnchorsInputIdx}); + 
arguments.push_back({ArgumentDescriptor::Types::INPUT, kDeltasInputIdx}); + arguments.push_back({ArgumentDescriptor::Types::INPUT, kScoresInputIdx}); + arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0}); // proposals + break; + } + case 1: { // sort proposals by score + arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0}); // proposals + break; + } + case 2: { // NMS + arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0}); // proposals + arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 1}); // nms_out_indices + arguments.push_back({ArgumentDescriptor::Types::INPUT, kRoisNumsOutputIdx}); // rois num + break; + } + case 3: { // convert proposals to rois and roi scores + arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0}); // proposals + arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 1}); // nms_out_indices + arguments.push_back({ArgumentDescriptor::Types::INPUT, kRoisNumsOutputIdx}); // rois num + arguments.push_back({ArgumentDescriptor::Types::OUTPUT, 0}); // rois + arguments.push_back({ArgumentDescriptor::Types::INPUT, kRoisScoresOutputIdx}); // roi scores + break; + } + default: + throw std::invalid_argument("generate_proposals has 4 kernels. valid index is 0 ~ 3."); + } +} + +KernelsData GenerateProposalsRef::GetKernelsData(const Params& params, const optional_params& options) const { + if (!Validate(params, options)) { + return {}; + } + + constexpr size_t kKernelsNum = 4; + KernelData kd = KernelData::Default(params, kKernelsNum); + const generate_proposals_params& new_params = static_cast(params); + const auto& inputs = new_params.inputs; + + const auto anchors_num = inputs[kScoresInputIdx].Feature().v; + const auto bottom_H = inputs[kDeltasInputIdx].Y().v; + const auto bottom_W = inputs[kDeltasInputIdx].X().v; + const auto scale_w_index = inputs[kImInfoInputIdx].Feature().v == 3 ? 
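+        // Per the GenerateProposals spec, im_info carries either 3 values
+        // (H, W, scale) or 4 (H, W, scale_y, scale_x) per image; with 3 values
+        // the single scale at index 2 is reused for the width scale, hence: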
2 : 3; + const auto num_proposals = anchors_num * bottom_H * bottom_W; + const auto pre_nms_topn = std::min(num_proposals, new_params.pre_nms_count); + const auto max_delta_log_wh = static_cast(std::log(1000.0 / 16.0)); + + kd.internalBufferDataType = Datatype::F32; + + const auto num_batches = inputs[kScoresInputIdx].Batch().v; + constexpr size_t kProposalBoxSize = 6; // 6 values: {x0, y0, x1, y1, score, keep} + const auto proposals_buffer_size = num_batches * num_proposals * sizeof(float) * kProposalBoxSize; + kd.internalBufferSizes.push_back(proposals_buffer_size); + + const auto out_indices_size = num_batches * new_params.post_nms_count * sizeof(float); + kd.internalBufferSizes.push_back(out_indices_size); + + for (size_t i = 0; i < kKernelsNum; ++i) { + const auto dispatchData = SetDefault(new_params, i); + const auto entry_point = GetEntryPoint(kernelName, new_params.layerID, params, options, i); + auto cldnn_jit = MakeBaseParamsJitConstants(new_params); + + cldnn_jit.AddConstant(MakeJitConstant("GENERATE_PROPOSALS_STAGE_" + std::to_string(i), "true")); + cldnn_jit.AddConstant(MakeJitConstant("PROPOSAL_SIZE", kProposalBoxSize)); + cldnn_jit.Merge(MakeTypeJitConstants(new_params.roi_num_type, "ROI_NUM")); + if (new_params.normalized) { + cldnn_jit.AddConstant(MakeJitConstant("NORMALIZED", 1)); + } + + switch (i) { + case 0: { + cldnn_jit.AddConstants({MakeJitConstant("MIN_SIZE", new_params.min_size), + MakeJitConstant("ANCHORS_NUM", anchors_num), + MakeJitConstant("NUM_PROPOSALS", num_proposals), + MakeJitConstant("BOTTOM_H", bottom_H), + MakeJitConstant("BOTTOM_W", bottom_W), + MakeJitConstant("BOTTOM_AREA", bottom_H * bottom_W), + MakeJitConstant("SCALE_W_INDEX", scale_w_index), + MakeJitConstant("MAX_DELTA_LOG_WH", max_delta_log_wh) + }); + break; + } + case 1: { + cldnn_jit.AddConstants({MakeJitConstant("NUM_PROPOSALS", num_proposals)}); + break; + } + case 2: { + cldnn_jit.AddConstants({MakeJitConstant("NUM_PROPOSALS", num_proposals), + MakeJitConstant("PRE_NMS_TOPN", pre_nms_topn), + MakeJitConstant("POST_NMS_COUNT", new_params.post_nms_count), + MakeJitConstant("NMS_THRESHOLD", new_params.nms_threshold), + }); + break; + } + case 3: { + cldnn_jit.AddConstants({MakeJitConstant("POST_NMS_COUNT", new_params.post_nms_count), + MakeJitConstant("NUM_PROPOSALS", num_proposals) + }); + break; + } + default: + throw std::invalid_argument("GENERATE_PROPOSALS has 4 kernels. 
valid index is 0 ~ 3."); + } + + const auto jit = CreateJit(kernelName, cldnn_jit, entry_point); + auto& kernel = kd.kernels[i]; + + KernelBase::CheckDispatchData(kernelName, dispatchData, params.engineInfo.maxWorkGroupSize); + kernel.params.workGroups.global = dispatchData.gws; + kernel.params.workGroups.local = dispatchData.lws; + kernel.code.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo); + SetKernelArguments(new_params, i, kernel.params.arguments); + } + + return {kd}; +} +} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/generate_proposals/generate_proposals_kernel_ref.h b/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/generate_proposals/generate_proposals_kernel_ref.h new file mode 100644 index 00000000000..31e33b0459b --- /dev/null +++ b/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/generate_proposals/generate_proposals_kernel_ref.h @@ -0,0 +1,45 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "kernel_base_opencl.h" + +namespace kernel_selector { +struct generate_proposals_params : public base_params { + generate_proposals_params() + : base_params(KernelType::GENERATE_PROPOSALS) {} + + float min_size{0.0f}; + float nms_threshold{0.0f}; + size_t pre_nms_count{0}; + size_t post_nms_count{0}; + bool normalized{true}; + float nms_eta{1.0f}; + Datatype roi_num_type = Datatype::INT64; +}; + +struct generate_proposals_optional_params : public optional_params { + generate_proposals_optional_params() + : optional_params(KernelType::GENERATE_PROPOSALS) {} +}; + +class GenerateProposalsRef : public KernelBaseOpenCL { +public: + GenerateProposalsRef() + : KernelBaseOpenCL("generate_proposals_ref") {} + + ~GenerateProposalsRef() = default; + + using DispatchData = CommonDispatchData; + + KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; + KernelsPriority GetKernelsPriority(const Params& params, const optional_params& options) const override; + ParamsKey GetSupportedKey() const override; +protected: + bool Validate(const Params& p, const optional_params& o) const override; + void SetKernelArguments(const generate_proposals_params& params, + size_t idx, cldnn::arguments_desc& kernel) const; +}; +} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/generate_proposals/generate_proposals_kernel_selector.cpp b/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/generate_proposals/generate_proposals_kernel_selector.cpp new file mode 100644 index 00000000000..4cd337bbf65 --- /dev/null +++ b/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/generate_proposals/generate_proposals_kernel_selector.cpp @@ -0,0 +1,21 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "generate_proposals_kernel_selector.h" +#include "generate_proposals_kernel_ref.h" + +namespace kernel_selector { +generate_proposals_kernel_selector::generate_proposals_kernel_selector() { + Attach(); +} + +generate_proposals_kernel_selector& generate_proposals_kernel_selector::Instance() { + static generate_proposals_kernel_selector instance_; + return instance_; +} + +KernelsData generate_proposals_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const { + return GetNaiveBestKernel(params, options, KernelType::GENERATE_PROPOSALS); +} +} // namespace 
kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/generate_proposals/generate_proposals_kernel_selector.h b/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/generate_proposals/generate_proposals_kernel_selector.h new file mode 100644 index 00000000000..e8cb30cabd0 --- /dev/null +++ b/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/generate_proposals/generate_proposals_kernel_selector.h @@ -0,0 +1,18 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "kernel_selector.h" + +namespace kernel_selector { +class generate_proposals_kernel_selector : public kernel_selector_base { +public: + static generate_proposals_kernel_selector& Instance(); + + generate_proposals_kernel_selector(); + + KernelsData GetBestKernels(const Params& params, const optional_params& options) const override; +}; +} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/core/cl_kernels/generate_proposals_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/core/cl_kernels/generate_proposals_ref.cl new file mode 100644 index 00000000000..5ccec1157e8 --- /dev/null +++ b/src/plugins/intel_gpu/src/kernel_selector/core/cl_kernels/generate_proposals_ref.cl @@ -0,0 +1,306 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#if INPUT0_TYPE_SIZE == 2 //f16 + #define HALF_ONE 0.5h +#else + #define HALF_ONE 0.5f +#endif + +#define ZERO INPUT0_VAL_ZERO + +#ifdef NORMALIZED + #define COORDINATES_OFFSET INPUT0_VAL_ZERO +#else + #define COORDINATES_OFFSET INPUT0_VAL_ONE +#endif + +#ifdef GENERATE_PROPOSALS_STAGE_0 + +// 0. Refine anchors +KERNEL(generate_proposals_ref_stage_0) +(const __global INPUT0_TYPE* im_info, + const __global INPUT1_TYPE* anchors, + const __global INPUT2_TYPE* deltas, + const __global INPUT3_TYPE* scores, + __global OUTPUT_TYPE* proposals) { + const uint h = get_global_id(0); + const uint w = get_global_id(1); + const uint ba = (uint)get_global_id(2); + const uint batch = ba / INPUT0_FEATURE_NUM; + const uint anchor = ba % INPUT0_FEATURE_NUM; + + const INPUT0_TYPE img_H = im_info[INPUT0_GET_INDEX(batch, 0, 0, 0)]; + const INPUT0_TYPE img_W = im_info[INPUT0_GET_INDEX(batch, 1, 0, 0)]; + const INPUT0_TYPE scale_H = im_info[INPUT0_GET_INDEX(batch, 2, 0, 0)]; + const INPUT0_TYPE scale_W = im_info[INPUT0_GET_INDEX(batch, SCALE_W_INDEX, 0, 0)]; + const float min_box_H = MIN_SIZE * scale_H; + const float min_box_W = MIN_SIZE * scale_W; + + INPUT0_TYPE x0 = anchors[INPUT1_GET_INDEX(h, w, anchor, 0)]; + INPUT0_TYPE y0 = anchors[INPUT1_GET_INDEX(h, w, anchor, 1)]; + INPUT0_TYPE x1 = anchors[INPUT1_GET_INDEX(h, w, anchor, 2)]; + INPUT0_TYPE y1 = anchors[INPUT1_GET_INDEX(h, w, anchor, 3)]; + + const INPUT0_TYPE dx = deltas[INPUT2_GET_INDEX(batch, anchor * 4 + 0 , h, w)]; + const INPUT0_TYPE dy = deltas[INPUT2_GET_INDEX(batch, anchor * 4 + 1 , h , w)]; + const INPUT0_TYPE d_log_w = deltas[INPUT2_GET_INDEX(batch, anchor * 4 + 2 , h, w)]; + const INPUT0_TYPE d_log_h = deltas[INPUT2_GET_INDEX(batch, anchor * 4 + 3 , h, w)]; + + const INPUT0_TYPE score = scores[INPUT3_GET_INDEX(batch, anchor, h, w)]; + + // width & height of box + const INPUT0_TYPE ww = x1 - x0 + COORDINATES_OFFSET; + const INPUT0_TYPE hh = y1 - y0 + COORDINATES_OFFSET; + // center location of box + const INPUT0_TYPE ctr_x = x0 + HALF_ONE * ww; + const INPUT0_TYPE ctr_y = y0 + HALF_ONE * hh; + // new center location according to deltas (dx, dy) + const INPUT0_TYPE 
pred_ctr_x = dx * ww + ctr_x; + const INPUT0_TYPE pred_ctr_y = dy * hh + ctr_y; + // new width & height according to deltas d(log w), d(log h) + const INPUT0_TYPE pred_w = exp(min(d_log_w, TO_INPUT0_TYPE(MAX_DELTA_LOG_WH))) * ww; + const INPUT0_TYPE pred_h = exp(min(d_log_h, TO_INPUT0_TYPE(MAX_DELTA_LOG_WH))) * hh; + // update upper-left corner location + x0 = pred_ctr_x - HALF_ONE * pred_w; + y0 = pred_ctr_y - HALF_ONE * pred_h; + // update lower-right corner location + x1 = pred_ctr_x + HALF_ONE * pred_w - COORDINATES_OFFSET; + y1 = pred_ctr_y + HALF_ONE * pred_h - COORDINATES_OFFSET; + // adjust new corner locations to be within the image region + x0 = max(ZERO, min(x0, img_W - COORDINATES_OFFSET)); + y0 = max(ZERO, min(y0, img_H - COORDINATES_OFFSET)); + x1 = max(ZERO, min(x1, img_W - COORDINATES_OFFSET)); + y1 = max(ZERO, min(y1, img_H - COORDINATES_OFFSET)); + // recompute new width & height + const INPUT0_TYPE box_w = x1 - x0 + COORDINATES_OFFSET; + const INPUT0_TYPE box_h = y1 - y0 + COORDINATES_OFFSET; + + const uint batch_offset = batch * NUM_PROPOSALS * PROPOSAL_SIZE; + const uint offset = h * BOTTOM_W + w; + const uint proposal_idx = batch_offset + (offset * ANCHORS_NUM + anchor) * PROPOSAL_SIZE; + + proposals[proposal_idx + 0] = x0; + proposals[proposal_idx + 1] = y0; + proposals[proposal_idx + 2] = x1; + proposals[proposal_idx + 3] = y1; + proposals[proposal_idx + 4] = score; + proposals[proposal_idx + 5] = ((min_box_W <= box_w) && (min_box_H <= box_h)) ? 1 : 0; +} + +#endif /* GENERATE_PROPOSALS_STAGE_0 */ + +#ifdef GENERATE_PROPOSALS_STAGE_1 + +typedef struct __attribute__((__packed__)) { + INPUT0_TYPE x0; + INPUT0_TYPE y0; + INPUT0_TYPE x1; + INPUT0_TYPE y1; + INPUT0_TYPE score; + INPUT0_TYPE keep; +} Box; + +inline void FUNC(swap_box)(__global Box* a, __global Box* b) { + const Box temp = *a; + *a = *b; + *b = temp; +} + +inline int FUNC(partition)(__global Box* arr, int l, int h) { + INPUT0_TYPE pivotScore = arr[h].score; + int i = (l - 1); + for (int j = l; j <= h - 1; j++) { + if (arr[j].score > pivotScore) { + i++; + FUNC_CALL(swap_box)(&arr[i], &arr[j]); + } + } + FUNC_CALL(swap_box)(&arr[i + 1], &arr[h]); + return (i + 1); +} + +inline void FUNC(bubbleSortIterative)(__global Box* arr, int l, int h) { + for (int i = 0; i < h - l; i++) { + bool swapped = false; + for (int j = l; j < h - i; j++) { + if ((arr[j].score > arr[j + 1].score)) { + FUNC_CALL(swap_box)(&arr[j], &arr[j + 1]); + swapped = true; + } + } + + if (!swapped) + break; + } +} + +inline void FUNC(quickSortIterative)(__global Box* arr, int l, int h) { + // Create an auxiliary stack + const int kStackSize = 100; + int stack[kStackSize]; + + // initialize top of stack + int top = -1; + + // push initial values of l and h to stack + stack[++top] = l; + stack[++top] = h; + + // Keep popping from stack while is not empty + while (top >= 0) { + // Pop h and l + h = stack[top--]; + l = stack[top--]; + + // Set pivot element at its correct position + // in sorted array + int p = FUNC_CALL(partition)(arr, l, h); + + // If there are elements on left side of pivot, + // then push left side to stack + if (p - 1 > l) { + if (top >= (kStackSize - 1)) { + FUNC_CALL(bubbleSortIterative)(arr, l, p - 1); + } else { + stack[++top] = l; + stack[++top] = p - 1; + } + } + + // If there are elements on right side of pivot, + // then push right side to stack + if (p + 1 < h) { + if (top >= (kStackSize - 1)) { + FUNC_CALL(bubbleSortIterative)(arr, p + 1, h); + } else { + stack[++top] = p + 1; + stack[++top] = h; + } + } + } +} + 
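+// Sorting strategy: an iterative quicksort over a fixed 100-entry index stack;
+// sub-ranges that would overflow the stack fall back to bubble sort, so each
+// batch's proposal buffer ends up ordered by descending score in place.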
+// 1. Sort boxes by scores +KERNEL(generate_proposals_ref_stage_1)(__global OUTPUT_TYPE* proposals) { + const uint batch = get_global_id(0); + + __global Box* boxes = (__global Box*)(proposals + batch * NUM_PROPOSALS * PROPOSAL_SIZE); + + FUNC_CALL(quickSortIterative)(boxes, 0, NUM_PROPOSALS-1); +} +#endif /* GENERATE_PROPOSALS_STAGE_1 */ + +#ifdef GENERATE_PROPOSALS_STAGE_2 + +// 2. NMS +KERNEL(generate_proposals_ref_stage_2) +(const __global INPUT0_TYPE* boxes, + __global size_t* out_indices, + __global ROI_NUM_TYPE* num_outputs) { + const uint batch = get_global_id(0); + const uint batch_offset = batch * NUM_PROPOSALS * PROPOSAL_SIZE; + + uint count = 0; + __local uint index_out[INPUT0_BATCH_NUM * POST_NMS_COUNT]; + __local bool is_dead[INPUT0_BATCH_NUM * PRE_NMS_TOPN]; + + for (uint box = 0; box < PRE_NMS_TOPN; ++box) { + is_dead[batch * PRE_NMS_TOPN + box] = boxes[batch_offset + PROPOSAL_SIZE * box + 5] == 0.0f; + } + + for (uint box = 0; box < PRE_NMS_TOPN; ++box) { + if (is_dead[batch * PRE_NMS_TOPN + box]) + continue; + + index_out[batch * POST_NMS_COUNT + count++] = box; + if (count == POST_NMS_COUNT) + break; + + const uint box_offset = batch_offset + box * PROPOSAL_SIZE; + const INPUT0_TYPE x0i = boxes[box_offset + 0]; + const INPUT0_TYPE y0i = boxes[box_offset + 1]; + const INPUT0_TYPE x1i = boxes[box_offset + 2]; + const INPUT0_TYPE y1i = boxes[box_offset + 3]; + + const INPUT0_TYPE a_width = x1i - x0i; + const INPUT0_TYPE a_height = y1i - y0i; + const INPUT0_TYPE a_area = (a_width + COORDINATES_OFFSET) * (a_height + COORDINATES_OFFSET); + + for (uint tail = box + 1; tail < PRE_NMS_TOPN; ++tail) { + const uint tail_offset = batch_offset + tail * PROPOSAL_SIZE; + const INPUT0_TYPE x0j = boxes[tail_offset + 0]; + const INPUT0_TYPE y0j = boxes[tail_offset + 1]; + const INPUT0_TYPE x1j = boxes[tail_offset + 2]; + const INPUT0_TYPE y1j = boxes[tail_offset + 3]; + + const INPUT0_TYPE x0 = max(x0i, x0j); + const INPUT0_TYPE y0 = max(y0i, y0j); + const INPUT0_TYPE x1 = min(x1i, x1j); + const INPUT0_TYPE y1 = min(y1i, y1j); + + const INPUT0_TYPE width = x1 - x0 + COORDINATES_OFFSET; + const INPUT0_TYPE height = y1 - y0 + COORDINATES_OFFSET; + const INPUT0_TYPE area = max(ZERO, width) * max(ZERO, height); + + const INPUT0_TYPE b_width = x1j - x0j; + const INPUT0_TYPE b_height = y1j - y0j; + const INPUT0_TYPE b_area = (b_width + COORDINATES_OFFSET) * (b_height + COORDINATES_OFFSET); + + const INPUT0_TYPE intersection_area = area / (a_area + b_area - area); + + if ( (NMS_THRESHOLD < intersection_area) && (x0i <= x1j) && (y0i <= y1j) && (x0j <= x1i) && (y0j <= y1i) ) { + is_dead[batch * PRE_NMS_TOPN + tail] = true; + } + } + } + + num_outputs[INPUT5_GET_INDEX(batch, 0, 0, 0)] = count; + + for (uint i = 0; i < count; ++i) { + out_indices[batch * POST_NMS_COUNT + i] = index_out[batch * POST_NMS_COUNT + i]; + } +} +#endif /* GENERATE_PROPOSALS_STAGE_2 */ + +#ifdef GENERATE_PROPOSALS_STAGE_3 + +// 3. 
Convert proposals to rois and roi_scores +KERNEL(generate_proposals_ref_stage_3) +(const __global INPUT0_TYPE* boxes, + const __global size_t* out_indices, + const __global ROI_NUM_TYPE* num_outputs, + __global OUTPUT_TYPE* rois, + __global INPUT4_TYPE* roi_scores) { + + uint roi_index = 0; + for (uint batch = 0; batch < INPUT0_BATCH_NUM; ++batch) { + for (uint i = 0; i < num_outputs[INPUT5_GET_INDEX(batch, 0, 0, 0)]; ++i) { + const uint box_index = (batch * NUM_PROPOSALS + out_indices[batch * POST_NMS_COUNT + i]) * PROPOSAL_SIZE; + + rois[OUTPUT_GET_INDEX(roi_index, 0, 0, 0)] = boxes[box_index + 0]; + rois[OUTPUT_GET_INDEX(roi_index, 1, 0, 0)] = boxes[box_index + 1]; + rois[OUTPUT_GET_INDEX(roi_index, 2, 0, 0)] = boxes[box_index + 2]; + rois[OUTPUT_GET_INDEX(roi_index, 3, 0, 0)] = boxes[box_index + 3]; + roi_scores[INPUT4_GET_INDEX(roi_index, 0, 0, 0)] = boxes[box_index + 4]; + ++roi_index; + } + } + + // fill the rest of outputs with zeros + while(roi_index < INPUT0_BATCH_NUM * POST_NMS_COUNT) { + rois[OUTPUT_GET_INDEX(roi_index, 0, 0, 0)] = 0.0f; + rois[OUTPUT_GET_INDEX(roi_index, 1, 0, 0)] = 0.0f; + rois[OUTPUT_GET_INDEX(roi_index, 2, 0, 0)] = 0.0f; + rois[OUTPUT_GET_INDEX(roi_index, 3, 0, 0)] = 0.0f; + + roi_scores[roi_index] = 0.0f; + ++roi_index; + } +} +#endif /* GENERATE_PROPOSALS_STAGE_3 */ + +#undef HALF_ONE +#undef ZERO +#undef COORDINATES_OFFSET diff --git a/src/plugins/intel_gpu/src/plugin/ops/generate_proposals.cpp b/src/plugins/intel_gpu/src/plugin/ops/generate_proposals.cpp new file mode 100644 index 00000000000..f8f8e8dcdde --- /dev/null +++ b/src/plugins/intel_gpu/src/plugin/ops/generate_proposals.cpp @@ -0,0 +1,79 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "intel_gpu/primitives/generate_proposals.hpp" + +#include + +#include "intel_gpu/plugin/common_utils.hpp" +#include "intel_gpu/plugin/program.hpp" +#include "intel_gpu/primitives/mutable_data.hpp" + +namespace ov { +namespace intel_gpu { + +static void CreateGenerateProposalsIEInternalOp( + Program& p, + const std::shared_ptr& op) { + validate_inputs_count(op, {4}); + if (op->get_output_size() != 3) { + IE_THROW() << "GenerateProposals requires 3 outputs"; + } + + auto inputs = p.GetInputPrimitiveIDs(op); + const auto& attrs = op->get_attrs(); + const auto layer_type_name = layer_type_name_ID(op); + const auto layer_name = layer_type_name + ".out0"; + + // output 2 - scores + const auto mutable_precision_1 = op->get_output_element_type(1); + const auto output_shape_1 = op->get_output_shape(1); + const cldnn::layout mutable_layout_1{cldnn::element_type_to_data_type(mutable_precision_1), + cldnn::format::get_default_format(output_shape_1.size()), + tensor_from_dims(output_shape_1)}; + cldnn::memory::ptr shared_memory_1{p.GetEngine().allocate_memory(mutable_layout_1)}; + + const auto mutable_id_w_1 = layer_type_name + "_md_write.1"; + const cldnn::mutable_data mutable_prim_w_1{mutable_id_w_1, shared_memory_1}; + p.add_primitive(*op, mutable_prim_w_1); + inputs.push_back(mutable_id_w_1); + + // output 3 - roisNum + const auto output_shape_2 = op->get_output_shape(2); + const auto mutable_precision_2 = op->get_output_element_type(2); + const cldnn::layout mutable_layout_2{cldnn::element_type_to_data_type(mutable_precision_2), + cldnn::format::get_default_format(output_shape_2.size()), + tensor_from_dims(output_shape_2)}; + cldnn::memory::ptr shared_memory_2{p.GetEngine().allocate_memory(mutable_layout_2)}; + + const auto mutable_id_w_2 = layer_type_name + 
"_md_write.2"; + const cldnn::mutable_data mutable_prim_w_2{mutable_id_w_2, shared_memory_2}; + p.add_primitive(*op, mutable_prim_w_2); + inputs.push_back(mutable_id_w_2); + + const cldnn::generate_proposals prim{layer_name, + inputs, + attrs.min_size, + attrs.nms_threshold, + attrs.pre_nms_count, + attrs.post_nms_count, + attrs.normalized, + attrs.nms_eta, + cldnn::element_type_to_data_type(op->get_roi_num_type())}; + + p.add_primitive(*op, prim); + + const auto mutable_id_r_1 = layer_type_name + ".out1"; + const cldnn::mutable_data mutable_prim_r_1{mutable_id_r_1, {layer_name}, shared_memory_1}; + p.add_primitive(*op, mutable_prim_r_1); + + const auto mutable_id_r_2 = layer_type_name + ".out2"; + const cldnn::mutable_data mutable_prim_r_2{mutable_id_r_2, {layer_name}, shared_memory_2}; + p.add_primitive(*op, mutable_prim_r_2); +} + +REGISTER_FACTORY_IMPL(internal, GenerateProposalsIEInternal); + +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index aef07b58cf0..f758ec1c007 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -74,6 +74,7 @@ #include #include #include +#include #include #include "transformations/op_conversions/softmax_decomposition.hpp" #include @@ -163,6 +164,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); manager.register_pass(); manager.register_pass(); + manager.register_pass(); manager.register_pass(); manager.register_pass(); diff --git a/src/plugins/intel_gpu/tests/test_cases/generate_proposals_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/generate_proposals_gpu_test.cpp new file mode 100644 index 00000000000..dcc8bf7d88a --- /dev/null +++ b/src/plugins/intel_gpu/tests/test_cases/generate_proposals_gpu_test.cpp @@ -0,0 +1,461 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "test_utils.h" + +#include +#include +#include + +using namespace cldnn; +using namespace ::tests; + +namespace { +template +struct GenerateProposalsParams { + float min_size; + float nms_threshold; + int64_t pre_nms_count; + int64_t post_nms_count; + bool normalized; + float nms_eta; + std::vector expected_rois; + std::vector expected_roi_scores; + std::vector expected_rois_num; +}; + +template +using GenerateProposalsParamsWithLayout = std::tuple, format::type>; + +constexpr size_t num_batches = 2; +constexpr size_t image_height = 200; +constexpr size_t image_width = 200; +constexpr float image_scale = 4.0f; +constexpr size_t height = 2; +constexpr size_t width = 6; +constexpr size_t number_of_channels = 3; +constexpr size_t number_of_anchors = 3; + +const std::vector im_info{image_height, image_width, image_scale, image_height, image_width, image_scale}; + +const std::vector anchors{ + 0.0f, 1.0f, 2.0f, 3.0f, + 4.0f, 5.0f, 6.0f, 7.0f, + 8.0f, 9.0f, 10.0f, 11.0f, + 12.0f, 13.0f, 14.0f, 15.0f, + 16.0f, 17.0f, 18.0f, 19.0f, + 20.0f, 21.0f, 22.0f, 23.0f, + 24.0f, 25.0f, 26.0f, 27.0f, + 28.0f, 29.0f, 30.0f, 31.0f, + 32.0f, 33.0f, 34.0f, 35.0f, + 36.0f, 37.0f, 38.0f, 39.0f, + 40.0f, 41.0f, 42.0f, 43.0f, + 44.0f, 45.0f, 46.0f, 47.0f, + 48.0f, 49.0f, 50.0f, 51.0f, + 52.0f, 53.0f, 54.0f, 55.0f, + 56.0f, 57.0f, 58.0f, 59.0f, + 60.0f, 61.0f, 62.0f, 63.0f, + 64.0f, 65.0f, 66.0f, 67.0f, + 68.0f, 69.0f, 70.0f, 71.0f, + 72.0f, 73.0f, 74.0f, 75.0f, + 76.0f, 77.0f, 78.0f, 79.0f, + 
80.0f, 81.0f, 82.0f, 83.0f, + 84.0f, 85.0f, 86.0f, 87.0f, + 88.0f, 89.0f, 90.0f, 91.0f, + 92.0f, 93.0f, 94.0f, 95.0f, + 96.0f, 97.0f, 98.0f, 99.0f, + 100.0f, 101.0f, 102.0f, 103.0f, + 104.0f, 105.0f, 106.0f, 107.0f, + 108.0f, 109.0f, 110.0f, 111.0f, + 112.0f, 113.0f, 114.0f, 115.0f, + 116.0f, 117.0f, 118.0f, 119.0f, + 120.0f, 121.0f, 122.0f, 123.0f, + 124.0f, 125.0f, 126.0f, 127.0f, + 128.0f, 129.0f, 130.0f, 131.0f, + 132.0f, 133.0f, 134.0f, 135.0f, + 136.0f, 137.0f, 138.0f, 139.0f, + 140.0f, 141.0f, 142.0f, 143.0f}; + +const std::vector deltas{ + 0.5337073, 0.86607957, 0.55151343, 0.21626699, 0.4462629, 0.03985678, + 0.5157072, 0.9932138, 0.7565954, 0.43803605, 0.802818, 0.14834064, + 0.53932905, 0.14314, 0.3817048, 0.95075196, 0.05516243, 0.2567484, + 0.25508744, 0.77438325, 0.43561, 0.2094628, 0.8299043, 0.44982538, + 0.95615596, 0.5651084, 0.11801951, 0.05352486, 0.9774733, 0.14439464, + 0.62644225, 0.14370479, 0.54161614, 0.557915, 0.53102225, 0.0840179, + 0.7249888, 0.9843559, 0.5490522, 0.53788143, 0.822474, 0.3278008, + 0.39688024, 0.3286012, 0.5117038, 0.04743988, 0.9408995, 0.29885054, + 0.81039643, 0.85277915, 0.06807619, 0.86430097, 0.36225632, 0.16606331, + 0.5401001, 0.7541649, 0.11998601, 0.5131829, 0.40606487, 0.327888, + 0.27721855, 0.6378373, 0.22795396, 0.4961256, 0.3215895, 0.15607187, + 0.14782153, 0.8908137, 0.8835288, 0.834191, 0.29907143, 0.7983525, + 0.755875, 0.30837986, 0.0839176, 0.26624718, 0.04371626, 0.09472824, + 0.20689541, 0.37622106, 0.1083321, 0.1342548, 0.05815459, 0.7676379, + 0.8105144, 0.92348766, 0.26761323, 0.7183306, 0.8947588, 0.19020908, + 0.42731014, 0.7473663, 0.85775334, 0.9340091, 0.3278848, 0.755993, + 0.05307213, 0.39705503, 0.21003333, 0.5625373, 0.66188884, 0.80521655, + 0.6125863, 0.44678232, 0.97802377, 0.0204936, 0.02686367, 0.7390654, + 0.74631, 0.58399844, 0.5988792, 0.37413648, 0.5946692, 0.6955776, + 0.36377597, 0.7891322, 0.40900692, 0.99139464, 0.50169915, 0.41435778, + 0.17142445, 0.26761186, 0.31591868, 0.14249913, 0.12919712, 0.5418711, + 0.6523203, 0.50259084, 0.7379765, 0.01171071, 0.94423133, 0.00841132, + 0.97486794, 0.2921785, 0.7633071, 0.88477814, 0.03563205, 0.50833166, + 0.01354555, 0.535081, 0.41366324, 0.0694767, 0.9944055, 0.9981207, + 0.5337073, 0.86607957, 0.55151343, 0.21626699, 0.4462629, 0.03985678, + 0.5157072, 0.9932138, 0.7565954, 0.43803605, 0.802818, 0.14834064, + 0.53932905, 0.14314, 0.3817048, 0.95075196, 0.05516243, 0.2567484, + 0.25508744, 0.77438325, 0.43561, 0.2094628, 0.8299043, 0.44982538, + 0.95615596, 0.5651084, 0.11801951, 0.05352486, 0.9774733, 0.14439464, + 0.62644225, 0.14370479, 0.54161614, 0.557915, 0.53102225, 0.0840179, + 0.7249888, 0.9843559, 0.5490522, 0.53788143, 0.822474, 0.3278008, + 0.39688024, 0.3286012, 0.5117038, 0.04743988, 0.9408995, 0.29885054, + 0.81039643, 0.85277915, 0.06807619, 0.86430097, 0.36225632, 0.16606331, + 0.5401001, 0.7541649, 0.11998601, 0.5131829, 0.40606487, 0.327888, + 0.27721855, 0.6378373, 0.22795396, 0.4961256, 0.3215895, 0.15607187, + 0.14782153, 0.8908137, 0.8835288, 0.834191, 0.29907143, 0.7983525, + 0.755875, 0.30837986, 0.0839176, 0.26624718, 0.04371626, 0.09472824, + 0.20689541, 0.37622106, 0.1083321, 0.1342548, 0.05815459, 0.7676379, + 0.8105144, 0.92348766, 0.26761323, 0.7183306, 0.8947588, 0.19020908, + 0.42731014, 0.7473663, 0.85775334, 0.9340091, 0.3278848, 0.755993, + 0.05307213, 0.39705503, 0.21003333, 0.5625373, 0.66188884, 0.80521655, + 0.6125863, 0.44678232, 0.97802377, 0.0204936, 0.02686367, 0.7390654, + 0.74631, 0.58399844, 0.5988792, 
0.37413648, 0.5946692, 0.6955776, + 0.36377597, 0.7891322, 0.40900692, 0.99139464, 0.50169915, 0.41435778, + 0.17142445, 0.26761186, 0.31591868, 0.14249913, 0.12919712, 0.5418711, + 0.6523203, 0.50259084, 0.7379765, 0.01171071, 0.94423133, 0.00841132, + 0.97486794, 0.2921785, 0.7633071, 0.88477814, 0.03563205, 0.50833166, + 0.01354555, 0.535081, 0.41366324, 0.0694767, 0.9944055, 0.9981207}; + +const std::vector scores{ + 0.56637216, 0.90457034, 0.69827306, 0.4353543, 0.47985056, 0.42658508, + 0.14516132, 0.08081771, 0.1799732, 0.9229515, 0.42420176, 0.50857586, + 0.82664067, 0.4972319, 0.3752427, 0.56731623, 0.18241242, 0.33252355, + 0.30608943, 0.6572437, 0.69185436, 0.88646156, 0.36985755, 0.5590753, + 0.5256446, 0.03342898, 0.1344396, 0.68642473, 0.37953874, 0.32575172, + 0.21108444, 0.5661886, 0.45378175, 0.62126315, 0.26799858, 0.37272978, + 0.56637216, 0.90457034, 0.69827306, 0.4353543, 0.47985056, 0.42658508, + 0.14516132, 0.08081771, 0.1799732, 0.9229515, 0.42420176, 0.50857586, + 0.82664067, 0.4972319, 0.3752427, 0.56731623, 0.18241242, 0.33252355, + 0.30608943, 0.6572437, 0.69185436, 0.88646156, 0.36985755, 0.5590753, + 0.5256446, 0.03342898, 0.1344396, 0.68642473, 0.37953874, 0.32575172, + 0.21108444, 0.5661886, 0.45378175, 0.62126315, 0.26799858, 0.37272978}; + +const std::vector layouts{ + format::bfyx, + format::b_fs_yx_fsv16, + format::b_fs_yx_fsv32, + format::bs_fs_yx_bsv16_fsv16, + format::bs_fs_yx_bsv32_fsv16, + format::bs_fs_yx_bsv32_fsv32}; + +template +std::vector getValues(const std::vector& values) { + std::vector result(values.begin(), values.end()); + return result; +} +template float getError(); + +template<> +float getError() { + return 0.001; +} + +template<> +float getError() { + return 0.2; +} + +template +std::vector> getGenerateProposalsParams() { + std::vector> params = { + { + 1.0f, 0.7f, 14, 6, true, 1.0, + getValues({4.49132, 4.30537, 8.75027, 8.8035, + 0, 1.01395, 4.66909, 5.14337, + 135.501, 137.467, 139.81, 141.726, + 4.49132, 4.30537, 8.75027, 8.8035, + 0, 1.01395, 4.66909, 5.14337, + 135.501, 137.467, 139.81, 141.726}), + getValues({0.826641, 0.566372, 0.559075, + 0.826641, 0.566372, 0.559075}), + {3, 3} + }, + { + 1.0f, 0.7f, 1000, 6, true, 1.0, + getValues({4.49132, 4.30537, 8.75027, 8.8035, + 0, 1.01395, 4.66909, 5.14337, + 135.501, 137.467, 139.81, 141.726, + 47.2348, 47.8342, 52.5503, 52.3864, + 126.483, 128.3, 131.625, 133.707, + 4.49132, 4.30537, 8.75027, 8.8035, + 0, 1.01395, 4.66909, 5.14337, + 135.501, 137.467, 139.81, 141.726, + 47.2348, 47.8342, 52.5503, 52.3864, + 126.483, 128.3, 131.625, 133.707}), + getValues({0.826641, 0.566372, 0.559075, 0.479851, 0.267999, + 0.826641, 0.566372, 0.559075, 0.479851, 0.267999}), + {5, 5} + }, + { + 0.0f, 0.7f, 14, 6, true, 1.0, + getValues({108.129, 109.37, 111.623, 111.468, + 12.9725, 11.6102, 16.4918, 16.9624, + 112.883, 113.124, 115.17, 118.213, + 4.49132, 4.30537, 8.75027, 8.8035, + 24.9778, 25.0318, 27.2283, 28.495, + 100.126, 101.409, 102.354, 106.125, + 108.129, 109.37, 111.623, 111.468, + 12.9725, 11.6102, 16.4918, 16.9624, + 112.883, 113.124, 115.17, 118.213, + 4.49132, 4.30537, 8.75027, 8.8035, + 24.9778, 25.0318, 27.2283, 28.495, + 100.126, 101.409, 102.354, 106.125}), + getValues({0.922952, 0.90457, 0.886462, 0.826641, 0.698273, 0.691854, + 0.922952, 0.90457, 0.886462, 0.826641, 0.698273, 0.691854}), + {6, 6} + }, + + { + 0.1f, 0.7f, 1000, 6, true, 1.0, + getValues({108.129, 109.37, 111.623, 111.468, + 12.9725, 11.6102, 16.4918, 16.9624, + 112.883, 113.124, 115.17, 118.213, + 4.49132, 
4.30537, 8.75027, 8.8035, + 24.9778, 25.0318, 27.2283, 28.495, + 100.126, 101.409, 102.354, 106.125, + 108.129, 109.37, 111.623, 111.468, + 12.9725, 11.6102, 16.4918, 16.9624, + 112.883, 113.124, 115.17, 118.213, + 4.49132, 4.30537, 8.75027, 8.8035, + 24.9778, 25.0318, 27.2283, 28.495, + 100.126, 101.409, 102.354, 106.125}), + getValues({0.922952, 0.90457, 0.886462, 0.826641, 0.698273, 0.691854, + 0.922952, 0.90457, 0.886462, 0.826641, 0.698273, 0.691854}), + {6, 6} + }, + { + 1.0f, 0.7f, 14, 6, false, 1.0, + getValues({13.4588, 10.9153, 17.7377, 17.9436, + 4.73698, 3.95806, 10.1254, 9.70525, + 89.5773, 90.0053, 92.9476, 95.3396, + 0, 1.02093, 6.00364, 6.21505, + 92.3608, 94.306, 96.3198, 98.4288, + 135.252, 137.7, 140.716, 143.09, + 13.4588, 10.9153, 17.7377, 17.9436, + 4.73698, 3.95806, 10.1254, 9.70525, + 89.5773, 90.0053, 92.9476, 95.3396, + 0, 1.02093, 6.00364, 6.21505, + 92.3608, 94.306, 96.3198, 98.4288, + 135.252, 137.7, 140.716, 143.09}), + getValues({0.90457, 0.826641, 0.657244, 0.566372, 0.566189, 0.559075, + 0.90457, 0.826641, 0.657244, 0.566372, 0.566189, 0.559075}), + {6, 6} + }, + { + 0.0f, 0.7f, 1000, 6, false, 1.0, + getValues({108.194, 109.556, 112.435, 111.701, + 13.4588, 10.9153, 17.7377, 17.9436, + 113.324, 113.186, 115.755, 119.82, + 4.73698, 3.95806, 10.1254, 9.70525, + 25.4666, 25.0477, 27.8424, 29.2425, + 100.188, 101.614, 102.532, 107.687, + 108.194, 109.556, 112.435, 111.701, + 13.4588, 10.9153, 17.7377, 17.9436, + 113.324, 113.186, 115.755, 119.82, + 4.73698, 3.95806, 10.1254, 9.70525, + 25.4666, 25.0477, 27.8424, 29.2425, + 100.188, 101.614, 102.532, 107.687}), + getValues({0.922952, 0.90457, 0.886462, 0.826641, 0.698273, 0.691854, + 0.922952, 0.90457, 0.886462, 0.826641, 0.698273, 0.691854}), + {6, 6} + } + }; + return params; +} +}; // namespace + +template +struct generate_proposals_test + : public ::testing::TestWithParam > { +public: + void test() { + GenerateProposalsParams param; + format::type data_layout; + std::tie(param, data_layout) = this->GetParam(); + const bool need_reorder = data_layout != format::bfyx; + + const auto data_type = type_to_data_type::value; + const auto rois_num_type = type_to_data_type::value; + + auto& engine = get_test_engine(); + + const primitive_id input_im_info_id = "InputImInfo"; + const auto input_im_info = engine.allocate_memory({data_type, format::bfyx, tensor{batch(num_batches), feature(3)}}); + set_values(input_im_info, getValues(im_info)); + + const primitive_id input_anchors_id = "InputAnchors"; + auto input_anchors = engine.allocate_memory( + {data_type, format::bfyx, tensor{batch(height), feature(width), spatial(4, number_of_anchors)}}); + set_values(input_anchors, getValues(anchors)); + + const primitive_id input_deltas_id = "InputDeltas"; + auto input_deltas = engine.allocate_memory( + {data_type, format::bfyx, + tensor{batch(num_batches), feature(number_of_anchors * 4), spatial(width, height)}}); + set_values(input_deltas, getValues(deltas)); + + const primitive_id input_scores_id = "InputScores"; + auto input_scores = engine.allocate_memory( + {data_type, format::bfyx, tensor{batch(num_batches), feature(number_of_anchors), spatial(width, height)}}); + set_values(input_scores, getValues(scores)); + + const primitive_id output_roi_scores_id = "OutputRoiScores"; + const layout rois_scores_layout{data_type, data_layout, tensor{batch(num_batches * param.post_nms_count)}}; + auto output_roi_scores = engine.allocate_memory(rois_scores_layout); + + const primitive_id output_rois_num_id = "OutputRoisNum"; + const 
layout rois_num_layout{rois_num_type, data_layout, tensor{batch(num_batches)}}; + auto output_rois_num = engine.allocate_memory(rois_num_layout); + + const primitive_id reorder_im_info_id = input_im_info_id + "Reordered"; + const primitive_id reorder_anchors_id = input_anchors_id + "Reordered"; + const primitive_id reorder_deltas_id = input_deltas_id + "Reordered"; + const primitive_id reorder_scores_id = input_scores_id + "Reordered"; + + topology topology; + + topology.add(input_layout{input_im_info_id, input_im_info->get_layout()}); + topology.add(input_layout{input_anchors_id, input_anchors->get_layout()}); + topology.add(input_layout{input_deltas_id, input_deltas->get_layout()}); + topology.add(input_layout{input_scores_id, input_scores->get_layout()}); + topology.add(mutable_data{output_roi_scores_id, output_roi_scores}); + topology.add(mutable_data{output_rois_num_id, output_rois_num}); + + topology.add(reorder(reorder_im_info_id, input_im_info_id, data_layout, data_type)); + topology.add(reorder(reorder_anchors_id, input_anchors_id, data_layout, data_type)); + topology.add(reorder(reorder_deltas_id, input_deltas_id, data_layout, data_type)); + topology.add(reorder(reorder_scores_id, input_scores_id, data_layout, data_type)); + + const primitive_id generate_proposals_id = "generate_proposals"; + const std::vector inputs{ reorder_im_info_id, reorder_anchors_id, reorder_deltas_id, + reorder_scores_id, output_roi_scores_id, output_rois_num_id}; + const auto generate_proposals_primitive = generate_proposals{ + generate_proposals_id, + inputs, + param.min_size, + param.nms_threshold, + param.pre_nms_count, + param.post_nms_count, + param.normalized, + param.nms_eta, + rois_num_type}; + + topology.add(generate_proposals_primitive); + const primitive_id reorder_result_id = generate_proposals_id + "Reordered"; + topology.add(reorder(reorder_result_id, generate_proposals_id, format::bfyx, data_type)); + + network network{engine, topology}; + + network.set_input_data(input_im_info_id, input_im_info); + network.set_input_data(input_anchors_id, input_anchors); + network.set_input_data(input_deltas_id, input_deltas); + network.set_input_data(input_scores_id, input_scores); + + const auto outputs = network.execute(); + + const auto rois = outputs.at(reorder_result_id).get_memory(); + + const cldnn::mem_lock rois_ptr(rois, get_test_stream()); + ASSERT_EQ(rois_ptr.size(), num_batches * param.post_nms_count * 4); + + const auto get_plane_data = [&](const memory::ptr& mem, const data_types data_type, const layout& from_layout) { + if (!need_reorder) { + return mem; + } + cldnn::topology reorder_topology; + reorder_topology.add(input_layout("data", from_layout)); + reorder_topology.add(reorder("plane_data", "data", format::bfyx, data_type)); + cldnn::network reorder_net{engine, reorder_topology}; + reorder_net.set_input_data("data", mem); + const auto second_output_result = reorder_net.execute(); + const auto plane_data_mem = second_output_result.at("plane_data").get_memory(); + return plane_data_mem; + }; + + const cldnn::mem_lock roi_scores_ptr( + get_plane_data(output_roi_scores, data_type, rois_scores_layout), get_test_stream()); + ASSERT_EQ(roi_scores_ptr.size(), num_batches * param.post_nms_count); + + const cldnn::mem_lock rois_num_ptr( + get_plane_data(output_rois_num, rois_num_type, rois_num_layout), get_test_stream()); + ASSERT_EQ(rois_num_ptr.size(), num_batches); + + const auto& expected_rois = param.expected_rois; + const auto& expected_roi_scores = param.expected_roi_scores; + const 
auto& expected_rois_num = param.expected_rois_num; + + for (size_t j = 0; j < expected_rois_num.size(); ++j) { + EXPECT_EQ(expected_rois_num[j], rois_num_ptr[j]) << "j=" << j; + } + + for (auto i = 0; i < param.post_nms_count; ++i) { + EXPECT_NEAR(expected_roi_scores[i], roi_scores_ptr[i], getError()) << "i=" << i; + + if (static_cast(expected_roi_scores[i]) != 0.0f) { + for (size_t coord = 0; coord < 4; ++coord) { + const auto roi_idx = i * 4 + coord; + EXPECT_NEAR(expected_rois[roi_idx], rois_ptr[roi_idx], getError()) << "i=" << i << ", coord=" << coord; + } + } + } + } +}; + +using f32_i32 = generate_proposals_test; +TEST_P(f32_i32, f32_i32) { + test(); +} +INSTANTIATE_TEST_SUITE_P( + generate_proposals_gpu_test, + f32_i32, + ::testing::Combine( + ::testing::ValuesIn(getGenerateProposalsParams()), + ::testing::ValuesIn(layouts) + )); + +using f32_i64 = generate_proposals_test; +TEST_P(f32_i64, f32_i64) { + test(); +} +INSTANTIATE_TEST_SUITE_P( + generate_proposals_gpu_test, + f32_i64, + ::testing::Combine( + ::testing::ValuesIn(getGenerateProposalsParams()), + ::testing::ValuesIn(layouts) + )); + +using f16_i32 = generate_proposals_test; +TEST_P(f16_i32, f16_i32) { + test(); +} +INSTANTIATE_TEST_SUITE_P( + generate_proposals_gpu_test, + f16_i32, + ::testing::Combine( + ::testing::ValuesIn(getGenerateProposalsParams()), + ::testing::ValuesIn(layouts) + )); + +using f16_i64 = generate_proposals_test; +TEST_P(f16_i64, f16_i64) { + test(); +} +INSTANTIATE_TEST_SUITE_P( + generate_proposals_gpu_test, + f16_i64, + ::testing::Combine( + ::testing::ValuesIn(getGenerateProposalsParams()), + ::testing::ValuesIn(layouts) + )); diff --git a/src/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/generate_proposals.cpp b/src/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/generate_proposals.cpp new file mode 100644 index 00000000000..ef4a5773866 --- /dev/null +++ b/src/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/generate_proposals.cpp @@ -0,0 +1,197 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "single_layer_tests/generate_proposals.hpp" +#include "common_test_utils/ov_tensor_utils.hpp" + +using namespace ov::test; +using namespace ov::test::subgraph; + +namespace { + +const std::vector min_size = { 1.0f, 0.0f }; +const std::vector nms_threshold = { 0.7f }; +const std::vector post_nms_count = { 6 }; +const std::vector pre_nms_count = { 14, 1000 }; + +template +const std::vector>> getInputTensors() { + const std::vector>> input_tensors = { + { + "empty", + { + // 3 + ov::test::utils::create_tensor(ov::element::from(), ov::Shape{2, 3}, {1.0f, 1.0f, 0.0f, 1.0f, 1.0f, 0.0f}), + // 2 x 6 x 3 x 4 = 144 + ov::test::utils::create_tensor(ov::element::from(), ov::Shape{2, 6, 3, 4}, std::vector(144, 1.0f)), + // 2 x 12 x 2 x 6 = 144 * 2 + ov::test::utils::create_tensor(ov::element::from(), ov::Shape{2, 12, 2, 6}, std::vector(288, 1.0f)), + // {2 x 3 x 2 x 6} = 36 * 2 + ov::test::utils::create_tensor(ov::element::from(), ov::Shape{2, 3, 2, 6}, { + 5.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 4.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 8.0f, 1.0f, + 5.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 4.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 8.0f, 1.0f}) 
+ } + }, + { + "filled2", + { + ov::test::utils::create_tensor(ov::element::from(), ov::Shape{2, 3}, {200.0, 200.0, 4.0, 200.0, 200.0, 4.0}), + ov::test::utils::create_tensor(ov::element::from(), ov::Shape{2, 6, 3, 4}, {0.0f, 1.0f, 2.0f, 3.0f, + 4.0f, 5.0f, 6.0f, 7.0f, + 8.0f, 9.0f, 10.0f, 11.0f, + 12.0f, 13.0f, 14.0f, 15.0f, + 16.0f, 17.0f, 18.0f, 19.0f, + 20.0f, 21.0f, 22.0f, 23.0f, + 24.0f, 25.0f, 26.0f, 27.0f, + 28.0f, 29.0f, 30.0f, 31.0f, + 32.0f, 33.0f, 34.0f, 35.0f, + 36.0f, 37.0f, 38.0f, 39.0f, + 40.0f, 41.0f, 42.0f, 43.0f, + 44.0f, 45.0f, 46.0f, 47.0f, + 48.0f, 49.0f, 50.0f, 51.0f, + 52.0f, 53.0f, 54.0f, 55.0f, + 56.0f, 57.0f, 58.0f, 59.0f, + 60.0f, 61.0f, 62.0f, 63.0f, + 64.0f, 65.0f, 66.0f, 67.0f, + 68.0f, 69.0f, 70.0f, 71.0f, + 72.0f, 73.0f, 74.0f, 75.0f, + 76.0f, 77.0f, 78.0f, 79.0f, + 80.0f, 81.0f, 82.0f, 83.0f, + 84.0f, 85.0f, 86.0f, 87.0f, + 88.0f, 89.0f, 90.0f, 91.0f, + 92.0f, 93.0f, 94.0f, 95.0f, + 96.0f, 97.0f, 98.0f, 99.0f, + 100.0f, 101.0f, 102.0f, 103.0f, + 104.0f, 105.0f, 106.0f, 107.0f, + 108.0f, 109.0f, 110.0f, 111.0f, + 112.0f, 113.0f, 114.0f, 115.0f, + 116.0f, 117.0f, 118.0f, 119.0f, + 120.0f, 121.0f, 122.0f, 123.0f, + 124.0f, 125.0f, 126.0f, 127.0f, + 128.0f, 129.0f, 130.0f, 131.0f, + 132.0f, 133.0f, 134.0f, 135.0f, + 136.0f, 137.0f, 138.0f, 139.0f, + 140.0f, 141.0f, 142.0f, 143.0f}), + ov::test::utils::create_tensor(ov::element::from(), ov::Shape{2, 12, 2, 6}, { + 0.5337073, 0.86607957, 0.55151343, 0.21626699, 0.4462629, 0.03985678, + 0.5157072, 0.9932138, 0.7565954, 0.43803605, 0.802818, 0.14834064, + 0.53932905, 0.14314, 0.3817048, 0.95075196, 0.05516243, 0.2567484, + 0.25508744, 0.77438325, 0.43561, 0.2094628, 0.8299043, 0.44982538, + 0.95615596, 0.5651084, 0.11801951, 0.05352486, 0.9774733, 0.14439464, + 0.62644225, 0.14370479, 0.54161614, 0.557915, 0.53102225, 0.0840179, + 0.7249888, 0.9843559, 0.5490522, 0.53788143, 0.822474, 0.3278008, + 0.39688024, 0.3286012, 0.5117038, 0.04743988, 0.9408995, 0.29885054, + 0.81039643, 0.85277915, 0.06807619, 0.86430097, 0.36225632, 0.16606331, + 0.5401001, 0.7541649, 0.11998601, 0.5131829, 0.40606487, 0.327888, + 0.27721855, 0.6378373, 0.22795396, 0.4961256, 0.3215895, 0.15607187, + 0.14782153, 0.8908137, 0.8835288, 0.834191, 0.29907143, 0.7983525, + 0.755875, 0.30837986, 0.0839176, 0.26624718, 0.04371626, 0.09472824, + 0.20689541, 0.37622106, 0.1083321, 0.1342548, 0.05815459, 0.7676379, + 0.8105144, 0.92348766, 0.26761323, 0.7183306, 0.8947588, 0.19020908, + 0.42731014, 0.7473663, 0.85775334, 0.9340091, 0.3278848, 0.755993, + 0.05307213, 0.39705503, 0.21003333, 0.5625373, 0.66188884, 0.80521655, + 0.6125863, 0.44678232, 0.97802377, 0.0204936, 0.02686367, 0.7390654, + 0.74631, 0.58399844, 0.5988792, 0.37413648, 0.5946692, 0.6955776, + 0.36377597, 0.7891322, 0.40900692, 0.99139464, 0.50169915, 0.41435778, + 0.17142445, 0.26761186, 0.31591868, 0.14249913, 0.12919712, 0.5418711, + 0.6523203, 0.50259084, 0.7379765, 0.01171071, 0.94423133, 0.00841132, + 0.97486794, 0.2921785, 0.7633071, 0.88477814, 0.03563205, 0.50833166, + 0.01354555, 0.535081, 0.41366324, 0.0694767, 0.9944055, 0.9981207, + 0.5337073, 0.86607957, 0.55151343, 0.21626699, 0.4462629, 0.03985678, + 0.5157072, 0.9932138, 0.7565954, 0.43803605, 0.802818, 0.14834064, + 0.53932905, 0.14314, 0.3817048, 0.95075196, 0.05516243, 0.2567484, + 0.25508744, 0.77438325, 0.43561, 0.2094628, 0.8299043, 0.44982538, + 0.95615596, 0.5651084, 0.11801951, 0.05352486, 0.9774733, 0.14439464, + 0.62644225, 0.14370479, 0.54161614, 0.557915, 0.53102225, 0.0840179, + 0.7249888, 
0.9843559, 0.5490522, 0.53788143, 0.822474, 0.3278008, + 0.39688024, 0.3286012, 0.5117038, 0.04743988, 0.9408995, 0.29885054, + 0.81039643, 0.85277915, 0.06807619, 0.86430097, 0.36225632, 0.16606331, + 0.5401001, 0.7541649, 0.11998601, 0.5131829, 0.40606487, 0.327888, + 0.27721855, 0.6378373, 0.22795396, 0.4961256, 0.3215895, 0.15607187, + 0.14782153, 0.8908137, 0.8835288, 0.834191, 0.29907143, 0.7983525, + 0.755875, 0.30837986, 0.0839176, 0.26624718, 0.04371626, 0.09472824, + 0.20689541, 0.37622106, 0.1083321, 0.1342548, 0.05815459, 0.7676379, + 0.8105144, 0.92348766, 0.26761323, 0.7183306, 0.8947588, 0.19020908, + 0.42731014, 0.7473663, 0.85775334, 0.9340091, 0.3278848, 0.755993, + 0.05307213, 0.39705503, 0.21003333, 0.5625373, 0.66188884, 0.80521655, + 0.6125863, 0.44678232, 0.97802377, 0.0204936, 0.02686367, 0.7390654, + 0.74631, 0.58399844, 0.5988792, 0.37413648, 0.5946692, 0.6955776, + 0.36377597, 0.7891322, 0.40900692, 0.99139464, 0.50169915, 0.41435778, + 0.17142445, 0.26761186, 0.31591868, 0.14249913, 0.12919712, 0.5418711, + 0.6523203, 0.50259084, 0.7379765, 0.01171071, 0.94423133, 0.00841132, + 0.97486794, 0.2921785, 0.7633071, 0.88477814, 0.03563205, 0.50833166, + 0.01354555, 0.535081, 0.41366324, 0.0694767, 0.9944055, 0.9981207}), + ov::test::utils::create_tensor(ov::element::from(), ov::Shape{2, 3, 2, 6}, { + 0.56637216, 0.90457034, 0.69827306, 0.4353543, 0.47985056, 0.42658508, + 0.14516132, 0.08081771, 0.1799732, 0.9229515, 0.42420176, 0.50857586, + 0.82664067, 0.4972319, 0.3752427, 0.56731623, 0.18241242, 0.33252355, + 0.30608943, 0.6572437, 0.69185436, 0.88646156, 0.36985755, 0.5590753, + 0.5256446, 0.03342898, 0.1344396, 0.68642473, 0.37953874, 0.32575172, + 0.21108444, 0.5661886, 0.45378175, 0.62126315, 0.26799858, 0.37272978, + 0.56637216, 0.90457034, 0.69827306, 0.4353543, 0.47985056, 0.42658508, + 0.14516132, 0.08081771, 0.1799732, 0.9229515, 0.42420176, 0.50857586, + 0.82664067, 0.4972319, 0.3752427, 0.56731623, 0.18241242, 0.33252355, + 0.30608943, 0.6572437, 0.69185436, 0.88646156, 0.36985755, 0.5590753, + 0.5256446, 0.03342898, 0.1344396, 0.68642473, 0.37953874, 0.32575172, + 0.21108444, 0.5661886, 0.45378175, 0.62126315, 0.26799858, 0.37272978}), + } + } + }; + return input_tensors; +} + +constexpr size_t num_batches = 2; +constexpr size_t height = 2; +constexpr size_t width = 6; +constexpr size_t number_of_anchors = 3; + +const std::vector> input_shape = { + // im_info / anchors / boxesdeltas / scores + static_shapes_to_test_representation({{num_batches, 3}, + {height, width, number_of_anchors, 4}, + {num_batches, number_of_anchors * 4, height, width}, + {num_batches, number_of_anchors, height, width}}), +}; + + + +INSTANTIATE_TEST_SUITE_P( + smoke_GenerateProposalsLayerTest_f16, + GenerateProposalsLayerTest, + ::testing::Combine( + ::testing::ValuesIn(input_shape), + ::testing::ValuesIn(min_size), + ::testing::ValuesIn(nms_threshold), + ::testing::ValuesIn(post_nms_count), + ::testing::ValuesIn(pre_nms_count), + ::testing::ValuesIn({true}), + ::testing::ValuesIn(getInputTensors()), + ::testing::ValuesIn({ov::element::Type_t::f16}), + ::testing::ValuesIn({ov::element::Type_t::i32, ov::element::Type_t::i64}), + ::testing::Values(CommonTestUtils::DEVICE_GPU)), + GenerateProposalsLayerTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P( + smoke_GenerateProposalsLayerTest_f32, + GenerateProposalsLayerTest, + ::testing::Combine( + ::testing::ValuesIn(input_shape), + ::testing::ValuesIn(min_size), + ::testing::ValuesIn(nms_threshold), + 
::testing::ValuesIn(post_nms_count), + ::testing::ValuesIn(pre_nms_count), + ::testing::ValuesIn({false}), + ::testing::ValuesIn(getInputTensors()), + ::testing::ValuesIn({ov::element::Type_t::f32}), + ::testing::ValuesIn({ov::element::Type_t::i32, ov::element::Type_t::i64}), + ::testing::Values(CommonTestUtils::DEVICE_GPU)), + GenerateProposalsLayerTest::getTestCaseName); + +} // namespace diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/generate_proposals.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/generate_proposals.hpp index 42b611c0cd5..3e1804444d1 100644 --- a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/generate_proposals.hpp +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/generate_proposals.hpp @@ -16,6 +16,7 @@ typedef std::tuple< float, // nms_threshold: specifies NMS threshold int64_t, // post_nms_count: number of top-n proposals after NMS int64_t, // pre_nms_count: number of top-n proposals after NMS + bool, // normalized: specifies whether box is normalized or not std::pair>, // input tensors ElementType, // Network precision ElementType, // roi_num precision @@ -28,6 +29,7 @@ class GenerateProposalsLayerTest : protected: void SetUp() override; void generate_inputs(const std::vector& targetInputStaticShapes) override; + void compare(const std::vector& expected, const std::vector& actual) override; public: static std::string getTestCaseName(const testing::TestParamInfo& obj); diff --git a/src/tests/functional/shared_test_classes/src/single_layer/generate_proposals.cpp b/src/tests/functional/shared_test_classes/src/single_layer/generate_proposals.cpp index 85b5ee1f2b3..71dbebc41e8 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/generate_proposals.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/generate_proposals.cpp @@ -3,6 +3,7 @@ // #include "shared_test_classes/single_layer/generate_proposals.hpp" +#include "shared_test_classes/base/layer_test_utils.hpp" #include "ngraph_functions/builders.hpp" #include "common_test_utils/ov_tensor_utils.hpp" @@ -16,8 +17,10 @@ std::ostream& operator <<( const ov::op::v9::GenerateProposals::Attributes& attributes) { ss << "score_threshold=" << attributes.min_size << "_"; ss << "nms_threshold=" << attributes.nms_threshold << "_"; - ss << "max_delta_log_wh=" << attributes.post_nms_count << "_"; - ss << "num_classes=" << attributes.pre_nms_count; + ss << "post_nms_count=" << attributes.post_nms_count << "_"; + ss << "pre_nms_count=" << attributes.pre_nms_count; + ss << "normalized=" << attributes.normalized; + ss << "nms_eta=" << attributes.nms_eta; return ss; } } // namespace @@ -36,6 +39,7 @@ std::string GenerateProposalsLayerTest::getTestCaseName( attributes.nms_threshold, attributes.post_nms_count, attributes.pre_nms_count, + attributes.normalized, inputTensors, netPrecision, roiNumPrecision, @@ -70,6 +74,7 @@ void GenerateProposalsLayerTest::SetUp() { attributes.nms_threshold, attributes.post_nms_count, attributes.pre_nms_count, + attributes.normalized, inputTensors, netPrecision, roiNumPrecision, @@ -77,6 +82,13 @@ void GenerateProposalsLayerTest::SetUp() { inType = outType = netPrecision; targetDevice = targetName; + if (targetDevice == CommonTestUtils::DEVICE_GPU) { + if (netPrecision == element::Type_t::f16) { + abs_threshold = 0.2; + } else { + abs_threshold = 0.00009; + } + } init_input_shapes(inputShapes); @@ -97,7 +109,7 @@ 
void GenerateProposalsLayerTest::SetUp() { } void GenerateProposalsLayerTest::generate_inputs(const std::vector& targetInputStaticShapes) { - auto inputTensors = std::get<5>(GetParam()); + auto inputTensors = std::get<6>(GetParam()); inputs.clear(); const auto& funcInputs = function->inputs(); @@ -110,6 +122,63 @@ void GenerateProposalsLayerTest::generate_inputs(const std::vector& expected, + const std::vector& actual) { + if (targetDevice != CommonTestUtils::DEVICE_GPU) { + SubgraphBaseTest::compare(expected, actual); + return; + } + + const auto outputsNum = expected.size(); + ASSERT_EQ(outputsNum, 3); + ASSERT_EQ(outputsNum, actual.size()); + ASSERT_EQ(outputsNum, function->get_results().size()); + + // actual outputs 0 (rois) and 1 (roi_scores) may be padded with zeros + for (size_t i = 0; i < 2; ++i) { + const auto expectedNumRois = expected[i].get_shape()[0]; + const auto actualNumRois = actual[i].get_shape()[0]; + ASSERT_LE(expectedNumRois, actualNumRois); + + const auto actualBuffer = static_cast(actual[i].data()); + const auto expectedBuffer = static_cast(expected[i].data()); + const auto outputSize = i == 0 ? 4 : 1; + + if (outType == element::Type_t::f32) { + LayerTestsUtils::LayerTestsCommon::Compare(reinterpret_cast(expectedBuffer), + reinterpret_cast(actualBuffer), + expectedNumRois * outputSize, + rel_threshold, + abs_threshold); + } else { + LayerTestsUtils::LayerTestsCommon::Compare(reinterpret_cast(expectedBuffer), + reinterpret_cast(actualBuffer), + expectedNumRois * outputSize, + rel_threshold, + abs_threshold); + } + + if (expectedNumRois < actualNumRois) { + if (outType == element::Type_t::f32) { + const auto fBuffer = static_cast(actual[i].data()); + for (size_t j = expectedNumRois * outputSize; j < actualNumRois * outputSize; ++j) { + ASSERT_TRUE(fBuffer[j] == 0.0f) + << "Expected 0.0, actual: " << fBuffer[j] << " at index: " << j << ", output: " << i; + } + } else { + const float16 zero{0}; + const auto fBuffer = static_cast(actual[i].data()); + for (size_t j = expectedNumRois * outputSize; j < actualNumRois * outputSize; ++j) { + ASSERT_TRUE(fBuffer[j] == zero) + << "Expected 0.0, actual: " << fBuffer[j] << " at index: " << j << ", output: " << i; + } + } + } + } + + // output 2 - rois_num + ov::test::utils::compare(expected[2], actual[2], abs_threshold, rel_threshold); +} } // namespace subgraph } // namespace test } // namespace ov
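
Note on the padded-output comparison introduced in the last hunk above: on GPU the rois and roi_scores outputs are allocated with a fixed first dimension of num_batches * post_nms_count, so when fewer proposals survive NMS the tail of those buffers is left zero-filled. The overridden compare() therefore checks only the first expectedNumRois entries against the reference within abs_threshold and then asserts that every remaining element is exactly zero; the third output (rois_num) is compared as-is. A minimal standalone sketch of that idea for float outputs is given below — check_padded_output is a hypothetical helper written only to illustrate the check, it is not part of this patch.

// check_padded_output.cpp -- illustrative only; the helper name and signature are assumptions.
#include <cmath>
#include <cstddef>
#include <vector>

// Compares the meaningful prefix of a padded device output against the
// reference values, then verifies that the trailing padding is exact zeros.
bool check_padded_output(const std::vector<float>& expected,
                         const std::vector<float>& actual,
                         float abs_threshold) {
    // The device buffer may be larger than the reference, never smaller.
    if (actual.size() < expected.size())
        return false;
    // Prefix must match the reference within the absolute tolerance.
    for (std::size_t i = 0; i < expected.size(); ++i)
        if (std::fabs(expected[i] - actual[i]) > abs_threshold)
            return false;
    // Everything past the reference must be zero padding.
    for (std::size_t i = expected.size(); i < actual.size(); ++i)
        if (actual[i] != 0.0f)
            return false;
    return true;
}

The same prefix-plus-padding rule is what the f16 branch of compare() applies with float16 buffers; only the element type and the tolerance (abs_threshold = 0.2 for f16, 0.00009 for f32, as set in SetUp) differ.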