[GPU] GenerateProposals-9 (#11994)

* add static_output attribute to enable GPU implementation of GenerateProposals.
This change may be reverted when dynamic shape support is implemented
in the GPU plugin.

* - add GPU single layer test;
- add normalized attribute to SLT.

* add GPU primitive for GenerateProposals

* add kernel for GenerateProposals

* add unit test for GenerateProposals

* add blocked layouts support

* tidy up

* support blocked layouts also for 2nd and 3d outputs

* Transformation to deal with dynamic output in GPU
  - revert changes in GenerateProposals interface and shape inference;
  - add transformation;
  - custom compare method in single-layer test.

* address review comments

* fix after rebase

* fix after rebase

* review comment: added nms_eta to testcase name generation

* - added input types combination to functional tests;
- fix float16 comparison.

* fix after rebase

* use vector for input ids

* fix after rebase
This commit is contained in:
Oleksii Khovan 2022-10-11 14:10:32 +02:00 committed by GitHub
parent b2e35592f7
commit 66b5f9acf2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
26 changed files with 1853 additions and 5 deletions

View File

@ -0,0 +1,41 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <memory>
#include <string>
#include <transformations_visibility.hpp>
#include "ngraph/op/generate_proposals.hpp"
#include "ngraph/op/op.hpp"
namespace ngraph {
namespace op {
namespace internal {
// GPU-internal variant of GenerateProposals-9. Unlike the base v9 op, it
// computes fully static output shapes (see validate_and_infer_types in the
// .cpp), so plugins without dynamic-shape support can preallocate outputs.
class TRANSFORMATIONS_API GenerateProposalsIEInternal : public op::v9::GenerateProposals {
    using Base = op::v9::GenerateProposals;

public:
    OPENVINO_OP("GenerateProposalsIEInternal", "ie_internal_opset");
    BWDCMP_RTTI_DECLARATION;

    GenerateProposalsIEInternal() = default;

    // Same inputs/attributes as op::v9::GenerateProposals:
    //   im_info  - image size info (batch dim must be static)
    //   anchors  - anchor boxes
    //   deltas   - box deltas per anchor
    //   scores   - proposal scores
    //   attrs    - GenerateProposals attributes (min_size, nms params, ...)
    //   roi_num_type - element type of the 3rd output (ROI counts)
    GenerateProposalsIEInternal(const Output<Node>& im_info,
                                const Output<Node>& anchors,
                                const Output<Node>& deltas,
                                const Output<Node>& scores,
                                const Attributes& attrs,
                                const element::Type& roi_num_type = element::i64);

    void validate_and_infer_types() override;

    std::shared_ptr<Node> clone_with_new_inputs(const ngraph::OutputVector& new_args) const override;
};
} // namespace internal
} // namespace op
} // namespace ngraph

View File

@ -0,0 +1,22 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ngraph/pass/graph_rewrite.hpp>
#include <transformations_visibility.hpp>
namespace ngraph {
namespace pass {
class TRANSFORMATIONS_API ConvertGP9ToGPIEInternal;
} // namespace pass
} // namespace ngraph
// Matcher pass that replaces opset9::GenerateProposals with the internal
// GenerateProposalsIEInternal op (which infers static output shapes).
// Nodes with any dynamic input shape are left untouched.
class ngraph::pass::ConvertGP9ToGPIEInternal : public ngraph::pass::MatcherPass {
public:
    OPENVINO_RTTI("ConvertGP9ToGPIEInternal", "0");
    ConvertGP9ToGPIEInternal();
};

View File

@ -0,0 +1,58 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "ngraph_ops/generate_proposals_ie_internal.hpp"
#include <memory>
#include <ngraph/opsets/opset9.hpp>
#include "itt.hpp"
using namespace std;
using namespace ngraph;
BWDCMP_RTTI_DEFINITION(op::internal::GenerateProposalsIEInternal);
// Delegates to the base v9 constructor, then re-runs shape inference so the
// static output shapes computed by this class (see validate_and_infer_types
// below) replace the ones set by the base op.
op::internal::GenerateProposalsIEInternal::GenerateProposalsIEInternal(const Output<Node>& im_info,
                                                                       const Output<Node>& anchors,
                                                                       const Output<Node>& deltas,
                                                                       const Output<Node>& scores,
                                                                       const Attributes& attrs,
                                                                       const element::Type& roi_num_type)
    : Base(im_info, anchors, deltas, scores, attrs, roi_num_type) {
    validate_and_infer_types();
}
// Creates a copy of this node wired to the given replacement inputs,
// preserving the attributes and the ROI-count element type.
std::shared_ptr<Node> op::internal::GenerateProposalsIEInternal::clone_with_new_inputs(
    const ngraph::OutputVector& new_args) const {
    INTERNAL_OP_SCOPE(internal_GenerateProposalsIEInternal_clone_with_new_inputs);
    check_new_args_count(this, new_args);
    const auto& im_info = new_args.at(0);
    const auto& anchors = new_args.at(1);
    const auto& deltas = new_args.at(2);
    const auto& scores = new_args.at(3);
    return make_shared<op::internal::GenerateProposalsIEInternal>(im_info,
                                                                  anchors,
                                                                  deltas,
                                                                  scores,
                                                                  get_attrs(),
                                                                  get_roi_num_type());
}
// Infers static output shapes from the im_info batch dimension and the
// post_nms_count attribute:
//   output 0 (rois):    [num_batches * post_nms_count, 4]
//   output 1 (scores):  [num_batches * post_nms_count]
//   output 2 (roisnum): [num_batches]
// Requires the batch dimension of im_info (input 0) to be static.
void op::internal::GenerateProposalsIEInternal::validate_and_infer_types() {
    INTERNAL_OP_SCOPE(internal_GenerateProposalsIEInternal_validate_and_infer_types);
    // Run the base op's validation first.
    Base::validate_and_infer_types();

    const auto im_info_shape = get_input_partial_shape(0);
    const auto num_batches = im_info_shape[0];
    NODE_VALIDATION_CHECK(this, num_batches.is_static(), "Number of batches must be static");

    // Upper bound: each batch contributes at most post_nms_count ROIs.
    const Dimension post_nms_count{get_attrs().post_nms_count};
    const auto first_dim_shape = num_batches * post_nms_count;

    const auto rois_shape = ov::PartialShape({first_dim_shape, 4});
    const auto scores_shape = ov::PartialShape({first_dim_shape});
    const auto roisnum_shape = ov::PartialShape({num_batches});

    // ROIs and scores share the element type of input 0 (im_info);
    // ROI counts use the configured roi_num_type.
    const auto input_type = get_input_element_type(0);
    set_output_type(0, input_type, rois_shape);
    set_output_type(1, input_type, scores_shape);
    set_output_type(2, get_roi_num_type(), roisnum_shape);
}

View File

@ -0,0 +1,57 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "transformations/op_conversions/convert_gp9_to_gp_ie_internal.hpp"
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/opsets/opset9.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include <ngraph/rt_info.hpp>
#include "itt.hpp"
#include "ngraph_ops/generate_proposals_ie_internal.hpp"
#include "transformations/utils/utils.hpp"
// Registers a matcher that rewrites opset9::GenerateProposals into the
// internal static-shape variant. Only nodes whose inputs are all statically
// shaped are converted, because the internal op derives static output shapes
// from its inputs.
ngraph::pass::ConvertGP9ToGPIEInternal::ConvertGP9ToGPIEInternal() {
    // NOTE: `this` is not needed inside the callback, so nothing is captured.
    ngraph::matcher_pass_callback callback = [](ngraph::pattern::Matcher& m) {
        const auto root = m.get_match_root();
        const auto old_node = std::dynamic_pointer_cast<ngraph::opset9::GenerateProposals>(root);
        if (!old_node) {
            return false;
        }
        for (const auto& i : old_node->inputs()) {
            if (i.get_partial_shape().is_dynamic()) {
                return false;
            }
        }

        const auto new_node =
            std::make_shared<ngraph::op::internal::GenerateProposalsIEInternal>(old_node->input_value(0),
                                                                                old_node->input_value(1),
                                                                                old_node->input_value(2),
                                                                                old_node->input_value(3),
                                                                                old_node->get_attrs(),
                                                                                old_node->get_roi_num_type());

        new_node->set_friendly_name(old_node->get_friendly_name());
        // All three outputs originate from the same node, so copying the
        // runtime info once is sufficient (the previous code pushed the same
        // node into a NodeVector four times).
        copy_runtime_info(old_node, new_node);
        // Replaces all outputs of old_node with the matching outputs of
        // new_node (both have 3 outputs).
        replace_node(old_node, new_node);
        return true;
    };

    const auto generate_proposals = ngraph::pattern::wrap_type<ngraph::opset9::GenerateProposals>();
    const auto matcher = std::make_shared<ngraph::pattern::Matcher>(generate_proposals, "ConvertGP9ToGPIEInternal");
    register_matcher(matcher, callback);
}

View File

@ -38,6 +38,7 @@
#include <transformations/common_optimizations/remove_multi_subgraph_op_dangling_params.hpp>
#include <transformations/disable_decompression_convert_constant_folding.hpp>
#include <transformations/low_precision/disable_convert_constant_folding_on_const_path.hpp>
#include <transformations/op_conversions/convert_gp9_to_gp_ie_internal.hpp>
#include <transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.hpp>
#include <transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.hpp>
#include <transformations/op_conversions/convert_nms9_to_nms_ie_internal.hpp>
@ -458,6 +459,7 @@ void CNNNetworkNGraphImpl::reshape(const std::map<std::string, ngraph::PartialSh
manager.register_pass<ngraph::pass::ConvertMulticlassNmsToMulticlassNmsIE>(false);
manager.register_pass<::ngraph::pass::ConvertMatrixNmsToMatrixNmsIE>(false);
manager.register_pass<::ngraph::pass::ConvertNMS9ToNMSIEInternal>();
manager.register_pass<::ngraph::pass::ConvertGP9ToGPIEInternal>();
manager.register_pass<::ngraph::pass::DisableConvertConstantFoldingOnConstPath>();
manager.register_pass<::ov::pass::DisableDecompressionConvertConstantFolding>();
manager.register_pass<::ngraph::pass::ConstantFolding>();

View File

@ -226,6 +226,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::ValuesIn(nms_threshold),
::testing::ValuesIn(post_nms_count),
::testing::ValuesIn(pre_nms_count),
::testing::ValuesIn({true, false}),
::testing::ValuesIn(inputTensors),
::testing::Values(ov::element::Type_t::f32),
::testing::Values(ov::element::Type_t::i32),

View File

@ -245,3 +245,4 @@ REGISTER_FACTORY(v9, Eye);
// --------------------------- Supported internal ops --------------------------- //
REGISTER_FACTORY(internal, NonMaxSuppressionIEInternal);
REGISTER_FACTORY(internal, GenerateProposalsIEInternal);

View File

@ -0,0 +1,82 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "primitive.hpp"
#include <vector>
namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{
/// @addtogroup cpp_topology Network Topology
/// @{
/// @addtogroup cpp_primitives Primitives
/// @{
/// @brief generate proposals
struct generate_proposals
        : public primitive_base<generate_proposals> {
    CLDNN_DECLARE_PRIMITIVE(generate_proposals)

    /// @brief Constructs generate_proposals primitive
    /// @param id This primitive id
    /// @param inputs primitive ids in the following fixed order:
    ///        [0] image size info,
    ///        [1] anchors,
    ///        [2] deltas for anchors,
    ///        [3] proposal scores,
    ///        [4] buffer for the 2nd output (ROIs scores),
    ///        [5] buffer for the 3rd output (number of proposed ROIs per image)
    /// @param min_size minimum box width and height
    /// @param nms_threshold threshold to be used in NonMaxSuppression stage
    /// @param pre_nms_count number of top-n proposals before NMS
    /// @param post_nms_count number of top-n proposals after NMS
    /// @param normalized indicates whether proposal bboxes are normalized
    /// @param nms_eta eta parameter for adaptive NMS
    /// @param roi_num_type type of 3rd output elements
    /// @param output_padding optional output padding
    generate_proposals(const primitive_id& id,
               const std::vector<primitive_id>& inputs,
               float min_size,
               float nms_threshold,
               int64_t pre_nms_count,
               int64_t post_nms_count,
               bool normalized,
               float nms_eta,
               const data_types roi_num_type,
               const padding& output_padding = {}) :
            primitive_base{id, inputs, output_padding},
            output_rois_scores{inputs[4]},
            output_rois_num{inputs[5]},
            min_size{min_size},
            nms_threshold{nms_threshold},
            pre_nms_count{pre_nms_count},
            post_nms_count{post_nms_count},
            normalized{normalized},
            nms_eta{nms_eta},
            roi_num_type{roi_num_type} {}

    primitive_id output_rois_scores;  // id of the buffer receiving the 2nd output (scores)
    primitive_id output_rois_num;     // id of the buffer receiving the 3rd output (ROI counts)
    float min_size;                   // minimum box width and height
    float nms_threshold;              // IoU threshold for the NMS stage
    int64_t pre_nms_count;            // top-n proposals kept before NMS
    int64_t post_nms_count;           // top-n proposals kept after NMS
    bool normalized;                  // whether proposal bboxes are normalized
    float nms_eta;                    // eta parameter for adaptive NMS
    data_types roi_num_type;          // element type of the 3rd output

protected:
    // The extra output buffers are modeled as dependencies so the graph keeps
    // them alive and schedules them with this primitive.
    std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
        std::vector<std::reference_wrapper<const primitive_id>> ret;
        if (!output_rois_scores.empty())
            ret.push_back(output_rois_scores);
        if (!output_rois_num.empty())
            ret.push_back(output_rois_num);
        return ret;
    }
};
/// @}
/// @}
/// @}
} // namespace cldnn

View File

@ -0,0 +1,43 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "generate_proposals_inst.h"
#include "primitive_type_base.h"
#include "intel_gpu/runtime/error_handler.hpp"
#include "json_object.h"
#include <string>
namespace cldnn {
// Returns the singleton type descriptor used to register and dispatch this
// primitive within cldnn.
primitive_type_id generate_proposals::type_id() {
    static primitive_type_base<generate_proposals> instance;
    return &instance;
}
// Computes the layout of the first (ROIs) output:
// [num_batches * post_nms_count, 4, 1, 1], reusing the data type and format
// of the first input. The 2nd and 3rd op outputs are written into dedicated
// input buffers (see the generate_proposals primitive), so only the ROIs
// layout is computed here.
layout generate_proposals_inst::calc_output_layout(const generate_proposals_node& node, kernel_impl_params const& impl_param) {
    const layout data_layout = impl_param.get_input_layout();
    const auto num_batches = data_layout.batch();
    const auto desc = impl_param.typed_desc<generate_proposals>();
    return layout(data_layout.data_type, data_layout.format, {static_cast<int>(num_batches * desc->post_nms_count), 4, 1, 1});
}
std::string generate_proposals_inst::to_string(const generate_proposals_node& node) {
auto desc = node.get_primitive();
std::stringstream primitive_description;
json_composite info;
info.add("min_size", desc->min_size);
info.add("nms_threshold", desc->nms_threshold);
info.add("pre_nms_count", desc->pre_nms_count);
info.add("post_nms_count", desc->post_nms_count);
info.add("normalized", desc->normalized);
info.add("nms_eta", desc->nms_eta);
auto node_info = node.desc_to_json();
node_info->add("generate_proposals_info", info);
node_info->dump(primitive_description);
return primitive_description.str();
}
} // namespace cldnn

View File

@ -0,0 +1,89 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "generate_proposals_inst.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "kernel_selector_helper.h"
#include "generate_proposals/generate_proposals_kernel_selector.h"
#include "generate_proposals/generate_proposals_kernel_ref.h"
namespace cldnn {
namespace ocl {
// OCL implementation wrapper: translates the cldnn primitive into
// kernel_selector parameters and selects the best available kernel.
struct generate_proposals_impl
        : public typed_primitive_impl_ocl<generate_proposals> {
    using parent = typed_primitive_impl_ocl<generate_proposals>;
    using parent::parent;

    std::unique_ptr<primitive_impl> clone() const override {
        return make_unique<generate_proposals_impl>(*this);
    }

protected:
    // The 2nd and 3rd op outputs are backed by extra dependency buffers
    // (deps 4 and 5), which the kernel receives as additional inputs.
    kernel_arguments_data get_arguments(typed_primitive_inst<generate_proposals>& instance, int32_t) const override {
        auto args = parent::get_arguments(instance, 0);
        args.inputs.push_back(instance.output_rois_scores_memory());
        args.inputs.push_back(instance.output_rois_nums_memory());
        return args;
    }

public:
    static primitive_impl* create(const generate_proposals_node& arg, const kernel_impl_params& impl_param) {
        // get_default_params picks up input 0 (im_info) and the output;
        // the remaining tensors are appended explicitly below.
        auto params = get_default_params<kernel_selector::generate_proposals_params>(impl_param);
        auto optional_params = get_default_optional_params<
                kernel_selector::generate_proposals_optional_params>(arg.get_program());

        const auto& primitive = arg.get_primitive();

        params.min_size = primitive->min_size;
        params.nms_threshold  = primitive->nms_threshold;
        params.pre_nms_count = primitive->pre_nms_count;
        params.post_nms_count = primitive->post_nms_count;
        params.normalized = primitive->normalized;
        params.nms_eta = primitive->nms_eta;
        // Kernel selector only distinguishes i32 vs i64 for the ROI-count output.
        params.roi_num_type = primitive->roi_num_type == cldnn::data_types::i32 ?
                kernel_selector::Datatype::INT32 : kernel_selector::Datatype::INT64;

        params.inputs.push_back(convert_data_tensor(arg.anchors().get_output_layout()));
        params.inputs.push_back(convert_data_tensor(arg.deltas().get_output_layout()));
        params.inputs.push_back(convert_data_tensor(arg.scores().get_output_layout()));
        params.inputs.push_back(convert_data_tensor(arg.output_rois_scores_node().get_output_layout()));
        params.inputs.push_back(convert_data_tensor(arg.output_rois_nums_node().get_output_layout()));

        const auto& kernel_selector = kernel_selector::generate_proposals_kernel_selector::Instance();
        const auto best_kernels = kernel_selector.GetBestKernels(params, optional_params);

        CLDNN_ERROR_BOOL(arg.id(),
                         "best_kernels.empty()",
                         best_kernels.empty(),
                         "Cannot find a proper kernel with this arguments");

        return new generate_proposals_impl(arg, best_kernels[0]);
    }
};
namespace detail {
// Registers the OCL implementation for every supported (data type, layout)
// combination: f16/f32 in planar bfyx plus the blocked fsv/bsv layouts.
attach_generate_proposals_impl::attach_generate_proposals_impl() {
    implementation_map<generate_proposals>::add(impl_types::ocl,
                                                generate_proposals_impl::create, {
            std::make_tuple(data_types::f16, format::bfyx),
            std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
            std::make_tuple(data_types::f16, format::b_fs_yx_fsv32),
            std::make_tuple(data_types::f16, format::bs_fs_yx_bsv16_fsv16),
            std::make_tuple(data_types::f16, format::bs_fs_yx_bsv32_fsv16),
            std::make_tuple(data_types::f16, format::bs_fs_yx_bsv32_fsv32),
            std::make_tuple(data_types::f32, format::bfyx),
            std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
            std::make_tuple(data_types::f32, format::b_fs_yx_fsv32),
            std::make_tuple(data_types::f32, format::bs_fs_yx_bsv16_fsv16),
            std::make_tuple(data_types::f32, format::bs_fs_yx_bsv32_fsv16),
            std::make_tuple(data_types::f32, format::bs_fs_yx_bsv32_fsv32)});
}
} // namespace detail
} // namespace ocl
} // namespace cldnn

View File

@ -41,6 +41,7 @@ void register_implementations() {
REGISTER_OCL(gather_elements);
REGISTER_OCL(gather_nd);
REGISTER_OCL(gemm);
REGISTER_OCL(generate_proposals);
REGISTER_OCL(lrn);
REGISTER_OCL(lstm_gemm);
REGISTER_OCL(lstm_elt);

View File

@ -119,6 +119,7 @@ REGISTER_OCL(gather);
REGISTER_OCL(gather_nd);
REGISTER_OCL(gather_elements);
REGISTER_OCL(gemm);
REGISTER_OCL(generate_proposals);
REGISTER_OCL(lrn);
REGISTER_OCL(lstm_gemm);
REGISTER_OCL(lstm_elt);

View File

@ -0,0 +1,50 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "intel_gpu/primitives/generate_proposals.hpp"
#include "primitive_inst.h"
namespace cldnn {
// Program-node specialization: names the six dependencies of the primitive
// (4 real inputs + 2 buffers that receive the 2nd and 3rd op outputs).
template <>
struct typed_program_node<generate_proposals>
        : public typed_program_node_base<generate_proposals> {
    using parent = typed_program_node_base<generate_proposals>;

public:
    using parent::parent;

    program_node& input() const { return get_dependency(0); }   // im_info
    program_node& anchors() const { return get_dependency(1); }
    program_node& deltas() const { return get_dependency(2); }
    program_node& scores() const { return get_dependency(3); }
    program_node& output_rois_scores_node() const { return get_dependency(4); }
    program_node& output_rois_nums_node() const { return get_dependency(5); }
};
using generate_proposals_node = typed_program_node<generate_proposals>;
// Instance specialization: exposes the memories of the extra output buffers
// (dependencies 4 and 5) so the impl can pass them to the kernel.
template <>
class typed_primitive_inst<generate_proposals>
        : public typed_primitive_inst_base<generate_proposals> {
    using parent = typed_primitive_inst_base<generate_proposals>;

public:
    static layout calc_output_layout(const generate_proposals_node& node, kernel_impl_params const& impl_param);
    static std::string to_string(const generate_proposals_node& node);

    typed_primitive_inst(network& network, const generate_proposals_node& node)
            : parent(network, node)
    {}

    memory::ptr output_rois_scores_memory() const { return dep_memory_ptr(4); }
    memory::ptr output_rois_nums_memory() const { return dep_memory_ptr(5); }
};
using generate_proposals_inst = typed_primitive_inst<generate_proposals>;
} // namespace cldnn

View File

@ -38,6 +38,7 @@
#include "data_inst.h"
#include "deconvolution_inst.h"
#include "detection_output_inst.h"
#include "generate_proposals_inst.h"
#include "input_layout_inst.h"
#include "shuffle_channels_inst.h"
#include "arg_max_min_inst.h"
@ -1432,7 +1433,8 @@ void program::set_layout_optimizer_attributes(layout_optimizer& lo) {
prim.type() != cldnn::roll::type_id() &&
prim.type() != cldnn::prior_box::type_id() &&
prim.type() != cldnn::resample::type_id() &&
prim.type() != cldnn::eye::type_id()) {
prim.type() != cldnn::eye::type_id() &&
prim.type() != cldnn::generate_proposals::type_id()) {
can_use_fsv16 = false;
}
@ -1468,7 +1470,8 @@ void program::set_layout_optimizer_attributes(layout_optimizer& lo) {
prim.type() != cldnn::roll::type_id() &&
prim.type() != cldnn::resample::type_id() &&
prim.type() != cldnn::prior_box::type_id() &&
prim.type() != cldnn::eye::type_id()) {
prim.type() != cldnn::eye::type_id() &&
prim.type() != cldnn::generate_proposals::type_id()) {
can_use_bs_fs_yx_bsv16_fsv16 = false;
}
}

View File

@ -91,6 +91,7 @@ enum class KernelType {
REVERSE,
PRIOR_BOX,
EYE,
GENERATE_PROPOSALS
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

View File

@ -0,0 +1,196 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "generate_proposals_kernel_ref.h"
#include "kernel_selector_utils.h"
namespace kernel_selector {
// Declares the tensor data types and layouts this kernel supports.
// The same four data types are enabled for both inputs and outputs.
ParamsKey GenerateProposalsRef::GetSupportedKey() const {
    ParamsKey key;
    for (const auto dt : {Datatype::F16, Datatype::F32, Datatype::INT32, Datatype::INT64}) {
        key.EnableInputDataType(dt);
        key.EnableOutputDataType(dt);
    }
    key.EnableAllInputLayout();
    key.EnableAllOutputLayout();
    key.EnableBatching();
    key.EnableDifferentTypes();
    key.EnableTensorPitches();
    return key;
}
// Reference implementation: prefer any specialized kernel over this one.
KernelsPriority GenerateProposalsRef::GetKernelsPriority(const Params&, const optional_params&) const {
    return DONT_USE_IF_HAVE_SOMETHING_ELSE;
}
// Accepts only parameter/option bundles created for this kernel type.
bool GenerateProposalsRef::Validate(const Params& p, const optional_params& o) const {
    return p.GetType() == KernelType::GENERATE_PROPOSALS &&
           o.GetType() == KernelType::GENERATE_PROPOSALS;
}
namespace {
// Fixed positions of tensors in params.inputs. The op's 2nd and 3rd outputs
// are passed to the kernel as extra inputs (indices 4 and 5).
constexpr size_t kImInfoInputIdx = 0;
constexpr size_t kAnchorsInputIdx = 1;
constexpr size_t kDeltasInputIdx = 2;
constexpr size_t kScoresInputIdx = 3;
constexpr size_t kRoisScoresOutputIdx = 4;
constexpr size_t kRoisNumsOutputIdx = 5;

// Computes global/local work sizes for each of the four kernel stages:
//   0: refine anchors          - one item per (h, w, batch*anchor)
//   1: sort proposals by score - one item per batch
//   2: NMS                     - one item per batch
//   3: emit rois/scores        - single work item
GenerateProposalsRef::DispatchData SetDefault(const generate_proposals_params& params, size_t idx) {
    GenerateProposalsRef::DispatchData dispatch_data;

    const auto& inputs = params.inputs;
    const auto num_batches = inputs[kScoresInputIdx].Batch().v;
    const auto anchors_num = inputs[kScoresInputIdx].Feature().v;
    const auto bottom_H = inputs[kDeltasInputIdx].Y().v;
    const auto bottom_W = inputs[kDeltasInputIdx].X().v;

    if (idx == 0) {
        dispatch_data.gws = {bottom_H, bottom_W, num_batches * anchors_num};
    } else if (idx == 1 || idx == 2) {
        dispatch_data.gws = {num_batches, 1, 1};
    } else if (idx == 3) {
        dispatch_data.gws = {1, 1, 1};
    }

    dispatch_data.lws = GetOptimalLocalWorkGroupSizes(dispatch_data.gws, params.engineInfo);

    return dispatch_data;
}
}  // namespace
// Binds the argument list for each of the four kernel stages. The order of
// push_backs must match the parameter order of the corresponding KERNEL()
// entry points in generate_proposals_ref.cl exactly.
void GenerateProposalsRef::SetKernelArguments(
        const generate_proposals_params& params,
        size_t idx, cldnn::arguments_desc& arguments) const {
    switch (idx) {
        case 0: {  // refine anchors
            arguments.push_back({ArgumentDescriptor::Types::INPUT, kImInfoInputIdx});
            arguments.push_back({ArgumentDescriptor::Types::INPUT, kAnchorsInputIdx});
            arguments.push_back({ArgumentDescriptor::Types::INPUT, kDeltasInputIdx});
            arguments.push_back({ArgumentDescriptor::Types::INPUT, kScoresInputIdx});
            arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0});  // proposals
            break;
        }
        case 1: {  // sort proposals by score
            arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0});  // proposals
            break;
        }
        case 2: {  // NMS
            arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0});  // proposals
            arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 1});  // nms_out_indices
            // ROI counts are written through an extra "input" buffer.
            arguments.push_back({ArgumentDescriptor::Types::INPUT, kRoisNumsOutputIdx});  // rois num
            break;
        }
        case 3: {  // convert proposals to rois and roi scores
            arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0});  // proposals
            arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 1});  // nms_out_indices
            arguments.push_back({ArgumentDescriptor::Types::INPUT, kRoisNumsOutputIdx});   // rois num
            arguments.push_back({ArgumentDescriptor::Types::OUTPUT, 0});                   // rois
            arguments.push_back({ArgumentDescriptor::Types::INPUT, kRoisScoresOutputIdx});  // roi scores
            break;
        }
        default:
            throw std::invalid_argument("generate_proposals has 4 kernels. valid index is 0 ~ 3.");
    }
}
// Builds the KernelData for the four-stage GenerateProposals pipeline:
// per-stage dispatch sizes, jit constants and argument bindings, plus the
// two shared internal buffers (proposals and NMS output indices).
KernelsData GenerateProposalsRef::GetKernelsData(const Params& params, const optional_params& options) const {
    if (!Validate(params, options)) {
        return {};
    }

    constexpr size_t kKernelsNum = 4;
    KernelData kd = KernelData::Default<generate_proposals_params>(params, kKernelsNum);
    const generate_proposals_params& new_params = static_cast<const generate_proposals_params&>(params);

    const auto& inputs = new_params.inputs;
    const auto anchors_num = inputs[kScoresInputIdx].Feature().v;
    const auto bottom_H = inputs[kDeltasInputIdx].Y().v;
    const auto bottom_W = inputs[kDeltasInputIdx].X().v;
    // im_info has 3 channels (H, W, scale) or 4 (H, W, scale_H, scale_W);
    // pick where the width scale lives.
    const auto scale_w_index = inputs[kImInfoInputIdx].Feature().v == 3 ? 2 : 3;
    const auto num_proposals = anchors_num * bottom_H * bottom_W;
    const auto pre_nms_topn = std::min(num_proposals, new_params.pre_nms_count);
    const auto max_delta_log_wh = static_cast<float>(std::log(1000.0 / 16.0));

    kd.internalBufferDataType = Datatype::F32;

    const auto num_batches = inputs[kScoresInputIdx].Batch().v;
    constexpr size_t kProposalBoxSize = 6;  // 6 values: {x0, y0, x1, y1, score, keep}
    const auto proposals_buffer_size = num_batches * num_proposals * sizeof(float) * kProposalBoxSize;
    kd.internalBufferSizes.push_back(proposals_buffer_size);
    // NOTE(review): the NMS index buffer is sized with sizeof(float); this
    // matches internalBufferDataType == F32, but confirm the element type the
    // NMS stage actually stores there.
    const auto out_indices_size = num_batches * new_params.post_nms_count * sizeof(float);
    kd.internalBufferSizes.push_back(out_indices_size);

    for (size_t i = 0; i < kKernelsNum; ++i) {
        const auto dispatchData = SetDefault(new_params, i);
        const auto entry_point = GetEntryPoint(kernelName, new_params.layerID, params, options, i);
        auto cldnn_jit = MakeBaseParamsJitConstants(new_params);
        // Selects which KERNEL() body is compiled from the common .cl file.
        cldnn_jit.AddConstant(MakeJitConstant("GENERATE_PROPOSALS_STAGE_" + std::to_string(i), "true"));
        cldnn_jit.AddConstant(MakeJitConstant("PROPOSAL_SIZE", kProposalBoxSize));
        cldnn_jit.Merge(MakeTypeJitConstants(new_params.roi_num_type, "ROI_NUM"));
        if (new_params.normalized) {
            cldnn_jit.AddConstant(MakeJitConstant("NORMALIZED", 1));
        }

        switch (i) {
            case 0: {  // refine anchors
                cldnn_jit.AddConstants({MakeJitConstant("MIN_SIZE", new_params.min_size),
                                        MakeJitConstant("ANCHORS_NUM", anchors_num),
                                        MakeJitConstant("NUM_PROPOSALS", num_proposals),
                                        MakeJitConstant("BOTTOM_H", bottom_H),
                                        MakeJitConstant("BOTTOM_W", bottom_W),
                                        MakeJitConstant("BOTTOM_AREA", bottom_H * bottom_W),
                                        MakeJitConstant("SCALE_W_INDEX", scale_w_index),
                                        MakeJitConstant("MAX_DELTA_LOG_WH", max_delta_log_wh)
                                       });
                break;
            }
            case 1: {  // sort by score
                cldnn_jit.AddConstants({MakeJitConstant("NUM_PROPOSALS", num_proposals)});
                break;
            }
            case 2: {  // NMS
                cldnn_jit.AddConstants({MakeJitConstant("NUM_PROPOSALS", num_proposals),
                                        MakeJitConstant("PRE_NMS_TOPN", pre_nms_topn),
                                        MakeJitConstant("POST_NMS_COUNT", new_params.post_nms_count),
                                        MakeJitConstant("NMS_THRESHOLD", new_params.nms_threshold),
                                       });
                break;
            }
            case 3: {  // emit rois/scores
                cldnn_jit.AddConstants({MakeJitConstant("POST_NMS_COUNT", new_params.post_nms_count),
                                        MakeJitConstant("NUM_PROPOSALS", num_proposals)
                                       });
                break;
            }
            default:
                throw std::invalid_argument("GENERATE_PROPOSALS has 4 kernels. valid index is 0 ~ 3.");
        }

        const auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
        auto& kernel = kd.kernels[i];

        KernelBase::CheckDispatchData(kernelName, dispatchData, params.engineInfo.maxWorkGroupSize);
        kernel.params.workGroups.global = dispatchData.gws;
        kernel.params.workGroups.local = dispatchData.lws;
        kernel.code.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo);
        SetKernelArguments(new_params, i, kernel.params.arguments);
    }

    return {kd};
}
} // namespace kernel_selector

View File

@ -0,0 +1,45 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "kernel_base_opencl.h"
namespace kernel_selector {
// Parameters of the GenerateProposals kernel; mirrors the attributes of the
// cldnn generate_proposals primitive.
struct generate_proposals_params : public base_params {
    generate_proposals_params()
            : base_params(KernelType::GENERATE_PROPOSALS) {}

    float min_size{0.0f};         // minimum box width and height
    float nms_threshold{0.0f};    // IoU threshold for the NMS stage
    size_t pre_nms_count{0};      // top-n proposals kept before NMS
    size_t post_nms_count{0};     // top-n proposals kept after NMS
    bool normalized{true};        // whether proposal bboxes are normalized
    float nms_eta{1.0f};          // eta parameter for adaptive NMS
    Datatype roi_num_type = Datatype::INT64;  // element type of the ROI-count output
};
// Optional parameters for the GenerateProposals kernel (none beyond the base).
struct generate_proposals_optional_params : public optional_params {
    generate_proposals_optional_params()
            : optional_params(KernelType::GENERATE_PROPOSALS) {}
};
// Reference OpenCL kernel for GenerateProposals, implemented as a four-stage
// pipeline (refine anchors, sort, NMS, emit rois) in generate_proposals_ref.cl.
class GenerateProposalsRef : public KernelBaseOpenCL {
public:
    GenerateProposalsRef()
            : KernelBaseOpenCL("generate_proposals_ref") {}

    ~GenerateProposalsRef() = default;

    using DispatchData = CommonDispatchData;

    KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
    KernelsPriority GetKernelsPriority(const Params& params, const optional_params& options) const override;
    ParamsKey GetSupportedKey() const override;

protected:
    bool Validate(const Params& p, const optional_params& o) const override;
    // Binds the per-stage kernel argument lists (see the .cpp for stage layout).
    void SetKernelArguments(const generate_proposals_params& params,
                            size_t idx, cldnn::arguments_desc& kernel) const;
};
} // namespace kernel_selector

View File

@ -0,0 +1,21 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "generate_proposals_kernel_selector.h"
#include "generate_proposals_kernel_ref.h"
namespace kernel_selector {
// Registers all available GenerateProposals kernels (currently only the
// reference implementation).
generate_proposals_kernel_selector::generate_proposals_kernel_selector() {
    Attach<GenerateProposalsRef>();
}

// Singleton accessor used by the GPU plugin implementation.
generate_proposals_kernel_selector& generate_proposals_kernel_selector::Instance() {
    static generate_proposals_kernel_selector instance_;
    return instance_;
}

// Picks the best kernel for the given parameters among the attached ones.
KernelsData generate_proposals_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const {
    return GetNaiveBestKernel(params, options, KernelType::GENERATE_PROPOSALS);
}
} // namespace kernel_selector

View File

@ -0,0 +1,18 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "kernel_selector.h"
namespace kernel_selector {
// Kernel selector for the GenerateProposals operation (singleton).
class generate_proposals_kernel_selector : public kernel_selector_base {
public:
    static generate_proposals_kernel_selector& Instance();
    generate_proposals_kernel_selector();
    KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
};
} // namespace kernel_selector

View File

@ -0,0 +1,306 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
// Literal 0.5 in the input element type: half literal for f16 inputs,
// float literal otherwise.
#if INPUT0_TYPE_SIZE == 2 //f16
#define HALF_ONE 0.5h
#else
#define HALF_ONE 0.5f
#endif

#define ZERO INPUT0_VAL_ZERO

// For normalized boxes width/height is x1-x0; for pixel coordinates an extra
// +1 is added (offset of 1 in that case, 0 otherwise).
#ifdef NORMALIZED
#define COORDINATES_OFFSET INPUT0_VAL_ZERO
#else
#define COORDINATES_OFFSET INPUT0_VAL_ONE
#endif
#ifdef GENERATE_PROPOSALS_STAGE_0
// 0. Refine anchors
// Stage 0: refine anchors.
// GWS mapping: (0) -> h, (1) -> w, (2) -> batch * anchor (see SetDefault).
// For each (batch, anchor, h, w) applies the predicted deltas to the anchor
// box, clips it to the image, and writes a PROPOSAL_SIZE-element record
// {x0, y0, x1, y1, score, keep} into the intermediate proposals buffer.
KERNEL(generate_proposals_ref_stage_0)
(const __global INPUT0_TYPE* im_info,
 const __global INPUT1_TYPE* anchors,
 const __global INPUT2_TYPE* deltas,
 const __global INPUT3_TYPE* scores,
 __global OUTPUT_TYPE* proposals) {
    const uint h = get_global_id(0);
    const uint w = get_global_id(1);
    const uint ba = (uint)get_global_id(2);
    // NOTE(review): the dispatch packs gws[2] as num_batches * anchors_num
    // (anchors_num taken from the scores tensor), but the decomposition below
    // divides by INPUT0_FEATURE_NUM, i.e. the im_info channel count (3 or 4).
    // Verify these two agree for all supported configurations.
    const uint batch = ba / INPUT0_FEATURE_NUM;
    const uint anchor = ba % INPUT0_FEATURE_NUM;

    const INPUT0_TYPE img_H = im_info[INPUT0_GET_INDEX(batch, 0, 0, 0)];
    const INPUT0_TYPE img_W = im_info[INPUT0_GET_INDEX(batch, 1, 0, 0)];
    const INPUT0_TYPE scale_H = im_info[INPUT0_GET_INDEX(batch, 2, 0, 0)];
    const INPUT0_TYPE scale_W = im_info[INPUT0_GET_INDEX(batch, SCALE_W_INDEX, 0, 0)];

    const float min_box_H = MIN_SIZE * scale_H;
    const float min_box_W = MIN_SIZE * scale_W;

    // Anchor tensor is addressed as [H, W, anchor, 4] through the bfyx macro.
    INPUT0_TYPE x0 = anchors[INPUT1_GET_INDEX(h, w, anchor, 0)];
    INPUT0_TYPE y0 = anchors[INPUT1_GET_INDEX(h, w, anchor, 1)];
    INPUT0_TYPE x1 = anchors[INPUT1_GET_INDEX(h, w, anchor, 2)];
    INPUT0_TYPE y1 = anchors[INPUT1_GET_INDEX(h, w, anchor, 3)];

    // Deltas are packed 4-per-anchor along the feature axis.
    const INPUT0_TYPE dx = deltas[INPUT2_GET_INDEX(batch, anchor * 4 + 0 , h, w)];
    const INPUT0_TYPE dy = deltas[INPUT2_GET_INDEX(batch, anchor * 4 + 1 , h , w)];
    const INPUT0_TYPE d_log_w = deltas[INPUT2_GET_INDEX(batch, anchor * 4 + 2 , h, w)];
    const INPUT0_TYPE d_log_h = deltas[INPUT2_GET_INDEX(batch, anchor * 4 + 3 , h, w)];

    const INPUT0_TYPE score = scores[INPUT3_GET_INDEX(batch, anchor, h, w)];

    // width & height of box
    const INPUT0_TYPE ww = x1 - x0 + COORDINATES_OFFSET;
    const INPUT0_TYPE hh = y1 - y0 + COORDINATES_OFFSET;
    // center location of box
    const INPUT0_TYPE ctr_x = x0 + HALF_ONE * ww;
    const INPUT0_TYPE ctr_y = y0 + HALF_ONE * hh;

    // new center location according to deltas (dx, dy)
    const INPUT0_TYPE pred_ctr_x = dx * ww + ctr_x;
    const INPUT0_TYPE pred_ctr_y = dy * hh + ctr_y;
    // new width & height according to deltas d(log w), d(log h),
    // capped at MAX_DELTA_LOG_WH to avoid exp() overflow
    const INPUT0_TYPE pred_w = exp(min(d_log_w, TO_INPUT0_TYPE(MAX_DELTA_LOG_WH))) * ww;
    const INPUT0_TYPE pred_h = exp(min(d_log_h, TO_INPUT0_TYPE(MAX_DELTA_LOG_WH))) * hh;

    // update upper-left corner location
    x0 = pred_ctr_x - HALF_ONE * pred_w;
    y0 = pred_ctr_y - HALF_ONE * pred_h;
    // update lower-right corner location
    x1 = pred_ctr_x + HALF_ONE * pred_w - COORDINATES_OFFSET;
    y1 = pred_ctr_y + HALF_ONE * pred_h - COORDINATES_OFFSET;

    // adjust new corner locations to be within the image region
    x0 = max(ZERO, min(x0, img_W - COORDINATES_OFFSET));
    y0 = max(ZERO, min(y0, img_H - COORDINATES_OFFSET));
    x1 = max(ZERO, min(x1, img_W - COORDINATES_OFFSET));
    y1 = max(ZERO, min(y1, img_H - COORDINATES_OFFSET));

    // recompute new width & height
    const INPUT0_TYPE box_w = x1 - x0 + COORDINATES_OFFSET;
    const INPUT0_TYPE box_h = y1 - y0 + COORDINATES_OFFSET;

    // Linear offset of this proposal's record in the flat proposals buffer.
    const uint batch_offset = batch * NUM_PROPOSALS * PROPOSAL_SIZE;
    const uint offset = h * BOTTOM_W + w;
    const uint proposal_idx = batch_offset + (offset * ANCHORS_NUM + anchor) * PROPOSAL_SIZE;

    proposals[proposal_idx + 0] = x0;
    proposals[proposal_idx + 1] = y0;
    proposals[proposal_idx + 2] = x1;
    proposals[proposal_idx + 3] = y1;
    proposals[proposal_idx + 4] = score;
    // keep flag: box must meet the scaled minimum size in both dimensions
    proposals[proposal_idx + 5] = ((min_box_W <= box_w) && (min_box_H <= box_h)) ? 1 : 0;
}
#endif /* GENERATE_PROPOSALS_STAGE_0 */
#ifdef GENERATE_PROPOSALS_STAGE_1
// One proposal record as written by stage 0: corner coordinates, objectness
// score and a keep-flag (0 when the box failed the min-size filter).
// The packed attribute guarantees the 6 fields are laid out contiguously so the
// struct can alias the raw PROPOSAL_SIZE-strided buffer produced by stage 0.
typedef struct __attribute__((__packed__)) {
    INPUT0_TYPE x0;     // top-left x
    INPUT0_TYPE y0;     // top-left y
    INPUT0_TYPE x1;     // bottom-right x
    INPUT0_TYPE y1;     // bottom-right y
    INPUT0_TYPE score;  // objectness score (sort/NMS key)
    INPUT0_TYPE keep;   // 1 if the box passed the min-size filter, else 0
} Box;
// Exchanges two proposals in global memory (helper for the sorting routines).
inline void FUNC(swap_box)(__global Box* a, __global Box* b) {
    const Box temp = *a;
    *a = *b;
    *b = temp;
}
// Lomuto partition step for the iterative quicksort below.
// Uses arr[h].score as the pivot and moves every box with a strictly greater
// score in front of it, so the overall sort orders boxes by descending score.
// Returns the pivot's final position.
inline int FUNC(partition)(__global Box* arr, int l, int h) {
    const INPUT0_TYPE pivot_score = arr[h].score;
    int boundary = l - 1;
    for (int idx = l; idx < h; ++idx) {
        if (arr[idx].score > pivot_score) {
            ++boundary;
            FUNC_CALL(swap_box)(&arr[boundary], &arr[idx]);
        }
    }
    FUNC_CALL(swap_box)(&arr[boundary + 1], &arr[h]);
    return boundary + 1;
}
// Bubble sort of arr[l..h] (inclusive) by DESCENDING score, with early exit
// once a pass performs no swap. Used by quickSortIterative as a fallback when
// its explicit stack would overflow, so it must produce the same ordering as
// the quicksort partition above (which places higher scores first).
inline void FUNC(bubbleSortIterative)(__global Box* arr, int l, int h) {
    for (int i = 0; i < h - l; i++) {
        bool swapped = false;
        for (int j = l; j < h - i; j++) {
            // Fix: the original compared with '>' which sorts ascending and
            // contradicts the descending order produced by partition().
            if (arr[j].score < arr[j + 1].score) {
                FUNC_CALL(swap_box)(&arr[j], &arr[j + 1]);
                swapped = true;
            }
        }
        if (!swapped)
            break;
    }
}
// Iterative quicksort of arr[l..h] (inclusive) by descending score.
// Uses an explicit fixed-size stack of sub-range bounds instead of recursion
// (recursion is unavailable in OpenCL C); when the stack is about to overflow,
// the affected sub-range is sorted with the bubble-sort fallback instead.
inline void FUNC(quickSortIterative)(__global Box* arr, int l, int h) {
    // Create an auxiliary stack
    const int kStackSize = 100;
    int stack[kStackSize];

    // initialize top of stack
    int top = -1;

    // push initial values of l and h to stack
    stack[++top] = l;
    stack[++top] = h;

    // Keep popping from stack while is not empty
    while (top >= 0) {
        // Pop h and l
        h = stack[top--];
        l = stack[top--];

        // Set pivot element at its correct position
        // in sorted array
        int p = FUNC_CALL(partition)(arr, l, h);

        // If there are elements on left side of pivot,
        // then push left side to stack
        if (p - 1 > l) {
            // Not enough room for another range: sort it in place instead.
            if (top >= (kStackSize - 1)) {
                FUNC_CALL(bubbleSortIterative)(arr, l, p - 1);
            } else {
                stack[++top] = l;
                stack[++top] = p - 1;
            }
        }

        // If there are elements on right side of pivot,
        // then push right side to stack
        if (p + 1 < h) {
            // Not enough room for another range: sort it in place instead.
            if (top >= (kStackSize - 1)) {
                FUNC_CALL(bubbleSortIterative)(arr, p + 1, h);
            } else {
                stack[++top] = p + 1;
                stack[++top] = h;
            }
        }
    }
}
// 1. Sort boxes by scores
// One work-item per batch: reinterprets that batch's slice of the proposals
// buffer as Box records and sorts them by descending score.
KERNEL(generate_proposals_ref_stage_1)(__global OUTPUT_TYPE* proposals) {
    const uint batch = get_global_id(0);
    __global Box* boxes = (__global Box*)(proposals + batch * NUM_PROPOSALS * PROPOSAL_SIZE);
    FUNC_CALL(quickSortIterative)(boxes, 0, NUM_PROPOSALS-1);
}
#endif /* GENERATE_PROPOSALS_STAGE_1 */
#ifdef GENERATE_PROPOSALS_STAGE_2
// 2. NMS
// Greedily keeps up to POST_NMS_COUNT boxes per batch from the score-sorted
// proposals (stage 1), suppressing boxes whose IoU with an already-kept box
// exceeds NMS_THRESHOLD. Writes the kept indices and the per-batch count.
KERNEL(generate_proposals_ref_stage_2)
(const __global INPUT0_TYPE* boxes,
 __global size_t* out_indices,         // kept proposal indices, POST_NMS_COUNT slots per batch
 __global ROI_NUM_TYPE* num_outputs) { // number of kept boxes per batch
    // One work-item per batch.
    const uint batch = get_global_id(0);
    const uint batch_offset = batch * NUM_PROPOSALS * PROPOSAL_SIZE;
    uint count = 0;

    // NOTE(review): these __local arrays are indexed across ALL batches, which
    // assumes every batch's work-item runs in the same work-group - confirm
    // against the dispatch configuration.
    __local uint index_out[INPUT0_BATCH_NUM * POST_NMS_COUNT];
    __local bool is_dead[INPUT0_BATCH_NUM * PRE_NMS_TOPN];

    // Slot 5 of each proposal is the stage-0 min-size keep-flag (0 == rejected).
    for (uint box = 0; box < PRE_NMS_TOPN; ++box) {
        is_dead[batch * PRE_NMS_TOPN + box] = boxes[batch_offset + PROPOSAL_SIZE * box + 5] == 0.0f;
    }

    for (uint box = 0; box < PRE_NMS_TOPN; ++box) {
        if (is_dead[batch * PRE_NMS_TOPN + box])
            continue;

        // Keep this box; boxes arrive score-sorted, so earlier == higher score.
        index_out[batch * POST_NMS_COUNT + count++] = box;
        if (count == POST_NMS_COUNT)
            break;

        const uint box_offset = batch_offset + box * PROPOSAL_SIZE;
        const INPUT0_TYPE x0i = boxes[box_offset + 0];
        const INPUT0_TYPE y0i = boxes[box_offset + 1];
        const INPUT0_TYPE x1i = boxes[box_offset + 2];
        const INPUT0_TYPE y1i = boxes[box_offset + 3];

        const INPUT0_TYPE a_width = x1i - x0i;
        const INPUT0_TYPE a_height = y1i - y0i;
        const INPUT0_TYPE a_area = (a_width + COORDINATES_OFFSET) * (a_height + COORDINATES_OFFSET);

        // Suppress every lower-scored box that overlaps this one too much.
        for (uint tail = box + 1; tail < PRE_NMS_TOPN; ++tail) {
            const uint tail_offset = batch_offset + tail * PROPOSAL_SIZE;
            const INPUT0_TYPE x0j = boxes[tail_offset + 0];
            const INPUT0_TYPE y0j = boxes[tail_offset + 1];
            const INPUT0_TYPE x1j = boxes[tail_offset + 2];
            const INPUT0_TYPE y1j = boxes[tail_offset + 3];

            // Intersection rectangle of boxes i and j.
            const INPUT0_TYPE x0 = max(x0i, x0j);
            const INPUT0_TYPE y0 = max(y0i, y0j);
            const INPUT0_TYPE x1 = min(x1i, x1j);
            const INPUT0_TYPE y1 = min(y1i, y1j);

            const INPUT0_TYPE width = x1 - x0 + COORDINATES_OFFSET;
            const INPUT0_TYPE height = y1 - y0 + COORDINATES_OFFSET;
            const INPUT0_TYPE area = max(ZERO, width) * max(ZERO, height);

            const INPUT0_TYPE b_width = x1j - x0j;
            const INPUT0_TYPE b_height = y1j - y0j;
            const INPUT0_TYPE b_area = (b_width + COORDINATES_OFFSET) * (b_height + COORDINATES_OFFSET);

            // Despite the name, this is the intersection-over-union (IoU) ratio.
            const INPUT0_TYPE intersection_area = area / (a_area + b_area - area);

            // The extra corner comparisons require a genuine overlap before suppressing.
            if ( (NMS_THRESHOLD < intersection_area) && (x0i <= x1j) && (y0i <= y1j) && (x0j <= x1i) && (y0j <= y1i) ) {
                is_dead[batch * PRE_NMS_TOPN + tail] = true;
            }
        }
    }

    num_outputs[INPUT5_GET_INDEX(batch, 0, 0, 0)] = count;
    for (uint i = 0; i < count; ++i) {
        out_indices[batch * POST_NMS_COUNT + i] = index_out[batch * POST_NMS_COUNT + i];
    }
}
#endif /* GENERATE_PROPOSALS_STAGE_2 */
#ifdef GENERATE_PROPOSALS_STAGE_3
// 3. Convert proposals to rois and roi_scores
// Gathers the NMS-surviving proposals (per-batch selected indices) into the
// final ROI and score outputs, packed contiguously across batches, then
// zero-fills the unused tail of both outputs.
KERNEL(generate_proposals_ref_stage_3)
(const __global INPUT0_TYPE* boxes,
 const __global size_t* out_indices,
 const __global ROI_NUM_TYPE* num_outputs,
 __global OUTPUT_TYPE* rois,
 __global INPUT4_TYPE* roi_scores) {
    uint roi_index = 0;
    for (uint batch = 0; batch < INPUT0_BATCH_NUM; ++batch) {
        for (uint i = 0; i < num_outputs[INPUT5_GET_INDEX(batch, 0, 0, 0)]; ++i) {
            const uint box_index = (batch * NUM_PROPOSALS + out_indices[batch * POST_NMS_COUNT + i]) * PROPOSAL_SIZE;
            rois[OUTPUT_GET_INDEX(roi_index, 0, 0, 0)] = boxes[box_index + 0];
            rois[OUTPUT_GET_INDEX(roi_index, 1, 0, 0)] = boxes[box_index + 1];
            rois[OUTPUT_GET_INDEX(roi_index, 2, 0, 0)] = boxes[box_index + 2];
            rois[OUTPUT_GET_INDEX(roi_index, 3, 0, 0)] = boxes[box_index + 3];
            roi_scores[INPUT4_GET_INDEX(roi_index, 0, 0, 0)] = boxes[box_index + 4];
            ++roi_index;
        }
    }
    // fill the rest of outputs with zeros
    while(roi_index < INPUT0_BATCH_NUM * POST_NMS_COUNT) {
        rois[OUTPUT_GET_INDEX(roi_index, 0, 0, 0)] = 0.0f;
        rois[OUTPUT_GET_INDEX(roi_index, 1, 0, 0)] = 0.0f;
        rois[OUTPUT_GET_INDEX(roi_index, 2, 0, 0)] = 0.0f;
        rois[OUTPUT_GET_INDEX(roi_index, 3, 0, 0)] = 0.0f;
        // Fix: use the layout-aware index macro here as well - the original
        // plain linear index writes the wrong element for blocked (fsv/bsv)
        // layouts, inconsistently with the gather loop above.
        roi_scores[INPUT4_GET_INDEX(roi_index, 0, 0, 0)] = 0.0f;
        ++roi_index;
    }
}
#endif /* GENERATE_PROPOSALS_STAGE_3 */
#undef HALF_ONE
#undef ZERO
#undef COORDINATES_OFFSET

View File

@ -0,0 +1,79 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "intel_gpu/primitives/generate_proposals.hpp"

#include <string>

#include <ngraph_ops/generate_proposals_ie_internal.hpp>

#include "intel_gpu/plugin/common_utils.hpp"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/primitives/mutable_data.hpp"
namespace ov {
namespace intel_gpu {
// Maps the internal GenerateProposals op (3 outputs) onto GPU primitives.
// The kernel itself produces only output 0 (rois); outputs 1 (scores) and
// 2 (roisNum) are exchanged through shared mutable_data buffers.
static void CreateGenerateProposalsIEInternalOp(
        Program& p,
        const std::shared_ptr<ngraph::op::internal::GenerateProposalsIEInternal>& op) {
    validate_inputs_count(op, {4});
    if (op->get_output_size() != 3) {
        IE_THROW() << "GenerateProposals requires 3 outputs";
    }

    auto inputs = p.GetInputPrimitiveIDs(op);
    const auto& attrs = op->get_attrs();

    const auto layer_type_name = layer_type_name_ID(op);
    const auto layer_name = layer_type_name + ".out0";

    // For an extra output: allocates a shared buffer, registers its "write"
    // mutable_data primitive and appends it to the kernel's inputs.
    // The matching "read" primitives are added after the kernel below.
    const auto add_mutable_output = [&](size_t output_idx) {
        const auto precision = op->get_output_element_type(output_idx);
        const auto shape = op->get_output_shape(output_idx);
        const cldnn::layout buffer_layout{cldnn::element_type_to_data_type(precision),
                                          cldnn::format::get_default_format(shape.size()),
                                          tensor_from_dims(shape)};
        cldnn::memory::ptr shared_memory{p.GetEngine().allocate_memory(buffer_layout)};

        const auto mutable_id_w = layer_type_name + "_md_write." + std::to_string(output_idx);
        const cldnn::mutable_data mutable_prim_w{mutable_id_w, shared_memory};
        p.add_primitive(*op, mutable_prim_w);
        inputs.push_back(mutable_id_w);
        return shared_memory;
    };

    const auto shared_memory_1 = add_mutable_output(1);  // output 1 - scores
    const auto shared_memory_2 = add_mutable_output(2);  // output 2 - roisNum

    const cldnn::generate_proposals prim{layer_name,
                                         inputs,
                                         attrs.min_size,
                                         attrs.nms_threshold,
                                         attrs.pre_nms_count,
                                         attrs.post_nms_count,
                                         attrs.normalized,
                                         attrs.nms_eta,
                                         cldnn::element_type_to_data_type(op->get_roi_num_type())};
    p.add_primitive(*op, prim);

    // "Read" ends of the shared buffers; they depend on the main primitive so
    // they expose the data only after the kernel has executed.
    const auto mutable_id_r_1 = layer_type_name + ".out1";
    const cldnn::mutable_data mutable_prim_r_1{mutable_id_r_1, {layer_name}, shared_memory_1};
    p.add_primitive(*op, mutable_prim_r_1);

    const auto mutable_id_r_2 = layer_type_name + ".out2";
    const cldnn::mutable_data mutable_prim_r_2{mutable_id_r_2, {layer_name}, shared_memory_2};
    p.add_primitive(*op, mutable_prim_r_2);
}
REGISTER_FACTORY_IMPL(internal, GenerateProposalsIEInternal);
} // namespace intel_gpu
} // namespace ov

View File

@ -74,6 +74,7 @@
#include <transformations/op_conversions/convert_gather_downgrade.hpp>
#include <transformations/op_conversions/convert_gather_0d.hpp>
#include <transformations/op_conversions/convert_deformable_conv_v8_to_v1.hpp>
#include <transformations/op_conversions/convert_gp9_to_gp_ie_internal.hpp>
#include <transformations/op_conversions/simplify_ctc_greedy_decoder_seq_len.hpp>
#include "transformations/op_conversions/softmax_decomposition.hpp"
#include <transformations/op_conversions/gelu7_downgrade.hpp>
@ -163,6 +164,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
manager.register_pass<ngraph::pass::ConvertNMS4ToNMS9>();
manager.register_pass<ngraph::pass::ConvertNMS5ToNMS9>();
manager.register_pass<ngraph::pass::ConvertNMS9ToNMSIEInternal>();
manager.register_pass<ngraph::pass::ConvertGP9ToGPIEInternal>();
manager.register_pass<ngraph::pass::ConvertGather0D>();
manager.register_pass<ngraph::pass::ConvertPriorBox8To0, false>();

View File

@ -0,0 +1,461 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "test_utils.h"
#include <intel_gpu/primitives/generate_proposals.hpp>
#include <intel_gpu/primitives/input_layout.hpp>
#include <intel_gpu/primitives/mutable_data.hpp>
using namespace cldnn;
using namespace ::tests;
namespace {
// Test-case descriptor: GenerateProposals attributes plus the expected outputs
// for the fixed input data defined below.
template <typename T>
struct GenerateProposalsParams {
    float min_size;          // boxes smaller than this (scaled) size are filtered out
    float nms_threshold;     // IoU threshold used by NMS
    int64_t pre_nms_count;   // number of top-scored proposals taken before NMS
    int64_t post_nms_count;  // maximum number of proposals kept after NMS
    bool normalized;         // specifies whether boxes are normalized
    float nms_eta;           // adaptive NMS eta parameter
    std::vector<T> expected_rois;        // expected ROI coordinates (x0, y0, x1, y1 per box)
    std::vector<T> expected_roi_scores;  // expected scores, one per kept ROI
    std::vector<size_t> expected_rois_num;  // expected number of ROIs per batch
};

// Each test instance is a (parameters, memory format) combination.
template <typename T>
using GenerateProposalsParamsWithLayout = std::tuple<GenerateProposalsParams<T>, format::type>;
// Fixed input data shared by all parameterized cases. Both batches receive
// identical inputs, so expected outputs simply repeat per batch.
constexpr size_t num_batches = 2;
constexpr size_t image_height = 200;
constexpr size_t image_width = 200;
constexpr float image_scale = 4.0f;
constexpr size_t height = 2;             // feature map height
constexpr size_t width = 6;              // feature map width
constexpr size_t number_of_channels = 3;
constexpr size_t number_of_anchors = 3;

// Per-batch image info: {height, width, scale}.
const std::vector<float> im_info{image_height, image_width, image_scale, image_height, image_width, image_scale};

// Anchors, shape {height, width, number_of_anchors, 4}: a simple 0..143 ramp.
const std::vector<float> anchors{
        0.0f, 1.0f, 2.0f, 3.0f,
        4.0f, 5.0f, 6.0f, 7.0f,
        8.0f, 9.0f, 10.0f, 11.0f,
        12.0f, 13.0f, 14.0f, 15.0f,
        16.0f, 17.0f, 18.0f, 19.0f,
        20.0f, 21.0f, 22.0f, 23.0f,
        24.0f, 25.0f, 26.0f, 27.0f,
        28.0f, 29.0f, 30.0f, 31.0f,
        32.0f, 33.0f, 34.0f, 35.0f,
        36.0f, 37.0f, 38.0f, 39.0f,
        40.0f, 41.0f, 42.0f, 43.0f,
        44.0f, 45.0f, 46.0f, 47.0f,
        48.0f, 49.0f, 50.0f, 51.0f,
        52.0f, 53.0f, 54.0f, 55.0f,
        56.0f, 57.0f, 58.0f, 59.0f,
        60.0f, 61.0f, 62.0f, 63.0f,
        64.0f, 65.0f, 66.0f, 67.0f,
        68.0f, 69.0f, 70.0f, 71.0f,
        72.0f, 73.0f, 74.0f, 75.0f,
        76.0f, 77.0f, 78.0f, 79.0f,
        80.0f, 81.0f, 82.0f, 83.0f,
        84.0f, 85.0f, 86.0f, 87.0f,
        88.0f, 89.0f, 90.0f, 91.0f,
        92.0f, 93.0f, 94.0f, 95.0f,
        96.0f, 97.0f, 98.0f, 99.0f,
        100.0f, 101.0f, 102.0f, 103.0f,
        104.0f, 105.0f, 106.0f, 107.0f,
        108.0f, 109.0f, 110.0f, 111.0f,
        112.0f, 113.0f, 114.0f, 115.0f,
        116.0f, 117.0f, 118.0f, 119.0f,
        120.0f, 121.0f, 122.0f, 123.0f,
        124.0f, 125.0f, 126.0f, 127.0f,
        128.0f, 129.0f, 130.0f, 131.0f,
        132.0f, 133.0f, 134.0f, 135.0f,
        136.0f, 137.0f, 138.0f, 139.0f,
        140.0f, 141.0f, 142.0f, 143.0f};

// Box deltas, shape {num_batches, number_of_anchors * 4, height, width}.
// The second batch (second half) duplicates the first.
const std::vector<float> deltas{
        0.5337073, 0.86607957, 0.55151343, 0.21626699, 0.4462629, 0.03985678,
        0.5157072, 0.9932138, 0.7565954, 0.43803605, 0.802818, 0.14834064,
        0.53932905, 0.14314, 0.3817048, 0.95075196, 0.05516243, 0.2567484,
        0.25508744, 0.77438325, 0.43561, 0.2094628, 0.8299043, 0.44982538,
        0.95615596, 0.5651084, 0.11801951, 0.05352486, 0.9774733, 0.14439464,
        0.62644225, 0.14370479, 0.54161614, 0.557915, 0.53102225, 0.0840179,
        0.7249888, 0.9843559, 0.5490522, 0.53788143, 0.822474, 0.3278008,
        0.39688024, 0.3286012, 0.5117038, 0.04743988, 0.9408995, 0.29885054,
        0.81039643, 0.85277915, 0.06807619, 0.86430097, 0.36225632, 0.16606331,
        0.5401001, 0.7541649, 0.11998601, 0.5131829, 0.40606487, 0.327888,
        0.27721855, 0.6378373, 0.22795396, 0.4961256, 0.3215895, 0.15607187,
        0.14782153, 0.8908137, 0.8835288, 0.834191, 0.29907143, 0.7983525,
        0.755875, 0.30837986, 0.0839176, 0.26624718, 0.04371626, 0.09472824,
        0.20689541, 0.37622106, 0.1083321, 0.1342548, 0.05815459, 0.7676379,
        0.8105144, 0.92348766, 0.26761323, 0.7183306, 0.8947588, 0.19020908,
        0.42731014, 0.7473663, 0.85775334, 0.9340091, 0.3278848, 0.755993,
        0.05307213, 0.39705503, 0.21003333, 0.5625373, 0.66188884, 0.80521655,
        0.6125863, 0.44678232, 0.97802377, 0.0204936, 0.02686367, 0.7390654,
        0.74631, 0.58399844, 0.5988792, 0.37413648, 0.5946692, 0.6955776,
        0.36377597, 0.7891322, 0.40900692, 0.99139464, 0.50169915, 0.41435778,
        0.17142445, 0.26761186, 0.31591868, 0.14249913, 0.12919712, 0.5418711,
        0.6523203, 0.50259084, 0.7379765, 0.01171071, 0.94423133, 0.00841132,
        0.97486794, 0.2921785, 0.7633071, 0.88477814, 0.03563205, 0.50833166,
        0.01354555, 0.535081, 0.41366324, 0.0694767, 0.9944055, 0.9981207,
        0.5337073, 0.86607957, 0.55151343, 0.21626699, 0.4462629, 0.03985678,
        0.5157072, 0.9932138, 0.7565954, 0.43803605, 0.802818, 0.14834064,
        0.53932905, 0.14314, 0.3817048, 0.95075196, 0.05516243, 0.2567484,
        0.25508744, 0.77438325, 0.43561, 0.2094628, 0.8299043, 0.44982538,
        0.95615596, 0.5651084, 0.11801951, 0.05352486, 0.9774733, 0.14439464,
        0.62644225, 0.14370479, 0.54161614, 0.557915, 0.53102225, 0.0840179,
        0.7249888, 0.9843559, 0.5490522, 0.53788143, 0.822474, 0.3278008,
        0.39688024, 0.3286012, 0.5117038, 0.04743988, 0.9408995, 0.29885054,
        0.81039643, 0.85277915, 0.06807619, 0.86430097, 0.36225632, 0.16606331,
        0.5401001, 0.7541649, 0.11998601, 0.5131829, 0.40606487, 0.327888,
        0.27721855, 0.6378373, 0.22795396, 0.4961256, 0.3215895, 0.15607187,
        0.14782153, 0.8908137, 0.8835288, 0.834191, 0.29907143, 0.7983525,
        0.755875, 0.30837986, 0.0839176, 0.26624718, 0.04371626, 0.09472824,
        0.20689541, 0.37622106, 0.1083321, 0.1342548, 0.05815459, 0.7676379,
        0.8105144, 0.92348766, 0.26761323, 0.7183306, 0.8947588, 0.19020908,
        0.42731014, 0.7473663, 0.85775334, 0.9340091, 0.3278848, 0.755993,
        0.05307213, 0.39705503, 0.21003333, 0.5625373, 0.66188884, 0.80521655,
        0.6125863, 0.44678232, 0.97802377, 0.0204936, 0.02686367, 0.7390654,
        0.74631, 0.58399844, 0.5988792, 0.37413648, 0.5946692, 0.6955776,
        0.36377597, 0.7891322, 0.40900692, 0.99139464, 0.50169915, 0.41435778,
        0.17142445, 0.26761186, 0.31591868, 0.14249913, 0.12919712, 0.5418711,
        0.6523203, 0.50259084, 0.7379765, 0.01171071, 0.94423133, 0.00841132,
        0.97486794, 0.2921785, 0.7633071, 0.88477814, 0.03563205, 0.50833166,
        0.01354555, 0.535081, 0.41366324, 0.0694767, 0.9944055, 0.9981207};

// Objectness scores, shape {num_batches, number_of_anchors, height, width}.
// The second batch (second half) duplicates the first.
const std::vector<float> scores{
        0.56637216, 0.90457034, 0.69827306, 0.4353543, 0.47985056, 0.42658508,
        0.14516132, 0.08081771, 0.1799732, 0.9229515, 0.42420176, 0.50857586,
        0.82664067, 0.4972319, 0.3752427, 0.56731623, 0.18241242, 0.33252355,
        0.30608943, 0.6572437, 0.69185436, 0.88646156, 0.36985755, 0.5590753,
        0.5256446, 0.03342898, 0.1344396, 0.68642473, 0.37953874, 0.32575172,
        0.21108444, 0.5661886, 0.45378175, 0.62126315, 0.26799858, 0.37272978,
        0.56637216, 0.90457034, 0.69827306, 0.4353543, 0.47985056, 0.42658508,
        0.14516132, 0.08081771, 0.1799732, 0.9229515, 0.42420176, 0.50857586,
        0.82664067, 0.4972319, 0.3752427, 0.56731623, 0.18241242, 0.33252355,
        0.30608943, 0.6572437, 0.69185436, 0.88646156, 0.36985755, 0.5590753,
        0.5256446, 0.03342898, 0.1344396, 0.68642473, 0.37953874, 0.32575172,
        0.21108444, 0.5661886, 0.45378175, 0.62126315, 0.26799858, 0.37272978};

// Memory formats to exercise: plane format plus blocked fsv/bsv layouts.
const std::vector<format::type> layouts{
        format::bfyx,
        format::b_fs_yx_fsv16,
        format::b_fs_yx_fsv32,
        format::bs_fs_yx_bsv16_fsv16,
        format::bs_fs_yx_bsv32_fsv16,
        format::bs_fs_yx_bsv32_fsv32};
// Converts the float reference values into the precision under test.
template <typename T>
std::vector<T> getValues(const std::vector<float>& values) {
    std::vector<T> converted;
    converted.reserve(values.size());
    for (const auto value : values) {
        converted.emplace_back(value);
    }
    return converted;
}
// Absolute comparison tolerance for the tested precision.
template <typename T> float getError();

// f32: tight tolerance. Use a float literal to avoid an implicit
// double -> float narrowing conversion.
template<>
float getError<float>() {
    return 0.001f;
}

// f16: the short half-precision mantissa requires a much looser bound.
template<>
float getError<half_t>() {
    return 0.2f;
}
// Reference test cases: {min_size, nms_threshold, pre_nms_count, post_nms_count,
// normalized, nms_eta} plus the expected ROIs / scores / per-batch counts.
// Inputs are identical for both batches, so expectations repeat per batch.
template <typename T>
std::vector<GenerateProposalsParams<T>> getGenerateProposalsParams() {
    std::vector<GenerateProposalsParams<T>> params = {
        {
            // min_size filtering + small pre_nms_count: only 3 boxes survive per batch
            1.0f, 0.7f, 14, 6, true, 1.0,
            getValues<T>({4.49132, 4.30537, 8.75027, 8.8035,
                          0, 1.01395, 4.66909, 5.14337,
                          135.501, 137.467, 139.81, 141.726,
                          4.49132, 4.30537, 8.75027, 8.8035,
                          0, 1.01395, 4.66909, 5.14337,
                          135.501, 137.467, 139.81, 141.726}),
            getValues<T>({0.826641, 0.566372, 0.559075,
                          0.826641, 0.566372, 0.559075}),
            {3, 3}
        },
        {
            // same min_size with a large pre_nms_count: 5 boxes survive per batch
            1.0f, 0.7f, 1000, 6, true, 1.0,
            getValues<T>({4.49132, 4.30537, 8.75027, 8.8035,
                          0, 1.01395, 4.66909, 5.14337,
                          135.501, 137.467, 139.81, 141.726,
                          47.2348, 47.8342, 52.5503, 52.3864,
                          126.483, 128.3, 131.625, 133.707,
                          4.49132, 4.30537, 8.75027, 8.8035,
                          0, 1.01395, 4.66909, 5.14337,
                          135.501, 137.467, 139.81, 141.726,
                          47.2348, 47.8342, 52.5503, 52.3864,
                          126.483, 128.3, 131.625, 133.707}),
            getValues<T>({0.826641, 0.566372, 0.559075, 0.479851, 0.267999,
                          0.826641, 0.566372, 0.559075, 0.479851, 0.267999}),
            {5, 5}
        },
        {
            // min_size disabled: post_nms_count (6) boxes kept per batch
            0.0f, 0.7f, 14, 6, true, 1.0,
            getValues<T>({108.129, 109.37, 111.623, 111.468,
                          12.9725, 11.6102, 16.4918, 16.9624,
                          112.883, 113.124, 115.17, 118.213,
                          4.49132, 4.30537, 8.75027, 8.8035,
                          24.9778, 25.0318, 27.2283, 28.495,
                          100.126, 101.409, 102.354, 106.125,
                          108.129, 109.37, 111.623, 111.468,
                          12.9725, 11.6102, 16.4918, 16.9624,
                          112.883, 113.124, 115.17, 118.213,
                          4.49132, 4.30537, 8.75027, 8.8035,
                          24.9778, 25.0318, 27.2283, 28.495,
                          100.126, 101.409, 102.354, 106.125}),
            getValues<T>({0.922952, 0.90457, 0.886462, 0.826641, 0.698273, 0.691854,
                          0.922952, 0.90457, 0.886462, 0.826641, 0.698273, 0.691854}),
            {6, 6}
        },
        {
            // tiny min_size with large pre_nms_count: same result as above
            0.1f, 0.7f, 1000, 6, true, 1.0,
            getValues<T>({108.129, 109.37, 111.623, 111.468,
                          12.9725, 11.6102, 16.4918, 16.9624,
                          112.883, 113.124, 115.17, 118.213,
                          4.49132, 4.30537, 8.75027, 8.8035,
                          24.9778, 25.0318, 27.2283, 28.495,
                          100.126, 101.409, 102.354, 106.125,
                          108.129, 109.37, 111.623, 111.468,
                          12.9725, 11.6102, 16.4918, 16.9624,
                          112.883, 113.124, 115.17, 118.213,
                          4.49132, 4.30537, 8.75027, 8.8035,
                          24.9778, 25.0318, 27.2283, 28.495,
                          100.126, 101.409, 102.354, 106.125}),
            getValues<T>({0.922952, 0.90457, 0.886462, 0.826641, 0.698273, 0.691854,
                          0.922952, 0.90457, 0.886462, 0.826641, 0.698273, 0.691854}),
            {6, 6}
        },
        {
            // non-normalized boxes (+1 coordinate offset) with min_size filtering
            1.0f, 0.7f, 14, 6, false, 1.0,
            getValues<T>({13.4588, 10.9153, 17.7377, 17.9436,
                          4.73698, 3.95806, 10.1254, 9.70525,
                          89.5773, 90.0053, 92.9476, 95.3396,
                          0, 1.02093, 6.00364, 6.21505,
                          92.3608, 94.306, 96.3198, 98.4288,
                          135.252, 137.7, 140.716, 143.09,
                          13.4588, 10.9153, 17.7377, 17.9436,
                          4.73698, 3.95806, 10.1254, 9.70525,
                          89.5773, 90.0053, 92.9476, 95.3396,
                          0, 1.02093, 6.00364, 6.21505,
                          92.3608, 94.306, 96.3198, 98.4288,
                          135.252, 137.7, 140.716, 143.09}),
            getValues<T>({0.90457, 0.826641, 0.657244, 0.566372, 0.566189, 0.559075,
                          0.90457, 0.826641, 0.657244, 0.566372, 0.566189, 0.559075}),
            {6, 6}
        },
        {
            // non-normalized boxes without min_size filtering
            0.0f, 0.7f, 1000, 6, false, 1.0,
            getValues<T>({108.194, 109.556, 112.435, 111.701,
                          13.4588, 10.9153, 17.7377, 17.9436,
                          113.324, 113.186, 115.755, 119.82,
                          4.73698, 3.95806, 10.1254, 9.70525,
                          25.4666, 25.0477, 27.8424, 29.2425,
                          100.188, 101.614, 102.532, 107.687,
                          108.194, 109.556, 112.435, 111.701,
                          13.4588, 10.9153, 17.7377, 17.9436,
                          113.324, 113.186, 115.755, 119.82,
                          4.73698, 3.95806, 10.1254, 9.70525,
                          25.4666, 25.0477, 27.8424, 29.2425,
                          100.188, 101.614, 102.532, 107.687}),
            getValues<T>({0.922952, 0.90457, 0.886462, 0.826641, 0.698273, 0.691854,
                          0.922952, 0.90457, 0.886462, 0.826641, 0.698273, 0.691854}),
            {6, 6}
        }
    };
    return params;
}
}; // namespace
// Parameterized GPU test fixture for the generate_proposals primitive.
// T - data precision under test; ROIS_NUM_T - precision of the rois-num output.
template <typename T, typename ROIS_NUM_T>
struct generate_proposals_test
        : public ::testing::TestWithParam<GenerateProposalsParamsWithLayout<T> > {
public:
    void test() {
        GenerateProposalsParams<T> param;
        format::type data_layout;
        std::tie(param, data_layout) = this->GetParam();
        // Non-plane formats are fed through reorder primitives below.
        const bool need_reorder = data_layout != format::bfyx;

        const auto data_type = type_to_data_type<T>::value;
        const auto rois_num_type = type_to_data_type<ROIS_NUM_T>::value;

        auto& engine = get_test_engine();

        // --- Allocate and fill the four inputs in plane (bfyx) format ---
        const primitive_id input_im_info_id = "InputImInfo";
        const auto input_im_info = engine.allocate_memory({data_type, format::bfyx, tensor{batch(num_batches), feature(3)}});
        set_values(input_im_info, getValues<T>(im_info));

        const primitive_id input_anchors_id = "InputAnchors";
        auto input_anchors = engine.allocate_memory(
                {data_type, format::bfyx, tensor{batch(height), feature(width), spatial(4, number_of_anchors)}});
        set_values(input_anchors, getValues<T>(anchors));

        const primitive_id input_deltas_id = "InputDeltas";
        auto input_deltas = engine.allocate_memory(
                {data_type, format::bfyx,
                 tensor{batch(num_batches), feature(number_of_anchors * 4), spatial(width, height)}});
        set_values(input_deltas, getValues<T>(deltas));

        const primitive_id input_scores_id = "InputScores";
        auto input_scores = engine.allocate_memory(
                {data_type, format::bfyx, tensor{batch(num_batches), feature(number_of_anchors), spatial(width, height)}});
        set_values(input_scores, getValues<T>(scores));

        // --- Mutable buffers for the 2nd (scores) and 3rd (rois num) outputs ---
        const primitive_id output_roi_scores_id = "OutputRoiScores";
        const layout rois_scores_layout{data_type, data_layout, tensor{batch(num_batches * param.post_nms_count)}};
        auto output_roi_scores = engine.allocate_memory(rois_scores_layout);

        const primitive_id output_rois_num_id = "OutputRoisNum";
        const layout rois_num_layout{rois_num_type, data_layout, tensor{batch(num_batches)}};
        auto output_rois_num = engine.allocate_memory(rois_num_layout);

        const primitive_id reorder_im_info_id = input_im_info_id + "Reordered";
        const primitive_id reorder_anchors_id = input_anchors_id + "Reordered";
        const primitive_id reorder_deltas_id = input_deltas_id + "Reordered";
        const primitive_id reorder_scores_id = input_scores_id + "Reordered";

        // --- Build the topology: inputs -> reorders -> primitive -> reorder back ---
        topology topology;
        topology.add(input_layout{input_im_info_id, input_im_info->get_layout()});
        topology.add(input_layout{input_anchors_id, input_anchors->get_layout()});
        topology.add(input_layout{input_deltas_id, input_deltas->get_layout()});
        topology.add(input_layout{input_scores_id, input_scores->get_layout()});
        topology.add(mutable_data{output_roi_scores_id, output_roi_scores});
        topology.add(mutable_data{output_rois_num_id, output_rois_num});
        topology.add(reorder(reorder_im_info_id, input_im_info_id, data_layout, data_type));
        topology.add(reorder(reorder_anchors_id, input_anchors_id, data_layout, data_type));
        topology.add(reorder(reorder_deltas_id, input_deltas_id, data_layout, data_type));
        topology.add(reorder(reorder_scores_id, input_scores_id, data_layout, data_type));

        const primitive_id generate_proposals_id = "generate_proposals";
        const std::vector<primitive_id> inputs{ reorder_im_info_id, reorder_anchors_id, reorder_deltas_id,
                                                reorder_scores_id, output_roi_scores_id, output_rois_num_id};
        const auto generate_proposals_primitive = generate_proposals{
                generate_proposals_id,
                inputs,
                param.min_size,
                param.nms_threshold,
                param.pre_nms_count,
                param.post_nms_count,
                param.normalized,
                param.nms_eta,
                rois_num_type};

        topology.add(generate_proposals_primitive);
        // Reorder the ROI output back to plane format for comparison.
        const primitive_id reorder_result_id = generate_proposals_id + "Reordered";
        topology.add(reorder(reorder_result_id, generate_proposals_id, format::bfyx, data_type));

        network network{engine, topology};

        network.set_input_data(input_im_info_id, input_im_info);
        network.set_input_data(input_anchors_id, input_anchors);
        network.set_input_data(input_deltas_id, input_deltas);
        network.set_input_data(input_scores_id, input_scores);

        const auto outputs = network.execute();

        const auto rois = outputs.at(reorder_result_id).get_memory();

        const cldnn::mem_lock<T> rois_ptr(rois, get_test_stream());
        ASSERT_EQ(rois_ptr.size(), num_batches * param.post_nms_count * 4);

        // Reorders a blocked-layout buffer back to bfyx so it can be read linearly.
        const auto get_plane_data = [&](const memory::ptr& mem, const data_types data_type, const layout& from_layout) {
            if (!need_reorder) {
                return mem;
            }
            cldnn::topology reorder_topology;
            reorder_topology.add(input_layout("data", from_layout));
            reorder_topology.add(reorder("plane_data", "data", format::bfyx, data_type));
            cldnn::network reorder_net{engine, reorder_topology};
            reorder_net.set_input_data("data", mem);
            const auto second_output_result = reorder_net.execute();
            const auto plane_data_mem = second_output_result.at("plane_data").get_memory();
            return plane_data_mem;
        };

        const cldnn::mem_lock<T> roi_scores_ptr(
                get_plane_data(output_roi_scores, data_type, rois_scores_layout), get_test_stream());
        ASSERT_EQ(roi_scores_ptr.size(), num_batches * param.post_nms_count);

        const cldnn::mem_lock<ROIS_NUM_T> rois_num_ptr(
                get_plane_data(output_rois_num, rois_num_type, rois_num_layout), get_test_stream());
        ASSERT_EQ(rois_num_ptr.size(), num_batches);

        const auto& expected_rois = param.expected_rois;
        const auto& expected_roi_scores = param.expected_roi_scores;
        const auto& expected_rois_num = param.expected_rois_num;

        for (size_t j = 0; j < expected_rois_num.size(); ++j) {
            EXPECT_EQ(expected_rois_num[j], rois_num_ptr[j]) << "j=" << j;
        }

        // ROI coordinates are only meaningful where the score is non-zero
        // (zero-score slots are the kernel's zero-filled padding).
        for (auto i = 0; i < param.post_nms_count; ++i) {
            EXPECT_NEAR(expected_roi_scores[i], roi_scores_ptr[i], getError<T>()) << "i=" << i;
            if (static_cast<float>(expected_roi_scores[i]) != 0.0f) {
                for (size_t coord = 0; coord < 4; ++coord) {
                    const auto roi_idx = i * 4 + coord;
                    EXPECT_NEAR(expected_rois[roi_idx], rois_ptr[roi_idx], getError<T>()) << "i=" << i << ", coord=" << coord;
                }
            }
        }
    }
};
// Instantiate the fixture for every (data precision, rois-num precision)
// combination, each over the full set of tested memory layouts.
using f32_i32 = generate_proposals_test<float, int32_t>;
TEST_P(f32_i32, f32_i32) {
    test();
}

INSTANTIATE_TEST_SUITE_P(
        generate_proposals_gpu_test,
        f32_i32,
        ::testing::Combine(
            ::testing::ValuesIn(getGenerateProposalsParams<float>()),
            ::testing::ValuesIn(layouts)
        ));

using f32_i64 = generate_proposals_test<float, int64_t>;
TEST_P(f32_i64, f32_i64) {
    test();
}

INSTANTIATE_TEST_SUITE_P(
        generate_proposals_gpu_test,
        f32_i64,
        ::testing::Combine(
            ::testing::ValuesIn(getGenerateProposalsParams<float>()),
            ::testing::ValuesIn(layouts)
        ));

using f16_i32 = generate_proposals_test<half_t, int32_t>;
TEST_P(f16_i32, f16_i32) {
    test();
}

INSTANTIATE_TEST_SUITE_P(
        generate_proposals_gpu_test,
        f16_i32,
        ::testing::Combine(
            ::testing::ValuesIn(getGenerateProposalsParams<half_t>()),
            ::testing::ValuesIn(layouts)
        ));

using f16_i64 = generate_proposals_test<half_t, int64_t>;
TEST_P(f16_i64, f16_i64) {
    test();
}

INSTANTIATE_TEST_SUITE_P(
        generate_proposals_gpu_test,
        f16_i64,
        ::testing::Combine(
            ::testing::ValuesIn(getGenerateProposalsParams<half_t>()),
            ::testing::ValuesIn(layouts)
        ));

View File

@ -0,0 +1,197 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include "single_layer_tests/generate_proposals.hpp"
#include "common_test_utils/ov_tensor_utils.hpp"
using namespace ov::test;
using namespace ov::test::subgraph;
namespace {
// Attribute value sweeps combined by the parameterized suites below.
const std::vector<float> min_size = { 1.0f, 0.0f };
const std::vector<float> nms_threshold = { 0.7f };
const std::vector<int64_t> post_nms_count = { 6 };
const std::vector<int64_t> pre_nms_count = { 14, 1000 };
// Named input-tensor sets (im_info, anchors, deltas, scores) fed to the
// single-layer tests: "empty" is a degenerate mostly-ones case, "filled2"
// mirrors the reference data used by the GPU unit tests (batch 2 == batch 1).
template <typename T>
const std::vector<std::pair<std::string, std::vector<ov::Tensor>>> getInputTensors() {
    const std::vector<std::pair<std::string, std::vector<ov::Tensor>>> input_tensors = {
        {
            "empty",
            {
                // 3
                ov::test::utils::create_tensor<T>(ov::element::from<T>(), ov::Shape{2, 3}, {1.0f, 1.0f, 0.0f, 1.0f, 1.0f, 0.0f}),
                // 2 x 6 x 3 x 4 = 144
                ov::test::utils::create_tensor<T>(ov::element::from<T>(), ov::Shape{2, 6, 3, 4}, std::vector<T>(144, 1.0f)),
                // 2 x 12 x 2 x 6 = 144 * 2
                ov::test::utils::create_tensor<T>(ov::element::from<T>(), ov::Shape{2, 12, 2, 6}, std::vector<T>(288, 1.0f)),
                // {2 x 3 x 2 x 6} = 36 * 2
                ov::test::utils::create_tensor<T>(ov::element::from<T>(), ov::Shape{2, 3, 2, 6}, {
                        5.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
                        1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 4.0f, 1.0f, 1.0f, 1.0f,
                        1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 8.0f, 1.0f,
                        5.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
                        1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 4.0f, 1.0f, 1.0f, 1.0f,
                        1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 8.0f, 1.0f})
            }
        },
        {
            "filled2",
            {
                // im_info: {height, width, scale} per batch
                ov::test::utils::create_tensor<T>(ov::element::from<T>(), ov::Shape{2, 3}, {200.0, 200.0, 4.0, 200.0, 200.0, 4.0}),
                // anchors: simple 0..143 ramp
                ov::test::utils::create_tensor<T>(ov::element::from<T>(), ov::Shape{2, 6, 3, 4}, {0.0f, 1.0f, 2.0f, 3.0f,
                        4.0f, 5.0f, 6.0f, 7.0f,
                        8.0f, 9.0f, 10.0f, 11.0f,
                        12.0f, 13.0f, 14.0f, 15.0f,
                        16.0f, 17.0f, 18.0f, 19.0f,
                        20.0f, 21.0f, 22.0f, 23.0f,
                        24.0f, 25.0f, 26.0f, 27.0f,
                        28.0f, 29.0f, 30.0f, 31.0f,
                        32.0f, 33.0f, 34.0f, 35.0f,
                        36.0f, 37.0f, 38.0f, 39.0f,
                        40.0f, 41.0f, 42.0f, 43.0f,
                        44.0f, 45.0f, 46.0f, 47.0f,
                        48.0f, 49.0f, 50.0f, 51.0f,
                        52.0f, 53.0f, 54.0f, 55.0f,
                        56.0f, 57.0f, 58.0f, 59.0f,
                        60.0f, 61.0f, 62.0f, 63.0f,
                        64.0f, 65.0f, 66.0f, 67.0f,
                        68.0f, 69.0f, 70.0f, 71.0f,
                        72.0f, 73.0f, 74.0f, 75.0f,
                        76.0f, 77.0f, 78.0f, 79.0f,
                        80.0f, 81.0f, 82.0f, 83.0f,
                        84.0f, 85.0f, 86.0f, 87.0f,
                        88.0f, 89.0f, 90.0f, 91.0f,
                        92.0f, 93.0f, 94.0f, 95.0f,
                        96.0f, 97.0f, 98.0f, 99.0f,
                        100.0f, 101.0f, 102.0f, 103.0f,
                        104.0f, 105.0f, 106.0f, 107.0f,
                        108.0f, 109.0f, 110.0f, 111.0f,
                        112.0f, 113.0f, 114.0f, 115.0f,
                        116.0f, 117.0f, 118.0f, 119.0f,
                        120.0f, 121.0f, 122.0f, 123.0f,
                        124.0f, 125.0f, 126.0f, 127.0f,
                        128.0f, 129.0f, 130.0f, 131.0f,
                        132.0f, 133.0f, 134.0f, 135.0f,
                        136.0f, 137.0f, 138.0f, 139.0f,
                        140.0f, 141.0f, 142.0f, 143.0f}),
                // box deltas: second batch (second half) duplicates the first
                ov::test::utils::create_tensor<T>(ov::element::from<T>(), ov::Shape{2, 12, 2, 6}, {
                        0.5337073, 0.86607957, 0.55151343, 0.21626699, 0.4462629, 0.03985678,
                        0.5157072, 0.9932138, 0.7565954, 0.43803605, 0.802818, 0.14834064,
                        0.53932905, 0.14314, 0.3817048, 0.95075196, 0.05516243, 0.2567484,
                        0.25508744, 0.77438325, 0.43561, 0.2094628, 0.8299043, 0.44982538,
                        0.95615596, 0.5651084, 0.11801951, 0.05352486, 0.9774733, 0.14439464,
                        0.62644225, 0.14370479, 0.54161614, 0.557915, 0.53102225, 0.0840179,
                        0.7249888, 0.9843559, 0.5490522, 0.53788143, 0.822474, 0.3278008,
                        0.39688024, 0.3286012, 0.5117038, 0.04743988, 0.9408995, 0.29885054,
                        0.81039643, 0.85277915, 0.06807619, 0.86430097, 0.36225632, 0.16606331,
                        0.5401001, 0.7541649, 0.11998601, 0.5131829, 0.40606487, 0.327888,
                        0.27721855, 0.6378373, 0.22795396, 0.4961256, 0.3215895, 0.15607187,
                        0.14782153, 0.8908137, 0.8835288, 0.834191, 0.29907143, 0.7983525,
                        0.755875, 0.30837986, 0.0839176, 0.26624718, 0.04371626, 0.09472824,
                        0.20689541, 0.37622106, 0.1083321, 0.1342548, 0.05815459, 0.7676379,
                        0.8105144, 0.92348766, 0.26761323, 0.7183306, 0.8947588, 0.19020908,
                        0.42731014, 0.7473663, 0.85775334, 0.9340091, 0.3278848, 0.755993,
                        0.05307213, 0.39705503, 0.21003333, 0.5625373, 0.66188884, 0.80521655,
                        0.6125863, 0.44678232, 0.97802377, 0.0204936, 0.02686367, 0.7390654,
                        0.74631, 0.58399844, 0.5988792, 0.37413648, 0.5946692, 0.6955776,
                        0.36377597, 0.7891322, 0.40900692, 0.99139464, 0.50169915, 0.41435778,
                        0.17142445, 0.26761186, 0.31591868, 0.14249913, 0.12919712, 0.5418711,
                        0.6523203, 0.50259084, 0.7379765, 0.01171071, 0.94423133, 0.00841132,
                        0.97486794, 0.2921785, 0.7633071, 0.88477814, 0.03563205, 0.50833166,
                        0.01354555, 0.535081, 0.41366324, 0.0694767, 0.9944055, 0.9981207,
                        0.5337073, 0.86607957, 0.55151343, 0.21626699, 0.4462629, 0.03985678,
                        0.5157072, 0.9932138, 0.7565954, 0.43803605, 0.802818, 0.14834064,
                        0.53932905, 0.14314, 0.3817048, 0.95075196, 0.05516243, 0.2567484,
                        0.25508744, 0.77438325, 0.43561, 0.2094628, 0.8299043, 0.44982538,
                        0.95615596, 0.5651084, 0.11801951, 0.05352486, 0.9774733, 0.14439464,
                        0.62644225, 0.14370479, 0.54161614, 0.557915, 0.53102225, 0.0840179,
                        0.7249888, 0.9843559, 0.5490522, 0.53788143, 0.822474, 0.3278008,
                        0.39688024, 0.3286012, 0.5117038, 0.04743988, 0.9408995, 0.29885054,
                        0.81039643, 0.85277915, 0.06807619, 0.86430097, 0.36225632, 0.16606331,
                        0.5401001, 0.7541649, 0.11998601, 0.5131829, 0.40606487, 0.327888,
                        0.27721855, 0.6378373, 0.22795396, 0.4961256, 0.3215895, 0.15607187,
                        0.14782153, 0.8908137, 0.8835288, 0.834191, 0.29907143, 0.7983525,
                        0.755875, 0.30837986, 0.0839176, 0.26624718, 0.04371626, 0.09472824,
                        0.20689541, 0.37622106, 0.1083321, 0.1342548, 0.05815459, 0.7676379,
                        0.8105144, 0.92348766, 0.26761323, 0.7183306, 0.8947588, 0.19020908,
                        0.42731014, 0.7473663, 0.85775334, 0.9340091, 0.3278848, 0.755993,
                        0.05307213, 0.39705503, 0.21003333, 0.5625373, 0.66188884, 0.80521655,
                        0.6125863, 0.44678232, 0.97802377, 0.0204936, 0.02686367, 0.7390654,
                        0.74631, 0.58399844, 0.5988792, 0.37413648, 0.5946692, 0.6955776,
                        0.36377597, 0.7891322, 0.40900692, 0.99139464, 0.50169915, 0.41435778,
                        0.17142445, 0.26761186, 0.31591868, 0.14249913, 0.12919712, 0.5418711,
                        0.6523203, 0.50259084, 0.7379765, 0.01171071, 0.94423133, 0.00841132,
                        0.97486794, 0.2921785, 0.7633071, 0.88477814, 0.03563205, 0.50833166,
                        0.01354555, 0.535081, 0.41366324, 0.0694767, 0.9944055, 0.9981207}),
                // scores: second batch (second half) duplicates the first
                ov::test::utils::create_tensor<T>(ov::element::from<T>(), ov::Shape{2, 3, 2, 6}, {
                        0.56637216, 0.90457034, 0.69827306, 0.4353543, 0.47985056, 0.42658508,
                        0.14516132, 0.08081771, 0.1799732, 0.9229515, 0.42420176, 0.50857586,
                        0.82664067, 0.4972319, 0.3752427, 0.56731623, 0.18241242, 0.33252355,
                        0.30608943, 0.6572437, 0.69185436, 0.88646156, 0.36985755, 0.5590753,
                        0.5256446, 0.03342898, 0.1344396, 0.68642473, 0.37953874, 0.32575172,
                        0.21108444, 0.5661886, 0.45378175, 0.62126315, 0.26799858, 0.37272978,
                        0.56637216, 0.90457034, 0.69827306, 0.4353543, 0.47985056, 0.42658508,
                        0.14516132, 0.08081771, 0.1799732, 0.9229515, 0.42420176, 0.50857586,
                        0.82664067, 0.4972319, 0.3752427, 0.56731623, 0.18241242, 0.33252355,
                        0.30608943, 0.6572437, 0.69185436, 0.88646156, 0.36985755, 0.5590753,
                        0.5256446, 0.03342898, 0.1344396, 0.68642473, 0.37953874, 0.32575172,
                        0.21108444, 0.5661886, 0.45378175, 0.62126315, 0.26799858, 0.37272978}),
            }
        }
    };
    return input_tensors;
}
// Static input shapes for the four operation inputs.
constexpr size_t num_batches = 2;
constexpr size_t height = 2;
constexpr size_t width = 6;
constexpr size_t number_of_anchors = 3;

const std::vector<std::vector<InputShape>> input_shape = {
        // im_info / anchors / boxes deltas / scores
        static_shapes_to_test_representation({{num_batches, 3},
                                              {height, width, number_of_anchors, 4},
                                              {num_batches, number_of_anchors * 4, height, width},
                                              {num_batches, number_of_anchors, height, width}}),
};
// GPU smoke test for GenerateProposals-9 with f16 network precision and
// normalized boxes. Fixed single values use ::testing::Values(...) rather
// than ValuesIn on a temporary braced list — Values is the documented form
// for enumerated literals and avoids the initializer_list indirection.
INSTANTIATE_TEST_SUITE_P(
    smoke_GenerateProposalsLayerTest_f16,
    GenerateProposalsLayerTest,
    ::testing::Combine(
        ::testing::ValuesIn(input_shape),                     // im_info/anchors/deltas/scores shapes
        ::testing::ValuesIn(min_size),                        // min_size attribute
        ::testing::ValuesIn(nms_threshold),                   // nms_threshold attribute
        ::testing::ValuesIn(post_nms_count),                  // post_nms_count attribute
        ::testing::ValuesIn(pre_nms_count),                   // pre_nms_count attribute
        ::testing::Values(true),                              // normalized attribute
        ::testing::ValuesIn(getInputTensors<ov::float16>()),  // input tensors
        ::testing::Values(ov::element::Type_t::f16),          // network precision
        ::testing::Values(ov::element::Type_t::i32,
                          ov::element::Type_t::i64),          // roi_num precision
        ::testing::Values(CommonTestUtils::DEVICE_GPU)),      // target device
    GenerateProposalsLayerTest::getTestCaseName);
// GPU smoke test for GenerateProposals-9 with f32 network precision and
// non-normalized boxes. Fixed single values use ::testing::Values(...)
// rather than ValuesIn on a temporary braced list — Values is the
// documented form for enumerated literals.
INSTANTIATE_TEST_SUITE_P(
    smoke_GenerateProposalsLayerTest_f32,
    GenerateProposalsLayerTest,
    ::testing::Combine(
        ::testing::ValuesIn(input_shape),                 // im_info/anchors/deltas/scores shapes
        ::testing::ValuesIn(min_size),                    // min_size attribute
        ::testing::ValuesIn(nms_threshold),               // nms_threshold attribute
        ::testing::ValuesIn(post_nms_count),              // post_nms_count attribute
        ::testing::ValuesIn(pre_nms_count),               // pre_nms_count attribute
        ::testing::Values(false),                         // normalized attribute
        ::testing::ValuesIn(getInputTensors<float>()),    // input tensors
        ::testing::Values(ov::element::Type_t::f32),      // network precision
        ::testing::Values(ov::element::Type_t::i32,
                          ov::element::Type_t::i64),      // roi_num precision
        ::testing::Values(CommonTestUtils::DEVICE_GPU)),  // target device
    GenerateProposalsLayerTest::getTestCaseName);
} // namespace

View File

@ -16,6 +16,7 @@ typedef std::tuple<
float, // nms_threshold: specifies NMS threshold
int64_t, // post_nms_count: number of top-n proposals after NMS
int64_t, // pre_nms_count: number of top-n proposals before NMS
bool, // normalized: specifies whether box is normalized or not
std::pair<std::string, std::vector<ov::Tensor>>, // input tensors
ElementType, // Network precision
ElementType, // roi_num precision
@ -28,6 +29,7 @@ class GenerateProposalsLayerTest :
protected:
void SetUp() override;
void generate_inputs(const std::vector<ngraph::Shape>& targetInputStaticShapes) override;
void compare(const std::vector<ov::Tensor>& expected, const std::vector<ov::Tensor>& actual) override;
public:
static std::string getTestCaseName(const testing::TestParamInfo<GenerateProposalsTestParams>& obj);

View File

@ -3,6 +3,7 @@
//
#include "shared_test_classes/single_layer/generate_proposals.hpp"
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "ngraph_functions/builders.hpp"
#include "common_test_utils/ov_tensor_utils.hpp"
@ -16,8 +17,10 @@ std::ostream& operator <<(
const ov::op::v9::GenerateProposals::Attributes& attributes) {
ss << "score_threshold=" << attributes.min_size << "_";
ss << "nms_threshold=" << attributes.nms_threshold << "_";
ss << "max_delta_log_wh=" << attributes.post_nms_count << "_";
ss << "num_classes=" << attributes.pre_nms_count;
ss << "post_nms_count=" << attributes.post_nms_count << "_";
ss << "pre_nms_count=" << attributes.pre_nms_count;
ss << "normalized=" << attributes.normalized;
ss << "nms_eta=" << attributes.nms_eta;
return ss;
}
} // namespace
@ -36,6 +39,7 @@ std::string GenerateProposalsLayerTest::getTestCaseName(
attributes.nms_threshold,
attributes.post_nms_count,
attributes.pre_nms_count,
attributes.normalized,
inputTensors,
netPrecision,
roiNumPrecision,
@ -70,6 +74,7 @@ void GenerateProposalsLayerTest::SetUp() {
attributes.nms_threshold,
attributes.post_nms_count,
attributes.pre_nms_count,
attributes.normalized,
inputTensors,
netPrecision,
roiNumPrecision,
@ -77,6 +82,13 @@ void GenerateProposalsLayerTest::SetUp() {
inType = outType = netPrecision;
targetDevice = targetName;
if (targetDevice == CommonTestUtils::DEVICE_GPU) {
if (netPrecision == element::Type_t::f16) {
abs_threshold = 0.2;
} else {
abs_threshold = 0.00009;
}
}
init_input_shapes(inputShapes);
@ -97,7 +109,7 @@ void GenerateProposalsLayerTest::SetUp() {
}
void GenerateProposalsLayerTest::generate_inputs(const std::vector<ngraph::Shape>& targetInputStaticShapes) {
auto inputTensors = std::get<5>(GetParam());
auto inputTensors = std::get<6>(GetParam());
inputs.clear();
const auto& funcInputs = function->inputs();
@ -110,6 +122,63 @@ void GenerateProposalsLayerTest::generate_inputs(const std::vector<ngraph::Shape
}
}
// GPU-specific comparison for GenerateProposals outputs.
// The GPU implementation produces statically shaped outputs, so the `rois`
// (output 0) and `roi_scores` (output 1) tensors may be longer than the
// reference result and padded with zeros past the valid region. This
// override compares only the valid prefix element-wise and then verifies
// the padding region is all zeros; non-GPU devices use the base comparison.
void GenerateProposalsLayerTest::compare(const std::vector<ov::Tensor>& expected,
const std::vector<ov::Tensor>& actual) {
if (targetDevice != CommonTestUtils::DEVICE_GPU) {
// Non-GPU path: default exact-shape comparison.
SubgraphBaseTest::compare(expected, actual);
return;
}
// GenerateProposals has exactly three outputs: rois, roi_scores, rois_num.
const auto outputsNum = expected.size();
ASSERT_EQ(outputsNum, 3);
ASSERT_EQ(outputsNum, actual.size());
ASSERT_EQ(outputsNum, function->get_results().size());
// actual outputs 0 (rois) and 1 (roi_scores) may be padded with zeros
for (size_t i = 0; i < 2; ++i) {
const auto expectedNumRois = expected[i].get_shape()[0];
const auto actualNumRois = actual[i].get_shape()[0];
// The actual output may only be equal to or larger than the reference.
ASSERT_LE(expectedNumRois, actualNumRois);
const auto actualBuffer = static_cast<uint8_t*>(actual[i].data());
const auto expectedBuffer = static_cast<uint8_t*>(expected[i].data());
// Elements per ROI: 4 box coordinates for output 0, 1 score for output 1.
const auto outputSize = i == 0 ? 4 : 1;
// Compare only the valid prefix (expectedNumRois entries) with the
// thresholds configured in SetUp().
if (outType == element::Type_t::f32) {
LayerTestsUtils::LayerTestsCommon::Compare(reinterpret_cast<const float*>(expectedBuffer),
reinterpret_cast<const float*>(actualBuffer),
expectedNumRois * outputSize,
rel_threshold,
abs_threshold);
} else {
// NOTE(review): this branch assumes any non-f32 output is f16 —
// confirm no other output precisions reach this comparison.
LayerTestsUtils::LayerTestsCommon::Compare(reinterpret_cast<const float16*>(expectedBuffer),
reinterpret_cast<const float16*>(actualBuffer),
expectedNumRois * outputSize,
rel_threshold,
abs_threshold);
}
// Everything past the valid prefix must be zero padding.
if (expectedNumRois < actualNumRois) {
if (outType == element::Type_t::f32) {
const auto fBuffer = static_cast<const float*>(actual[i].data());
for (size_t j = expectedNumRois * outputSize; j < actualNumRois * outputSize; ++j) {
ASSERT_TRUE(fBuffer[j] == 0.0f)
<< "Expected 0.0, actual: " << fBuffer[j] << " at index: " << j << ", output: " << i;
}
} else {
const float16 zero{0};
const auto fBuffer = static_cast<const float16*>(actual[i].data());
for (size_t j = expectedNumRois * outputSize; j < actualNumRois * outputSize; ++j) {
ASSERT_TRUE(fBuffer[j] == zero)
<< "Expected 0.0, actual: " << fBuffer[j] << " at index: " << j << ", output: " << i;
}
}
}
}
// output 2 - rois_num
ov::test::utils::compare(expected[2], actual[2], abs_threshold, rel_threshold);
}
} // namespace subgraph
} // namespace test
} // namespace ov