[GPU] Implement ExperimentalDetectronTopKROIs operation (#10208)

2022-02-11 12:32:49 +02:00
parent dc1e9aa9bd
commit 04c1b9760c
18 changed files with 656 additions and 0 deletions
--- a/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp
@@ -207,6 +207,7 @@ REGISTER_FACTORY(v6, CTCGreedyDecoderSeqLen);
 REGISTER_FACTORY(v6, MVN);
 REGISTER_FACTORY(v6, GatherElements);
 REGISTER_FACTORY(v6, ExperimentalDetectronROIFeatureExtractor);
+REGISTER_FACTORY(v6, ExperimentalDetectronTopKROIs);

 // ------------------------------ Supported v7 ops ------------------------------ //
 REGISTER_FACTORY(v7, Gather);
--- a/src/plugins/intel_gpu/include/intel_gpu/primitives/experimental_detectron_topk_rois.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/experimental_detectron_topk_rois.hpp
@@ -0,0 +1,41 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <vector>
+
+#include "primitive.hpp"
+
+namespace cldnn {
+
+/// @addtogroup cpp_api C++ API
+/// @{
+/// @addtogroup cpp_topology Network Topology
+/// @{
+/// @addtogroup cpp_primitives Primitives
+/// @{
+
+/// @brief ExperimentalDetectronTopKROIs-6 primitive
+/// @details Holds the ids of the primitive's inputs and the upper bound on the number of output ROIs.
+struct experimental_detectron_topk_rois : public primitive_base<experimental_detectron_topk_rois> {
+    CLDNN_DECLARE_PRIMITIVE(experimental_detectron_topk_rois)
+
+    /** Construct ExperimentalDetectronTopKROIs primitive.
+     * @param id primitive id
+     * @param inputs ids of the input primitives
+     * @param max_rois maximum number of output ROIs
+     * @param output_padding padding forwarded to primitive_base (defaults to padding())
+     */
+    experimental_detectron_topk_rois(const primitive_id &id, const std::vector<primitive_id> &inputs,
+                                     const size_t max_rois,
+                                     const padding &output_padding = padding())
+            : primitive_base(id, inputs, "", output_padding),
+              max_rois(max_rois) {}
+
+    /// Maximum number of output ROIs.
+    size_t max_rois;
+};
+
+}  // namespace cldnn
--- a/src/plugins/intel_gpu/src/graph/experimental_detectron_topk_rois.cpp
+++ b/src/plugins/intel_gpu/src/graph/experimental_detectron_topk_rois.cpp
@@ -0,0 +1,44 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <experimental_detectron_topk_rois_inst.h>
+#include "primitive_type_base.h"
+#include <sstream>
+#include <json_object.h>
+#include <data_inst.h>
+
+namespace cldnn {
+
+primitive_type_id experimental_detectron_topk_rois::type_id() {
+    static primitive_type_base<experimental_detectron_topk_rois> instance;
+    return &instance;
+}
+
+experimental_detectron_topk_rois_inst::typed_primitive_inst(network& network, experimental_detectron_topk_rois_node const &node)
+: parent(network, node) {
+}
+
+layout experimental_detectron_topk_rois_inst::calc_output_layout(experimental_detectron_topk_rois_node const &node) {
+    // Output is {min(input ROI count, max_rois), input feature size, 1, 1} with the data type and format of input 0.
+    const auto input_layout = node.input(0).get_output_layout();
+    const auto input_sizes = input_layout.size.sizes();
+    // Clamp in the unsigned domain first: narrowing a huge max_rois to int32_t before std::min could wrap negative.
+    const auto roi_num = static_cast<int32_t>(std::min(static_cast<size_t>(input_sizes[0]), node.get_primitive()->max_rois));
+
+    return {input_layout.data_type, input_layout.format, {roi_num, input_sizes[1], 1, 1}};
+}
+
+std::string experimental_detectron_topk_rois_inst::to_string(experimental_detectron_topk_rois_node const &node) {
+    auto node_info = node.desc_to_json();
+    json_composite experimental_detectron_topk_rois_info;
+    experimental_detectron_topk_rois_info.add("input id", node.input().id());
+    experimental_detectron_topk_rois_info.add("indices id", node.input(1).id());
+    experimental_detectron_topk_rois_info.add("max_rois", node.get_primitive()->max_rois);
+    node_info->add("experimental detectron TopK ROIs info", experimental_detectron_topk_rois_info);
+    std::stringstream primitive_description;
+    node_info->dump(primitive_description);
+    return primitive_description.str();
+}
+
+} // namespace cldnn
--- a/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_topk_rois.cpp
+++ b/src/plugins/intel_gpu/src/graph/impls/ocl/experimental_detectron_topk_rois.cpp
@@ -0,0 +1,55 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <experimental_detectron_topk_rois_inst.h>
+#include <edtopkroi/experimental_detectron_topk_rois_ref.h>
+#include "intel_gpu/runtime/error_handler.hpp"
+#include <impls/implementation_map.hpp>
+#include <edtopkroi/experimental_detectron_topk_rois_kernel_selector.h>
+#include "primitive_base.hpp"
+#include <vector>
+
+namespace cldnn {
+namespace ocl {
+
+struct experimental_detectron_topk_rois_impl : typed_primitive_impl_ocl<experimental_detectron_topk_rois> {
+    using parent = typed_primitive_impl_ocl<experimental_detectron_topk_rois>;
+    using parent::parent;
+
+    std::unique_ptr<primitive_impl> clone() const override {
+        return make_unique<experimental_detectron_topk_rois_impl>(*this);
+    }
+
+    static primitive_impl *create(const experimental_detectron_topk_rois_node &arg) {
+        auto params = get_default_params<kernel_selector::experimental_detectron_topk_roi_params>(
+                arg);
+        const auto& experimental_detectron_topk_rois_kernel_selector =
+                kernel_selector::experimental_detectron_topk_rois_kernel_selector::Instance();
+        const auto& primitive = arg.get_primitive();
+        params.inputs.push_back(convert_data_tensor(arg.input(1).get_output_layout()));
+        params.max_rois = primitive->max_rois;
+        auto best_kernels = experimental_detectron_topk_rois_kernel_selector.GetBestKernels(params,
+                                                                                            kernel_selector::experimental_detectron_topk_roi_optional_params());
+        CLDNN_ERROR_BOOL(arg.id(),
+                         "Best_kernel.empty()",
+                         best_kernels.empty(),
+                         "Cannot find a proper kernel with this arguments");
+        return new experimental_detectron_topk_rois_impl(arg, best_kernels[0]);
+    }
+};
+
+namespace detail {
+
+attach_experimental_detectron_topk_rois_impl::attach_experimental_detectron_topk_rois_impl() {
+    implementation_map<experimental_detectron_topk_rois>::add(impl_types::ocl,
+                                                              experimental_detectron_topk_rois_impl::create, {
+                                                                      std::make_tuple(data_types::f16, format::bfyx),
+                                                                      std::make_tuple(data_types::f32, format::bfyx)
+                                                              });
+}
+
+}  // namespace detail
+
+} // namespace ocl
+} // namespace cldnn
--- a/src/plugins/intel_gpu/src/graph/impls/ocl/register.cpp
+++ b/src/plugins/intel_gpu/src/graph/impls/ocl/register.cpp
@@ -29,6 +29,7 @@ void register_implementations() {
    REGISTER_OCL(detection_output);
    REGISTER_OCL(batch_to_space);
    REGISTER_OCL(experimental_detectron_roi_feature_extractor);
+    REGISTER_OCL(experimental_detectron_topk_rois);
    REGISTER_OCL(eltwise);
    REGISTER_OCL(fully_connected);
    REGISTER_OCL(gather);
--- a/src/plugins/intel_gpu/src/graph/impls/ocl/register.hpp
+++ b/src/plugins/intel_gpu/src/graph/impls/ocl/register.hpp
@@ -21,6 +21,7 @@
 #include "intel_gpu/primitives/detection_output.hpp"
 #include "intel_gpu/primitives/eltwise.hpp"
 #include "intel_gpu/primitives/experimental_detectron_roi_feature_extractor.hpp"
+#include "intel_gpu/primitives/experimental_detectron_topk_rois.hpp"
 #include "intel_gpu/primitives/fully_connected.hpp"
 #include "intel_gpu/primitives/gather.hpp"
 #include "intel_gpu/primitives/gather_nd.hpp"
@@ -99,6 +100,7 @@ REGISTER_OCL(deformable_interp);
 REGISTER_OCL(depth_to_space);
 REGISTER_OCL(detection_output);
 REGISTER_OCL(experimental_detectron_roi_feature_extractor);
+REGISTER_OCL(experimental_detectron_topk_rois);
 REGISTER_OCL(eltwise);
 REGISTER_OCL(embed);
 REGISTER_OCL(fully_connected);
--- a/src/plugins/intel_gpu/src/graph/include/experimental_detectron_topk_rois_inst.h
+++ b/src/plugins/intel_gpu/src/graph/include/experimental_detectron_topk_rois_inst.h
@@ -0,0 +1,39 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "intel_gpu/primitives/experimental_detectron_topk_rois.hpp"
+#include "primitive_inst.h"
+#include "intel_gpu/runtime/error_handler.hpp"
+
+namespace cldnn {
+
+template<>
+struct typed_program_node<experimental_detectron_topk_rois> : public typed_program_node_base<experimental_detectron_topk_rois> {
+    using parent = typed_program_node_base<experimental_detectron_topk_rois>;
+public:
+    using parent::parent;
+
+    const program_node &input(std::size_t index = 0) const { return get_dependency(index); }
+};
+
+using experimental_detectron_topk_rois_node = typed_program_node<experimental_detectron_topk_rois>;
+
+template<>
+class typed_primitive_inst<experimental_detectron_topk_rois> : public typed_primitive_inst_base<experimental_detectron_topk_rois> {
+    using parent = typed_primitive_inst_base<experimental_detectron_topk_rois>;
+
+public:
+    static layout calc_output_layout(experimental_detectron_topk_rois_node const &node);
+
+    static std::string to_string(experimental_detectron_topk_rois_node const &node);
+
+public:
+    typed_primitive_inst(network &network, experimental_detectron_topk_rois_node const &desc);
+};
+
+using experimental_detectron_topk_rois_inst = typed_primitive_inst<experimental_detectron_topk_rois>;
+
+} // namespace cldnn
--- a/src/plugins/intel_gpu/src/kernel_selector/common/common_types.h
+++ b/src/plugins/intel_gpu/src/kernel_selector/common/common_types.h
@@ -76,6 +76,7 @@ enum class KernelType {
    NON_MAX_SUPPRESSION,
    DETECTION_OUTPUT,
    EXPERIMENTAL_DETECTRON_ROI_FEATURE_EXTRACTOR,
+    EXPERIMENTAL_DETECTRON_TOPK_ROIS,
    CONVERT_COLOR,
    RANDOM_UNIFORM
 };
--- a/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/edtopkroi/experimental_detectron_topk_rois_kernel_selector.cpp
+++ b/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/edtopkroi/experimental_detectron_topk_rois_kernel_selector.cpp
@@ -0,0 +1,25 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "experimental_detectron_topk_rois_kernel_selector.h"
+#include "experimental_detectron_topk_rois_ref.h"
+
+namespace kernel_selector {
+
+
+experimental_detectron_topk_rois_kernel_selector &experimental_detectron_topk_rois_kernel_selector::Instance() {
+    static experimental_detectron_topk_rois_kernel_selector instance_;
+    return instance_;
+}
+
+experimental_detectron_topk_rois_kernel_selector::experimental_detectron_topk_rois_kernel_selector() {
+    Attach<ExperimentalDetectronTopKROIRef>();
+}
+
+KernelsData experimental_detectron_topk_rois_kernel_selector::GetBestKernels(const Params &params,
+                                                                             const optional_params &options) const {
+    return GetNaiveBestKernel(params, options, KernelType::EXPERIMENTAL_DETECTRON_TOPK_ROIS);
+}
+
+} // namespace kernel_selector
--- a/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/edtopkroi/experimental_detectron_topk_rois_kernel_selector.h
+++ b/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/edtopkroi/experimental_detectron_topk_rois_kernel_selector.h
@@ -0,0 +1,22 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "kernel_selector.h"
+
+namespace kernel_selector {
+
+/**
+ * GPU kernel selector for the ExperimentalDetectronTopKROIs-6 operation
+ */
+class experimental_detectron_topk_rois_kernel_selector : public kernel_selector_base {
+public:
+    static experimental_detectron_topk_rois_kernel_selector &Instance();
+
+    experimental_detectron_topk_rois_kernel_selector();
+
+    KernelsData GetBestKernels(const Params &params, const optional_params &options) const override;
+};
+}  // namespace kernel_selector
--- a/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/edtopkroi/experimental_detectron_topk_rois_ref.cpp
+++ b/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/edtopkroi/experimental_detectron_topk_rois_ref.cpp
@@ -0,0 +1,87 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+
+#include "experimental_detectron_topk_rois_ref.h"
+
+#include <kernel_selector_utils.h>
+#include <random>
+
+
+namespace kernel_selector {
+
+namespace {
+
+
+CommonDispatchData SetDefault(const experimental_detectron_topk_roi_params &params, const optional_params &) {
+    CommonDispatchData dispatchData;
+    dispatchData.gws = {params.output.Batch().v, 1, 1};
+    dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
+    return dispatchData;
+}
+
+}  // namespace
+
+JitConstants ExperimentalDetectronTopKROIRef::GetJitConstants(const experimental_detectron_topk_roi_params &params) const {
+    return MakeBaseParamsJitConstants(params);
+}
+
+
+KernelsData ExperimentalDetectronTopKROIRef::GetKernelsData(const Params &params, const optional_params &options) const {
+    if (!Validate(params, options)) {
+        return {};
+    }
+
+    KernelData kernel_data = KernelData::Default<experimental_detectron_topk_roi_params>(params);
+    const experimental_detectron_topk_roi_params &new_params = dynamic_cast<const experimental_detectron_topk_roi_params &>(*kernel_data.params.get());
+
+    auto dispatch_data = SetDefault(new_params, options);
+    auto entry_point = GetEntryPoint(kernelName, new_params.layerID, params, options);
+
+    auto experimental_detectron_topk_roi_jit = GetJitConstants(new_params);
+    auto jit = CreateJit(kernelName, experimental_detectron_topk_roi_jit, entry_point);
+
+    FillCLKernelData(kernel_data.kernels[0], dispatch_data, params.engineInfo, kernelName, jit, entry_point, "", false,
+                     false, 2);
+
+    KernelsData kernelsData;
+    kernelsData.push_back(std::move(kernel_data));
+    return kernelsData;
+}
+
+KernelsPriority ExperimentalDetectronTopKROIRef::GetKernelsPriority(const Params & /*params*/,
+                                                                    const optional_params & /*options*/) const {
+    return FORCE_PRIORITY_1;
+}
+
+ParamsKey ExperimentalDetectronTopKROIRef::GetSupportedKey() const {
+    ParamsKey k;
+    k.EnableInputDataType(Datatype::F16);
+    k.EnableInputDataType(Datatype::F32);
+    k.EnableInputDataType(Datatype::INT32);
+    k.EnableInputDataType(Datatype::INT64);
+
+    k.EnableOutputDataType(Datatype::F16);
+    k.EnableOutputDataType(Datatype::F32);
+    k.EnableDifferentTypes();
+    k.EnableOutputLayout(Tensor::bfyx);
+    k.EnableInputLayout(Tensor::bfyx);
+    k.EnableBatching();
+    return k;
+}
+
+bool ExperimentalDetectronTopKROIRef::Validate(const Params &params, const optional_params &optionalParams) const {
+    if (params.GetType() != KernelType::EXPERIMENTAL_DETECTRON_TOPK_ROIS ||
+        optionalParams.GetType() != KernelType::EXPERIMENTAL_DETECTRON_TOPK_ROIS) {
+        return false;
+    }
+
+    const experimental_detectron_topk_roi_params &new_params = dynamic_cast<const experimental_detectron_topk_roi_params &>(params);
+    if (new_params.inputs.size() != 2) {
+        return false;
+    }
+    return true;
+}
+
+}  // namespace kernel_selector
--- a/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/edtopkroi/experimental_detectron_topk_rois_ref.h
+++ b/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/edtopkroi/experimental_detectron_topk_rois_ref.h
@@ -0,0 +1,42 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "kernel_base_opencl.h"
+
+namespace kernel_selector {
+
+/**
+ * ExperimentalDetectronTopKROIs kernel params.
+ */
+struct experimental_detectron_topk_roi_params : public base_params {
+    experimental_detectron_topk_roi_params() : base_params(KernelType::EXPERIMENTAL_DETECTRON_TOPK_ROIS) {}
+
+    size_t max_rois = 0; // maximum number of output ROIs.
+};
+
+struct experimental_detectron_topk_roi_optional_params : public optional_params {
+    experimental_detectron_topk_roi_optional_params() : optional_params(KernelType::EXPERIMENTAL_DETECTRON_TOPK_ROIS) {}
+};
+
+/**
+ * Reference GPU kernel for the ExperimentalDetectronTopKROIs-6 operation: copies the input ROIs selected by precomputed indices to the output.
+ */
+class ExperimentalDetectronTopKROIRef : public KernelBaseOpenCL {
+public:
+    ExperimentalDetectronTopKROIRef() : KernelBaseOpenCL("experimental_detectron_topk_rois_ref") {}
+
+private:
+    virtual JitConstants GetJitConstants(const experimental_detectron_topk_roi_params &params) const;
+
+    KernelsData GetKernelsData(const Params &params, const optional_params &options) const override;
+
+    KernelsPriority GetKernelsPriority(const Params &params, const optional_params &options) const override;
+
+    bool Validate(const Params &params, const optional_params &optionalParams) const override;
+
+    ParamsKey GetSupportedKey() const override;
+};
+}  // namespace kernel_selector
--- a/src/plugins/intel_gpu/src/kernel_selector/core/cl_kernels/experimental_detectron_topk_rois_ref.cl
+++ b/src/plugins/intel_gpu/src/kernel_selector/core/cl_kernels/experimental_detectron_topk_rois_ref.cl
@@ -0,0 +1,19 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "include/batch_headers/common.cl"
+#include "include/batch_headers/data_types.cl"
+
+// Gathers ROI rows: output row b is a copy of the input ROI row selected by topk_indices[b].
+KERNEL(experimental_detectron_topk_rois_ref)(const __global INPUT0_TYPE* input_rois,
+        const __global INPUT1_TYPE* topk_indices, __global OUTPUT_TYPE* output_rois)
+{
+    const uint b = get_global_id(0);
+    const uint roi_idx = topk_indices[b];
+    // Resolve every element through the layout macros rather than assuming four
+    // contiguous values, so feature pitches other than 1 (e.g. padded tensors) stay correct.
+    for (uint f = 0; f < OUTPUT_FEATURE_NUM; ++f) {
+        output_rois[OUTPUT_GET_INDEX(b, f, 0, 0)] = input_rois[INPUT0_GET_INDEX(roi_idx, f, 0, 0)];
+    }
+}
--- a/src/plugins/intel_gpu/src/plugin/ops/experimental_detectron_topk_rois.cpp
+++ b/src/plugins/intel_gpu/src/plugin/ops/experimental_detectron_topk_rois.cpp
@@ -0,0 +1,50 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "intel_gpu/plugin/program.hpp"
+#include "intel_gpu/plugin/common_utils.hpp"
+
+#include "ngraph/op/experimental_detectron_topkrois.hpp"
+
+#include "intel_gpu/primitives/experimental_detectron_topk_rois.hpp"
+#include "intel_gpu/primitives/arg_max_min.hpp"
+
+namespace ov {
+namespace runtime {
+namespace intel_gpu {
+
+namespace {
+
+using namespace cldnn;
+
+void CreateExperimentalDetectronTopKROIsOp(Program &p,
+                                           const std::shared_ptr<ngraph::op::v6::ExperimentalDetectronTopKROIs> &op) {
+    p.ValidateInputs(op, {2});
+    auto input_primitives = p.GetInputPrimitiveIDs(op);
+    auto max_rois = op->get_max_rois();
+    auto layer_name = layer_type_name_ID(op);
+    auto argmax_layer_name = layer_name + "_topk";
+    auto top_k_indices = arg_max_min(argmax_layer_name,
+                                     {input_primitives[1]}, arg_max_min::max, max_rois, arg_max_min::batch,
+                                     arg_max_min::sort_by_values, false, "", cldnn::padding(), cldnn::data_types::i32);
+
+
+    p.AddPrimitive(top_k_indices);
+    p.AddInnerPrimitiveToProfiler(top_k_indices, argmax_layer_name, op);
+
+    auto experimental_detectron_topk_layer = cldnn::experimental_detectron_topk_rois(layer_name,
+                                                                                     {input_primitives[0],
+                                                                                      argmax_layer_name}, max_rois);
+
+    p.AddPrimitive(experimental_detectron_topk_layer);
+    p.AddPrimitiveToProfiler(experimental_detectron_topk_layer, op);
+}
+
+} // namespace
+
+REGISTER_FACTORY_IMPL(v6, ExperimentalDetectronTopKROIs);
+
+} // namespace intel_gpu
+} // namespace runtime
+} // namespace ov
--- a/src/plugins/intel_gpu/tests/test_cases/arg_max_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/test_cases/arg_max_gpu_test.cpp
@@ -965,3 +965,51 @@ TEST(arg_max_gpu_min_axis_y_yxfb_topk_2, sort_by_indices) {
        EXPECT_EQ(out_buffer[i], ref_vec[i]);
    }
 }
+
+
+TEST(top_k_layer_tests, sort_probabilities_by_indices) {
+    static const int32_t x_size = 10, y_size = 1, feature_num = 1, batch_num = 1;
+    auto& engine = get_test_engine();
+    const int top_k = 5;
+    auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ batch_num, feature_num, x_size , y_size } });
+    topology topology;
+    topology.add(input_layout("input", input->get_layout()));
+    topology.add(arg_max_min("arg_max", { "input"}, arg_max_min::max, top_k, arg_max_min::x,
+                             arg_max_min::sort_by_values, false, "", padding(), data_types::i32));
+
+    std::vector<float> input_vec = {
+           0.9f,
+           0.1f,
+           0.2f,
+           0.8f,
+           0.5f,
+           0.6f,
+           0.3f,
+           0.4f,
+           0.7f,
+           0.95f
+    };
+
+    std::vector<int> ref_vec = {
+           9, 0, 3, 8, 5
+    };
+
+    set_values(input, input_vec);
+
+    network network(engine, topology);
+    network.set_input_data("input", input);
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "arg_max");
+    const int out_size = top_k;
+    auto output = outputs.at("arg_max").get_memory();
+    cldnn::mem_lock<int> output_ptr(output, get_test_stream());
+    int out_buffer[out_size];
+    for (uint32_t i = 0; i < out_size; i++) {
+        out_buffer[i] = get_value<int>(output_ptr.data(), i);
+    }
+    for (int i = 0; i < out_size; i++) {
+        EXPECT_EQ(out_buffer[i], ref_vec[i]);
+    }
+}
--- a/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_topk_rois_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_topk_rois_gpu_test.cpp
@@ -0,0 +1,105 @@
+// Copyright (C) 2021-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "test_utils.h"
+#include <intel_gpu/primitives/input_layout.hpp>
+#include <intel_gpu/primitives/activation.hpp>
+#include <intel_gpu/primitives/experimental_detectron_topk_rois.hpp>
+
+#include <cstddef>
+#include <string>
+
+using namespace cldnn;
+using namespace ::tests;
+
+TEST(experimental_detectron_topk_rois_gpu_fp32, check_set_indices_layer) {
+    auto &engine = get_test_engine();
+
+    const int rois_num = 2;
+
+    auto roi_input = engine.allocate_memory(
+            {data_types::f32, format::bfyx, tensor(batch(4), feature(4))});
+    auto roi_indices = engine.allocate_memory({data_types::i32, format::bfyx, tensor(batch(rois_num), feature(1))});
+
+    std::vector<float> rois{1.0f, 1.0f, 4.0f, 5.0f,
+                            3.0f, 2.0f, 7.0f, 9.0f,
+                            10.0f, 15.0f, 13.0f, 17.0f,
+                            13.0f, 10.0f, 18.0f, 15.0f};
+    set_values(roi_input, rois);
+    set_values(roi_indices,
+               {3, 1});
+
+    const std::string input_rois_id = "InputRois";
+    const std::string input_indices_id = "InputIndices";;
+    const std::string experimental_detectron_topk_rois_id = "experimental_detectron_topk_rois";
+    topology topology;
+    topology.add(input_layout(input_rois_id, roi_input->get_layout()));
+    topology.add(input_layout(input_indices_id, roi_indices->get_layout()));
+
+    topology.add(experimental_detectron_topk_rois(experimental_detectron_topk_rois_id,
+                                                  {input_rois_id, input_indices_id}, rois_num));
+
+    network network(engine, topology);
+
+    network.set_input_data(input_rois_id, roi_input);
+    network.set_input_data(input_indices_id, roi_indices);
+
+    auto result = network.execute();
+
+    std::vector<float> expected_output{13.0f, 10.0f, 18.0f, 15.0f,
+                                       3.0f, 2.0f, 7.0f, 9.0f};
+
+    auto out_mem = result.at(experimental_detectron_topk_rois_id).get_memory();
+    cldnn::mem_lock<float> out_ptr(out_mem, get_test_stream());
+
+    ASSERT_EQ(expected_output.size(), out_ptr.size());
+    for (size_t i = 0; i < expected_output.size(); ++i) {
+        EXPECT_NEAR(expected_output[i], out_ptr[i], 0.0001) << "at i = " << i;
+    }
+}
+
+TEST(experimental_detectron_topk_rois_gpu_fp32, check_set_indices_layer_model_less_than_k) {
+    auto &engine = get_test_engine();
+    // max_rois (3) exceeds the number of input ROIs (2), so only 2 ROIs are expected in the output
+    const int rois_num = 3;
+
+    auto roi_input = engine.allocate_memory(
+            {data_types::f32, format::bfyx, tensor(batch(2), feature(4))});
+    auto roi_indices = engine.allocate_memory({data_types::i32, format::bfyx, tensor(batch(2), feature(1))});
+
+    std::vector<float> rois{1.0f, 1.0f, 4.0f, 5.0f,
+                            3.0f, 2.0f, 7.0f, 9.0f};
+    set_values(roi_input, rois);
+    set_values(roi_indices,
+               {1, 0});
+
+    const std::string input_rois_id = "InputRois";
+    const std::string input_indices_id = "InputIndices";;
+    const std::string experimental_detectron_topk_rois_id = "experimental_detectron_topk_rois";
+    topology topology;
+    topology.add(input_layout(input_rois_id, roi_input->get_layout()));
+    topology.add(input_layout(input_indices_id, roi_indices->get_layout()));
+
+    topology.add(experimental_detectron_topk_rois(experimental_detectron_topk_rois_id,
+                                                  {input_rois_id, input_indices_id}, rois_num));
+
+    network network(engine, topology);
+
+    network.set_input_data(input_rois_id, roi_input);
+    network.set_input_data(input_indices_id, roi_indices);
+
+    auto result = network.execute();
+
+    std::vector<float> expected_output{3.0f, 2.0f, 7.0f, 9.0f,
+                                       1.0f, 1.0f, 4.0f, 5.0f};
+
+
+    auto out_mem = result.at(experimental_detectron_topk_rois_id).get_memory();
+    cldnn::mem_lock<float> out_ptr(out_mem, get_test_stream());
+
+    ASSERT_EQ(expected_output.size(), out_ptr.size());
+    for (size_t i = 0; i < expected_output.size(); ++i) {
+        EXPECT_NEAR(expected_output[i], out_ptr[i], 0.0001) << "at i = " << i;
+    }
+}
--- a/src/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/experimental_detectron_topk_rois.cpp
+++ b/src/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/experimental_detectron_topk_rois.cpp
@@ -0,0 +1,37 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+#include "single_layer_tests/experimental_detectron_topkrois.hpp"
+
+using namespace ov::test;
+using namespace ov::test::subgraph;
+
+namespace {
+std::vector<int64_t> maxRois {
+        1000,
+        1500,
+        2000
+};
+
+std::vector<ElementType> elementTypes {
+    ElementType::f16,
+    ElementType::f32
+};
+
+const std::vector<std::vector<InputShape>> staticInputShape = {
+        static_shapes_to_test_representation({{3000, 4}, {3000}}),
+        static_shapes_to_test_representation({{4200, 4}, {4200}}),
+        static_shapes_to_test_representation({{4500, 4}, {4500}})
+};
+
+INSTANTIATE_TEST_SUITE_P(smoke_ExperimentalDetectronTopKROIs_static, ExperimentalDetectronTopKROIsLayerTest,
+                         ::testing::Combine(
+                                 ::testing::ValuesIn(staticInputShape),
+                                 ::testing::ValuesIn(maxRois),
+                                 ::testing::ValuesIn(elementTypes),
+                                 ::testing::Values(CommonTestUtils::DEVICE_GPU)),
+                         ExperimentalDetectronTopKROIsLayerTest::getTestCaseName);
+
+} // namespace
--- a/src/tests/functional/shared_test_classes/src/base/utils/generate_inputs.cpp
+++ b/src/tests/functional/shared_test_classes/src/base/utils/generate_inputs.cpp
@@ -2,6 +2,7 @@
 // SPDX-License-Identifier: Apache-2.0
 //

+#include <shared_test_classes/base/ov_subgraph.hpp>
 #include "ngraph/ops.hpp"

 #include "functional_test_utils/ov_tensor_utils.hpp"
@@ -628,6 +629,42 @@ ov::runtime::Tensor generate(const std::shared_ptr<ngraph::op::v5::NonMaxSuppres
    }
 }

+template<ov::element::Type_t elemType>
+ov::runtime::Tensor generate_unique_possibilities(const ov::Shape &targetShape) {
+    using value_type = typename element_type_traits<elemType>::value_type;
+    ov::runtime::Tensor tensor = ov::runtime::Tensor(elemType, targetShape);
+    const size_t k = targetShape[0];
+    std::vector<size_t> indices(k);
+    std::iota(indices.begin(), indices.end(), 0lu);
+    std::default_random_engine random;
+    std::shuffle(indices.begin(), indices.end(), random);
+
+    auto dataPtr = tensor.data<value_type>();
+    for (size_t i = 0; i < k; ++i) {
+        // Distinct float16 bit patterns keep every probability unique in both f16 and f32; equal values would cause false failures
+        dataPtr[i] = ov::float16::from_bits(static_cast<  uint16_t>(indices[i]));
+    }
+    return tensor;
+}
+
+ov::runtime::Tensor generate(const std::shared_ptr<ngraph::op::v6::ExperimentalDetectronTopKROIs>& node,
+                             size_t port,
+                             const ov::element::Type& elemType,
+                             const ov::Shape& targetShape) {
+    if (port == 1) {
+        switch (elemType) {
+            case element::Type_t::f16:
+                return generate_unique_possibilities<element::Type_t::f16>(targetShape);
+            case element::Type_t::f32:
+                return generate_unique_possibilities<element::Type_t::f32>(targetShape);
+            default:
+                OPENVINO_UNREACHABLE("Unsupported element type: ", elemType);
+        }
+    }
+    return generate(std::dynamic_pointer_cast<ov::Node>(node), port, elemType, targetShape);
+}
+
+
 ov::runtime::Tensor generate(const std::shared_ptr<ngraph::op::v5::RNNSequence>& node,
                             size_t port,
                             const ov::element::Type& elemType,