From caccde6a82c7ca3c144c0e439d51769c89a5ff36 Mon Sep 17 00:00:00 2001
From: Andrew Kwangwoong Park <andrew.kwangwoong.park@intel.com>
Date: Fri, 6 May 2022 10:36:46 +0900
Subject: [PATCH] [GPU] Add TC for MXNet-style NMS model(decrease_label_id)
 (#10624)

- Add TC for decrease_label_id=true to cover MXNet-style NMS models
- Fix segfault issue that occurs when data precision is fp16

Signed-off-by: Andrew Kwangwoong Park <andrew.kwangwoong.park@intel.com>
Signed-off-by: Andrew Park <andrew.park@intel.com>
---
 .../src/graph/impls/cpu/detection_output.cpp  | 10 ++-
 .../test_cases/detection_output_test.cpp      | 73 +++++++++++++++++++
 2 files changed, 82 insertions(+), 1 deletion(-)
diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp
index 15e731761cd..377ef6368fa 100644
--- a/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp
+++ b/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp
@@ -707,14 +707,22 @@ struct detection_output_impl : typed_primitive_impl<detection_output> {
                 scoreIndexPairs.push_back(score_index_per_prior);
             } else {
                 for (int prior = 0; prior < num_of_priors; ++prior) {
-                    for (int cls = 0; cls < num_classes; ++cls) {
+                    int idx_start = (background_label_id == 0 ? 1 : 0);
+                    float max_score = 0;
+                    int max_cls = 0;
+                    for (int cls = idx_start; cls < num_classes; ++cls) {
                         float score = static_cast<float>(confidence_data[idx]);
                         if (score > confidence_threshold) {
                             label_to_scores[cls].emplace_back(score, prior);
+                            if ((cls == idx_start) || score > max_score) {
+                                max_score = score; max_cls = cls;
+                            }
                         }
                         idx += stride;
                     }
+                    score_index_per_prior.emplace_back(std::make_pair(max_score, std::make_pair(max_cls, prior)));
                 }
+                scoreIndexPairs.push_back(score_index_per_prior);
             }
         }
     }
diff --git a/src/plugins/intel_gpu/tests/test_cases/detection_output_test.cpp b/src/plugins/intel_gpu/tests/test_cases/detection_output_test.cpp
index a0b02f1a276..fe5103dccb0 100644
--- a/src/plugins/intel_gpu/tests/test_cases/detection_output_test.cpp
+++ b/src/plugins/intel_gpu/tests/test_cases/detection_output_test.cpp
@@ -397,6 +397,75 @@ public:
         check_results(output_prim, 3, "-1 0 0 0 0 0 0");
     }
 
+    void test_forward_decrease_label_id() {
+        const bool share_location = true;
+        const int num_loc_classes = share_location ? 1 : this->num_classes;
+        const int keep_top_k = 5;
+        const int background_label_id = 0;
+        const float nms_threshold = 0.4f;
+        const int top_k = 5;
+        const float eta = 1.f;
+        const prior_box_code_type code_type = prior_box_code_type::corner;
+        const bool variance_encoded_in_target = false;
+        const float confidence_threshold = 0.02;
+        const int32_t prior_info_size = 4;
+        const int32_t prior_coordinates_offset = 0;
+        const bool prior_is_normalized = true;
+        const int32_t input_width = 1;
+        const int32_t input_height = 1;
+        const bool decrease_label_id = true;
+
+        auto& engine = get_test_engine();
+        cldnn::memory::ptr input_location = engine.allocate_memory({ type_to_data_type<T>::value, format::bfyx,
+                                                                   { this->num_of_images, this->num_priors * num_loc_classes * 4, 1, 1 } });
+        cldnn::memory::ptr input_confidence = engine.allocate_memory({ type_to_data_type<T>::value, format::bfyx,
+                                                                     { this->num_of_images, this->num_priors * this->num_classes, 1, 1 } });
+        cldnn::memory::ptr input_prior_box = engine.allocate_memory({ type_to_data_type<T>::value, format::bfyx,
+                                                                    { 1, 2, 1, this->num_priors * 4 } });
+
+        this->init_buffers(input_prior_box, input_confidence, input_location, share_location);
+
+        topology topology;
+        topology.add(input_layout("input_location", input_location->get_layout()));
+        topology.add(input_layout("input_confidence", input_confidence->get_layout()));
+        topology.add(input_layout("input_prior_box", input_prior_box->get_layout()));
+
+        topology.add(detection_output("detection_output", "input_location", "input_confidence", "input_prior_box",
+            this->num_classes, keep_top_k, share_location, background_label_id, nms_threshold,
+            top_k, eta, code_type, variance_encoded_in_target, confidence_threshold, prior_info_size,
+            prior_coordinates_offset, prior_is_normalized, input_width, input_height, decrease_label_id
+        ));
+
+        build_options opts;
+        network network(engine, topology, opts);
+        network.set_input_data("input_location", input_location);
+        network.set_input_data("input_confidence", input_confidence);
+        network.set_input_data("input_prior_box", input_prior_box);
+
+        auto outputs = network.execute();
+
+        EXPECT_EQ(outputs.size(), size_t(1));
+        EXPECT_EQ(outputs.begin()->first, "detection_output");
+
+        EXPECT_EQ(outputs.begin()->second.get_memory()->get_layout().size.batch[0], 1);
+        EXPECT_EQ(outputs.begin()->second.get_memory()->get_layout().size.feature[0], 1);
+        EXPECT_EQ(outputs.begin()->second.get_memory()->get_layout().size.spatial[1], keep_top_k * this->num_of_images);
+        EXPECT_EQ(outputs.begin()->second.get_memory()->get_layout().size.spatial[0], 7);
+
+        auto output_prim = outputs.begin()->second.get_memory();
+
+        check_results(output_prim, 0, "0 0 1.0 0.55 0.15 0.85 0.45");
+        check_results(output_prim, 1, "0 0 0.8 0.55 0.55 0.85 0.85");
+        check_results(output_prim, 2, "0 0 0.2 0.15 0.55 0.45 0.85");
+        check_results(output_prim, 3, "1 0 1.0 0.25 0.25 0.55 0.55");
+        check_results(output_prim, 4, "1 0 0.8 0.25 0.45 0.55 0.75");
+        check_results(output_prim, 5, "1 0 0.2 0.45 0.45 0.75 0.75");
+        check_results(output_prim, 6, "-1 0 0 0 0 0 0");
+        check_results(output_prim, 7, "0 0 0 0 0 0 0");
+        check_results(output_prim, 8, "0 0 0 0 0 0 0");
+        check_results(output_prim, 9, "0 0 0 0 0 0 0");
+    }
+
     void forward_no_share_location() {
         const bool share_location = false;
         const int num_loc_classes = share_location ? 1 : this->num_classes;
@@ -748,6 +817,10 @@ TYPED_TEST(detection_output_test, test_forward_share_location_top_k) {
     this->test_forward_share_location_top_k();
 }
 
+TYPED_TEST(detection_output_test, test_forward_decrease_label_id) {
+    this->test_forward_decrease_label_id();
+}
+
 TYPED_TEST(detection_output_test, test_forward_no_share_location) {
     this->forward_no_share_location();
 }