add num_classes=-1 support in cpu_impl of detection_output (#18924)

2023-08-03 03:37:19 +09:00 · 2023-08-03 03:37:19 +09:00 · d51fc7adad
commit d51fc7adad
parent 9e9cf72973
2 changed files with 58 additions and 14 deletions
--- a/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp
+++ b/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp
@@ -282,6 +282,12 @@ public:
        auto out_ptr = lock.begin();

        const auto& args = instance.argument;
+
+        auto confidence_layout = instance.confidence_memory()->get_layout();  // layout of the confidence input
+        auto priors_layout = instance.prior_box_memory()->get_layout();  // layout of the prior-box input
+
+        const int num_of_priors = priors_layout.spatial(1) / args->prior_info_size;  // prior count = spatial(1) / values stored per prior
+        const int num_classes = (args->num_classes == -1) ? confidence_layout.feature() / num_of_priors : args->num_classes;  // -1 is a sentinel: derive the class count as confidence features / priors; NOTE(review): confirm num_of_priors cannot be 0 here
        // Per image -> For each label: Pair (score, prior index)
        std::vector<std::map<int, std::vector<std::pair<float, int>>>> final_detections;
        for (int image = 0; image < num_of_images; ++image) {
@@ -290,7 +296,7 @@ public:
            std::map<int, std::vector<int>> indices;
            int num_det = 0;
            if (nms_type == NMSType::CAFFE) {
-                for (int cls = 0; cls < static_cast<int>(args->num_classes); ++cls) {
+                for (int cls = 0; cls < num_classes; ++cls) {
                    if (static_cast<int>(cls) == args->background_label_id) {
                        conf_per_image[cls].clear();
                        continue;  // Skip background class.
@@ -522,9 +528,7 @@ public:
    template <typename dtype>
    void extract_confidences_per_image_caffe(stream& stream, const detection_output_inst& instance,
                                             std::vector<std::vector<std::vector<std::pair<float, int>>>>& confidences,
-                                             const int num_of_priors) {
-        const int num_classes = instance.argument->num_classes;
-
+                                             const int num_of_priors, const int num_classes) {
        const int num_of_images = static_cast<int>(confidences.size());
        auto input_confidence = instance.confidence_memory();
        const float confidence_threshold = instance.argument->confidence_threshold;
@@ -616,9 +620,8 @@ public:
    template <typename dtype>
    void extract_confidences_per_image_mxnet(stream& stream, const detection_output_inst& instance,
                                             std::vector<std::vector<std::vector<std::pair<float, int>>>>& confidences,
-                                             const int num_of_priors,
+                                             const int num_of_priors, const int num_classes,
                                             std::vector<std::vector<std::pair<float, std::pair<int, int>>>>& scoreIndexPairs) {
-        const int num_classes = instance.argument->num_classes;
        const int background_label_id = instance.argument->background_label_id;
        const int num_of_images = static_cast<int>(confidences.size());
        auto input_confidence = instance.confidence_memory();
@@ -750,11 +753,13 @@ public:

        const auto& args = instance.argument;

+        auto confidence_layout = instance.confidence_memory()->get_layout();
        auto priors_layout = instance.prior_box_memory()->get_layout();

        const int num_of_images = static_cast<int>(bboxes.size());
        const int num_of_priors = priors_layout.spatial(1) / args->prior_info_size;
-        const int num_loc_classes = args->share_location ? 1 : args->num_classes;
+        const int num_classes = (args->num_classes == -1) ? confidence_layout.feature() / num_of_priors : args->num_classes;  // -1 is a sentinel: derive the class count as confidence features / priors; NOTE(review): confirm num_of_priors cannot be 0 here
+        const int num_loc_classes = args->share_location ? 1 : num_classes;  // one location class when share_location is set, otherwise one per (resolved) class

        // Extract locations per image.
        std::vector<std::vector<std::vector<bounding_box>>> locations(
@@ -812,9 +817,9 @@ public:
        }
        // Extract confidences per image.
        if (nms_type == NMSType::CAFFE) {
-            extract_confidences_per_image_caffe<dtype>(stream, instance, confidences, num_of_priors);
+            extract_confidences_per_image_caffe<dtype>(stream, instance, confidences, num_of_priors, num_classes);
        } else {
-            extract_confidences_per_image_mxnet<dtype>(stream, instance, confidences, num_of_priors, scoreIndexPairs);
+            extract_confidences_per_image_mxnet<dtype>(stream, instance, confidences, num_of_priors, num_classes, scoreIndexPairs);
        }
    }

--- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/detection_output.cpp
+++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/detection_output.cpp
@@ -197,9 +197,24 @@ protected:

        auto params = ngraph::builder::makeDynamicParams(ngraph::element::f32, inputDynamicShapes);
        auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::opset3::Parameter>(params));
-        auto detOut = ngraph::builder::makeDetectionOutput(paramOuts, attrs);
-        ngraph::ResultVector results{std::make_shared<ngraph::opset3::Result>(detOut)};
-        function = std::make_shared<ngraph::Function>(results, params, "DetectionOutputDynamic");
+
+        if (attrs.num_classes == -1) {  // -1 class count: construct the v8 DetectionOutput op directly rather than via makeDetectionOutput
+            std::shared_ptr<ov::op::v8::DetectionOutput> detOut;
+
+            if (paramOuts.size() == 3)  // 3-input form of the op
+                detOut = std::make_shared<ov::op::v8::DetectionOutput>(paramOuts[0], paramOuts[1], paramOuts[2], attrs);
+            else if (paramOuts.size() == 5)  // 5-input form of the op
+                detOut = std::make_shared<ov::op::v8::DetectionOutput>(paramOuts[0], paramOuts[1], paramOuts[2], paramOuts[3], paramOuts[4], attrs);
+            else  // any other input count is rejected
+                throw std::runtime_error("DetectionOutput layer supports only 3 or 5 inputs");
+
+            ngraph::ResultVector results{std::make_shared<ngraph::opset3::Result>(detOut)};
+            function = std::make_shared<ngraph::Function>(results, params, "DetectionOutputDynamic");
+        } else {  // explicit class count: keep the original makeDetectionOutput builder path
+            auto detOut = ngraph::builder::makeDetectionOutput(paramOuts, attrs);
+            ngraph::ResultVector results{std::make_shared<ngraph::opset3::Result>(detOut)};
+            function = std::make_shared<ngraph::Function>(results, params, "DetectionOutputDynamic");
+        }
    }

 private:
@@ -242,7 +257,7 @@ TEST_P(DetectionOutputLayerGPUTest, CompareWithRefs) {

 namespace {

-const int numClasses = 11;
+const std::vector<int> numClasses = {11, -1};  // [0]: explicit class count used by commonAttributes; [1]: -1 used by commonAttributes_v8
 const int backgroundLabelId = 0;
 const std::vector<int> topK = {75};
 const std::vector<std::vector<int>> keepTopK = { {50}, {100} };
@@ -256,7 +271,7 @@ const float objectnessScore = 0.4f;
 const std::vector<size_t> numberBatch = {1, 2};

 const auto commonAttributes = ::testing::Combine(
-    ::testing::Values(numClasses),
+    ::testing::Values(numClasses[0]),
    ::testing::Values(backgroundLabelId),
    ::testing::ValuesIn(topK),
    ::testing::ValuesIn(keepTopK),
@@ -268,6 +283,18 @@ const auto commonAttributes = ::testing::Combine(
    ::testing::ValuesIn(decreaseLabelId)
 );

+const auto commonAttributes_v8 = ::testing::Combine(  // attribute combinations for the numClasses[1] (-1) case; only codeType is swept
+    ::testing::Values(numClasses[1]),  // -1; presumably becomes attrs.num_classes in the test fixture — TODO confirm
+    ::testing::Values(backgroundLabelId),
+    ::testing::Values(topK[0]),  // first topK entry only
+    ::testing::Values(keepTopK[0]),  // first keepTopK entry only
+    ::testing::ValuesIn(codeType),  // every codeType value
+    ::testing::Values(nmsThreshold),
+    ::testing::Values(confidenceThreshold),
+    ::testing::Values(clipAfterNms[0]),  // first clipAfterNms entry only
+    ::testing::Values(clipBeforeNms[0]),  // first clipBeforeNms entry only
+    ::testing::Values(decreaseLabelId[0])  // first decreaseLabelId entry only
+);
 /* =============== 3 inputs cases =============== */

 const std::vector<ParamsWhichSizeDependsDynamic> specificParams3InDynamic = {
@@ -362,9 +389,21 @@ const auto params3InputsDynamic = ::testing::Combine(
        ::testing::Values(ov::test::utils::DEVICE_GPU)
 );

+const auto params3InputsDynamic_v8 = ::testing::Combine(  // 3-input test parameters built on commonAttributes_v8
+        commonAttributes_v8,
+        ::testing::Values(specificParams3InDynamic[0]),  // only the first 3-input dynamic shape configuration
+        ::testing::ValuesIn(numberBatch),  // every batch size listed in numberBatch
+        ::testing::Values(objectnessScore),
+        ::testing::Values(true),  // NOTE(review): meaning of this flag comes from the test's parameter tuple, which is not visible here
+        ::testing::Values(ov::test::utils::DEVICE_GPU)
+);
+
 INSTANTIATE_TEST_SUITE_P(smoke_GPUDetectionOutputDynamic3In, DetectionOutputLayerGPUTest,
                         params3InputsDynamic,
                         DetectionOutputLayerGPUTest::getTestCaseName);

+INSTANTIATE_TEST_SUITE_P(smoke_GPUDetectionOutputV8Dynamic3In, DetectionOutputLayerGPUTest,  // registers the params3InputsDynamic_v8 cases for DetectionOutputLayerGPUTest
+                         params3InputsDynamic_v8,
+                         DetectionOutputLayerGPUTest::getTestCaseName);
 } // namespace
 } // namespace GPULayerTestsDefinitions