diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp index 7fbc7f38801..3fb57d21cd8 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp @@ -282,6 +282,12 @@ public: auto out_ptr = lock.begin(); const auto& args = instance.argument; + + auto confidence_layout = instance.confidence_memory()->get_layout(); + auto priors_layout = instance.prior_box_memory()->get_layout(); + + const int num_of_priors = priors_layout.spatial(1) / args->prior_info_size; + const int num_classes = (args->num_classes == -1) ? confidence_layout.feature() / num_of_priors : args->num_classes; // Per image -> For each label: Pair (score, prior index) std::vector>>> final_detections; for (int image = 0; image < num_of_images; ++image) { @@ -290,7 +296,7 @@ public: std::map> indices; int num_det = 0; if (nms_type == NMSType::CAFFE) { - for (int cls = 0; cls < static_cast(args->num_classes); ++cls) { + for (int cls = 0; cls < num_classes; ++cls) { if (static_cast(cls) == args->background_label_id) { conf_per_image[cls].clear(); continue; // Skip background class. @@ -522,9 +528,7 @@ public: template void extract_confidences_per_image_caffe(stream& stream, const detection_output_inst& instance, std::vector>>>& confidences, - const int num_of_priors) { - const int num_classes = instance.argument->num_classes; - + const int num_of_priors, const int num_classes) { const int num_of_images = static_cast(confidences.size()); auto input_confidence = instance.confidence_memory(); const float confidence_threshold = instance.argument->confidence_threshold; @@ -616,9 +620,8 @@ public: template void extract_confidences_per_image_mxnet(stream& stream, const detection_output_inst& instance, std::vector>>>& confidences, - const int num_of_priors, + const int num_of_priors, const int num_classes, std::vector>>>& scoreIndexPairs) { - const int num_classes = instance.argument->num_classes; const int background_label_id = instance.argument->background_label_id; const int num_of_images = static_cast(confidences.size()); auto input_confidence = instance.confidence_memory(); @@ -750,11 +753,13 @@ public: const auto& args = instance.argument; + auto confidence_layout = instance.confidence_memory()->get_layout(); auto priors_layout = instance.prior_box_memory()->get_layout(); const int num_of_images = static_cast(bboxes.size()); const int num_of_priors = priors_layout.spatial(1) / args->prior_info_size; - const int num_loc_classes = args->share_location ? 1 : args->num_classes; + const int num_classes = (args->num_classes == -1) ? confidence_layout.feature() / num_of_priors : args->num_classes; + const int num_loc_classes = args->share_location ? 1 : num_classes; // Extract locations per image. std::vector>> locations( @@ -812,9 +817,9 @@ public: } // Extract confidences per image. if (nms_type == NMSType::CAFFE) { - extract_confidences_per_image_caffe(stream, instance, confidences, num_of_priors); + extract_confidences_per_image_caffe(stream, instance, confidences, num_of_priors, num_classes); } else { - extract_confidences_per_image_mxnet(stream, instance, confidences, num_of_priors, scoreIndexPairs); + extract_confidences_per_image_mxnet(stream, instance, confidences, num_of_priors, num_classes, scoreIndexPairs); } } diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/detection_output.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/detection_output.cpp index 5cb201d48ca..cba4a9d88b3 100644 --- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/detection_output.cpp +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/detection_output.cpp @@ -197,9 +197,24 @@ protected: auto params = ngraph::builder::makeDynamicParams(ngraph::element::f32, inputDynamicShapes); auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); - auto detOut = ngraph::builder::makeDetectionOutput(paramOuts, attrs); - ngraph::ResultVector results{std::make_shared(detOut)}; - function = std::make_shared(results, params, "DetectionOutputDynamic"); + + if (attrs.num_classes == -1) { + std::shared_ptr detOut; + + if (paramOuts.size() == 3) + detOut = std::make_shared(paramOuts[0], paramOuts[1], paramOuts[2], attrs); + else if (paramOuts.size() == 5) + detOut = std::make_shared(paramOuts[0], paramOuts[1], paramOuts[2], paramOuts[3], paramOuts[4], attrs); + else + throw std::runtime_error("DetectionOutput layer supports only 3 or 5 inputs"); + + ngraph::ResultVector results{std::make_shared(detOut)}; + function = std::make_shared(results, params, "DetectionOutputDynamic"); + } else { + auto detOut = ngraph::builder::makeDetectionOutput(paramOuts, attrs); + ngraph::ResultVector results{std::make_shared(detOut)}; + function = std::make_shared(results, params, "DetectionOutputDynamic"); + } } private: @@ -242,7 +257,7 @@ TEST_P(DetectionOutputLayerGPUTest, CompareWithRefs) { namespace { -const int numClasses = 11; +const std::vector numClasses = {11, -1}; const int backgroundLabelId = 0; const std::vector topK = {75}; const std::vector> keepTopK = { {50}, {100} }; @@ -256,7 +271,7 @@ const float objectnessScore = 0.4f; const std::vector numberBatch = {1, 2}; const auto commonAttributes = ::testing::Combine( - ::testing::Values(numClasses), + ::testing::Values(numClasses[0]), ::testing::Values(backgroundLabelId), ::testing::ValuesIn(topK), ::testing::ValuesIn(keepTopK), @@ -268,6 +283,18 @@ const auto commonAttributes = ::testing::Combine( ::testing::ValuesIn(decreaseLabelId) ); +const auto commonAttributes_v8 = ::testing::Combine( + ::testing::Values(numClasses[1]), + ::testing::Values(backgroundLabelId), + ::testing::Values(topK[0]), + ::testing::Values(keepTopK[0]), + ::testing::ValuesIn(codeType), + ::testing::Values(nmsThreshold), + ::testing::Values(confidenceThreshold), + ::testing::Values(clipAfterNms[0]), + ::testing::Values(clipBeforeNms[0]), + ::testing::Values(decreaseLabelId[0]) +); /* =============== 3 inputs cases =============== */ const std::vector specificParams3InDynamic = { @@ -362,9 +389,21 @@ const auto params3InputsDynamic = ::testing::Combine( ::testing::Values(ov::test::utils::DEVICE_GPU) ); +const auto params3InputsDynamic_v8 = ::testing::Combine( + commonAttributes_v8, + ::testing::Values(specificParams3InDynamic[0]), + ::testing::ValuesIn(numberBatch), + ::testing::Values(objectnessScore), + ::testing::Values(true), + ::testing::Values(ov::test::utils::DEVICE_GPU) +); + INSTANTIATE_TEST_SUITE_P(smoke_GPUDetectionOutputDynamic3In, DetectionOutputLayerGPUTest, params3InputsDynamic, DetectionOutputLayerGPUTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_GPUDetectionOutputV8Dynamic3In, DetectionOutputLayerGPUTest, + params3InputsDynamic_v8, + DetectionOutputLayerGPUTest::getTestCaseName); } // namespace } // namespace GPULayerTestsDefinitions