add num_classes=-1 support in cpu_impl of detection_output (#18924)

This commit is contained in:
Wilson Seok 2023-08-03 03:37:19 +09:00 committed by GitHub
parent 9e9cf72973
commit d51fc7adad
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 58 additions and 14 deletions

View File

@ -282,6 +282,12 @@ public:
auto out_ptr = lock.begin();
const auto& args = instance.argument;
auto confidence_layout = instance.confidence_memory()->get_layout();
auto priors_layout = instance.prior_box_memory()->get_layout();
const int num_of_priors = priors_layout.spatial(1) / args->prior_info_size;
const int num_classes = (args->num_classes == -1) ? confidence_layout.feature() / num_of_priors : args->num_classes;
// Per image -> For each label: Pair (score, prior index)
std::vector<std::map<int, std::vector<std::pair<float, int>>>> final_detections;
for (int image = 0; image < num_of_images; ++image) {
@ -290,7 +296,7 @@ public:
std::map<int, std::vector<int>> indices;
int num_det = 0;
if (nms_type == NMSType::CAFFE) {
for (int cls = 0; cls < static_cast<int>(args->num_classes); ++cls) {
for (int cls = 0; cls < num_classes; ++cls) {
if (static_cast<int>(cls) == args->background_label_id) {
conf_per_image[cls].clear();
continue; // Skip background class.
@ -522,9 +528,7 @@ public:
template <typename dtype>
void extract_confidences_per_image_caffe(stream& stream, const detection_output_inst& instance,
std::vector<std::vector<std::vector<std::pair<float, int>>>>& confidences,
const int num_of_priors) {
const int num_classes = instance.argument->num_classes;
const int num_of_priors, const int num_classes) {
const int num_of_images = static_cast<int>(confidences.size());
auto input_confidence = instance.confidence_memory();
const float confidence_threshold = instance.argument->confidence_threshold;
@ -616,9 +620,8 @@ public:
template <typename dtype>
void extract_confidences_per_image_mxnet(stream& stream, const detection_output_inst& instance,
std::vector<std::vector<std::vector<std::pair<float, int>>>>& confidences,
const int num_of_priors,
const int num_of_priors, const int num_classes,
std::vector<std::vector<std::pair<float, std::pair<int, int>>>>& scoreIndexPairs) {
const int num_classes = instance.argument->num_classes;
const int background_label_id = instance.argument->background_label_id;
const int num_of_images = static_cast<int>(confidences.size());
auto input_confidence = instance.confidence_memory();
@ -750,11 +753,13 @@ public:
const auto& args = instance.argument;
auto confidence_layout = instance.confidence_memory()->get_layout();
auto priors_layout = instance.prior_box_memory()->get_layout();
const int num_of_images = static_cast<int>(bboxes.size());
const int num_of_priors = priors_layout.spatial(1) / args->prior_info_size;
const int num_loc_classes = args->share_location ? 1 : args->num_classes;
const int num_classes = (args->num_classes == -1) ? confidence_layout.feature() / num_of_priors : args->num_classes;
const int num_loc_classes = args->share_location ? 1 : num_classes;
// Extract locations per image.
std::vector<std::vector<std::vector<bounding_box>>> locations(
@ -812,9 +817,9 @@ public:
}
// Extract confidences per image.
if (nms_type == NMSType::CAFFE) {
extract_confidences_per_image_caffe<dtype>(stream, instance, confidences, num_of_priors);
extract_confidences_per_image_caffe<dtype>(stream, instance, confidences, num_of_priors, num_classes);
} else {
extract_confidences_per_image_mxnet<dtype>(stream, instance, confidences, num_of_priors, scoreIndexPairs);
extract_confidences_per_image_mxnet<dtype>(stream, instance, confidences, num_of_priors, num_classes, scoreIndexPairs);
}
}

View File

@ -197,9 +197,24 @@ protected:
auto params = ngraph::builder::makeDynamicParams(ngraph::element::f32, inputDynamicShapes);
auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::opset3::Parameter>(params));
auto detOut = ngraph::builder::makeDetectionOutput(paramOuts, attrs);
ngraph::ResultVector results{std::make_shared<ngraph::opset3::Result>(detOut)};
function = std::make_shared<ngraph::Function>(results, params, "DetectionOutputDynamic");
if (attrs.num_classes == -1) {
std::shared_ptr<ov::op::v8::DetectionOutput> detOut;
if (paramOuts.size() == 3)
detOut = std::make_shared<ov::op::v8::DetectionOutput>(paramOuts[0], paramOuts[1], paramOuts[2], attrs);
else if (paramOuts.size() == 5)
detOut = std::make_shared<ov::op::v8::DetectionOutput>(paramOuts[0], paramOuts[1], paramOuts[2], paramOuts[3], paramOuts[4], attrs);
else
throw std::runtime_error("DetectionOutput layer supports only 3 or 5 inputs");
ngraph::ResultVector results{std::make_shared<ngraph::opset3::Result>(detOut)};
function = std::make_shared<ngraph::Function>(results, params, "DetectionOutputDynamic");
} else {
auto detOut = ngraph::builder::makeDetectionOutput(paramOuts, attrs);
ngraph::ResultVector results{std::make_shared<ngraph::opset3::Result>(detOut)};
function = std::make_shared<ngraph::Function>(results, params, "DetectionOutputDynamic");
}
}
private:
@ -242,7 +257,7 @@ TEST_P(DetectionOutputLayerGPUTest, CompareWithRefs) {
namespace {
const int numClasses = 11;
const std::vector<int> numClasses = {11, -1};
const int backgroundLabelId = 0;
const std::vector<int> topK = {75};
const std::vector<std::vector<int>> keepTopK = { {50}, {100} };
@ -256,7 +271,7 @@ const float objectnessScore = 0.4f;
const std::vector<size_t> numberBatch = {1, 2};
const auto commonAttributes = ::testing::Combine(
::testing::Values(numClasses),
::testing::Values(numClasses[0]),
::testing::Values(backgroundLabelId),
::testing::ValuesIn(topK),
::testing::ValuesIn(keepTopK),
@ -268,6 +283,18 @@ const auto commonAttributes = ::testing::Combine(
::testing::ValuesIn(decreaseLabelId)
);
// Attribute generator for the num_classes == -1 case: numClasses[1] is the -1
// entry of the numClasses list. Only codeType is swept (ValuesIn); every other
// attribute is pinned to a single value, using element [0] of the indexed lists.
// NOTE(review): presumably this drives the attrs.num_classes == -1 branch that
// builds ov::op::v8::DetectionOutput in the test set-up - confirm.
const auto commonAttributes_v8 = ::testing::Combine(
::testing::Values(numClasses[1]),
::testing::Values(backgroundLabelId),
::testing::Values(topK[0]),
::testing::Values(keepTopK[0]),
::testing::ValuesIn(codeType),
::testing::Values(nmsThreshold),
::testing::Values(confidenceThreshold),
::testing::Values(clipAfterNms[0]),
::testing::Values(clipBeforeNms[0]),
::testing::Values(decreaseLabelId[0])
);
/* =============== 3 inputs cases =============== */
const std::vector<ParamsWhichSizeDependsDynamic> specificParams3InDynamic = {
@ -362,9 +389,21 @@ const auto params3InputsDynamic = ::testing::Combine(
::testing::Values(ov::test::utils::DEVICE_GPU)
);
// Full parameter generator for the 3-input, num_classes == -1 variant: pairs
// commonAttributes_v8 with only the first entry of specificParams3InDynamic,
// every value in numberBatch, objectnessScore, and the GPU device.
// NOTE(review): the meaning of the literal `true` is not visible in this chunk;
// it is whatever flag occupies that position in the test parameter tuple - confirm.
const auto params3InputsDynamic_v8 = ::testing::Combine(
commonAttributes_v8,
::testing::Values(specificParams3InDynamic[0]),
::testing::ValuesIn(numberBatch),
::testing::Values(objectnessScore),
::testing::Values(true),
::testing::Values(ov::test::utils::DEVICE_GPU)
);
// Instantiates DetectionOutputLayerGPUTest over params3InputsDynamic under the
// smoke_GPUDetectionOutputDynamic3In prefix; per-case names are produced by
// DetectionOutputLayerGPUTest::getTestCaseName.
INSTANTIATE_TEST_SUITE_P(smoke_GPUDetectionOutputDynamic3In, DetectionOutputLayerGPUTest,
params3InputsDynamic,
DetectionOutputLayerGPUTest::getTestCaseName);
// Instantiates the same fixture over params3InputsDynamic_v8 (the
// num_classes == -1 parameter set) under a separate
// smoke_GPUDetectionOutputV8Dynamic3In prefix; per-case names are produced by
// DetectionOutputLayerGPUTest::getTestCaseName.
INSTANTIATE_TEST_SUITE_P(smoke_GPUDetectionOutputV8Dynamic3In, DetectionOutputLayerGPUTest,
params3InputsDynamic_v8,
DetectionOutputLayerGPUTest::getTestCaseName);
} // namespace
} // namespace GPULayerTestsDefinitions