[CPU] Add support for the 4th and 5th inputs of DetectionOutput (#1290)

* [CPU] Add support for the 4th and 5th inputs of DetectionOutput

* fix any comments

* move reference to ngraph

* some changes for mx nms

* change namespace for ref impl
This commit is contained in:
Maxim Andronov 2020-08-07 09:05:41 +03:00 committed by GitHub
parent 8c118ef8b2
commit f9023ff7da
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 1140 additions and 46 deletions

View File

@ -26,7 +26,7 @@ class DetectionOutputImpl: public ExtLayerBase {
public:
explicit DetectionOutputImpl(const CNNLayer* layer) {
try {
if (layer->insData.size() != 3)
if (layer->insData.size() != 3 && layer->insData.size() != 5)
THROW_IE_EXCEPTION << "Incorrect number of input edges for layer " << layer->name;
if (layer->outData.empty())
THROW_IE_EXCEPTION << "Incorrect number of output edges for layer " << layer->name;
@ -50,6 +50,9 @@ public:
_offset = _normalized ? 0 : 1;
_num_loc_classes = _share_location ? 1 : _num_classes;
with_add_box_pred = layer->insData.size() == 5;
_objectness_score = layer->GetParamAsFloat("objectness_score", 0.0f);
std::string code_type_str = layer->GetParamAsString("code_type", "caffe.PriorBoxParameter.CORNER");
_code_type = (code_type_str == "caffe.PriorBoxParameter.CENTER_SIZE" ? CodeType::CENTER_SIZE
: CodeType::CORNER);
@ -109,9 +112,8 @@ public:
_num_priors_actual = InferenceEngine::make_shared_blob<int>({Precision::I32, num_priors_actual_size, C});
_num_priors_actual->allocate();
addConfig(layer, {DataConfigurator(ConfLayout::PLN),
DataConfigurator(ConfLayout::PLN),
DataConfigurator(ConfLayout::PLN)}, {DataConfigurator(ConfLayout::PLN)});
std::vector<DataConfigurator> in_data_conf(layer->insData.size(), DataConfigurator(ConfLayout::PLN));
addConfig(layer, in_data_conf, {DataConfigurator(ConfLayout::PLN)});
} catch (InferenceEngine::details::InferenceEngineException &ex) {
errorMsg = ex.what();
}
@ -121,51 +123,81 @@ public:
ResponseDesc *resp) noexcept override {
float *dst_data = outputs[0]->buffer();
const float *loc_data = inputs[idx_location]->buffer();
const float *conf_data = inputs[idx_confidence]->buffer();
const float *prior_data = inputs[idx_priors]->buffer();
const float *loc_data = inputs[idx_location]->buffer().as<const float *>();
const float *conf_data = inputs[idx_confidence]->buffer().as<const float *>();
const float *prior_data = inputs[idx_priors]->buffer().as<const float *>();
const float *arm_conf_data = inputs.size() > 3 ? inputs[idx_arm_confidence]->buffer().as<const float *>() : nullptr;
const float *arm_loc_data = inputs.size() > 4 ? inputs[idx_arm_location]->buffer().as<const float *>() : nullptr;
const int N = inputs[idx_confidence]->getTensorDesc().getDims()[0];
float *decoded_bboxes_data = _decoded_bboxes->buffer();
float *reordered_conf_data = _reordered_conf->buffer();
float *bbox_sizes_data = _bbox_sizes->buffer();
int *detections_data = _detections_count->buffer();
int *buffer_data = _buffer->buffer();
int *indices_data = _indices->buffer();
int *num_priors_actual = _num_priors_actual->buffer();
float *decoded_bboxes_data = _decoded_bboxes->buffer().as<float *>();
float *reordered_conf_data = _reordered_conf->buffer().as<float *>();
float *bbox_sizes_data = _bbox_sizes->buffer().as<float *>();
int *detections_data = _detections_count->buffer().as<int *>();
int *buffer_data = _buffer->buffer().as<int *>();
int *indices_data = _indices->buffer().as<int *>();
int *num_priors_actual = _num_priors_actual->buffer().as<int *>();
for (int n = 0; n < N; ++n) {
const float *ppriors = prior_data;
const float *prior_variances = prior_data + _num_priors*_prior_size;
if (_priors_batches) {
ppriors += _variance_encoded_in_target ? n*_num_priors*_prior_size : 2*n*_num_priors*_prior_size;
prior_variances += _variance_encoded_in_target ? 0 : n*_num_priors*_prior_size;
prior_variances += _variance_encoded_in_target ? 0 : 2*n*_num_priors*_prior_size;
}
if (_share_location) {
const float *ploc = loc_data + n*4*_num_priors;
float *pboxes = decoded_bboxes_data + n*4*_num_priors;
float *psizes = bbox_sizes_data + n*_num_priors;
decodeBBoxes(ppriors, ploc, prior_variances, pboxes, psizes, num_priors_actual, n);
if (with_add_box_pred) {
const float *p_arm_loc = arm_loc_data + n*4*_num_priors;
decodeBBoxes(ppriors, p_arm_loc, prior_variances, pboxes, psizes, num_priors_actual, n, _offset, _prior_size);
decodeBBoxes(pboxes, ploc, prior_variances, pboxes, psizes, num_priors_actual, n, 0, 4, false);
} else {
decodeBBoxes(ppriors, ploc, prior_variances, pboxes, psizes, num_priors_actual, n, _offset, _prior_size);
}
} else {
for (int c = 0; c < _num_loc_classes; ++c) {
if (c == _background_label_id) {
continue;
}
const float *ploc = loc_data + n*4*_num_loc_classes*_num_priors + c*4;
float *pboxes = decoded_bboxes_data + n*4*_num_loc_classes*_num_priors + c*4*_num_priors;
float *psizes = bbox_sizes_data + n*_num_loc_classes*_num_priors + c*_num_priors;
decodeBBoxes(ppriors, ploc, prior_variances, pboxes, psizes, num_priors_actual, n);
if (with_add_box_pred) {
const float *p_arm_loc = arm_loc_data + n*4*_num_loc_classes*_num_priors + c*4;
decodeBBoxes(ppriors, p_arm_loc, prior_variances, pboxes, psizes, num_priors_actual, n, _offset, _prior_size);
decodeBBoxes(pboxes, ploc, prior_variances, pboxes, psizes, num_priors_actual, n, 0, 4, false);
} else {
decodeBBoxes(ppriors, ploc, prior_variances, pboxes, psizes, num_priors_actual, n, _offset, _prior_size);
}
}
}
}
for (int n = 0; n < N; ++n) {
for (int c = 0; c < _num_classes; ++c) {
if (with_add_box_pred) {
for (int n = 0; n < N; ++n) {
for (int p = 0; p < _num_priors; ++p) {
reordered_conf_data[n*_num_priors*_num_classes + c*_num_priors + p] = conf_data[n*_num_priors*_num_classes + p*_num_classes + c];
if (arm_conf_data[n*_num_priors*2 + p * 2 + 1] < _objectness_score) {
for (int c = 0; c < _num_classes; ++c) {
reordered_conf_data[n*_num_priors*_num_classes + c*_num_priors + p] = c == _background_label_id ? 1.0f : 0.0f;
}
} else {
for (int c = 0; c < _num_classes; ++c) {
reordered_conf_data[n*_num_priors*_num_classes + c*_num_priors + p] = conf_data[n*_num_priors*_num_classes + p*_num_classes + c];
}
}
}
}
} else {
for (int n = 0; n < N; ++n) {
for (int c = 0; c < _num_classes; ++c) {
for (int p = 0; p < _num_priors; ++p) {
reordered_conf_data[n*_num_priors*_num_classes + c*_num_priors + p] = conf_data[n*_num_priors*_num_classes + p*_num_classes + c];
}
}
}
}
@ -204,8 +236,8 @@ public:
int *pdetections = detections_data + n*_num_classes;
const float *pconf = reordered_conf_data + n*_num_classes*_num_priors;
const float *pboxes = decoded_bboxes_data + n*4*_num_priors;
const float *psizes = bbox_sizes_data + n*_num_priors;
const float *pboxes = decoded_bboxes_data + n*4*_num_loc_classes*_num_priors;
const float *psizes = bbox_sizes_data + n*_num_loc_classes*_num_priors;
nms_mx(pconf, pboxes, psizes, pbuffer, pindices, pdetections, _num_priors);
}
@ -220,6 +252,7 @@ public:
for (int c = 0; c < _num_classes; ++c) {
int detections = detections_data[n*_num_classes + c];
int *pindices = indices_data + n*_num_classes*_num_priors + c*_num_priors;
float *pconf = reordered_conf_data + n*_num_classes*_num_priors + c*_num_priors;
for (int i = 0; i < detections; ++i) {
@ -310,7 +343,8 @@ private:
const int idx_location = 0;
const int idx_confidence = 1;
const int idx_priors = 2;
const int idx_arm_confidence = 3;
const int idx_arm_location = 4;
int _num_classes = 0;
int _background_label_id = 0;
@ -324,6 +358,8 @@ private:
bool _clip_after_nms = false; // clip bounding boxes after nms step
bool _decrease_label_id = false;
bool with_add_box_pred = false;
int _image_width = 0;
int _image_height = 0;
int _prior_size = 4;
@ -332,6 +368,7 @@ private:
float _nms_threshold = 0.0f;
float _confidence_threshold = 0.0f;
float _objectness_score = 0.0f;
int _num = 0;
int _num_loc_classes = 0;
@ -344,7 +381,8 @@ private:
};
void decodeBBoxes(const float *prior_data, const float *loc_data, const float *variance_data,
float *decoded_bboxes, float *decoded_bbox_sizes, int* num_priors_actual, int n);
float *decoded_bboxes, float *decoded_bbox_sizes, int* num_priors_actual, int n, const int& offs, const int& pr_size,
bool decodeType = true); // after ARM = false
void nms_cf(const float *conf_data, const float *bboxes, const float *sizes,
int *buffer, int *indices, int &detections, int num_priors_actual);
@ -384,8 +422,8 @@ static inline float JaccardOverlap(const float *decoded_bbox,
float xmin2 = decoded_bbox[idx2*4 + 0];
float ymin2 = decoded_bbox[idx2*4 + 1];
float ymax2 = decoded_bbox[idx2*4 + 3];
float xmax2 = decoded_bbox[idx2*4 + 2];
float ymax2 = decoded_bbox[idx2*4 + 3];
if (xmin2 > xmax1 || xmax2 < xmin1 || ymin2 > ymax1 || ymax2 < ymin1) {
return 0.0f;
@ -411,34 +449,36 @@ static inline float JaccardOverlap(const float *decoded_bbox,
}
void DetectionOutputImpl::decodeBBoxes(const float *prior_data,
const float *loc_data,
const float *variance_data,
float *decoded_bboxes,
float *decoded_bbox_sizes,
int* num_priors_actual,
int n) {
const float *loc_data,
const float *variance_data,
float *decoded_bboxes,
float *decoded_bbox_sizes,
int* num_priors_actual,
int n,
const int& offs,
const int& pr_size,
bool decodeType) {
num_priors_actual[n] = _num_priors;
if (!_normalized) {
if (!_normalized && decodeType) {
int num = 0;
for (; num < _num_priors; ++num) {
float batch_id = prior_data[num * _prior_size + 0];
float batch_id = prior_data[num * pr_size + 0];
if (batch_id == -1.f) {
num_priors_actual[n] = num;
break;
}
}
}
parallel_for(num_priors_actual[n], [&](int p) {
float new_xmin = 0.0f;
float new_ymin = 0.0f;
float new_xmax = 0.0f;
float new_ymax = 0.0f;
float prior_xmin = prior_data[p*_prior_size + 0 + _offset];
float prior_ymin = prior_data[p*_prior_size + 1 + _offset];
float prior_xmax = prior_data[p*_prior_size + 2 + _offset];
float prior_ymax = prior_data[p*_prior_size + 3 + _offset];
float prior_xmin = prior_data[p*pr_size + 0 + offs];
float prior_ymin = prior_data[p*pr_size + 1 + offs];
float prior_xmax = prior_data[p*pr_size + 2 + offs];
float prior_ymax = prior_data[p*pr_size + 3 + offs];
float loc_xmin = loc_data[4*p*_num_loc_classes + 0];
float loc_ymin = loc_data[4*p*_num_loc_classes + 1];
@ -591,7 +631,12 @@ void DetectionOutputImpl::nms_mx(const float* conf_data,
bool keep = true;
for (int k = 0; k < ndetection; ++k) {
const int kept_idx = pindices[k];
float overlap = JaccardOverlap(bboxes, sizes, prior, kept_idx);
float overlap = 0.0f;
if (_share_location) {
overlap = JaccardOverlap(bboxes, sizes, prior, kept_idx);
} else {
overlap = JaccardOverlap(bboxes, sizes, cls*_num_priors + prior, cls*_num_priors + kept_idx);
}
if (overlap > _nms_threshold) {
keep = false;
break;

View File

@ -0,0 +1,85 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "single_layer_tests/detection_output.hpp"
using namespace LayerTestsDefinitions;
namespace {
// Attribute values shared by the 3-input and the 5-input DetectionOutput test suites.
// Scalars are passed through ::testing::Values, vectors are swept with ::testing::ValuesIn.
const int numClasses = 11;
const int backgroundLabelId = 0;
const std::vector<int> topK = {75};
const std::vector<std::vector<int>> keepTopK = { {50}, {100} };
const std::vector<std::string> codeType = {"caffe.PriorBoxParameter.CORNER", "caffe.PriorBoxParameter.CENTER_SIZE"};
const float nmsThreshold = 0.5f;
const float confidenceThreshold = 0.3f;
const std::vector<bool> clipAfterNms = {true, false};
const std::vector<bool> clipBeforeNms = {true, false};
const std::vector<bool> decreaseLabelId = {true, false};
// Only used by the 5-input suite; the 3-input suite passes 0.0f instead.
const float objectnessScore = 0.4f;
// Batch sizes; SetUp() writes the chosen value into dimension 0 of every input shape.
const std::vector<size_t> numberBatch = {1, 2};
// Generator for the DetectionOutputAttributes tuple; the argument order must match that tuple:
// numClasses, backgroundLabelId, topK, keepTopK, codeType, nmsThreshold, confidenceThreshold,
// clipAfterNms, clipBeforeNms, decreaseLabelId.
const auto commonAttributes = ::testing::Combine(
::testing::Values(numClasses),
::testing::Values(backgroundLabelId),
::testing::ValuesIn(topK),
::testing::ValuesIn(keepTopK),
::testing::ValuesIn(codeType),
::testing::Values(nmsThreshold),
::testing::Values(confidenceThreshold),
::testing::ValuesIn(clipAfterNms),
::testing::ValuesIn(clipBeforeNms),
::testing::ValuesIn(decreaseLabelId)
);
/* =============== 3 inputs cases =============== */
// Each entry is a ParamsWhichSizeDepends tuple:
//   {varianceEncodedInTarget, shareLocation, normalized, inputHeight, inputWidth,
//    Location shape, Confidence shape, Priors shape, ArmConfidence shape, ArmLocation shape}.
// The two ARM shapes are left empty here; the test fixture then keeps only the first three inputs.
// NOTE(review): the shapes look like 15 priors x 11 classes (165 = 15*11, 60 = 15*4, 660 = 15*11*4,
// 75 = 15*5 for non-normalized priors) — confirm against the operation specification.
const std::vector<ParamsWhichSizeDepends> specificParams3In = {
ParamsWhichSizeDepends{true, true, true, 1, 1, {1, 60}, {1, 165}, {1, 1, 60}, {}, {}},
ParamsWhichSizeDepends{true, false, true, 1, 1, {1, 660}, {1, 165}, {1, 1, 60}, {}, {}},
ParamsWhichSizeDepends{false, true, true, 1, 1, {1, 60}, {1, 165}, {1, 2, 60}, {}, {}},
ParamsWhichSizeDepends{false, false, true, 1, 1, {1, 660}, {1, 165}, {1, 2, 60}, {}, {}},
ParamsWhichSizeDepends{true, true, false, 10, 10, {1, 60}, {1, 165}, {1, 1, 75}, {}, {}},
ParamsWhichSizeDepends{true, false, false, 10, 10, {1, 660}, {1, 165}, {1, 1, 75}, {}, {}},
ParamsWhichSizeDepends{false, true, false, 10, 10, {1, 60}, {1, 165}, {1, 2, 75}, {}, {}},
ParamsWhichSizeDepends{false, false, false, 10, 10, {1, 660}, {1, 165}, {1, 2, 75}, {}, {}}
};
// Full DetectionOutputParams generator: common attributes x shape-dependent params x batch,
// with objectnessScore fixed to 0.0f and the CPU device as target.
const auto params3Inputs = ::testing::Combine(
commonAttributes,
::testing::ValuesIn(specificParams3In),
::testing::ValuesIn(numberBatch),
::testing::Values(0.0f),
::testing::Values(CommonTestUtils::DEVICE_CPU)
);
INSTANTIATE_TEST_CASE_P(smoke_DetectionOutput3In, DetectionOutputLayerTest, params3Inputs, DetectionOutputLayerTest::getTestCaseName);
/* =============== 5 inputs cases =============== */
// Same layout as the 3-input table, but the last two tuple fields (ArmConfidence, ArmLocation)
// are populated, so the fixture builds DetectionOutput with all five inputs.
// NOTE(review): ArmConfidence {1, 30} looks like 2 scores per prior for 15 priors, and ArmLocation
// mirrors the Location shape of the same row — confirm against the operation specification.
const std::vector<ParamsWhichSizeDepends> specificParams5In = {
ParamsWhichSizeDepends{true, true, true, 1, 1, {1, 60}, {1, 165}, {1, 1, 60}, {1, 30}, {1, 60}},
ParamsWhichSizeDepends{true, false, true, 1, 1, {1, 660}, {1, 165}, {1, 1, 60}, {1, 30}, {1, 660}},
ParamsWhichSizeDepends{false, true, true, 1, 1, {1, 60}, {1, 165}, {1, 2, 60}, {1, 30}, {1, 60}},
ParamsWhichSizeDepends{false, false, true, 1, 1, {1, 660}, {1, 165}, {1, 2, 60}, {1, 30}, {1, 660}},
ParamsWhichSizeDepends{true, true, false, 10, 10, {1, 60}, {1, 165}, {1, 1, 75}, {1, 30}, {1, 60}},
ParamsWhichSizeDepends{true, false, false, 10, 10, {1, 660}, {1, 165}, {1, 1, 75}, {1, 30}, {1, 660}},
ParamsWhichSizeDepends{false, true, false, 10, 10, {1, 60}, {1, 165}, {1, 2, 75}, {1, 30}, {1, 60}},
ParamsWhichSizeDepends{false, false, false, 10, 10, {1, 660}, {1, 165}, {1, 2, 75}, {1, 30}, {1, 660}}
};
// Full DetectionOutputParams generator for the 5-input form; objectnessScore is the non-zero
// value declared with the shared constants.
const auto params5Inputs = ::testing::Combine(
commonAttributes,
::testing::ValuesIn(specificParams5In),
::testing::ValuesIn(numberBatch),
::testing::Values(objectnessScore),
::testing::Values(CommonTestUtils::DEVICE_CPU)
);
INSTANTIATE_TEST_CASE_P(smoke_DetectionOutput5In, DetectionOutputLayerTest, params5Inputs, DetectionOutputLayerTest::getTestCaseName);
} // namespace

View File

@ -82,18 +82,18 @@ class ActivationLayerTest : public testing::WithParamInterface<activationParams>
public:
ngraph::helpers::ActivationTypes activationType;
static std::string getTestCaseName(const testing::TestParamInfo<activationParams> &obj);
virtual InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo &info) const;
InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo &info) const override;
protected:
void SetUp();
void SetUp() override;
};
class ActivationParamLayerTest : public ActivationLayerTest {
public:
void Infer();
void Infer() override;
protected:
void SetUp();
void SetUp() override;
private:
void generateActivationBlob();

View File

@ -0,0 +1,71 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <cstddef>
#include <vector>
#include <string>
#include <tuple>
#include "ngraph/op/detection_output.hpp"
#include "functional_test_utils/layer_test_utils.hpp"
namespace LayerTestsDefinitions {
// Positions of the DetectionOutput inputs inside the test's shape vector.
// numInputs is the trailing sentinel and therefore equals the maximum input count (5).
enum {
idxLocation,
idxConfidence,
idxPriors,
idxArmConfidence,
idxArmLocation,
numInputs
};
// Attributes that do not influence the input shapes.
using DetectionOutputAttributes = std::tuple<
int, // numClasses
int, // backgroundLabelId
int, // topK
std::vector<int>, // keepTopK
std::string, // codeType
float, // nmsThreshold
float, // confidenceThreshold
bool, // clip_afterNms
bool, // clip_beforeNms
bool // decreaseLabelId
>;
// Attributes bundled together with the input shapes they are expected to agree with.
// The two ARM shapes may be empty, in which case the test builds the 3-input form.
using ParamsWhichSizeDepends = std::tuple<
bool, // varianceEncodedInTarget
bool, // shareLocation
bool, // normalized
size_t, // inputHeight
size_t, // inputWidth
InferenceEngine::SizeVector, // "Location" input
InferenceEngine::SizeVector, // "Confidence" input
InferenceEngine::SizeVector, // "Priors" input
InferenceEngine::SizeVector, // "ArmConfidence" input
InferenceEngine::SizeVector // "ArmLocation" input
>;
// Complete parameter set of one DetectionOutputLayerTest case.
using DetectionOutputParams = std::tuple<
DetectionOutputAttributes,
ParamsWhichSizeDepends,
size_t, // Number of batch
float, // objectnessScore
std::string // Device name
>;
// Value-parameterized single-layer test for DetectionOutput (3-input and 5-input forms).
class DetectionOutputLayerTest : public testing::WithParamInterface<DetectionOutputParams>, public LayerTestsUtils::LayerTestsCommon {
public:
// Produces the readable test-case name from the parameter tuple.
static std::string getTestCaseName(testing::TestParamInfo<DetectionOutputParams> obj);
// Operation attributes unpacked from the test parameters in SetUp().
ngraph::op::DetectionOutputAttrs attrs;
// Shapes of the inputs actually used (three or five entries after SetUp()).
std::vector<InferenceEngine::SizeVector> inShapes;
// Fills every network input with random data using per-input ranges, then runs inference.
void Infer() override;
// Compares only the leading detection rows, up to the first row whose first value is -1.
void Compare(const std::vector<std::uint8_t> &expected, const InferenceEngine::Blob::Ptr &actual) override;
protected:
// Unpacks the parameters and builds the ngraph function under test.
void SetUp() override;
};
} // namespace LayerTestsDefinitions

View File

@ -26,7 +26,7 @@ class RangeLayerTest : public testing::WithParamInterface<RangeParams>,
float start, stop, step;
public:
static std::string getTestCaseName(testing::TestParamInfo<RangeParams> obj);
void Infer();
void Infer() override;
protected:
void SetUp() override;

View File

@ -0,0 +1,164 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <tuple>
#include <vector>
#include "ngraph_functions/builders.hpp"
#include "common_test_utils/data_utils.hpp"
#include "functional_test_utils/layer_test_utils.hpp"
#include "single_layer_tests/detection_output.hpp"
namespace LayerTestsDefinitions {
// Builds the test-case name: input shapes first (with the batch already applied),
// then every DetectionOutput attribute, then the target device.
std::string DetectionOutputLayerTest::getTestCaseName(testing::TestParamInfo<DetectionOutputParams> obj) {
    ngraph::op::DetectionOutputAttrs attrs;
    DetectionOutputAttributes sharedPart;
    ParamsWhichSizeDepends shapePart;
    size_t batchSize;
    std::string deviceName;
    std::tie(sharedPart, shapePart, batchSize, attrs.objectness_score, deviceName) = obj.param;

    std::tie(attrs.num_classes, attrs.background_label_id, attrs.top_k, attrs.keep_top_k, attrs.code_type,
             attrs.nms_threshold, attrs.confidence_threshold, attrs.clip_after_nms, attrs.clip_before_nms,
             attrs.decrease_label_id) = sharedPart;

    // Start with room for all five inputs; the ARM pair is dropped below when not provided.
    const size_t maxInputs = 5;
    std::vector<InferenceEngine::SizeVector> shapes(maxInputs);
    std::tie(attrs.variance_encoded_in_target, attrs.share_location, attrs.normalized, attrs.input_height,
             attrs.input_width, shapes[idxLocation], shapes[idxConfidence], shapes[idxPriors],
             shapes[idxArmConfidence], shapes[idxArmLocation]) = shapePart;

    const bool hasArmInputs = !shapes[idxArmConfidence].empty();
    if (!hasArmInputs) {
        shapes.resize(3);
    }

    // The batch parameter overrides dimension 0 of every remaining input shape.
    for (auto &shape : shapes) {
        shape[0] = batchSize;
    }

    std::ostringstream name;
    name << "IS = { ";
    name << "LOC=" << CommonTestUtils::vec2str(shapes[0]) << "_";
    name << "CONF=" << CommonTestUtils::vec2str(shapes[1]) << "_";
    name << "PRIOR=" << CommonTestUtils::vec2str(shapes[2]);
    if (hasArmInputs) {
        name << "_ARM_CONF=" << CommonTestUtils::vec2str(shapes[3]) << "_";
        name << "ARM_LOC=" << CommonTestUtils::vec2str(shapes[4]);
    }
    name << " }_";

    name << "Classes=" << attrs.num_classes << "_";
    name << "backgrId=" << attrs.background_label_id << "_";
    name << "topK=" << attrs.top_k << "_";
    name << "varEnc=" << attrs.variance_encoded_in_target << "_";
    name << "keepTopK=" << CommonTestUtils::vec2str(attrs.keep_top_k) << "_";
    name << "codeType=" << attrs.code_type << "_";
    name << "shareLoc=" << attrs.share_location << "_";
    name << "nmsThr=" << attrs.nms_threshold << "_";
    name << "confThr=" << attrs.confidence_threshold << "_";
    name << "clipAfterNms=" << attrs.clip_after_nms << "_";
    name << "clipBeforeNms=" << attrs.clip_before_nms << "_";
    name << "decrId=" << attrs.decrease_label_id << "_";
    name << "norm=" << attrs.normalized << "_";
    name << "inH=" << attrs.input_height << "_";
    name << "inW=" << attrs.input_width << "_";
    name << "OS=" << attrs.objectness_score << "_";
    name << "TargetDevice=" << deviceName;
    return name.str();
}
// Creates a fresh infer request, fills every network input with random floats and runs inference.
// The random range/resolution depends on the position of the input in getInputsInfo().
// NOTE(review): positions 1/2/3 are presumably confidence/priors/ARM confidence — this assumes
// getInputsInfo() iterates in the same order as the input index enum; confirm.
void DetectionOutputLayerTest::Infer() {
    inferRequest = executableNetwork.CreateInferRequest();
    inputs.clear();

    size_t inputPos = 0;
    for (const auto &entry : cnnNetwork.getInputsInfo()) {
        const auto &inputInfo = entry.second;

        uint32_t range = 1;
        int32_t resolution = 1;
        switch (inputPos) {
        case 2:
            // Normalized data gets a finer resolution; otherwise a wider range is used instead.
            if (attrs.normalized) {
                resolution = 100;
            } else {
                range = 10;
            }
            break;
        case 1:
        case 3:
            resolution = 1000;
            break;
        default:
            resolution = 10;
            break;
        }

        InferenceEngine::Blob::Ptr blob = make_blob_with_precision(inputInfo->getTensorDesc());
        blob->allocate();
        CommonTestUtils::fill_data_random_float<InferenceEngine::Precision::FP32>(blob, range, 0, resolution);

        inferRequest.SetBlob(inputInfo->name(), blob);
        inputs.push_back(blob);
        ++inputPos;
    }

    inferRequest.Infer();
}
// Compares reference and plugin outputs as float buffers, restricted to the leading detection rows.
// Rows are 7 floats wide; scanning stops at the first row whose first element is -1, so trailing
// rows after that marker are ignored on both sides.
void DetectionOutputLayerTest::Compare(const std::vector<std::uint8_t> &expected, const InferenceEngine::Blob::Ptr &actual) {
    ASSERT_EQ(expected.size(), actual->byteSize());

    auto memory = InferenceEngine::as<InferenceEngine::MemoryBlob>(actual);
    IE_ASSERT(memory);
    const auto lockedMemory = memory->wmap();
    const auto actualBytes = lockedMemory.as<const std::uint8_t *>();

    const float *refValues = reinterpret_cast<const float *>(expected.data());
    const float *outValues = reinterpret_cast<const float *>(actualBytes);

    // Number of floats that belong to rows preceding the first -1 marker.
    const auto countValid = [&actual](const float *values) {
        size_t count = 0;
        for (size_t pos = 0; pos < actual->size(); pos += 7) {
            if (values[pos] == -1) {
                break;
            }
            count += 7;
        }
        return count;
    };

    const size_t refCount = countValid(refValues);
    const size_t outCount = countValid(outValues);

    ASSERT_EQ(refCount, outCount);
    LayerTestsCommon::Compare<float>(refValues, outValues, refCount, 1e-2f);
}
void DetectionOutputLayerTest::SetUp() {
DetectionOutputAttributes commonAttrs;
ParamsWhichSizeDepends specificAttrs;
size_t batch;
std::tie(commonAttrs, specificAttrs, batch, attrs.objectness_score, targetDevice) = this->GetParam();
std::tie(attrs.num_classes, attrs.background_label_id, attrs.top_k, attrs.keep_top_k, attrs.code_type, attrs.nms_threshold, attrs.confidence_threshold,
attrs.clip_after_nms, attrs.clip_before_nms, attrs.decrease_label_id) = commonAttrs;
inShapes.resize(numInputs);
std::tie(attrs.variance_encoded_in_target, attrs.share_location, attrs.normalized, attrs.input_height, attrs.input_width,
inShapes[idxLocation], inShapes[idxConfidence], inShapes[idxPriors], inShapes[idxArmConfidence], inShapes[idxArmLocation]) = specificAttrs;
if (inShapes[idxArmConfidence].empty()) {
inShapes.resize(3);
}
for (size_t i = 0; i < inShapes.size(); i++) {
inShapes[i][0] = batch;
}
auto params = ngraph::builder::makeParams(ngraph::element::f32, inShapes);
auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::opset3::Parameter>(params));
auto detOut = ngraph::builder::makeDetectionOutput(paramOuts, attrs);
ngraph::ResultVector results{std::make_shared<ngraph::opset3::Result>(detOut)};
function = std::make_shared<ngraph::Function>(results, params, "DetectionOutput");
}
// Runs the common layer-test flow for every instantiated parameter set.
TEST_P(DetectionOutputLayerTest, CompareWithRefs) {
Run();
};
} // namespace LayerTestsDefinitions

View File

@ -96,7 +96,7 @@ protected:
void LoadNetwork();
void Infer();
virtual void Infer();
TargetDevice targetDevice;
std::shared_ptr<ngraph::Function> function;

View File

@ -314,5 +314,8 @@ std::shared_ptr<ngraph::Node> makeLogical(const ngraph::Output<Node> &in0,
const ngraph::Output<Node> &in1,
ngraph::helpers::LogicalTypes logicalType);
std::shared_ptr<ngraph::Node> makeDetectionOutput(const ngraph::OutputVector &inputs,
const ngraph::op::DetectionOutputAttrs& attrs);
} // namespace builder
} // namespace ngraph

View File

@ -0,0 +1,21 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "ngraph_functions/builders.hpp"
namespace ngraph {
namespace builder {
// Creates an opset3 DetectionOutput node from either 3 inputs or 5 inputs (the extra two are
// forwarded as the 4th and 5th constructor arguments).
// Throws std::runtime_error for any other number of inputs.
std::shared_ptr<ngraph::Node> makeDetectionOutput(const ngraph::OutputVector &inputs,
                                                  const ngraph::op::DetectionOutputAttrs& attrs) {
    switch (inputs.size()) {
    case 3:
        return std::make_shared<ngraph::opset3::DetectionOutput>(inputs[0], inputs[1], inputs[2], attrs);
    case 5:
        return std::make_shared<ngraph::opset3::DetectionOutput>(
            inputs[0], inputs[1], inputs[2], inputs[3], inputs[4], attrs);
    default:
        throw std::runtime_error("DetectionOutput layer supports only 3 or 5 inputs");
    }
}
} // namespace builder
} // namespace ngraph

View File

@ -92,6 +92,8 @@
#include "op/convolution.hpp"
#include "op/group_conv.hpp"
#include "reference/detection_output.hpp"
namespace ngraph
{
namespace runtime
@ -1112,6 +1114,36 @@ protected:
}
break;
}
case OP_TYPEID::DetectionOutput_v0:
{
const op::DetectionOutput* detOut = static_cast<const op::DetectionOutput*>(&node);
reference::referenceDetectionOutput<T> refDetOut(
detOut->get_attrs(), node.get_input_shape(0), node.get_input_shape(2));
if (node.get_input_size() == 3)
{
refDetOut.run(args[0]->get_data_ptr<const T>(),
args[1]->get_data_ptr<const T>(),
args[2]->get_data_ptr<const T>(),
nullptr,
nullptr,
out[0]->get_data_ptr<T>());
}
else if (node.get_input_size() == 5)
{
refDetOut.run(args[0]->get_data_ptr<const T>(),
args[1]->get_data_ptr<const T>(),
args[2]->get_data_ptr<const T>(),
args[3]->get_data_ptr<const T>(),
args[4]->get_data_ptr<const T>(),
out[0]->get_data_ptr<T>());
}
else
{
throw ngraph_error("DetectionOutput layer supports only 3 or 5 inputs");
}
break;
}
// Fused Ops are not supported in interpreter. They need to be decomposed before execution
case OP_TYPEID::DepthToSpace:

View File

@ -18,6 +18,10 @@
#include "opset0_tbl.hpp"
#undef ID_SUFFIX
#define ID_SUFFIX(NAME) NAME##_v0
NGRAPH_OP(DetectionOutput, op::v0)
#undef ID_SUFFIX
#define ID_SUFFIX(NAME) NAME##_v1
NGRAPH_OP(LessEqual, op::v1)
NGRAPH_OP(LogicalAnd, op::v1)

View File

@ -0,0 +1,669 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <cstddef>
#include <map>
#include <string>
#include <vector>
#include "ngraph/shape.hpp"
namespace ngraph
{
namespace runtime
{
namespace reference
{
// Positions of the DetectionOutput inputs; numInputs is the trailing sentinel (5).
enum
{
idxLocation,
idxConfidence,
idxPriors,
idxArmConfidence,
idxArmLocation,
numInputs
};
template <typename dataType>
class referenceDetectionOutput
{
private:
// Corner-form box (xmin, ymin, xmax, ymax) plus its cached area in `size`.
// All fields default to zero.
struct NormalizedBBox
{
dataType xmin = 0;
dataType ymin = 0;
dataType xmax = 0;
dataType ymax = 0;
dataType size = 0;
};
using LabelBBox = std::map<int, std::vector<NormalizedBBox>>;
ngraph::op::DetectionOutputAttrs attrs;
size_t numImages;
size_t priorSize;
size_t numPriors;
size_t numLocClasses;
size_t offset;
void GetLocPredictions(const dataType* locData, std::vector<LabelBBox>& locations)
{
locations.resize(numImages);
for (size_t i = 0; i < numImages; ++i)
{
LabelBBox& labelBbox = locations[i];
for (size_t p = 0; p < numPriors; ++p)
{
size_t startIdx = p * numLocClasses * 4;
for (size_t c = 0; c < numLocClasses; ++c)
{
int label = attrs.share_location ? -1 : c;
if (labelBbox.find(label) == labelBbox.end())
{
labelBbox[label].resize(numPriors);
}
labelBbox[label][p].xmin = locData[startIdx + c * 4];
labelBbox[label][p].ymin = locData[startIdx + c * 4 + 1];
labelBbox[label][p].xmax = locData[startIdx + c * 4 + 2];
labelBbox[label][p].ymax = locData[startIdx + c * 4 + 3];
}
}
locData += numPriors * numLocClasses * 4;
}
}
void GetConfidenceScores(
const dataType* confData,
std::vector<std::map<int, std::vector<dataType>>>& confPreds)
{
confPreds.resize(numImages);
for (int i = 0; i < numImages; ++i)
{
std::map<int, std::vector<dataType>>& labelScores = confPreds[i];
for (int p = 0; p < numPriors; ++p)
{
int startIdx = p * attrs.num_classes;
for (int c = 0; c < attrs.num_classes; ++c)
{
labelScores[c].push_back(confData[startIdx + c]);
}
}
confData += numPriors * attrs.num_classes;
}
}
void OSGetConfidenceScores(
const dataType* confData,
const dataType* armConfData,
std::vector<std::map<int, std::vector<dataType>>>& confPreds)
{
confPreds.resize(numImages);
for (int i = 0; i < numImages; ++i)
{
std::map<int, std::vector<dataType>>& labelScores = confPreds[i];
for (int p = 0; p < numPriors; ++p)
{
int startIdx = p * attrs.num_classes;
if (armConfData[p * 2 + 1] < attrs.objectness_score)
{
for (int c = 0; c < attrs.num_classes; ++c)
{
c == attrs.background_label_id ? labelScores[c].push_back(1)
: labelScores[c].push_back(0);
}
}
else
{
for (int c = 0; c < attrs.num_classes; ++c)
{
labelScores[c].push_back(confData[startIdx + c]);
}
}
}
confData += numPriors * attrs.num_classes;
armConfData += numPriors * 2;
}
}
// Area of a corner-form box; an inverted box (max below min on either axis) has area 0.
dataType BBoxSize(const NormalizedBBox& bbox)
{
    const bool inverted = bbox.xmax < bbox.xmin || bbox.ymax < bbox.ymin;
    if (inverted)
    {
        return 0;
    }
    const dataType boxWidth = bbox.xmax - bbox.xmin;
    const dataType boxHeight = bbox.ymax - bbox.ymin;
    return boxWidth * boxHeight;
}
void GetPriorBBoxes(const dataType* priorData,
std::vector<std::vector<NormalizedBBox>>& priorBboxes,
std::vector<std::vector<std::vector<dataType>>>& priorVariances)
{
priorBboxes.resize(numImages);
priorVariances.resize(numImages);
for (int n = 0; n < numImages; n++)
{
priorData += attrs.variance_encoded_in_target
? n * numPriors * priorSize
: 2 * n * numPriors * priorSize;
std::vector<NormalizedBBox>& currPrBbox = priorBboxes[n];
std::vector<std::vector<dataType>>& currPrVar = priorVariances[n];
for (int i = 0; i < numPriors; ++i)
{
int start_idx = i * priorSize;
NormalizedBBox bbox;
bbox.xmin = priorData[start_idx + 0 + offset];
bbox.ymin = priorData[start_idx + 1 + offset];
bbox.xmax = priorData[start_idx + 2 + offset];
bbox.ymax = priorData[start_idx + 3 + offset];
dataType bbox_size = BBoxSize(bbox);
bbox.size = bbox_size;
currPrBbox.push_back(bbox);
}
if (!attrs.variance_encoded_in_target)
{
const dataType* priorVar = priorData + numPriors * priorSize;
for (int i = 0; i < numPriors; ++i)
{
int start_idx = i * 4;
std::vector<dataType> var;
for (int j = 0; j < 4; ++j)
{
var.push_back(priorVar[start_idx + j]);
}
currPrVar.push_back(var);
}
}
}
}
void DecodeBBox(const NormalizedBBox& priorBboxes,
const std::vector<dataType>& priorVariances,
const NormalizedBBox& bbox,
NormalizedBBox& decodeBbox)
{
dataType priorXmin = priorBboxes.xmin;
dataType priorYmin = priorBboxes.ymin;
dataType priorXmax = priorBboxes.xmax;
dataType priorYmax = priorBboxes.ymax;
if (!attrs.normalized)
{
priorXmin /= attrs.input_width;
priorYmin /= attrs.input_height;
priorXmax /= attrs.input_width;
priorYmax /= attrs.input_height;
}
if (attrs.code_type == "caffe.PriorBoxParameter.CORNER")
{
if (attrs.variance_encoded_in_target)
{
decodeBbox.xmin = priorXmin + bbox.xmin;
decodeBbox.ymin = priorYmin + bbox.ymin;
decodeBbox.xmax = priorXmax + bbox.xmax;
decodeBbox.ymax = priorYmax + bbox.ymax;
}
else
{
decodeBbox.xmin = priorXmin + priorVariances[0] * bbox.xmin;
decodeBbox.ymin = priorYmin + priorVariances[1] * bbox.ymin;
decodeBbox.xmax = priorXmax + priorVariances[2] * bbox.xmax;
decodeBbox.ymax = priorYmax + priorVariances[3] * bbox.ymax;
}
}
else if (attrs.code_type == "caffe.PriorBoxParameter.CENTER_SIZE")
{
dataType priorWidth = priorXmax - priorXmin;
dataType priorHeight = priorYmax - priorYmin;
dataType priorCenterX = (priorXmin + priorXmax) / 2;
dataType priorCenterY = (priorYmin + priorYmax) / 2;
dataType decodeBboxCenterX, decodeBboxCenterY;
dataType decodeBboxWidth, decodeBboxHeight;
if (attrs.variance_encoded_in_target)
{
decodeBboxCenterX = bbox.xmin * priorWidth + priorCenterX;
decodeBboxCenterY = bbox.ymin * priorHeight + priorCenterY;
decodeBboxWidth = std::exp(bbox.xmax) * priorWidth;
decodeBboxHeight = std::exp(bbox.ymax) * priorHeight;
}
else
{
decodeBboxCenterX =
priorVariances[0] * bbox.xmin * priorWidth + priorCenterX;
decodeBboxCenterY =
priorVariances[1] * bbox.ymin * priorHeight + priorCenterY;
decodeBboxWidth = std::exp(priorVariances[2] * bbox.xmax) * priorWidth;
decodeBboxHeight =
std::exp(priorVariances[3] * bbox.ymax) * priorHeight;
}
decodeBbox.xmin = decodeBboxCenterX - decodeBboxWidth / 2;
decodeBbox.ymin = decodeBboxCenterY - decodeBboxHeight / 2;
decodeBbox.xmax = decodeBboxCenterX + decodeBboxWidth / 2;
decodeBbox.ymax = decodeBboxCenterY + decodeBboxHeight / 2;
}
if (attrs.clip_before_nms)
{
decodeBbox.xmin =
std::max<dataType>(0, std::min<dataType>(1, decodeBbox.xmin));
decodeBbox.ymin =
std::max<dataType>(0, std::min<dataType>(1, decodeBbox.ymin));
decodeBbox.xmax =
std::max<dataType>(0, std::min<dataType>(1, decodeBbox.xmax));
decodeBbox.ymax =
std::max<dataType>(0, std::min<dataType>(1, decodeBbox.ymax));
}
dataType bboxSize = BBoxSize(decodeBbox);
decodeBbox.size = bboxSize;
}
void DecodeBBoxes(const std::vector<NormalizedBBox>& priorBboxes,
const std::vector<std::vector<dataType>>& priorVariances,
const std::vector<NormalizedBBox>& labelLocPreds,
std::vector<NormalizedBBox>& decodeBboxes)
{
int numBboxes = priorBboxes.size();
for (int i = 0; i < numBboxes; ++i)
{
NormalizedBBox decodeBbox;
DecodeBBox(priorBboxes[i], priorVariances[i], labelLocPreds[i], decodeBbox);
decodeBboxes.push_back(decodeBbox);
}
}
void DecodeBBoxesAll(
const std::vector<LabelBBox>& locPreds,
const std::vector<std::vector<NormalizedBBox>>& priorBboxes,
const std::vector<std::vector<std::vector<dataType>>>& priorVariances,
std::vector<LabelBBox>& decodeBboxes)
{
decodeBboxes.resize(numImages);
for (int i = 0; i < numImages; ++i)
{
LabelBBox& decodeBboxesImage = decodeBboxes[i];
const std::vector<NormalizedBBox>& currPrBbox = priorBboxes[i];
const std::vector<std::vector<dataType>>& currPrVar = priorVariances[i];
for (int c = 0; c < numLocClasses; ++c)
{
int label = attrs.share_location ? -1 : c;
if (label == attrs.background_label_id)
{
continue;
}
const std::vector<NormalizedBBox>& labelLocPreds =
locPreds[i].find(label)->second;
DecodeBBoxes(
currPrBbox, currPrVar, labelLocPreds, decodeBboxesImage[label]);
}
}
}
void CasRegDecodeBBoxesAll(
const std::vector<LabelBBox>& locPreds,
const std::vector<std::vector<NormalizedBBox>>& priorBboxes,
const std::vector<std::vector<std::vector<dataType>>>& priorVariances,
std::vector<LabelBBox>& decodeBboxes,
const std::vector<LabelBBox>& armLocPreds)
{
decodeBboxes.resize(numImages);
for (int i = 0; i < numImages; ++i)
{
LabelBBox& decodeBboxesImage = decodeBboxes[i];
const std::vector<NormalizedBBox>& currPrBbox = priorBboxes[i];
const std::vector<std::vector<dataType>>& currPrVar = priorVariances[i];
for (int c = 0; c < numLocClasses; ++c)
{
int label = attrs.share_location ? -1 : c;
if (label == attrs.background_label_id)
{
continue;
}
const std::vector<NormalizedBBox>& labelArmLocPreds =
armLocPreds[i].find(label)->second;
std::vector<NormalizedBBox> decodePriorBboxes;
DecodeBBoxes(
currPrBbox, currPrVar, labelArmLocPreds, decodePriorBboxes);
const std::vector<NormalizedBBox>& labelLocPreds =
locPreds[i].find(label)->second;
DecodeBBoxes(decodePriorBboxes,
currPrVar,
labelLocPreds,
decodeBboxesImage[label]);
}
}
}
template <typename T>
static bool SortScorePairDescend(const std::pair<dataType, T>& pair1,
const std::pair<dataType, T>& pair2)
{
return pair1.first > pair2.first;
}
void GetMaxScoreIndex(const std::vector<dataType>& scores,
const dataType threshold,
const int topK,
std::vector<std::pair<dataType, int>>& scoreIndexVec)
{
for (int i = 0; i < scores.size(); ++i)
{
if (scores[i] > threshold)
{
scoreIndexVec.push_back(std::make_pair(scores[i], i));
}
}
std::stable_sort(
scoreIndexVec.begin(), scoreIndexVec.end(), SortScorePairDescend<int>);
if (topK > -1 && topK < scoreIndexVec.size())
{
scoreIndexVec.resize(topK);
}
}
// Computes the intersection rectangle of bbox1 and bbox2 into intersectBbox.
//
// When the boxes are strictly separated along either axis, the four
// coordinates of intersectBbox are set to 0. Otherwise they receive the
// overlap of the two boxes. Only xmin/ymin/xmax/ymax of intersectBbox are
// written.
void IntersectBBox(const NormalizedBBox& bbox1,
                   const NormalizedBBox& bbox2,
                   NormalizedBBox& intersectBbox)
{
    const bool disjoint = bbox2.xmin > bbox1.xmax || bbox2.xmax < bbox1.xmin ||
                          bbox2.ymin > bbox1.ymax || bbox2.ymax < bbox1.ymin;
    if (disjoint)
    {
        intersectBbox.xmin = 0;
        intersectBbox.ymin = 0;
        intersectBbox.xmax = 0;
        intersectBbox.ymax = 0;
        return;
    }

    // Overlap: the larger of the minima and the smaller of the maxima.
    intersectBbox.xmin = std::max<dataType>(bbox1.xmin, bbox2.xmin);
    intersectBbox.ymin = std::max<dataType>(bbox1.ymin, bbox2.ymin);
    intersectBbox.xmax = std::min<dataType>(bbox1.xmax, bbox2.xmax);
    intersectBbox.ymax = std::min<dataType>(bbox1.ymax, bbox2.ymax);
}
// Returns the Jaccard overlap (intersection over union) of two boxes.
//
// The intersection comes from IntersectBBox and the individual areas from
// BBoxSize. If the intersection does not have strictly positive width and
// height, the result is 0.
dataType JaccardOverlap(const NormalizedBBox& bbox1, const NormalizedBBox& bbox2)
{
    NormalizedBBox overlapBox;
    IntersectBBox(bbox1, bbox2, overlapBox);

    const dataType overlapWidth = overlapBox.xmax - overlapBox.xmin;
    const dataType overlapHeight = overlapBox.ymax - overlapBox.ymin;
    if (!(overlapWidth > 0 && overlapHeight > 0))
    {
        return 0.0f;
    }

    const dataType overlapArea = overlapWidth * overlapHeight;
    const dataType firstArea = BBoxSize(bbox1);
    const dataType secondArea = BBoxSize(bbox2);
    return overlapArea / (firstArea + secondArea - overlapArea);
}
void caffeNMS(const std::vector<NormalizedBBox>& bboxes,
const std::vector<dataType>& scores,
std::vector<int>& indices)
{
std::vector<std::pair<dataType, int>> scoreIndexVec;
GetMaxScoreIndex(
scores, attrs.confidence_threshold, attrs.top_k, scoreIndexVec);
while (scoreIndexVec.size() != 0)
{
const int idx = scoreIndexVec.front().second;
bool keep = true;
for (int k = 0; k < indices.size(); ++k)
{
const int kept_idx = indices[k];
dataType overlap = JaccardOverlap(bboxes[idx], bboxes[kept_idx]);
if (overlap > attrs.nms_threshold)
{
keep = false;
break;
}
}
if (keep)
{
indices.push_back(idx);
}
scoreIndexVec.erase(scoreIndexVec.begin());
}
}
void mxNetNms(const LabelBBox& decodeBboxesImage,
const std::map<int, std::vector<dataType>>& confScores,
std::map<int, std::vector<int>>& indices)
{
std::vector<std::pair<dataType, std::pair<int, int>>> scoreIndexPairs;
for (int p = 0; p < numPriors; p++)
{
dataType conf = -1;
int id = 0;
for (int c = 1; c < attrs.num_classes; c++)
{
dataType temp = confScores.at(c)[p];
if (temp > conf)
{
conf = temp;
id = c;
}
}
if (id > 0 && conf >= attrs.confidence_threshold)
{
scoreIndexPairs.push_back(std::make_pair(conf, std::make_pair(id, p)));
}
}
std::sort(scoreIndexPairs.begin(),
scoreIndexPairs.end(),
SortScorePairDescend<std::pair<int, int>>);
if (attrs.top_k != -1)
if (scoreIndexPairs.size() > attrs.top_k)
scoreIndexPairs.resize(attrs.top_k);
while (scoreIndexPairs.size() != 0)
{
const int cls = scoreIndexPairs.front().second.first;
const int prior = scoreIndexPairs.front().second.second;
std::vector<int>& currInd = indices[cls];
bool keep = true;
for (int i = 0; i < currInd.size(); i++)
{
const int keptIdx = currInd[i];
auto currBbox = attrs.share_location ? decodeBboxesImage.at(-1)
: decodeBboxesImage.at(cls);
dataType overlap = JaccardOverlap(currBbox[prior], currBbox[keptIdx]);
if (overlap > attrs.nms_threshold)
{
keep = false;
break;
}
}
if (keep)
{
currInd.push_back(prior);
}
scoreIndexPairs.erase(scoreIndexPairs.begin());
}
}
public:
// Builds the reference kernel from the operation attributes and input shapes.
//
// _attrs      - DetectionOutput attributes; copied into `attrs`.
// locShape    - shape of the location input; dimension 0 gives numImages.
// priorsShape - shape of the priors input; dimension 2 divided by the
//               per-prior element count gives numPriors.
//
// A prior occupies 4 values (offset 0) when _attrs.normalized is set and
// 5 values (offset 1) otherwise. numLocClasses is 1 when locations are shared
// between classes, else _attrs.num_classes.
referenceDetectionOutput(const ngraph::op::DetectionOutputAttrs& _attrs,
                         const ngraph::Shape& locShape,
                         const ngraph::Shape& priorsShape)
    : attrs(_attrs)
{
    numImages = locShape[0];

    if (_attrs.normalized)
    {
        priorSize = 4;
        offset = 0;
    }
    else
    {
        priorSize = 5;
        offset = 1;
    }
    numPriors = priorsShape[2] / priorSize;

    if (_attrs.share_location)
    {
        numLocClasses = 1;
    }
    else
    {
        numLocClasses = static_cast<size_t>(_attrs.num_classes);
    }
}
// Runs the complete DetectionOutput reference computation.
//
// Steps:
//   1. parse location, confidence and prior inputs into per-image structures;
//   2. decode the boxes (cascade decoding when both ARM inputs are given);
//   3. per image, run NMS: Caffe style per class when
//      attrs.decrease_label_id is false, MXNet style otherwise;
//   4. per image, if attrs.keep_top_k[0] is non-negative and more detections
//      survived than that, keep only the keep_top_k[0] best-scoring ones;
//   5. write the detections to `result`.
//
// _location, _confidence, _priors - mandatory inputs.
// _armConfidence, _armLocation    - optional inputs; the cascade path is used
//                                   only when both are non-null.
// result - output buffer; each detection occupies 7 consecutive values:
//          [image index, label, score, xmin, ymin, xmax, ymax]. The label is
//          decremented by one when attrs.decrease_label_id is set. The buffer
//          size is not checked here; the caller must provide enough room.
void run(const dataType* _location,
         const dataType* _confidence,
         const dataType* _priors,
         const dataType* _armConfidence,
         const dataType* _armLocation,
         dataType* result)
{
    const bool withAddBoxPred = _armConfidence != nullptr && _armLocation != nullptr;

    std::vector<LabelBBox> armLocPreds;
    if (withAddBoxPred)
    {
        GetLocPredictions(_armLocation, armLocPreds);
    }
    std::vector<LabelBBox> locPreds;
    GetLocPredictions(_location, locPreds);

    std::vector<std::map<int, std::vector<dataType>>> confPreds;
    if (withAddBoxPred)
    {
        OSGetConfidenceScores(_confidence, _armConfidence, confPreds);
    }
    else
    {
        GetConfidenceScores(_confidence, confPreds);
    }

    std::vector<std::vector<NormalizedBBox>> priorBboxes;
    std::vector<std::vector<std::vector<dataType>>> priorVariances;
    GetPriorBBoxes(_priors, priorBboxes, priorVariances);

    std::vector<LabelBBox> decodeBboxes;
    if (withAddBoxPred)
    {
        CasRegDecodeBBoxesAll(
            locPreds, priorBboxes, priorVariances, decodeBboxes, armLocPreds);
    }
    else
    {
        DecodeBBoxesAll(locPreds, priorBboxes, priorVariances, decodeBboxes);
    }

    // Per-image cap on the number of detections; a negative value disables it.
    const auto keepTopK = attrs.keep_top_k[0];

    // allIndices[image][label] -> prior indices that survive NMS and the cap.
    std::vector<std::map<int, std::vector<int>>> allIndices;
    for (int i = 0; i < numImages; ++i)
    {
        const LabelBBox& decodeBboxesImage = decodeBboxes[i];
        const std::map<int, std::vector<dataType>>& confScores = confPreds[i];
        std::map<int, std::vector<int>> indices;
        int numDet = 0;
        if (!attrs.decrease_label_id)
        {
            // Caffe style
            for (int c = 0; c < attrs.num_classes; ++c)
            {
                if (c == attrs.background_label_id)
                {
                    continue;
                }
                const std::vector<dataType>& scores = confScores.find(c)->second;
                int label = attrs.share_location ? -1 : c;
                const std::vector<NormalizedBBox>& bboxes =
                    decodeBboxesImage.find(label)->second;
                caffeNMS(bboxes, scores, indices[c]);
                numDet += indices[c].size();
            }
        }
        else
        {
            // MXNet style
            mxNetNms(decodeBboxesImage, confScores, indices);
            for (const auto& labelAndIndices : indices)
            {
                numDet += labelAndIndices.second.size();
            }
        }

        if (keepTopK > -1 && numDet > keepTopK)
        {
            // Too many detections: rank all of them across labels by score
            // and keep the best keepTopK.
            std::vector<std::pair<dataType, std::pair<int, int>>> scoreIndexPairs;
            for (const auto& labelAndIndices : indices)
            {
                const int label = labelAndIndices.first;
                const std::vector<dataType>& scores = confScores.find(label)->second;
                for (const int idx : labelAndIndices.second)
                {
                    scoreIndexPairs.push_back(
                        std::make_pair(scores[idx], std::make_pair(label, idx)));
                }
            }
            std::sort(scoreIndexPairs.begin(),
                      scoreIndexPairs.end(),
                      SortScorePairDescend<std::pair<int, int>>);
            scoreIndexPairs.resize(keepTopK);

            // Regroup the survivors by label.
            std::map<int, std::vector<int>> newIndices;
            for (const auto& entry : scoreIndexPairs)
            {
                newIndices[entry.second.first].push_back(entry.second.second);
            }
            allIndices.push_back(std::move(newIndices));
        }
        else
        {
            allIndices.push_back(std::move(indices));
        }
    }

    // Emit the detections, image by image and label by label.
    int count = 0;
    for (int i = 0; i < numImages; ++i)
    {
        const std::map<int, std::vector<dataType>>& confScores = confPreds[i];
        const LabelBBox& decodeBboxesImage = decodeBboxes[i];
        for (const auto& labelAndIndices : allIndices[i])
        {
            const int label = labelAndIndices.first;
            const std::vector<dataType>& scores = confScores.find(label)->second;
            const int loc_label = attrs.share_location ? -1 : label;
            const std::vector<NormalizedBBox>& bboxes =
                decodeBboxesImage.find(loc_label)->second;
            for (const int idx : labelAndIndices.second)
            {
                result[count * 7 + 0] = i;
                result[count * 7 + 1] =
                    attrs.decrease_label_id ? (label - 1) : label;
                result[count * 7 + 2] = scores[idx];
                const NormalizedBBox& bbox = bboxes[idx];
                dataType xmin = bbox.xmin;
                dataType ymin = bbox.ymin;
                dataType xmax = bbox.xmax;
                dataType ymax = bbox.ymax;
                if (attrs.clip_after_nms)
                {
                    xmin = std::max<dataType>(0, std::min<dataType>(1, xmin));
                    ymin = std::max<dataType>(0, std::min<dataType>(1, ymin));
                    xmax = std::max<dataType>(0, std::min<dataType>(1, xmax));
                    ymax = std::max<dataType>(0, std::min<dataType>(1, ymax));
                }
                result[count * 7 + 3] = xmin;
                result[count * 7 + 4] = ymin;
                result[count * 7 + 5] = xmax;
                result[count * 7 + 6] = ymax;
                ++count;
            }
        }
    }

    // Mark the end of the valid detections with an image index of -1 when
    // fewer than numImages * keepTopK rows were written.
    // NOTE(review): for a negative keepTopK the outcome of this comparison
    // depends on the signedness of numImages (declared outside this block) -
    // confirm the intended behaviour for keep_top_k == -1.
    if (count < numImages * keepTopK)
    {
        result[count * 7 + 0] = -1;
    }
}
};
} // namespace reference
} // namespace runtime
} // namespace ngraph