[CPU] Add support for the 4th and 5th inputs of DetectionOutput (#1290)
* [CPU] Add support 4th and 5th input DetectionOutput * fix any comments * move reference to ngraph * some changes for mx nms * change namespace for ref impl
This commit is contained in:
parent
8c118ef8b2
commit
f9023ff7da
@ -26,7 +26,7 @@ class DetectionOutputImpl: public ExtLayerBase {
|
||||
public:
|
||||
explicit DetectionOutputImpl(const CNNLayer* layer) {
|
||||
try {
|
||||
if (layer->insData.size() != 3)
|
||||
if (layer->insData.size() != 3 && layer->insData.size() != 5)
|
||||
THROW_IE_EXCEPTION << "Incorrect number of input edges for layer " << layer->name;
|
||||
if (layer->outData.empty())
|
||||
THROW_IE_EXCEPTION << "Incorrect number of output edges for layer " << layer->name;
|
||||
@ -50,6 +50,9 @@ public:
|
||||
_offset = _normalized ? 0 : 1;
|
||||
_num_loc_classes = _share_location ? 1 : _num_classes;
|
||||
|
||||
with_add_box_pred = layer->insData.size() == 5;
|
||||
_objectness_score = layer->GetParamAsFloat("objectness_score", 0.0f);
|
||||
|
||||
std::string code_type_str = layer->GetParamAsString("code_type", "caffe.PriorBoxParameter.CORNER");
|
||||
_code_type = (code_type_str == "caffe.PriorBoxParameter.CENTER_SIZE" ? CodeType::CENTER_SIZE
|
||||
: CodeType::CORNER);
|
||||
@ -109,9 +112,8 @@ public:
|
||||
_num_priors_actual = InferenceEngine::make_shared_blob<int>({Precision::I32, num_priors_actual_size, C});
|
||||
_num_priors_actual->allocate();
|
||||
|
||||
addConfig(layer, {DataConfigurator(ConfLayout::PLN),
|
||||
DataConfigurator(ConfLayout::PLN),
|
||||
DataConfigurator(ConfLayout::PLN)}, {DataConfigurator(ConfLayout::PLN)});
|
||||
std::vector<DataConfigurator> in_data_conf(layer->insData.size(), DataConfigurator(ConfLayout::PLN));
|
||||
addConfig(layer, in_data_conf, {DataConfigurator(ConfLayout::PLN)});
|
||||
} catch (InferenceEngine::details::InferenceEngineException &ex) {
|
||||
errorMsg = ex.what();
|
||||
}
|
||||
@ -121,51 +123,81 @@ public:
|
||||
ResponseDesc *resp) noexcept override {
|
||||
float *dst_data = outputs[0]->buffer();
|
||||
|
||||
const float *loc_data = inputs[idx_location]->buffer();
|
||||
const float *conf_data = inputs[idx_confidence]->buffer();
|
||||
const float *prior_data = inputs[idx_priors]->buffer();
|
||||
const float *loc_data = inputs[idx_location]->buffer().as<const float *>();
|
||||
const float *conf_data = inputs[idx_confidence]->buffer().as<const float *>();
|
||||
const float *prior_data = inputs[idx_priors]->buffer().as<const float *>();
|
||||
const float *arm_conf_data = inputs.size() > 3 ? inputs[idx_arm_confidence]->buffer().as<const float *>() : nullptr;
|
||||
const float *arm_loc_data = inputs.size() > 4 ? inputs[idx_arm_location]->buffer().as<const float *>() : nullptr;
|
||||
|
||||
const int N = inputs[idx_confidence]->getTensorDesc().getDims()[0];
|
||||
|
||||
float *decoded_bboxes_data = _decoded_bboxes->buffer();
|
||||
float *reordered_conf_data = _reordered_conf->buffer();
|
||||
float *bbox_sizes_data = _bbox_sizes->buffer();
|
||||
int *detections_data = _detections_count->buffer();
|
||||
int *buffer_data = _buffer->buffer();
|
||||
int *indices_data = _indices->buffer();
|
||||
int *num_priors_actual = _num_priors_actual->buffer();
|
||||
float *decoded_bboxes_data = _decoded_bboxes->buffer().as<float *>();
|
||||
float *reordered_conf_data = _reordered_conf->buffer().as<float *>();
|
||||
float *bbox_sizes_data = _bbox_sizes->buffer().as<float *>();
|
||||
int *detections_data = _detections_count->buffer().as<int *>();
|
||||
int *buffer_data = _buffer->buffer().as<int *>();
|
||||
int *indices_data = _indices->buffer().as<int *>();
|
||||
int *num_priors_actual = _num_priors_actual->buffer().as<int *>();
|
||||
|
||||
for (int n = 0; n < N; ++n) {
|
||||
const float *ppriors = prior_data;
|
||||
const float *prior_variances = prior_data + _num_priors*_prior_size;
|
||||
if (_priors_batches) {
|
||||
ppriors += _variance_encoded_in_target ? n*_num_priors*_prior_size : 2*n*_num_priors*_prior_size;
|
||||
prior_variances += _variance_encoded_in_target ? 0 : n*_num_priors*_prior_size;
|
||||
prior_variances += _variance_encoded_in_target ? 0 : 2*n*_num_priors*_prior_size;
|
||||
}
|
||||
|
||||
if (_share_location) {
|
||||
const float *ploc = loc_data + n*4*_num_priors;
|
||||
float *pboxes = decoded_bboxes_data + n*4*_num_priors;
|
||||
float *psizes = bbox_sizes_data + n*_num_priors;
|
||||
decodeBBoxes(ppriors, ploc, prior_variances, pboxes, psizes, num_priors_actual, n);
|
||||
|
||||
if (with_add_box_pred) {
|
||||
const float *p_arm_loc = arm_loc_data + n*4*_num_priors;
|
||||
decodeBBoxes(ppriors, p_arm_loc, prior_variances, pboxes, psizes, num_priors_actual, n, _offset, _prior_size);
|
||||
decodeBBoxes(pboxes, ploc, prior_variances, pboxes, psizes, num_priors_actual, n, 0, 4, false);
|
||||
} else {
|
||||
decodeBBoxes(ppriors, ploc, prior_variances, pboxes, psizes, num_priors_actual, n, _offset, _prior_size);
|
||||
}
|
||||
} else {
|
||||
for (int c = 0; c < _num_loc_classes; ++c) {
|
||||
if (c == _background_label_id) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const float *ploc = loc_data + n*4*_num_loc_classes*_num_priors + c*4;
|
||||
float *pboxes = decoded_bboxes_data + n*4*_num_loc_classes*_num_priors + c*4*_num_priors;
|
||||
float *psizes = bbox_sizes_data + n*_num_loc_classes*_num_priors + c*_num_priors;
|
||||
decodeBBoxes(ppriors, ploc, prior_variances, pboxes, psizes, num_priors_actual, n);
|
||||
if (with_add_box_pred) {
|
||||
const float *p_arm_loc = arm_loc_data + n*4*_num_loc_classes*_num_priors + c*4;
|
||||
decodeBBoxes(ppriors, p_arm_loc, prior_variances, pboxes, psizes, num_priors_actual, n, _offset, _prior_size);
|
||||
decodeBBoxes(pboxes, ploc, prior_variances, pboxes, psizes, num_priors_actual, n, 0, 4, false);
|
||||
} else {
|
||||
decodeBBoxes(ppriors, ploc, prior_variances, pboxes, psizes, num_priors_actual, n, _offset, _prior_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (int n = 0; n < N; ++n) {
|
||||
for (int c = 0; c < _num_classes; ++c) {
|
||||
if (with_add_box_pred) {
|
||||
for (int n = 0; n < N; ++n) {
|
||||
for (int p = 0; p < _num_priors; ++p) {
|
||||
reordered_conf_data[n*_num_priors*_num_classes + c*_num_priors + p] = conf_data[n*_num_priors*_num_classes + p*_num_classes + c];
|
||||
if (arm_conf_data[n*_num_priors*2 + p * 2 + 1] < _objectness_score) {
|
||||
for (int c = 0; c < _num_classes; ++c) {
|
||||
reordered_conf_data[n*_num_priors*_num_classes + c*_num_priors + p] = c == _background_label_id ? 1.0f : 0.0f;
|
||||
}
|
||||
} else {
|
||||
for (int c = 0; c < _num_classes; ++c) {
|
||||
reordered_conf_data[n*_num_priors*_num_classes + c*_num_priors + p] = conf_data[n*_num_priors*_num_classes + p*_num_classes + c];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (int n = 0; n < N; ++n) {
|
||||
for (int c = 0; c < _num_classes; ++c) {
|
||||
for (int p = 0; p < _num_priors; ++p) {
|
||||
reordered_conf_data[n*_num_priors*_num_classes + c*_num_priors + p] = conf_data[n*_num_priors*_num_classes + p*_num_classes + c];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -204,8 +236,8 @@ public:
|
||||
int *pdetections = detections_data + n*_num_classes;
|
||||
|
||||
const float *pconf = reordered_conf_data + n*_num_classes*_num_priors;
|
||||
const float *pboxes = decoded_bboxes_data + n*4*_num_priors;
|
||||
const float *psizes = bbox_sizes_data + n*_num_priors;
|
||||
const float *pboxes = decoded_bboxes_data + n*4*_num_loc_classes*_num_priors;
|
||||
const float *psizes = bbox_sizes_data + n*_num_loc_classes*_num_priors;
|
||||
|
||||
nms_mx(pconf, pboxes, psizes, pbuffer, pindices, pdetections, _num_priors);
|
||||
}
|
||||
@ -220,6 +252,7 @@ public:
|
||||
for (int c = 0; c < _num_classes; ++c) {
|
||||
int detections = detections_data[n*_num_classes + c];
|
||||
int *pindices = indices_data + n*_num_classes*_num_priors + c*_num_priors;
|
||||
|
||||
float *pconf = reordered_conf_data + n*_num_classes*_num_priors + c*_num_priors;
|
||||
|
||||
for (int i = 0; i < detections; ++i) {
|
||||
@ -310,7 +343,8 @@ private:
|
||||
const int idx_location = 0;
|
||||
const int idx_confidence = 1;
|
||||
const int idx_priors = 2;
|
||||
|
||||
const int idx_arm_confidence = 3;
|
||||
const int idx_arm_location = 4;
|
||||
|
||||
int _num_classes = 0;
|
||||
int _background_label_id = 0;
|
||||
@ -324,6 +358,8 @@ private:
|
||||
bool _clip_after_nms = false; // clip bounding boxes after nms step
|
||||
bool _decrease_label_id = false;
|
||||
|
||||
bool with_add_box_pred = false;
|
||||
|
||||
int _image_width = 0;
|
||||
int _image_height = 0;
|
||||
int _prior_size = 4;
|
||||
@ -332,6 +368,7 @@ private:
|
||||
|
||||
float _nms_threshold = 0.0f;
|
||||
float _confidence_threshold = 0.0f;
|
||||
float _objectness_score = 0.0f;
|
||||
|
||||
int _num = 0;
|
||||
int _num_loc_classes = 0;
|
||||
@ -344,7 +381,8 @@ private:
|
||||
};
|
||||
|
||||
void decodeBBoxes(const float *prior_data, const float *loc_data, const float *variance_data,
|
||||
float *decoded_bboxes, float *decoded_bbox_sizes, int* num_priors_actual, int n);
|
||||
float *decoded_bboxes, float *decoded_bbox_sizes, int* num_priors_actual, int n, const int& offs, const int& pr_size,
|
||||
bool decodeType = true); // after ARM = false
|
||||
|
||||
void nms_cf(const float *conf_data, const float *bboxes, const float *sizes,
|
||||
int *buffer, int *indices, int &detections, int num_priors_actual);
|
||||
@ -384,8 +422,8 @@ static inline float JaccardOverlap(const float *decoded_bbox,
|
||||
|
||||
float xmin2 = decoded_bbox[idx2*4 + 0];
|
||||
float ymin2 = decoded_bbox[idx2*4 + 1];
|
||||
float ymax2 = decoded_bbox[idx2*4 + 3];
|
||||
float xmax2 = decoded_bbox[idx2*4 + 2];
|
||||
float ymax2 = decoded_bbox[idx2*4 + 3];
|
||||
|
||||
if (xmin2 > xmax1 || xmax2 < xmin1 || ymin2 > ymax1 || ymax2 < ymin1) {
|
||||
return 0.0f;
|
||||
@ -411,34 +449,36 @@ static inline float JaccardOverlap(const float *decoded_bbox,
|
||||
}
|
||||
|
||||
void DetectionOutputImpl::decodeBBoxes(const float *prior_data,
|
||||
const float *loc_data,
|
||||
const float *variance_data,
|
||||
float *decoded_bboxes,
|
||||
float *decoded_bbox_sizes,
|
||||
int* num_priors_actual,
|
||||
int n) {
|
||||
const float *loc_data,
|
||||
const float *variance_data,
|
||||
float *decoded_bboxes,
|
||||
float *decoded_bbox_sizes,
|
||||
int* num_priors_actual,
|
||||
int n,
|
||||
const int& offs,
|
||||
const int& pr_size,
|
||||
bool decodeType) {
|
||||
num_priors_actual[n] = _num_priors;
|
||||
if (!_normalized) {
|
||||
if (!_normalized && decodeType) {
|
||||
int num = 0;
|
||||
for (; num < _num_priors; ++num) {
|
||||
float batch_id = prior_data[num * _prior_size + 0];
|
||||
float batch_id = prior_data[num * pr_size + 0];
|
||||
if (batch_id == -1.f) {
|
||||
num_priors_actual[n] = num;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
parallel_for(num_priors_actual[n], [&](int p) {
|
||||
float new_xmin = 0.0f;
|
||||
float new_ymin = 0.0f;
|
||||
float new_xmax = 0.0f;
|
||||
float new_ymax = 0.0f;
|
||||
|
||||
float prior_xmin = prior_data[p*_prior_size + 0 + _offset];
|
||||
float prior_ymin = prior_data[p*_prior_size + 1 + _offset];
|
||||
float prior_xmax = prior_data[p*_prior_size + 2 + _offset];
|
||||
float prior_ymax = prior_data[p*_prior_size + 3 + _offset];
|
||||
float prior_xmin = prior_data[p*pr_size + 0 + offs];
|
||||
float prior_ymin = prior_data[p*pr_size + 1 + offs];
|
||||
float prior_xmax = prior_data[p*pr_size + 2 + offs];
|
||||
float prior_ymax = prior_data[p*pr_size + 3 + offs];
|
||||
|
||||
float loc_xmin = loc_data[4*p*_num_loc_classes + 0];
|
||||
float loc_ymin = loc_data[4*p*_num_loc_classes + 1];
|
||||
@ -591,7 +631,12 @@ void DetectionOutputImpl::nms_mx(const float* conf_data,
|
||||
bool keep = true;
|
||||
for (int k = 0; k < ndetection; ++k) {
|
||||
const int kept_idx = pindices[k];
|
||||
float overlap = JaccardOverlap(bboxes, sizes, prior, kept_idx);
|
||||
float overlap = 0.0f;
|
||||
if (_share_location) {
|
||||
overlap = JaccardOverlap(bboxes, sizes, prior, kept_idx);
|
||||
} else {
|
||||
overlap = JaccardOverlap(bboxes, sizes, cls*_num_priors + prior, cls*_num_priors + kept_idx);
|
||||
}
|
||||
if (overlap > _nms_threshold) {
|
||||
keep = false;
|
||||
break;
|
||||
|
@ -0,0 +1,85 @@
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "single_layer_tests/detection_output.hpp"
|
||||
|
||||
using namespace LayerTestsDefinitions;
|
||||
|
||||
namespace {
|
||||
|
||||
const int numClasses = 11;
|
||||
const int backgroundLabelId = 0;
|
||||
const std::vector<int> topK = {75};
|
||||
const std::vector<std::vector<int>> keepTopK = { {50}, {100} };
|
||||
const std::vector<std::string> codeType = {"caffe.PriorBoxParameter.CORNER", "caffe.PriorBoxParameter.CENTER_SIZE"};
|
||||
const float nmsThreshold = 0.5f;
|
||||
const float confidenceThreshold = 0.3f;
|
||||
const std::vector<bool> clipAfterNms = {true, false};
|
||||
const std::vector<bool> clipBeforeNms = {true, false};
|
||||
const std::vector<bool> decreaseLabelId = {true, false};
|
||||
const float objectnessScore = 0.4f;
|
||||
const std::vector<size_t> numberBatch = {1, 2};
|
||||
|
||||
const auto commonAttributes = ::testing::Combine(
|
||||
::testing::Values(numClasses),
|
||||
::testing::Values(backgroundLabelId),
|
||||
::testing::ValuesIn(topK),
|
||||
::testing::ValuesIn(keepTopK),
|
||||
::testing::ValuesIn(codeType),
|
||||
::testing::Values(nmsThreshold),
|
||||
::testing::Values(confidenceThreshold),
|
||||
::testing::ValuesIn(clipAfterNms),
|
||||
::testing::ValuesIn(clipBeforeNms),
|
||||
::testing::ValuesIn(decreaseLabelId)
|
||||
);
|
||||
|
||||
/* =============== 3 inputs cases =============== */
|
||||
|
||||
const std::vector<ParamsWhichSizeDepends> specificParams3In = {
|
||||
ParamsWhichSizeDepends{true, true, true, 1, 1, {1, 60}, {1, 165}, {1, 1, 60}, {}, {}},
|
||||
ParamsWhichSizeDepends{true, false, true, 1, 1, {1, 660}, {1, 165}, {1, 1, 60}, {}, {}},
|
||||
ParamsWhichSizeDepends{false, true, true, 1, 1, {1, 60}, {1, 165}, {1, 2, 60}, {}, {}},
|
||||
ParamsWhichSizeDepends{false, false, true, 1, 1, {1, 660}, {1, 165}, {1, 2, 60}, {}, {}},
|
||||
|
||||
ParamsWhichSizeDepends{true, true, false, 10, 10, {1, 60}, {1, 165}, {1, 1, 75}, {}, {}},
|
||||
ParamsWhichSizeDepends{true, false, false, 10, 10, {1, 660}, {1, 165}, {1, 1, 75}, {}, {}},
|
||||
ParamsWhichSizeDepends{false, true, false, 10, 10, {1, 60}, {1, 165}, {1, 2, 75}, {}, {}},
|
||||
ParamsWhichSizeDepends{false, false, false, 10, 10, {1, 660}, {1, 165}, {1, 2, 75}, {}, {}}
|
||||
};
|
||||
|
||||
const auto params3Inputs = ::testing::Combine(
|
||||
commonAttributes,
|
||||
::testing::ValuesIn(specificParams3In),
|
||||
::testing::ValuesIn(numberBatch),
|
||||
::testing::Values(0.0f),
|
||||
::testing::Values(CommonTestUtils::DEVICE_CPU)
|
||||
);
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(smoke_DetectionOutput3In, DetectionOutputLayerTest, params3Inputs, DetectionOutputLayerTest::getTestCaseName);
|
||||
|
||||
/* =============== 5 inputs cases =============== */
|
||||
|
||||
const std::vector<ParamsWhichSizeDepends> specificParams5In = {
|
||||
ParamsWhichSizeDepends{true, true, true, 1, 1, {1, 60}, {1, 165}, {1, 1, 60}, {1, 30}, {1, 60}},
|
||||
ParamsWhichSizeDepends{true, false, true, 1, 1, {1, 660}, {1, 165}, {1, 1, 60}, {1, 30}, {1, 660}},
|
||||
ParamsWhichSizeDepends{false, true, true, 1, 1, {1, 60}, {1, 165}, {1, 2, 60}, {1, 30}, {1, 60}},
|
||||
ParamsWhichSizeDepends{false, false, true, 1, 1, {1, 660}, {1, 165}, {1, 2, 60}, {1, 30}, {1, 660}},
|
||||
|
||||
ParamsWhichSizeDepends{true, true, false, 10, 10, {1, 60}, {1, 165}, {1, 1, 75}, {1, 30}, {1, 60}},
|
||||
ParamsWhichSizeDepends{true, false, false, 10, 10, {1, 660}, {1, 165}, {1, 1, 75}, {1, 30}, {1, 660}},
|
||||
ParamsWhichSizeDepends{false, true, false, 10, 10, {1, 60}, {1, 165}, {1, 2, 75}, {1, 30}, {1, 60}},
|
||||
ParamsWhichSizeDepends{false, false, false, 10, 10, {1, 660}, {1, 165}, {1, 2, 75}, {1, 30}, {1, 660}}
|
||||
};
|
||||
|
||||
const auto params5Inputs = ::testing::Combine(
|
||||
commonAttributes,
|
||||
::testing::ValuesIn(specificParams5In),
|
||||
::testing::ValuesIn(numberBatch),
|
||||
::testing::Values(objectnessScore),
|
||||
::testing::Values(CommonTestUtils::DEVICE_CPU)
|
||||
);
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(smoke_DetectionOutput5In, DetectionOutputLayerTest, params5Inputs, DetectionOutputLayerTest::getTestCaseName);
|
||||
|
||||
} // namespace
|
@ -82,18 +82,18 @@ class ActivationLayerTest : public testing::WithParamInterface<activationParams>
|
||||
public:
|
||||
ngraph::helpers::ActivationTypes activationType;
|
||||
static std::string getTestCaseName(const testing::TestParamInfo<activationParams> &obj);
|
||||
virtual InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo &info) const;
|
||||
InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo &info) const override;
|
||||
|
||||
protected:
|
||||
void SetUp();
|
||||
void SetUp() override;
|
||||
};
|
||||
|
||||
class ActivationParamLayerTest : public ActivationLayerTest {
|
||||
public:
|
||||
void Infer();
|
||||
void Infer() override;
|
||||
|
||||
protected:
|
||||
void SetUp();
|
||||
void SetUp() override;
|
||||
|
||||
private:
|
||||
void generateActivationBlob();
|
||||
|
@ -0,0 +1,71 @@
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
|
||||
#include "ngraph/op/detection_output.hpp"
|
||||
#include "functional_test_utils/layer_test_utils.hpp"
|
||||
|
||||
namespace LayerTestsDefinitions {
|
||||
|
||||
enum {
|
||||
idxLocation,
|
||||
idxConfidence,
|
||||
idxPriors,
|
||||
idxArmConfidence,
|
||||
idxArmLocation,
|
||||
numInputs
|
||||
};
|
||||
|
||||
using DetectionOutputAttributes = std::tuple<
|
||||
int, // numClasses
|
||||
int, // backgroundLabelId
|
||||
int, // topK
|
||||
std::vector<int>, // keepTopK
|
||||
std::string, // codeType
|
||||
float, // nmsThreshold
|
||||
float, // confidenceThreshold
|
||||
bool, // clip_afterNms
|
||||
bool, // clip_beforeNms
|
||||
bool // decreaseLabelId
|
||||
>;
|
||||
|
||||
using ParamsWhichSizeDepends = std::tuple<
|
||||
bool, // varianceEncodedInTarget
|
||||
bool, // shareLocation
|
||||
bool, // normalized
|
||||
size_t, // inputHeight
|
||||
size_t, // inputWidth
|
||||
InferenceEngine::SizeVector, // "Location" input
|
||||
InferenceEngine::SizeVector, // "Confidence" input
|
||||
InferenceEngine::SizeVector, // "Priors" input
|
||||
InferenceEngine::SizeVector, // "ArmConfidence" input
|
||||
InferenceEngine::SizeVector // "ArmLocation" input
|
||||
>;
|
||||
|
||||
using DetectionOutputParams = std::tuple<
|
||||
DetectionOutputAttributes,
|
||||
ParamsWhichSizeDepends,
|
||||
size_t, // Number of batch
|
||||
float, // objectnessScore
|
||||
std::string // Device name
|
||||
>;
|
||||
|
||||
class DetectionOutputLayerTest : public testing::WithParamInterface<DetectionOutputParams>, public LayerTestsUtils::LayerTestsCommon {
|
||||
public:
|
||||
static std::string getTestCaseName(testing::TestParamInfo<DetectionOutputParams> obj);
|
||||
ngraph::op::DetectionOutputAttrs attrs;
|
||||
std::vector<InferenceEngine::SizeVector> inShapes;
|
||||
void Infer() override;
|
||||
void Compare(const std::vector<std::uint8_t> &expected, const InferenceEngine::Blob::Ptr &actual) override;
|
||||
protected:
|
||||
void SetUp() override;
|
||||
};
|
||||
|
||||
} // namespace LayerTestsDefinitions
|
@ -26,7 +26,7 @@ class RangeLayerTest : public testing::WithParamInterface<RangeParams>,
|
||||
float start, stop, step;
|
||||
public:
|
||||
static std::string getTestCaseName(testing::TestParamInfo<RangeParams> obj);
|
||||
void Infer();
|
||||
void Infer() override;
|
||||
|
||||
protected:
|
||||
void SetUp() override;
|
||||
|
@ -0,0 +1,164 @@
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
|
||||
#include "ngraph_functions/builders.hpp"
|
||||
#include "common_test_utils/data_utils.hpp"
|
||||
#include "functional_test_utils/layer_test_utils.hpp"
|
||||
#include "single_layer_tests/detection_output.hpp"
|
||||
|
||||
namespace LayerTestsDefinitions {
|
||||
|
||||
// Builds the test-case name: the input shapes first (with the batch dimension
// already applied), then every attribute, then the target device.
std::string DetectionOutputLayerTest::getTestCaseName(testing::TestParamInfo<DetectionOutputParams> obj) {
    DetectionOutputAttributes commonAttrs;
    ParamsWhichSizeDepends specificAttrs;
    ngraph::op::DetectionOutputAttrs attrs;
    size_t batch;
    std::string targetDevice;
    std::tie(commonAttrs, specificAttrs, batch, attrs.objectness_score, targetDevice) = obj.param;

    std::tie(attrs.num_classes, attrs.background_label_id, attrs.top_k, attrs.keep_top_k, attrs.code_type, attrs.nms_threshold, attrs.confidence_threshold,
             attrs.clip_after_nms, attrs.clip_before_nms, attrs.decrease_label_id) = commonAttrs;

    // Unpack all five shape slots; the two ARM slots are dropped below when unused.
    std::vector<InferenceEngine::SizeVector> inShapes(numInputs);
    std::tie(attrs.variance_encoded_in_target, attrs.share_location, attrs.normalized, attrs.input_height, attrs.input_width,
             inShapes[idxLocation], inShapes[idxConfidence], inShapes[idxPriors], inShapes[idxArmConfidence], inShapes[idxArmLocation]) = specificAttrs;

    // An empty "ArmConfidence" shape marks the 3-input variant.
    const bool hasArmInputs = !inShapes[idxArmConfidence].empty();
    if (!hasArmInputs) {
        inShapes.resize(idxArmConfidence);
    }

    // The parameter tables are written for batch 1; patch in the tested batch size.
    for (auto &shape : inShapes) {
        shape[0] = batch;
    }

    std::ostringstream result;
    result << "IS = { "
           << "LOC=" << CommonTestUtils::vec2str(inShapes[idxLocation]) << "_"
           << "CONF=" << CommonTestUtils::vec2str(inShapes[idxConfidence]) << "_"
           << "PRIOR=" << CommonTestUtils::vec2str(inShapes[idxPriors]);
    if (hasArmInputs) {
        result << "_ARM_CONF=" << CommonTestUtils::vec2str(inShapes[idxArmConfidence]) << "_"
               << "ARM_LOC=" << CommonTestUtils::vec2str(inShapes[idxArmLocation]);
    }
    result << " }_";

    result << "Classes=" << attrs.num_classes << "_"
           << "backgrId=" << attrs.background_label_id << "_"
           << "topK=" << attrs.top_k << "_"
           << "varEnc=" << attrs.variance_encoded_in_target << "_"
           << "keepTopK=" << CommonTestUtils::vec2str(attrs.keep_top_k) << "_"
           << "codeType=" << attrs.code_type << "_"
           << "shareLoc=" << attrs.share_location << "_"
           << "nmsThr=" << attrs.nms_threshold << "_"
           << "confThr=" << attrs.confidence_threshold << "_"
           << "clipAfterNms=" << attrs.clip_after_nms << "_"
           << "clipBeforeNms=" << attrs.clip_before_nms << "_"
           << "decrId=" << attrs.decrease_label_id << "_"
           << "norm=" << attrs.normalized << "_"
           << "inH=" << attrs.input_height << "_"
           << "inW=" << attrs.input_width << "_"
           << "OS=" << attrs.objectness_score << "_"
           << "TargetDevice=" << targetDevice;
    return result.str();
}
|
||||
|
||||
void DetectionOutputLayerTest::Infer() {
|
||||
inferRequest = executableNetwork.CreateInferRequest();
|
||||
inputs.clear();
|
||||
|
||||
size_t it = 0;
|
||||
for (const auto &input : cnnNetwork.getInputsInfo()) {
|
||||
const auto &info = input.second;
|
||||
InferenceEngine::Blob::Ptr blob;
|
||||
int32_t resolution = 1;
|
||||
uint32_t range = 1;
|
||||
if (it == 2) {
|
||||
if (attrs.normalized) {
|
||||
resolution = 100;
|
||||
} else {
|
||||
range = 10;
|
||||
}
|
||||
} else if (it == 1 || it == 3) {
|
||||
resolution = 1000;
|
||||
} else {
|
||||
resolution = 10;
|
||||
}
|
||||
blob = make_blob_with_precision(info->getTensorDesc());
|
||||
blob->allocate();
|
||||
CommonTestUtils::fill_data_random_float<InferenceEngine::Precision::FP32>(blob, range, 0, resolution);
|
||||
inferRequest.SetBlob(info->name(), blob);
|
||||
inputs.push_back(blob);
|
||||
it++;
|
||||
}
|
||||
inferRequest.Infer();
|
||||
}
|
||||
|
||||
// Compares the reference output with the plugin output.
//
// Both buffers are walked in steps of 7 floats; the first step whose leading
// value equals -1 ends the valid data. Only that valid prefix is compared
// (threshold 1e-2), and both prefixes must have the same length.
//
// @param expected  raw bytes of the reference output, interpreted as floats
// @param actual    output blob produced by the plugin
void DetectionOutputLayerTest::Compare(const std::vector<std::uint8_t> &expected, const InferenceEngine::Blob::Ptr &actual) {
    ASSERT_EQ(expected.size(), actual->byteSize());

    auto memory = InferenceEngine::as<InferenceEngine::MemoryBlob>(actual);
    IE_ASSERT(memory);
    // The buffer is only read here, so map it for reading. A write-only
    // mapping (wmap) does not guarantee that the mapped content is valid.
    const auto lockedMemory = memory->rmap();
    const auto actualBuffer = lockedMemory.as<const std::uint8_t *>();

    const float *expBuf = reinterpret_cast<const float *>(expected.data());
    const float *actBuf = reinterpret_cast<const float *>(actualBuffer);

    // Number of floats per step and total number of floats in each buffer
    // (the byte sizes were checked to be equal above).
    const size_t stepSize = 7;
    const size_t totalValues = actual->size();

    // Counts the floats that precede the first step starting with -1.
    const auto countValidValues = [stepSize, totalValues](const float *buffer) {
        size_t count = 0;
        for (size_t i = 0; i < totalValues; i += stepSize) {
            if (buffer[i] == -1)
                break;
            count += stepSize;
        }
        return count;
    };

    const size_t expSize = countValidValues(expBuf);
    const size_t actSize = countValidValues(actBuf);
    ASSERT_EQ(expSize, actSize);
    LayerTestsCommon::Compare<float>(expBuf, actBuf, expSize, 1e-2f);
}
|
||||
|
||||
void DetectionOutputLayerTest::SetUp() {
|
||||
DetectionOutputAttributes commonAttrs;
|
||||
ParamsWhichSizeDepends specificAttrs;
|
||||
size_t batch;
|
||||
std::tie(commonAttrs, specificAttrs, batch, attrs.objectness_score, targetDevice) = this->GetParam();
|
||||
|
||||
std::tie(attrs.num_classes, attrs.background_label_id, attrs.top_k, attrs.keep_top_k, attrs.code_type, attrs.nms_threshold, attrs.confidence_threshold,
|
||||
attrs.clip_after_nms, attrs.clip_before_nms, attrs.decrease_label_id) = commonAttrs;
|
||||
|
||||
inShapes.resize(numInputs);
|
||||
std::tie(attrs.variance_encoded_in_target, attrs.share_location, attrs.normalized, attrs.input_height, attrs.input_width,
|
||||
inShapes[idxLocation], inShapes[idxConfidence], inShapes[idxPriors], inShapes[idxArmConfidence], inShapes[idxArmLocation]) = specificAttrs;
|
||||
|
||||
if (inShapes[idxArmConfidence].empty()) {
|
||||
inShapes.resize(3);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < inShapes.size(); i++) {
|
||||
inShapes[i][0] = batch;
|
||||
}
|
||||
|
||||
auto params = ngraph::builder::makeParams(ngraph::element::f32, inShapes);
|
||||
auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::opset3::Parameter>(params));
|
||||
auto detOut = ngraph::builder::makeDetectionOutput(paramOuts, attrs);
|
||||
ngraph::ResultVector results{std::make_shared<ngraph::opset3::Result>(detOut)};
|
||||
function = std::make_shared<ngraph::Function>(results, params, "DetectionOutput");
|
||||
}
|
||||
|
||||
// Runs the whole test pipeline of the fixture for every parameter set.
TEST_P(DetectionOutputLayerTest, CompareWithRefs) {
    Run();
}
|
||||
|
||||
} // namespace LayerTestsDefinitions
|
||||
|
@ -96,7 +96,7 @@ protected:
|
||||
|
||||
void LoadNetwork();
|
||||
|
||||
void Infer();
|
||||
virtual void Infer();
|
||||
|
||||
TargetDevice targetDevice;
|
||||
std::shared_ptr<ngraph::Function> function;
|
||||
|
@ -314,5 +314,8 @@ std::shared_ptr<ngraph::Node> makeLogical(const ngraph::Output<Node> &in0,
|
||||
const ngraph::Output<Node> &in1,
|
||||
ngraph::helpers::LogicalTypes logicalType);
|
||||
|
||||
std::shared_ptr<ngraph::Node> makeDetectionOutput(const ngraph::OutputVector &inputs,
|
||||
const ngraph::op::DetectionOutputAttrs& attrs);
|
||||
|
||||
} // namespace builder
|
||||
} // namespace ngraph
|
||||
|
@ -0,0 +1,21 @@
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "ngraph_functions/builders.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace builder {
|
||||
|
||||
// Creates an opset3 DetectionOutput node from either 3 or 5 inputs.
//
// @param inputs  outputs feeding the node; exactly 3 or 5 entries
// @param attrs   attributes passed to the node unchanged
// @return the created node
// @throws std::runtime_error for any other number of inputs
std::shared_ptr<ngraph::Node> makeDetectionOutput(const ngraph::OutputVector &inputs,
                                                  const ngraph::op::DetectionOutputAttrs& attrs) {
    switch (inputs.size()) {
    case 3:
        return std::make_shared<ngraph::opset3::DetectionOutput>(inputs[0], inputs[1], inputs[2], attrs);
    case 5:
        return std::make_shared<ngraph::opset3::DetectionOutput>(inputs[0], inputs[1], inputs[2], inputs[3], inputs[4], attrs);
    default:
        throw std::runtime_error("DetectionOutput layer supports only 3 or 5 inputs");
    }
}
|
||||
|
||||
} // namespace builder
|
||||
} // namespace ngraph
|
@ -92,6 +92,8 @@
|
||||
#include "op/convolution.hpp"
|
||||
#include "op/group_conv.hpp"
|
||||
|
||||
#include "reference/detection_output.hpp"
|
||||
|
||||
namespace ngraph
|
||||
{
|
||||
namespace runtime
|
||||
@ -1112,6 +1114,36 @@ protected:
|
||||
}
|
||||
break;
|
||||
}
|
||||
case OP_TYPEID::DetectionOutput_v0:
|
||||
{
|
||||
const op::DetectionOutput* detOut = static_cast<const op::DetectionOutput*>(&node);
|
||||
reference::referenceDetectionOutput<T> refDetOut(
|
||||
detOut->get_attrs(), node.get_input_shape(0), node.get_input_shape(2));
|
||||
if (node.get_input_size() == 3)
|
||||
{
|
||||
refDetOut.run(args[0]->get_data_ptr<const T>(),
|
||||
args[1]->get_data_ptr<const T>(),
|
||||
args[2]->get_data_ptr<const T>(),
|
||||
nullptr,
|
||||
nullptr,
|
||||
out[0]->get_data_ptr<T>());
|
||||
}
|
||||
else if (node.get_input_size() == 5)
|
||||
{
|
||||
refDetOut.run(args[0]->get_data_ptr<const T>(),
|
||||
args[1]->get_data_ptr<const T>(),
|
||||
args[2]->get_data_ptr<const T>(),
|
||||
args[3]->get_data_ptr<const T>(),
|
||||
args[4]->get_data_ptr<const T>(),
|
||||
out[0]->get_data_ptr<T>());
|
||||
}
|
||||
else
|
||||
{
|
||||
throw ngraph_error("DetectionOutput layer supports only 3 or 5 inputs");
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
// Fused Ops are not supported in interpreter. They need to be decomposed before execution
|
||||
case OP_TYPEID::DepthToSpace:
|
||||
|
@ -18,6 +18,10 @@
|
||||
#include "opset0_tbl.hpp"
|
||||
#undef ID_SUFFIX
|
||||
|
||||
#define ID_SUFFIX(NAME) NAME##_v0
|
||||
NGRAPH_OP(DetectionOutput, op::v0)
|
||||
#undef ID_SUFFIX
|
||||
|
||||
#define ID_SUFFIX(NAME) NAME##_v1
|
||||
NGRAPH_OP(LessEqual, op::v1)
|
||||
NGRAPH_OP(LogicalAnd, op::v1)
|
||||
|
669
ngraph/test/runtime/interpreter/reference/detection_output.hpp
Normal file
669
ngraph/test/runtime/interpreter/reference/detection_output.hpp
Normal file
@ -0,0 +1,669 @@
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "ngraph/shape.hpp"
|
||||
|
||||
namespace ngraph
|
||||
{
|
||||
namespace runtime
|
||||
{
|
||||
namespace reference
|
||||
{
|
||||
// Positions of the DetectionOutput inputs; numInputs is the count of
// enumerated positions.
enum
{
    idxLocation = 0,
    idxConfidence = 1,
    idxPriors = 2,
    idxArmConfidence = 3,
    idxArmLocation = 4,
    numInputs = 5
};
|
||||
|
||||
template <typename dataType>
|
||||
class referenceDetectionOutput
|
||||
{
|
||||
private:
|
||||
struct NormalizedBBox
|
||||
{
|
||||
dataType xmin = 0;
|
||||
dataType ymin = 0;
|
||||
dataType xmax = 0;
|
||||
dataType ymax = 0;
|
||||
dataType size = 0;
|
||||
};
|
||||
using LabelBBox = std::map<int, std::vector<NormalizedBBox>>;
|
||||
|
||||
ngraph::op::DetectionOutputAttrs attrs;
|
||||
size_t numImages;
|
||||
size_t priorSize;
|
||||
size_t numPriors;
|
||||
size_t numLocClasses;
|
||||
size_t offset;
|
||||
|
||||
void GetLocPredictions(const dataType* locData, std::vector<LabelBBox>& locations)
|
||||
{
|
||||
locations.resize(numImages);
|
||||
for (size_t i = 0; i < numImages; ++i)
|
||||
{
|
||||
LabelBBox& labelBbox = locations[i];
|
||||
for (size_t p = 0; p < numPriors; ++p)
|
||||
{
|
||||
size_t startIdx = p * numLocClasses * 4;
|
||||
for (size_t c = 0; c < numLocClasses; ++c)
|
||||
{
|
||||
int label = attrs.share_location ? -1 : c;
|
||||
if (labelBbox.find(label) == labelBbox.end())
|
||||
{
|
||||
labelBbox[label].resize(numPriors);
|
||||
}
|
||||
labelBbox[label][p].xmin = locData[startIdx + c * 4];
|
||||
labelBbox[label][p].ymin = locData[startIdx + c * 4 + 1];
|
||||
labelBbox[label][p].xmax = locData[startIdx + c * 4 + 2];
|
||||
labelBbox[label][p].ymax = locData[startIdx + c * 4 + 3];
|
||||
}
|
||||
}
|
||||
locData += numPriors * numLocClasses * 4;
|
||||
}
|
||||
}
|
||||
|
||||
void GetConfidenceScores(
|
||||
const dataType* confData,
|
||||
std::vector<std::map<int, std::vector<dataType>>>& confPreds)
|
||||
{
|
||||
confPreds.resize(numImages);
|
||||
for (int i = 0; i < numImages; ++i)
|
||||
{
|
||||
std::map<int, std::vector<dataType>>& labelScores = confPreds[i];
|
||||
for (int p = 0; p < numPriors; ++p)
|
||||
{
|
||||
int startIdx = p * attrs.num_classes;
|
||||
for (int c = 0; c < attrs.num_classes; ++c)
|
||||
{
|
||||
labelScores[c].push_back(confData[startIdx + c]);
|
||||
}
|
||||
}
|
||||
confData += numPriors * attrs.num_classes;
|
||||
}
|
||||
}
|
||||
|
||||
void OSGetConfidenceScores(
|
||||
const dataType* confData,
|
||||
const dataType* armConfData,
|
||||
std::vector<std::map<int, std::vector<dataType>>>& confPreds)
|
||||
{
|
||||
confPreds.resize(numImages);
|
||||
for (int i = 0; i < numImages; ++i)
|
||||
{
|
||||
std::map<int, std::vector<dataType>>& labelScores = confPreds[i];
|
||||
for (int p = 0; p < numPriors; ++p)
|
||||
{
|
||||
int startIdx = p * attrs.num_classes;
|
||||
if (armConfData[p * 2 + 1] < attrs.objectness_score)
|
||||
{
|
||||
for (int c = 0; c < attrs.num_classes; ++c)
|
||||
{
|
||||
c == attrs.background_label_id ? labelScores[c].push_back(1)
|
||||
: labelScores[c].push_back(0);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int c = 0; c < attrs.num_classes; ++c)
|
||||
{
|
||||
labelScores[c].push_back(confData[startIdx + c]);
|
||||
}
|
||||
}
|
||||
}
|
||||
confData += numPriors * attrs.num_classes;
|
||||
armConfData += numPriors * 2;
|
||||
}
|
||||
}
|
||||
|
||||
// Area of a box; 0 when either max coordinate lies below its min coordinate.
dataType BBoxSize(const NormalizedBBox& bbox)
{
    const bool inverted = bbox.xmax < bbox.xmin || bbox.ymax < bbox.ymin;
    if (inverted)
    {
        return 0;
    }
    const dataType boxWidth = bbox.xmax - bbox.xmin;
    const dataType boxHeight = bbox.ymax - bbox.ymin;
    return boxWidth * boxHeight;
}
|
||||
|
||||
void GetPriorBBoxes(const dataType* priorData,
|
||||
std::vector<std::vector<NormalizedBBox>>& priorBboxes,
|
||||
std::vector<std::vector<std::vector<dataType>>>& priorVariances)
|
||||
{
|
||||
priorBboxes.resize(numImages);
|
||||
priorVariances.resize(numImages);
|
||||
for (int n = 0; n < numImages; n++)
|
||||
{
|
||||
priorData += attrs.variance_encoded_in_target
|
||||
? n * numPriors * priorSize
|
||||
: 2 * n * numPriors * priorSize;
|
||||
std::vector<NormalizedBBox>& currPrBbox = priorBboxes[n];
|
||||
std::vector<std::vector<dataType>>& currPrVar = priorVariances[n];
|
||||
for (int i = 0; i < numPriors; ++i)
|
||||
{
|
||||
int start_idx = i * priorSize;
|
||||
NormalizedBBox bbox;
|
||||
bbox.xmin = priorData[start_idx + 0 + offset];
|
||||
bbox.ymin = priorData[start_idx + 1 + offset];
|
||||
bbox.xmax = priorData[start_idx + 2 + offset];
|
||||
bbox.ymax = priorData[start_idx + 3 + offset];
|
||||
dataType bbox_size = BBoxSize(bbox);
|
||||
bbox.size = bbox_size;
|
||||
currPrBbox.push_back(bbox);
|
||||
}
|
||||
if (!attrs.variance_encoded_in_target)
|
||||
{
|
||||
const dataType* priorVar = priorData + numPriors * priorSize;
|
||||
for (int i = 0; i < numPriors; ++i)
|
||||
{
|
||||
int start_idx = i * 4;
|
||||
std::vector<dataType> var;
|
||||
for (int j = 0; j < 4; ++j)
|
||||
{
|
||||
var.push_back(priorVar[start_idx + j]);
|
||||
}
|
||||
currPrVar.push_back(var);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void DecodeBBox(const NormalizedBBox& priorBboxes,
|
||||
const std::vector<dataType>& priorVariances,
|
||||
const NormalizedBBox& bbox,
|
||||
NormalizedBBox& decodeBbox)
|
||||
{
|
||||
dataType priorXmin = priorBboxes.xmin;
|
||||
dataType priorYmin = priorBboxes.ymin;
|
||||
dataType priorXmax = priorBboxes.xmax;
|
||||
dataType priorYmax = priorBboxes.ymax;
|
||||
|
||||
if (!attrs.normalized)
|
||||
{
|
||||
priorXmin /= attrs.input_width;
|
||||
priorYmin /= attrs.input_height;
|
||||
priorXmax /= attrs.input_width;
|
||||
priorYmax /= attrs.input_height;
|
||||
}
|
||||
if (attrs.code_type == "caffe.PriorBoxParameter.CORNER")
|
||||
{
|
||||
if (attrs.variance_encoded_in_target)
|
||||
{
|
||||
decodeBbox.xmin = priorXmin + bbox.xmin;
|
||||
decodeBbox.ymin = priorYmin + bbox.ymin;
|
||||
decodeBbox.xmax = priorXmax + bbox.xmax;
|
||||
decodeBbox.ymax = priorYmax + bbox.ymax;
|
||||
}
|
||||
else
|
||||
{
|
||||
decodeBbox.xmin = priorXmin + priorVariances[0] * bbox.xmin;
|
||||
decodeBbox.ymin = priorYmin + priorVariances[1] * bbox.ymin;
|
||||
decodeBbox.xmax = priorXmax + priorVariances[2] * bbox.xmax;
|
||||
decodeBbox.ymax = priorYmax + priorVariances[3] * bbox.ymax;
|
||||
}
|
||||
}
|
||||
else if (attrs.code_type == "caffe.PriorBoxParameter.CENTER_SIZE")
|
||||
{
|
||||
dataType priorWidth = priorXmax - priorXmin;
|
||||
dataType priorHeight = priorYmax - priorYmin;
|
||||
dataType priorCenterX = (priorXmin + priorXmax) / 2;
|
||||
dataType priorCenterY = (priorYmin + priorYmax) / 2;
|
||||
dataType decodeBboxCenterX, decodeBboxCenterY;
|
||||
dataType decodeBboxWidth, decodeBboxHeight;
|
||||
if (attrs.variance_encoded_in_target)
|
||||
{
|
||||
decodeBboxCenterX = bbox.xmin * priorWidth + priorCenterX;
|
||||
decodeBboxCenterY = bbox.ymin * priorHeight + priorCenterY;
|
||||
decodeBboxWidth = std::exp(bbox.xmax) * priorWidth;
|
||||
decodeBboxHeight = std::exp(bbox.ymax) * priorHeight;
|
||||
}
|
||||
else
|
||||
{
|
||||
decodeBboxCenterX =
|
||||
priorVariances[0] * bbox.xmin * priorWidth + priorCenterX;
|
||||
decodeBboxCenterY =
|
||||
priorVariances[1] * bbox.ymin * priorHeight + priorCenterY;
|
||||
decodeBboxWidth = std::exp(priorVariances[2] * bbox.xmax) * priorWidth;
|
||||
decodeBboxHeight =
|
||||
std::exp(priorVariances[3] * bbox.ymax) * priorHeight;
|
||||
}
|
||||
decodeBbox.xmin = decodeBboxCenterX - decodeBboxWidth / 2;
|
||||
decodeBbox.ymin = decodeBboxCenterY - decodeBboxHeight / 2;
|
||||
decodeBbox.xmax = decodeBboxCenterX + decodeBboxWidth / 2;
|
||||
decodeBbox.ymax = decodeBboxCenterY + decodeBboxHeight / 2;
|
||||
}
|
||||
if (attrs.clip_before_nms)
|
||||
{
|
||||
decodeBbox.xmin =
|
||||
std::max<dataType>(0, std::min<dataType>(1, decodeBbox.xmin));
|
||||
decodeBbox.ymin =
|
||||
std::max<dataType>(0, std::min<dataType>(1, decodeBbox.ymin));
|
||||
decodeBbox.xmax =
|
||||
std::max<dataType>(0, std::min<dataType>(1, decodeBbox.xmax));
|
||||
decodeBbox.ymax =
|
||||
std::max<dataType>(0, std::min<dataType>(1, decodeBbox.ymax));
|
||||
}
|
||||
dataType bboxSize = BBoxSize(decodeBbox);
|
||||
decodeBbox.size = bboxSize;
|
||||
}
|
||||
|
||||
void DecodeBBoxes(const std::vector<NormalizedBBox>& priorBboxes,
|
||||
const std::vector<std::vector<dataType>>& priorVariances,
|
||||
const std::vector<NormalizedBBox>& labelLocPreds,
|
||||
std::vector<NormalizedBBox>& decodeBboxes)
|
||||
{
|
||||
int numBboxes = priorBboxes.size();
|
||||
for (int i = 0; i < numBboxes; ++i)
|
||||
{
|
||||
NormalizedBBox decodeBbox;
|
||||
DecodeBBox(priorBboxes[i], priorVariances[i], labelLocPreds[i], decodeBbox);
|
||||
decodeBboxes.push_back(decodeBbox);
|
||||
}
|
||||
}
|
||||
|
||||
void DecodeBBoxesAll(
|
||||
const std::vector<LabelBBox>& locPreds,
|
||||
const std::vector<std::vector<NormalizedBBox>>& priorBboxes,
|
||||
const std::vector<std::vector<std::vector<dataType>>>& priorVariances,
|
||||
std::vector<LabelBBox>& decodeBboxes)
|
||||
{
|
||||
decodeBboxes.resize(numImages);
|
||||
for (int i = 0; i < numImages; ++i)
|
||||
{
|
||||
LabelBBox& decodeBboxesImage = decodeBboxes[i];
|
||||
const std::vector<NormalizedBBox>& currPrBbox = priorBboxes[i];
|
||||
const std::vector<std::vector<dataType>>& currPrVar = priorVariances[i];
|
||||
for (int c = 0; c < numLocClasses; ++c)
|
||||
{
|
||||
int label = attrs.share_location ? -1 : c;
|
||||
if (label == attrs.background_label_id)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
const std::vector<NormalizedBBox>& labelLocPreds =
|
||||
locPreds[i].find(label)->second;
|
||||
DecodeBBoxes(
|
||||
currPrBbox, currPrVar, labelLocPreds, decodeBboxesImage[label]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void CasRegDecodeBBoxesAll(
|
||||
const std::vector<LabelBBox>& locPreds,
|
||||
const std::vector<std::vector<NormalizedBBox>>& priorBboxes,
|
||||
const std::vector<std::vector<std::vector<dataType>>>& priorVariances,
|
||||
std::vector<LabelBBox>& decodeBboxes,
|
||||
const std::vector<LabelBBox>& armLocPreds)
|
||||
{
|
||||
decodeBboxes.resize(numImages);
|
||||
for (int i = 0; i < numImages; ++i)
|
||||
{
|
||||
LabelBBox& decodeBboxesImage = decodeBboxes[i];
|
||||
const std::vector<NormalizedBBox>& currPrBbox = priorBboxes[i];
|
||||
const std::vector<std::vector<dataType>>& currPrVar = priorVariances[i];
|
||||
for (int c = 0; c < numLocClasses; ++c)
|
||||
{
|
||||
int label = attrs.share_location ? -1 : c;
|
||||
if (label == attrs.background_label_id)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
const std::vector<NormalizedBBox>& labelArmLocPreds =
|
||||
armLocPreds[i].find(label)->second;
|
||||
std::vector<NormalizedBBox> decodePriorBboxes;
|
||||
DecodeBBoxes(
|
||||
currPrBbox, currPrVar, labelArmLocPreds, decodePriorBboxes);
|
||||
const std::vector<NormalizedBBox>& labelLocPreds =
|
||||
locPreds[i].find(label)->second;
|
||||
DecodeBBoxes(decodePriorBboxes,
|
||||
currPrVar,
|
||||
labelLocPreds,
|
||||
decodeBboxesImage[label]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static bool SortScorePairDescend(const std::pair<dataType, T>& pair1,
|
||||
const std::pair<dataType, T>& pair2)
|
||||
{
|
||||
return pair1.first > pair2.first;
|
||||
}
|
||||
|
||||
void GetMaxScoreIndex(const std::vector<dataType>& scores,
|
||||
const dataType threshold,
|
||||
const int topK,
|
||||
std::vector<std::pair<dataType, int>>& scoreIndexVec)
|
||||
{
|
||||
for (int i = 0; i < scores.size(); ++i)
|
||||
{
|
||||
if (scores[i] > threshold)
|
||||
{
|
||||
scoreIndexVec.push_back(std::make_pair(scores[i], i));
|
||||
}
|
||||
}
|
||||
|
||||
std::stable_sort(
|
||||
scoreIndexVec.begin(), scoreIndexVec.end(), SortScorePairDescend<int>);
|
||||
if (topK > -1 && topK < scoreIndexVec.size())
|
||||
{
|
||||
scoreIndexVec.resize(topK);
|
||||
}
|
||||
}
|
||||
|
||||
void IntersectBBox(const NormalizedBBox& bbox1,
|
||||
const NormalizedBBox& bbox2,
|
||||
NormalizedBBox& intersectBbox)
|
||||
{
|
||||
if (bbox2.xmin > bbox1.xmax || bbox2.xmax < bbox1.xmin ||
|
||||
bbox2.ymin > bbox1.ymax || bbox2.ymax < bbox1.ymin)
|
||||
{
|
||||
intersectBbox.xmin = 0;
|
||||
intersectBbox.ymin = 0;
|
||||
intersectBbox.xmax = 0;
|
||||
intersectBbox.ymax = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
intersectBbox.xmin = std::max<dataType>(bbox1.xmin, bbox2.xmin);
|
||||
intersectBbox.ymin = std::max<dataType>(bbox1.ymin, bbox2.ymin);
|
||||
intersectBbox.xmax = std::min<dataType>(bbox1.xmax, bbox2.xmax);
|
||||
intersectBbox.ymax = std::min<dataType>(bbox1.ymax, bbox2.ymax);
|
||||
}
|
||||
}
|
||||
|
||||
// Intersection-over-union of two boxes: overlap area divided by the combined
// area. Returns 0 when the overlap has no positive width or height.
dataType JaccardOverlap(const NormalizedBBox& bbox1, const NormalizedBBox& bbox2)
{
    NormalizedBBox common;
    IntersectBBox(bbox1, bbox2, common);

    const dataType commonWidth = common.xmax - common.xmin;
    const dataType commonHeight = common.ymax - common.ymin;
    if (!(commonWidth > 0 && commonHeight > 0))
    {
        return 0.0f;
    }

    const dataType commonArea = commonWidth * commonHeight;
    const dataType area1 = BBoxSize(bbox1);
    const dataType area2 = BBoxSize(bbox2);
    return commonArea / (area1 + area2 - commonArea);
}
|
||||
|
||||
void caffeNMS(const std::vector<NormalizedBBox>& bboxes,
|
||||
const std::vector<dataType>& scores,
|
||||
std::vector<int>& indices)
|
||||
{
|
||||
std::vector<std::pair<dataType, int>> scoreIndexVec;
|
||||
GetMaxScoreIndex(
|
||||
scores, attrs.confidence_threshold, attrs.top_k, scoreIndexVec);
|
||||
while (scoreIndexVec.size() != 0)
|
||||
{
|
||||
const int idx = scoreIndexVec.front().second;
|
||||
bool keep = true;
|
||||
for (int k = 0; k < indices.size(); ++k)
|
||||
{
|
||||
const int kept_idx = indices[k];
|
||||
dataType overlap = JaccardOverlap(bboxes[idx], bboxes[kept_idx]);
|
||||
if (overlap > attrs.nms_threshold)
|
||||
{
|
||||
keep = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (keep)
|
||||
{
|
||||
indices.push_back(idx);
|
||||
}
|
||||
scoreIndexVec.erase(scoreIndexVec.begin());
|
||||
}
|
||||
}
|
||||
|
||||
void mxNetNms(const LabelBBox& decodeBboxesImage,
|
||||
const std::map<int, std::vector<dataType>>& confScores,
|
||||
std::map<int, std::vector<int>>& indices)
|
||||
{
|
||||
std::vector<std::pair<dataType, std::pair<int, int>>> scoreIndexPairs;
|
||||
for (int p = 0; p < numPriors; p++)
|
||||
{
|
||||
dataType conf = -1;
|
||||
int id = 0;
|
||||
for (int c = 1; c < attrs.num_classes; c++)
|
||||
{
|
||||
dataType temp = confScores.at(c)[p];
|
||||
if (temp > conf)
|
||||
{
|
||||
conf = temp;
|
||||
id = c;
|
||||
}
|
||||
}
|
||||
if (id > 0 && conf >= attrs.confidence_threshold)
|
||||
{
|
||||
scoreIndexPairs.push_back(std::make_pair(conf, std::make_pair(id, p)));
|
||||
}
|
||||
}
|
||||
std::sort(scoreIndexPairs.begin(),
|
||||
scoreIndexPairs.end(),
|
||||
SortScorePairDescend<std::pair<int, int>>);
|
||||
|
||||
if (attrs.top_k != -1)
|
||||
if (scoreIndexPairs.size() > attrs.top_k)
|
||||
scoreIndexPairs.resize(attrs.top_k);
|
||||
|
||||
while (scoreIndexPairs.size() != 0)
|
||||
{
|
||||
const int cls = scoreIndexPairs.front().second.first;
|
||||
const int prior = scoreIndexPairs.front().second.second;
|
||||
std::vector<int>& currInd = indices[cls];
|
||||
bool keep = true;
|
||||
for (int i = 0; i < currInd.size(); i++)
|
||||
{
|
||||
const int keptIdx = currInd[i];
|
||||
auto currBbox = attrs.share_location ? decodeBboxesImage.at(-1)
|
||||
: decodeBboxesImage.at(cls);
|
||||
dataType overlap = JaccardOverlap(currBbox[prior], currBbox[keptIdx]);
|
||||
if (overlap > attrs.nms_threshold)
|
||||
{
|
||||
keep = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (keep)
|
||||
{
|
||||
currInd.push_back(prior);
|
||||
}
|
||||
scoreIndexPairs.erase(scoreIndexPairs.begin());
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
// Captures the attributes and derives the sizes used by the helpers:
//   numImages     - first dimension of the location input shape
//   priorSize     - values per prior record: 4 when normalized, otherwise 5
//   offset        - values skipped at the start of a record: 0 when
//                   normalized, otherwise 1
//   numPriors     - third dimension of the priors shape divided by priorSize
//   numLocClasses - 1 when locations are shared, otherwise num_classes
referenceDetectionOutput(const ngraph::op::DetectionOutputAttrs& _attrs,
                         const ngraph::Shape& locShape,
                         const ngraph::Shape& priorsShape)
    : attrs(_attrs)
{
    const bool normalized = _attrs.normalized;
    if (normalized)
    {
        priorSize = 4;
        offset = 0;
    }
    else
    {
        priorSize = 5;
        offset = 1;
    }
    numImages = locShape[0];
    numPriors = priorsShape[2] / priorSize;
    if (_attrs.share_location)
    {
        numLocClasses = 1;
    }
    else
    {
        numLocClasses = static_cast<size_t>(_attrs.num_classes);
    }
}
|
||||
|
||||
void run(const dataType* _location,
|
||||
const dataType* _confidence,
|
||||
const dataType* _priors,
|
||||
const dataType* _armConfidence,
|
||||
const dataType* _armLocation,
|
||||
dataType* result)
|
||||
{
|
||||
bool withAddBoxPred = _armConfidence != nullptr && _armLocation != nullptr;
|
||||
std::vector<LabelBBox> armLocPreds;
|
||||
if (withAddBoxPred)
|
||||
{
|
||||
GetLocPredictions(_armLocation, armLocPreds);
|
||||
}
|
||||
std::vector<LabelBBox> locPreds;
|
||||
GetLocPredictions(_location, locPreds);
|
||||
std::vector<std::map<int, std::vector<dataType>>> confPreds;
|
||||
if (withAddBoxPred)
|
||||
{
|
||||
OSGetConfidenceScores(_confidence, _armConfidence, confPreds);
|
||||
}
|
||||
else
|
||||
{
|
||||
GetConfidenceScores(_confidence, confPreds);
|
||||
}
|
||||
std::vector<std::vector<NormalizedBBox>> priorBboxes;
|
||||
std::vector<std::vector<std::vector<dataType>>> priorVariances;
|
||||
GetPriorBBoxes(_priors, priorBboxes, priorVariances);
|
||||
std::vector<LabelBBox> decodeBboxes;
|
||||
if (withAddBoxPred)
|
||||
{
|
||||
CasRegDecodeBBoxesAll(
|
||||
locPreds, priorBboxes, priorVariances, decodeBboxes, armLocPreds);
|
||||
}
|
||||
else
|
||||
{
|
||||
DecodeBBoxesAll(locPreds, priorBboxes, priorVariances, decodeBboxes);
|
||||
}
|
||||
|
||||
int numKept = 0;
|
||||
std::vector<std::map<int, std::vector<int>>> allIndices;
|
||||
for (int i = 0; i < numImages; ++i)
|
||||
{
|
||||
const LabelBBox& decodeBboxesImage = decodeBboxes[i];
|
||||
const std::map<int, std::vector<dataType>>& confScores = confPreds[i];
|
||||
std::map<int, std::vector<int>> indices;
|
||||
int numDet = 0;
|
||||
if (!attrs.decrease_label_id)
|
||||
{
|
||||
// Caffe style
|
||||
for (int c = 0; c < attrs.num_classes; ++c)
|
||||
{
|
||||
if (c == attrs.background_label_id)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
const std::vector<dataType>& scores = confScores.find(c)->second;
|
||||
int label = attrs.share_location ? -1 : c;
|
||||
const std::vector<NormalizedBBox>& bboxes =
|
||||
decodeBboxesImage.find(label)->second;
|
||||
caffeNMS(bboxes, scores, indices[c]);
|
||||
numDet += indices[c].size();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// MXNet style
|
||||
mxNetNms(decodeBboxesImage, confScores, indices);
|
||||
for (auto it = indices.begin(); it != indices.end(); it++)
|
||||
numDet += it->second.size();
|
||||
}
|
||||
if (attrs.keep_top_k[0] > -1 && numDet > attrs.keep_top_k[0])
|
||||
{
|
||||
std::vector<std::pair<dataType, std::pair<int, int>>> scoreIndexPairs;
|
||||
for (auto it = indices.begin(); it != indices.end(); ++it)
|
||||
{
|
||||
int label = it->first;
|
||||
const std::vector<int>& labelIndices = it->second;
|
||||
const std::vector<dataType>& scores =
|
||||
confScores.find(label)->second;
|
||||
for (int j = 0; j < labelIndices.size(); ++j)
|
||||
{
|
||||
int idx = labelIndices[j];
|
||||
scoreIndexPairs.push_back(
|
||||
std::make_pair(scores[idx], std::make_pair(label, idx)));
|
||||
}
|
||||
}
|
||||
std::sort(scoreIndexPairs.begin(),
|
||||
scoreIndexPairs.end(),
|
||||
SortScorePairDescend<std::pair<int, int>>);
|
||||
scoreIndexPairs.resize(attrs.keep_top_k[0]);
|
||||
std::map<int, std::vector<int>> newIndices;
|
||||
for (int j = 0; j < scoreIndexPairs.size(); ++j)
|
||||
{
|
||||
int label = scoreIndexPairs[j].second.first;
|
||||
int idx = scoreIndexPairs[j].second.second;
|
||||
newIndices[label].push_back(idx);
|
||||
}
|
||||
allIndices.push_back(newIndices);
|
||||
numKept += attrs.top_k;
|
||||
}
|
||||
else
|
||||
{
|
||||
allIndices.push_back(indices);
|
||||
numKept += numDet;
|
||||
}
|
||||
}
|
||||
|
||||
int count = 0;
|
||||
for (int i = 0; i < numImages; ++i)
|
||||
{
|
||||
const std::map<int, std::vector<dataType>>& confScores = confPreds[i];
|
||||
const LabelBBox& decodeBboxesImage = decodeBboxes[i];
|
||||
for (auto it = allIndices[i].begin(); it != allIndices[i].end(); ++it)
|
||||
{
|
||||
int label = it->first;
|
||||
const std::vector<dataType>& scores = confScores.find(label)->second;
|
||||
int loc_label = attrs.share_location ? -1 : label;
|
||||
const std::vector<NormalizedBBox>& bboxes =
|
||||
decodeBboxesImage.find(loc_label)->second;
|
||||
std::vector<int>& indices = it->second;
|
||||
for (int j = 0; j < indices.size(); ++j)
|
||||
{
|
||||
int idx = indices[j];
|
||||
result[count * 7 + 0] = i;
|
||||
result[count * 7 + 1] =
|
||||
attrs.decrease_label_id ? (label - 1) : label;
|
||||
result[count * 7 + 2] = scores[idx];
|
||||
const NormalizedBBox& bbox = bboxes[idx];
|
||||
|
||||
dataType xmin = bbox.xmin;
|
||||
dataType ymin = bbox.ymin;
|
||||
dataType xmax = bbox.xmax;
|
||||
dataType ymax = bbox.ymax;
|
||||
|
||||
if (attrs.clip_after_nms)
|
||||
{
|
||||
xmin = std::max<dataType>(0, std::min<dataType>(1, xmin));
|
||||
ymin = std::max<dataType>(0, std::min<dataType>(1, ymin));
|
||||
xmax = std::max<dataType>(0, std::min<dataType>(1, xmax));
|
||||
ymax = std::max<dataType>(0, std::min<dataType>(1, ymax));
|
||||
}
|
||||
|
||||
result[count * 7 + 3] = xmin;
|
||||
result[count * 7 + 4] = ymin;
|
||||
result[count * 7 + 5] = xmax;
|
||||
result[count * 7 + 6] = ymax;
|
||||
++count;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (count < numImages * attrs.keep_top_k[0])
|
||||
{
|
||||
result[count * 7 + 0] = -1;
|
||||
}
|
||||
}
|
||||
};
|
||||
} // namespace reference
|
||||
} // namespace runtime
|
||||
} // namespace ngraph
|
Loading…
Reference in New Issue
Block a user