[CPU] Added support NMS-5 (#2768)

This commit is contained in:
Maxim Andronov 2020-11-06 08:59:23 +03:00 committed by GitHub
parent 6e7db6fb37
commit 32c48b1087
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -10,7 +10,9 @@
#include <cassert> #include <cassert>
#include <algorithm> #include <algorithm>
#include <utility> #include <utility>
#include <queue>
#include "ie_parallel.hpp" #include "ie_parallel.hpp"
#include "common/cpu_memcpy.h"
namespace InferenceEngine { namespace InferenceEngine {
namespace Extensions { namespace Extensions {
@ -20,82 +22,131 @@ class NonMaxSuppressionImpl: public ExtLayerBase {
public: public:
explicit NonMaxSuppressionImpl(const CNNLayer* layer) { explicit NonMaxSuppressionImpl(const CNNLayer* layer) {
try { try {
if (layer->insData.size() < 2 || layer->insData.size() > 5) logPrefix = "NMS layer with name '" + layer->name + "' ";
THROW_IE_EXCEPTION << layer->name << " Incorrect number of input edges!"; if (layer->insData.size() < 2 || layer->insData.size() > 6)
THROW_IE_EXCEPTION << logPrefix << "has incorrect number of input edges: " << layer->insData.size();
if (layer->outData.size() != 1) if (layer->outData.size() < 1 || layer->outData.size() > 3)
THROW_IE_EXCEPTION << layer->name << " Incorrect number of output edges!"; THROW_IE_EXCEPTION << logPrefix << "has incorrect number of output edges: " << layer->outData.size();
if (layer->insData[NMS_BOXES].lock()->getTensorDesc().getPrecision() != Precision::FP32) // TODO: remove legacy attribute presentation after migration on opset1
THROW_IE_EXCEPTION << layer->name << " Incorrect 'boxes' input precision. Only FP32 is supported!"; if (layer->CheckParamPresence("center_point_box")) {
SizeVector boxes_dims = layer->insData[NMS_BOXES].lock()->getTensorDesc().getDims(); bool center_point_box = layer->GetParamAsBool("center_point_box", false);
if (boxes_dims.size() != 3 || boxes_dims[2] != 4) boxEncodingType = center_point_box ? boxEncoding::CENTER : boxEncoding::CORNER;
THROW_IE_EXCEPTION << layer->name << " 'boxes' should be with shape [num_batches, spatial_dimension, 4]"; } else if (layer->CheckParamPresence("box_encoding")) {
std::string boxEncAttr = layer->GetParamAsString("box_encoding", "corner");
if (layer->insData[NMS_SCORES].lock()->getTensorDesc().getPrecision() != Precision::FP32) if (boxEncAttr == "corner") {
THROW_IE_EXCEPTION << layer->name << " Incorrect 'scores' input precision. Only FP32 is supported!"; boxEncodingType = boxEncoding::CORNER;
SizeVector scores_dims = layer->insData[NMS_SCORES].lock()->getTensorDesc().getDims(); } else if (boxEncAttr == "center") {
if (scores_dims.size() != 3) boxEncodingType = boxEncoding::CENTER;
THROW_IE_EXCEPTION << layer->name << " 'scores' should be with shape [num_batches, num_classes, spatial_dimension]"; } else {
THROW_IE_EXCEPTION << logPrefix << "has unsupported 'box_encoding' attribute: " << boxEncAttr;
if (boxes_dims[0] != scores_dims[0]) }
THROW_IE_EXCEPTION << layer->name << " num_batches is different in 'boxes' and 'scores' tensors";
if (boxes_dims[1] != scores_dims[2])
THROW_IE_EXCEPTION << layer->name << " spatial_dimension is different in 'boxes' and 'scores' tensors";
if (layer->insData.size() > 2) {
if (layer->insData[NMS_MAXOUTPUTBOXESPERCLASS].lock()->getTensorDesc().getPrecision() != Precision::I32)
THROW_IE_EXCEPTION << layer->name << " Incorrect 'max_output_boxes_per_class' input precision. Only I32 is supported!";
SizeVector max_output_boxes_per_class_dims = layer->insData[NMS_MAXOUTPUTBOXESPERCLASS].lock()->getTensorDesc().getDims();
if (max_output_boxes_per_class_dims.size() && max_output_boxes_per_class_dims[0] != 1)
THROW_IE_EXCEPTION << layer->name << " 'max_output_boxes_per_class' should be scalar";
} }
if (layer->insData.size() > 3) {
if (layer->insData[NMS_IOUTHRESHOLD].lock()->getTensorDesc().getPrecision() != Precision::FP32)
THROW_IE_EXCEPTION << layer->name << " Incorrect 'iou_threshold' input precision. Only FP32 is supported!";
SizeVector iou_threshold_dims = layer->insData[NMS_IOUTHRESHOLD].lock()->getTensorDesc().getDims();
if (iou_threshold_dims.size() && iou_threshold_dims[0] != 1)
THROW_IE_EXCEPTION << layer->name << " 'iou_threshold' should be scalar";
}
if (layer->insData.size() > 4) {
if (layer->insData[NMS_SCORETHRESHOLD].lock()->getTensorDesc().getPrecision() != Precision::FP32)
THROW_IE_EXCEPTION << layer->name << " Incorrect 'score_threshold' input precision. Only FP32 is supported!";
SizeVector score_threshold_dims = layer->insData[NMS_SCORETHRESHOLD].lock()->getTensorDesc().getDims();
if (score_threshold_dims.size() && score_threshold_dims[0] != 1)
THROW_IE_EXCEPTION << layer->name << " 'score_threshold' should be scalar";
}
if (layer->outData[0]->getTensorDesc().getPrecision() != Precision::I32)
THROW_IE_EXCEPTION << layer->name << " Incorrect 'selected_indices' input precision. Only I32 is supported!";
SizeVector selected_indices_dims = layer->outData[0]->getTensorDesc().getDims();
if (selected_indices_dims.size() != 2 || selected_indices_dims[1] != 3)
THROW_IE_EXCEPTION << layer->name << " 'selected_indices' should be with shape [num_selected_indices, 3]";
center_point_box = layer->GetParamAsBool("center_point_box", false);
sort_result_descending = layer->GetParamAsBool("sort_result_descending", true); sort_result_descending = layer->GetParamAsBool("sort_result_descending", true);
if (layer->insData.size() == 2) { const std::vector<Precision> supportedFloatPrecision = {Precision::FP32, Precision::BF16};
addConfig(layer, { DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN) }, { DataConfigurator(ConfLayout::PLN) }); const std::vector<Precision> supportedIntOutputPrecision = {Precision::I32, Precision::I64};
} else if (layer->insData.size() == 3) {
addConfig(layer, { DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN) }, auto boxesDataPtr = layer->insData[NMS_BOXES].lock();
{ DataConfigurator(ConfLayout::PLN) }); if (boxesDataPtr == nullptr) {
} else if (layer->insData.size() == 4) { THROW_IE_EXCEPTION << logPrefix << "has nullable 'boxes' input";
addConfig(layer, { DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN),
DataConfigurator(ConfLayout::PLN) }, { DataConfigurator(ConfLayout::PLN) });
} else {
addConfig(layer, { DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN),
DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN) }, { DataConfigurator(ConfLayout::PLN) });
} }
checkPrecision(boxesDataPtr, supportedFloatPrecision, "boxes", inType);
const SizeVector &boxes_dims = boxesDataPtr->getTensorDesc().getDims();
num_batches = boxes_dims[0];
num_boxes = boxes_dims[1];
if (boxes_dims.size() != 3)
THROW_IE_EXCEPTION << logPrefix << "has unsupported 'boxes' input rank: " << boxes_dims.size();
if (boxes_dims[2] != 4)
THROW_IE_EXCEPTION << logPrefix << "has unsupported 'boxes' input 3rd dimension size: " << boxes_dims[2];
auto scoresDataPtr = layer->insData[NMS_SCORES].lock();
if (scoresDataPtr == nullptr) {
THROW_IE_EXCEPTION << logPrefix << "has nullable 'scores' input";
}
checkPrecision(scoresDataPtr, supportedFloatPrecision, "scores", inType);
const SizeVector &scores_dims = scoresDataPtr->getTensorDesc().getDims();
num_classes = scores_dims[1];
if (scores_dims.size() != 3)
THROW_IE_EXCEPTION << logPrefix << "has unsupported 'scores' input rank: " << scores_dims.size();
if (num_batches != scores_dims[0])
THROW_IE_EXCEPTION << logPrefix << " num_batches is different in 'boxes' and 'scores' inputs";
if (num_boxes != scores_dims[2])
THROW_IE_EXCEPTION << logPrefix << " num_boxes is different in 'boxes' and 'scores' inputs";
numFiltBox.resize(num_batches);
for (size_t i = 0; i < numFiltBox.size(); i++)
numFiltBox[i].resize(num_classes);
if (layer->insData.size() > NMS_MAXOUTPUTBOXESPERCLASS) {
const std::vector<Precision> supportedPrecision = {Precision::I16, Precision::U8, Precision::I8, Precision::U16, Precision::I32,
Precision::U32, Precision::I64, Precision::U64};
check1DInput(layer->insData[NMS_MAXOUTPUTBOXESPERCLASS], supportedPrecision, "max_output_boxes_per_class");
}
if (layer->insData.size() > NMS_IOUTHRESHOLD) {
check1DInput(layer->insData[NMS_IOUTHRESHOLD], supportedFloatPrecision, "iou_threshold");
}
if (layer->insData.size() > NMS_SCORETHRESHOLD) {
check1DInput(layer->insData[NMS_SCORETHRESHOLD], supportedFloatPrecision, "score_threshold");
}
if (layer->insData.size() > NMS_SOFTNMSSIGMA) {
check1DInput(layer->insData[NMS_SOFTNMSSIGMA], supportedFloatPrecision, "soft_nms_sigma");
}
checkOutput(layer->outData[NMS_SELECTEDINDICES], supportedIntOutputPrecision, "selected_indices");
if (layer->outData.size() > NMS_SELECTEDSCORES) {
checkOutput(layer->outData[NMS_SELECTEDSCORES], supportedFloatPrecision, "selected_scores");
}
if (layer->outData.size() > NMS_VALIDOUTPUTS) {
checkPrecision(layer->outData[NMS_VALIDOUTPUTS], supportedIntOutputPrecision, "valid_outputs", outType);
const SizeVector &valid_outputs_dims = layer->outData[NMS_VALIDOUTPUTS]->getTensorDesc().getDims();
if (valid_outputs_dims.size() != 1)
THROW_IE_EXCEPTION << logPrefix << "has unsupported 'valid_outputs' output rank: " << valid_outputs_dims.size();
if (valid_outputs_dims[0] != 1)
THROW_IE_EXCEPTION << logPrefix << "has unsupported 'valid_outputs' output 1st dimension size: " << valid_outputs_dims[1];
}
LayerConfig config;
for (size_t i = 0; i < layer->insData.size(); i++) {
DataConfig inConfig;
Precision inPrecision = i == NMS_MAXOUTPUTBOXESPERCLASS ? Precision::I32 : Precision::FP32;
auto validDataPtr = layer->insData[i].lock();
if (validDataPtr == nullptr) {
THROW_IE_EXCEPTION << logPrefix << "has nullable " << i << "th input";
}
const SizeVector& inDims = validDataPtr->getTensorDesc().getDims();
inConfig.desc = TensorDesc(inPrecision, inDims, InferenceEngine::TensorDesc::getLayoutByDims(inDims));
config.inConfs.push_back(inConfig);
}
for (size_t i = 0; i < layer->outData.size(); i++) {
DataConfig outConfig;
Precision outPrecision = i == NMS_SELECTEDSCORES ? Precision::FP32 : Precision::I32;
const SizeVector& outDims = layer->outData[i]->getTensorDesc().getDims();
outConfig.desc = TensorDesc(outPrecision, outDims, InferenceEngine::TensorDesc::getLayoutByDims(outDims));
config.outConfs.push_back(outConfig);
}
config.dynBatchSupport = false;
confs.push_back(config);
} catch (InferenceEngine::details::InferenceEngineException &ex) { } catch (InferenceEngine::details::InferenceEngineException &ex) {
errorMsg = ex.what(); errorMsg = ex.what();
} }
} }
static float intersectionOverUnion(float* boxesI, float* boxesJ, bool center_point_box) { float intersectionOverUnion(const float *boxesI, const float *boxesJ) {
float yminI, xminI, ymaxI, xmaxI, yminJ, xminJ, ymaxJ, xmaxJ; float yminI, xminI, ymaxI, xmaxI, yminJ, xminJ, ymaxJ, xmaxJ;
if (center_point_box) { if (boxEncodingType == boxEncoding::CENTER) {
// box format: x_center, y_center, width, height // box format: x_center, y_center, width, height
yminI = boxesI[1] - boxesI[3] / 2.f; yminI = boxesI[1] - boxesI[3] / 2.f;
xminI = boxesI[0] - boxesI[2] / 2.f; xminI = boxesI[0] - boxesI[2] / 2.f;
@ -128,117 +179,294 @@ public:
return intersection_area / (areaI + areaJ - intersection_area); return intersection_area / (areaI + areaJ - intersection_area);
} }
typedef struct { struct filteredBoxes {
float score; float score;
int batch_index; int batch_index;
int class_index; int class_index;
int box_index; int box_index;
} filteredBoxes; filteredBoxes() {}
filteredBoxes(float _score, int _batch_index, int _class_index, int _box_index) :
score(_score), batch_index(_batch_index), class_index(_class_index), box_index(_box_index) {}
};
StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc *resp) noexcept override { struct boxInfo {
float *boxes = inputs[NMS_BOXES]->cbuffer().as<float *>() + float score;
inputs[NMS_BOXES]->getTensorDesc().getBlockingDesc().getOffsetPadding(); int idx;
float *scores = inputs[NMS_SCORES]->cbuffer().as<float *>() + int suppress_begin_index;
inputs[NMS_SCORES]->getTensorDesc().getBlockingDesc().getOffsetPadding(); };
SizeVector scores_dims = inputs[NMS_SCORES]->getTensorDesc().getDims(); void nmsWithSoftSigma(const float *boxes, const float *scores, const SizeVector &boxesStrides, const SizeVector &scoresStrides,
int num_boxes = static_cast<int>(scores_dims[2]); std::vector<filteredBoxes> &filtBoxes) {
int max_output_boxes_per_class = num_boxes; auto less = [](const boxInfo& l, const boxInfo& r) {
if (inputs.size() > 2) return l.score < r.score || ((l.score == r.score) && (l.idx > r.idx));
max_output_boxes_per_class = (std::min)(max_output_boxes_per_class, };
(inputs[NMS_MAXOUTPUTBOXESPERCLASS]->cbuffer().as<int *>() +
inputs[NMS_MAXOUTPUTBOXESPERCLASS]->getTensorDesc().getBlockingDesc().getOffsetPadding())[0]);
float iou_threshold = 1.f; // Value range [0, 1] auto coeff = [&](float iou) {
if (inputs.size() > 3) const float weight = std::exp(scale * iou * iou);
iou_threshold = (std::min)(iou_threshold, (inputs[NMS_IOUTHRESHOLD]->cbuffer().as<float *>() + return iou <= iou_threshold ? weight : 0.0f;
inputs[NMS_IOUTHRESHOLD]->getTensorDesc().getBlockingDesc().getOffsetPadding())[0]); };
float score_threshold = 0.f; parallel_for2d(num_batches, num_classes, [&](int batch_idx, int class_idx) {
if (inputs.size() > 4)
score_threshold = (inputs[NMS_SCORETHRESHOLD]->cbuffer().as<float *>() +
inputs[NMS_SCORETHRESHOLD]->getTensorDesc().getBlockingDesc().getOffsetPadding())[0];
int* selected_indices = outputs[0]->cbuffer().as<int *>() +
outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
SizeVector selected_indices_dims = outputs[0]->getTensorDesc().getDims();
SizeVector boxesStrides = inputs[NMS_BOXES]->getTensorDesc().getBlockingDesc().getStrides();
SizeVector scoresStrides = inputs[NMS_SCORES]->getTensorDesc().getBlockingDesc().getStrides();
// boxes shape: {num_batches, num_boxes, 4}
// scores shape: {num_batches, num_classes, num_boxes}
int num_batches = static_cast<int>(scores_dims[0]);
int num_classes = static_cast<int>(scores_dims[1]);
std::vector<filteredBoxes> fb; std::vector<filteredBoxes> fb;
const float *boxesPtr = boxes + batch_idx * boxesStrides[0];
const float *scoresPtr = scores + batch_idx * scoresStrides[0] + class_idx * scoresStrides[1];
for (int batch = 0; batch < num_batches; batch++) { std::priority_queue<boxInfo, std::vector<boxInfo>, decltype(less)> sorted_boxes(less);
float *boxesPtr = boxes + batch * boxesStrides[0];
for (int class_idx = 0; class_idx < num_classes; class_idx++) {
float *scoresPtr = scores + batch * scoresStrides[0] + class_idx * scoresStrides[1];
std::vector<std::pair<float, int> > scores_vector;
for (int box_idx = 0; box_idx < num_boxes; box_idx++) { for (int box_idx = 0; box_idx < num_boxes; box_idx++) {
if (scoresPtr[box_idx] > score_threshold) if (scoresPtr[box_idx] > score_threshold)
scores_vector.push_back(std::make_pair(scoresPtr[box_idx], box_idx)); sorted_boxes.emplace(boxInfo({scoresPtr[box_idx], box_idx, 0}));
} }
if (scores_vector.size()) { fb.reserve(sorted_boxes.size());
parallel_sort(scores_vector.begin(), scores_vector.end(), if (sorted_boxes.size() > 0) {
[](const std::pair<float, int>& l, const std::pair<float, int>& r) { return l.first > r.first; }); while (fb.size() < max_output_boxes_per_class && !sorted_boxes.empty()) {
boxInfo currBox = sorted_boxes.top();
float origScore = currBox.score;
sorted_boxes.pop();
int io_selection_size = 1; bool box_is_selected = true;
fb.push_back({ scores_vector[0].first, batch, class_idx, scores_vector[0].second }); for (int idx = static_cast<int>(fb.size()) - 1; idx >= currBox.suppress_begin_index; idx--) {
for (int box_idx = 1; (box_idx < static_cast<int>(scores_vector.size()) && io_selection_size < max_output_boxes_per_class); box_idx++) { float iou = intersectionOverUnion(&boxesPtr[currBox.idx * 4], &boxesPtr[fb[idx].box_index * 4]);
currBox.score *= coeff(iou);
if (iou >= iou_threshold) {
box_is_selected = false;
break;
}
if (currBox.score <= score_threshold)
break;
}
currBox.suppress_begin_index = fb.size();
if (box_is_selected) {
if (currBox.score == origScore) {
fb.push_back({ currBox.score, batch_idx, class_idx, currBox.idx });
continue;
}
if (currBox.score > score_threshold) {
sorted_boxes.push(currBox);
}
}
}
}
numFiltBox[batch_idx][class_idx] = fb.size();
size_t offset = batch_idx*num_classes*max_output_boxes_per_class + class_idx*max_output_boxes_per_class;
cpu_memcpy(filtBoxes.data() + offset, fb.data(), fb.size() * sizeof(filteredBoxes));
});
}
void nmsWithoutSoftSigma(const float *boxes, const float *scores, const SizeVector &boxesStrides, const SizeVector &scoresStrides,
std::vector<filteredBoxes> &filtBoxes) {
int max_out_box = static_cast<int>(max_output_boxes_per_class);
parallel_for2d(num_batches, num_classes, [&](int batch_idx, int class_idx) {
const float *boxesPtr = boxes + batch_idx * boxesStrides[0];
const float *scoresPtr = scores + batch_idx * scoresStrides[0] + class_idx * scoresStrides[1];
std::vector<std::pair<float, int>> sorted_boxes;
for (int box_idx = 0; box_idx < num_boxes; box_idx++) {
if (scoresPtr[box_idx] > score_threshold)
sorted_boxes.emplace_back(std::make_pair(scoresPtr[box_idx], box_idx));
}
int io_selection_size = 0;
if (sorted_boxes.size() > 0) {
parallel_sort(sorted_boxes.begin(), sorted_boxes.end(),
[](const std::pair<float, int>& l, const std::pair<float, int>& r) {
return (l.first > r.first || ((l.first == r.first) && (l.second < r.second)));
});
size_t offset = batch_idx*num_classes*max_output_boxes_per_class + class_idx*max_output_boxes_per_class;
filteredBoxes *fb = filtBoxes.data() + offset;
fb[0] = filteredBoxes(sorted_boxes[0].first, batch_idx, class_idx, sorted_boxes[0].second);
io_selection_size++;
for (size_t box_idx = 1; (box_idx < sorted_boxes.size()) && (io_selection_size < max_out_box); box_idx++) {
bool box_is_selected = true; bool box_is_selected = true;
for (int idx = io_selection_size - 1; idx >= 0; idx--) { for (int idx = io_selection_size - 1; idx >= 0; idx--) {
float iou = intersectionOverUnion(&boxesPtr[scores_vector[box_idx].second * 4], float iou = intersectionOverUnion(&boxesPtr[sorted_boxes[box_idx].second * 4], &boxesPtr[fb[idx].box_index * 4]);
&boxesPtr[scores_vector[idx].second * 4], center_point_box); if (iou >= iou_threshold) {
if (iou > iou_threshold) {
box_is_selected = false; box_is_selected = false;
break; break;
} }
} }
if (box_is_selected) { if (box_is_selected) {
scores_vector[io_selection_size] = scores_vector[box_idx]; fb[io_selection_size] = filteredBoxes(sorted_boxes[box_idx].first, batch_idx, class_idx, sorted_boxes[box_idx].second);
io_selection_size++; io_selection_size++;
fb.push_back({ scores_vector[box_idx].first, batch, class_idx, scores_vector[box_idx].second });
}
} }
} }
} }
numFiltBox[batch_idx][class_idx] = io_selection_size;
});
} }
StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc *resp) noexcept override {
const float *boxes = inputs[NMS_BOXES]->cbuffer().as<const float *>() + inputs[NMS_BOXES]->getTensorDesc().getBlockingDesc().getOffsetPadding();
const float *scores = inputs[NMS_SCORES]->cbuffer().as<const float *>() + inputs[NMS_SCORES]->getTensorDesc().getBlockingDesc().getOffsetPadding();
max_output_boxes_per_class = outputs.size() > NMS_SELECTEDSCORES ? 0 : num_boxes;
if (inputs.size() > NMS_MAXOUTPUTBOXESPERCLASS) {
max_output_boxes_per_class = (inputs[NMS_MAXOUTPUTBOXESPERCLASS]->cbuffer().as<int *>() +
inputs[NMS_MAXOUTPUTBOXESPERCLASS]->getTensorDesc().getBlockingDesc().getOffsetPadding())[0];
}
if (max_output_boxes_per_class == 0)
return OK;
iou_threshold = outputs.size() > NMS_SELECTEDSCORES ? 0.0f : 1.0f;
if (inputs.size() > NMS_IOUTHRESHOLD)
iou_threshold = (inputs[NMS_IOUTHRESHOLD]->cbuffer().as<float *>() +
inputs[NMS_IOUTHRESHOLD]->getTensorDesc().getBlockingDesc().getOffsetPadding())[0];
score_threshold = 0.0f;
if (inputs.size() > NMS_SCORETHRESHOLD)
score_threshold = (inputs[NMS_SCORETHRESHOLD]->cbuffer().as<float *>() +
inputs[NMS_SCORETHRESHOLD]->getTensorDesc().getBlockingDesc().getOffsetPadding())[0];
soft_nms_sigma = 0.0f;
if (inputs.size() > NMS_SOFTNMSSIGMA)
soft_nms_sigma = (inputs[NMS_SOFTNMSSIGMA]->cbuffer().as<float *>() +
inputs[NMS_SOFTNMSSIGMA]->getTensorDesc().getBlockingDesc().getOffsetPadding())[0];
scale = 0.0f;
if (soft_nms_sigma > 0.0) {
scale = -0.5 / soft_nms_sigma;
}
int *selected_indices = outputs[NMS_SELECTEDINDICES]->buffer().as<int *>() +
outputs[NMS_SELECTEDINDICES]->getTensorDesc().getBlockingDesc().getOffsetPadding();
float *selected_scores = nullptr;
if (outputs.size() > NMS_SELECTEDSCORES)
selected_scores = outputs[NMS_SELECTEDSCORES]->buffer().as<float *>() +
outputs[NMS_SELECTEDSCORES]->getTensorDesc().getBlockingDesc().getOffsetPadding();
int *valid_outputs = nullptr;
if (outputs.size() > NMS_VALIDOUTPUTS)
valid_outputs = outputs[NMS_VALIDOUTPUTS]->buffer().as<int *>() +
outputs[NMS_VALIDOUTPUTS]->getTensorDesc().getBlockingDesc().getOffsetPadding();
const SizeVector &boxesStrides = inputs[NMS_BOXES]->getTensorDesc().getBlockingDesc().getStrides();
const SizeVector &scoresStrides = inputs[NMS_SCORES]->getTensorDesc().getBlockingDesc().getStrides();
std::vector<filteredBoxes> filtBoxes(max_output_boxes_per_class * num_batches * num_classes);
if (soft_nms_sigma == 0.0f) {
nmsWithoutSoftSigma(boxes, scores, boxesStrides, scoresStrides, filtBoxes);
} else {
nmsWithSoftSigma(boxes, scores, boxesStrides, scoresStrides, filtBoxes);
}
size_t startOffset = numFiltBox[0][0];
for (size_t b = 0; b < numFiltBox.size(); b++) {
size_t batchOffset = b*num_classes*max_output_boxes_per_class;
for (size_t c = (b == 0 ? 1 : 0); c < numFiltBox[b].size(); c++) {
size_t offset = batchOffset + c*max_output_boxes_per_class;
cpu_memcpy(filtBoxes.data() + startOffset, filtBoxes.data() + offset,
numFiltBox[b][c] * sizeof(filteredBoxes));
startOffset += numFiltBox[b][c];
}
}
filtBoxes.resize(startOffset);
// need more particular comparator to get deterministic behaviour
// escape situation when filtred boxes with same score have different position from launch to launch
if (sort_result_descending) { if (sort_result_descending) {
parallel_sort(fb.begin(), fb.end(), [](const filteredBoxes& l, const filteredBoxes& r) { return l.score > r.score; }); parallel_sort(filtBoxes.begin(), filtBoxes.end(),
[](const filteredBoxes& l, const filteredBoxes& r) {
return (l.score > r.score) ||
(l.score == r.score && l.batch_index < r.batch_index) ||
(l.score == r.score && l.batch_index == r.batch_index && l.class_index < r.class_index) ||
(l.score == r.score && l.batch_index == r.batch_index && l.class_index == r.class_index && l.box_index < r.box_index);
});
} }
int selected_indicesStride = outputs[0]->getTensorDesc().getBlockingDesc().getStrides()[0]; size_t validOutputs = std::min(filtBoxes.size(), static_cast<size_t>(outputs[NMS_SELECTEDINDICES]->getTensorDesc().getDims()[0]));
int* selected_indicesPtr = selected_indices;
size_t idx; int selectedIndicesStride = outputs[NMS_SELECTEDINDICES]->getTensorDesc().getBlockingDesc().getStrides()[0];
for (idx = 0; idx < (std::min)(selected_indices_dims[0], fb.size()); idx++) { int *selectedIndicesPtr = selected_indices;
selected_indicesPtr[0] = fb[idx].batch_index; float *selectedScoresPtr = selected_scores;
selected_indicesPtr[1] = fb[idx].class_index;
selected_indicesPtr[2] = fb[idx].box_index; for (size_t idx = 0; idx < validOutputs; idx++) {
selected_indicesPtr += selected_indicesStride; selectedIndicesPtr[0] = filtBoxes[idx].batch_index;
selectedIndicesPtr[1] = filtBoxes[idx].class_index;
selectedIndicesPtr[2] = filtBoxes[idx].box_index;
selectedIndicesPtr += selectedIndicesStride;
if (outputs.size() > NMS_SELECTEDSCORES) {
selectedScoresPtr[0] = static_cast<float>(filtBoxes[idx].batch_index);
selectedScoresPtr[1] = static_cast<float>(filtBoxes[idx].class_index);
selectedScoresPtr[2] = static_cast<float>(filtBoxes[idx].score);
selectedScoresPtr += selectedIndicesStride;
} }
for (; idx < selected_indices_dims[0]; idx++) {
selected_indicesPtr[0] = -1;
selected_indicesPtr[1] = -1;
selected_indicesPtr[2] = -1;
selected_indicesPtr += selected_indicesStride;
} }
if (outputs.size() > NMS_VALIDOUTPUTS)
*valid_outputs = static_cast<int>(validOutputs);
return OK; return OK;
} }
private: private:
// input
const size_t NMS_BOXES = 0; const size_t NMS_BOXES = 0;
const size_t NMS_SCORES = 1; const size_t NMS_SCORES = 1;
const size_t NMS_MAXOUTPUTBOXESPERCLASS = 2; const size_t NMS_MAXOUTPUTBOXESPERCLASS = 2;
const size_t NMS_IOUTHRESHOLD = 3; const size_t NMS_IOUTHRESHOLD = 3;
const size_t NMS_SCORETHRESHOLD = 4; const size_t NMS_SCORETHRESHOLD = 4;
bool center_point_box = false; const size_t NMS_SOFTNMSSIGMA = 5;
// output
const size_t NMS_SELECTEDINDICES = 0;
const size_t NMS_SELECTEDSCORES = 1;
const size_t NMS_VALIDOUTPUTS = 2;
enum class boxEncoding {
CORNER,
CENTER
};
boxEncoding boxEncodingType = boxEncoding::CORNER;
bool sort_result_descending = true; bool sort_result_descending = true;
size_t num_batches;
size_t num_boxes;
size_t num_classes;
size_t max_output_boxes_per_class;
float iou_threshold;
float score_threshold;
float soft_nms_sigma;
float scale;
std::vector<std::vector<size_t>> numFiltBox;
const std::string inType = "input", outType = "output";
std::string logPrefix;
void checkPrecision(const DataPtr &dataPtr, const std::vector<Precision> precList, const std::string name, const std::string type) {
const TensorDesc &tensorDesc = dataPtr->getTensorDesc();
if (std::find(precList.begin(), precList.end(), tensorDesc.getPrecision()) == precList.end())
THROW_IE_EXCEPTION << logPrefix << " has unsupported '" << name << "' " << type << " precision: " << tensorDesc.getPrecision();
}
void check1DInput(const DataWeakPtr &dataPtr, const std::vector<Precision> precList, const std::string name) {
auto lockDataPtr = dataPtr.lock();
if (lockDataPtr == nullptr) {
THROW_IE_EXCEPTION << logPrefix << "has nullable '" << name << "' input";
}
checkPrecision(lockDataPtr, precList, name, inType);
const SizeVector &dims = lockDataPtr->getTensorDesc().getDims();
if (dims.size() != 0 && dims.size() != 1)
THROW_IE_EXCEPTION << logPrefix << "has unsupported '" << name << "' input rank: " << dims.size();
if (dims.size() == 1)
if (dims[0] != 1)
THROW_IE_EXCEPTION << logPrefix << "has unsupported '" << name << "' input 1st dimension size: " << dims[0];
}
void checkOutput(const DataPtr &dataPtr, const std::vector<Precision> precList, const std::string name) {
checkPrecision(dataPtr, precList, name, outType);
const SizeVector &dims = dataPtr->getTensorDesc().getDims();
if (dims.size() != 2)
THROW_IE_EXCEPTION << logPrefix << "has unsupported '" << name << "' output rank: " << dims.size();
if (dims[1] != 3)
THROW_IE_EXCEPTION << logPrefix << "has unsupported '" << name << "' output 2nd dimension size: " << dims[1];
}
}; };
REG_FACTORY_FOR(NonMaxSuppressionImpl, NonMaxSuppression); REG_FACTORY_FOR(NonMaxSuppressionImpl, NonMaxSuppression);