diff --git a/docs/ops/detection/DetectionOutput_1.md b/docs/ops/detection/DetectionOutput_1.md
index d6ab50950dd..6175a966898 100644
--- a/docs/ops/detection/DetectionOutput_1.md
+++ b/docs/ops/detection/DetectionOutput_1.md
@@ -6,7 +6,7 @@
**Short description**: *DetectionOutput* performs non-maximum suppression to generate the detection output using information on location and confidence predictions.
-**Detailed description**: [Reference](https://arxiv.org/pdf/1512.02325.pdf). The layer has 3 mandatory inputs: tensor with box logits, tensor with confidence predictions and tensor with box coordinates (proposals). It can have 2 additional inputs with additional confidence predictions and box coordinates described in the [article](https://arxiv.org/pdf/1711.06897.pdf). The 5-input version of the layer is supported with Myriad plugin only. The output tensor contains information about filtered detections described with 7 element tuples: *[batch_id, class_id, confidence, x_1, y_1, x_2, y_2]*. The first tuple with *batch_id* equal to *-1* means end of output.
+**Detailed description**: [Reference](https://arxiv.org/pdf/1512.02325.pdf). The layer has 3 mandatory inputs: a tensor with box logits, a tensor with confidence predictions, and a tensor with box coordinates (proposals). It can have 2 additional inputs with additional confidence predictions and box coordinates, as described in the [article](https://arxiv.org/pdf/1711.06897.pdf). The output tensor contains information about filtered detections, each described by a 7-element tuple: `[batch_id, class_id, confidence, x_1, y_1, x_2, y_2]`. The first tuple with `batch_id` equal to `-1` marks the end of the output.
At each feature map cell, *DetectionOutput* predicts the offsets relative to the default box shapes in the cell, as well as the per-class scores that indicate the presence of a class instance in each of those boxes. Specifically, for each box out of k at a given location, *DetectionOutput* computes class scores and the four offsets relative to the original default box shape. This results in a total of \f$(c + 4)k\f$ filters that are applied around each location in the feature map, yielding \f$(c + 4)kmn\f$ outputs for a *m \* n* feature map.
@@ -63,9 +63,9 @@ At each feature map cell, *DetectionOutput* predicts the offsets relative to the
* *share_location*
* **Description**: *share_location* is a flag that denotes if bounding boxes are shared among different classes.
- * **Range of values**: 0 or 1
- * **Type**: int
- * **Default value**: 1
+ * **Range of values**: false or true
+ * **Type**: boolean
+ * **Default value**: true
* **Required**: *no*
* *nms_threshold*
@@ -87,35 +87,35 @@ At each feature map cell, *DetectionOutput* predicts the offsets relative to the
* *clip_after_nms*
* **Description**: *clip_after_nms* flag that denotes whether to perform clip bounding boxes after non-maximum suppression or not.
- * **Range of values**: 0 or 1
- * **Type**: int
- * **Default value**: 0
+ * **Range of values**: false or true
+ * **Type**: boolean
+ * **Default value**: false
* **Required**: *no*
* *clip_before_nms*
* **Description**: *clip_before_nms* flag that denotes whether to perform clip bounding boxes before non-maximum suppression or not.
- * **Range of values**: 0 or 1
- * **Type**: int
- * **Default value**: 0
+ * **Range of values**: false or true
+ * **Type**: boolean
+ * **Default value**: false
* **Required**: *no*
* *decrease_label_id*
* **Description**: *decrease_label_id* flag that denotes how to perform NMS.
* **Range of values**:
- * 0 - perform NMS like in Caffe\*.
- * 1 - perform NMS like in MxNet\*.
- * **Type**: int
- * **Default value**: 0
+ * false - perform NMS like in Caffe\*.
+ * true - perform NMS like in MxNet\*.
+ * **Type**: boolean
+ * **Default value**: false
* **Required**: *no*
* *normalized*
- * **Description**: *normalized* flag that denotes whether input tensors with boxes are normalized. If tensors are not normalized then *input_height* and *input_width* attributes are used to normalize box coordinates.
- * **Range of values**: 0 or 1
- * **Type**: int
- * **Default value**: 0
+ * **Description**: *normalized* is a flag that denotes whether the input tensor with proposal boxes is normalized. If the tensor is not normalized, the *input_height* and *input_width* attributes are used to normalize box coordinates.
+ * **Range of values**: false or true
+ * **Type**: boolean
+ * **Default value**: false
* **Required**: *no*
* *input_height (input_width)*
@@ -133,21 +133,52 @@ At each feature map cell, *DetectionOutput* predicts the offsets relative to the
* **Type**: float
* **Default value**: 0
* **Required**: *no*
-
+
**Inputs**
-* **1**: 2D input tensor with box logits. Required.
-* **2**: 2D input tensor with class predictions. Required.
-* **3**: 3D input tensor with proposals. Required.
-* **4**: 2D input tensor with additional class predictions information described in the [article](https://arxiv.org/pdf/1711.06897.pdf). Optional.
-* **5**: 2D input tensor with additional box predictions information described in the [article](https://arxiv.org/pdf/1711.06897.pdf). Optional.
+* **1**: 2D input tensor with box logits with shape `[N, num_prior_boxes * num_loc_classes * 4]` and type *T*. `num_loc_classes` is equal to `num_classes` when `share_location` is `false`; otherwise it is equal to 1. Required.
+* **2**: 2D input tensor with class predictions with shape `[N, num_prior_boxes * num_classes]` and type *T*. Required.
+* **3**: 3D input tensor with proposals with shape `[priors_batch_size, 1, num_prior_boxes * prior_box_size]` or `[priors_batch_size, 2, num_prior_boxes * prior_box_size]`. `priors_batch_size` is either 1 or `N`. The size of the second dimension depends on `variance_encoded_in_target`. If `variance_encoded_in_target` is `false`, the second dimension is 2 and variance values are provided for each box's coordinates. If `variance_encoded_in_target` is `true`, the second dimension is 1 and this tensor contains proposal boxes only. `prior_box_size` is equal to 4 when `normalized` is `true`; otherwise it is equal to 5.
+ Required.
+* **4**: 2D input tensor with additional class predictions information described in the [article](https://arxiv.org/pdf/1711.06897.pdf). Its shape must be equal to `[N, num_prior_boxes * 2]`. Optional.
+* **5**: 2D input tensor with additional box predictions information described in the [article](https://arxiv.org/pdf/1711.06897.pdf). Its shape must be equal to the first input tensor's shape. Optional.
+
+**Outputs**
+
+* **1**: 4D output tensor with type *T*. Its shape depends on whether `keep_top_k` or `top_k` is set. If `keep_top_k[0]` is greater than zero, then the shape is `[1, 1, N * keep_top_k[0], 7]`. If `keep_top_k[0]` is set to -1 and `top_k` is greater than zero, then the shape is `[1, 1, N * top_k * num_classes, 7]`. Otherwise, the output shape is equal to `[1, 1, N * num_classes * num_prior_boxes, 7]`.
+
+**Types**
+
+* *T*: any supported floating point type.
+
**Example**
```xml
-
- ...
-
+
+
+
+ 1
+ 5376
+
+
+ 1
+ 2688
+
+
+ 1
+ 2
+ 5376
+
+
+
-```
\ No newline at end of file
+```
diff --git a/inference-engine/src/legacy_api/src/ie_layer_validators.cpp b/inference-engine/src/legacy_api/src/ie_layer_validators.cpp
index 5b45c48a1d2..33151de3111 100644
--- a/inference-engine/src/legacy_api/src/ie_layer_validators.cpp
+++ b/inference-engine/src/legacy_api/src/ie_layer_validators.cpp
@@ -931,11 +931,12 @@ void DetectionOutputValidator::parseParams(CNNLayer* layer) {
if (_nms_threshold < 0) {
THROW_IE_EXCEPTION << "nms_threshold parameter of DetectionOutput layer can't be less then zero";
}
- int _keep_top_k = layer->GetParamAsUInt("keep_top_k", -1);
+ int _keep_top_k = layer->GetParamAsInt("keep_top_k", -1);
if (layer->CheckParamPresence("background_label_id"))
- int _background_label_id = layer->GetParamAsUInt("background_label_id", -1);
- if (layer->CheckParamPresence("top_k")) int _top_k = layer->GetParamAsUInt("top_k", -1);
+ int _background_label_id = layer->GetParamAsInt("background_label_id", -1);
+ if (layer->CheckParamPresence("top_k"))
+ int _top_k = layer->GetParamAsInt("top_k", -1);
if (layer->CheckParamPresence("variance_encoded_in_target"))
bool _variance_encoded_in_target = static_cast(layer->GetParamAsUInt("variance_encoded_in_target", 0));
if (layer->CheckParamPresence("num_orient_classes"))
@@ -947,7 +948,7 @@ void DetectionOutputValidator::parseParams(CNNLayer* layer) {
if (layer->CheckParamPresence("confidence_threshold")) {
float _confidence_threshold = layer->GetParamAsFloat("confidence_threshold");
if (_confidence_threshold < 0) {
- THROW_IE_EXCEPTION << "_nms_threshold parameter of DetectionOutput layer can't be less then zero";
+ THROW_IE_EXCEPTION << "_confidence_threshold parameter of DetectionOutput layer can't be less then zero";
}
}
diff --git a/inference-engine/src/mkldnn_plugin/nodes/detectionoutput.cpp b/inference-engine/src/mkldnn_plugin/nodes/detectionoutput.cpp
index e96cf5ee32e..c53d55ffc4a 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/detectionoutput.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/detectionoutput.cpp
@@ -278,17 +278,23 @@ public:
}
}
+ const int num_results = outputs[0]->getTensorDesc().getDims()[2];
const int DETECTION_SIZE = outputs[0]->getTensorDesc().getDims()[3];
if (DETECTION_SIZE != 7) {
return NOT_IMPLEMENTED;
}
- auto dst_data_size = N * _keep_top_k * DETECTION_SIZE * sizeof(float);
+ int dst_data_size = 0;
+ if (_keep_top_k > 0)
+ dst_data_size = N * _keep_top_k * DETECTION_SIZE * sizeof(float);
+ else if (_top_k > 0)
+ dst_data_size = N * _top_k * _num_classes * DETECTION_SIZE * sizeof(float);
+ else
+ dst_data_size = N * _num_classes * _num_priors * DETECTION_SIZE * sizeof(float);
if (dst_data_size > outputs[0]->byteSize()) {
return OUT_OF_BOUNDS;
}
-
memset(dst_data, 0, dst_data_size);
int count = 0;
@@ -331,7 +337,7 @@ public:
}
}
- if (count < N*_keep_top_k) {
+ if (count < num_results) {
// marker at end of boxes list
dst_data[count * DETECTION_SIZE + 0] = -1;
}
diff --git a/ngraph/core/include/ngraph/op/detection_output.hpp b/ngraph/core/include/ngraph/op/detection_output.hpp
index ac7972d9b2b..55457bf4f0f 100644
--- a/ngraph/core/include/ngraph/op/detection_output.hpp
+++ b/ngraph/core/include/ngraph/op/detection_output.hpp
@@ -28,11 +28,11 @@ namespace ngraph
int background_label_id = 0;
int top_k = -1;
bool variance_encoded_in_target = false;
- std::vector keep_top_k = {1};
+ std::vector keep_top_k;
std::string code_type = std::string{"caffe.PriorBoxParameter.CORNER"};
bool share_location = true;
float nms_threshold;
- float confidence_threshold = std::numeric_limits::min();
+ float confidence_threshold = 0;
bool clip_after_nms = false;
bool clip_before_nms = false;
bool decrease_label_id = false;
diff --git a/ngraph/core/reference/include/ngraph/runtime/reference/detection_output.hpp b/ngraph/core/reference/include/ngraph/runtime/reference/detection_output.hpp
index 9d372b62c63..c2fb331f714 100644
--- a/ngraph/core/reference/include/ngraph/runtime/reference/detection_output.hpp
+++ b/ngraph/core/reference/include/ngraph/runtime/reference/detection_output.hpp
@@ -9,6 +9,7 @@
#include
#include
+#include "ngraph/op/detection_output.hpp"
#include "ngraph/shape.hpp"
namespace ngraph
@@ -37,7 +38,6 @@ namespace ngraph
dataType ymin = dataType(0);
dataType xmax = dataType(0);
dataType ymax = dataType(0);
- dataType size = dataType(0);
};
using LabelBBox = std::map>;
@@ -45,8 +45,11 @@ namespace ngraph
size_t numImages;
size_t priorSize;
size_t numPriors;
+ size_t priorsBatchSize;
size_t numLocClasses;
size_t offset;
+ size_t numResults;
+ size_t outTotalSize;
void GetLocPredictions(const dataType* locData, std::vector& locations)
{
@@ -145,13 +148,12 @@ namespace ngraph
std::vector>& priorBboxes,
std::vector>>& priorVariances)
{
- priorBboxes.resize(numImages);
- priorVariances.resize(numImages);
- for (int n = 0; n < numImages; n++)
+ priorBboxes.resize(priorsBatchSize);
+ priorVariances.resize(priorsBatchSize);
+ int off = attrs.variance_encoded_in_target ? (numPriors * priorSize)
+ : (2 * numPriors * priorSize);
+ for (int n = 0; n < priorsBatchSize; n++)
{
- priorData += attrs.variance_encoded_in_target
- ? n * numPriors * priorSize
- : 2 * n * numPriors * priorSize;
std::vector& currPrBbox = priorBboxes[n];
std::vector>& currPrVar = priorVariances[n];
for (int i = 0; i < numPriors; ++i)
@@ -162,8 +164,6 @@ namespace ngraph
bbox.ymin = priorData[start_idx + 1 + offset];
bbox.xmax = priorData[start_idx + 2 + offset];
bbox.ymax = priorData[start_idx + 3 + offset];
- dataType bbox_size = BBoxSize(bbox);
- bbox.size = bbox_size;
currPrBbox.push_back(bbox);
}
if (!attrs.variance_encoded_in_target)
@@ -172,14 +172,15 @@ namespace ngraph
for (int i = 0; i < numPriors; ++i)
{
int start_idx = i * 4;
- std::vector var;
+ std::vector var(4);
for (int j = 0; j < 4; ++j)
{
- var.push_back(priorVar[start_idx + j]);
+ var[j] = (priorVar[start_idx + j]);
}
currPrVar.push_back(var);
}
}
+ priorData += off;
}
}
@@ -200,22 +201,13 @@ namespace ngraph
priorXmax /= attrs.input_width;
priorYmax /= attrs.input_height;
}
+
if (attrs.code_type == "caffe.PriorBoxParameter.CORNER")
{
- if (attrs.variance_encoded_in_target)
- {
- decodeBbox.xmin = priorXmin + bbox.xmin;
- decodeBbox.ymin = priorYmin + bbox.ymin;
- decodeBbox.xmax = priorXmax + bbox.xmax;
- decodeBbox.ymax = priorYmax + bbox.ymax;
- }
- else
- {
- decodeBbox.xmin = priorXmin + priorVariances[0] * bbox.xmin;
- decodeBbox.ymin = priorYmin + priorVariances[1] * bbox.ymin;
- decodeBbox.xmax = priorXmax + priorVariances[2] * bbox.xmax;
- decodeBbox.ymax = priorYmax + priorVariances[3] * bbox.ymax;
- }
+ decodeBbox.xmin = priorXmin + priorVariances[0] * bbox.xmin;
+ decodeBbox.ymin = priorYmin + priorVariances[1] * bbox.ymin;
+ decodeBbox.xmax = priorXmax + priorVariances[2] * bbox.xmax;
+ decodeBbox.ymax = priorYmax + priorVariances[3] * bbox.ymax;
}
else if (attrs.code_type == "caffe.PriorBoxParameter.CENTER_SIZE")
{
@@ -225,41 +217,60 @@ namespace ngraph
dataType priorCenterY = (priorYmin + priorYmax) / 2;
dataType decodeBboxCenterX, decodeBboxCenterY;
dataType decodeBboxWidth, decodeBboxHeight;
- if (attrs.variance_encoded_in_target)
- {
- decodeBboxCenterX = bbox.xmin * priorWidth + priorCenterX;
- decodeBboxCenterY = bbox.ymin * priorHeight + priorCenterY;
- decodeBboxWidth = std::exp(bbox.xmax) * priorWidth;
- decodeBboxHeight = std::exp(bbox.ymax) * priorHeight;
- }
- else
- {
- decodeBboxCenterX =
- priorVariances[0] * bbox.xmin * priorWidth + priorCenterX;
- decodeBboxCenterY =
- priorVariances[1] * bbox.ymin * priorHeight + priorCenterY;
- decodeBboxWidth = std::exp(priorVariances[2] * bbox.xmax) * priorWidth;
- decodeBboxHeight =
- std::exp(priorVariances[3] * bbox.ymax) * priorHeight;
- }
+ decodeBboxCenterX =
+ priorVariances[0] * bbox.xmin * priorWidth + priorCenterX;
+ decodeBboxCenterY =
+ priorVariances[1] * bbox.ymin * priorHeight + priorCenterY;
+ decodeBboxWidth = std::exp(priorVariances[2] * bbox.xmax) * priorWidth;
+ decodeBboxHeight = std::exp(priorVariances[3] * bbox.ymax) * priorHeight;
decodeBbox.xmin = decodeBboxCenterX - decodeBboxWidth / 2;
decodeBbox.ymin = decodeBboxCenterY - decodeBboxHeight / 2;
decodeBbox.xmax = decodeBboxCenterX + decodeBboxWidth / 2;
decodeBbox.ymax = decodeBboxCenterY + decodeBboxHeight / 2;
}
- if (attrs.clip_before_nms)
+ }
+
+ void DecodeBBox(const NormalizedBBox& priorBboxes,
+ const NormalizedBBox& bbox,
+ NormalizedBBox& decodeBbox)
+ {
+ dataType priorXmin = priorBboxes.xmin;
+ dataType priorYmin = priorBboxes.ymin;
+ dataType priorXmax = priorBboxes.xmax;
+ dataType priorYmax = priorBboxes.ymax;
+
+ if (!attrs.normalized)
{
- decodeBbox.xmin =
- std::max(0, std::min(1, decodeBbox.xmin));
- decodeBbox.ymin =
- std::max(0, std::min(1, decodeBbox.ymin));
- decodeBbox.xmax =
- std::max(0, std::min(1, decodeBbox.xmax));
- decodeBbox.ymax =
- std::max(0, std::min(1, decodeBbox.ymax));
+ priorXmin /= attrs.input_width;
+ priorYmin /= attrs.input_height;
+ priorXmax /= attrs.input_width;
+ priorYmax /= attrs.input_height;
+ }
+
+ if (attrs.code_type == "caffe.PriorBoxParameter.CORNER")
+ {
+ decodeBbox.xmin = priorXmin + bbox.xmin;
+ decodeBbox.ymin = priorYmin + bbox.ymin;
+ decodeBbox.xmax = priorXmax + bbox.xmax;
+ decodeBbox.ymax = priorYmax + bbox.ymax;
+ }
+ else if (attrs.code_type == "caffe.PriorBoxParameter.CENTER_SIZE")
+ {
+ dataType priorWidth = priorXmax - priorXmin;
+ dataType priorHeight = priorYmax - priorYmin;
+ dataType priorCenterX = (priorXmin + priorXmax) / 2;
+ dataType priorCenterY = (priorYmin + priorYmax) / 2;
+ dataType decodeBboxCenterX, decodeBboxCenterY;
+ dataType decodeBboxWidth, decodeBboxHeight;
+ decodeBboxCenterX = bbox.xmin * priorWidth + priorCenterX;
+ decodeBboxCenterY = bbox.ymin * priorHeight + priorCenterY;
+ decodeBboxWidth = std::exp(bbox.xmax) * priorWidth;
+ decodeBboxHeight = std::exp(bbox.ymax) * priorHeight;
+ decodeBbox.xmin = decodeBboxCenterX - decodeBboxWidth / 2;
+ decodeBbox.ymin = decodeBboxCenterY - decodeBboxHeight / 2;
+ decodeBbox.xmax = decodeBboxCenterX + decodeBboxWidth / 2;
+ decodeBbox.ymax = decodeBboxCenterY + decodeBboxHeight / 2;
}
- dataType bboxSize = BBoxSize(decodeBbox);
- decodeBbox.size = bboxSize;
}
void DecodeBBoxes(const std::vector& priorBboxes,
@@ -271,7 +282,27 @@ namespace ngraph
for (int i = 0; i < numBboxes; ++i)
{
NormalizedBBox decodeBbox;
- DecodeBBox(priorBboxes[i], priorVariances[i], labelLocPreds[i], decodeBbox);
+
+ if (attrs.variance_encoded_in_target)
+ {
+ DecodeBBox(priorBboxes[i], labelLocPreds[i], decodeBbox);
+ }
+ else
+ {
+ DecodeBBox(
+ priorBboxes[i], priorVariances[i], labelLocPreds[i], decodeBbox);
+ }
+ if (attrs.clip_before_nms)
+ {
+ decodeBbox.xmin =
+ std::max(0, std::min(1, decodeBbox.xmin));
+ decodeBbox.ymin =
+ std::max(0, std::min(1, decodeBbox.ymin));
+ decodeBbox.xmax =
+ std::max(0, std::min(1, decodeBbox.xmax));
+ decodeBbox.ymax =
+ std::max(0, std::min(1, decodeBbox.ymax));
+ }
decodeBboxes.push_back(decodeBbox);
}
}
@@ -286,12 +317,19 @@ namespace ngraph
for (int i = 0; i < numImages; ++i)
{
LabelBBox& decodeBboxesImage = decodeBboxes[i];
- const std::vector& currPrBbox = priorBboxes[i];
- const std::vector>& currPrVar = priorVariances[i];
+ int pboxIdx = i;
+ if (priorBboxes.size() == 1)
+ {
+ pboxIdx = 0;
+ }
+ const std::vector& currPrBbox = priorBboxes[pboxIdx];
+ const std::vector>& currPrVar =
+ priorVariances[pboxIdx];
for (int c = 0; c < numLocClasses; ++c)
{
int label = attrs.share_location ? -1 : c;
- if (label == attrs.background_label_id)
+ if (attrs.background_label_id > -1 &&
+ label == attrs.background_label_id)
{
continue;
}
@@ -319,7 +357,8 @@ namespace ngraph
for (int c = 0; c < numLocClasses; ++c)
{
int label = attrs.share_location ? -1 : c;
- if (label == attrs.background_label_id)
+ if (attrs.background_label_id > -1 &&
+ label == attrs.background_label_id)
{
continue;
}
@@ -360,6 +399,7 @@ namespace ngraph
std::stable_sort(
scoreIndexVec.begin(), scoreIndexVec.end(), SortScorePairDescend);
+
if (topK > -1 && topK < scoreIndexVec.size())
{
scoreIndexVec.resize(topK);
@@ -391,6 +431,7 @@ namespace ngraph
{
NormalizedBBox intersectBbox;
IntersectBBox(bbox1, bbox2, intersectBbox);
+
dataType intersectWidth, intersectHeight;
intersectWidth = intersectBbox.xmax - intersectBbox.xmin;
intersectHeight = intersectBbox.ymax - intersectBbox.ymin;
@@ -399,7 +440,6 @@ namespace ngraph
dataType intersect_size = intersectWidth * intersectHeight;
dataType bbox1_size = BBoxSize(bbox1);
dataType bbox2_size = BBoxSize(bbox2);
-
return intersect_size / (bbox1_size + bbox2_size - intersect_size);
}
else
@@ -423,6 +463,7 @@ namespace ngraph
{
const int kept_idx = indices[k];
dataType overlap = JaccardOverlap(bboxes[idx], bboxes[kept_idx]);
+
if (overlap > attrs.nms_threshold)
{
keep = false;
@@ -448,6 +489,8 @@ namespace ngraph
int id = 0;
for (int c = 1; c < attrs.num_classes; c++)
{
+ if (attrs.background_label_id > -1 && c == attrs.background_label_id)
+ continue;
dataType temp = confScores.at(c)[p];
if (temp > conf)
{
@@ -497,15 +540,19 @@ namespace ngraph
public:
referenceDetectionOutput(const ngraph::op::DetectionOutputAttrs& _attrs,
const ngraph::Shape& locShape,
- const ngraph::Shape& priorsShape)
+ const ngraph::Shape& priorsShape,
+ const ngraph::Shape& outShape)
: attrs(_attrs)
{
numImages = locShape[0];
priorSize = _attrs.normalized ? 4 : 5;
offset = _attrs.normalized ? 0 : 1;
numPriors = priorsShape[2] / priorSize;
+ priorsBatchSize = priorsShape[0];
numLocClasses =
_attrs.share_location ? 1 : static_cast(_attrs.num_classes);
+ numResults = outShape[2];
+ outTotalSize = shape_size(outShape);
}
void run(const dataType* _location,
@@ -515,6 +562,7 @@ namespace ngraph
const dataType* _armLocation,
dataType* result)
{
+ std::memset(result, 0, outTotalSize * sizeof(dataType));
bool withAddBoxPred = _armConfidence != nullptr && _armLocation != nullptr;
std::vector armLocPreds;
if (withAddBoxPred)
@@ -566,6 +614,7 @@ namespace ngraph
if (confScores.find(c) == confScores.end())
continue;
const std::vector& scores = confScores.find(c)->second;
+
int label = attrs.share_location ? -1 : c;
if (decodeBboxesImage.find(label) == decodeBboxesImage.end())
continue;
@@ -666,7 +715,7 @@ namespace ngraph
}
}
}
- if (count < numImages * attrs.keep_top_k[0])
+ if (count < numResults)
{
result[count * 7 + 0] = -1;
}
diff --git a/ngraph/core/src/op/detection_output.cpp b/ngraph/core/src/op/detection_output.cpp
index 86a107deb5d..e0471495bb0 100644
--- a/ngraph/core/src/op/detection_output.cpp
+++ b/ngraph/core/src/op/detection_output.cpp
@@ -45,16 +45,223 @@ op::DetectionOutput::DetectionOutput(const Output& box_logits,
void op::DetectionOutput::validate_and_infer_types()
{
- if (get_input_partial_shape(0).is_static())
+ NODE_VALIDATION_CHECK(
+ this, m_attrs.num_classes > 0, "Number of classes must be greater than zero");
+
+ NODE_VALIDATION_CHECK(
+ this, m_attrs.keep_top_k.size() > 0, "keep_top_k attribute must be provided");
+
+ NODE_VALIDATION_CHECK(this,
+ m_attrs.code_type == "caffe.PriorBoxParameter.CORNER" ||
+ m_attrs.code_type == "caffe.PriorBoxParameter.CENTER_SIZE",
+ "code_type must be either \"caffe.PriorBoxParameter.CORNER\" or "
+ "\"caffe.PriorBoxParameter.CENTER_SIZE\"");
+
+ auto box_logits_et = get_input_element_type(0);
+ NODE_VALIDATION_CHECK(this,
+ box_logits_et.is_real(),
+ "Box logits' data type must be floating point. Got " +
+ box_logits_et.get_type_name());
+ auto class_preds_et = get_input_element_type(1);
+ NODE_VALIDATION_CHECK(this,
+ class_preds_et == box_logits_et,
+ "Class predictions' data type must be the same as box logits type (" +
+ box_logits_et.get_type_name() + "). Got " +
+ class_preds_et.get_type_name());
+ auto proposals_et = get_input_element_type(2);
+ NODE_VALIDATION_CHECK(this,
+ proposals_et.is_real(),
+ "Proposals' data type must be floating point. Got " +
+ proposals_et.get_type_name());
+
+ const PartialShape& box_logits_pshape = get_input_partial_shape(0);
+ const PartialShape& class_preds_pshape = get_input_partial_shape(1);
+ const PartialShape& proposals_pshape = get_input_partial_shape(2);
+
+ int num_loc_classes = m_attrs.share_location ? 1 : m_attrs.num_classes;
+ int prior_box_size = m_attrs.normalized ? 4 : 5;
+
+ Dimension num_images = Dimension::dynamic();
+ Dimension num_prior_boxes = Dimension::dynamic();
+ if (box_logits_pshape.rank().is_static())
{
- auto box_logits_shape = get_input_partial_shape(0).to_shape();
- set_output_type(
- 0, element::f32, Shape{1, 1, m_attrs.keep_top_k[0] * box_logits_shape[0], 7});
+ NODE_VALIDATION_CHECK(this,
+ box_logits_pshape.rank().get_length() == 2,
+ "Box logits rank must be 2. Got " +
+ std::to_string(box_logits_pshape.rank().get_length()));
+ num_images = box_logits_pshape[0];
+ if (box_logits_pshape[1].is_static())
+ {
+ NODE_VALIDATION_CHECK(
+ this,
+ (box_logits_pshape[1].get_length() % (num_loc_classes * 4)) == 0,
+ "Box logits' second dimension must be a multiply of num_loc_classes * 4 (" +
+ std::to_string(num_loc_classes * 4) + "). Current value is: ",
+ box_logits_pshape[1].get_length(),
+ ".");
+ num_prior_boxes = box_logits_pshape[1].get_length() / (num_loc_classes * 4);
+ }
+ }
+ if (class_preds_pshape.rank().is_static())
+ {
+ NODE_VALIDATION_CHECK(this,
+ class_preds_pshape.rank().get_length() == 2,
+ "Class predictions rank must be 2. Got " +
+ std::to_string(class_preds_pshape.rank().get_length()));
+ if (num_images.is_dynamic() && class_preds_pshape[0].is_static())
+ {
+ num_images = class_preds_pshape[0];
+ }
+ else
+ {
+ NODE_VALIDATION_CHECK(
+ this,
+ class_preds_pshape[0].compatible(num_images),
+ "Class predictions' first dimension is not compatible with batch size.");
+ }
+ if (class_preds_pshape[1].is_static())
+ {
+ if (num_prior_boxes.is_dynamic())
+ {
+ NODE_VALIDATION_CHECK(
+ this,
+ class_preds_pshape[1].get_length() % m_attrs.num_classes == 0,
+ "Class predictions' second dimension must be a multiply of num_classes (" +
+ std::to_string(m_attrs.num_classes) + "). Current value is: ",
+ class_preds_pshape[1].get_length(),
+ ".");
+ num_prior_boxes = class_preds_pshape[1].get_length() / m_attrs.num_classes;
+ }
+ else
+ {
+ int num_prior_boxes_val = num_prior_boxes.get_length();
+ NODE_VALIDATION_CHECK(
+ this,
+ class_preds_pshape[1].get_length() == num_prior_boxes_val * m_attrs.num_classes,
+ "Class predictions' second dimension must be equal to num_prior_boxes * "
+ "num_classes (" +
+ std::to_string(num_prior_boxes_val * m_attrs.num_classes) +
+ "). Current value is: ",
+ class_preds_pshape[1].get_length(),
+ ".");
+ }
+ }
+ }
+ if (proposals_pshape.rank().is_static())
+ {
+ NODE_VALIDATION_CHECK(this,
+ proposals_pshape.rank().get_length() == 3,
+ "Proposals rank must be 3. Got " +
+ std::to_string(proposals_pshape.rank().get_length()));
+ if (num_images.is_static() && proposals_pshape[0].is_static())
+ {
+ int64_t proposals_1st_dim = proposals_pshape[0].get_length();
+ int64_t num_images_val = num_images.get_length();
+ NODE_VALIDATION_CHECK(
+ this,
+ proposals_1st_dim == 1 || proposals_1st_dim == num_images_val,
+ "Proposals' first dimension is must be equal to either batch size (" +
+ std::to_string(num_images_val) + ") or 1. Got: " +
+ std::to_string(proposals_1st_dim) + ".");
+ }
+ if (proposals_pshape[1].is_static())
+ {
+ size_t proposals_expected_2nd_dim = m_attrs.variance_encoded_in_target ? 1 : 2;
+ NODE_VALIDATION_CHECK(this,
+ proposals_pshape[1].compatible(proposals_expected_2nd_dim),
+ "Proposals' second dimension is mismatched. Current value is: ",
+ proposals_pshape[1].get_length(),
+ ", expected: ",
+ proposals_expected_2nd_dim,
+ ".");
+ }
+ if (proposals_pshape[2].is_static())
+ {
+ if (num_prior_boxes.is_dynamic())
+ {
+ NODE_VALIDATION_CHECK(
+ this,
+ proposals_pshape[2].get_length() % prior_box_size == 0,
+ "Proposals' third dimension must be a multiply of prior_box_size (" +
+ std::to_string(prior_box_size) + "). Current value is: ",
+ proposals_pshape[2].get_length(),
+ ".");
+ num_prior_boxes = proposals_pshape[2].get_length() / prior_box_size;
+ }
+ else
+ {
+ int num_prior_boxes_val = num_prior_boxes.get_length();
+ NODE_VALIDATION_CHECK(this,
+ proposals_pshape[2].get_length() ==
+ num_prior_boxes_val * prior_box_size,
+ "Proposals' third dimension must be equal to num_prior_boxes "
+ "* prior_box_size (" +
+ std::to_string(num_prior_boxes_val * prior_box_size) +
+ "). Current value is: ",
+ proposals_pshape[2].get_length(),
+ ".");
+ }
+ }
+ }
+
+ if (get_input_size() > 3)
+ {
+ auto aux_class_preds_et = get_input_element_type(3);
+ NODE_VALIDATION_CHECK(this,
+ aux_class_preds_et == class_preds_et,
+ "Additional class predictions' data type must be the same as class "
+ "predictions data type (" +
+ class_preds_et.get_type_name() + "). Got " +
+ aux_class_preds_et.get_type_name());
+ auto aux_box_preds_et = get_input_element_type(4);
+ NODE_VALIDATION_CHECK(
+ this,
+ aux_box_preds_et == box_logits_et,
+ "Additional box predictions' data type must be the same as box logits data type (" +
+ box_logits_et.get_type_name() + "). Got " + aux_box_preds_et.get_type_name());
+
+ const PartialShape& aux_class_preds_pshape = get_input_partial_shape(3);
+ const PartialShape& aux_box_preds_pshape = get_input_partial_shape(4);
+ if (aux_class_preds_pshape.rank().is_static())
+ {
+ NODE_VALIDATION_CHECK(this,
+ aux_class_preds_pshape[0].compatible(num_images),
+ "Additional class predictions' first dimension must be "
+ "compatible with batch size.");
+ if (num_prior_boxes.is_static())
+ {
+ int num_prior_boxes_val = num_prior_boxes.get_length();
+ NODE_VALIDATION_CHECK(
+ this,
+ aux_class_preds_pshape[1].get_length() == num_prior_boxes_val * 2,
+ "Additional class predictions' second dimension must be equal to "
+ "num_prior_boxes * 2 (" +
+ std::to_string(num_prior_boxes_val * 2) + "). Got " +
+ std::to_string(aux_class_preds_pshape[1].get_length()) + ".");
+ }
+ }
+ NODE_VALIDATION_CHECK(
+ this,
+ aux_box_preds_pshape.compatible(box_logits_pshape),
+ "Additional box predictions' shape must be compatible with box logits shape.");
+ }
+
+ std::vector output_shape{1, 1};
+ if (m_attrs.keep_top_k[0] > 0)
+ {
+ output_shape.push_back(num_images * m_attrs.keep_top_k[0]);
+ }
+ else if (m_attrs.top_k > 0)
+ {
+ output_shape.push_back(num_images * m_attrs.top_k * m_attrs.num_classes);
}
else
{
- set_output_type(0, element::f32, PartialShape::dynamic());
+ output_shape.push_back(num_images * num_prior_boxes * m_attrs.num_classes);
}
+ output_shape.push_back(7);
+
+ set_output_type(0, box_logits_et, output_shape);
}
shared_ptr op::DetectionOutput::clone_with_new_inputs(const OutputVector& new_args) const
diff --git a/ngraph/python/tests/test_ngraph/test_create_op.py b/ngraph/python/tests/test_ngraph/test_create_op.py
index 4a3b6d0eeef..c403c8ff022 100644
--- a/ngraph/python/tests/test_ngraph/test_create_op.py
+++ b/ngraph/python/tests/test_ngraph/test_create_op.py
@@ -932,11 +932,11 @@ def test_detection_output(int_dtype, fp_dtype):
"nms_threshold": fp_dtype(0.645),
}
- box_logits = ng.parameter([4, 1, 5, 5], fp_dtype, "box_logits")
- class_preds = ng.parameter([2, 1, 4, 5], fp_dtype, "class_preds")
- proposals = ng.parameter([2, 1, 4, 5], fp_dtype, "proposals")
- aux_class_preds = ng.parameter([2, 1, 4, 5], fp_dtype, "aux_class_preds")
- aux_box_preds = ng.parameter([2, 1, 4, 5], fp_dtype, "aux_box_preds")
+ box_logits = ng.parameter([4, 8], fp_dtype, "box_logits")
+ class_preds = ng.parameter([4, 170], fp_dtype, "class_preds")
+ proposals = ng.parameter([4, 2, 10], fp_dtype, "proposals")
+ aux_class_preds = ng.parameter([4, 4], fp_dtype, "aux_class_preds")
+ aux_box_preds = ng.parameter([4, 8], fp_dtype, "aux_box_preds")
node = ng.detection_output(box_logits, class_preds, proposals, attributes, aux_class_preds, aux_box_preds)
diff --git a/ngraph/python/tests/test_ngraph/test_dyn_attributes.py b/ngraph/python/tests/test_ngraph/test_dyn_attributes.py
index c56a7ab7837..c4ee4c427e4 100644
--- a/ngraph/python/tests/test_ngraph/test_dyn_attributes.py
+++ b/ngraph/python/tests/test_ngraph/test_dyn_attributes.py
@@ -71,7 +71,7 @@ def test_dynamic_get_attribute_value(int_dtype, fp_dtype):
"top_k": int_dtype(16),
"variance_encoded_in_target": True,
"keep_top_k": np.array([64, 32, 16, 8], dtype=int_dtype),
- "code_type": "pytorch.some_parameter_name",
+ "code_type": "caffe.PriorBoxParameter.CENTER_SIZE",
"share_location": False,
"nms_threshold": fp_dtype(0.645),
"confidence_threshold": fp_dtype(0.111),
@@ -84,11 +84,11 @@ def test_dynamic_get_attribute_value(int_dtype, fp_dtype):
"objectness_score": fp_dtype(0.77),
}
- box_logits = ng.parameter([4, 1, 5, 5], fp_dtype, "box_logits")
- class_preds = ng.parameter([2, 1, 4, 5], fp_dtype, "class_preds")
- proposals = ng.parameter([2, 1, 4, 5], fp_dtype, "proposals")
- aux_class_preds = ng.parameter([2, 1, 4, 5], fp_dtype, "aux_class_preds")
- aux_box_preds = ng.parameter([2, 1, 4, 5], fp_dtype, "aux_box_preds")
+ box_logits = ng.parameter([4, 680], fp_dtype, "box_logits")
+ class_preds = ng.parameter([4, 170], fp_dtype, "class_preds")
+ proposals = ng.parameter([4, 1, 8], fp_dtype, "proposals")
+ aux_class_preds = ng.parameter([4, 4], fp_dtype, "aux_class_preds")
+ aux_box_preds = ng.parameter([4, 680], fp_dtype, "aux_box_preds")
node = ng.detection_output(box_logits, class_preds, proposals, attributes, aux_class_preds, aux_box_preds)
@@ -97,7 +97,7 @@ def test_dynamic_get_attribute_value(int_dtype, fp_dtype):
assert node.get_top_k() == int_dtype(16)
assert node.get_variance_encoded_in_target()
assert np.all(np.equal(node.get_keep_top_k(), np.array([64, 32, 16, 8], dtype=int_dtype)))
- assert node.get_code_type() == "pytorch.some_parameter_name"
+ assert node.get_code_type() == "caffe.PriorBoxParameter.CENTER_SIZE"
assert not node.get_share_location()
assert np.isclose(node.get_nms_threshold(), fp_dtype(0.645))
assert np.isclose(node.get_confidence_threshold(), fp_dtype(0.111))
diff --git a/ngraph/test/CMakeLists.txt b/ngraph/test/CMakeLists.txt
index 651957fa1e6..43d79f9bc57 100644
--- a/ngraph/test/CMakeLists.txt
+++ b/ngraph/test/CMakeLists.txt
@@ -117,6 +117,7 @@ set(SRC
type_prop/ctc_loss.cpp
type_prop/deformable_convolution.cpp
type_prop/deformable_psroi_pooling.cpp
+ type_prop/detection_output.cpp
type_prop/depth_to_space.cpp
type_prop/dyn_reshape.cpp
type_prop/strided_slice.cpp
@@ -280,6 +281,7 @@ set(MULTI_TEST_SRC
backend/cosh.in.cpp
backend/ctc_greedy_decoder.in.cpp
backend/cum_sum.in.cpp
+ backend/detection_output.in.cpp
backend/divide.in.cpp
backend/dyn_reshape.in.cpp
backend/strided_slice.in.cpp
diff --git a/ngraph/test/attributes.cpp b/ngraph/test/attributes.cpp
index f015be27f03..3772615337b 100644
--- a/ngraph/test/attributes.cpp
+++ b/ngraph/test/attributes.cpp
@@ -1473,11 +1473,11 @@ TEST(attributes, interpolate_op)
TEST(attributes, detection_output_op)
{
FactoryRegistry::get().register_factory();
- const auto box_logits = make_shared(element::f32, Shape{1, 3, 32, 32});
- const auto class_preds = make_shared(element::f32, Shape{32});
- const auto proposals = make_shared(element::f32, Shape{128, 2});
- const auto aux_class_preds = make_shared(element::f32, Shape{16});
- const auto aux_box_pred = make_shared(element::f32, Shape{32, 2});
+ const auto box_logits = make_shared(element::f32, Shape{1, 2 * 1 * 4});
+ const auto class_preds = make_shared(element::f32, Shape{1, 2 * 32});
+ const auto proposals = make_shared(element::f32, Shape{1, 2, 2 * 4});
+ const auto aux_class_preds = make_shared(element::f32, Shape{1, 2 * 2});
+ const auto aux_box_pred = make_shared(element::f32, Shape{1, 2 * 1 * 4});
op::DetectionOutputAttrs attrs;
attrs.num_classes = 32;
diff --git a/ngraph/test/backend/detection_output.in.cpp b/ngraph/test/backend/detection_output.in.cpp
new file mode 100644
index 00000000000..103ca24d6f0
--- /dev/null
+++ b/ngraph/test/backend/detection_output.in.cpp
@@ -0,0 +1,872 @@
+//*****************************************************************************
+// Copyright 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+
+// clang-format off
+#ifdef ${BACKEND_NAME}_FLOAT_TOLERANCE_BITS
+#define DEFAULT_FLOAT_TOLERANCE_BITS ${BACKEND_NAME}_FLOAT_TOLERANCE_BITS
+#endif
+
+#ifdef ${BACKEND_NAME}_DOUBLE_TOLERANCE_BITS
+#define DEFAULT_DOUBLE_TOLERANCE_BITS ${BACKEND_NAME}_DOUBLE_TOLERANCE_BITS
+#endif
+// clang-format on
+
+#include "gtest/gtest.h"
+#include "ngraph/ngraph.hpp"
+#include "util/engine/test_engines.hpp"
+#include "util/test_case.hpp"
+#include "util/test_control.hpp"
+
+using namespace std;
+using namespace ngraph;
+
+static string s_manifest = "${MANIFEST}";
+using TestEngine = test::ENGINE_CLASS_NAME(${BACKEND_NAME});
+
+NGRAPH_TEST(${BACKEND_NAME}, detection_output_3_inputs) // 3-input op, CORNER code type, per-class locations, one prior set shared by both images
+{
+ op::DetectionOutputAttrs attrs;
+ attrs.num_classes = 3;
+ attrs.background_label_id = -1;
+ attrs.top_k = -1;
+ attrs.variance_encoded_in_target = true;
+ attrs.keep_top_k = {2}; // used below to size the output: num_images * keep_top_k[0] rows
+ attrs.code_type = "caffe.PriorBoxParameter.CORNER";
+ attrs.share_location = false; // makes num_loc_classes equal to num_classes below
+ attrs.nms_threshold = 0.5;
+ attrs.confidence_threshold = 0.3;
+ attrs.clip_after_nms = false;
+ attrs.clip_before_nms = true;
+ attrs.decrease_label_id = false;
+ attrs.normalized = true; // selects prior_box_size = 4 below
+ attrs.input_height = 0;
+ attrs.input_width = 0;
+ attrs.objectness_score = 0;
+
+ size_t num_prior_boxes = 2;
+ size_t num_loc_classes = attrs.share_location ? 1 : attrs.num_classes;
+ size_t prior_box_size = attrs.normalized ? 4 : 5; // values stored per prior box
+ size_t num_images = 2;
+ Shape loc_shape{num_images, num_prior_boxes * num_loc_classes * prior_box_size};
+ Shape conf_shape{num_images, num_prior_boxes * attrs.num_classes};
+ Shape prior_boxes_shape{
+ 1, attrs.variance_encoded_in_target ? 1UL : 2UL, num_prior_boxes * prior_box_size}; // leading 1: a single prior set although num_images is 2
+
+ auto loc = make_shared(element::f32, loc_shape);
+ auto conf = make_shared(element::f32, conf_shape);
+ auto prior_boxes = make_shared(element::f32, prior_boxes_shape);
+ auto f = make_shared(make_shared(loc, conf, prior_boxes, attrs),
+ ParameterVector{loc, conf, prior_boxes});
+
+ auto test_case = test::TestCase(f);
+ // locations (data for parameter `loc`)
+ test_case.add_input({
+ // batch 0, class 0
+ 0.1,
+ 0.1,
+ 0.2,
+ 0.2,
+ 0.0,
+ 0.1,
+ 0.2,
+ 0.15,
+ // batch 0, class 1
+ 0.3,
+ 0.2,
+ 0.5,
+ 0.3,
+ 0.2,
+ 0.1,
+ 0.42,
+ 0.66,
+ // batch 0, class 2
+ 0.05,
+ 0.1,
+ 0.2,
+ 0.3,
+ 0.2,
+ 0.1,
+ 0.33,
+ 0.44,
+ // batch 1, class 0
+ 0.2,
+ 0.1,
+ 0.4,
+ 0.2,
+ 0.1,
+ 0.05,
+ 0.2,
+ 0.25,
+ // batch 1, class 1
+ 0.1,
+ 0.2,
+ 0.5,
+ 0.3,
+ 0.1,
+ 0.1,
+ 0.12,
+ 0.34,
+ // batch 1, class 2
+ 0.25,
+ 0.11,
+ 0.4,
+ 0.32,
+ 0.2,
+ 0.12,
+ 0.38,
+ 0.24,
+ });
+ test_case.add_input({ // confidence (data for parameter `conf`): num_prior_boxes * num_classes values per image
+ // batch 0
+ 0.1,
+ 0.9,
+ 0.4,
+ 0.7,
+ 0,
+ 0.2,
+ // batch 1
+ 0.7,
+ 0.8,
+ 0.42,
+ 0.33,
+ 0.81,
+ 0.2,
+ });
+ test_case.add_input({ // prior boxes (data for parameter `prior_boxes`)
+ // prior box 0
+ 0.0,
+ 0.5,
+ 0.1,
+ 0.2,
+ // prior box 1
+ 0.0,
+ 0.3,
+ 0.1,
+ 0.35,
+ });
+ Shape output_shape{1, 1, num_images * static_cast(attrs.keep_top_k[0]), 7}; // 7 values per row: [batch_id, class_id, confidence, x_1, y_1, x_2, y_2]
+ test_case.add_expected_output(
+ output_shape, {0, 0, 0.7, 0.2, 0.4, 0.52, 1, 0, 1, 0.9, 0, 0.6, 0.3, 0.35,
+ 1, 1, 0.81, 0.25, 0.41, 0.5, 0.67, 1, 1, 0.8, 0.1, 0.55, 0.3, 0.45});
+ test_case.run();
+}
+
+NGRAPH_TEST(${BACKEND_NAME}, detection_output_3_inputs_share_location) // 3-input op where one location set is shared by all classes and priors are given per image
+{
+ op::DetectionOutputAttrs attrs;
+ attrs.num_classes = 3;
+ attrs.background_label_id = -1;
+ attrs.top_k = -1;
+ attrs.variance_encoded_in_target = true;
+ attrs.keep_top_k = {2}; // used below to size the output: num_images * keep_top_k[0] rows
+ attrs.code_type = "caffe.PriorBoxParameter.CORNER";
+ attrs.share_location = true; // makes num_loc_classes equal to 1 below
+ attrs.nms_threshold = 0.5;
+ attrs.confidence_threshold = 0.3;
+ attrs.clip_after_nms = false;
+ attrs.clip_before_nms = true;
+ attrs.decrease_label_id = false;
+ attrs.normalized = true; // selects prior_box_size = 4 below
+ attrs.input_height = 0;
+ attrs.input_width = 0;
+ attrs.objectness_score = 0;
+
+ size_t num_prior_boxes = 2;
+ size_t num_loc_classes = attrs.share_location ? 1 : attrs.num_classes;
+ size_t prior_box_size = attrs.normalized ? 4 : 5; // values stored per prior box
+ size_t num_images = 2;
+ Shape loc_shape{num_images, num_prior_boxes * num_loc_classes * prior_box_size};
+ Shape conf_shape{num_images, num_prior_boxes * attrs.num_classes};
+ Shape prior_boxes_shape{
+ num_images, attrs.variance_encoded_in_target ? 1UL : 2UL, num_prior_boxes * prior_box_size}; // leading num_images: a separate prior set per image
+
+ auto loc = make_shared(element::f32, loc_shape);
+ auto conf = make_shared(element::f32, conf_shape);
+ auto prior_boxes = make_shared(element::f32, prior_boxes_shape);
+ auto f = make_shared(make_shared(loc, conf, prior_boxes, attrs),
+ ParameterVector{loc, conf, prior_boxes});
+
+ auto test_case = test::TestCase(f);
+ // locations (data for parameter `loc`)
+ test_case.add_input({
+ // batch 0
+ 0.1,
+ 0.1,
+ 0.2,
+ 0.2,
+ 0.0,
+ 0.1,
+ 0.2,
+ 0.15,
+ // batch 1
+ 0.2,
+ 0.1,
+ 0.4,
+ 0.2,
+ 0.1,
+ 0.05,
+ 0.2,
+ 0.25,
+ });
+ test_case.add_input({ // confidence (data for parameter `conf`)
+ // batch 0
+ 0.1,
+ 0.9,
+ 0.4,
+ 0.7,
+ 0,
+ 0.2,
+ // batch 1
+ 0.7,
+ 0.8,
+ 0.42,
+ 0.33,
+ 0.81,
+ 0.2,
+ });
+ test_case.add_input({ // prior boxes (data for parameter `prior_boxes`)
+ // batch 0
+ 0.0,
+ 0.5,
+ 0.1,
+ 0.2,
+ 0.0,
+ 0.3,
+ 0.1,
+ 0.35,
+ // batch 1
+ 0.33,
+ 0.2,
+ 0.52,
+ 0.37,
+ 0.22,
+ 0.1,
+ 0.32,
+ 0.36,
+ });
+ Shape output_shape{1, 1, num_images * static_cast(attrs.keep_top_k[0]), 7}; // 7 values per row: [batch_id, class_id, confidence, x_1, y_1, x_2, y_2]
+ test_case.add_expected_output(output_shape,
+ {
+ 0, 0, 0.7, 0, 0.4, 0.3, 0.5, 0, 1, 0.9,
+ 0.1, 0.6, 0.3, 0.4, 1, 1, 0.81, 0.32, 0.15, 0.52,
+ 0.61, 1, 1, 0.8, 0.53, 0.3, 0.92, 0.57,
+
+ });
+ test_case.run();
+}
+
+NGRAPH_TEST(${BACKEND_NAME}, detection_output_3_inputs_normalized) // 3-input op with normalized = true, shared locations and per-image priors
+{
+ op::DetectionOutputAttrs attrs;
+ attrs.num_classes = 3;
+ attrs.background_label_id = -1;
+ attrs.top_k = -1;
+ attrs.variance_encoded_in_target = true;
+ attrs.keep_top_k = {2}; // used below to size the output: num_images * keep_top_k[0] rows
+ attrs.code_type = "caffe.PriorBoxParameter.CORNER";
+ attrs.share_location = true; // makes num_loc_classes equal to 1 below
+ attrs.nms_threshold = 0.5;
+ attrs.confidence_threshold = 0.3;
+ attrs.clip_after_nms = false;
+ attrs.clip_before_nms = true;
+ attrs.decrease_label_id = false;
+ attrs.normalized = true; // NOTE(review): attrs, inputs and expected output here equal those of detection_output_3_inputs_share_location - confirm this duplicate is intended
+ attrs.input_height = 0;
+ attrs.input_width = 0;
+ attrs.objectness_score = 0;
+
+ size_t num_prior_boxes = 2;
+ size_t num_loc_classes = attrs.share_location ? 1 : attrs.num_classes;
+ size_t prior_box_size = attrs.normalized ? 4 : 5; // values stored per prior box
+ size_t num_images = 2;
+ Shape loc_shape{num_images, num_prior_boxes * num_loc_classes * prior_box_size};
+ Shape conf_shape{num_images, num_prior_boxes * attrs.num_classes};
+ Shape prior_boxes_shape{
+ num_images, attrs.variance_encoded_in_target ? 1UL : 2UL, num_prior_boxes * prior_box_size}; // leading num_images: a separate prior set per image
+
+ auto loc = make_shared(element::f32, loc_shape);
+ auto conf = make_shared(element::f32, conf_shape);
+ auto prior_boxes = make_shared(element::f32, prior_boxes_shape);
+ auto f = make_shared(make_shared(loc, conf, prior_boxes, attrs),
+ ParameterVector{loc, conf, prior_boxes});
+
+ auto test_case = test::TestCase(f);
+ // locations (data for parameter `loc`)
+ test_case.add_input({
+ // batch 0
+ 0.1,
+ 0.1,
+ 0.2,
+ 0.2,
+ 0.0,
+ 0.1,
+ 0.2,
+ 0.15,
+ // batch 1
+ 0.2,
+ 0.1,
+ 0.4,
+ 0.2,
+ 0.1,
+ 0.05,
+ 0.2,
+ 0.25,
+ });
+ test_case.add_input({ // confidence (data for parameter `conf`)
+ // batch 0
+ 0.1,
+ 0.9,
+ 0.4,
+ 0.7,
+ 0,
+ 0.2,
+ // batch 1
+ 0.7,
+ 0.8,
+ 0.42,
+ 0.33,
+ 0.81,
+ 0.2,
+ });
+ test_case.add_input({ // prior boxes (data for parameter `prior_boxes`)
+ // batch 0
+ 0.0,
+ 0.5,
+ 0.1,
+ 0.2,
+ 0.0,
+ 0.3,
+ 0.1,
+ 0.35,
+ // batch 1
+ 0.33,
+ 0.2,
+ 0.52,
+ 0.37,
+ 0.22,
+ 0.1,
+ 0.32,
+ 0.36,
+ });
+ Shape output_shape{1, 1, num_images * static_cast(attrs.keep_top_k[0]), 7}; // 7 values per row: [batch_id, class_id, confidence, x_1, y_1, x_2, y_2]
+ test_case.add_expected_output(output_shape,
+ {
+ 0, 0, 0.7, 0, 0.4, 0.3, 0.5, 0, 1, 0.9,
+ 0.1, 0.6, 0.3, 0.4, 1, 1, 0.81, 0.32, 0.15, 0.52,
+ 0.61, 1, 1, 0.8, 0.53, 0.3, 0.92, 0.57,
+
+ });
+ test_case.run();
+}
+
+NGRAPH_TEST(${BACKEND_NAME}, detection_output_3_inputs_keep_all_bboxes) // 3-input op with keep_top_k = -1 and variances supplied next to the priors
+{
+ op::DetectionOutputAttrs attrs;
+ attrs.num_classes = 2;
+ attrs.background_label_id = -1;
+ attrs.top_k = -1;
+ attrs.variance_encoded_in_target = false; // makes the second prior_boxes dimension 2 below (priors + variances)
+ attrs.keep_top_k = {-1}; // output below is sized as num_images * num_classes * num_prior_boxes rows
+ attrs.code_type = "caffe.PriorBoxParameter.CORNER";
+ attrs.share_location = false; // makes num_loc_classes equal to num_classes below
+ attrs.nms_threshold = 0.5;
+ attrs.confidence_threshold = 0.3;
+ attrs.clip_after_nms = false;
+ attrs.clip_before_nms = true;
+ attrs.decrease_label_id = false;
+ attrs.normalized = true; // selects prior_box_size = 4 below
+ attrs.input_height = 0;
+ attrs.input_width = 0;
+ attrs.objectness_score = 0;
+
+ size_t num_prior_boxes = 2;
+ size_t num_loc_classes = attrs.share_location ? 1 : attrs.num_classes;
+ size_t prior_box_size = attrs.normalized ? 4 : 5; // values stored per prior box
+ size_t num_images = 3;
+ Shape loc_shape{num_images, num_prior_boxes * num_loc_classes * prior_box_size};
+ Shape conf_shape{num_images, num_prior_boxes * attrs.num_classes};
+ Shape prior_boxes_shape{
+ num_images, attrs.variance_encoded_in_target ? 1UL : 2UL, num_prior_boxes * prior_box_size}; // leading num_images: a separate prior set per image
+
+ auto loc = make_shared(element::f32, loc_shape);
+ auto conf = make_shared(element::f32, conf_shape);
+ auto prior_boxes = make_shared(element::f32, prior_boxes_shape);
+ auto f = make_shared(make_shared(loc, conf, prior_boxes, attrs),
+ ParameterVector{loc, conf, prior_boxes});
+
+ auto test_case = test::TestCase(f);
+ // locations (data for parameter `loc`)
+ test_case.add_input({
+ // batch 0, class 0
+ 0.1,
+ 0.1,
+ 0.2,
+ 0.2,
+ 0.0,
+ 0.1,
+ 0.2,
+ 0.15,
+ // batch 0, class 1
+ 0.3,
+ 0.2,
+ 0.5,
+ 0.3,
+ 0.2,
+ 0.1,
+ 0.42,
+ 0.66,
+ // batch 1, class 0
+ 0.05,
+ 0.1,
+ 0.2,
+ 0.3,
+ 0.2,
+ 0.1,
+ 0.33,
+ 0.44,
+ // batch 1, class 1
+ 0.2,
+ 0.1,
+ 0.4,
+ 0.2,
+ 0.1,
+ 0.05,
+ 0.2,
+ 0.25,
+ // batch 2, class 0
+ 0.1,
+ 0.2,
+ 0.5,
+ 0.3,
+ 0.1,
+ 0.1,
+ 0.12,
+ 0.34,
+ // batch 2, class 1
+ 0.25,
+ 0.11,
+ 0.4,
+ 0.32,
+ 0.2,
+ 0.12,
+ 0.38,
+ 0.24,
+ });
+ test_case.add_input({ // confidence (data for parameter `conf`): num_prior_boxes * num_classes = 4 values per image
+ // batch 0
+ 0.1,
+ 0.9,
+ 0.4,
+ 0.7,
+ // batch 1
+ 0.7,
+ 0.8,
+ 0.42,
+ 0.33,
+ // batch 2
+ 0.1,
+ 0.2,
+ 0.32,
+ 0.43,
+ });
+ test_case.add_input({ // prior boxes (data for parameter `prior_boxes`): priors followed by variances for each image
+ // batch 0 priors
+ 0.0,
+ 0.5,
+ 0.1,
+ 0.2,
+ 0.0,
+ 0.3,
+ 0.1,
+ 0.35,
+ // batch 0 variances
+ 0.12,
+ 0.11,
+ 0.32,
+ 0.02,
+ 0.02,
+ 0.20,
+ 0.09,
+ 0.71,
+ // batch 1 priors
+ 0.33,
+ 0.2,
+ 0.52,
+ 0.37,
+ 0.22,
+ 0.1,
+ 0.32,
+ 0.36,
+ // batch 1 variances
+ 0.01,
+ 0.07,
+ 0.12,
+ 0.13,
+ 0.41,
+ 0.33,
+ 0.2,
+ 0.1,
+ // batch 2 priors
+ 0.0,
+ 0.3,
+ 0.1,
+ 0.35,
+ 0.22,
+ 0.1,
+ 0.32,
+ 0.36,
+ // batch 2 variances
+ 0.32,
+ 0.02,
+ 0.13,
+ 0.41,
+ 0.33,
+ 0.2,
+ 0.02,
+ 0.20,
+ });
+ Shape output_shape{1, 1, num_images * attrs.num_classes * num_prior_boxes, 7}; // 12 rows of [batch_id, class_id, confidence, x_1, y_1, x_2, y_2]
+ test_case.add_expected_output(
+ output_shape,
+ {
+
+ 0, 0, 0.4, 0.006, 0.34, 0.145, 0.563, 0, 1, 0.9, 0, 0.511, 0.164, 0.203,
+ 0, 1, 0.7, 0.004, 0.32, 0.1378, 0.8186, 1, 0, 0.7, 0.3305, 0.207, 0.544, 0.409,
+ 1, 0, 0.42, 0.302, 0.133, 0.4, 0.38, 1, 1, 0.8, 0.332, 0.207, 0.5596, 0.4272,
+ 1, 1, 0.33, 0.261, 0.1165, 0.36, 0.385, 2, 0, 0.32, 0.3025, 0.122, 0.328, 0.424,
+ 2, 1, 0.43, 0.286, 0.124, 0.3276, 0.408, -1, 0, 0, 0, 0, 0, 0, // the row starting with batch_id -1 marks the end of the detections
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // remaining two rows are all zeros
+
+ });
+ test_case.run();
+}
+
+NGRAPH_TEST(${BACKEND_NAME}, detection_output_3_inputs_center_size) // 3-input op using the CENTER_SIZE code type with per-class locations
+{
+ op::DetectionOutputAttrs attrs;
+ attrs.num_classes = 3;
+ attrs.background_label_id = -1;
+ attrs.top_k = -1;
+ attrs.variance_encoded_in_target = true;
+ attrs.keep_top_k = {2}; // used below to size the output: num_images * keep_top_k[0] rows
+ attrs.code_type = "caffe.PriorBoxParameter.CENTER_SIZE"; // the attribute this test varies relative to the CORNER tests
+ attrs.share_location = false; // makes num_loc_classes equal to num_classes below
+ attrs.nms_threshold = 0.5;
+ attrs.confidence_threshold = 0.3;
+ attrs.clip_after_nms = false;
+ attrs.clip_before_nms = true;
+ attrs.decrease_label_id = false;
+ attrs.normalized = true; // selects prior_box_size = 4 below
+ attrs.input_height = 0;
+ attrs.input_width = 0;
+ attrs.objectness_score = 0;
+
+ size_t num_prior_boxes = 2;
+ size_t num_loc_classes = attrs.share_location ? 1 : attrs.num_classes;
+ size_t prior_box_size = attrs.normalized ? 4 : 5; // values stored per prior box
+ size_t num_images = 2;
+ Shape loc_shape{num_images, num_prior_boxes * num_loc_classes * prior_box_size};
+ Shape conf_shape{num_images, num_prior_boxes * attrs.num_classes};
+ Shape prior_boxes_shape{
+ num_images, attrs.variance_encoded_in_target ? 1UL : 2UL, num_prior_boxes * prior_box_size}; // leading num_images: a separate prior set per image
+
+ auto loc = make_shared(element::f32, loc_shape);
+ auto conf = make_shared(element::f32, conf_shape);
+ auto prior_boxes = make_shared(element::f32, prior_boxes_shape);
+ auto f = make_shared(make_shared(loc, conf, prior_boxes, attrs),
+ ParameterVector{loc, conf, prior_boxes});
+
+ auto test_case = test::TestCase(f);
+ // locations (data for parameter `loc`)
+ test_case.add_input({
+ // batch 0, class 0
+ 0.1,
+ 0.1,
+ 0.2,
+ 0.2,
+ 0.0,
+ 0.1,
+ 0.2,
+ 0.15,
+ // batch 0, class 1
+ 0.3,
+ 0.2,
+ 0.5,
+ 0.3,
+ 0.2,
+ 0.1,
+ 0.42,
+ 0.66,
+ // batch 0, class 2
+ 0.05,
+ 0.1,
+ 0.2,
+ 0.3,
+ 0.2,
+ 0.1,
+ 0.33,
+ 0.44,
+ // batch 1, class 0
+ 0.2,
+ 0.1,
+ 0.4,
+ 0.2,
+ 0.1,
+ 0.05,
+ 0.2,
+ 0.25,
+ // batch 1, class 1
+ 0.1,
+ 0.2,
+ 0.5,
+ 0.3,
+ 0.1,
+ 0.1,
+ 0.12,
+ 0.34,
+ // batch 1, class 2
+ 0.25,
+ 0.11,
+ 0.4,
+ 0.32,
+ 0.2,
+ 0.12,
+ 0.38,
+ 0.24,
+ });
+ test_case.add_input({ // confidence (data for parameter `conf`)
+ // batch 0
+ 0.1,
+ 0.9,
+ 0.4,
+ 0.7,
+ 0,
+ 0.2,
+ // batch 1
+ 0.7,
+ 0.8,
+ 0.42,
+ 0.33,
+ 0.81,
+ 0.2,
+ });
+ test_case.add_input({ // prior boxes (data for parameter `prior_boxes`)
+ // batch 0
+ 0.0,
+ 0.5,
+ 0.1,
+ 0.2,
+ 0.0,
+ 0.3,
+ 0.1,
+ 0.35,
+ // batch 1
+ 0.33,
+ 0.2,
+ 0.52,
+ 0.37,
+ 0.22,
+ 0.1,
+ 0.32,
+ 0.36,
+ });
+ Shape output_shape{1, 1, num_images * static_cast(attrs.keep_top_k[0]), 7}; // 7 values per row: [batch_id, class_id, confidence, x_1, y_1, x_2, y_2]
+ test_case.add_expected_output(
+ output_shape,
+ {
+ 0, 0, 0.7, 0, 0.28163019, 0.14609808, 0.37836978,
+ 0, 1, 0.9, 0, 0.49427515, 0.11107014, 0.14572485,
+ 1, 1, 0.81, 0.22040875, 0.079573378, 0.36959124, 0.4376266,
+ 1, 1, 0.8, 0.32796675, 0.18435785, 0.56003326, 0.40264216,
+ });
+ test_case.run();
+}
+
+NGRAPH_TEST(${BACKEND_NAME}, detection_output_5_inputs) // 5-input op: adds auxiliary confidence and location tensors to the three mandatory inputs
+{
+ op::DetectionOutputAttrs attrs;
+ attrs.num_classes = 2;
+ attrs.background_label_id = -1;
+ attrs.top_k = -1;
+ attrs.variance_encoded_in_target = true;
+ attrs.keep_top_k = {2}; // used below to size the output: num_images * keep_top_k[0] rows
+ attrs.code_type = "caffe.PriorBoxParameter.CORNER";
+ attrs.share_location = false; // makes num_loc_classes equal to num_classes below
+ attrs.nms_threshold = 0.5;
+ attrs.confidence_threshold = 0.3;
+ attrs.clip_after_nms = false;
+ attrs.clip_before_nms = true;
+ attrs.decrease_label_id = false;
+ attrs.normalized = true; // selects prior_box_size = 4 below
+ attrs.input_height = 0;
+ attrs.input_width = 0;
+ attrs.objectness_score = 0.6; // the only test in this file with a non-zero objectness_score
+
+ size_t num_prior_boxes = 2;
+ size_t num_loc_classes = attrs.share_location ? 1 : attrs.num_classes;
+ size_t prior_box_size = attrs.normalized ? 4 : 5; // values stored per prior box
+ size_t num_images = 2;
+ Shape loc_shape{num_images, num_prior_boxes * num_loc_classes * prior_box_size};
+ Shape conf_shape{num_images, num_prior_boxes * attrs.num_classes};
+ Shape prior_boxes_shape{
+ num_images, attrs.variance_encoded_in_target ? 1UL : 2UL, num_prior_boxes * prior_box_size}; // leading num_images: a separate prior set per image
+
+ auto loc = make_shared(element::f32, loc_shape);
+ auto conf = make_shared(element::f32, conf_shape);
+ auto prior_boxes = make_shared(element::f32, prior_boxes_shape);
+ auto aux_loc = make_shared(element::f32, loc_shape); // auxiliary inputs reuse the shapes of the main loc/conf inputs
+ auto aux_conf = make_shared(element::f32, conf_shape);
+ auto f = make_shared(
+ make_shared(loc, conf, prior_boxes, aux_conf, aux_loc, attrs), // note the order: aux_conf is passed before aux_loc
+ ParameterVector{loc, conf, prior_boxes, aux_conf, aux_loc});
+
+ auto test_case = test::TestCase(f);
+ // locations
+ test_case.add_input({
+ // batch 0, class 0
+ 0.1,
+ 0.1,
+ 0.2,
+ 0.2,
+ 0.0,
+ 0.1,
+ 0.2,
+ 0.15,
+ // batch 0, class 1
+ 0.3,
+ 0.2,
+ 0.5,
+ 0.3,
+ 0.2,
+ 0.1,
+ 0.42,
+ 0.66,
+ // batch 1, class 0
+ 0.2,
+ 0.1,
+ 0.4,
+ 0.2,
+ 0.1,
+ 0.05,
+ 0.2,
+ 0.25,
+ // batch 1, class 1
+ 0.1,
+ 0.2,
+ 0.5,
+ 0.3,
+ 0.1,
+ 0.1,
+ 0.12,
+ 0.34,
+ });
+ // confidence
+ test_case.add_input({
+ // batch 0
+ 0.1,
+ 0.9,
+ 0.4,
+ 0.7,
+ // batch 1
+ 0.42,
+ 0.33,
+ 0.81,
+ 0.2,
+ });
+ // prior boxes
+ test_case.add_input({
+ // batch 0
+ 0.0,
+ 0.5,
+ 0.1,
+ 0.2,
+ 0.0,
+ 0.3,
+ 0.1,
+ 0.35,
+ // batch 1
+ 0.33,
+ 0.2,
+ 0.52,
+ 0.37,
+ 0.22,
+ 0.1,
+ 0.32,
+ 0.36,
+ });
+ // aux conf
+ test_case.add_input({
+ // batch 0
+ 0.1,
+ 0.3,
+ 0.5,
+ 0.8,
+ // batch 1
+ 0.5,
+ 0.8,
+ 0.01,
+ 0.1,
+ });
+ // aux locations
+ test_case.add_input({
+ // batch 0, class 0
+ 0.1,
+ 0.2,
+ 0.5,
+ 0.3,
+ 0.1,
+ 0.1,
+ 0.12,
+ 0.34,
+ // batch 0, class 1
+ 0.25,
+ 0.11,
+ 0.4,
+ 0.32,
+ 0.2,
+ 0.12,
+ 0.38,
+ 0.24,
+ // batch 1, class 0
+ 0.3,
+ 0.2,
+ 0.5,
+ 0.3,
+ 0.2,
+ 0.1,
+ 0.42,
+ 0.66,
+ // batch 1, class 1
+ 0.05,
+ 0.1,
+ 0.2,
+ 0.3,
+ 0.2,
+ 0.1,
+ 0.33,
+ 0.44,
+ });
+
+ Shape output_shape{1, 1, num_images * static_cast(attrs.keep_top_k[0]), 7}; // 7 values per row: [batch_id, class_id, confidence, x_1, y_1, x_2, y_2]
+ test_case.add_expected_output(
+ output_shape,
+ {
+ 0, 0, 0.4, 0.55, 0.61, 1, 0.97, 0, 1, 0.7, 0.4, 0.52, 0.9, 1,
+ 1, 0, 0.42, 0.83, 0.5, 1, 0.87, 1, 1, 0.33, 0.63, 0.35, 1, 1,
+
+ });
+ test_case.run();
+}
diff --git a/ngraph/test/models/onnx/detection_output.prototxt b/ngraph/test/models/onnx/detection_output.prototxt
index 04f00de63bf..3ce54672ee1 100644
--- a/ngraph/test/models/onnx/detection_output.prototxt
+++ b/ngraph/test/models/onnx/detection_output.prototxt
@@ -106,7 +106,7 @@ graph {
dim_value: 2
}
dim {
- dim_value: 15
+ dim_value: 12
}
}
}
diff --git a/ngraph/test/onnx/onnx_import.in.cpp b/ngraph/test/onnx/onnx_import.in.cpp
index 0625b6f2613..d6fc5163e04 100644
--- a/ngraph/test/onnx/onnx_import.in.cpp
+++ b/ngraph/test/onnx/onnx_import.in.cpp
@@ -3082,12 +3082,12 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_detection_output)
std::vector logits = gen_vector(12, -2, 2);
std::vector class_preds = gen_vector(9, 0, 1);
- std::vector proposals = gen_vector(15 * 2, 0, 1);
- std::vector output = {0, 1, 0.777778, 0.241012, 0.260378, 0.418248, 0.499622,
- 0, 1, 0.444444, 0.10963, 0.146239, 0.176296, 0.228576,
- 0, 2, 0.888889, 0.241012, 0.260378, 0.418248, 0.499622,
- 0, 2, 0.555556, 0.10963, 0.146239, 0.176296, 0.228576,
- 0, 2, 0.222222, -0.0378917, -0.00169918, -0.00210832, 0.0387362};
+ std::vector proposals = gen_vector(12 * 2, 0, 1);
+ std::vector output = {0, 1, 0.777778, 0.279849, 0.283779, 0.562743, 0.695387,
+ 0, 1, 0.444444, 0.12963, 0.176075, 0.212963, 0.284573,
+ 0, 2, 0.888889, 0.279849, 0.283779, 0.562743, 0.695387,
+ 0, 2, 0.555556, 0.12963, 0.176075, 0.212963, 0.284573,
+ 0, 2, 0.222222, -0.0608094, -0.0142007, -0.0225239, 0.0304044};
test_case.add_input(logits);
test_case.add_input(class_preds);
test_case.add_input(proposals);
diff --git a/ngraph/test/runtime/interpreter/evaluates_map.cpp b/ngraph/test/runtime/interpreter/evaluates_map.cpp
index c96a14b3d90..462808b1fb4 100644
--- a/ngraph/test/runtime/interpreter/evaluates_map.cpp
+++ b/ngraph/test/runtime/interpreter/evaluates_map.cpp
@@ -577,8 +577,10 @@ namespace
const HostTensorVector& inputs)
{
using T = typename element_type_traits::value_type;
- runtime::reference::referenceDetectionOutput refDetOut(
- op->get_attrs(), op->get_input_shape(0), op->get_input_shape(2));
+ runtime::reference::referenceDetectionOutput refDetOut(op->get_attrs(),
+ op->get_input_shape(0),
+ op->get_input_shape(2),
+ op->get_output_shape(0));
if (op->get_input_size() == 3)
{
refDetOut.run(inputs[0]->get_data_ptr(),
diff --git a/ngraph/test/type_prop/detection_output.cpp b/ngraph/test/type_prop/detection_output.cpp
new file mode 100644
index 00000000000..44dd87cbf91
--- /dev/null
+++ b/ngraph/test/type_prop/detection_output.cpp
@@ -0,0 +1,783 @@
+//*****************************************************************************
+// Copyright 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+
+#include "gtest/gtest.h"
+
+#include "ngraph/ngraph.hpp"
+#include "ngraph/op/detection_output.hpp"
+#include "util/type_prop.hpp"
+
+#include
+
+using namespace std;
+using namespace ngraph;
+
+std::shared_ptr // test helper: builds a 5-input DetectionOutput node from the given partial shapes
+ create_detection_output(const PartialShape& box_logits_shape,
+ const PartialShape& class_preds_shape,
+ const PartialShape& proposals_shape,
+ const PartialShape& aux_class_preds_shape,
+ const PartialShape& aux_box_preds_shape,
+ const op::DetectionOutputAttrs& attrs,
+ element::Type input_type, // element type of every input except proposals
+ element::Type proposals_type) // element type of the proposals input only
+{
+ auto box_logits = make_shared(input_type, box_logits_shape);
+ auto class_preds = make_shared(input_type, class_preds_shape);
+ auto proposals = make_shared(proposals_type, proposals_shape);
+ auto aux_class_preds = make_shared(input_type, aux_class_preds_shape);
+ auto aux_box_preds = make_shared(input_type, aux_box_preds_shape);
+ return make_shared( // constructing the node is what triggers the validation the tests below rely on
+ box_logits, class_preds, proposals, aux_class_preds, aux_box_preds, attrs);
+}
+
+TEST(type_prop_layers, detection_output) // baseline: static shapes, f32 everywhere
+{
+ op::DetectionOutputAttrs attrs;
+ attrs.keep_top_k = {200};
+ attrs.num_classes = 2;
+ attrs.normalized = true;
+ auto op = create_detection_output(Shape{4, 20},
+ Shape{4, 10},
+ Shape{4, 2, 20},
+ Shape{4, 10},
+ Shape{4, 20},
+ attrs,
+ element::f32,
+ element::f32);
+ ASSERT_EQ(op->get_shape(), (Shape{1, 1, 800, 7})); // 800 matches batch (4) * keep_top_k (200)
+ ASSERT_EQ(op->get_element_type(), element::f32);
+}
+
+TEST(type_prop_layers, detection_output_f16) // same shapes as the baseline test, with f16 for all inputs
+{
+ op::DetectionOutputAttrs attrs;
+ attrs.keep_top_k = {200};
+ attrs.num_classes = 2;
+ attrs.normalized = true;
+ auto op = create_detection_output(Shape{4, 20},
+ Shape{4, 10},
+ Shape{4, 2, 20},
+ Shape{4, 10},
+ Shape{4, 20},
+ attrs,
+ element::f16,
+ element::f16);
+ ASSERT_EQ(op->get_shape(), (Shape{1, 1, 800, 7})); // 800 matches batch (4) * keep_top_k (200)
+ ASSERT_EQ(op->get_element_type(), element::f16); // output type is expected to be f16 as well
+}
+
+TEST(type_prop_layers, detection_f16_with_proposals_f32) // proposals may use a different floating-point type than the other inputs
+{
+ op::DetectionOutputAttrs attrs;
+ attrs.keep_top_k = {200};
+ attrs.num_classes = 2;
+ attrs.normalized = true;
+ auto op = create_detection_output(Shape{4, 20},
+ Shape{4, 10},
+ Shape{4, 2, 20},
+ Shape{4, 10},
+ Shape{4, 20},
+ attrs,
+ element::f16, // type of box logits, class predictions and both auxiliary inputs
+ element::f32); // type of proposals only
+ ASSERT_EQ(op->get_shape(), (Shape{1, 1, 800, 7})); // 800 matches batch (4) * keep_top_k (200)
+ ASSERT_EQ(op->get_element_type(), element::f16); // expected output type is f16, not the proposals' f32
+}
+
+TEST(type_prop_layers, detection_output_not_normalized) // normalized = false with a wider proposals tensor
+{
+ op::DetectionOutputAttrs attrs;
+ attrs.keep_top_k = {200};
+ attrs.num_classes = 2;
+ attrs.normalized = false;
+ auto op = create_detection_output(Shape{4, 20},
+ Shape{4, 10},
+ Shape{4, 2, 25}, // last dimension is 25 here versus 20 in the normalized tests; presumably 5 values per prior box - confirm against the op's shape inference
+ Shape{4, 10},
+ Shape{4, 20},
+ attrs,
+ element::f32,
+ element::f32);
+ ASSERT_EQ(op->get_shape(), (Shape{1, 1, 800, 7})); // 800 matches batch (4) * keep_top_k (200)
+ ASSERT_EQ(op->get_element_type(), element::f32);
+}
+
+TEST(type_prop_layers, detection_output_negative_keep_top_k) // keep_top_k = -1 and top_k = -1: output size is not taken from either attribute
+{
+ op::DetectionOutputAttrs attrs;
+ attrs.keep_top_k = {-1};
+ attrs.top_k = -1;
+ attrs.normalized = true;
+ attrs.num_classes = 2;
+ auto op = create_detection_output(Shape{4, 20},
+ Shape{4, 10},
+ Shape{4, 2, 20},
+ Shape{4, 10},
+ Shape{4, 20},
+ attrs,
+ element::f32,
+ element::f32);
+ ASSERT_EQ(op->get_shape(), (Shape{1, 1, 40, 7})); // 40 presumably is batch (4) * num_classes (2) * prior boxes (20 / 4 = 5) - confirm against the op's shape inference
+ ASSERT_EQ(op->get_element_type(), element::f32);
+}
+
+TEST(type_prop_layers, detection_output_no_share_location) // share_location = false with box tensors twice as wide as in the other tests
+{
+ op::DetectionOutputAttrs attrs;
+ attrs.keep_top_k = {-1};
+ attrs.top_k = -1;
+ attrs.normalized = true;
+ attrs.num_classes = 2;
+ attrs.share_location = false;
+ auto op = create_detection_output(Shape{4, 40}, // 40 instead of 20: presumably one box set per class (num_classes = 2) - confirm
+ Shape{4, 10},
+ Shape{4, 2, 20},
+ Shape{4, 10},
+ Shape{4, 40},
+ attrs,
+ element::f32,
+ element::f32);
+ ASSERT_EQ(op->get_shape(), (Shape{1, 1, 40, 7})); // same expected shape as the keep_top_k = -1 test with shared locations
+ ASSERT_EQ(op->get_element_type(), element::f32);
+}
+
+TEST(type_prop_layers, detection_output_top_k) // keep_top_k = -1 with a positive top_k
+{
+ op::DetectionOutputAttrs attrs;
+ attrs.keep_top_k = {-1};
+ attrs.top_k = 7;
+ attrs.normalized = true;
+ attrs.num_classes = 2;
+ auto op = create_detection_output(Shape{4, 20},
+ Shape{4, 10},
+ Shape{4, 2, 20},
+ Shape{4, 10},
+ Shape{4, 20},
+ attrs,
+ element::f32,
+ element::f32);
+ ASSERT_EQ(op->get_shape(), (Shape{1, 1, 56, 7})); // 56 presumably is batch (4) * top_k (7) * num_classes (2) - confirm against the op's shape inference
+ ASSERT_EQ(op->get_element_type(), element::f32);
+}
+
+TEST(type_prop_layers, detection_output_all_dynamic_shapes) // every input has a fully dynamic (rank-unknown) shape
+{
+ PartialShape dyn_shape = PartialShape::dynamic();
+ op::DetectionOutputAttrs attrs;
+ attrs.keep_top_k = {-1};
+ attrs.num_classes = 1;
+ auto op = create_detection_output(
+ dyn_shape, dyn_shape, dyn_shape, dyn_shape, dyn_shape, attrs, element::f32, element::f32);
+ ASSERT_EQ(op->get_output_partial_shape(0), (PartialShape{1, 1, Dimension::dynamic(), 7})); // only the detection-count dimension is expected to be dynamic
+ ASSERT_EQ(op->get_element_type(), element::f32);
+}
+
+TEST(type_prop_layers, detection_output_dynamic_batch) // only the batch dimension of each input is dynamic
+{
+ op::DetectionOutputAttrs attrs;
+ attrs.keep_top_k = {200};
+ attrs.num_classes = 2;
+ attrs.normalized = true;
+ auto op = create_detection_output(PartialShape{Dimension::dynamic(), 20},
+ PartialShape{Dimension::dynamic(), 10},
+ PartialShape{Dimension::dynamic(), 2, 20},
+ PartialShape{Dimension::dynamic(), 10},
+ PartialShape{Dimension::dynamic(), 20},
+ attrs,
+ element::f32,
+ element::f32);
+ ASSERT_EQ(op->get_output_partial_shape(0), (PartialShape{{1, 1, Dimension::dynamic(), 7}})); // detection-count dimension is expected to be dynamic even though keep_top_k is 200
+ ASSERT_EQ(op->get_element_type(), element::f32);
+}
+
+void detection_output_invalid_data_type_test(element::Type box_logits_et, // helper: builds the op with the given element types and expects validation to fail
+ element::Type class_preds_et,
+ element::Type proposals_et,
+ element::Type aux_class_preds_et,
+ element::Type aux_box_preds_et,
+ const std::string& expected_msg) // substring that must appear in the NodeValidationFailure message
+{
+ try
+ {
+ auto box_logits = make_shared(box_logits_et, Shape{4, 20});
+ auto class_preds = make_shared(class_preds_et, Shape{4, 10});
+ auto proposals = make_shared(proposals_et, Shape{4, 2, 20});
+ auto aux_class_preds = make_shared(aux_class_preds_et, Shape{4, 10});
+ auto aux_box_preds = make_shared(aux_box_preds_et, Shape{4, 20});
+ op::DetectionOutputAttrs attrs;
+ attrs.keep_top_k = {200};
+ attrs.num_classes = 2;
+ attrs.normalized = true;
+ auto op = make_shared(
+ box_logits, class_preds, proposals, aux_class_preds, aux_box_preds, attrs);
+ FAIL() << "Exception expected"; // reached only if construction did not throw
+ }
+ catch (const NodeValidationFailure& error)
+ {
+ EXPECT_HAS_SUBSTRING(error.what(), expected_msg);
+ }
+ catch (...)
+ {
+ FAIL() << "Unknown exception was thrown"; // any exception type other than NodeValidationFailure fails the test
+ }
+}
+
+TEST(type_prop_layers, detection_output_invalid_data_type) // sets one input at a time to i32 and checks the matching validation message
+{
+ detection_output_invalid_data_type_test( // case 1: box logits are i32
+ element::i32,
+ element::f32,
+ element::f32,
+ element::f32,
+ element::f32,
+ "Box logits' data type must be floating point. Got i32");
+ detection_output_invalid_data_type_test( // case 2: class predictions are i32
+ element::f32,
+ element::i32,
+ element::f32,
+ element::f32,
+ element::f32,
+ "Class predictions' data type must be the same as box logits type (f32). Got i32");
+ detection_output_invalid_data_type_test(element::f32, // case 3: proposals are i32
+ element::f32,
+ element::i32,
+ element::f32,
+ element::f32,
+ "Proposals' data type must be floating point. Got i32");
+ detection_output_invalid_data_type_test(element::f32, // case 4: additional class predictions are i32
+ element::f32,
+ element::f32,
+ element::i32,
+ element::f32,
+ "Additional class predictions' data type must be the "
+ "same as class predictions data type (f32). Got i32");
+ detection_output_invalid_data_type_test(element::f32, // case 5: additional box predictions are i32
+ element::f32,
+ element::f32,
+ element::f32,
+ element::i32,
+ "Additional box predictions' data type must be the "
+ "same as box logits data type (f32). Got i32");
+}
+
+TEST(type_prop_layers, detection_output_mismatched_batch_size) // an input whose first dimension disagrees with the box logits batch (4) must be rejected
+{
+ { // case 1: class predictions have batch 5
+ try
+ {
+ op::DetectionOutputAttrs attrs;
+ attrs.keep_top_k = {200};
+ attrs.num_classes = 2;
+ attrs.normalized = true;
+ auto op = create_detection_output(Shape{4, 20},
+ Shape{5, 10},
+ Shape{4, 2, 20},
+ Shape{4, 10},
+ Shape{4, 20},
+ attrs,
+ element::f32,
+ element::f32);
+ FAIL() << "Exception expected";
+ }
+ catch (const NodeValidationFailure& error)
+ {
+ EXPECT_HAS_SUBSTRING(
+ error.what(),
+ std::string(
+ "Class predictions' first dimension is not compatible with batch size."));
+ }
+ catch (...)
+ {
+ FAIL() << "Unknown exception was thrown";
+ }
+ }
+ { // case 2: proposals have batch 5, which is neither the batch size (4) nor 1
+ try
+ {
+ op::DetectionOutputAttrs attrs;
+ attrs.keep_top_k = {200};
+ attrs.num_classes = 2;
+ attrs.normalized = true;
+ auto op = create_detection_output(Shape{4, 20},
+ Shape{4, 10},
+ Shape{5, 2, 20},
+ Shape{4, 10},
+ Shape{4, 20},
+ attrs,
+ element::f32,
+ element::f32);
+ FAIL() << "Exception expected";
+ }
+ catch (const NodeValidationFailure& error)
+ {
+ EXPECT_HAS_SUBSTRING(error.what(),
+ std::string("Proposals' first dimension is must be equal to "
+ "either batch size (4) or 1. Got: 5."));
+ }
+ catch (...)
+ {
+ FAIL() << "Unknown exception was thrown";
+ }
+ }
+}
+
+TEST(type_prop_layers, detection_output_invalid_ranks) // each case gives one of the three mandatory inputs a wrong rank
+{
+ { // case 1: box logits have rank 3 instead of 2
+ try
+ {
+ op::DetectionOutputAttrs attrs;
+ attrs.keep_top_k = {200};
+ attrs.num_classes = 2;
+ attrs.normalized = true;
+ auto op = create_detection_output(Shape{4, 20, 1},
+ Shape{4, 10},
+ Shape{4, 2, 20},
+ Shape{4, 10},
+ Shape{4, 20},
+ attrs,
+ element::f32,
+ element::f32);
+ FAIL() << "Exception expected";
+ }
+ catch (const NodeValidationFailure& error)
+ {
+ EXPECT_HAS_SUBSTRING(error.what(), std::string("Box logits rank must be 2. Got 3"));
+ }
+ catch (...)
+ {
+ FAIL() << "Unknown exception was thrown";
+ }
+ }
+ { // case 2: class predictions have rank 3 instead of 2
+ try
+ {
+ op::DetectionOutputAttrs attrs;
+ attrs.keep_top_k = {200};
+ attrs.num_classes = 2;
+ attrs.normalized = true;
+ auto op = create_detection_output(Shape{4, 20},
+ Shape{4, 10, 1},
+ Shape{4, 2, 20},
+ Shape{4, 10},
+ Shape{4, 20},
+ attrs,
+ element::f32,
+ element::f32);
+ FAIL() << "Exception expected";
+ }
+ catch (const NodeValidationFailure& error)
+ {
+ EXPECT_HAS_SUBSTRING(error.what(),
+ std::string("Class predictions rank must be 2. Got 3"));
+ }
+ catch (...)
+ {
+ FAIL() << "Unknown exception was thrown";
+ }
+ }
+ { // case 3: proposals have rank 2 instead of 3
+ try
+ {
+ op::DetectionOutputAttrs attrs;
+ attrs.keep_top_k = {200};
+ attrs.num_classes = 2;
+ attrs.normalized = true;
+ auto op = create_detection_output(Shape{4, 20},
+ Shape{4, 10},
+ Shape{4, 2},
+ Shape{4, 10},
+ Shape{4, 20},
+ attrs,
+ element::f32,
+ element::f32);
+ FAIL() << "Exception expected";
+ }
+ catch (const NodeValidationFailure& error)
+ {
+ EXPECT_HAS_SUBSTRING(error.what(), std::string("Proposals rank must be 3. Got 2"));
+ }
+ catch (...)
+ {
+ FAIL() << "Unknown exception was thrown";
+ }
+ }
+}
+
+ TEST(type_prop_layers, detection_output_invalid_box_logits_shape)
+ {
+     // The first input's second dimension is chosen so that it is not divisible by the
+     // value quoted in the expected message (4 when locations are shared, 12 otherwise).
+
+     // Case 1: share_location = true, second dimension 13.
+     try
+     {
+         op::DetectionOutputAttrs det_attrs;
+         det_attrs.keep_top_k = {-1};
+         det_attrs.num_classes = 3;
+         det_attrs.share_location = true;
+         det_attrs.variance_encoded_in_target = false;
+         det_attrs.normalized = true;
+         const auto node = create_detection_output(
+             Shape{4, 13}, Shape{4, 9}, Shape{4, 2, 12}, Shape{4, 6}, Shape{4, 12},
+             det_attrs, element::f32, element::f32);
+         FAIL() << "Exception expected";
+     }
+     catch (const NodeValidationFailure& failure)
+     {
+         const std::string expected_msg(
+             "Box logits' second dimension must be a multiply of num_loc_classes * 4 (4)");
+         EXPECT_HAS_SUBSTRING(failure.what(), expected_msg);
+     }
+     catch (...)
+     {
+         FAIL() << "Unknown exception was thrown";
+     }
+
+     // Case 2: share_location = false, second dimension 37.
+     try
+     {
+         op::DetectionOutputAttrs det_attrs;
+         det_attrs.keep_top_k = {-1};
+         det_attrs.num_classes = 3;
+         det_attrs.share_location = false;
+         det_attrs.variance_encoded_in_target = false;
+         det_attrs.normalized = true;
+         const auto node = create_detection_output(
+             Shape{4, 37}, Shape{4, 9}, Shape{4, 2, 12}, Shape{4, 6}, Shape{4, 12},
+             det_attrs, element::f32, element::f32);
+         FAIL() << "Exception expected";
+     }
+     catch (const NodeValidationFailure& failure)
+     {
+         const std::string expected_msg(
+             "Box logits' second dimension must be a multiply of num_loc_classes * 4 (12)");
+         EXPECT_HAS_SUBSTRING(failure.what(), expected_msg);
+     }
+     catch (...)
+     {
+         FAIL() << "Unknown exception was thrown";
+     }
+ }
+
+ TEST(type_prop_layers, detection_output_invalid_class_preds_shape)
+ {
+     // The second input's second dimension is 10, while the expected message quotes 9
+     // as the required value. Only keep_top_k and num_classes are set explicitly here;
+     // every other attribute keeps whatever DetectionOutputAttrs initialises it to.
+     try
+     {
+         op::DetectionOutputAttrs det_attrs;
+         det_attrs.keep_top_k = {-1};
+         det_attrs.num_classes = 3;
+         const auto node = create_detection_output(
+             Shape{4, 12}, Shape{4, 10}, Shape{4, 2, 12}, Shape{4, 6}, Shape{4, 12},
+             det_attrs, element::f32, element::f32);
+         FAIL() << "Exception expected";
+     }
+     catch (const NodeValidationFailure& failure)
+     {
+         const std::string expected_msg(
+             "Class predictions' second dimension must be equal to "
+             "num_prior_boxes * num_classes (9). Current value is: 10.");
+         EXPECT_HAS_SUBSTRING(failure.what(), expected_msg);
+     }
+     catch (...)
+     {
+         FAIL() << "Unknown exception was thrown";
+     }
+ }
+
+ TEST(type_prop_layers, detection_output_invalid_proposals_shape)
+ {
+     // Four cases, each passing a third-input shape that disagrees with the attribute
+     // combination in use: two target its second dimension, two its third dimension.
+
+     // Case 1: variance_encoded_in_target = false, second dimension 1.
+     try
+     {
+         op::DetectionOutputAttrs det_attrs;
+         det_attrs.keep_top_k = {-1};
+         det_attrs.num_classes = 3;
+         det_attrs.share_location = true;
+         det_attrs.variance_encoded_in_target = false;
+         det_attrs.normalized = true;
+         const auto node = create_detection_output(
+             Shape{4, 12}, Shape{4, 9}, Shape{4, 1, 12}, Shape{4, 6}, Shape{4, 12},
+             det_attrs, element::f32, element::f32);
+         FAIL() << "Exception expected";
+     }
+     catch (const NodeValidationFailure& failure)
+     {
+         const std::string expected_msg(
+             "Proposals' second dimension is mismatched. Current value is: 1, expected: 2");
+         EXPECT_HAS_SUBSTRING(failure.what(), expected_msg);
+     }
+     catch (...)
+     {
+         FAIL() << "Unknown exception was thrown";
+     }
+
+     // Case 2: variance_encoded_in_target = true, second dimension 2.
+     try
+     {
+         op::DetectionOutputAttrs det_attrs;
+         det_attrs.keep_top_k = {-1};
+         det_attrs.num_classes = 3;
+         det_attrs.share_location = true;
+         det_attrs.variance_encoded_in_target = true;
+         det_attrs.normalized = true;
+         const auto node = create_detection_output(
+             Shape{4, 12}, Shape{4, 9}, Shape{4, 2, 12}, Shape{4, 6}, Shape{4, 12},
+             det_attrs, element::f32, element::f32);
+         FAIL() << "Exception expected";
+     }
+     catch (const NodeValidationFailure& failure)
+     {
+         const std::string expected_msg(
+             "Proposals' second dimension is mismatched. Current value is: 2, expected: 1");
+         EXPECT_HAS_SUBSTRING(failure.what(), expected_msg);
+     }
+     catch (...)
+     {
+         FAIL() << "Unknown exception was thrown";
+     }
+
+     // Case 3: normalized = false, third dimension 16 (message quotes 15 as required).
+     try
+     {
+         op::DetectionOutputAttrs det_attrs;
+         det_attrs.keep_top_k = {-1};
+         det_attrs.num_classes = 3;
+         det_attrs.share_location = true;
+         det_attrs.variance_encoded_in_target = false;
+         det_attrs.normalized = false;
+         const auto node = create_detection_output(
+             Shape{4, 12}, Shape{4, 9}, Shape{4, 2, 16}, Shape{4, 6}, Shape{4, 12},
+             det_attrs, element::f32, element::f32);
+         FAIL() << "Exception expected";
+     }
+     catch (const NodeValidationFailure& failure)
+     {
+         const std::string expected_msg(
+             "Proposals' third dimension must be equal to num_prior_boxes * "
+             "prior_box_size (15). Current value is: 16.");
+         EXPECT_HAS_SUBSTRING(failure.what(), expected_msg);
+     }
+     catch (...)
+     {
+         FAIL() << "Unknown exception was thrown";
+     }
+
+     // Case 4: normalized = true, third dimension 13 (message quotes 12 as required).
+     try
+     {
+         op::DetectionOutputAttrs det_attrs;
+         det_attrs.keep_top_k = {-1};
+         det_attrs.num_classes = 3;
+         det_attrs.share_location = true;
+         det_attrs.variance_encoded_in_target = false;
+         det_attrs.normalized = true;
+         const auto node = create_detection_output(
+             Shape{4, 12}, Shape{4, 9}, Shape{4, 2, 13}, Shape{4, 6}, Shape{4, 12},
+             det_attrs, element::f32, element::f32);
+         FAIL() << "Exception expected";
+     }
+     catch (const NodeValidationFailure& failure)
+     {
+         const std::string expected_msg(
+             "Proposals' third dimension must be equal to num_prior_boxes * "
+             "prior_box_size (12). Current value is: 13.");
+         EXPECT_HAS_SUBSTRING(failure.what(), expected_msg);
+     }
+     catch (...)
+     {
+         FAIL() << "Unknown exception was thrown";
+     }
+ }
+
+ TEST(type_prop_layers, detection_output_invalid_aux_class_preds)
+ {
+     // Both cases corrupt only the fourth input: first its leading dimension, then its
+     // second dimension.
+
+     // Case 1: leading dimension 5 while the other inputs use 4.
+     try
+     {
+         op::DetectionOutputAttrs det_attrs;
+         det_attrs.keep_top_k = {-1};
+         det_attrs.num_classes = 3;
+         det_attrs.share_location = true;
+         det_attrs.variance_encoded_in_target = false;
+         det_attrs.normalized = true;
+         const auto node = create_detection_output(
+             Shape{4, 12}, Shape{4, 9}, Shape{4, 2, 12}, Shape{5, 6}, Shape{4, 12},
+             det_attrs, element::f32, element::f32);
+         FAIL() << "Exception expected";
+     }
+     catch (const NodeValidationFailure& failure)
+     {
+         const std::string expected_msg("Additional class predictions' first dimension must "
+                                        "be compatible with batch size.");
+         EXPECT_HAS_SUBSTRING(failure.what(), expected_msg);
+     }
+     catch (...)
+     {
+         FAIL() << "Unknown exception was thrown";
+     }
+
+     // Case 2: second dimension 7 (message quotes 6 as required).
+     try
+     {
+         op::DetectionOutputAttrs det_attrs;
+         det_attrs.keep_top_k = {-1};
+         det_attrs.num_classes = 3;
+         det_attrs.share_location = true;
+         det_attrs.variance_encoded_in_target = false;
+         det_attrs.normalized = true;
+         const auto node = create_detection_output(
+             Shape{4, 12}, Shape{4, 9}, Shape{4, 2, 12}, Shape{4, 7}, Shape{4, 12},
+             det_attrs, element::f32, element::f32);
+         FAIL() << "Exception expected";
+     }
+     catch (const NodeValidationFailure& failure)
+     {
+         const std::string expected_msg("Additional class predictions' second dimension must "
+                                        "be equal to num_prior_boxes * 2 (6). Got 7.");
+         EXPECT_HAS_SUBSTRING(failure.what(), expected_msg);
+     }
+     catch (...)
+     {
+         FAIL() << "Unknown exception was thrown";
+     }
+ }
+
+ TEST(type_prop_layers, detection_output_invalid_aux_box_preds)
+ {
+     // Both cases corrupt only the fifth input so that its shape no longer matches the
+     // first input's shape {4, 12}; the same validation message is expected each time.
+
+     // Case 1: leading dimension 5 instead of 4.
+     try
+     {
+         op::DetectionOutputAttrs det_attrs;
+         det_attrs.keep_top_k = {-1};
+         det_attrs.num_classes = 3;
+         det_attrs.share_location = true;
+         det_attrs.variance_encoded_in_target = false;
+         det_attrs.normalized = true;
+         const auto node = create_detection_output(
+             Shape{4, 12}, Shape{4, 9}, Shape{4, 2, 12}, Shape{4, 6}, Shape{5, 12},
+             det_attrs, element::f32, element::f32);
+         FAIL() << "Exception expected";
+     }
+     catch (const NodeValidationFailure& failure)
+     {
+         const std::string expected_msg(
+             "Additional box predictions' shape must be compatible with box logits shape.");
+         EXPECT_HAS_SUBSTRING(failure.what(), expected_msg);
+     }
+     catch (...)
+     {
+         FAIL() << "Unknown exception was thrown";
+     }
+
+     // Case 2: second dimension 22 instead of 12.
+     try
+     {
+         op::DetectionOutputAttrs det_attrs;
+         det_attrs.keep_top_k = {-1};
+         det_attrs.num_classes = 3;
+         det_attrs.share_location = true;
+         det_attrs.variance_encoded_in_target = false;
+         det_attrs.normalized = true;
+         const auto node = create_detection_output(
+             Shape{4, 12}, Shape{4, 9}, Shape{4, 2, 12}, Shape{4, 6}, Shape{4, 22},
+             det_attrs, element::f32, element::f32);
+         FAIL() << "Exception expected";
+     }
+     catch (const NodeValidationFailure& failure)
+     {
+         const std::string expected_msg(
+             "Additional box predictions' shape must be compatible with box logits shape.");
+         EXPECT_HAS_SUBSTRING(failure.what(), expected_msg);
+     }
+     catch (...)
+     {
+         FAIL() << "Unknown exception was thrown";
+     }
+ }
diff --git a/ngraph/test/type_prop_layers.cpp b/ngraph/test/type_prop_layers.cpp
index 10050741c43..483e57d72e0 100644
--- a/ngraph/test/type_prop_layers.cpp
+++ b/ngraph/test/type_prop_layers.cpp
@@ -18,7 +18,6 @@
#include "ngraph/ngraph.hpp"
#include "ngraph/op/ctc_greedy_decoder.hpp"
-#include "ngraph/op/detection_output.hpp"
#include "ngraph/op/interpolate.hpp"
#include "ngraph/op/prior_box.hpp"
#include "ngraph/op/prior_box_clustered.hpp"
@@ -38,20 +37,6 @@ TEST(type_prop_layers, ctc_greedy_decoder)
ASSERT_EQ(op->get_shape(), (Shape{2, 88, 1, 1}));
}
-TEST(type_prop_layers, detection_output)
-{
- auto box_logits = make_shared(element::f32, Shape{4, 1, 5, 5});
- auto class_preds = make_shared(element::f32, Shape{2, 1, 4, 5});
- auto proposals = make_shared(element::f32, Shape{2, 1, 4, 5});
- auto aux_class_preds = make_shared(element::f32, Shape{2, 1, 4, 5});
- auto aux_box_preds = make_shared(element::f32, Shape{2, 1, 4, 5});
- op::DetectionOutputAttrs attrs;
- attrs.keep_top_k = {200};
- auto op = make_shared(
- box_logits, class_preds, proposals, aux_class_preds, aux_box_preds, attrs);
- ASSERT_EQ(op->get_shape(), (Shape{1, 1, 800, 7}));
-}
-
TEST(type_prop_layers, interpolate)
{
auto image = make_shared(element::f32, Shape{2, 2, 33, 65});