[CPU] MulticlassNms/MatrixNms support dynamic shape (#8161)

Luo Cheng 2021-11-27 01:23:26 +08:00 committed by GitHub
parent f59ece3cde
commit 38aebd4463
23 changed files with 950 additions and 578 deletions

View File

@ -423,8 +423,9 @@ void CNNNetworkNGraphImpl::reshape(const std::map<std::string, ngraph::PartialSh
::ngraph::pass::Manager manager;
// resolves dynamism by replacing dynamic operation with static version
manager.register_pass<::ngraph::pass::ConvertNMS5ToLegacyMatcher>(false);
manager.register_pass<::ngraph::pass::ConvertMulticlassNmsToMulticlassNmsIE>();
manager.register_pass<::ngraph::pass::ConvertMatrixNmsToMatrixNmsIE>();
// TODO [DS NMS]: remove once models where NMS is not the last node support dynamic shapes
manager.register_pass<::ngraph::pass::ConvertMulticlassNmsToMulticlassNmsIE>(false);
manager.register_pass<::ngraph::pass::ConvertMatrixNmsToMatrixNmsIE>(false);
manager.register_pass<::ngraph::pass::DisableConvertConstantFoldingOnConstPath>();
manager.register_pass<::ngraph::pass::ConstantFolding>();
// OneHotToLegacy changes output precision

View File

@ -314,7 +314,35 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr<ngraph::Function>
for (size_t i = 0; i < node->get_output_size(); i++) {
const auto outputs = node->get_output_target_inputs(i);
for (const auto &out : outputs) {
if (out.get_node()->get_type_info() != ngraph::op::v0::Result::get_type_info_static()) {
if (!ngraph::op::is_output(out.get_node())) {
return false;
}
}
}
return true;
});
// TODO [DS NMS]: remove once models where NMS is not the last node support dynamic shapes
pass_config->set_callback<ngraph::pass::ConvertMulticlassNmsToMulticlassNmsIE>(
[](const_node_ptr &node) -> bool {
for (size_t i = 0; i < node->get_output_size(); i++) {
const auto outputs = node->get_output_target_inputs(i);
for (const auto &out : outputs) {
if (!ngraph::op::is_output(out.get_node())) {
return false;
}
}
}
return true;
});
// TODO [DS NMS]: remove once models where NMS is not the last node support dynamic shapes
pass_config->set_callback<ngraph::pass::ConvertMatrixNmsToMatrixNmsIE>(
[](const_node_ptr &node) -> bool {
for (size_t i = 0; i < node->get_output_size(); i++) {
const auto outputs = node->get_output_target_inputs(i);
for (const auto &out : outputs) {
if (!ngraph::op::is_output(out.get_node())) {
return false;
}
}

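For reference, a standalone sketch of the check these callbacks perform: the conversion to the internal static-shape operation is skipped (the callback returns true) only when every consumer of the NMS outputs is a Result, i.e. NMS is the last node in the model. The helper name is hypothetical:

bool nmsFeedsOnlyResults(const std::shared_ptr<const ngraph::Node>& node) {
    for (size_t i = 0; i < node->get_output_size(); i++) {
        for (const auto& consumer : node->get_output_target_inputs(i)) {
            // ngraph::op::is_output() is true for Result nodes
            if (!ngraph::op::is_output(consumer.get_node()))
                return false;
        }
    }
    return true;
}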
View File

@ -12,25 +12,19 @@
#include "ie_parallel.hpp"
#include "ngraph/opsets/opset8.hpp"
#include "ngraph_ops/nms_static_shape_ie.hpp"
#include "utils/general_utils.h"
using namespace MKLDNNPlugin;
using namespace InferenceEngine;
using MatrixNmsIEInternal = ngraph::op::internal::NmsStaticShapeIE<ngraph::op::v8::MatrixNms>;
using ngNmsSortResultType = ngraph::op::util::NmsBase::SortResultType;
using ngNmsDecayFunction = ngraph::op::v8::MatrixNms::DecayFunction;
bool MKLDNNMatrixNmsNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
if (isDynamicNgraphNode(op)) {
errorMessage = "Doesn't support op with dynamic shapes";
return false;
}
const auto nms = std::dynamic_pointer_cast<const MatrixNmsIEInternal>(op);
const auto nms = std::dynamic_pointer_cast<const ngraph::op::v8::MatrixNms>(op);
if (!nms) {
errorMessage = "Only internal MatrixNms operation is supported";
errorMessage = "Only MatrixNms operation is supported";
return false;
}
const auto& attrs = nms->get_attrs();
@ -57,36 +51,16 @@ MKLDNNMatrixNmsNode::MKLDNNMatrixNmsNode(const std::shared_ptr<ngraph::Node>& op
IE_THROW(NotImplemented) << errorMessage;
}
errorPrefix = "MatrixNMS layer with name '" + getName() + "' ";
const auto matrix_nms = std::dynamic_pointer_cast<const MatrixNmsIEInternal>(op);
m_errorPrefix = "MatrixNMS layer with name '" + getName() + "' ";
if (getOriginalInputsNumber() != 2)
IE_THROW() << errorPrefix << "has incorrect number of input edges: " << getOriginalInputsNumber();
IE_THROW() << m_errorPrefix << "has incorrect number of input edges: " << getOriginalInputsNumber();
if (getOriginalOutputsNumber() != 3)
IE_THROW() << errorPrefix << "has incorrect number of output edges: " << getOriginalOutputsNumber();
IE_THROW() << m_errorPrefix << "has incorrect number of output edges: " << getOriginalOutputsNumber();
const SizeVector& boxes_dims = inputShapes[NMS_BOXES].getStaticDims();
const SizeVector& scores_dims = inputShapes[NMS_SCORES].getStaticDims();
if (!(boxes_dims[0] == scores_dims[0] && boxes_dims[1] == scores_dims[2])) {
IE_THROW() << errorPrefix << "has incompatible 'boxes' and 'scores' input dimensions";
}
const auto matrix_nms = std::dynamic_pointer_cast<const ngraph::op::v8::MatrixNms>(op);
m_numBatches = boxes_dims[0];
m_numBoxes = boxes_dims[1];
if (boxes_dims.size() != 3)
IE_THROW() << errorPrefix << "has unsupported 'boxes' input rank: " << boxes_dims.size();
if (boxes_dims[2] != 4)
IE_THROW() << errorPrefix << "has unsupported 'boxes' input 3rd dimension size: " << boxes_dims[2];
m_numClasses = scores_dims[1];
if (scores_dims.size() != 3)
IE_THROW() << errorPrefix << "has unsupported 'scores' input rank: " << scores_dims.size();
if (m_numBatches != scores_dims[0])
IE_THROW() << errorPrefix << " num_batches is different in 'boxes' and 'scores' inputs";
if (m_numBoxes != scores_dims[2])
IE_THROW() << errorPrefix << " num_boxes is different in 'boxes' and 'scores' inputs";
auto& attrs = matrix_nms->get_attrs();
if (attrs.sort_result_type == ngraph::op::util::NmsBase::SortResultType::CLASSID)
m_sortResultType = MatrixNmsSortResultType::CLASSID;
@ -109,35 +83,6 @@ MKLDNNMatrixNmsNode::MKLDNNMatrixNmsNode(const std::shared_ptr<ngraph::Node>& op
m_gaussianSigma = attrs.gaussian_sigma;
m_postThreshold = attrs.post_threshold;
m_normalized = attrs.normalized;
int64_t max_output_boxes_per_class = 0;
size_t real_num_classes = m_backgroundClass == -1 ? m_numClasses : m_numClasses - 1;
if (m_nmsTopk >= 0)
max_output_boxes_per_class = std::min(m_numBoxes, static_cast<size_t>(m_nmsTopk));
else
max_output_boxes_per_class = m_numBoxes;
m_maxBoxesPerBatch = max_output_boxes_per_class * real_num_classes;
if (m_keepTopk >= 0)
m_maxBoxesPerBatch = std::min(m_maxBoxesPerBatch, static_cast<size_t>(m_keepTopk));
}
void MKLDNNMatrixNmsNode::initSupportedPrimitiveDescriptors() {
if (!supportedPrimitiveDescriptors.empty())
return;
m_realNumClasses = m_backgroundClass == -1 ? m_numClasses : m_numClasses - 1;
m_realNumBoxes = m_nmsTopk == -1 ? m_numBoxes : std::min(m_nmsTopk, static_cast<int>(m_numBoxes));
m_numPerBatch.resize(m_numBatches);
m_filteredBoxes.resize(m_numBatches * m_realNumClasses * m_realNumBoxes);
m_numPerBatchClass.resize(m_numBatches, std::vector<int64_t>(m_numClasses, 0));
m_classOffset.resize(m_numClasses, 0);
for (size_t i = 0, count = 0; i < m_numClasses; i++) {
if (i == m_backgroundClass)
continue;
m_classOffset[i] = (count++) * m_realNumBoxes;
}
if (m_decayFunction == MatrixNmsDecayFunction::LINEAR) {
m_decay_fn = [](float iou, float max_iou, float sigma) -> float {
return (1. - iou) / (1. - max_iou + 1e-10f);
@ -148,16 +93,29 @@ void MKLDNNMatrixNmsNode::initSupportedPrimitiveDescriptors() {
};
}
const auto& boxes_dims = getInputShapeAtPort(NMS_BOXES).getDims();
if (boxes_dims.size() != 3)
IE_THROW() << m_errorPrefix << "has unsupported 'boxes' input rank: " << boxes_dims.size();
if (boxes_dims[2] != 4)
IE_THROW() << m_errorPrefix << "has unsupported 'boxes' input 3rd dimension size: " << boxes_dims[2];
const auto& scores_dims = getInputShapeAtPort(NMS_SCORES).getDims();
if (scores_dims.size() != 3)
IE_THROW() << m_errorPrefix << "has unsupported 'scores' input rank: " << scores_dims.size();
}
void MKLDNNMatrixNmsNode::initSupportedPrimitiveDescriptors() {
if (!supportedPrimitiveDescriptors.empty())
return;
const std::vector<Precision> supportedFloatPrecision = {Precision::FP32};
const std::vector<Precision> supportedIntOutputPrecision = {Precision::I32, Precision::I64};
checkPrecision(getOriginalInputPrecisionAtPort(NMS_BOXES), supportedFloatPrecision, "boxes", inType);
checkPrecision(getOriginalInputPrecisionAtPort(NMS_BOXES), supportedFloatPrecision, "boxes", m_inType);
checkPrecision(getOriginalInputPrecisionAtPort(NMS_SCORES), supportedFloatPrecision, "scores", m_inType);
checkPrecision(getOriginalInputPrecisionAtPort(NMS_SCORES), supportedFloatPrecision, "scores", inType);
checkPrecision(getOriginalOutputPrecisionAtPort(NMS_SELECTED_INDICES), supportedIntOutputPrecision, "selected_indices", outType);
checkPrecision(getOriginalOutputPrecisionAtPort(NMS_SELECTED_OUTPUTS), supportedFloatPrecision, "selected_outputs", outType);
checkPrecision(getOriginalOutputPrecisionAtPort(NMS_VALID_OUTPUTS), supportedIntOutputPrecision, "valid_outputs", outType);
checkPrecision(getOriginalOutputPrecisionAtPort(NMS_SELECTED_INDICES), supportedIntOutputPrecision, "selected_indices", m_outType);
checkPrecision(getOriginalOutputPrecisionAtPort(NMS_SELECTED_OUTPUTS), supportedFloatPrecision, "selected_outputs", m_outType);
checkPrecision(getOriginalOutputPrecisionAtPort(NMS_VALID_OUTPUTS), supportedIntOutputPrecision, "valid_outputs", m_outType);
addSupportedPrimDesc({{LayoutType::ncsp, Precision::FP32},
{LayoutType::ncsp, Precision::FP32}},
@ -282,6 +240,54 @@ size_t MKLDNNMatrixNmsNode::nmsMatrix(const float* boxesData, const float* score
return numDet;
}
void MKLDNNMatrixNmsNode::createPrimitive() {
if (inputShapesDefined()) {
prepareParams();
updateLastInputDims();
}
}
void MKLDNNMatrixNmsNode::prepareParams() {
const auto& boxes_dims = getParentEdgeAt(NMS_BOXES)->getMemory().getStaticDims();
const auto& scores_dims = getParentEdgeAt(NMS_SCORES)->getMemory().getStaticDims();
if (!(boxes_dims[0] == scores_dims[0] && boxes_dims[1] == scores_dims[2])) {
IE_THROW() << m_errorPrefix << "has incompatible 'boxes' and 'scores' input dimensions";
}
m_numBatches = boxes_dims[0];
m_numBoxes = boxes_dims[1];
m_numClasses = scores_dims[1];
int64_t max_output_boxes_per_class = 0;
size_t real_num_classes = m_backgroundClass == -1 ? m_numClasses :
m_backgroundClass < m_numClasses ? m_numClasses - 1 : m_numClasses;
if (m_nmsTopk >= 0)
max_output_boxes_per_class = std::min(m_numBoxes, static_cast<size_t>(m_nmsTopk));
else
max_output_boxes_per_class = m_numBoxes;
m_maxBoxesPerBatch = max_output_boxes_per_class * real_num_classes;
if (m_keepTopk >= 0)
m_maxBoxesPerBatch = std::min(m_maxBoxesPerBatch, static_cast<size_t>(m_keepTopk));
m_realNumClasses = real_num_classes;
m_realNumBoxes = m_nmsTopk == -1 ? m_numBoxes : std::min(m_nmsTopk, static_cast<int>(m_numBoxes));
m_numPerBatch.resize(m_numBatches);
m_filteredBoxes.resize(m_numBatches * m_realNumClasses * m_realNumBoxes);
m_numPerBatchClass.resize(m_numBatches);
for (auto &numPerBatch : m_numPerBatchClass) {
numPerBatch.resize(m_numClasses, 0);
}
m_classOffset.resize(m_numClasses, 0);
for (size_t i = 0, count = 0; i < m_numClasses; i++) {
if (i == m_backgroundClass)
continue;
m_classOffset[i] = (count++) * m_realNumBoxes;
}
}
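// Worked example of the sizing above (illustrative values, not from this commit):
// boxes {2, 100, 4}, scores {2, 5, 100}, background_class = 0, nms_top_k = 20, keep_top_k = 30
//   real_num_classes           = 5 - 1 = 4
//   max_output_boxes_per_class = min(100, 20) = 20
//   m_maxBoxesPerBatch         = min(20 * 4, 30) = 30
//   m_realNumBoxes             = min(20, 100) = 20
//   m_filteredBoxes.size()     = 2 * 4 * 20 = 160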
void MKLDNNMatrixNmsNode::execute(mkldnn::stream strm) {
const float* boxes = reinterpret_cast<const float*>(getParentEdgeAt(NMS_BOXES)->getMemoryPtr()->GetPtr());
const float* scores = reinterpret_cast<const float*>(getParentEdgeAt(NMS_SCORES)->getMemoryPtr()->GetPtr());
@ -352,9 +358,20 @@ void MKLDNNMatrixNmsNode::execute(mkldnn::stream strm) {
}
}
float* selectedOutputs = reinterpret_cast<float*>(getChildEdgesAtPort(NMS_SELECTED_OUTPUTS)[0]->getMemoryPtr()->GetPtr());
int* selectedIndices = reinterpret_cast<int*>(getChildEdgesAtPort(NMS_SELECTED_INDICES)[0]->getMemoryPtr()->GetPtr());
int* validOutputs = reinterpret_cast<int*>(getChildEdgesAtPort(NMS_VALID_OUTPUTS)[0]->getMemoryPtr()->GetPtr());
auto selectedOutputsMemPtr = getChildEdgesAtPort(NMS_SELECTED_OUTPUTS)[0]->getMemoryPtr();
auto selectedIndicesMemPtr = getChildEdgesAtPort(NMS_SELECTED_INDICES)[0]->getMemoryPtr();
auto validOutputsMemPtr = getChildEdgesAtPort(NMS_VALID_OUTPUTS)[0]->getMemoryPtr();
// TODO [DS NMS]: remove once models where NMS is not the last node support dynamic shapes
if (isDynamicNode()) {
size_t totalBox = std::accumulate(m_numPerBatch.begin(), m_numPerBatch.end(), 0);
selectedOutputsMemPtr->redefineDesc(getBaseMemDescAtOutputPort(NMS_SELECTED_OUTPUTS)->cloneWithNewDims({totalBox, 6}));
selectedIndicesMemPtr->redefineDesc(getBaseMemDescAtOutputPort(NMS_SELECTED_INDICES)->cloneWithNewDims({totalBox, 1}));
validOutputsMemPtr->redefineDesc(getBaseMemDescAtOutputPort(NMS_VALID_OUTPUTS)->cloneWithNewDims({m_numBatches}));
}
float* selectedOutputs = reinterpret_cast<float*>(selectedOutputsMemPtr->GetPtr());
int* selectedIndices = reinterpret_cast<int*>(selectedIndicesMemPtr->GetPtr());
int* validOutputs = reinterpret_cast<int*>(validOutputsMemPtr->GetPtr());
std::copy(m_numPerBatch.begin(), m_numPerBatch.end(), validOutputs);
int64_t outputOffset = 0;
@ -372,16 +389,22 @@ void MKLDNNMatrixNmsNode::execute(mkldnn::stream strm) {
selectedBase[4] = m_filteredBoxes[originalIndex].box.x2;
selectedBase[5] = m_filteredBoxes[originalIndex].box.y2;
}
std::fill_n(selectedOutputs + (outputOffset + real_boxes) * 6, (m_maxBoxesPerBatch - real_boxes) * 6, -1);
std::fill_n(selectedIndices + (outputOffset + real_boxes), m_maxBoxesPerBatch - real_boxes, -1);
outputOffset += m_maxBoxesPerBatch;
originalOffset += real_boxes;
// TODO [DS NMS]: remove once models where NMS is not the last node support dynamic shapes
if (!isDynamicNode()) {
std::fill_n(selectedOutputs + (outputOffset + real_boxes) * 6, (m_maxBoxesPerBatch - real_boxes) * 6, -1);
std::fill_n(selectedIndices + (outputOffset + real_boxes), m_maxBoxesPerBatch - real_boxes, -1);
outputOffset += m_maxBoxesPerBatch;
originalOffset += real_boxes;
} else {
outputOffset += real_boxes;
originalOffset += real_boxes;
}
}
}
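// Illustrative example of the two output layouts above: m_numPerBatch = {2, 1}, m_maxBoxesPerBatch = 3
//   static shapes:  selected_outputs keeps 2 * 3 = 6 rows; row 2 and rows 4..5 are padded with -1
//   dynamic shapes: selected_outputs was redefined to {3, 6} earlier in execute(), so the three
//                   real boxes are written back to back with no padding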
void MKLDNNMatrixNmsNode::checkPrecision(const Precision prec, const std::vector<Precision> precList, const std::string name, const std::string type) {
if (std::find(precList.begin(), precList.end(), prec) == precList.end())
IE_THROW() << errorPrefix << "has unsupported '" << name << "' " << type << " precision: " << prec;
IE_THROW() << m_errorPrefix << "has unsupported '" << name << "' " << type << " precision: " << prec;
}
REG_MKLDNN_PRIM_FOR(MKLDNNMatrixNmsNode, MatrixNms);

View File

@ -27,12 +27,17 @@ public:
void getSupportedDescriptors() override {};
void initSupportedPrimitiveDescriptors() override;
void createPrimitive() override {};
void createPrimitive() override;
void execute(mkldnn::stream strm) override;
bool created() const override;
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); }
bool needShapeInfer() const override { return false; }
void prepareParams() override;
private:
// input
static const size_t NMS_BOXES = 0;
@ -82,8 +87,8 @@ private:
int64_t classIndex = -1;
float score = 0.0f;
};
std::string errorPrefix;
const std::string inType = "input", outType = "output";
std::string m_errorPrefix;
const std::string m_inType = "input", m_outType = "output";
std::vector<int64_t> m_numPerBatch;
std::vector<std::vector<int64_t>> m_numPerBatchClass;
std::vector<BoxInfo> m_filteredBoxes;

View File

@ -9,7 +9,6 @@
#include <chrono>
#include <cmath>
#include <ie_ngraph_utils.hpp>
#include <ngraph_ops/nms_static_shape_ie.hpp>
#include <queue>
#include <string>
#include <utility>
@ -22,17 +21,12 @@ using namespace MKLDNNPlugin;
using namespace InferenceEngine;
using ngNmsSortResultType = ngraph::op::util::NmsBase::SortResultType;
using MulticlassNmsIEInternal = ngraph::op::internal::NmsStaticShapeIE<ngraph::op::v8::MulticlassNms>;
bool MKLDNNMultiClassNmsNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
if (isDynamicNgraphNode(op)) {
errorMessage = "Doesn't support op with dynamic shapes";
return false;
}
const auto nms = std::dynamic_pointer_cast<const MulticlassNmsIEInternal>(op);
const auto nms = std::dynamic_pointer_cast<const ngraph::op::v8::MulticlassNms>(op);
if (!nms) {
errorMessage = "Only internal MulitClassNonMaxSuppression operation is supported";
errorMessage = "Only MulticlassNms operation is supported";
return false;
}
const auto& atrri = nms->get_attrs();
@ -53,79 +47,55 @@ MKLDNNMultiClassNmsNode::MKLDNNMultiClassNmsNode(const std::shared_ptr<ngraph::N
if (!isSupportedOperation(op, errorMessage)) {
IE_THROW(NotImplemented) << errorMessage;
}
errorPrefix = "MultiClassNms layer with name '" + getName() + "' ";
const auto nms = std::dynamic_pointer_cast<const MulticlassNmsIEInternal>(op);
m_errorPrefix = "MultiClassNms layer with name '" + getName() + "' ";
if (getOriginalInputsNumber() != 2)
IE_THROW() << errorPrefix << "has incorrect number of input edges: " << getOriginalInputsNumber();
IE_THROW() << m_errorPrefix << "has incorrect number of input edges: " << getOriginalInputsNumber();
if (getOriginalOutputsNumber() != 3)
IE_THROW() << errorPrefix << "has incorrect number of output edges: " << getOriginalOutputsNumber();
IE_THROW() << m_errorPrefix << "has incorrect number of output edges: " << getOriginalOutputsNumber();
const auto nms = std::dynamic_pointer_cast<const ngraph::op::v8::MulticlassNms>(op);
auto& atrri = nms->get_attrs();
sort_result_across_batch = atrri.sort_result_across_batch;
max_output_boxes_per_class = atrri.nms_top_k;
iou_threshold = atrri.iou_threshold;
score_threshold = atrri.score_threshold;
background_class = atrri.background_class;
keep_top_k = atrri.keep_top_k;
m_sortResultAcrossBatch = atrri.sort_result_across_batch;
m_nmsTopK = atrri.nms_top_k;
m_iouThreshold = atrri.iou_threshold;
m_scoreThreshold = atrri.score_threshold;
m_backgroundClass = atrri.background_class;
m_keepTopK = atrri.keep_top_k;
if (atrri.sort_result_type == ngNmsSortResultType::CLASSID)
sort_result_type = MulticlassNmsSortResultType::CLASSID;
m_sortResultType = MulticlassNmsSortResultType::CLASSID;
else if (atrri.sort_result_type == ngNmsSortResultType::SCORE)
sort_result_type = MulticlassNmsSortResultType::SCORE;
m_sortResultType = MulticlassNmsSortResultType::SCORE;
else if (atrri.sort_result_type == ngNmsSortResultType::NONE)
sort_result_type = MulticlassNmsSortResultType::NONE;
nms_eta = atrri.nms_eta;
normalized = atrri.normalized;
m_sortResultType = MulticlassNmsSortResultType::NONE;
m_nmsEta = atrri.nms_eta;
m_normalized = atrri.normalized;
const SizeVector& boxes_dims = inputShapes[NMS_BOXES].getStaticDims();
const auto& boxes_dims = getInputShapeAtPort(NMS_BOXES).getDims();
if (boxes_dims.size() != 3)
IE_THROW() << errorPrefix << "has unsupported 'boxes' input rank: " << boxes_dims.size();
IE_THROW() << m_errorPrefix << "has unsupported 'boxes' input rank: " << boxes_dims.size();
if (boxes_dims[2] != 4)
IE_THROW() << errorPrefix << "has unsupported 'boxes' input 3rd dimension size: " << boxes_dims[2];
const SizeVector& scores_dims = inputShapes[NMS_SCORES].getStaticDims();
IE_THROW() << m_errorPrefix << "has unsupported 'boxes' input 3rd dimension size: " << boxes_dims[2];
const auto& scores_dims = getInputShapeAtPort(NMS_SCORES).getDims();
if (scores_dims.size() != 3)
IE_THROW() << errorPrefix << "has unsupported 'scores' input rank: " << scores_dims.size();
if (boxes_dims[0] != scores_dims[0])
IE_THROW() << errorPrefix << " num_batches is different in 'boxes' and 'scores' inputs";
if (boxes_dims[1] != scores_dims[2])
IE_THROW() << errorPrefix << " num_boxes is different in 'boxes' and 'scores' inputs";
const SizeVector& valid_outputs_dims = outputShapes[NMS_SELECTEDNUM].getStaticDims();
if (valid_outputs_dims.size() != 1)
IE_THROW() << errorPrefix << "has unsupported 'valid_outputs' output rank: " << valid_outputs_dims.size();
if (valid_outputs_dims[0] != boxes_dims[0]) // valid_outputs_dims[0] != num_batches
IE_THROW() << errorPrefix << "has unsupported 'valid_outputs' output 1st dimension size: " << valid_outputs_dims[0];
IE_THROW() << m_errorPrefix << "has unsupported 'scores' input rank: " << scores_dims.size();
}
void MKLDNNMultiClassNmsNode::initSupportedPrimitiveDescriptors() {
if (!supportedPrimitiveDescriptors.empty())
return;
const SizeVector& boxes_dims = inputShapes[NMS_BOXES].getStaticDims();
num_batches = boxes_dims[0];
num_boxes = boxes_dims[1];
const SizeVector& scores_dims = inputShapes[NMS_SCORES].getStaticDims();
num_classes = scores_dims[1];
numFiltBox.resize(num_batches, std::vector<size_t>(num_classes)); // batches
numBoxOffset.resize(num_batches);
if (max_output_boxes_per_class) {
max_output_boxes_per_class = (max_output_boxes_per_class == -1) ? num_boxes : max_output_boxes_per_class;
filtBoxes.resize(max_output_boxes_per_class * num_batches * num_classes);
}
const std::vector<Precision> supportedFloatPrecision = {Precision::FP32, Precision::BF16};
const std::vector<Precision> supportedIntOutputPrecision = {Precision::I32, Precision::I64};
checkPrecision(getOriginalInputPrecisionAtPort(NMS_BOXES), supportedFloatPrecision, "boxes", inType);
checkPrecision(getOriginalInputPrecisionAtPort(NMS_BOXES), supportedFloatPrecision, "boxes", m_inType);
checkPrecision(getOriginalInputPrecisionAtPort(NMS_SCORES), supportedFloatPrecision, "scores", m_inType);
checkPrecision(getOriginalInputPrecisionAtPort(NMS_SCORES), supportedFloatPrecision, "scores", inType);
checkPrecision(getOriginalOutputPrecisionAtPort(NMS_SELECTEDINDICES), supportedIntOutputPrecision, "selected_indices", outType);
checkPrecision(getOriginalOutputPrecisionAtPort(NMS_SELECTEDOUTPUTS), supportedFloatPrecision, "selected_outputs", outType);
checkPrecision(getOriginalOutputPrecisionAtPort(NMS_SELECTEDNUM), supportedIntOutputPrecision, "selected_num", outType);
checkPrecision(getOriginalOutputPrecisionAtPort(NMS_SELECTEDINDICES), supportedIntOutputPrecision, "selected_indices", m_outType);
checkPrecision(getOriginalOutputPrecisionAtPort(NMS_SELECTEDOUTPUTS), supportedFloatPrecision, "selected_outputs", m_outType);
checkPrecision(getOriginalOutputPrecisionAtPort(NMS_SELECTEDNUM), supportedIntOutputPrecision, "selected_num", m_outType);
addSupportedPrimDesc({{LayoutType::ncsp, Precision::FP32},
{LayoutType::ncsp, Precision::FP32}},
@ -135,100 +105,138 @@ void MKLDNNMultiClassNmsNode::initSupportedPrimitiveDescriptors() {
impl_desc_type::ref_any);
}
void MKLDNNMultiClassNmsNode::createPrimitive() {
if (inputShapesDefined()) {
prepareParams();
updateLastInputDims();
}
}
void MKLDNNMultiClassNmsNode::prepareParams() {
const auto& boxes_dims = getParentEdgeAt(NMS_BOXES)->getMemory().getStaticDims();
const auto& scores_dims = getParentEdgeAt(NMS_SCORES)->getMemory().getStaticDims();
if (!(boxes_dims[0] == scores_dims[0] && boxes_dims[1] == scores_dims[2])) {
IE_THROW() << m_errorPrefix << "has incompatible 'boxes' and 'scores' input dimensions";
}
m_numBatches = boxes_dims[0];
m_numBoxes = boxes_dims[1];
m_numClasses = scores_dims[1];
int max_output_boxes_per_class = 0;
size_t real_num_classes = m_backgroundClass == -1 ? m_numClasses :
m_backgroundClass < m_numClasses ? m_numClasses - 1 : m_numClasses;
if (m_nmsTopK) {
max_output_boxes_per_class = (m_nmsTopK == -1) ? m_numBoxes :
std::min(m_nmsTopK, static_cast<int>(m_numBoxes));
m_filtBoxes.resize(max_output_boxes_per_class * m_numBatches * m_numClasses);
}
m_nmsRealTopk = max_output_boxes_per_class;
m_maxBoxesPerBatch = max_output_boxes_per_class * real_num_classes;
if (m_keepTopK >= 0)
m_maxBoxesPerBatch = std::min(m_maxBoxesPerBatch, static_cast<size_t>(m_keepTopK));
m_numFiltBox.resize(m_numBatches);
for (auto &numPerBatch : m_numFiltBox) {
numPerBatch.resize(m_numClasses, 0);
}
m_numBoxOffset.resize(m_numBatches);
}
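// Worked example of the sizing above (illustrative values, not from this commit):
// boxes {2, 100, 4}, scores {2, 5, 100}, nms_top_k = 20, background_class = -1, keep_top_k = 30
//   m_nmsRealTopk      = min(20, 100) = 20
//   m_filtBoxes.size() = 20 * 2 * 5 = 200
//   m_maxBoxesPerBatch = min(20 * 5, 30) = 30
// With nms_top_k = 0, m_nmsRealTopk stays 0 and execute() below returns without producing boxes.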
void MKLDNNMultiClassNmsNode::execute(mkldnn::stream strm) {
const float* boxes = reinterpret_cast<const float*>(getParentEdgeAt(NMS_BOXES)->getMemoryPtr()->GetPtr());
const float* scores = reinterpret_cast<const float*>(getParentEdgeAt(NMS_SCORES)->getMemoryPtr()->GetPtr());
auto dims_boxes = getParentEdgeAt(NMS_BOXES)->getMemory().getStaticDims();
if (max_output_boxes_per_class == 0)
if (m_nmsRealTopk == 0)
return;
int* selected_indices = reinterpret_cast<int*>(getChildEdgesAtPort(NMS_SELECTEDINDICES)[0]->getMemoryPtr()->GetPtr());
float* selected_outputs = reinterpret_cast<float*>(getChildEdgesAtPort(NMS_SELECTEDOUTPUTS)[0]->getMemoryPtr()->GetPtr());
int* selected_num = reinterpret_cast<int*>(getChildEdgesAtPort(NMS_SELECTEDNUM)[0]->getMemoryPtr()->GetPtr());
auto selectedOutputsMemPtr = getChildEdgesAtPort(NMS_SELECTEDOUTPUTS)[0]->getMemoryPtr();
auto selectedIndicesMemPtr = getChildEdgesAtPort(NMS_SELECTEDINDICES)[0]->getMemoryPtr();
auto validOutputsMemPtr = getChildEdgesAtPort(NMS_SELECTEDNUM)[0]->getMemoryPtr();
auto boxesStrides = getParentEdgeAt(NMS_BOXES)->getMemory().GetDescWithType<BlockedMemoryDesc>()->getStrides();
auto scoresStrides = getParentEdgeAt(NMS_SCORES)->getMemory().GetDescWithType<BlockedMemoryDesc>()->getStrides();
if ((nms_eta >= 0) && (nms_eta < 1)) {
if ((m_nmsEta >= 0) && (m_nmsEta < 1)) {
nmsWithEta(boxes, scores, boxesStrides, scoresStrides);
} else {
nmsWithoutEta(boxes, scores, boxesStrides, scoresStrides);
}
size_t startOffset = numFiltBox[0][0];
numBoxOffset[0] = 0;
for (size_t b = 0; b < numFiltBox.size(); b++) {
size_t startOffset = m_numFiltBox[0][0];
m_numBoxOffset[0] = 0;
for (size_t b = 0; b < m_numFiltBox.size(); b++) {
size_t batchOffsetNew = 0;
size_t batchOffset = b * num_classes * max_output_boxes_per_class;
for (size_t c = (b == 0 ? 1 : 0); c < numFiltBox[b].size(); c++) {
size_t offset = batchOffset + c * max_output_boxes_per_class;
for (size_t i = 0; i < numFiltBox[b][c]; i++) {
filtBoxes[startOffset + i] = filtBoxes[offset + i];
size_t batchOffset = b * m_numClasses * m_nmsRealTopk;
for (size_t c = (b == 0 ? 1 : 0); c < m_numFiltBox[b].size(); c++) {
size_t offset = batchOffset + c * m_nmsRealTopk;
for (size_t i = 0; i < m_numFiltBox[b][c]; i++) {
m_filtBoxes[startOffset + i] = m_filtBoxes[offset + i];
}
startOffset += numFiltBox[b][c];
batchOffsetNew += numFiltBox[b][c];
startOffset += m_numFiltBox[b][c];
batchOffsetNew += m_numFiltBox[b][c];
}
numBoxOffset[b] = batchOffsetNew;
m_numBoxOffset[b] = batchOffsetNew;
if (b == 0)
numBoxOffset[b] += numFiltBox[0][0];
m_numBoxOffset[b] += m_numFiltBox[0][0];
}
// sort elements before going through keep_top_k
parallel_sort(filtBoxes.begin(), filtBoxes.begin() + startOffset, [](const filteredBoxes& l, const filteredBoxes& r) {
parallel_sort(m_filtBoxes.begin(), m_filtBoxes.begin() + startOffset, [](const filteredBoxes& l, const filteredBoxes& r) {
return ((l.batch_index < r.batch_index) ||
((l.batch_index == r.batch_index) && ((l.score > r.score) || ((std::fabs(l.score - r.score) < 1e-6) && l.class_index < r.class_index) ||
((std::fabs(l.score - r.score) < 1e-6) && l.class_index == r.class_index && l.box_index < r.box_index))));
});
if (keep_top_k > -1) {
if (m_keepTopK > -1) {
startOffset = 0;
size_t offset = 0;
for (size_t b = 0; b < numFiltBox.size(); b++) {
if (numBoxOffset[b] > keep_top_k) {
for (size_t b = 0; b < m_numFiltBox.size(); b++) {
if (m_numBoxOffset[b] > m_keepTopK) {
if (startOffset == offset) {
startOffset += keep_top_k;
offset += numBoxOffset[b];
startOffset += m_keepTopK;
offset += m_numBoxOffset[b];
} else {
for (size_t i = 0; i < keep_top_k; i++) {
filtBoxes[startOffset + i] = filtBoxes[offset + i];
for (size_t i = 0; i < m_keepTopK; i++) {
m_filtBoxes[startOffset + i] = m_filtBoxes[offset + i];
}
startOffset += keep_top_k;
offset += numBoxOffset[b];
startOffset += m_keepTopK;
offset += m_numBoxOffset[b];
}
} else {
if (startOffset == offset) {
startOffset += numBoxOffset[b];
offset += numBoxOffset[b];
startOffset += m_numBoxOffset[b];
offset += m_numBoxOffset[b];
} else {
for (size_t i = 0; i < numBoxOffset[b]; i++) {
filtBoxes[startOffset + i] = filtBoxes[offset + i];
for (size_t i = 0; i < m_numBoxOffset[b]; i++) {
m_filtBoxes[startOffset + i] = m_filtBoxes[offset + i];
}
startOffset += numBoxOffset[b];
offset += numBoxOffset[b];
startOffset += m_numBoxOffset[b];
offset += m_numBoxOffset[b];
}
}
}
}
if (sort_result_across_batch) {
if (sort_result_type == MulticlassNmsSortResultType::SCORE) {
parallel_sort(filtBoxes.begin(), filtBoxes.begin() + startOffset, [](const filteredBoxes& l, const filteredBoxes& r) {
if (m_sortResultAcrossBatch) {
if (m_sortResultType == MulticlassNmsSortResultType::SCORE) {
parallel_sort(m_filtBoxes.begin(), m_filtBoxes.begin() + startOffset, [](const filteredBoxes& l, const filteredBoxes& r) {
return (l.score > r.score) || (l.score == r.score && l.batch_index < r.batch_index) ||
(l.score == r.score && l.batch_index == r.batch_index && l.class_index < r.class_index) ||
(l.score == r.score && l.batch_index == r.batch_index && l.class_index == r.class_index && l.box_index < r.box_index);
});
} else if (sort_result_type == MulticlassNmsSortResultType::CLASSID) {
parallel_sort(filtBoxes.begin(), filtBoxes.begin() + startOffset, [](const filteredBoxes& l, const filteredBoxes& r) {
} else if (m_sortResultType == MulticlassNmsSortResultType::CLASSID) {
parallel_sort(m_filtBoxes.begin(), m_filtBoxes.begin() + startOffset, [](const filteredBoxes& l, const filteredBoxes& r) {
return (l.class_index < r.class_index) || (l.class_index == r.class_index && l.batch_index < r.batch_index) ||
(l.class_index == r.class_index && l.batch_index == r.batch_index && l.score > r.score) ||
(l.class_index == r.class_index && l.batch_index == r.batch_index && l.score == r.score && l.box_index < r.box_index);
});
}
} else if (sort_result_type == MulticlassNmsSortResultType::CLASSID) {
parallel_sort(filtBoxes.begin(), filtBoxes.begin() + startOffset, [](const filteredBoxes& l, const filteredBoxes& r) {
} else if (m_sortResultType == MulticlassNmsSortResultType::CLASSID) {
parallel_sort(m_filtBoxes.begin(), m_filtBoxes.begin() + startOffset, [](const filteredBoxes& l, const filteredBoxes& r) {
return ((l.batch_index < r.batch_index) ||
((l.batch_index == r.batch_index) &&
((l.class_index < r.class_index) || ((l.class_index == r.class_index) && l.score > r.score) ||
@ -236,18 +244,28 @@ void MKLDNNMultiClassNmsNode::execute(mkldnn::stream strm) {
});
}
const size_t selectedBoxesNum = getChildEdgeAt(NMS_SELECTEDINDICES)->getMemory().getStaticDims()[0];
const size_t validOutputs = std::min(startOffset, selectedBoxesNum);
const size_t validOutputs = std::min(startOffset, m_maxBoxesPerBatch * dims_boxes[0]);
std::vector<size_t> m_selected_num;
m_selected_num.resize(dims_boxes[0]);
const size_t selectedBoxesNum_perBatch = selectedBoxesNum / dims_boxes[0];
const size_t selectedBoxesNum_perBatch = m_maxBoxesPerBatch;
for (size_t idx = 0lu; idx < validOutputs; idx++) {
m_selected_num[filtBoxes[idx].batch_index]++;
m_selected_num[m_filtBoxes[idx].batch_index]++;
}
// TODO [DS NMS]: remove once models where NMS is not the last node support dynamic shapes
if (isDynamicNode()) {
size_t totalBox = std::accumulate(m_selected_num.begin(), m_selected_num.end(), 0);
selectedOutputsMemPtr->redefineDesc(getBaseMemDescAtOutputPort(NMS_SELECTEDOUTPUTS)->cloneWithNewDims({totalBox, 6}));
selectedIndicesMemPtr->redefineDesc(getBaseMemDescAtOutputPort(NMS_SELECTEDINDICES)->cloneWithNewDims({totalBox, 1}));
validOutputsMemPtr->redefineDesc(getBaseMemDescAtOutputPort(NMS_SELECTEDNUM)->cloneWithNewDims({m_numBatches}));
}
int* selected_indices = reinterpret_cast<int*>(selectedIndicesMemPtr->GetPtr());
float* selected_outputs = reinterpret_cast<float*>(selectedOutputsMemPtr->GetPtr());
int* selected_num = reinterpret_cast<int*>(validOutputsMemPtr->GetPtr());
int64_t output_offset = 0;
int64_t original_offset = 0;
for (size_t i = 0; i < dims_boxes[0]; i++) {
@ -256,19 +274,25 @@ void MKLDNNMultiClassNmsNode::execute(mkldnn::stream strm) {
for (size_t j = 0; j < real_boxes; j++) {
auto original_index = original_offset + j;
selected_indices[j + output_offset] = filtBoxes[original_index].batch_index * dims_boxes[1] + filtBoxes[original_index].box_index;
selected_indices[j + output_offset] = m_filtBoxes[original_index].batch_index * dims_boxes[1] + m_filtBoxes[original_index].box_index;
auto selected_base = selected_outputs + (output_offset + j) * 6;
selected_base[0] = filtBoxes[original_index].class_index;
selected_base[1] = filtBoxes[original_index].score;
selected_base[0] = m_filtBoxes[original_index].class_index;
selected_base[1] = m_filtBoxes[original_index].score;
selected_base[2] = boxes[selected_indices[j + output_offset] * 4];
selected_base[3] = boxes[selected_indices[j + output_offset] * 4 + 1];
selected_base[4] = boxes[selected_indices[j + output_offset] * 4 + 2];
selected_base[5] = boxes[selected_indices[j + output_offset] * 4 + 3];
}
std::fill_n(selected_outputs + (output_offset + real_boxes) * 6, (selectedBoxesNum_perBatch - real_boxes) * 6, -1);
std::fill_n(selected_indices + (output_offset + real_boxes), selectedBoxesNum_perBatch - real_boxes, -1);
output_offset += selectedBoxesNum_perBatch;
original_offset += real_boxes;
// TODO [DS NMS]: remove once models where NMS is not the last node support dynamic shapes
if (!isDynamicNode()) {
std::fill_n(selected_outputs + (output_offset + real_boxes) * 6, (selectedBoxesNum_perBatch - real_boxes) * 6, -1);
std::fill_n(selected_indices + (output_offset + real_boxes), selectedBoxesNum_perBatch - real_boxes, -1);
output_offset += selectedBoxesNum_perBatch;
original_offset += real_boxes;
} else {
output_offset += real_boxes;
original_offset += real_boxes;
}
}
}
@ -309,21 +333,21 @@ void MKLDNNMultiClassNmsNode::nmsWithEta(const float* boxes, const float* scores
return iou <= adaptive_threshold ? 1.0f : 0.0f;
};
parallel_for2d(num_batches, num_classes, [&](int batch_idx, int class_idx) {
if (class_idx != background_class) {
parallel_for2d(m_numBatches, m_numClasses, [&](int batch_idx, int class_idx) {
if (class_idx != m_backgroundClass) {
std::vector<filteredBoxes> fb;
const float* boxesPtr = boxes + batch_idx * boxesStrides[0];
const float* scoresPtr = scores + batch_idx * scoresStrides[0] + class_idx * scoresStrides[1];
std::priority_queue<boxInfo, std::vector<boxInfo>, decltype(less)> sorted_boxes(less);
for (int box_idx = 0; box_idx < num_boxes; box_idx++) {
if (scoresPtr[box_idx] >= score_threshold) // align with ref
for (int box_idx = 0; box_idx < m_numBoxes; box_idx++) {
if (scoresPtr[box_idx] >= m_scoreThreshold) // align with ref
sorted_boxes.emplace(boxInfo({scoresPtr[box_idx], box_idx, 0}));
}
fb.reserve(sorted_boxes.size());
if (sorted_boxes.size() > 0) {
auto adaptive_threshold = iou_threshold;
int max_out_box = (max_output_boxes_per_class > sorted_boxes.size()) ? sorted_boxes.size() : max_output_boxes_per_class;
auto adaptive_threshold = m_iouThreshold;
int max_out_box = (m_nmsRealTopk > sorted_boxes.size()) ? sorted_boxes.size() : m_nmsRealTopk;
while (max_out_box && !sorted_boxes.empty()) {
boxInfo currBox = sorted_boxes.top();
float origScore = currBox.score;
@ -332,49 +356,49 @@ void MKLDNNMultiClassNmsNode::nmsWithEta(const float* boxes, const float* scores
bool box_is_selected = true;
for (int idx = static_cast<int>(fb.size()) - 1; idx >= currBox.suppress_begin_index; idx--) {
float iou = intersectionOverUnion(&boxesPtr[currBox.idx * 4], &boxesPtr[fb[idx].box_index * 4], normalized);
float iou = intersectionOverUnion(&boxesPtr[currBox.idx * 4], &boxesPtr[fb[idx].box_index * 4], m_normalized);
currBox.score *= func(iou, adaptive_threshold);
if (iou >= adaptive_threshold) {
box_is_selected = false;
break;
}
if (currBox.score <= score_threshold)
if (currBox.score <= m_scoreThreshold)
break;
}
currBox.suppress_begin_index = fb.size();
if (box_is_selected) {
if (nms_eta < 1 && adaptive_threshold > 0.5) {
adaptive_threshold *= nms_eta;
if (m_nmsEta < 1 && adaptive_threshold > 0.5) {
adaptive_threshold *= m_nmsEta;
}
if (currBox.score == origScore) {
fb.push_back({currBox.score, batch_idx, class_idx, currBox.idx});
continue;
}
if (currBox.score > score_threshold) {
if (currBox.score > m_scoreThreshold) {
sorted_boxes.push(currBox);
}
}
}
}
numFiltBox[batch_idx][class_idx] = fb.size();
size_t offset = batch_idx * num_classes * max_output_boxes_per_class + class_idx * max_output_boxes_per_class;
m_numFiltBox[batch_idx][class_idx] = fb.size();
size_t offset = batch_idx * m_numClasses * m_nmsRealTopk + class_idx * m_nmsRealTopk;
for (size_t i = 0; i < fb.size(); i++) {
filtBoxes[offset + i] = fb[i];
m_filtBoxes[offset + i] = fb[i];
}
}
});
}
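// Worked example of the adaptive threshold above (illustrative values): m_iouThreshold = 0.7, m_nmsEta = 0.6
//   after the first candidate survives suppression: 0.7 > 0.5, so the threshold becomes 0.7 * 0.6 = 0.42
//   after later candidates: 0.42 <= 0.5, so the threshold stays at 0.42 for the rest of the class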
void MKLDNNMultiClassNmsNode::nmsWithoutEta(const float* boxes, const float* scores, const SizeVector& boxesStrides, const SizeVector& scoresStrides) {
parallel_for2d(num_batches, num_classes, [&](int batch_idx, int class_idx) {
if (class_idx != background_class) {
parallel_for2d(m_numBatches, m_numClasses, [&](int batch_idx, int class_idx) {
if (class_idx != m_backgroundClass) {
const float* boxesPtr = boxes + batch_idx * boxesStrides[0];
const float* scoresPtr = scores + batch_idx * scoresStrides[0] + class_idx * scoresStrides[1];
std::vector<std::pair<float, int>> sorted_boxes;
for (int box_idx = 0; box_idx < num_boxes; box_idx++) {
if (scoresPtr[box_idx] >= score_threshold) // align with ref
for (int box_idx = 0; box_idx < m_numBoxes; box_idx++) {
if (scoresPtr[box_idx] >= m_scoreThreshold) // align with ref
sorted_boxes.emplace_back(std::make_pair(scoresPtr[box_idx], box_idx));
}
@ -383,35 +407,36 @@ void MKLDNNMultiClassNmsNode::nmsWithoutEta(const float* boxes, const float* sco
parallel_sort(sorted_boxes.begin(), sorted_boxes.end(), [](const std::pair<float, int>& l, const std::pair<float, int>& r) {
return (l.first > r.first || ((l.first == r.first) && (l.second < r.second)));
});
int offset = batch_idx * num_classes * max_output_boxes_per_class + class_idx * max_output_boxes_per_class;
filtBoxes[offset + 0] = filteredBoxes(sorted_boxes[0].first, batch_idx, class_idx, sorted_boxes[0].second);
int offset = batch_idx * m_numClasses * m_nmsRealTopk + class_idx * m_nmsRealTopk;
m_filtBoxes[offset + 0] = filteredBoxes(sorted_boxes[0].first, batch_idx, class_idx, sorted_boxes[0].second);
io_selection_size++;
int max_out_box = (max_output_boxes_per_class > sorted_boxes.size()) ? sorted_boxes.size() : max_output_boxes_per_class;
int max_out_box = (m_nmsRealTopk > sorted_boxes.size()) ? sorted_boxes.size() : m_nmsRealTopk;
for (size_t box_idx = 1; box_idx < max_out_box; box_idx++) {
bool box_is_selected = true;
for (int idx = io_selection_size - 1; idx >= 0; idx--) {
float iou =
intersectionOverUnion(&boxesPtr[sorted_boxes[box_idx].second * 4], &boxesPtr[filtBoxes[offset + idx].box_index * 4], normalized);
if (iou >= iou_threshold) {
float iou = intersectionOverUnion(&boxesPtr[sorted_boxes[box_idx].second * 4],
&boxesPtr[m_filtBoxes[offset + idx].box_index * 4], m_normalized);
if (iou >= m_iouThreshold) {
box_is_selected = false;
break;
}
}
if (box_is_selected) {
filtBoxes[offset + io_selection_size] = filteredBoxes(sorted_boxes[box_idx].first, batch_idx, class_idx, sorted_boxes[box_idx].second);
m_filtBoxes[offset + io_selection_size] = filteredBoxes(sorted_boxes[box_idx].first, batch_idx, class_idx,
sorted_boxes[box_idx].second);
io_selection_size++;
}
}
}
numFiltBox[batch_idx][class_idx] = io_selection_size;
m_numFiltBox[batch_idx][class_idx] = io_selection_size;
}
});
}
void MKLDNNMultiClassNmsNode::checkPrecision(const Precision prec, const std::vector<Precision> precList, const std::string name, const std::string type) {
if (std::find(precList.begin(), precList.end(), prec) == precList.end())
IE_THROW() << errorPrefix << "has unsupported '" << name << "' " << type << " precision: " << prec;
IE_THROW() << m_errorPrefix << "has unsupported '" << name << "' " << type << " precision: " << prec;
}
REG_MKLDNN_PRIM_FOR(MKLDNNMultiClassNmsNode, MulticlassNms)

View File

@ -23,12 +23,17 @@ public:
void getSupportedDescriptors() override {};
void initSupportedPrimitiveDescriptors() override;
void createPrimitive() override {};
void createPrimitive() override;
void execute(mkldnn::stream strm) override;
bool created() const override;
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); }
bool needShapeInfer() const override { return false; }
void prepareParams() override;
private:
// input (port Num)
const size_t NMS_BOXES = 0;
@ -39,27 +44,29 @@ private:
const size_t NMS_SELECTEDINDICES = 1;
const size_t NMS_SELECTEDNUM = 2;
bool sort_result_across_batch = false;
MulticlassNmsSortResultType sort_result_type = MulticlassNmsSortResultType::NONE;
bool m_sortResultAcrossBatch = false;
MulticlassNmsSortResultType m_sortResultType = MulticlassNmsSortResultType::NONE;
size_t num_batches = 0;
size_t num_boxes = 0;
size_t num_classes = 0;
size_t m_numBatches = 0;
size_t m_numBoxes = 0;
size_t m_numClasses = 0;
size_t m_maxBoxesPerBatch = 0;
int max_output_boxes_per_class = 0;
float iou_threshold = 0.0f;
float score_threshold = 0.0f;
int m_nmsRealTopk = 0;
int m_nmsTopK = 0;
float m_iouThreshold = 0.0f;
float m_scoreThreshold = 0.0f;
int32_t background_class = 0;
int32_t keep_top_k = 0;
float nms_eta = 0.0f;
bool normalized = true;
int32_t m_backgroundClass = 0;
int32_t m_keepTopK = 0;
float m_nmsEta = 0.0f;
bool m_normalized = true;
std::string errorPrefix;
std::string m_errorPrefix;
std::vector<std::vector<size_t>> numFiltBox;
std::vector<size_t> numBoxOffset;
const std::string inType = "input", outType = "output";
std::vector<std::vector<size_t>> m_numFiltBox;
std::vector<size_t> m_numBoxOffset;
const std::string m_inType = "input", m_outType = "output";
struct filteredBoxes {
float score;
@ -77,7 +84,7 @@ private:
int suppress_begin_index;
};
std::vector<filteredBoxes> filtBoxes;
std::vector<filteredBoxes> m_filtBoxes;
void checkPrecision(const InferenceEngine::Precision prec, const std::vector<InferenceEngine::Precision> precList, const std::string name,
const std::string type);

View File

@ -64,8 +64,8 @@ void NmsStaticShapeIE<BaseNmsOp>::validate_and_infer_types() {
if (num_boxes_boxes.is_static() && scores_ps[0].is_static() && scores_ps[1].is_static()) {
const auto num_boxes = num_boxes_boxes.get_length();
auto num_classes = scores_ps[1].get_length();
if (this->m_attrs.background_class >=0 && this->m_attrs.background_class <= num_classes) {
num_classes = num_classes - 1;
if (this->m_attrs.background_class >= 0 && this->m_attrs.background_class < num_classes) {
num_classes = std::max(int64_t{1}, num_classes - 1);
}
int64_t max_output_boxes_per_class = 0;
if (this->m_attrs.nms_top_k >= 0)

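A worked example of the corrected background_class handling above, assuming num_classes = 5 (illustrative values):
    background_class = 1  -> in range, num_classes becomes max(1, 5 - 1) = 4
    background_class = 5  -> out of range under the new "< num_classes" check, num_classes stays 5
                             (the old "<= num_classes" check would have wrongly reduced it to 4)
    background_class = -1 -> no class is excluded, num_classes stays 5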
View File

@ -22,5 +22,5 @@ class TRANSFORMATIONS_API ConvertMatrixNmsToMatrixNmsIE;
class ngraph::pass::ConvertMatrixNmsToMatrixNmsIE: public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
ConvertMatrixNmsToMatrixNmsIE();
ConvertMatrixNmsToMatrixNmsIE(bool force_i32_output_type = true);
};

View File

@ -22,5 +22,5 @@ class TRANSFORMATIONS_API ConvertMulticlassNmsToMulticlassNmsIE;
class ngraph::pass::ConvertMulticlassNmsToMulticlassNmsIE: public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
ConvertMulticlassNmsToMulticlassNmsIE();
ConvertMulticlassNmsToMulticlassNmsIE(bool force_i32_output_type = true);
};

View File

@ -18,13 +18,18 @@
NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertMatrixNmsToMatrixNmsIE, "ConvertMatrixNmsToMatrixNmsIE", 0);
ngraph::pass::ConvertMatrixNmsToMatrixNmsIE::ConvertMatrixNmsToMatrixNmsIE() {
ngraph::pass::ConvertMatrixNmsToMatrixNmsIE::ConvertMatrixNmsToMatrixNmsIE(bool force_i32_output_type) {
MATCHER_SCOPE(ConvertMatrixNmsToMatrixNmsIE);
auto nms = ngraph::pattern::wrap_type<ngraph::opset8::MatrixNms>();
ngraph::matcher_pass_callback callback = [](pattern::Matcher &m) {
ngraph::matcher_pass_callback callback = [=](pattern::Matcher &m) {
auto nms = std::dynamic_pointer_cast<ngraph::opset8::MatrixNms>(m.get_match_root());
if (!nms) {
if (!nms || transformation_callback(nms)) {
return false;
}
// if input shape is dynamic force the output shape must be dynamic too
if (nms->get_input_partial_shape(0).is_dynamic() || nms->get_input_partial_shape(1).is_dynamic()) {
return false;
}
@ -32,7 +37,7 @@ ngraph::pass::ConvertMatrixNmsToMatrixNmsIE::ConvertMatrixNmsToMatrixNmsIE() {
// vector of new nGraph operations
NodeVector new_ops;
auto attrs = nms->get_attrs();
attrs.output_type = element::i32;
attrs.output_type = force_i32_output_type ? element::i32 : nms->get_output_type();
auto nms_new = std::make_shared<op::internal::NmsStaticShapeIE<ngraph::opset8::MatrixNms>>(
new_args.at(0),
new_args.at(1),

View File

@ -18,13 +18,18 @@
NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertMulticlassNmsToMulticlassNmsIE, "ConvertMulticlassNmsToMulticlassNmsIE", 0);
ngraph::pass::ConvertMulticlassNmsToMulticlassNmsIE::ConvertMulticlassNmsToMulticlassNmsIE() {
ngraph::pass::ConvertMulticlassNmsToMulticlassNmsIE::ConvertMulticlassNmsToMulticlassNmsIE(bool force_i32_output_type) {
MATCHER_SCOPE(ConvertMulticlassNmsToMulticlassNmsIE);
auto nms = ngraph::pattern::wrap_type<ngraph::opset8::MulticlassNms>();
ngraph::matcher_pass_callback callback = [](pattern::Matcher &m) {
ngraph::matcher_pass_callback callback = [=](pattern::Matcher &m) {
auto nms = std::dynamic_pointer_cast<ngraph::opset8::MulticlassNms>(m.get_match_root());
if (!nms) {
if (!nms || transformation_callback(nms)) {
return false;
}
// if input shape is dynamic force the output shape must be dynamic too
if (nms->get_input_partial_shape(0).is_dynamic() || nms->get_input_partial_shape(1).is_dynamic()) {
return false;
}
@ -32,7 +37,7 @@ ngraph::pass::ConvertMulticlassNmsToMulticlassNmsIE::ConvertMulticlassNmsToMulti
// vector of new nGraph operations
NodeVector new_ops;
auto attrs = nms->get_attrs();
attrs.output_type = element::i32;
attrs.output_type = force_i32_output_type ? element::i32 : nms->get_output_type();
auto nms_new = std::make_shared<op::internal::NmsStaticShapeIE<ngraph::opset8::MulticlassNms>>(
new_args.at(0),

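Both converters now take a force_i32_output_type flag that defaults to true, preserving the previous behaviour. A minimal usage sketch, assuming a ready std::shared_ptr<ngraph::Function> f and mirroring the registration added earlier in this commit:

::ngraph::pass::Manager manager;
// pass false to keep the original output_type (e.g. i64) instead of forcing i32
manager.register_pass<::ngraph::pass::ConvertMatrixNmsToMatrixNmsIE>(false);
manager.register_pass<::ngraph::pass::ConvertMulticlassNmsToMulticlassNmsIE>(false);
manager.run_passes(f);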
View File

@ -7,22 +7,31 @@
#include "shared_test_classes/single_layer/matrix_nms.hpp"
using namespace ngraph;
using namespace LayerTestsDefinitions;
using namespace ov::test::subgraph;
namespace {
TEST_P(MatrixNmsLayerTest, Serialize) {
Serialize();
serialize();
}
const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP32,
InferenceEngine::Precision::FP16
const std::vector<ov::test::ElementType> netPrecisions = {
ov::element::f32,
ov::element::f16
};
const std::vector<InputShapeParams> inShapeParams = {
InputShapeParams{3, 100, 5},
InputShapeParams{1, 10, 50},
InputShapeParams{2, 50, 50}
const std::vector<std::vector<ov::test::InputShape>> shapeParams = {
// num_batches, num_boxes, 4
{{{ngraph::Dimension::dynamic(), ngraph::Dimension::dynamic(), 4},
{{1, 10, 4}, {2, 100, 4}}},
// num_batches, num_classes, num_boxes
{{ngraph::Dimension::dynamic(), ngraph::Dimension::dynamic(), ngraph::Dimension::dynamic()},
{{1, 3, 10}, {2, 5, 100}}}},
// num_batches, num_boxes, 4
{{{ngraph::Dimension(1, 10), ngraph::Dimension(1, 100), 4},
{{1, 10, 4}, {2, 100, 4}}},
// num_batches, num_classes, num_boxes
{{{ngraph::Dimension(1, 10), ngraph::Dimension(1, 100), ngraph::Dimension(1, 100)}},
{{1, 3, 10}, {2, 5, 100}}}}
};
const std::vector<op::v8::MatrixNms::SortResultType> sortResultType = {op::v8::MatrixNms::SortResultType::CLASSID,
@ -43,10 +52,10 @@ namespace {
const std::vector<bool> normalized = {true, false};
const std::vector<op::v8::MatrixNms::DecayFunction> decayFunction = {op::v8::MatrixNms::DecayFunction::GAUSSIAN,
op::v8::MatrixNms::DecayFunction::LINEAR};
const auto nmsParams = ::testing::Combine(::testing::ValuesIn(inShapeParams),
::testing::Combine(::testing::Values(InferenceEngine::Precision::FP32),
::testing::Values(InferenceEngine::Precision::I32),
::testing::Values(InferenceEngine::Precision::FP32)),
const auto nmsParams = ::testing::Combine(::testing::ValuesIn(shapeParams),
::testing::Combine(::testing::Values(ov::element::f32),
::testing::Values(ov::element::i32),
::testing::Values(ov::element::f32)),
::testing::ValuesIn(sortResultType),
::testing::ValuesIn(outType),
::testing::ValuesIn(topKParams),

View File

@ -7,19 +7,25 @@
#include "shared_test_classes/single_layer/multiclass_nms.hpp"
using namespace ngraph;
using namespace LayerTestsDefinitions;
using namespace ov::test::subgraph;
namespace {
TEST_P(MulticlassNmsLayerTest, Serialize) {
Serialize();
serialize();
}
const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP32, InferenceEngine::Precision::FP16};
const std::vector<InputShapeParams> inShapeParams = {
InputShapeParams{3, 100, 5}, InputShapeParams{1, 10, 50},
InputShapeParams{2, 50, 50}};
const std::vector<std::vector<ov::test::InputShape>> shapeParams = {
// num_batches, num_boxes, 4
{{{ngraph::Dimension::dynamic(), ngraph::Dimension::dynamic(), 4},
{{1, 10, 4}, {2, 100, 4}}},
// num_batches, num_classes, num_boxes
{{ngraph::Dimension::dynamic(), ngraph::Dimension::dynamic(), ngraph::Dimension::dynamic()},
{{1, 3, 10}, {2, 5, 100}}}},
{{{ngraph::Dimension(1, 10), ngraph::Dimension(1, 100), 4},
{{1, 10, 4}, {2, 100, 4}}},
{{{ngraph::Dimension(1, 10), ngraph::Dimension(1, 100), ngraph::Dimension(1, 100)}},
{{1, 3, 10}, {2, 5, 100}}}}
};
const std::vector<int32_t> nmsTopK = {-1, 20};
const std::vector<float> iouThreshold = {0.7f};
@ -37,10 +43,10 @@ const std::vector<float> nmsEta = {0.6f, 1.0f};
const std::vector<bool> normalized = {true, false};
const auto nmsParams = ::testing::Combine(
::testing::ValuesIn(inShapeParams),
::testing::Combine(::testing::Values(InferenceEngine::Precision::FP32),
::testing::Values(InferenceEngine::Precision::I32),
::testing::Values(InferenceEngine::Precision::FP32)),
::testing::ValuesIn(shapeParams),
::testing::Combine(::testing::Values(ov::element::f32),
::testing::Values(ov::element::i32),
::testing::Values(ov::element::f32)),
::testing::ValuesIn(nmsTopK),
::testing::Combine(::testing::ValuesIn(iouThreshold),
::testing::ValuesIn(scoreThreshold),

View File

@ -14,6 +14,7 @@
#include <ngraph/opsets/opset3.hpp>
#include <ngraph/opsets/opset4.hpp>
#include <ngraph/opsets/opset5.hpp>
#include <ngraph/opsets/opset8.hpp>
#include <transformations/convert_precision.hpp>
#include <transformations/utils/utils.hpp>
#include <ngraph/pass/manager.hpp>
@ -125,6 +126,58 @@ TEST(TransformationTests, ConvertPrecision_NMS5) {
ASSERT_FALSE(has_type<ngraph::element::Type_t::f32>(f));
}
TEST(TransformationTests, ConvertPrecision_MatrixNms) {
std::shared_ptr<ngraph::Function> f;
{
auto boxes = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f16, ngraph::Shape{1, 1000, 4});
auto scores = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f16, ngraph::Shape{1, 1, 1000});
op::v8::MatrixNms::Attributes attrs;
attrs.output_type = ngraph::element::i64;
auto nms = std::make_shared<ngraph::opset8::MatrixNms>(boxes, scores, attrs);
auto result1 = std::make_shared<ngraph::opset8::Result>(nms->output(0));
auto result2 = std::make_shared<ngraph::opset8::Result>(nms->output(1));
auto result3 = std::make_shared<ngraph::opset8::Result>(nms->output(2));
f = std::make_shared<ngraph::Function>(ngraph::ResultVector{result1, result2, result3}, ngraph::ParameterVector{boxes, scores});
}
pass::Manager manager;
static const precisions_array precisions = {
{ ngraph::element::i64, ngraph::element::i32 },
{ ngraph::element::f16, ngraph::element::f32 }
};
manager.register_pass<ngraph::pass::ConvertPrecision>(precisions);
manager.run_passes(f);
ASSERT_FALSE(has_type<ngraph::element::Type_t::i64>(f));
ASSERT_FALSE(has_type<ngraph::element::Type_t::f16>(f));
}
TEST(TransformationTests, ConvertPrecision_MulticlassNms) {
std::shared_ptr<ngraph::Function> f;
{
auto boxes = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f16, ngraph::Shape{1, 1000, 4});
auto scores = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f16, ngraph::Shape{1, 1, 1000});
op::v8::MulticlassNms::Attributes attrs;
attrs.output_type = ngraph::element::i64;
auto nms = std::make_shared<ngraph::opset8::MulticlassNms>(boxes, scores, attrs);
auto result1 = std::make_shared<ngraph::opset8::Result>(nms->output(0));
auto result2 = std::make_shared<ngraph::opset8::Result>(nms->output(1));
auto result3 = std::make_shared<ngraph::opset8::Result>(nms->output(2));
f = std::make_shared<ngraph::Function>(ngraph::ResultVector{result1, result2, result3}, ngraph::ParameterVector{boxes, scores});
}
pass::Manager manager;
static const precisions_array precisions = {
{ ngraph::element::i64, ngraph::element::i32 },
{ ngraph::element::f16, ngraph::element::f32 }
};
manager.register_pass<ngraph::pass::ConvertPrecision>(precisions);
manager.run_passes(f);
ASSERT_FALSE(has_type<ngraph::element::Type_t::i64>(f));
ASSERT_FALSE(has_type<ngraph::element::Type_t::f16>(f));
}
TEST(TransformationTests, ConvertPrecision_ShapeOf) {
std::shared_ptr<Function> f(nullptr);
{

View File

@ -8,14 +8,25 @@
#include "single_layer_tests/matrix_nms.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
using namespace ov::test::subgraph;
using namespace InferenceEngine;
using namespace ngraph;
const std::vector<std::vector<ov::Shape>> inStaticShapeParams = {
{{3, 100, 4}, {3, 1, 100}},
{{1, 10, 4}, {1, 100, 10 }}
};
const std::vector<InputShapeParams> inShapeParams = {
InputShapeParams{3, 100, 5},
InputShapeParams{1, 10, 50},
InputShapeParams{2, 50, 50}
const std::vector<std::vector<ov::test::InputShape>> inDynamicShapeParams = {
// num_batches, num_boxes, 4
{{{ngraph::Dimension::dynamic(), ngraph::Dimension::dynamic(), 4},
{{1, 10, 4}, {2, 100, 4}}},
// num_batches, num_classes, num_boxes
{{ngraph::Dimension::dynamic(), ngraph::Dimension::dynamic(), ngraph::Dimension::dynamic()},
{{1, 3, 10}, {2, 5, 100}}}},
{{{ngraph::Dimension(1, 10), ngraph::Dimension(1, 100), 4},
{{1, 10, 4}, {2, 100, 4}}},
{{{ngraph::Dimension(1, 10), ngraph::Dimension(1, 100), ngraph::Dimension(1, 100)}},
{{1, 3, 10}, {2, 5, 100}}}}
};
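// Each entry above pairs the PartialShape bounds used to build the network with the concrete
// target shapes the test infers on. Assuming ov::test::InputShape is such a
// {PartialShape, {static shapes}} pair, a single hypothetical 'boxes' entry would be:
//   ov::test::InputShape boxes{{ngraph::Dimension::dynamic(), ngraph::Dimension::dynamic(), 4},
//                              {{1, 10, 4}, {2, 100, 4}}};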
const std::vector<op::v8::MatrixNms::SortResultType> sortResultType = {op::v8::MatrixNms::SortResultType::CLASSID,
@ -32,23 +43,38 @@ const std::vector<ThresholdParams> thresholdParams = {
};
const std::vector<int> nmsTopK = {-1, 100};
const std::vector<int> keepTopK = {-1, 5};
const std::vector<int> backgroudClass = {-1, 0};
const std::vector<int> backgroudClass = {-1, 1};
const std::vector<bool> normalized = {true, false};
const std::vector<op::v8::MatrixNms::DecayFunction> decayFunction = {op::v8::MatrixNms::DecayFunction::GAUSSIAN,
op::v8::MatrixNms::DecayFunction::LINEAR};
const auto nmsParams = ::testing::Combine(::testing::ValuesIn(inShapeParams),
::testing::Combine(::testing::Values(Precision::FP32),
::testing::Values(Precision::I32),
::testing::Values(Precision::FP32)),
::testing::ValuesIn(sortResultType),
::testing::ValuesIn(outType),
::testing::ValuesIn(topKParams),
::testing::ValuesIn(thresholdParams),
::testing::ValuesIn(backgroudClass),
::testing::ValuesIn(normalized),
::testing::ValuesIn(decayFunction),
::testing::Values(CommonTestUtils::DEVICE_CPU)
const auto nmsParamsStatic = ::testing::Combine(::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inStaticShapeParams)),
::testing::Combine(::testing::Values(ov::element::f32),
::testing::Values(ov::element::i32),
::testing::Values(ov::element::f32)),
::testing::ValuesIn(sortResultType),
::testing::ValuesIn(outType),
::testing::ValuesIn(topKParams),
::testing::ValuesIn(thresholdParams),
::testing::ValuesIn(backgroudClass),
::testing::ValuesIn(normalized),
::testing::ValuesIn(decayFunction),
::testing::Values(CommonTestUtils::DEVICE_CPU)
);
INSTANTIATE_TEST_SUITE_P(smoke_MatrixNmsLayerTest, MatrixNmsLayerTest, nmsParams, MatrixNmsLayerTest::getTestCaseName);
const auto nmsParamsDynamic = ::testing::Combine(::testing::ValuesIn(inDynamicShapeParams),
::testing::Combine(::testing::Values(ov::element::f32),
::testing::Values(ov::element::i32),
::testing::Values(ov::element::f32)),
::testing::ValuesIn(sortResultType),
::testing::ValuesIn(outType),
::testing::ValuesIn(topKParams),
::testing::ValuesIn(thresholdParams),
::testing::ValuesIn(backgroudClass),
::testing::ValuesIn(normalized),
::testing::ValuesIn(decayFunction),
::testing::Values(CommonTestUtils::DEVICE_CPU)
);
INSTANTIATE_TEST_SUITE_P(smoke_MatrixNmsLayerTest_static, MatrixNmsLayerTest, nmsParamsStatic, MatrixNmsLayerTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_MatrixNmsLayerTest_dynamic, MatrixNmsLayerTest, nmsParamsDynamic, MatrixNmsLayerTest::getTestCaseName);

View File

@ -8,16 +8,32 @@
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
using namespace ov::test::subgraph;
using namespace InferenceEngine;
using namespace ngraph;
const std::vector<InputShapeParams> inShapeParams = {InputShapeParams {3, 100, 5}, InputShapeParams {1, 10, 50}, InputShapeParams {2, 50, 50}};
const std::vector<std::vector<ov::Shape>> inStaticShapeParams = {
{{3, 100, 4}, {3, 1, 100}},
{{1, 10, 4}, {1, 100, 10 }}
};
const std::vector<std::vector<ov::test::InputShape>> inDynamicShapeParams = {
// num_batches, num_boxes, 4
{{{ngraph::Dimension::dynamic(), ngraph::Dimension::dynamic(), 4},
{{1, 10, 4}, {2, 100, 4}}},
// num_batches, num_classes, num_boxes
{{ngraph::Dimension::dynamic(), ngraph::Dimension::dynamic(), ngraph::Dimension::dynamic()},
{{1, 3, 10}, {2, 5, 100}}}},
{{{ngraph::Dimension(1, 10), ngraph::Dimension(1, 100), 4},
{{1, 10, 4}, {2, 100, 4}}},
{{{ngraph::Dimension(1, 10), ngraph::Dimension(1, 100), ngraph::Dimension(1, 100)}},
{{1, 3, 10}, {2, 5, 100}}}}
};
const std::vector<int32_t> nmsTopK = {-1, 20};
const std::vector<float> iouThreshold = {0.7f};
const std::vector<float> scoreThreshold = {0.7f};
const std::vector<int32_t> backgroundClass = {-1, 0};
const std::vector<int32_t> backgroundClass = {-1, 1};
const std::vector<int32_t> keepTopK = {-1, 30};
const std::vector<element::Type> outType = {element::i32, element::i64};
@@ -27,11 +43,29 @@ const std::vector<bool> sortResDesc = {true, false};
const std::vector<float> nmsEta = {0.6f, 1.0f};
const std::vector<bool> normalized = {true, false};
const auto nmsParams = ::testing::Combine(
::testing::ValuesIn(inShapeParams),
::testing::Combine(::testing::Values(Precision::FP32), ::testing::Values(Precision::I32), ::testing::Values(Precision::FP32)), ::testing::ValuesIn(nmsTopK),
const auto nmsParamsStatic = ::testing::Combine(
::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inStaticShapeParams)),
::testing::Combine(::testing::Values(ov::element::f32), ::testing::Values(ov::element::i32), ::testing::Values(ov::element::f32)),
::testing::ValuesIn(nmsTopK),
::testing::Combine(::testing::ValuesIn(iouThreshold), ::testing::ValuesIn(scoreThreshold), ::testing::ValuesIn(nmsEta)),
::testing::ValuesIn(backgroundClass), ::testing::ValuesIn(keepTopK), ::testing::ValuesIn(outType), ::testing::ValuesIn(sortResultType),
::testing::Combine(::testing::ValuesIn(sortResDesc), ::testing::ValuesIn(normalized)), ::testing::Values(CommonTestUtils::DEVICE_CPU));
::testing::ValuesIn(backgroundClass),
::testing::ValuesIn(keepTopK),
::testing::ValuesIn(outType),
::testing::ValuesIn(sortResultType),
::testing::Combine(::testing::ValuesIn(sortResDesc), ::testing::ValuesIn(normalized)),
::testing::Values(CommonTestUtils::DEVICE_CPU));
INSTANTIATE_TEST_SUITE_P(smoke_MulticlassNmsLayerTest, MulticlassNmsLayerTest, nmsParams, MulticlassNmsLayerTest::getTestCaseName);
const auto nmsParamsDynamic = ::testing::Combine(
::testing::ValuesIn(inDynamicShapeParams),
::testing::Combine(::testing::Values(ov::element::f32), ::testing::Values(ov::element::i32), ::testing::Values(ov::element::f32)),
::testing::ValuesIn(nmsTopK),
::testing::Combine(::testing::ValuesIn(iouThreshold), ::testing::ValuesIn(scoreThreshold), ::testing::ValuesIn(nmsEta)),
::testing::ValuesIn(backgroundClass),
::testing::ValuesIn(keepTopK),
::testing::ValuesIn(outType),
::testing::ValuesIn(sortResultType),
::testing::Combine(::testing::ValuesIn(sortResDesc), ::testing::ValuesIn(normalized)),
::testing::Values(CommonTestUtils::DEVICE_CPU));
INSTANTIATE_TEST_SUITE_P(smoke_MulticlassNmsLayerTest_static, MulticlassNmsLayerTest, nmsParamsStatic, MulticlassNmsLayerTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_MulticlassNmsLayerTest_dynamic, MulticlassNmsLayerTest, nmsParamsDynamic, MulticlassNmsLayerTest::getTestCaseName);

View File

@@ -6,10 +6,14 @@
#include "shared_test_classes/single_layer/matrix_nms.hpp"
namespace LayerTestsDefinitions {
namespace ov {
namespace test {
namespace subgraph {
TEST_P(MatrixNmsLayerTest, CompareWithRefs) {
Run();
run();
};
} // namespace LayerTestsDefinitions
} // namespace subgraph
} // namespace test
} // namespace ov

View File

@@ -6,10 +6,14 @@
#include "shared_test_classes/single_layer/multiclass_nms.hpp"
namespace LayerTestsDefinitions {
namespace ov {
namespace test {
namespace subgraph {
TEST_P(MulticlassNmsLayerTest, CompareWithRefs) {
Run();
run();
};
} // namespace LayerTestsDefinitions
} // namespace subgraph
} // namespace test
} // namespace ov

View File

@@ -7,18 +7,18 @@
#include <tuple>
#include <string>
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "ngraph_functions/builders.hpp"
#include "ngraph_functions/utils/ngraph_helpers.hpp"
#include "common_test_utils/common_utils.hpp"
#include "shared_test_classes/base/ov_subgraph.hpp"
namespace LayerTestsDefinitions {
namespace ov {
namespace test {
namespace subgraph {
using InputShapeParams = std::tuple<size_t, // Number of batches
size_t, // Number of boxes
size_t>; // Number of classes
using InputPrecisions = std::tuple<InferenceEngine::Precision, // boxes and scores precisions
InferenceEngine::Precision, // max_output_boxes_per_class precision
InferenceEngine::Precision>; // iou_threshold, score_threshold, soft_nms_sigma precisions
using InputPrecisions = std::tuple<ElementType, // boxes and scores precisions
ElementType, // max_output_boxes_per_class
// precision
ElementType>; // iou_threshold, score_threshold,
// soft_nms_sigma precisions
using TopKParams = std::tuple<int, // Maximum number of boxes to be selected per class
int>; // Maximum number of boxes to be selected per batch element
@@ -27,7 +27,7 @@ using ThresholdParams = std::tuple<float, // minimum score to consider box for
float, // gaussian_sigma parameter for gaussian decay_function
float>; // filter out boxes with low confidence score after decaying
using NmsParams = std::tuple<InputShapeParams, // Params using to create 1st and 2nd inputs
using NmsParams = std::tuple<std::vector<InputShape>, // Params using to create 1st and 2nd inputs
InputPrecisions, // Input precisions
ngraph::op::v8::MatrixNms::SortResultType, // Order of output elements
ngraph::element::Type, // Output type
@@ -38,21 +38,22 @@ using NmsParams = std::tuple<InputShapeParams,
ngraph::op::v8::MatrixNms::DecayFunction, // Decay function
std::string>; // Device name
class MatrixNmsLayerTest : public testing::WithParamInterface<NmsParams>, virtual public LayerTestsUtils::LayerTestsCommon {
class MatrixNmsLayerTest : public testing::WithParamInterface<NmsParams>,
virtual public SubgraphBaseTest {
public:
static std::string getTestCaseName(const testing::TestParamInfo<NmsParams>& obj);
void GenerateInputs() override;
void Compare(const std::vector<std::pair<ngraph::element::Type, std::vector<std::uint8_t>>> &expectedOutputs,
const std::vector<InferenceEngine::Blob::Ptr> &actualOutputs)
override;
void generate_inputs(const std::vector<ngraph::Shape>& targetInputStaticShapes) override;
void compare(const std::vector<ov::runtime::Tensor> &expected, const std::vector<ov::runtime::Tensor> &actual) override;
protected:
void SetUp() override;
private:
size_t numBatches, numBoxes, numClasses;
size_t maxOutputBoxesPerClass;
size_t maxOutputBoxesPerBatch;
void GetOutputParams(size_t& numBatches, size_t& maxOutputBoxesPerBatch);
ngraph::op::v8::MatrixNms::Attributes m_attrs;
bool m_outStaticShape;
};
} // namespace LayerTestsDefinitions
} // namespace subgraph
} // namespace test
} // namespace ov

View File

@@ -7,20 +7,19 @@
#include <string>
#include <tuple>
#include "ngraph_functions/builders.hpp"
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "ngraph_functions/utils/ngraph_helpers.hpp"
#include "common_test_utils/common_utils.hpp"
#include "shared_test_classes/base/ov_subgraph.hpp"
namespace LayerTestsDefinitions {
namespace ov {
namespace test {
namespace subgraph {
using InputShapeParams = std::tuple<size_t, // Number of batches
size_t, // Number of boxes
size_t>; // Number of classes
using InputPrecisions = std::tuple<InferenceEngine::Precision, // boxes and scores precisions
InferenceEngine::Precision, // max_output_boxes_per_class
// precision
InferenceEngine::Precision>; // iou_threshold, score_threshold,
// soft_nms_sigma precisions
using InputPrecisions = std::tuple<ElementType, // boxes and scores precisions
ElementType, // max_output_boxes_per_class
// precision
ElementType>; // iou_threshold, score_threshold,
// soft_nms_sigma precisions
using InputfloatVar = std::tuple<float, // iouThreshold
float, // scoreThreshold
@@ -29,7 +28,7 @@ using InputfloatVar = std::tuple<float, // iouThreshold
using InputboolVar = std::tuple<bool, // nmsEta
bool>; // normalized
using MulticlassNmsParams = std::tuple<InputShapeParams, // Params using to create 1st and 2nd inputs
using MulticlassNmsParams = std::tuple<std::vector<InputShape>, // Params using to create 1st and 2nd inputs
InputPrecisions, // Input precisions
int32_t, // Max output boxes per class
InputfloatVar, // iouThreshold, scoreThreshold, nmsEta
@@ -40,20 +39,21 @@ using MulticlassNmsParams = std::tuple<InputShapeParams,
InputboolVar, // Sort result across batch, normalized
std::string>;
class MulticlassNmsLayerTest : public testing::WithParamInterface<MulticlassNmsParams>, virtual public LayerTestsUtils::LayerTestsCommon {
class MulticlassNmsLayerTest : public testing::WithParamInterface<MulticlassNmsParams>,
virtual public SubgraphBaseTest {
public:
static std::string getTestCaseName(const testing::TestParamInfo<MulticlassNmsParams>& obj);
void GenerateInputs() override;
void Compare(const std::vector<std::pair<ngraph::element::Type, std::vector<std::uint8_t>>>& expectedOutputs,
const std::vector<InferenceEngine::Blob::Ptr>& actualOutputs) override;
void generate_inputs(const std::vector<ngraph::Shape>& targetInputStaticShapes) override;
void compare(const std::vector<ov::runtime::Tensor> &expected, const std::vector<ov::runtime::Tensor> &actual) override;
protected:
void SetUp() override;
private:
size_t numBatches, numBoxes, numClasses;
size_t maxOutputBoxesPerClass;
size_t maxOutputBoxesPerBatch;
void GetOutputParams(size_t& numBatches, size_t& maxOutputBoxesPerBatch);
ngraph::op::v8::MulticlassNms::Attributes m_attrs;
bool m_outStaticShape;
};
} // namespace LayerTestsDefinitions
} // namespace subgraph
} // namespace test
} // namespace ov

View File

@@ -2,16 +2,22 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "ngraph_functions/builders.hpp"
#include "functional_test_utils/ov_tensor_utils.hpp"
#include "shared_test_classes/single_layer/matrix_nms.hpp"
#include "shared_test_classes/base/layer_test_utils.hpp"
namespace LayerTestsDefinitions {
#include "functional_test_utils/plugin_cache.hpp"
namespace ov {
namespace test {
namespace subgraph {
using namespace ngraph;
using namespace InferenceEngine;
using namespace FuncTestUtils::PrecisionUtils;
std::string MatrixNmsLayerTest::getTestCaseName(const testing::TestParamInfo<NmsParams>& obj) {
InputShapeParams inShapeParams;
std::vector<InputShape> shapes;
InputPrecisions inPrecisions;
op::v8::MatrixNms::SortResultType sortResultType;
element::Type outType;
@@ -21,13 +27,10 @@ std::string MatrixNmsLayerTest::getTestCaseName(const testing::TestParamInfo<Nms
ThresholdParams thresholdParams;
bool normalized;
std::string targetDevice;
std::tie(inShapeParams, inPrecisions, sortResultType, outType, topKParams, thresholdParams,
std::tie(shapes, inPrecisions, sortResultType, outType, topKParams, thresholdParams,
backgroudClass, normalized, decayFunction, targetDevice) = obj.param;
size_t numBatches, numBoxes, numClasses;
std::tie(numBatches, numBoxes, numClasses) = inShapeParams;
Precision paramsPrec, maxBoxPrec, thrPrec;
ElementType paramsPrec, maxBoxPrec, thrPrec;
std::tie(paramsPrec, maxBoxPrec, thrPrec) = inPrecisions;
int nmsTopK, keepTopK;
@@ -37,8 +40,18 @@ std::string MatrixNmsLayerTest::getTestCaseName(const testing::TestParamInfo<Nms
std::tie(score_threshold, gaussian_sigma, post_threshold) = thresholdParams;
std::ostringstream result;
result << "numBatches=" << numBatches << "_numBoxes=" << numBoxes << "_numClasses=" << numClasses << "_";
result << "paramsPrec=" << paramsPrec << "_maxBoxPrec=" << maxBoxPrec << "_thrPrec=" << thrPrec << "_";
result << "IS=(";
for (const auto& shape : shapes) {
result << CommonTestUtils::partialShape2str({shape.first}) << "_";
}
result << ")_TS=(";
for (const auto& shape : shapes) {
for (const auto& item : shape.second) {
result << CommonTestUtils::vec2str(item) << "_";
}
}
result << ")_paramsPrec=" << paramsPrec << "_maxBoxPrec=" << maxBoxPrec << "_thrPrec=" << thrPrec << "_";
result << "sortResultType=" << sortResultType << "_normalized=" << normalized << "_";
result << "outType=" << outType << "_nmsTopK=" << nmsTopK << "_keepTopK=" << keepTopK << "_";
result << "backgroudClass=" << backgroudClass << "_decayFunction=" << decayFunction << "_";
@@ -47,38 +60,86 @@ std::string MatrixNmsLayerTest::getTestCaseName(const testing::TestParamInfo<Nms
return result.str();
}
void MatrixNmsLayerTest::GenerateInputs() {
size_t it = 0;
for (const auto &input : cnnNetwork.getInputsInfo()) {
const auto &info = input.second;
Blob::Ptr blob;
void MatrixNmsLayerTest::generate_inputs(const std::vector<ngraph::Shape>& targetInputStaticShapes) {
inputs.clear();
if (it == 1) {
blob = make_blob_with_precision(info->getTensorDesc());
blob->allocate();
CommonTestUtils::fill_data_random_float<Precision::FP32>(blob, 1, 0, 100000);
const auto& funcInputs = function->inputs();
for (int i = 0; i < funcInputs.size(); ++i) {
const auto& funcInput = funcInputs[i];
ov::runtime::Tensor tensor;
if (i == 1) {
tensor = ov::runtime::Tensor(funcInput.get_element_type(), targetInputStaticShapes[i]);
const size_t range = 1;
const size_t startFrom = 0;
const size_t k = 1000;
const int seed = 1;
std::default_random_engine random(seed);
std::uniform_int_distribution<int32_t> distribution(k * startFrom, k * (startFrom + range));
auto *dataPtr = tensor.data<float>();
for (size_t i = 0; i < tensor.get_size(); i++) {
auto value = static_cast<float>(distribution(random));
dataPtr[i] = value / static_cast<float>(k);
}
} else {
blob = GenerateInput(*info);
tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i]);
}
inputs.push_back(blob);
it++;
inputs.insert({funcInput.get_node_shared_ptr(), tensor});
}
}
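The scores input (index 1) is filled manually above; with the constants used (startFrom = 0, range = 1, k = 1000) the draw reduces to uniform values on [0, 1] with a step of 0.001. A small stand-alone sketch of that arithmetic (illustrative only, requires <random>, not part of the test):
// Reproduces the score-filling arithmetic with the same constants as above.
std::default_random_engine rng(1);                                       // seed = 1, as in the test
std::uniform_int_distribution<int32_t> dist(1000 * 0, 1000 * (0 + 1));   // integers in [0, 1000]
float score = static_cast<float>(dist(rng)) / 1000.f;                    // uniform in [0.0, 1.0], 0.001 resolution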
void MatrixNmsLayerTest::Compare(const std::vector<std::pair<ngraph::element::Type, std::vector<std::uint8_t>>> &expectedOutputs,
const std::vector<Blob::Ptr> &actualOutputs) {
void MatrixNmsLayerTest::GetOutputParams(size_t& numBatches, size_t& maxOutputBoxesPerBatch) {
size_t it = 0;
size_t numBoxes = 0, numClasses = 0;
const auto& funcInputs = function->inputs();
for (int i = 0; i < funcInputs.size(); ++i) {
const auto& funcInput = funcInputs[i];
const auto& dims = inputs[funcInput.get_node_shared_ptr()].get_shape();
if (it == 1) {
numClasses = dims[1];
} else {
numBatches = dims[0];
numBoxes = dims[1];
}
it++;
}
ASSERT_TRUE(numBatches > 0 && numBoxes > 0 && numClasses > 0)
<< "Expected numBatches, numBoxes, numClasses > 0, got:" << numBatches << ", " << numBoxes << ", " << numClasses;
auto realClasses = numClasses;
if (m_attrs.background_class >= 0 && m_attrs.background_class < numClasses) {
realClasses = realClasses - 1;
}
size_t maxOutputBoxesPerClass = 0;
if (m_attrs.nms_top_k >= 0)
maxOutputBoxesPerClass = std::min(numBoxes, static_cast<size_t>(m_attrs.nms_top_k));
else
maxOutputBoxesPerClass = numBoxes;
maxOutputBoxesPerBatch = maxOutputBoxesPerClass * realClasses;
if (m_attrs.keep_top_k >= 0)
maxOutputBoxesPerBatch =
std::min(maxOutputBoxesPerBatch, static_cast<size_t>(m_attrs.keep_top_k));
}
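To make the padding bound concrete, here is the same computation with hypothetical numbers (not taken from the test data): 100 boxes, 5 classes, background_class = 0, nms_top_k = 20, keep_top_k = 50.
// realClasses = 5 - 1 = 4 (background class excluded)
// maxOutputBoxesPerClass = min(100, 20) = 20
// maxOutputBoxesPerBatch = min(20 * 4, 50) = 50
size_t numBoxes = 100, realClasses = 4;
int nmsTopK = 20, keepTopK = 50;
size_t perClass = nmsTopK >= 0 ? std::min(numBoxes, static_cast<size_t>(nmsTopK)) : numBoxes;  // 20
size_t perBatch = perClass * realClasses;                                                      // 80
if (keepTopK >= 0)
    perBatch = std::min(perBatch, static_cast<size_t>(keepTopK));                              // 50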
void MatrixNmsLayerTest::compare(const std::vector<ov::runtime::Tensor> &expectedOutputs,
const std::vector<ov::runtime::Tensor> &actualOutputs) {
auto batchIndex = -1;
size_t numBatches, maxOutputBoxesPerBatch;
GetOutputParams(numBatches, maxOutputBoxesPerBatch);
std::vector<int32_t> numPerBatch(numBatches);
for (int outputIndex = static_cast<int>(expectedOutputs.size()) - 1; outputIndex >= 0 ; outputIndex--) {
const auto& actual = actualOutputs[outputIndex];
const auto _dims = actual->getTensorDesc().getDims();
const auto _dims = actual.get_shape();
if (_dims.size() == 1 && _dims[0] == numBatches) {
batchIndex = outputIndex;
auto memory = InferenceEngine::as<InferenceEngine::MemoryBlob>(actual);
IE_ASSERT(memory);
const auto lockedMemory = memory->wmap();
const auto actualBuffer = lockedMemory.as<const uint8_t *>();
auto buffer = reinterpret_cast<const int32_t *>(actualBuffer);
auto buffer = reinterpret_cast<const int32_t*>(actual.data());
std::copy_n(buffer, numBatches, numPerBatch.begin());
}
}
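// Note: the output whose shape is [numBatches] is the selected_num output (valid detections per
// batch); its contents drive the per-batch slicing of the boxes/indices comparisons below.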
@@ -86,39 +147,30 @@ void MatrixNmsLayerTest::Compare(const std::vector<std::pair<ngraph::element::Ty
for (int outputIndex = static_cast<int>(expectedOutputs.size()) - 1; outputIndex >= 0 ; outputIndex--) {
const auto& expected = expectedOutputs[outputIndex];
const auto& actual = actualOutputs[outputIndex];
const auto actualBuffer = static_cast<uint8_t*>(actual.data());
const auto expectedBuffer = static_cast<uint8_t*>(expected.data());
//Compare Selected Outputs & Selected Indices
if (outputIndex != batchIndex) {
const auto &expectedBuffer = expected.second.data();
auto memory = InferenceEngine::as<InferenceEngine::MemoryBlob>(actual);
IE_ASSERT(memory);
const auto lockedMemory = memory->wmap();
const auto actualBuffer = lockedMemory.as<const uint8_t *>();
auto k = static_cast<float>(expected.first.size()) / actual->getTensorDesc().getPrecision().size();
// W/A for int4, uint4
if (expected.first == ngraph::element::Type_t::u4 || expected.first == ngraph::element::Type_t::i4) {
k /= 2;
}
if (outputIndex == 2) {
if (expected.second.size() != k * actual->byteSize())
if (expected.get_size() != actual.get_size())
throw std::runtime_error("Expected and actual size 3rd output have different size");
}
const auto &precision = actual->getTensorDesc().getPrecision();
const auto& precision = actual.get_element_type();
auto expected_offset = 0;
auto actual_offset = 0;
for (size_t i = 0; i < numPerBatch.size(); i++) {
auto validNums = numPerBatch[i];
switch (precision) {
case InferenceEngine::Precision::FP32: {
switch (expected.first) {
case ngraph::element::Type_t::f32:
case ov::element::f32: {
switch (expected.get_element_type()) {
case ov::element::f32:
LayerTestsUtils::LayerTestsCommon::Compare(
reinterpret_cast<const float *>(expectedBuffer) + expected_offset * 6,
reinterpret_cast<const float *>(actualBuffer) + actual_offset * 6, validNums * 6, 1e-5f);
break;
case ngraph::element::Type_t::f64:
case ov::element::f64:
LayerTestsUtils::LayerTestsCommon::Compare(
reinterpret_cast<const double *>(expectedBuffer) + expected_offset * 6,
reinterpret_cast<const float *>(actualBuffer) + actual_offset * 6, validNums *6, 1e-5f);
@@ -126,22 +178,23 @@ void MatrixNmsLayerTest::Compare(const std::vector<std::pair<ngraph::element::Ty
default:
break;
}
const auto fBuffer = lockedMemory.as<const float *>();
for (size_t tailing = validNums * 6; tailing < maxOutputBoxesPerBatch * 6; tailing++) {
ASSERT_TRUE(std::abs(fBuffer[(actual_offset * 6 + tailing)] - -1.f) < 1e-5)
<< "Invalid default value: " << fBuffer[i] << " at index: " << i;
if (m_outStaticShape) {
const auto fBuffer = static_cast<float*>(actual.data());
for (size_t tailing = validNums * 6; tailing < maxOutputBoxesPerBatch * 6; tailing++) {
ASSERT_TRUE(std::abs(fBuffer[(actual_offset * 6 + tailing)] - -1.f) < 1e-5)
<< "Invalid default value: " << fBuffer[i] << " at index: " << i;
}
}
break;
}
case InferenceEngine::Precision::I32: {
switch (expected.first) {
case ngraph::element::Type_t::i32:
case ov::element::i32: {
switch (expected.get_element_type()) {
case ov::element::i32:
LayerTestsUtils::LayerTestsCommon::Compare(
reinterpret_cast<const int32_t *>(expectedBuffer) + expected_offset,
reinterpret_cast<const int32_t *>(actualBuffer) + actual_offset, validNums, 0);
break;
case ngraph::element::Type_t::i64:
case ov::element::i64:
LayerTestsUtils::LayerTestsCommon::Compare(
reinterpret_cast<const int64_t *>(expectedBuffer) + expected_offset,
reinterpret_cast<const int32_t *>(actualBuffer) + actual_offset, validNums, 0);
@@ -149,46 +202,42 @@ void MatrixNmsLayerTest::Compare(const std::vector<std::pair<ngraph::element::Ty
default:
break;
}
const auto iBuffer = lockedMemory.as<const int *>();
for (size_t tailing = validNums; tailing < maxOutputBoxesPerBatch; tailing++) {
ASSERT_TRUE(iBuffer[actual_offset + tailing] == -1) << "Invalid default value: " << iBuffer[i] << " at index: " << i;
if (m_outStaticShape) {
const auto iBuffer = static_cast<int*>(actual.data());
for (size_t tailing = validNums; tailing < maxOutputBoxesPerBatch; tailing++) {
ASSERT_TRUE(iBuffer[actual_offset + tailing] == -1) << "Invalid default value: " << iBuffer[i] << " at index: " << i;
}
}
break;
}
default:
FAIL() << "Comparator for " << precision << " precision isn't supported";
}
expected_offset += validNums;
actual_offset += maxOutputBoxesPerBatch;
if (!m_outStaticShape) {
expected_offset += validNums;
actual_offset += validNums;
} else {
expected_offset += validNums;
actual_offset += maxOutputBoxesPerBatch;
}
}
} else {
const auto &expectedBuffer = expected.second.data();
auto memory = InferenceEngine::as<InferenceEngine::MemoryBlob>(actual);
IE_ASSERT(memory);
const auto lockedMemory = memory->wmap();
const auto actualBuffer = lockedMemory.as<const uint8_t *>();
auto k = static_cast<float>(expected.first.size()) / actual->getTensorDesc().getPrecision().size();
// W/A for int4, uint4
if (expected.first == ngraph::element::Type_t::u4 || expected.first == ngraph::element::Type_t::i4) {
k /= 2;
}
if (outputIndex == 2) {
if (expected.second.size() != k * actual->byteSize())
if (expected.get_size() != actual.get_size())
throw std::runtime_error("Expected and actual size 3rd output have different size");
}
const auto &precision = actual->getTensorDesc().getPrecision();
size_t size = expected.second.size() / (k * actual->getTensorDesc().getPrecision().size());
const auto& precision = actual.get_element_type();
size_t size = expected.get_size();
switch (precision) {
case InferenceEngine::Precision::I32: {
switch (expected.first) {
case ngraph::element::Type_t::i32:
case ov::element::i32: {
switch (expected.get_element_type()) {
case ov::element::i32:
LayerTestsUtils::LayerTestsCommon::Compare(
reinterpret_cast<const int32_t *>(expectedBuffer),
reinterpret_cast<const int32_t *>(actualBuffer), size, 0);
break;
case ngraph::element::Type_t::i64:
case ov::element::i64:
LayerTestsUtils::LayerTestsCommon::Compare(
reinterpret_cast<const int64_t *>(expectedBuffer),
reinterpret_cast<const int32_t *>(actualBuffer), size, 0);
@@ -206,45 +255,48 @@ void MatrixNmsLayerTest::Compare(const std::vector<std::pair<ngraph::element::Ty
}
void MatrixNmsLayerTest::SetUp() {
InputShapeParams inShapeParams;
std::vector<InputShape> shapes;
InputPrecisions inPrecisions;
op::v8::MatrixNms::Attributes attrs;
TopKParams topKParams;
ThresholdParams thresholdParams;
std::tie(inShapeParams, inPrecisions, attrs.sort_result_type, attrs.output_type, topKParams, thresholdParams,
attrs.background_class, attrs.normalized, attrs.decay_function, targetDevice) = this->GetParam();
std::tie(shapes, inPrecisions, m_attrs.sort_result_type, m_attrs.output_type, topKParams, thresholdParams,
m_attrs.background_class, m_attrs.normalized, m_attrs.decay_function, targetDevice) = this->GetParam();
std::tie(attrs.nms_top_k, attrs.keep_top_k) = topKParams;
std::tie(attrs.score_threshold, attrs.gaussian_sigma, attrs.post_threshold) = thresholdParams;
std::tie(numBatches, numBoxes, numClasses) = inShapeParams;
auto realClasses = numClasses;
if (attrs.background_class >=0 && attrs.background_class <= numClasses) {
realClasses = realClasses - 1;
}
std::tie(m_attrs.nms_top_k, m_attrs.keep_top_k) = topKParams;
std::tie(m_attrs.score_threshold, m_attrs.gaussian_sigma, m_attrs.post_threshold) = thresholdParams;
maxOutputBoxesPerClass = 0;
if (attrs.nms_top_k >= 0)
maxOutputBoxesPerClass = std::min(numBoxes, static_cast<size_t>(attrs.nms_top_k));
else
maxOutputBoxesPerClass = numBoxes;
init_input_shapes(shapes);
maxOutputBoxesPerBatch = maxOutputBoxesPerClass * realClasses;
if (attrs.keep_top_k >= 0)
maxOutputBoxesPerBatch =
std::min(maxOutputBoxesPerBatch, static_cast<size_t>(attrs.keep_top_k));
Precision paramsPrec, maxBoxPrec, thrPrec;
// input is dynamic shape -> output will be dynamic shape
// input is static shape -> output will be static shape
const auto inputDynamicParam = {shapes[0].first, shapes[1].first};
m_outStaticShape = std::any_of(inputDynamicParam.begin(), inputDynamicParam.end(), [](const ov::PartialShape& shape) {
return shape.rank() == 0;
});
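// Hedged note: static_shapes_to_test_representation() presumably leaves the PartialShape member
// empty (static rank 0), so rank() == 0 picks out the purely static configurations; the dynamic
// configurations always carry rank-3 partial shapes and therefore keep the outputs dynamic.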
ElementType paramsPrec, maxBoxPrec, thrPrec;
std::tie(paramsPrec, maxBoxPrec, thrPrec) = inPrecisions;
const std::vector<size_t> boxesShape{numBatches, numBoxes, 4}, scoresShape{numBatches, numClasses, numBoxes};
auto ngPrc = convertIE2nGraphPrc(paramsPrec);
auto params = builder::makeParams(ngPrc, {boxesShape, scoresShape});
auto paramOuts = helpers::convert2OutputVector(helpers::castOps2Nodes<op::Parameter>(params));
auto nms = std::make_shared<opset8::MatrixNms>(paramOuts[0], paramOuts[1], attrs);
auto nms_0_identity = std::make_shared<opset5::Multiply>(nms->output(0), opset5::Constant::create(element::f32, Shape{1}, {1}));
auto nms_1_identity = std::make_shared<opset5::Multiply>(nms->output(1), opset5::Constant::create(attrs.output_type, Shape{1}, {1}));
auto nms_2_identity = std::make_shared<opset5::Multiply>(nms->output(2), opset5::Constant::create(attrs.output_type, Shape{1}, {1}));
function = std::make_shared<Function>(OutputVector{nms_0_identity, nms_1_identity, nms_2_identity}, params, "NMS");
const auto params = ngraph::builder::makeDynamicParams(paramsPrec, inputDynamicShapes);
const auto paramOuts =
ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
auto nms = std::make_shared<opset8::MatrixNms>(paramOuts[0], paramOuts[1], m_attrs);
if (!m_outStaticShape) {
auto result = std::make_shared<opset5::Result>(nms);
function = std::make_shared<Function>(result, params, "MatrixNMS");
} else {
auto nms_0_identity = std::make_shared<opset5::Multiply>(nms->output(0), opset5::Constant::create(element::f32, Shape{1}, {1}));
auto nms_1_identity = std::make_shared<opset5::Multiply>(nms->output(1), opset5::Constant::create(m_attrs.output_type, Shape{1}, {1}));
auto nms_2_identity = std::make_shared<opset5::Multiply>(nms->output(2), opset5::Constant::create(m_attrs.output_type, Shape{1}, {1}));
OutputVector results = {
std::make_shared<opset5::Result>(nms_0_identity),
std::make_shared<opset5::Result>(nms_1_identity),
std::make_shared<opset5::Result>(nms_2_identity)
};
function = std::make_shared<Function>(results, params, "MatrixNMS");
}
}
} // namespace LayerTestsDefinitions
} // namespace subgraph
} // namespace test
} // namespace ov

View File

@@ -2,16 +2,22 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "ngraph_functions/builders.hpp"
#include "functional_test_utils/ov_tensor_utils.hpp"
#include "shared_test_classes/single_layer/multiclass_nms.hpp"
#include "shared_test_classes/base/layer_test_utils.hpp"
namespace LayerTestsDefinitions {
#include "functional_test_utils/plugin_cache.hpp"
namespace ov {
namespace test {
namespace subgraph {
using namespace ngraph;
using namespace InferenceEngine;
using namespace FuncTestUtils::PrecisionUtils;
std::string MulticlassNmsLayerTest::getTestCaseName(const testing::TestParamInfo<MulticlassNmsParams>& obj) {
InputShapeParams inShapeParams;
std::vector<InputShape> shapes;
InputPrecisions inPrecisions;
int32_t nmsTopK, backgroundClass, keepTopK;
element::Type outType;
@@ -23,12 +29,9 @@ std::string MulticlassNmsLayerTest::getTestCaseName(const testing::TestParamInfo
std::string targetDevice;
std::tie(inShapeParams, inPrecisions, nmsTopK, inFloatVar, backgroundClass, keepTopK, outType, sortResultType, inboolVar, targetDevice) = obj.param;
std::tie(shapes, inPrecisions, nmsTopK, inFloatVar, backgroundClass, keepTopK, outType, sortResultType, inboolVar, targetDevice) = obj.param;
size_t numBatches, numBoxes, numClasses;
std::tie(numBatches, numBoxes, numClasses) = inShapeParams;
Precision paramsPrec, maxBoxPrec, thrPrec;
ElementType paramsPrec, maxBoxPrec, thrPrec;
std::tie(paramsPrec, maxBoxPrec, thrPrec) = inPrecisions;
float iouThr, scoreThr, nmsEta;
@@ -38,8 +41,18 @@ std::string MulticlassNmsLayerTest::getTestCaseName(const testing::TestParamInfo
std::tie(sortResCB, normalized) = inboolVar;
std::ostringstream result;
result << "numBatches=" << numBatches << "_numBoxes=" << numBoxes << "_numClasses=" << numClasses << "_";
result << "paramsPrec=" << paramsPrec << "_maxBoxPrec=" << maxBoxPrec << "_thrPrec=" << thrPrec << "_";
result << "IS=(";
for (const auto& shape : shapes) {
result << CommonTestUtils::partialShape2str({shape.first}) << "_";
}
result << ")_TS=(";
for (const auto& shape : shapes) {
for (const auto& item : shape.second) {
result << CommonTestUtils::vec2str(item) << "_";
}
}
result << ")_paramsPrec=" << paramsPrec << "_maxBoxPrec=" << maxBoxPrec << "_thrPrec=" << thrPrec << "_";
result << "nmsTopK=" << nmsTopK << "_";
result << "iouThr=" << iouThr << "_scoreThr=" << scoreThr << "_backgroundClass=" << backgroundClass << "_";
result << "keepTopK=" << keepTopK << "_outType=" << outType << "_";
@@ -48,38 +61,86 @@ std::string MulticlassNmsLayerTest::getTestCaseName(const testing::TestParamInfo
return result.str();
}
void MulticlassNmsLayerTest::GenerateInputs() {
size_t it = 0;
for (const auto& input : cnnNetwork.getInputsInfo()) {
const auto& info = input.second;
Blob::Ptr blob;
void MulticlassNmsLayerTest::generate_inputs(const std::vector<ngraph::Shape>& targetInputStaticShapes) {
inputs.clear();
if (it == 1) {
blob = make_blob_with_precision(info->getTensorDesc());
blob->allocate();
CommonTestUtils::fill_data_random_float<Precision::FP32>(blob, 1, 0, 1000);
const auto& funcInputs = function->inputs();
for (int i = 0; i < funcInputs.size(); ++i) {
const auto& funcInput = funcInputs[i];
ov::runtime::Tensor tensor;
if (i == 1) {
tensor = ov::runtime::Tensor(funcInput.get_element_type(), targetInputStaticShapes[i]);
const size_t range = 1;
const size_t startFrom = 0;
const size_t k = 1000;
const int seed = 1;
std::default_random_engine random(seed);
std::uniform_int_distribution<int32_t> distribution(k * startFrom, k * (startFrom + range));
auto *dataPtr = tensor.data<float>();
for (size_t i = 0; i < tensor.get_size(); i++) {
auto value = static_cast<float>(distribution(random));
dataPtr[i] = value / static_cast<float>(k);
}
} else {
blob = GenerateInput(*info);
tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i]);
}
inputs.push_back(blob);
it++;
inputs.insert({funcInput.get_node_shared_ptr(), tensor});
}
}
void MulticlassNmsLayerTest::Compare(const std::vector<std::pair<ngraph::element::Type, std::vector<std::uint8_t>>>& expectedOutputs,
const std::vector<Blob::Ptr>& actualOutputs) {
void MulticlassNmsLayerTest::GetOutputParams(size_t& numBatches, size_t& maxOutputBoxesPerBatch) {
size_t it = 0;
size_t numBoxes = 0, numClasses = 0;
const auto& funcInputs = function->inputs();
for (int i = 0; i < funcInputs.size(); ++i) {
const auto& funcInput = funcInputs[i];
const auto& dims = inputs[funcInput.get_node_shared_ptr()].get_shape();
if (it == 1) {
numClasses = dims[1];
} else {
numBatches = dims[0];
numBoxes = dims[1];
}
it++;
}
ASSERT_TRUE(numBatches > 0 && numBoxes > 0 && numClasses > 0)
<< "Expected numBatches, numBoxes, numClasses > 0, got:" << numBatches << ", " << numBoxes << ", " << numClasses;
auto realClasses = numClasses;
if (m_attrs.background_class >= 0 && m_attrs.background_class < numClasses) {
realClasses = realClasses - 1;
}
size_t maxOutputBoxesPerClass = 0;
if (m_attrs.nms_top_k >= 0)
maxOutputBoxesPerClass = std::min(numBoxes, static_cast<size_t>(m_attrs.nms_top_k));
else
maxOutputBoxesPerClass = numBoxes;
maxOutputBoxesPerBatch = maxOutputBoxesPerClass * realClasses;
if (m_attrs.keep_top_k >= 0)
maxOutputBoxesPerBatch =
std::min(maxOutputBoxesPerBatch, static_cast<size_t>(m_attrs.keep_top_k));
}
void MulticlassNmsLayerTest::compare(const std::vector<ov::runtime::Tensor> &expectedOutputs,
const std::vector<ov::runtime::Tensor> &actualOutputs) {
auto batchIndex = -1;
size_t numBatches, maxOutputBoxesPerBatch;
GetOutputParams(numBatches, maxOutputBoxesPerBatch);
std::vector<int32_t> numPerBatch(numBatches);
for (int outputIndex = static_cast<int>(expectedOutputs.size()) - 1; outputIndex >= 0; outputIndex--) {
const auto& actual = actualOutputs[outputIndex];
const auto _dims = actual->getTensorDesc().getDims();
const auto _dims = actual.get_shape();
if (_dims.size() == 1 && _dims[0] == numBatches) {
batchIndex = outputIndex;
auto memory = InferenceEngine::as<InferenceEngine::MemoryBlob>(actual);
IE_ASSERT(memory);
const auto lockedMemory = memory->wmap();
const auto actualBuffer = lockedMemory.as<const uint8_t*>();
auto buffer = reinterpret_cast<const int32_t*>(actualBuffer);
auto buffer = reinterpret_cast<const int32_t*>(actual.data());
std::copy_n(buffer, numBatches, numPerBatch.begin());
}
}
@@ -87,39 +148,30 @@ void MulticlassNmsLayerTest::Compare(const std::vector<std::pair<ngraph::element
for (int outputIndex = static_cast<int>(expectedOutputs.size()) - 1; outputIndex >= 0; outputIndex--) {
const auto& expected = expectedOutputs[outputIndex];
const auto& actual = actualOutputs[outputIndex];
const auto actualBuffer = static_cast<uint8_t*>(actual.data());
const auto expectedBuffer = static_cast<uint8_t*>(expected.data());
// Compare Selected Outputs & Selected Indices
if (outputIndex != batchIndex) {
const auto& expectedBuffer = expected.second.data();
auto memory = InferenceEngine::as<InferenceEngine::MemoryBlob>(actual);
IE_ASSERT(memory);
const auto lockedMemory = memory->wmap();
const auto actualBuffer = lockedMemory.as<const uint8_t*>();
auto k = static_cast<float>(expected.first.size()) / actual->getTensorDesc().getPrecision().size();
// W/A for int4, uint4
if (expected.first == ngraph::element::Type_t::u4 || expected.first == ngraph::element::Type_t::i4) {
k /= 2;
}
if (outputIndex == 2) {
if (expected.second.size() != k * actual->byteSize())
if (expected.get_size() != actual.get_size())
throw std::runtime_error("Expected and actual size 3rd output have different "
"size");
}
const auto& precision = actual->getTensorDesc().getPrecision();
const auto& precision = actual.get_element_type();
auto expected_offset = 0;
auto actual_offset = 0;
for (size_t i = 0; i < numPerBatch.size(); i++) {
auto validNums = numPerBatch[i];
switch (precision) {
case InferenceEngine::Precision::FP32: {
switch (expected.first) {
case ngraph::element::Type_t::f32:
case ov::element::f32: {
switch (expected.get_element_type()) {
case ov::element::f32:
LayerTestsUtils::LayerTestsCommon::Compare(reinterpret_cast<const float*>(expectedBuffer) + expected_offset * 6,
reinterpret_cast<const float*>(actualBuffer) + actual_offset * 6, validNums * 6, 1e-5f);
break;
case ngraph::element::Type_t::f64:
case ov::element::f64:
LayerTestsUtils::LayerTestsCommon::Compare(reinterpret_cast<const double*>(expectedBuffer) + expected_offset * 6,
reinterpret_cast<const float*>(actualBuffer) + actual_offset * 6, validNums * 6, 1e-5f);
break;
@@ -127,66 +179,64 @@ void MulticlassNmsLayerTest::Compare(const std::vector<std::pair<ngraph::element
break;
}
const auto fBuffer = lockedMemory.as<const float*>();
for (size_t tailing = validNums * 6; tailing < maxOutputBoxesPerBatch * 6; tailing++) {
ASSERT_TRUE(std::abs(fBuffer[(actual_offset * 6 + tailing)] - -1.f) < 1e-5)
<< "Invalid default value: " << fBuffer[i] << " at index: " << i;
if (m_outStaticShape) {
const auto fBuffer = static_cast<float*>(actual.data());
for (size_t tailing = validNums * 6; tailing < maxOutputBoxesPerBatch * 6; tailing++) {
ASSERT_TRUE(std::abs(fBuffer[(actual_offset * 6 + tailing)] - -1.f) < 1e-5)
<< "Invalid default value: " << fBuffer[i] << " at index: " << i;
}
}
break;
}
case InferenceEngine::Precision::I32: {
switch (expected.first) {
case ngraph::element::Type_t::i32:
case ov::element::i32: {
switch (expected.get_element_type()) {
case ov::element::i32:
LayerTestsUtils::LayerTestsCommon::Compare(reinterpret_cast<const int32_t*>(expectedBuffer) + expected_offset,
reinterpret_cast<const int32_t*>(actualBuffer) + actual_offset, validNums, 0);
break;
case ngraph::element::Type_t::i64:
case ov::element::i64:
LayerTestsUtils::LayerTestsCommon::Compare(reinterpret_cast<const int64_t*>(expectedBuffer) + expected_offset,
reinterpret_cast<const int32_t*>(actualBuffer) + actual_offset, validNums, 0);
break;
default:
break;
}
const auto iBuffer = lockedMemory.as<const int*>();
for (size_t tailing = validNums; tailing < maxOutputBoxesPerBatch; tailing++) {
ASSERT_TRUE(iBuffer[actual_offset + tailing] == -1) << "Invalid default value: " << iBuffer[i] << " at index: " << i;
if (m_outStaticShape) {
const auto iBuffer = static_cast<int*>(actual.data());
for (size_t tailing = validNums; tailing < maxOutputBoxesPerBatch; tailing++) {
ASSERT_TRUE(iBuffer[actual_offset + tailing] == -1) << "Invalid default value: " << iBuffer[i] << " at index: " << i;
}
}
break;
}
default:
FAIL() << "Comparator for " << precision << " precision isn't supported";
}
expected_offset += validNums;
actual_offset += maxOutputBoxesPerBatch;
if (!m_outStaticShape) {
expected_offset += validNums;
actual_offset += validNums;
} else {
expected_offset += validNums;
actual_offset += maxOutputBoxesPerBatch;
}
}
} else {
const auto& expectedBuffer = expected.second.data();
auto memory = InferenceEngine::as<InferenceEngine::MemoryBlob>(actual);
IE_ASSERT(memory);
const auto lockedMemory = memory->wmap();
const auto actualBuffer = lockedMemory.as<const uint8_t*>();
auto k = static_cast<float>(expected.first.size()) / actual->getTensorDesc().getPrecision().size();
// W/A for int4, uint4
if (expected.first == ngraph::element::Type_t::u4 || expected.first == ngraph::element::Type_t::i4) {
k /= 2;
}
if (outputIndex == 2) {
if (expected.second.size() != k * actual->byteSize())
if (expected.get_size() != actual.get_size())
throw std::runtime_error("Expected and actual size 3rd output have different "
"size");
}
const auto& precision = actual->getTensorDesc().getPrecision();
size_t size = expected.second.size() / (k * actual->getTensorDesc().getPrecision().size());
const auto& precision = actual.get_element_type();
size_t size = expected.get_size();
switch (precision) {
case InferenceEngine::Precision::I32: {
switch (expected.first) {
case ngraph::element::Type_t::i32:
case ov::element::i32: {
switch (expected.get_element_type()) {
case ov::element::i32:
LayerTestsUtils::LayerTestsCommon::Compare(reinterpret_cast<const int32_t*>(expectedBuffer), reinterpret_cast<const int32_t*>(actualBuffer),
size, 0);
break;
case ngraph::element::Type_t::i64:
case ov::element::i64:
LayerTestsUtils::LayerTestsCommon::Compare(reinterpret_cast<const int64_t*>(expectedBuffer), reinterpret_cast<const int32_t*>(actualBuffer),
size, 0);
break;
@@ -203,9 +253,8 @@ void MulticlassNmsLayerTest::Compare(const std::vector<std::pair<ngraph::element
}
void MulticlassNmsLayerTest::SetUp() {
InputShapeParams inShapeParams;
std::vector<InputShape> shapes;
InputPrecisions inPrecisions;
op::v8::MulticlassNms::Attributes attrs;
size_t maxOutBoxesPerClass, backgroundClass, keepTopK;
element::Type outType;
@@ -214,27 +263,19 @@ void MulticlassNmsLayerTest::SetUp() {
InputfloatVar inFloatVar;
InputboolVar inboolVar;
std::tie(inShapeParams, inPrecisions, maxOutBoxesPerClass, inFloatVar, backgroundClass, keepTopK, outType, sortResultType, inboolVar, targetDevice) =
std::tie(shapes, inPrecisions, maxOutBoxesPerClass, inFloatVar, backgroundClass, keepTopK, outType, sortResultType, inboolVar, targetDevice) =
this->GetParam();
// size_t numBatches, numBoxes, numClasses;
std::tie(numBatches, numBoxes, numClasses) = inShapeParams;
auto realClasses = numClasses;
if (backgroundClass >= 0 && backgroundClass <= numClasses) {
realClasses = realClasses - 1;
}
init_input_shapes(shapes);
maxOutputBoxesPerClass = 0;
if (maxOutBoxesPerClass >= 0)
maxOutputBoxesPerClass = std::min(numBoxes, static_cast<size_t>(maxOutBoxesPerClass));
else
maxOutputBoxesPerClass = numBoxes;
// input is dynamic shape -> output will be dynamic shape
// input is static shape -> output will be static shape
const auto inputDynamicParam = {shapes[0].first, shapes[1].first};
m_outStaticShape = std::any_of(inputDynamicParam.begin(), inputDynamicParam.end(), [](const ov::PartialShape& shape) {
return shape.rank() == 0;
});
maxOutputBoxesPerBatch = maxOutputBoxesPerClass * realClasses;
if (keepTopK >= 0)
maxOutputBoxesPerBatch = std::min(maxOutputBoxesPerBatch, static_cast<size_t>(keepTopK));
Precision paramsPrec, maxBoxPrec, thrPrec;
ElementType paramsPrec, maxBoxPrec, thrPrec;
std::tie(paramsPrec, maxBoxPrec, thrPrec) = inPrecisions;
float iouThr, scoreThr, nmsEta;
@@ -243,28 +284,39 @@ void MulticlassNmsLayerTest::SetUp() {
bool sortResCB, normalized;
std::tie(sortResCB, normalized) = inboolVar;
const std::vector<size_t> boxesShape {numBatches, numBoxes, 4}, scoresShape {numBatches, numClasses, numBoxes};
auto ngPrc = convertIE2nGraphPrc(paramsPrec);
auto params = builder::makeParams(ngPrc, {boxesShape, scoresShape});
auto paramOuts = helpers::convert2OutputVector(helpers::castOps2Nodes<op::Parameter>(params));
const auto params = ngraph::builder::makeDynamicParams(paramsPrec, inputDynamicShapes);
const auto paramOuts =
ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
attrs.iou_threshold = iouThr;
attrs.score_threshold = scoreThr;
attrs.nms_eta = nmsEta;
attrs.sort_result_type = sortResultType;
attrs.sort_result_across_batch = sortResCB;
attrs.output_type = outType;
attrs.nms_top_k = maxOutBoxesPerClass;
attrs.keep_top_k = keepTopK;
attrs.background_class = backgroundClass;
attrs.normalized = normalized;
m_attrs.iou_threshold = iouThr;
m_attrs.score_threshold = scoreThr;
m_attrs.nms_eta = nmsEta;
m_attrs.sort_result_type = sortResultType;
m_attrs.sort_result_across_batch = sortResCB;
m_attrs.output_type = outType;
m_attrs.nms_top_k = maxOutBoxesPerClass;
m_attrs.keep_top_k = keepTopK;
m_attrs.background_class = backgroundClass;
m_attrs.normalized = normalized;
auto nms = std::make_shared<opset8::MulticlassNms>(paramOuts[0], paramOuts[1], attrs);
auto nms = std::make_shared<opset8::MulticlassNms>(paramOuts[0], paramOuts[1], m_attrs);
auto nms_0_identity = std::make_shared<opset5::Multiply>(nms->output(0), opset5::Constant::create(ngPrc, Shape {1}, {1}));
auto nms_1_identity = std::make_shared<opset5::Multiply>(nms->output(1), opset5::Constant::create(outType, Shape {1}, {1}));
auto nms_2_identity = std::make_shared<opset5::Multiply>(nms->output(2), opset5::Constant::create(outType, Shape {1}, {1}));
function = std::make_shared<Function>(OutputVector {nms_0_identity, nms_1_identity, nms_2_identity}, params, "MulticlassNMS");
if (!m_outStaticShape) {
auto result = std::make_shared<opset5::Result>(nms);
function = std::make_shared<Function>(result, params, "MulticlassNMS");
} else {
auto nms_0_identity = std::make_shared<opset5::Multiply>(nms->output(0), opset5::Constant::create(paramsPrec, Shape {1}, {1}));
auto nms_1_identity = std::make_shared<opset5::Multiply>(nms->output(1), opset5::Constant::create(outType, Shape {1}, {1}));
auto nms_2_identity = std::make_shared<opset5::Multiply>(nms->output(2), opset5::Constant::create(outType, Shape {1}, {1}));
OutputVector results = {
std::make_shared<opset5::Result>(nms_0_identity),
std::make_shared<opset5::Result>(nms_1_identity),
std::make_shared<opset5::Result>(nms_2_identity)
};
function = std::make_shared<Function>(results, params, "MulticlassNMS");
}
}
} // namespace LayerTestsDefinitions
} // namespace subgraph
} // namespace test
} // namespace ov

View File

@@ -30,6 +30,8 @@ bool fuse_type_to_convert(const std::shared_ptr<ngraph::Node>& node, ngraph::ele
bool fuse_type_to_nms3(const std::shared_ptr<ngraph::Node>& node, ngraph::element::Type to, size_t idx);
bool fuse_type_to_nms4(const std::shared_ptr<ngraph::Node>& node, ngraph::element::Type to, size_t idx);
bool fuse_type_to_nms5(const std::shared_ptr<ngraph::Node>& node, ngraph::element::Type to, size_t idx);
bool fuse_type_to_matrix_nms(const std::shared_ptr<ngraph::Node>& node, ngraph::element::Type to, size_t idx);
bool fuse_type_to_multiclass_nms(const std::shared_ptr<ngraph::Node>& node, ngraph::element::Type to, size_t idx);
bool fuse_type_to_topk(const std::shared_ptr<ngraph::Node>& node, ngraph::element::Type to, size_t idx);
bool fuse_type_to_maxpool(const std::shared_ptr<ngraph::Node>& node, ngraph::element::Type to, size_t idx);
bool fuse_type_to_nonzero(const std::shared_ptr<ngraph::Node>& node, ngraph::element::Type to, size_t idx);
@@ -253,6 +255,8 @@ bool ngraph::pass::ConvertPrecision::run_on_function(std::shared_ptr<ngraph::Fun
{opset3::NonMaxSuppression::get_type_info_static(), fuse_type_to_nms3},
{opset4::NonMaxSuppression::get_type_info_static(), fuse_type_to_nms4},
{opset5::NonMaxSuppression::get_type_info_static(), fuse_type_to_nms5},
{opset8::MatrixNms::get_type_info_static(), fuse_type_to_matrix_nms},
{opset8::MulticlassNms::get_type_info_static(), fuse_type_to_multiclass_nms},
{opset6::CTCGreedyDecoderSeqLen::get_type_info_static(), fuse_type_to_ctc_greedy_decoder_seq_len},
{opset4::TopK::get_type_info_static(), fuse_type_to_topk},
{opset8::MaxPool::get_type_info_static(), fuse_type_to_maxpool},
@@ -385,6 +389,34 @@ bool fuse_type_to_nms5(const std::shared_ptr<ngraph::Node>& node, ngraph::elemen
return true;
}
bool fuse_type_to_matrix_nms(const std::shared_ptr<ngraph::Node>& node, ngraph::element::Type to, size_t idx) {
auto nms = ov::as_type_ptr<opset8::MatrixNms>(node);
if (!nms) {
return false;
}
if ((idx == 1 || idx == 2) && (to == element::i32 || to == element::i64)) {
nms->set_output_type(to);
return true;
}
return false;
}
bool fuse_type_to_multiclass_nms(const std::shared_ptr<ngraph::Node>& node, ngraph::element::Type to, size_t idx) {
auto nms = ov::as_type_ptr<opset8::MulticlassNms>(node);
if (!nms) {
return false;
}
if ((idx == 1 || idx == 2) && (to == element::i32 || to == element::i64)) {
nms->set_output_type(to);
return true;
}
return false;
}
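A minimal usage sketch of the new callback, invoked directly on a freshly built node (illustrative only; the parameter shapes and values are hypothetical, and set_output_type is assumed to update the attribute the plugin later reads back). The MatrixNms counterpart above behaves the same way.
// Build a MulticlassNms whose index/count outputs are i64, then request i32 for output 1.
auto boxes = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f32, ngraph::PartialShape{1, 10, 4});
auto scores = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f32, ngraph::PartialShape{1, 3, 10});
ngraph::op::v8::MulticlassNms::Attributes attrs;
attrs.output_type = ngraph::element::i64;
auto nms = std::make_shared<ngraph::opset8::MulticlassNms>(boxes, scores, attrs);
bool changed = fuse_type_to_multiclass_nms(nms, ngraph::element::i32, 1);  // true: idx 1 with i32 is accepted
bool ignored = fuse_type_to_multiclass_nms(nms, ngraph::element::f16, 1);  // false: unsupported target type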
bool fuse_type_to_topk(const std::shared_ptr<ngraph::Node>& node, ngraph::element::Type to, size_t idx) {
if (auto topk = ov::as_type_ptr<opset4::TopK>(node)) {
if (idx == 1 && (to == element::i32 || to == element::i64)) {