[CPU] MulticlassNms/MatrixNms support dynamic shape (#8161)

Luo Cheng 2021-11-27 01:23:26 +08:00 committed by GitHub
parent f59ece3cde
commit 38aebd4463
23 changed files with 950 additions and 578 deletions

View File

@ -423,8 +423,9 @@ void CNNNetworkNGraphImpl::reshape(const std::map<std::string, ngraph::PartialSh
::ngraph::pass::Manager manager;
// resolves dynamism by replacing dynamic operation with static version
manager.register_pass<::ngraph::pass::ConvertNMS5ToLegacyMatcher>(false);
manager.register_pass<::ngraph::pass::ConvertMulticlassNmsToMulticlassNmsIE>();
manager.register_pass<::ngraph::pass::ConvertMatrixNmsToMatrixNmsIE>();
// TODO [DS NMS]: remove once models where NMS is not the last node support dynamic shapes
manager.register_pass<::ngraph::pass::ConvertMulticlassNmsToMulticlassNmsIE>(false);
manager.register_pass<::ngraph::pass::ConvertMatrixNmsToMatrixNmsIE>(false);
manager.register_pass<::ngraph::pass::DisableConvertConstantFoldingOnConstPath>();
manager.register_pass<::ngraph::pass::ConstantFolding>();
// OneHotToLegacy changes output precision

View File

@ -314,7 +314,35 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr<ngraph::Function>
for (size_t i = 0; i < node->get_output_size(); i++) {
const auto outputs = node->get_output_target_inputs(i);
for (const auto &out : outputs) {
if (out.get_node()->get_type_info() != ngraph::op::v0::Result::get_type_info_static()) {
if (!ngraph::op::is_output(out.get_node())) {
return false;
}
}
}
return true;
});
// TODO [DS NMS]: remove once models where NMS is not the last node support dynamic shapes
pass_config->set_callback<ngraph::pass::ConvertMulticlassNmsToMulticlassNmsIE>(
[](const_node_ptr &node) -> bool {
for (size_t i = 0; i < node->get_output_size(); i++) {
const auto outputs = node->get_output_target_inputs(i);
for (const auto &out : outputs) {
if (!ngraph::op::is_output(out.get_node())) {
return false;
}
}
}
return true;
});
// TODO [DS NMS]: remove once models where NMS is not the last node support dynamic shapes
pass_config->set_callback<ngraph::pass::ConvertMatrixNmsToMatrixNmsIE>(
[](const_node_ptr &node) -> bool {
for (size_t i = 0; i < node->get_output_size(); i++) {
const auto outputs = node->get_output_target_inputs(i);
for (const auto &out : outputs) {
if (!ngraph::op::is_output(out.get_node())) {
return false;
}
}

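For reference, a standalone sketch of the check these callbacks perform: the conversion to the internal static-shape operation is skipped (the callback returns true) only when every consumer of the NMS outputs is a Result, i.e. NMS is the last node in the model. The helper name is hypothetical:

bool nmsFeedsOnlyResults(const std::shared_ptr<const ngraph::Node>& node) {
    for (size_t i = 0; i < node->get_output_size(); i++) {
        for (const auto& consumer : node->get_output_target_inputs(i)) {
            // ngraph::op::is_output() is true for Result nodes
            if (!ngraph::op::is_output(consumer.get_node()))
                return false;
        }
    }
    return true;
}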
View File

@ -12,25 +12,19 @@
#include "ie_parallel.hpp"
#include "ngraph/opsets/opset8.hpp"
#include "ngraph_ops/nms_static_shape_ie.hpp"
#include "utils/general_utils.h"
using namespace MKLDNNPlugin;
using namespace InferenceEngine;
using MatrixNmsIEInternal = ngraph::op::internal::NmsStaticShapeIE<ngraph::op::v8::MatrixNms>;
using ngNmsSortResultType = ngraph::op::util::NmsBase::SortResultType;
using ngNmsDecayFunction = ngraph::op::v8::MatrixNms::DecayFunction;
bool MKLDNNMatrixNmsNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
if (isDynamicNgraphNode(op)) {
errorMessage = "Doesn't support op with dynamic shapes";
return false;
}
const auto nms = std::dynamic_pointer_cast<const MatrixNmsIEInternal>(op);
const auto nms = std::dynamic_pointer_cast<const ngraph::op::v8::MatrixNms>(op);
if (!nms) {
errorMessage = "Only internal MatrixNms operation is supported";
errorMessage = "Only MatrixNms operation is supported";
return false;
}
const auto& attrs = nms->get_attrs();
@ -57,36 +51,16 @@ MKLDNNMatrixNmsNode::MKLDNNMatrixNmsNode(const std::shared_ptr<ngraph::Node>& op
IE_THROW(NotImplemented) << errorMessage;
}
errorPrefix = "MatrixNMS layer with name '" + getName() + "' ";
const auto matrix_nms = std::dynamic_pointer_cast<const MatrixNmsIEInternal>(op);
m_errorPrefix = "MatrixNMS layer with name '" + getName() + "' ";
if (getOriginalInputsNumber() != 2)
IE_THROW() << errorPrefix << "has incorrect number of input edges: " << getOriginalInputsNumber();
IE_THROW() << m_errorPrefix << "has incorrect number of input edges: " << getOriginalInputsNumber();
if (getOriginalOutputsNumber() != 3)
IE_THROW() << errorPrefix << "has incorrect number of output edges: " << getOriginalOutputsNumber();
IE_THROW() << m_errorPrefix << "has incorrect number of output edges: " << getOriginalOutputsNumber();
const SizeVector& boxes_dims = inputShapes[NMS_BOXES].getStaticDims();
const SizeVector& scores_dims = inputShapes[NMS_SCORES].getStaticDims();
if (!(boxes_dims[0] == scores_dims[0] && boxes_dims[1] == scores_dims[2])) {
IE_THROW() << errorPrefix << "has incompatible 'boxes' and 'scores' input dimensions";
}
const auto matrix_nms = std::dynamic_pointer_cast<const ngraph::op::v8::MatrixNms>(op);
m_numBatches = boxes_dims[0];
m_numBoxes = boxes_dims[1];
if (boxes_dims.size() != 3)
IE_THROW() << errorPrefix << "has unsupported 'boxes' input rank: " << boxes_dims.size();
if (boxes_dims[2] != 4)
IE_THROW() << errorPrefix << "has unsupported 'boxes' input 3rd dimension size: " << boxes_dims[2];
m_numClasses = scores_dims[1];
if (scores_dims.size() != 3)
IE_THROW() << errorPrefix << "has unsupported 'scores' input rank: " << scores_dims.size();
if (m_numBatches != scores_dims[0])
IE_THROW() << errorPrefix << " num_batches is different in 'boxes' and 'scores' inputs";
if (m_numBoxes != scores_dims[2])
IE_THROW() << errorPrefix << " num_boxes is different in 'boxes' and 'scores' inputs";
auto& attrs = matrix_nms->get_attrs();
if (attrs.sort_result_type == ngraph::op::util::NmsBase::SortResultType::CLASSID)
m_sortResultType = MatrixNmsSortResultType::CLASSID;
@ -109,35 +83,6 @@ MKLDNNMatrixNmsNode::MKLDNNMatrixNmsNode(const std::shared_ptr<ngraph::Node>& op
m_gaussianSigma = attrs.gaussian_sigma;
m_postThreshold = attrs.post_threshold;
m_normalized = attrs.normalized;
int64_t max_output_boxes_per_class = 0;
size_t real_num_classes = m_backgroundClass == -1 ? m_numClasses : m_numClasses - 1;
if (m_nmsTopk >= 0)
max_output_boxes_per_class = std::min(m_numBoxes, static_cast<size_t>(m_nmsTopk));
else
max_output_boxes_per_class = m_numBoxes;
m_maxBoxesPerBatch = max_output_boxes_per_class * real_num_classes;
if (m_keepTopk >= 0)
m_maxBoxesPerBatch = std::min(m_maxBoxesPerBatch, static_cast<size_t>(m_keepTopk));
}
void MKLDNNMatrixNmsNode::initSupportedPrimitiveDescriptors() {
if (!supportedPrimitiveDescriptors.empty())
return;
m_realNumClasses = m_backgroundClass == -1 ? m_numClasses : m_numClasses - 1;
m_realNumBoxes = m_nmsTopk == -1 ? m_numBoxes : std::min(m_nmsTopk, static_cast<int>(m_numBoxes));
m_numPerBatch.resize(m_numBatches);
m_filteredBoxes.resize(m_numBatches * m_realNumClasses * m_realNumBoxes);
m_numPerBatchClass.resize(m_numBatches, std::vector<int64_t>(m_numClasses, 0));
m_classOffset.resize(m_numClasses, 0);
for (size_t i = 0, count = 0; i < m_numClasses; i++) {
if (i == m_backgroundClass)
continue;
m_classOffset[i] = (count++) * m_realNumBoxes;
}
if (m_decayFunction == MatrixNmsDecayFunction::LINEAR) {
m_decay_fn = [](float iou, float max_iou, float sigma) -> float {
return (1. - iou) / (1. - max_iou + 1e-10f);
@ -148,16 +93,29 @@ void MKLDNNMatrixNmsNode::initSupportedPrimitiveDescriptors() {
};
}
const auto& boxes_dims = getInputShapeAtPort(NMS_BOXES).getDims();
if (boxes_dims.size() != 3)
IE_THROW() << m_errorPrefix << "has unsupported 'boxes' input rank: " << boxes_dims.size();
if (boxes_dims[2] != 4)
IE_THROW() << m_errorPrefix << "has unsupported 'boxes' input 3rd dimension size: " << boxes_dims[2];
const auto& scores_dims = getInputShapeAtPort(NMS_SCORES).getDims();
if (scores_dims.size() != 3)
IE_THROW() << m_errorPrefix << "has unsupported 'scores' input rank: " << scores_dims.size();
}
void MKLDNNMatrixNmsNode::initSupportedPrimitiveDescriptors() {
if (!supportedPrimitiveDescriptors.empty())
return;
const std::vector<Precision> supportedFloatPrecision = {Precision::FP32};
const std::vector<Precision> supportedIntOutputPrecision = {Precision::I32, Precision::I64};
checkPrecision(getOriginalInputPrecisionAtPort(NMS_BOXES), supportedFloatPrecision, "boxes", inType);
checkPrecision(getOriginalInputPrecisionAtPort(NMS_BOXES), supportedFloatPrecision, "boxes", m_inType);
checkPrecision(getOriginalInputPrecisionAtPort(NMS_SCORES), supportedFloatPrecision, "scores", m_inType);
checkPrecision(getOriginalInputPrecisionAtPort(NMS_SCORES), supportedFloatPrecision, "scores", inType);
checkPrecision(getOriginalOutputPrecisionAtPort(NMS_SELECTED_INDICES), supportedIntOutputPrecision, "selected_indices", outType);
checkPrecision(getOriginalOutputPrecisionAtPort(NMS_SELECTED_OUTPUTS), supportedFloatPrecision, "selected_outputs", outType);
checkPrecision(getOriginalOutputPrecisionAtPort(NMS_VALID_OUTPUTS), supportedIntOutputPrecision, "valid_outputs", outType);
checkPrecision(getOriginalOutputPrecisionAtPort(NMS_SELECTED_INDICES), supportedIntOutputPrecision, "selected_indices", m_outType);
checkPrecision(getOriginalOutputPrecisionAtPort(NMS_SELECTED_OUTPUTS), supportedFloatPrecision, "selected_outputs", m_outType);
checkPrecision(getOriginalOutputPrecisionAtPort(NMS_VALID_OUTPUTS), supportedIntOutputPrecision, "valid_outputs", m_outType);
addSupportedPrimDesc({{LayoutType::ncsp, Precision::FP32},
{LayoutType::ncsp, Precision::FP32}},
@ -282,6 +240,54 @@ size_t MKLDNNMatrixNmsNode::nmsMatrix(const float* boxesData, const float* score
return numDet;
}
void MKLDNNMatrixNmsNode::createPrimitive() {
if (inputShapesDefined()) {
prepareParams();
updateLastInputDims();
}
}
void MKLDNNMatrixNmsNode::prepareParams() {
const auto& boxes_dims = getParentEdgeAt(NMS_BOXES)->getMemory().getStaticDims();
const auto& scores_dims = getParentEdgeAt(NMS_SCORES)->getMemory().getStaticDims();
if (!(boxes_dims[0] == scores_dims[0] && boxes_dims[1] == scores_dims[2])) {
IE_THROW() << m_errorPrefix << "has incompatible 'boxes' and 'scores' input dimensions";
}
m_numBatches = boxes_dims[0];
m_numBoxes = boxes_dims[1];
m_numClasses = scores_dims[1];
int64_t max_output_boxes_per_class = 0;
size_t real_num_classes = m_backgroundClass == -1 ? m_numClasses :
m_backgroundClass < m_numClasses ? m_numClasses - 1 : m_numClasses;
if (m_nmsTopk >= 0)
max_output_boxes_per_class = std::min(m_numBoxes, static_cast<size_t>(m_nmsTopk));
else
max_output_boxes_per_class = m_numBoxes;
m_maxBoxesPerBatch = max_output_boxes_per_class * real_num_classes;
if (m_keepTopk >= 0)
m_maxBoxesPerBatch = std::min(m_maxBoxesPerBatch, static_cast<size_t>(m_keepTopk));
m_realNumClasses = real_num_classes;
m_realNumBoxes = m_nmsTopk == -1 ? m_numBoxes : std::min(m_nmsTopk, static_cast<int>(m_numBoxes));
m_numPerBatch.resize(m_numBatches);
m_filteredBoxes.resize(m_numBatches * m_realNumClasses * m_realNumBoxes);
m_numPerBatchClass.resize(m_numBatches);
for (auto &numPerBatch : m_numPerBatchClass) {
numPerBatch.resize(m_numClasses, 0);
}
m_classOffset.resize(m_numClasses, 0);
for (size_t i = 0, count = 0; i < m_numClasses; i++) {
if (i == m_backgroundClass)
continue;
m_classOffset[i] = (count++) * m_realNumBoxes;
}
}
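// Worked example of the sizing above (illustrative values, not from this commit):
// boxes {2, 100, 4}, scores {2, 5, 100}, background_class = 0, nms_top_k = 20, keep_top_k = 30
//   real_num_classes           = 5 - 1 = 4
//   max_output_boxes_per_class = min(100, 20) = 20
//   m_maxBoxesPerBatch         = min(20 * 4, 30) = 30
//   m_realNumBoxes             = min(20, 100) = 20
//   m_filteredBoxes.size()     = 2 * 4 * 20 = 160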
void MKLDNNMatrixNmsNode::execute(mkldnn::stream strm) {
const float* boxes = reinterpret_cast<const float*>(getParentEdgeAt(NMS_BOXES)->getMemoryPtr()->GetPtr());
const float* scores = reinterpret_cast<const float*>(getParentEdgeAt(NMS_SCORES)->getMemoryPtr()->GetPtr());
@ -352,9 +358,20 @@ void MKLDNNMatrixNmsNode::execute(mkldnn::stream strm) {
}
}
float* selectedOutputs = reinterpret_cast<float*>(getChildEdgesAtPort(NMS_SELECTED_OUTPUTS)[0]->getMemoryPtr()->GetPtr());
int* selectedIndices = reinterpret_cast<int*>(getChildEdgesAtPort(NMS_SELECTED_INDICES)[0]->getMemoryPtr()->GetPtr());
int* validOutputs = reinterpret_cast<int*>(getChildEdgesAtPort(NMS_VALID_OUTPUTS)[0]->getMemoryPtr()->GetPtr());
auto selectedOutputsMemPtr = getChildEdgesAtPort(NMS_SELECTED_OUTPUTS)[0]->getMemoryPtr();
auto selectedIndicesMemPtr = getChildEdgesAtPort(NMS_SELECTED_INDICES)[0]->getMemoryPtr();
auto validOutputsMemPtr = getChildEdgesAtPort(NMS_VALID_OUTPUTS)[0]->getMemoryPtr();
// TODO [DS NMS]: remove once models where NMS is not the last node support dynamic shapes
if (isDynamicNode()) {
size_t totalBox = std::accumulate(m_numPerBatch.begin(), m_numPerBatch.end(), 0);
selectedOutputsMemPtr->redefineDesc(getBaseMemDescAtOutputPort(NMS_SELECTED_OUTPUTS)->cloneWithNewDims({totalBox, 6}));
selectedIndicesMemPtr->redefineDesc(getBaseMemDescAtOutputPort(NMS_SELECTED_INDICES)->cloneWithNewDims({totalBox, 1}));
validOutputsMemPtr->redefineDesc(getBaseMemDescAtOutputPort(NMS_VALID_OUTPUTS)->cloneWithNewDims({m_numBatches}));
}
float* selectedOutputs = reinterpret_cast<float*>(selectedOutputsMemPtr->GetPtr());
int* selectedIndices = reinterpret_cast<int*>(selectedIndicesMemPtr->GetPtr());
int* validOutputs = reinterpret_cast<int*>(validOutputsMemPtr->GetPtr());
std::copy(m_numPerBatch.begin(), m_numPerBatch.end(), validOutputs);
int64_t outputOffset = 0;
@ -372,16 +389,22 @@ void MKLDNNMatrixNmsNode::execute(mkldnn::stream strm) {
selectedBase[4] = m_filteredBoxes[originalIndex].box.x2;
selectedBase[5] = m_filteredBoxes[originalIndex].box.y2;
}
std::fill_n(selectedOutputs + (outputOffset + real_boxes) * 6, (m_maxBoxesPerBatch - real_boxes) * 6, -1);
std::fill_n(selectedIndices + (outputOffset + real_boxes), m_maxBoxesPerBatch - real_boxes, -1);
outputOffset += m_maxBoxesPerBatch;
originalOffset += real_boxes;
// TODO [DS NMS]: remove once models where NMS is not the last node support dynamic shapes
if (!isDynamicNode()) {
std::fill_n(selectedOutputs + (outputOffset + real_boxes) * 6, (m_maxBoxesPerBatch - real_boxes) * 6, -1);
std::fill_n(selectedIndices + (outputOffset + real_boxes), m_maxBoxesPerBatch - real_boxes, -1);
outputOffset += m_maxBoxesPerBatch;
originalOffset += real_boxes;
} else {
outputOffset += real_boxes;
originalOffset += real_boxes;
}
}
}
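// Illustrative example of the two output layouts above: m_numPerBatch = {2, 1}, m_maxBoxesPerBatch = 3
//   static shapes:  selected_outputs keeps 2 * 3 = 6 rows; row 2 and rows 4..5 are padded with -1
//   dynamic shapes: selected_outputs was redefined to {3, 6} earlier in execute(), so the three
//                   real boxes are written back to back with no padding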
void MKLDNNMatrixNmsNode::checkPrecision(const Precision prec, const std::vector<Precision> precList, const std::string name, const std::string type) {
if (std::find(precList.begin(), precList.end(), prec) == precList.end())
IE_THROW() << errorPrefix << "has unsupported '" << name << "' " << type << " precision: " << prec;
IE_THROW() << m_errorPrefix << "has unsupported '" << name << "' " << type << " precision: " << prec;
}
REG_MKLDNN_PRIM_FOR(MKLDNNMatrixNmsNode, MatrixNms);

View File

@ -27,12 +27,17 @@ public:
void getSupportedDescriptors() override {};
void initSupportedPrimitiveDescriptors() override;
void createPrimitive() override {};
void createPrimitive() override;
void execute(mkldnn::stream strm) override;
bool created() const override;
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); }
bool needShapeInfer() const override { return false; }
void prepareParams() override;
private:
// input
static const size_t NMS_BOXES = 0;
@ -82,8 +87,8 @@ private:
int64_t classIndex = -1;
float score = 0.0f;
};
std::string errorPrefix;
const std::string inType = "input", outType = "output";
std::string m_errorPrefix;
const std::string m_inType = "input", m_outType = "output";
std::vector<int64_t> m_numPerBatch;
std::vector<std::vector<int64_t>> m_numPerBatchClass;
std::vector<BoxInfo> m_filteredBoxes;

View File

@ -9,7 +9,6 @@
#include <chrono>
#include <cmath>
#include <ie_ngraph_utils.hpp>
#include <ngraph_ops/nms_static_shape_ie.hpp>
#include <queue>
#include <string>
#include <utility>
@ -22,17 +21,12 @@ using namespace MKLDNNPlugin;
using namespace InferenceEngine;
using ngNmsSortResultType = ngraph::op::util::NmsBase::SortResultType;
using MulticlassNmsIEInternal = ngraph::op::internal::NmsStaticShapeIE<ngraph::op::v8::MulticlassNms>;
bool MKLDNNMultiClassNmsNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
if (isDynamicNgraphNode(op)) {
errorMessage = "Doesn't support op with dynamic shapes";
return false;
}
const auto nms = std::dynamic_pointer_cast<const MulticlassNmsIEInternal>(op);
const auto nms = std::dynamic_pointer_cast<const ngraph::op::v8::MulticlassNms>(op);
if (!nms) {
errorMessage = "Only internal MulitClassNonMaxSuppression operation is supported";
errorMessage = "Only MulticlassNms operation is supported";
return false;
}
const auto& atrri = nms->get_attrs();
@ -53,79 +47,55 @@ MKLDNNMultiClassNmsNode::MKLDNNMultiClassNmsNode(const std::shared_ptr<ngraph::N
if (!isSupportedOperation(op, errorMessage)) {
IE_THROW(NotImplemented) << errorMessage;
}
errorPrefix = "MultiClassNms layer with name '" + getName() + "' ";
const auto nms = std::dynamic_pointer_cast<const MulticlassNmsIEInternal>(op);
m_errorPrefix = "MultiClassNms layer with name '" + getName() + "' ";
if (getOriginalInputsNumber() != 2)
IE_THROW() << errorPrefix << "has incorrect number of input edges: " << getOriginalInputsNumber();
IE_THROW() << m_errorPrefix << "has incorrect number of input edges: " << getOriginalInputsNumber();
if (getOriginalOutputsNumber() != 3)
IE_THROW() << errorPrefix << "has incorrect number of output edges: " << getOriginalOutputsNumber();
IE_THROW() << m_errorPrefix << "has incorrect number of output edges: " << getOriginalOutputsNumber();
const auto nms = std::dynamic_pointer_cast<const ngraph::op::v8::MulticlassNms>(op);
auto& atrri = nms->get_attrs();
sort_result_across_batch = atrri.sort_result_across_batch;
max_output_boxes_per_class = atrri.nms_top_k;
iou_threshold = atrri.iou_threshold;
score_threshold = atrri.score_threshold;
background_class = atrri.background_class;
keep_top_k = atrri.keep_top_k;
m_sortResultAcrossBatch = atrri.sort_result_across_batch;
m_nmsTopK = atrri.nms_top_k;
m_iouThreshold = atrri.iou_threshold;
m_scoreThreshold = atrri.score_threshold;
m_backgroundClass = atrri.background_class;
m_keepTopK = atrri.keep_top_k;
if (atrri.sort_result_type == ngNmsSortResultType::CLASSID)
sort_result_type = MulticlassNmsSortResultType::CLASSID;
m_sortResultType = MulticlassNmsSortResultType::CLASSID;
else if (atrri.sort_result_type == ngNmsSortResultType::SCORE)
sort_result_type = MulticlassNmsSortResultType::SCORE;
m_sortResultType = MulticlassNmsSortResultType::SCORE;
else if (atrri.sort_result_type == ngNmsSortResultType::NONE)
sort_result_type = MulticlassNmsSortResultType::NONE;
nms_eta = atrri.nms_eta;
normalized = atrri.normalized;
m_sortResultType = MulticlassNmsSortResultType::NONE;
m_nmsEta = atrri.nms_eta;
m_normalized = atrri.normalized;
const SizeVector& boxes_dims = inputShapes[NMS_BOXES].getStaticDims();
const auto& boxes_dims = getInputShapeAtPort(NMS_BOXES).getDims();
if (boxes_dims.size() != 3)
IE_THROW() << errorPrefix << "has unsupported 'boxes' input rank: " << boxes_dims.size();
IE_THROW() << m_errorPrefix << "has unsupported 'boxes' input rank: " << boxes_dims.size();
if (boxes_dims[2] != 4)
IE_THROW() << errorPrefix << "has unsupported 'boxes' input 3rd dimension size: " << boxes_dims[2];
const SizeVector& scores_dims = inputShapes[NMS_SCORES].getStaticDims();
IE_THROW() << m_errorPrefix << "has unsupported 'boxes' input 3rd dimension size: " << boxes_dims[2];
const auto& scores_dims = getInputShapeAtPort(NMS_SCORES).getDims();
if (scores_dims.size() != 3)
IE_THROW() << errorPrefix << "has unsupported 'scores' input rank: " << scores_dims.size();
if (boxes_dims[0] != scores_dims[0])
IE_THROW() << errorPrefix << " num_batches is different in 'boxes' and 'scores' inputs";
if (boxes_dims[1] != scores_dims[2])
IE_THROW() << errorPrefix << " num_boxes is different in 'boxes' and 'scores' inputs";
const SizeVector& valid_outputs_dims = outputShapes[NMS_SELECTEDNUM].getStaticDims();
if (valid_outputs_dims.size() != 1)
IE_THROW() << errorPrefix << "has unsupported 'valid_outputs' output rank: " << valid_outputs_dims.size();
if (valid_outputs_dims[0] != boxes_dims[0]) // valid_outputs_dims[0] != num_batches
IE_THROW() << errorPrefix << "has unsupported 'valid_outputs' output 1st dimension size: " << valid_outputs_dims[0];
IE_THROW() << m_errorPrefix << "has unsupported 'scores' input rank: " << scores_dims.size();
}
void MKLDNNMultiClassNmsNode::initSupportedPrimitiveDescriptors() {
if (!supportedPrimitiveDescriptors.empty())
return;
const SizeVector& boxes_dims = inputShapes[NMS_BOXES].getStaticDims();
num_batches = boxes_dims[0];
num_boxes = boxes_dims[1];
const SizeVector& scores_dims = inputShapes[NMS_SCORES].getStaticDims();
num_classes = scores_dims[1];
numFiltBox.resize(num_batches, std::vector<size_t>(num_classes)); // batches
numBoxOffset.resize(num_batches);
if (max_output_boxes_per_class) {
max_output_boxes_per_class = (max_output_boxes_per_class == -1) ? num_boxes : max_output_boxes_per_class;
filtBoxes.resize(max_output_boxes_per_class * num_batches * num_classes);
}
const std::vector<Precision> supportedFloatPrecision = {Precision::FP32, Precision::BF16};
const std::vector<Precision> supportedIntOutputPrecision = {Precision::I32, Precision::I64};
checkPrecision(getOriginalInputPrecisionAtPort(NMS_BOXES), supportedFloatPrecision, "boxes", inType);
checkPrecision(getOriginalInputPrecisionAtPort(NMS_BOXES), supportedFloatPrecision, "boxes", m_inType);
checkPrecision(getOriginalInputPrecisionAtPort(NMS_SCORES), supportedFloatPrecision, "scores", m_inType);
checkPrecision(getOriginalInputPrecisionAtPort(NMS_SCORES), supportedFloatPrecision, "scores", inType);
checkPrecision(getOriginalOutputPrecisionAtPort(NMS_SELECTEDINDICES), supportedIntOutputPrecision, "selected_indices", outType);
checkPrecision(getOriginalOutputPrecisionAtPort(NMS_SELECTEDOUTPUTS), supportedFloatPrecision, "selected_outputs", outType);
checkPrecision(getOriginalOutputPrecisionAtPort(NMS_SELECTEDNUM), supportedIntOutputPrecision, "selected_num", outType);
checkPrecision(getOriginalOutputPrecisionAtPort(NMS_SELECTEDINDICES), supportedIntOutputPrecision, "selected_indices", m_outType);
checkPrecision(getOriginalOutputPrecisionAtPort(NMS_SELECTEDOUTPUTS), supportedFloatPrecision, "selected_outputs", m_outType);
checkPrecision(getOriginalOutputPrecisionAtPort(NMS_SELECTEDNUM), supportedIntOutputPrecision, "selected_num", m_outType);
addSupportedPrimDesc({{LayoutType::ncsp, Precision::FP32},
{LayoutType::ncsp, Precision::FP32}},
@ -135,100 +105,138 @@ void MKLDNNMultiClassNmsNode::initSupportedPrimitiveDescriptors() {
impl_desc_type::ref_any);
}
void MKLDNNMultiClassNmsNode::createPrimitive() {
if (inputShapesDefined()) {
prepareParams();
updateLastInputDims();
}
}
void MKLDNNMultiClassNmsNode::prepareParams() {
const auto& boxes_dims = getParentEdgeAt(NMS_BOXES)->getMemory().getStaticDims();
const auto& scores_dims = getParentEdgeAt(NMS_SCORES)->getMemory().getStaticDims();
if (!(boxes_dims[0] == scores_dims[0] && boxes_dims[1] == scores_dims[2])) {
IE_THROW() << m_errorPrefix << "has incompatible 'boxes' and 'scores' input dimensions";
}
m_numBatches = boxes_dims[0];
m_numBoxes = boxes_dims[1];
m_numClasses = scores_dims[1];
int max_output_boxes_per_class = 0;
size_t real_num_classes = m_backgroundClass == -1 ? m_numClasses :
m_backgroundClass < m_numClasses ? m_numClasses - 1 : m_numClasses;
if (m_nmsTopK) {
max_output_boxes_per_class = (m_nmsTopK == -1) ? m_numBoxes :
std::min(m_nmsTopK, static_cast<int>(m_numBoxes));
m_filtBoxes.resize(max_output_boxes_per_class * m_numBatches * m_numClasses);
}
m_nmsRealTopk = max_output_boxes_per_class;
m_maxBoxesPerBatch = max_output_boxes_per_class * real_num_classes;
if (m_keepTopK >= 0)
m_maxBoxesPerBatch = std::min(m_maxBoxesPerBatch, static_cast<size_t>(m_keepTopK));
m_numFiltBox.resize(m_numBatches);
for (auto &numPerBatch : m_numFiltBox) {
numPerBatch.resize(m_numClasses, 0);
}
m_numBoxOffset.resize(m_numBatches);
}
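// Worked example of the sizing above (illustrative values, not from this commit):
// boxes {2, 100, 4}, scores {2, 5, 100}, nms_top_k = 20, background_class = -1, keep_top_k = 30
//   m_nmsRealTopk      = min(20, 100) = 20
//   m_filtBoxes.size() = 20 * 2 * 5 = 200
//   m_maxBoxesPerBatch = min(20 * 5, 30) = 30
// With nms_top_k = 0, m_nmsRealTopk stays 0 and execute() below returns without producing boxes.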
void MKLDNNMultiClassNmsNode::execute(mkldnn::stream strm) {
const float* boxes = reinterpret_cast<const float*>(getParentEdgeAt(NMS_BOXES)->getMemoryPtr()->GetPtr());
const float* scores = reinterpret_cast<const float*>(getParentEdgeAt(NMS_SCORES)->getMemoryPtr()->GetPtr());
auto dims_boxes = getParentEdgeAt(NMS_BOXES)->getMemory().getStaticDims();
if (max_output_boxes_per_class == 0)
if (m_nmsRealTopk == 0)
return;
int* selected_indices = reinterpret_cast<int*>(getChildEdgesAtPort(NMS_SELECTEDINDICES)[0]->getMemoryPtr()->GetPtr());
float* selected_outputs = reinterpret_cast<float*>(getChildEdgesAtPort(NMS_SELECTEDOUTPUTS)[0]->getMemoryPtr()->GetPtr());
int* selected_num = reinterpret_cast<int*>(getChildEdgesAtPort(NMS_SELECTEDNUM)[0]->getMemoryPtr()->GetPtr());
auto selectedOutputsMemPtr = getChildEdgesAtPort(NMS_SELECTEDOUTPUTS)[0]->getMemoryPtr();
auto selectedIndicesMemPtr = getChildEdgesAtPort(NMS_SELECTEDINDICES)[0]->getMemoryPtr();
auto validOutputsMemPtr = getChildEdgesAtPort(NMS_SELECTEDNUM)[0]->getMemoryPtr();
auto boxesStrides = getParentEdgeAt(NMS_BOXES)->getMemory().GetDescWithType<BlockedMemoryDesc>()->getStrides();
auto scoresStrides = getParentEdgeAt(NMS_SCORES)->getMemory().GetDescWithType<BlockedMemoryDesc>()->getStrides();
if ((nms_eta >= 0) && (nms_eta < 1)) {
if ((m_nmsEta >= 0) && (m_nmsEta < 1)) {
nmsWithEta(boxes, scores, boxesStrides, scoresStrides);
} else {
nmsWithoutEta(boxes, scores, boxesStrides, scoresStrides);
}
size_t startOffset = numFiltBox[0][0];
numBoxOffset[0] = 0;
for (size_t b = 0; b < numFiltBox.size(); b++) {
size_t startOffset = m_numFiltBox[0][0];
m_numBoxOffset[0] = 0;
for (size_t b = 0; b < m_numFiltBox.size(); b++) {
size_t batchOffsetNew = 0;
size_t batchOffset = b * num_classes * max_output_boxes_per_class;
for (size_t c = (b == 0 ? 1 : 0); c < numFiltBox[b].size(); c++) {
size_t offset = batchOffset + c * max_output_boxes_per_class;
for (size_t i = 0; i < numFiltBox[b][c]; i++) {
filtBoxes[startOffset + i] = filtBoxes[offset + i];
size_t batchOffset = b * m_numClasses * m_nmsRealTopk;
for (size_t c = (b == 0 ? 1 : 0); c < m_numFiltBox[b].size(); c++) {
size_t offset = batchOffset + c * m_nmsRealTopk;
for (size_t i = 0; i < m_numFiltBox[b][c]; i++) {
m_filtBoxes[startOffset + i] = m_filtBoxes[offset + i];
}
startOffset += numFiltBox[b][c];
batchOffsetNew += numFiltBox[b][c];
startOffset += m_numFiltBox[b][c];
batchOffsetNew += m_numFiltBox[b][c];
}
numBoxOffset[b] = batchOffsetNew;
m_numBoxOffset[b] = batchOffsetNew;
if (b == 0)
numBoxOffset[b] += numFiltBox[0][0];
m_numBoxOffset[b] += m_numFiltBox[0][0];
}
// sort elements before going through keep_top_k
parallel_sort(filtBoxes.begin(), filtBoxes.begin() + startOffset, [](const filteredBoxes& l, const filteredBoxes& r) {
parallel_sort(m_filtBoxes.begin(), m_filtBoxes.begin() + startOffset, [](const filteredBoxes& l, const filteredBoxes& r) {
return ((l.batch_index < r.batch_index) ||
((l.batch_index == r.batch_index) && ((l.score > r.score) || ((std::fabs(l.score - r.score) < 1e-6) && l.class_index < r.class_index) ||
((std::fabs(l.score - r.score) < 1e-6) && l.class_index == r.class_index && l.box_index < r.box_index))));
});
if (keep_top_k > -1) {
if (m_keepTopK > -1) {
startOffset = 0;
size_t offset = 0;
for (size_t b = 0; b < numFiltBox.size(); b++) {
if (numBoxOffset[b] > keep_top_k) {
for (size_t b = 0; b < m_numFiltBox.size(); b++) {
if (m_numBoxOffset[b] > m_keepTopK) {
if (startOffset == offset) {
startOffset += keep_top_k;
offset += numBoxOffset[b];
startOffset += m_keepTopK;
offset += m_numBoxOffset[b];
} else {
for (size_t i = 0; i < keep_top_k; i++) {
filtBoxes[startOffset + i] = filtBoxes[offset + i];
for (size_t i = 0; i < m_keepTopK; i++) {
m_filtBoxes[startOffset + i] = m_filtBoxes[offset + i];
}
startOffset += keep_top_k;
offset += numBoxOffset[b];
startOffset += m_keepTopK;
offset += m_numBoxOffset[b];
}
} else {
if (startOffset == offset) {
startOffset += numBoxOffset[b];
offset += numBoxOffset[b];
startOffset += m_numBoxOffset[b];
offset += m_numBoxOffset[b];
} else {
for (size_t i = 0; i < numBoxOffset[b]; i++) {
filtBoxes[startOffset + i] = filtBoxes[offset + i];
for (size_t i = 0; i < m_numBoxOffset[b]; i++) {
m_filtBoxes[startOffset + i] = m_filtBoxes[offset + i];
}
startOffset += numBoxOffset[b];
offset += numBoxOffset[b];
startOffset += m_numBoxOffset[b];
offset += m_numBoxOffset[b];
}
}
}
}
if (sort_result_across_batch) {
if (sort_result_type == MulticlassNmsSortResultType::SCORE) {
parallel_sort(filtBoxes.begin(), filtBoxes.begin() + startOffset, [](const filteredBoxes& l, const filteredBoxes& r) {
if (m_sortResultAcrossBatch) {
if (m_sortResultType == MulticlassNmsSortResultType::SCORE) {
parallel_sort(m_filtBoxes.begin(), m_filtBoxes.begin() + startOffset, [](const filteredBoxes& l, const filteredBoxes& r) {
return (l.score > r.score) || (l.score == r.score && l.batch_index < r.batch_index) ||
(l.score == r.score && l.batch_index == r.batch_index && l.class_index < r.class_index) ||
(l.score == r.score && l.batch_index == r.batch_index && l.class_index == r.class_index && l.box_index < r.box_index);
});
} else if (sort_result_type == MulticlassNmsSortResultType::CLASSID) {
parallel_sort(filtBoxes.begin(), filtBoxes.begin() + startOffset, [](const filteredBoxes& l, const filteredBoxes& r) {
} else if (m_sortResultType == MulticlassNmsSortResultType::CLASSID) {
parallel_sort(m_filtBoxes.begin(), m_filtBoxes.begin() + startOffset, [](const filteredBoxes& l, const filteredBoxes& r) {
return (l.class_index < r.class_index) || (l.class_index == r.class_index && l.batch_index < r.batch_index) ||
(l.class_index == r.class_index && l.batch_index == r.batch_index && l.score > r.score) ||
(l.class_index == r.class_index && l.batch_index == r.batch_index && l.score == r.score && l.box_index < r.box_index);
});
}
} else if (sort_result_type == MulticlassNmsSortResultType::CLASSID) {
parallel_sort(filtBoxes.begin(), filtBoxes.begin() + startOffset, [](const filteredBoxes& l, const filteredBoxes& r) {
} else if (m_sortResultType == MulticlassNmsSortResultType::CLASSID) {
parallel_sort(m_filtBoxes.begin(), m_filtBoxes.begin() + startOffset, [](const filteredBoxes& l, const filteredBoxes& r) {
return ((l.batch_index < r.batch_index) ||
((l.batch_index == r.batch_index) &&
((l.class_index < r.class_index) || ((l.class_index == r.class_index) && l.score > r.score) ||
@ -236,18 +244,28 @@ void MKLDNNMultiClassNmsNode::execute(mkldnn::stream strm) {
});
}
const size_t selectedBoxesNum = getChildEdgeAt(NMS_SELECTEDINDICES)->getMemory().getStaticDims()[0];
const size_t validOutputs = std::min(startOffset, selectedBoxesNum);
const size_t validOutputs = std::min(startOffset, m_maxBoxesPerBatch * dims_boxes[0]);
std::vector<size_t> m_selected_num;
m_selected_num.resize(dims_boxes[0]);
const size_t selectedBoxesNum_perBatch = selectedBoxesNum / dims_boxes[0];
const size_t selectedBoxesNum_perBatch = m_maxBoxesPerBatch;
for (size_t idx = 0lu; idx < validOutputs; idx++) {
m_selected_num[filtBoxes[idx].batch_index]++;
m_selected_num[m_filtBoxes[idx].batch_index]++;
}
// TODO [DS NMS]: remove once models where NMS is not the last node support dynamic shapes
if (isDynamicNode()) {
size_t totalBox = std::accumulate(m_selected_num.begin(), m_selected_num.end(), 0);
selectedOutputsMemPtr->redefineDesc(getBaseMemDescAtOutputPort(NMS_SELECTEDOUTPUTS)->cloneWithNewDims({totalBox, 6}));
selectedIndicesMemPtr->redefineDesc(getBaseMemDescAtOutputPort(NMS_SELECTEDINDICES)->cloneWithNewDims({totalBox, 1}));
validOutputsMemPtr->redefineDesc(getBaseMemDescAtOutputPort(NMS_SELECTEDNUM)->cloneWithNewDims({m_numBatches}));
}
int* selected_indices = reinterpret_cast<int*>(selectedIndicesMemPtr->GetPtr());
float* selected_outputs = reinterpret_cast<float*>(selectedOutputsMemPtr->GetPtr());
int* selected_num = reinterpret_cast<int*>(validOutputsMemPtr->GetPtr());
int64_t output_offset = 0;
int64_t original_offset = 0;
for (size_t i = 0; i < dims_boxes[0]; i++) {
@ -256,19 +274,25 @@ void MKLDNNMultiClassNmsNode::execute(mkldnn::stream strm) {
for (size_t j = 0; j < real_boxes; j++) {
auto original_index = original_offset + j;
selected_indices[j + output_offset] = filtBoxes[original_index].batch_index * dims_boxes[1] + filtBoxes[original_index].box_index;
selected_indices[j + output_offset] = m_filtBoxes[original_index].batch_index * dims_boxes[1] + m_filtBoxes[original_index].box_index;
auto selected_base = selected_outputs + (output_offset + j) * 6;
selected_base[0] = filtBoxes[original_index].class_index;
selected_base[1] = filtBoxes[original_index].score;
selected_base[0] = m_filtBoxes[original_index].class_index;
selected_base[1] = m_filtBoxes[original_index].score;
selected_base[2] = boxes[selected_indices[j + output_offset] * 4];
selected_base[3] = boxes[selected_indices[j + output_offset] * 4 + 1];
selected_base[4] = boxes[selected_indices[j + output_offset] * 4 + 2];
selected_base[5] = boxes[selected_indices[j + output_offset] * 4 + 3];
}
std::fill_n(selected_outputs + (output_offset + real_boxes) * 6, (selectedBoxesNum_perBatch - real_boxes) * 6, -1);
std::fill_n(selected_indices + (output_offset + real_boxes), selectedBoxesNum_perBatch - real_boxes, -1);
output_offset += selectedBoxesNum_perBatch;
original_offset += real_boxes;
// TODO [DS NMS]: remove once models where NMS is not the last node support dynamic shapes
if (!isDynamicNode()) {
std::fill_n(selected_outputs + (output_offset + real_boxes) * 6, (selectedBoxesNum_perBatch - real_boxes) * 6, -1);
std::fill_n(selected_indices + (output_offset + real_boxes), selectedBoxesNum_perBatch - real_boxes, -1);
output_offset += selectedBoxesNum_perBatch;
original_offset += real_boxes;
} else {
output_offset += real_boxes;
original_offset += real_boxes;
}
}
}
@ -309,21 +333,21 @@ void MKLDNNMultiClassNmsNode::nmsWithEta(const float* boxes, const float* scores
return iou <= adaptive_threshold ? 1.0f : 0.0f;
};
parallel_for2d(num_batches, num_classes, [&](int batch_idx, int class_idx) {
if (class_idx != background_class) {
parallel_for2d(m_numBatches, m_numClasses, [&](int batch_idx, int class_idx) {
if (class_idx != m_backgroundClass) {
std::vector<filteredBoxes> fb;
const float* boxesPtr = boxes + batch_idx * boxesStrides[0];
const float* scoresPtr = scores + batch_idx * scoresStrides[0] + class_idx * scoresStrides[1];
std::priority_queue<boxInfo, std::vector<boxInfo>, decltype(less)> sorted_boxes(less);
for (int box_idx = 0; box_idx < num_boxes; box_idx++) {
if (scoresPtr[box_idx] >= score_threshold) // align with ref
for (int box_idx = 0; box_idx < m_numBoxes; box_idx++) {
if (scoresPtr[box_idx] >= m_scoreThreshold) // align with ref
sorted_boxes.emplace(boxInfo({scoresPtr[box_idx], box_idx, 0}));
}
fb.reserve(sorted_boxes.size());
if (sorted_boxes.size() > 0) {
auto adaptive_threshold = iou_threshold;
int max_out_box = (max_output_boxes_per_class > sorted_boxes.size()) ? sorted_boxes.size() : max_output_boxes_per_class;
auto adaptive_threshold = m_iouThreshold;
int max_out_box = (m_nmsRealTopk > sorted_boxes.size()) ? sorted_boxes.size() : m_nmsRealTopk;
while (max_out_box && !sorted_boxes.empty()) {
boxInfo currBox = sorted_boxes.top();
float origScore = currBox.score;
@ -332,49 +356,49 @@ void MKLDNNMultiClassNmsNode::nmsWithEta(const float* boxes, const float* scores
bool box_is_selected = true;
for (int idx = static_cast<int>(fb.size()) - 1; idx >= currBox.suppress_begin_index; idx--) {
float iou = intersectionOverUnion(&boxesPtr[currBox.idx * 4], &boxesPtr[fb[idx].box_index * 4], normalized);
float iou = intersectionOverUnion(&boxesPtr[currBox.idx * 4], &boxesPtr[fb[idx].box_index * 4], m_normalized);
currBox.score *= func(iou, adaptive_threshold);
if (iou >= adaptive_threshold) {
box_is_selected = false;
break;
}
if (currBox.score <= score_threshold)
if (currBox.score <= m_scoreThreshold)
break;
}
currBox.suppress_begin_index = fb.size();
if (box_is_selected) {
if (nms_eta < 1 && adaptive_threshold > 0.5) {
adaptive_threshold *= nms_eta;
if (m_nmsEta < 1 && adaptive_threshold > 0.5) {
adaptive_threshold *= m_nmsEta;
}
if (currBox.score == origScore) {
fb.push_back({currBox.score, batch_idx, class_idx, currBox.idx});
continue;
}
if (currBox.score > score_threshold) {
if (currBox.score > m_scoreThreshold) {
sorted_boxes.push(currBox);
}
}
}
}
numFiltBox[batch_idx][class_idx] = fb.size();
size_t offset = batch_idx * num_classes * max_output_boxes_per_class + class_idx * max_output_boxes_per_class;
m_numFiltBox[batch_idx][class_idx] = fb.size();
size_t offset = batch_idx * m_numClasses * m_nmsRealTopk + class_idx * m_nmsRealTopk;
for (size_t i = 0; i < fb.size(); i++) {
filtBoxes[offset + i] = fb[i];
m_filtBoxes[offset + i] = fb[i];
}
}
});
}
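// Worked example of the adaptive threshold above (illustrative values): m_iouThreshold = 0.7, m_nmsEta = 0.6
//   after the first candidate survives suppression: 0.7 > 0.5, so the threshold becomes 0.7 * 0.6 = 0.42
//   after later candidates: 0.42 <= 0.5, so the threshold stays at 0.42 for the rest of the class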
void MKLDNNMultiClassNmsNode::nmsWithoutEta(const float* boxes, const float* scores, const SizeVector& boxesStrides, const SizeVector& scoresStrides) {
parallel_for2d(num_batches, num_classes, [&](int batch_idx, int class_idx) {
if (class_idx != background_class) {
parallel_for2d(m_numBatches, m_numClasses, [&](int batch_idx, int class_idx) {
if (class_idx != m_backgroundClass) {
const float* boxesPtr = boxes + batch_idx * boxesStrides[0];
const float* scoresPtr = scores + batch_idx * scoresStrides[0] + class_idx * scoresStrides[1];
std::vector<std::pair<float, int>> sorted_boxes;
for (int box_idx = 0; box_idx < num_boxes; box_idx++) {
if (scoresPtr[box_idx] >= score_threshold) // align with ref
for (int box_idx = 0; box_idx < m_numBoxes; box_idx++) {
if (scoresPtr[box_idx] >= m_scoreThreshold) // align with ref
sorted_boxes.emplace_back(std::make_pair(scoresPtr[box_idx], box_idx));
}
@ -383,35 +407,36 @@ void MKLDNNMultiClassNmsNode::nmsWithoutEta(const float* boxes, const float* sco
parallel_sort(sorted_boxes.begin(), sorted_boxes.end(), [](const std::pair<float, int>& l, const std::pair<float, int>& r) {
return (l.first > r.first || ((l.first == r.first) && (l.second < r.second)));
});
int offset = batch_idx * num_classes * max_output_boxes_per_class + class_idx * max_output_boxes_per_class;
filtBoxes[offset + 0] = filteredBoxes(sorted_boxes[0].first, batch_idx, class_idx, sorted_boxes[0].second);
int offset = batch_idx * m_numClasses * m_nmsRealTopk + class_idx * m_nmsRealTopk;
m_filtBoxes[offset + 0] = filteredBoxes(sorted_boxes[0].first, batch_idx, class_idx, sorted_boxes[0].second);
io_selection_size++;
int max_out_box = (max_output_boxes_per_class > sorted_boxes.size()) ? sorted_boxes.size() : max_output_boxes_per_class;
int max_out_box = (m_nmsRealTopk > sorted_boxes.size()) ? sorted_boxes.size() : m_nmsRealTopk;
for (size_t box_idx = 1; box_idx < max_out_box; box_idx++) {
bool box_is_selected = true;
for (int idx = io_selection_size - 1; idx >= 0; idx--) {
float iou =
intersectionOverUnion(&boxesPtr[sorted_boxes[box_idx].second * 4], &boxesPtr[filtBoxes[offset + idx].box_index * 4], normalized);
if (iou >= iou_threshold) {
float iou = intersectionOverUnion(&boxesPtr[sorted_boxes[box_idx].second * 4],
&boxesPtr[m_filtBoxes[offset + idx].box_index * 4], m_normalized);
if (iou >= m_iouThreshold) {
box_is_selected = false;
break;
}
}
if (box_is_selected) {
filtBoxes[offset + io_selection_size] = filteredBoxes(sorted_boxes[box_idx].first, batch_idx, class_idx, sorted_boxes[box_idx].second);
m_filtBoxes[offset + io_selection_size] = filteredBoxes(sorted_boxes[box_idx].first, batch_idx, class_idx,
sorted_boxes[box_idx].second);
io_selection_size++;
}
}
}
numFiltBox[batch_idx][class_idx] = io_selection_size;
m_numFiltBox[batch_idx][class_idx] = io_selection_size;
}
});
}
void MKLDNNMultiClassNmsNode::checkPrecision(const Precision prec, const std::vector<Precision> precList, const std::string name, const std::string type) {
if (std::find(precList.begin(), precList.end(), prec) == precList.end())
IE_THROW() << errorPrefix << "has unsupported '" << name << "' " << type << " precision: " << prec;
IE_THROW() << m_errorPrefix << "has unsupported '" << name << "' " << type << " precision: " << prec;
}
REG_MKLDNN_PRIM_FOR(MKLDNNMultiClassNmsNode, MulticlassNms)

View File

@ -23,12 +23,17 @@ public:
void getSupportedDescriptors() override {};
void initSupportedPrimitiveDescriptors() override;
void createPrimitive() override {};
void createPrimitive() override;
void execute(mkldnn::stream strm) override;
bool created() const override;
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); }
bool needShapeInfer() const override { return false; }
void prepareParams() override;
private:
// input (port Num)
const size_t NMS_BOXES = 0;
@ -39,27 +44,29 @@ private:
const size_t NMS_SELECTEDINDICES = 1;
const size_t NMS_SELECTEDNUM = 2;
bool sort_result_across_batch = false;
MulticlassNmsSortResultType sort_result_type = MulticlassNmsSortResultType::NONE;
bool m_sortResultAcrossBatch = false;
MulticlassNmsSortResultType m_sortResultType = MulticlassNmsSortResultType::NONE;
size_t num_batches = 0;
size_t num_boxes = 0;
size_t num_classes = 0;
size_t m_numBatches = 0;
size_t m_numBoxes = 0;
size_t m_numClasses = 0;
size_t m_maxBoxesPerBatch = 0;
int max_output_boxes_per_class = 0;
float iou_threshold = 0.0f;
float score_threshold = 0.0f;
int m_nmsRealTopk = 0;
int m_nmsTopK = 0;
float m_iouThreshold = 0.0f;
float m_scoreThreshold = 0.0f;
int32_t background_class = 0;
int32_t keep_top_k = 0;
float nms_eta = 0.0f;
bool normalized = true;
int32_t m_backgroundClass = 0;
int32_t m_keepTopK = 0;
float m_nmsEta = 0.0f;
bool m_normalized = true;
std::string errorPrefix;
std::string m_errorPrefix;
std::vector<std::vector<size_t>> numFiltBox;
std::vector<size_t> numBoxOffset;
const std::string inType = "input", outType = "output";
std::vector<std::vector<size_t>> m_numFiltBox;
std::vector<size_t> m_numBoxOffset;
const std::string m_inType = "input", m_outType = "output";
struct filteredBoxes {
float score;
@ -77,7 +84,7 @@ private:
int suppress_begin_index;
};
std::vector<filteredBoxes> filtBoxes;
std::vector<filteredBoxes> m_filtBoxes;
void checkPrecision(const InferenceEngine::Precision prec, const std::vector<InferenceEngine::Precision> precList, const std::string name,
const std::string type);

View File

@ -64,8 +64,8 @@ void NmsStaticShapeIE<BaseNmsOp>::validate_and_infer_types() {
if (num_boxes_boxes.is_static() && scores_ps[0].is_static() && scores_ps[1].is_static()) {
const auto num_boxes = num_boxes_boxes.get_length();
auto num_classes = scores_ps[1].get_length();
if (this->m_attrs.background_class >=0 && this->m_attrs.background_class <= num_classes) {
num_classes = num_classes - 1;
if (this->m_attrs.background_class >= 0 && this->m_attrs.background_class < num_classes) {
num_classes = std::max(int64_t{1}, num_classes - 1);
}
int64_t max_output_boxes_per_class = 0;
if (this->m_attrs.nms_top_k >= 0)

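A worked example of the corrected background_class handling above, assuming num_classes = 5 (illustrative values):
    background_class = 1  -> in range, num_classes becomes max(1, 5 - 1) = 4
    background_class = 5  -> out of range under the new "< num_classes" check, num_classes stays 5
                             (the old "<= num_classes" check would have wrongly reduced it to 4)
    background_class = -1 -> no class is excluded, num_classes stays 5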
View File

@ -22,5 +22,5 @@ class TRANSFORMATIONS_API ConvertMatrixNmsToMatrixNmsIE;
class ngraph::pass::ConvertMatrixNmsToMatrixNmsIE: public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
ConvertMatrixNmsToMatrixNmsIE();
ConvertMatrixNmsToMatrixNmsIE(bool force_i32_output_type = true);
};

View File

@ -22,5 +22,5 @@ class TRANSFORMATIONS_API ConvertMulticlassNmsToMulticlassNmsIE;
class ngraph::pass::ConvertMulticlassNmsToMulticlassNmsIE: public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
ConvertMulticlassNmsToMulticlassNmsIE();
ConvertMulticlassNmsToMulticlassNmsIE(bool force_i32_output_type = true);
};

View File

@ -18,13 +18,18 @@
NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertMatrixNmsToMatrixNmsIE, "ConvertMatrixNmsToMatrixNmsIE", 0);
ngraph::pass::ConvertMatrixNmsToMatrixNmsIE::ConvertMatrixNmsToMatrixNmsIE() {
ngraph::pass::ConvertMatrixNmsToMatrixNmsIE::ConvertMatrixNmsToMatrixNmsIE(bool force_i32_output_type) {
MATCHER_SCOPE(ConvertMatrixNmsToMatrixNmsIE);
auto nms = ngraph::pattern::wrap_type<ngraph::opset8::MatrixNms>();
ngraph::matcher_pass_callback callback = [](pattern::Matcher &m) {
ngraph::matcher_pass_callback callback = [=](pattern::Matcher &m) {
auto nms = std::dynamic_pointer_cast<ngraph::opset8::MatrixNms>(m.get_match_root());
if (!nms) {
if (!nms || transformation_callback(nms)) {
return false;
}
// if input shape is dynamic force the output shape must be dynamic too
if (nms->get_input_partial_shape(0).is_dynamic() || nms->get_input_partial_shape(1).is_dynamic()) {
return false;
}
@ -32,7 +37,7 @@ ngraph::pass::ConvertMatrixNmsToMatrixNmsIE::ConvertMatrixNmsToMatrixNmsIE() {
// vector of new nGraph operations
NodeVector new_ops;
auto attrs = nms->get_attrs();
attrs.output_type = element::i32;
attrs.output_type = force_i32_output_type ? element::i32 : nms->get_output_type();
auto nms_new = std::make_shared<op::internal::NmsStaticShapeIE<ngraph::opset8::MatrixNms>>(
new_args.at(0),
new_args.at(1),

View File

@ -18,13 +18,18 @@
NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertMulticlassNmsToMulticlassNmsIE, "ConvertMulticlassNmsToMulticlassNmsIE", 0);
ngraph::pass::ConvertMulticlassNmsToMulticlassNmsIE::ConvertMulticlassNmsToMulticlassNmsIE() {
ngraph::pass::ConvertMulticlassNmsToMulticlassNmsIE::ConvertMulticlassNmsToMulticlassNmsIE(bool force_i32_output_type) {
MATCHER_SCOPE(ConvertMulticlassNmsToMulticlassNmsIE);
auto nms = ngraph::pattern::wrap_type<ngraph::opset8::MulticlassNms>();
ngraph::matcher_pass_callback callback = [](pattern::Matcher &m) {
ngraph::matcher_pass_callback callback = [=](pattern::Matcher &m) {
auto nms = std::dynamic_pointer_cast<ngraph::opset8::MulticlassNms>(m.get_match_root());
if (!nms) {
if (!nms || transformation_callback(nms)) {
return false;
}
// if input shape is dynamic force the output shape must be dynamic too
if (nms->get_input_partial_shape(0).is_dynamic() || nms->get_input_partial_shape(1).is_dynamic()) {
return false;
}
@ -32,7 +37,7 @@ ngraph::pass::ConvertMulticlassNmsToMulticlassNmsIE::ConvertMulticlassNmsToMulti
// vector of new nGraph operations
NodeVector new_ops;
auto attrs = nms->get_attrs();
attrs.output_type = element::i32;
attrs.output_type = force_i32_output_type ? element::i32 : nms->get_output_type();
auto nms_new = std::make_shared<op::internal::NmsStaticShapeIE<ngraph::opset8::MulticlassNms>>(
new_args.at(0),

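Both converters now take a force_i32_output_type flag that defaults to true, preserving the previous behaviour. A minimal usage sketch, assuming a ready std::shared_ptr<ngraph::Function> f and mirroring the registration added earlier in this commit:

::ngraph::pass::Manager manager;
// pass false to keep the original output_type (e.g. i64) instead of forcing i32
manager.register_pass<::ngraph::pass::ConvertMatrixNmsToMatrixNmsIE>(false);
manager.register_pass<::ngraph::pass::ConvertMulticlassNmsToMulticlassNmsIE>(false);
manager.run_passes(f);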
View File

@ -7,22 +7,31 @@
#include "shared_test_classes/single_layer/matrix_nms.hpp"
using namespace ngraph;
using namespace LayerTestsDefinitions;
using namespace ov::test::subgraph;
namespace {
TEST_P(MatrixNmsLayerTest, Serialize) {
Serialize();
serialize();
}
const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP32,
InferenceEngine::Precision::FP16
const std::vector<ov::test::ElementType> netPrecisions = {
ov::element::f32,
ov::element::f16
};
const std::vector<InputShapeParams> inShapeParams = {
InputShapeParams{3, 100, 5},
InputShapeParams{1, 10, 50},
InputShapeParams{2, 50, 50}
const std::vector<std::vector<ov::test::InputShape>> shapeParams = {
// num_batches, num_boxes, 4
{{{ngraph::Dimension::dynamic(), ngraph::Dimension::dynamic(), 4},
{{1, 10, 4}, {2, 100, 4}}},
// num_batches, num_classes, num_boxes
{{ngraph::Dimension::dynamic(), ngraph::Dimension::dynamic(), ngraph::Dimension::dynamic()},
{{1, 3, 10}, {2, 5, 100}}}},
// num_batches, num_boxes, 4
{{{ngraph::Dimension(1, 10), ngraph::Dimension(1, 100), 4},
{{1, 10, 4}, {2, 100, 4}}},
// num_batches, num_classes, num_boxes
{{{ngraph::Dimension(1, 10), ngraph::Dimension(1, 100), ngraph::Dimension(1, 100)}},
{{1, 3, 10}, {2, 5, 100}}}}
};
const std::vector<op::v8::MatrixNms::SortResultType> sortResultType = {op::v8::MatrixNms::SortResultType::CLASSID,
@ -43,10 +52,10 @@ namespace {
const std::vector<bool> normalized = {true, false};
const std::vector<op::v8::MatrixNms::DecayFunction> decayFunction = {op::v8::MatrixNms::DecayFunction::GAUSSIAN,
op::v8::MatrixNms::DecayFunction::LINEAR};
const auto nmsParams = ::testing::Combine(::testing::ValuesIn(inShapeParams),
::testing::Combine(::testing::Values(InferenceEngine::Precision::FP32),
::testing::Values(InferenceEngine::Precision::I32),
::testing::Values(InferenceEngine::Precision::FP32)),
const auto nmsParams = ::testing::Combine(::testing::ValuesIn(shapeParams),
::testing::Combine(::testing::Values(ov::element::f32),
::testing::Values(ov::element::i32),
::testing::Values(ov::element::f32)),
::testing::ValuesIn(sortResultType),
::testing::ValuesIn(outType),
::testing::ValuesIn(topKParams),

View File

@ -7,19 +7,25 @@
#include "shared_test_classes/single_layer/multiclass_nms.hpp"
using namespace ngraph;
using namespace LayerTestsDefinitions;
using namespace ov::test::subgraph;
namespace {
TEST_P(MulticlassNmsLayerTest, Serialize) {
Serialize();
serialize();
}
const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP32, InferenceEngine::Precision::FP16};
const std::vector<InputShapeParams> inShapeParams = {
InputShapeParams{3, 100, 5}, InputShapeParams{1, 10, 50},
InputShapeParams{2, 50, 50}};
const std::vector<std::vector<ov::test::InputShape>> shapeParams = {
// num_batches, num_boxes, 4
{{{ngraph::Dimension::dynamic(), ngraph::Dimension::dynamic(), 4},
{{1, 10, 4}, {2, 100, 4}}},
// num_batches, num_classes, num_boxes
{{ngraph::Dimension::dynamic(), ngraph::Dimension::dynamic(), ngraph::Dimension::dynamic()},
{{1, 3, 10}, {2, 5, 100}}}},
{{{ngraph::Dimension(1, 10), ngraph::Dimension(1, 100), 4},
{{1, 10, 4}, {2, 100, 4}}},
{{{ngraph::Dimension(1, 10), ngraph::Dimension(1, 100), ngraph::Dimension(1, 100)}},
{{1, 3, 10}, {2, 5, 100}}}}
};
const std::vector<int32_t> nmsTopK = {-1, 20};
const std::vector<float> iouThreshold = {0.7f};
@ -37,10 +43,10 @@ const std::vector<float> nmsEta = {0.6f, 1.0f};
const std::vector<bool> normalized = {true, false};
const auto nmsParams = ::testing::Combine(
::testing::ValuesIn(inShapeParams),
::testing::Combine(::testing::Values(InferenceEngine::Precision::FP32),
::testing::Values(InferenceEngine::Precision::I32),
::testing::Values(InferenceEngine::Precision::FP32)),
::testing::ValuesIn(shapeParams),
::testing::Combine(::testing::Values(ov::element::f32),
::testing::Values(ov::element::i32),
::testing::Values(ov::element::f32)),
::testing::ValuesIn(nmsTopK),
::testing::Combine(::testing::ValuesIn(iouThreshold),
::testing::ValuesIn(scoreThreshold),

View File

@ -14,6 +14,7 @@
#include <ngraph/opsets/opset3.hpp>
#include <ngraph/opsets/opset4.hpp>
#include <ngraph/opsets/opset5.hpp>
#include <ngraph/opsets/opset8.hpp>
#include <transformations/convert_precision.hpp>
#include <transformations/utils/utils.hpp>
#include <ngraph/pass/manager.hpp>
@ -125,6 +126,58 @@ TEST(TransformationTests, ConvertPrecision_NMS5) {
ASSERT_FALSE(has_type<ngraph::element::Type_t::f32>(f));
}
TEST(TransformationTests, ConvertPrecision_MatrixNms) {
std::shared_ptr<ngraph::Function> f;
{
auto boxes = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f16, ngraph::Shape{1, 1000, 4});
auto scores = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f16, ngraph::Shape{1, 1, 1000});
op::v8::MatrixNms::Attributes attrs;
attrs.output_type = ngraph::element::i64;
auto nms = std::make_shared<ngraph::opset8::MatrixNms>(boxes, scores, attrs);
auto result1 = std::make_shared<ngraph::opset8::Result>(nms->output(0));
auto result2 = std::make_shared<ngraph::opset8::Result>(nms->output(1));
auto result3 = std::make_shared<ngraph::opset8::Result>(nms->output(2));
f = std::make_shared<ngraph::Function>(ngraph::ResultVector{result1, result2, result3}, ngraph::ParameterVector{boxes, scores});
}
pass::Manager manager;
static const precisions_array precisions = {
{ ngraph::element::i64, ngraph::element::i32 },
{ ngraph::element::f16, ngraph::element::f32 }
};
manager.register_pass<ngraph::pass::ConvertPrecision>(precisions);
manager.run_passes(f);
ASSERT_FALSE(has_type<ngraph::element::Type_t::i64>(f));
ASSERT_FALSE(has_type<ngraph::element::Type_t::f16>(f));
}
TEST(TransformationTests, ConvertPrecision_MulticlassNms) {
std::shared_ptr<ngraph::Function> f;
{
auto boxes = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f16, ngraph::Shape{1, 1000, 4});
auto scores = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f16, ngraph::Shape{1, 1, 1000});
op::v8::MulticlassNms::Attributes attrs;
attrs.output_type = ngraph::element::i64;
auto nms = std::make_shared<ngraph::opset8::MulticlassNms>(boxes, scores, attrs);
auto result1 = std::make_shared<ngraph::opset8::Result>(nms->output(0));
auto result2 = std::make_shared<ngraph::opset8::Result>(nms->output(1));
auto result3 = std::make_shared<ngraph::opset8::Result>(nms->output(2));
f = std::make_shared<ngraph::Function>(ngraph::ResultVector{result1, result2, result3}, ngraph::ParameterVector{boxes, scores});
}
pass::Manager manager;
static const precisions_array precisions = {
{ ngraph::element::i64, ngraph::element::i32 },
{ ngraph::element::f16, ngraph::element::f32 }
};
manager.register_pass<ngraph::pass::ConvertPrecision>(precisions);
manager.run_passes(f);
ASSERT_FALSE(has_type<ngraph::element::Type_t::i64>(f));
ASSERT_FALSE(has_type<ngraph::element::Type_t::f16>(f));
}
TEST(TransformationTests, ConvertPrecision_ShapeOf) {
std::shared_ptr<Function> f(nullptr);
{

View File

@ -8,14 +8,25 @@
#include "single_layer_tests/matrix_nms.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
using namespace ov::test::subgraph;
using namespace InferenceEngine;
using namespace ngraph;
const std::vector<std::vector<ov::Shape>> inStaticShapeParams = {
{{3, 100, 4}, {3, 1, 100}},
{{1, 10, 4}, {1, 100, 10 }}
};
const std::vector<InputShapeParams> inShapeParams = {
InputShapeParams{3, 100, 5},
InputShapeParams{1, 10, 50},
InputShapeParams{2, 50, 50}
const std::vector<std::vector<ov::test::InputShape>> inDynamicShapeParams = {
// num_batches, num_boxes, 4
{{{ngraph::Dimension::dynamic(), ngraph::Dimension::dynamic(), 4},
{{1, 10, 4}, {2, 100, 4}}},
// num_batches, num_classes, num_boxes
{{ngraph::Dimension::dynamic(), ngraph::Dimension::dynamic(), ngraph::Dimension::dynamic()},
{{1, 3, 10}, {2, 5, 100}}}},
{{{ngraph::Dimension(1, 10), ngraph::Dimension(1, 100), 4},
{{1, 10, 4}, {2, 100, 4}}},
{{{ngraph::Dimension(1, 10), ngraph::Dimension(1, 100), ngraph::Dimension(1, 100)}},
{{1, 3, 10}, {2, 5, 100}}}}
};
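// Each entry above pairs the PartialShape bounds used to build the network with the concrete
// target shapes the test infers on. Assuming ov::test::InputShape is such a
// {PartialShape, {static shapes}} pair, a single hypothetical 'boxes' entry would be:
//   ov::test::InputShape boxes{{ngraph::Dimension::dynamic(), ngraph::Dimension::dynamic(), 4},
//                              {{1, 10, 4}, {2, 100, 4}}};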
const std::vector<op::v8::MatrixNms::SortResultType> sortResultType = {op::v8::MatrixNms::SortResultType::CLASSID,
@ -32,23 +43,38 @@ const std::vector<ThresholdParams> thresholdParams = {
};
const std::vector<int> nmsTopK = {-1, 100};
const std::vector<int> keepTopK = {-1, 5};
const std::vector<int> backgroudClass = {-1, 0};
const std::vector<int> backgroudClass = {-1, 1};
const std::vector<bool> normalized = {true, false};
const std::vector<op::v8::MatrixNms::DecayFunction> decayFunction = {op::v8::MatrixNms::DecayFunction::GAUSSIAN,
op::v8::MatrixNms::DecayFunction::LINEAR};
const auto nmsParams = ::testing::Combine(::testing::ValuesIn(inShapeParams),
::testing::Combine(::testing::Values(Precision::FP32),
::testing::Values(Precision::I32),
::testing::Values(Precision::FP32)),
::testing::ValuesIn(sortResultType),
::testing::ValuesIn(outType),
::testing::ValuesIn(topKParams),
::testing::ValuesIn(thresholdParams),
::testing::ValuesIn(backgroudClass),
::testing::ValuesIn(normalized),
::testing::ValuesIn(decayFunction),
::testing::Values(CommonTestUtils::DEVICE_CPU)
const auto nmsParamsStatic = ::testing::Combine(::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inStaticShapeParams)),
::testing::Combine(::testing::Values(ov::element::f32),
::testing::Values(ov::element::i32),
::testing::Values(ov::element::f32)),
::testing::ValuesIn(sortResultType),
::testing::ValuesIn(outType),
::testing::ValuesIn(topKParams),
::testing::ValuesIn(thresholdParams),
::testing::ValuesIn(backgroudClass),
::testing::ValuesIn(normalized),
::testing::ValuesIn(decayFunction),
::testing::Values(CommonTestUtils::DEVICE_CPU)
);
INSTANTIATE_TEST_SUITE_P(smoke_MatrixNmsLayerTest, MatrixNmsLayerTest, nmsParams, MatrixNmsLayerTest::getTestCaseName);
const auto nmsParamsDynamic = ::testing::Combine(::testing::ValuesIn(inDynamicShapeParams),
::testing::Combine(::testing::Values(ov::element::f32),
::testing::Values(ov::element::i32),
::testing::Values(ov::element::f32)),
::testing::ValuesIn(sortResultType),
::testing::ValuesIn(outType),
::testing::ValuesIn(topKParams),
::testing::ValuesIn(thresholdParams),
::testing::ValuesIn(backgroudClass),
::testing::ValuesIn(normalized),
::testing::ValuesIn(decayFunction),
::testing::Values(CommonTestUtils::DEVICE_CPU)
);
INSTANTIATE_TEST_SUITE_P(smoke_MatrixNmsLayerTest_static, MatrixNmsLayerTest, nmsParamsStatic, MatrixNmsLayerTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_MatrixNmsLayerTest_dynamic, MatrixNmsLayerTest, nmsParamsDynamic, MatrixNmsLayerTest::getTestCaseName);

View File

@ -8,16 +8,32 @@
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
using namespace ov::test::subgraph;
using namespace InferenceEngine;
using namespace ngraph;
const std::vector<InputShapeParams> inShapeParams = {InputShapeParams {3, 100, 5}, InputShapeParams {1, 10, 50}, InputShapeParams {2, 50, 50}};
const std::vector<std::vector<ov::Shape>> inStaticShapeParams = {
{{3, 100, 4}, {3, 1, 100}},
{{1, 10, 4}, {1, 100, 10 }}
};
const std::vector<std::vector<ov::test::InputShape>> inDynamicShapeParams = {
// num_batches, num_boxes, 4
{{{ngraph::Dimension::dynamic(), ngraph::Dimension::dynamic(), 4},
{{1, 10, 4}, {2, 100, 4}}},
// num_batches, num_classes, num_boxes
{{ngraph::Dimension::dynamic(), ngraph::Dimension::dynamic(), ngraph::Dimension::dynamic()},
{{1, 3, 10}, {2, 5, 100}}}},
{{{ngraph::Dimension(1, 10), ngraph::Dimension(1, 100), 4},
{{1, 10, 4}, {2, 100, 4}}},
{{{ngraph::Dimension(1, 10), ngraph::Dimension(1, 100), ngraph::Dimension(1, 100)}},
{{1, 3, 10}, {2, 5, 100}}}}
};
const std::vector<int32_t> nmsTopK = {-1, 20};
const std::vector<float> iouThreshold = {0.7f};
const std::vector<float> scoreThreshold = {0.7f};
const std::vector<int32_t> backgroundClass = {-1, 0};
const std::vector<int32_t> backgroundClass = {-1, 1};
const std::vector<int32_t> keepTopK = {-1, 30};
const std::vector<element::Type> outType = {element::i32, element::i64};
@@ -27,11 +43,29 @@ const std::vector<bool> sortResDesc = {true, false};
const std::vector<float> nmsEta = {0.6f, 1.0f};
const std::vector<bool> normalized = {true, false};
const auto nmsParams = ::testing::Combine(
::testing::ValuesIn(inShapeParams),
::testing::Combine(::testing::Values(Precision::FP32), ::testing::Values(Precision::I32), ::testing::Values(Precision::FP32)), ::testing::ValuesIn(nmsTopK),
const auto nmsParamsStatic = ::testing::Combine(
::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inStaticShapeParams)),
::testing::Combine(::testing::Values(ov::element::f32), ::testing::Values(ov::element::i32), ::testing::Values(ov::element::f32)),
::testing::ValuesIn(nmsTopK),
::testing::Combine(::testing::ValuesIn(iouThreshold), ::testing::ValuesIn(scoreThreshold), ::testing::ValuesIn(nmsEta)),
::testing::ValuesIn(backgroundClass), ::testing::ValuesIn(keepTopK), ::testing::ValuesIn(outType), ::testing::ValuesIn(sortResultType),
::testing::Combine(::testing::ValuesIn(sortResDesc), ::testing::ValuesIn(normalized)), ::testing::Values(CommonTestUtils::DEVICE_CPU));
::testing::ValuesIn(backgroundClass),
::testing::ValuesIn(keepTopK),
::testing::ValuesIn(outType),
::testing::ValuesIn(sortResultType),
::testing::Combine(::testing::ValuesIn(sortResDesc), ::testing::ValuesIn(normalized)),
::testing::Values(CommonTestUtils::DEVICE_CPU));
INSTANTIATE_TEST_SUITE_P(smoke_MulticlassNmsLayerTest, MulticlassNmsLayerTest, nmsParams, MulticlassNmsLayerTest::getTestCaseName);
const auto nmsParamsDynamic = ::testing::Combine(
::testing::ValuesIn(inDynamicShapeParams),
::testing::Combine(::testing::Values(ov::element::f32), ::testing::Values(ov::element::i32), ::testing::Values(ov::element::f32)),
::testing::ValuesIn(nmsTopK),
::testing::Combine(::testing::ValuesIn(iouThreshold), ::testing::ValuesIn(scoreThreshold), ::testing::ValuesIn(nmsEta)),
::testing::ValuesIn(backgroundClass),
::testing::ValuesIn(keepTopK),
::testing::ValuesIn(outType),
::testing::ValuesIn(sortResultType),
::testing::Combine(::testing::ValuesIn(sortResDesc), ::testing::ValuesIn(normalized)),
::testing::Values(CommonTestUtils::DEVICE_CPU));
INSTANTIATE_TEST_SUITE_P(smoke_MulticlassNmsLayerTest_static, MulticlassNmsLayerTest, nmsParamsStatic, MulticlassNmsLayerTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_MulticlassNmsLayerTest_dynamic, MulticlassNmsLayerTest, nmsParamsDynamic, MulticlassNmsLayerTest::getTestCaseName);

View File

@@ -6,10 +6,14 @@
#include "shared_test_classes/single_layer/matrix_nms.hpp"
namespace LayerTestsDefinitions {
namespace ov {
namespace test {
namespace subgraph {
TEST_P(MatrixNmsLayerTest, CompareWithRefs) {
Run();
run();
};
} // namespace LayerTestsDefinitions
} // namespace subgraph
} // namespace test
} // namespace ov

View File

@@ -6,10 +6,14 @@
#include "shared_test_classes/single_layer/multiclass_nms.hpp"
namespace LayerTestsDefinitions {
namespace ov {
namespace test {
namespace subgraph {
TEST_P(MulticlassNmsLayerTest, CompareWithRefs) {
Run();
run();
};
} // namespace LayerTestsDefinitions
} // namespace subgraph
} // namespace test
} // namespace ov

View File

@@ -7,18 +7,18 @@
#include <tuple>
#include <string>
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "ngraph_functions/builders.hpp"
#include "ngraph_functions/utils/ngraph_helpers.hpp"
#include "common_test_utils/common_utils.hpp"
#include "shared_test_classes/base/ov_subgraph.hpp"
namespace LayerTestsDefinitions {
namespace ov {
namespace test {
namespace subgraph {
using InputShapeParams = std::tuple<size_t, // Number of batches
size_t, // Number of boxes
size_t>; // Number of classes
using InputPrecisions = std::tuple<InferenceEngine::Precision, // boxes and scores precisions
InferenceEngine::Precision, // max_output_boxes_per_class precision
InferenceEngine::Precision>; // iou_threshold, score_threshold, soft_nms_sigma precisions
using InputPrecisions = std::tuple<ElementType, // boxes and scores precisions
ElementType, // max_output_boxes_per_class
// precision
ElementType>; // iou_threshold, score_threshold,
// soft_nms_sigma precisions
using TopKParams = std::tuple<int, // Maximum number of boxes to be selected per class
int>; // Maximum number of boxes to be selected per batch element
@@ -27,7 +27,7 @@ using ThresholdParams = std::tuple<float, // minimum score to consider box for
float, // gaussian_sigma parameter for gaussian decay_function
float>; // filter out boxes with low confidence score after decaying
using NmsParams = std::tuple<InputShapeParams, // Params using to create 1st and 2nd inputs
using NmsParams = std::tuple<std::vector<InputShape>, // Params using to create 1st and 2nd inputs
InputPrecisions, // Input precisions
ngraph::op::v8::MatrixNms::SortResultType, // Order of output elements
ngraph::element::Type, // Output type
@@ -38,21 +38,22 @@ using NmsParams = std::tuple<InputShapeParams,
ngraph::op::v8::MatrixNms::DecayFunction, // Decay function
std::string>; // Device name
class MatrixNmsLayerTest : public testing::WithParamInterface<NmsParams>, virtual public LayerTestsUtils::LayerTestsCommon {
class MatrixNmsLayerTest : public testing::WithParamInterface<NmsParams>,
virtual public SubgraphBaseTest {
public:
static std::string getTestCaseName(const testing::TestParamInfo<NmsParams>& obj);
void GenerateInputs() override;
void Compare(const std::vector<std::pair<ngraph::element::Type, std::vector<std::uint8_t>>> &expectedOutputs,
const std::vector<InferenceEngine::Blob::Ptr> &actualOutputs)
override;
void generate_inputs(const std::vector<ngraph::Shape>& targetInputStaticShapes) override;
void compare(const std::vector<ov::runtime::Tensor> &expected, const std::vector<ov::runtime::Tensor> &actual) override;
protected:
void SetUp() override;
private:
size_t numBatches, numBoxes, numClasses;
size_t maxOutputBoxesPerClass;
size_t maxOutputBoxesPerBatch;
void GetOutputParams(size_t& numBatches, size_t& maxOutputBoxesPerBatch);
ngraph::op::v8::MatrixNms::Attributes m_attrs;
bool m_outStaticShape;
};
} // namespace LayerTestsDefinitions
} // namespace subgraph
} // namespace test
} // namespace ov

View File

@@ -7,20 +7,19 @@
#include <string>
#include <tuple>
#include "ngraph_functions/builders.hpp"
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "ngraph_functions/utils/ngraph_helpers.hpp"
#include "common_test_utils/common_utils.hpp"
#include "shared_test_classes/base/ov_subgraph.hpp"
namespace LayerTestsDefinitions {
namespace ov {
namespace test {
namespace subgraph {
using InputShapeParams = std::tuple<size_t, // Number of batches
size_t, // Number of boxes
size_t>; // Number of classes
using InputPrecisions = std::tuple<InferenceEngine::Precision, // boxes and scores precisions
InferenceEngine::Precision, // max_output_boxes_per_class
// precision
InferenceEngine::Precision>; // iou_threshold, score_threshold,
// soft_nms_sigma precisions
using InputPrecisions = std::tuple<ElementType, // boxes and scores precisions
ElementType, // max_output_boxes_per_class
// precision
ElementType>; // iou_threshold, score_threshold,
// soft_nms_sigma precisions
using InputfloatVar = std::tuple<float, // iouThreshold
float, // scoreThreshold
@@ -29,7 +28,7 @@ using InputfloatVar = std::tuple<float, // iouThreshold
using InputboolVar = std::tuple<bool, // nmsEta
bool>; // normalized
using MulticlassNmsParams = std::tuple<InputShapeParams, // Params using to create 1st and 2nd inputs
using MulticlassNmsParams = std::tuple<std::vector<InputShape>, // Params using to create 1st and 2nd inputs
InputPrecisions, // Input precisions
int32_t, // Max output boxes per class
InputfloatVar, // iouThreshold, scoreThreshold, nmsEta
@@ -40,20 +39,21 @@ using MulticlassNmsParams = std::tuple<InputShapeParams,
InputboolVar, // Sort result across batch, normalized
std::string>;
class MulticlassNmsLayerTest : public testing::WithParamInterface<MulticlassNmsParams>, virtual public LayerTestsUtils::LayerTestsCommon {
class MulticlassNmsLayerTest : public testing::WithParamInterface<MulticlassNmsParams>,
virtual public SubgraphBaseTest {
public:
static std::string getTestCaseName(const testing::TestParamInfo<MulticlassNmsParams>& obj);
void GenerateInputs() override;
void Compare(const std::vector<std::pair<ngraph::element::Type, std::vector<std::uint8_t>>>& expectedOutputs,
const std::vector<InferenceEngine::Blob::Ptr>& actualOutputs) override;
void generate_inputs(const std::vector<ngraph::Shape>& targetInputStaticShapes) override;
void compare(const std::vector<ov::runtime::Tensor> &expected, const std::vector<ov::runtime::Tensor> &actual) override;
protected:
void SetUp() override;
private:
size_t numBatches, numBoxes, numClasses;
size_t maxOutputBoxesPerClass;
size_t maxOutputBoxesPerBatch;
void GetOutputParams(size_t& numBatches, size_t& maxOutputBoxesPerBatch);
ngraph::op::v8::MulticlassNms::Attributes m_attrs;
bool m_outStaticShape;
};
} // namespace LayerTestsDefinitions
} // namespace subgraph
} // namespace test
} // namespace ov

View File

@@ -2,16 +2,22 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "ngraph_functions/builders.hpp"
#include "functional_test_utils/ov_tensor_utils.hpp"
#include "shared_test_classes/single_layer/matrix_nms.hpp"
#include "shared_test_classes/base/layer_test_utils.hpp"
namespace LayerTestsDefinitions {
#include "functional_test_utils/plugin_cache.hpp"
namespace ov {
namespace test {
namespace subgraph {
using namespace ngraph;
using namespace InferenceEngine;
using namespace FuncTestUtils::PrecisionUtils;
std::string MatrixNmsLayerTest::getTestCaseName(const testing::TestParamInfo<NmsParams>& obj) {
InputShapeParams inShapeParams;
std::vector<InputShape> shapes;
InputPrecisions inPrecisions;
op::v8::MatrixNms::SortResultType sortResultType;
element::Type outType;
@@ -21,13 +27,10 @@ std::string MatrixNmsLayerTest::getTestCaseName(const testing::TestParamInfo<Nms
ThresholdParams thresholdParams;
bool normalized;
std::string targetDevice;
std::tie(inShapeParams, inPrecisions, sortResultType, outType, topKParams, thresholdParams,
std::tie(shapes, inPrecisions, sortResultType, outType, topKParams, thresholdParams,
backgroudClass, normalized, decayFunction, targetDevice) = obj.param;
size_t numBatches, numBoxes, numClasses;
std::tie(numBatches, numBoxes, numClasses) = inShapeParams;
Precision paramsPrec, maxBoxPrec, thrPrec;
ElementType paramsPrec, maxBoxPrec, thrPrec;
std::tie(paramsPrec, maxBoxPrec, thrPrec) = inPrecisions;
int nmsTopK, keepTopK;
@@ -37,8 +40,18 @@ std::string MatrixNmsLayerTest::getTestCaseName(const testing::TestParamInfo<Nms
std::tie(score_threshold, gaussian_sigma, post_threshold) = thresholdParams;
std::ostringstream result;
result << "numBatches=" << numBatches << "_numBoxes=" << numBoxes << "_numClasses=" << numClasses << "_";
result << "paramsPrec=" << paramsPrec << "_maxBoxPrec=" << maxBoxPrec << "_thrPrec=" << thrPrec << "_";
result << "IS=(";
for (const auto& shape : shapes) {
result << CommonTestUtils::partialShape2str({shape.first}) << "_";
}
result << ")_TS=(";
for (const auto& shape : shapes) {
for (const auto& item : shape.second) {
result << CommonTestUtils::vec2str(item) << "_";
}
}
result << ")_paramsPrec=" << paramsPrec << "_maxBoxPrec=" << maxBoxPrec << "_thrPrec=" << thrPrec << "_";
result << "sortResultType=" << sortResultType << "_normalized=" << normalized << "_";
result << "outType=" << outType << "_nmsTopK=" << nmsTopK << "_keepTopK=" << keepTopK << "_";
result << "backgroudClass=" << backgroudClass << "_decayFunction=" << decayFunction << "_";
@@ -47,38 +60,86 @@ std::string MatrixNmsLayerTest::getTestCaseName(const testing::TestParamInfo<Nms
return result.str();
}
void MatrixNmsLayerTest::GenerateInputs() {
size_t it = 0;
for (const auto &input : cnnNetwork.getInputsInfo()) {
const auto &info = input.second;
Blob::Ptr blob;
void MatrixNmsLayerTest::generate_inputs(const std::vector<ngraph::Shape>& targetInputStaticShapes) {
inputs.clear();
if (it == 1) {
blob = make_blob_with_precision(info->getTensorDesc());
blob->allocate();
CommonTestUtils::fill_data_random_float<Precision::FP32>(blob, 1, 0, 100000);
const auto& funcInputs = function->inputs();
for (int i = 0; i < funcInputs.size(); ++i) {
const auto& funcInput = funcInputs[i];
ov::runtime::Tensor tensor;
if (i == 1) {
tensor = ov::runtime::Tensor(funcInput.get_element_type(), targetInputStaticShapes[i]);
const size_t range = 1;
const size_t startFrom = 0;
const size_t k = 1000;
const int seed = 1;
std::default_random_engine random(seed);
std::uniform_int_distribution<int32_t> distribution(k * startFrom, k * (startFrom + range));
auto *dataPtr = tensor.data<float>();
for (size_t i = 0; i < tensor.get_size(); i++) {
auto value = static_cast<float>(distribution(random));
dataPtr[i] = value / static_cast<float>(k);
}
} else {
blob = GenerateInput(*info);
tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i]);
}
inputs.push_back(blob);
it++;
inputs.insert({funcInput.get_node_shared_ptr(), tensor});
}
}
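The scores input (index 1) is filled manually above; with the constants used (startFrom = 0, range = 1, k = 1000) the draw reduces to uniform values on [0, 1] with a step of 0.001. A small stand-alone sketch of that arithmetic (illustrative only, requires <random>, not part of the test):
// Reproduces the score-filling arithmetic with the same constants as above.
std::default_random_engine rng(1);                                       // seed = 1, as in the test
std::uniform_int_distribution<int32_t> dist(1000 * 0, 1000 * (0 + 1));   // integers in [0, 1000]
float score = static_cast<float>(dist(rng)) / 1000.f;                    // uniform in [0.0, 1.0], 0.001 resolution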
void MatrixNmsLayerTest::Compare(const std::vector<std::pair<ngraph::element::Type, std::vector<std::uint8_t>>> &expectedOutputs,
const std::vector<Blob::Ptr> &actualOutputs) {
void MatrixNmsLayerTest::GetOutputParams(size_t& numBatches, size_t& maxOutputBoxesPerBatch) {
size_t it = 0;
size_t numBoxes = 0, numClasses = 0;
const auto& funcInputs = function->inputs();
for (int i = 0; i < funcInputs.size(); ++i) {
const auto& funcInput = funcInputs[i];
const auto& dims = inputs[funcInput.get_node_shared_ptr()].get_shape();
if (it == 1) {
numClasses = dims[1];
} else {
numBatches = dims[0];
numBoxes = dims[1];
}
it++;
}
ASSERT_TRUE(numBatches > 0 && numBoxes > 0 && numClasses > 0)
<< "Expected numBatches, numBoxes, numClasses > 0, got:" << numBatches << ", " << numBoxes << ", " << numClasses;
auto realClasses = numClasses;
if (m_attrs.background_class >= 0 && m_attrs.background_class < numClasses) {
realClasses = realClasses - 1;
}
size_t maxOutputBoxesPerClass = 0;
if (m_attrs.nms_top_k >= 0)
maxOutputBoxesPerClass = std::min(numBoxes, static_cast<size_t>(m_attrs.nms_top_k));
else
maxOutputBoxesPerClass = numBoxes;
maxOutputBoxesPerBatch = maxOutputBoxesPerClass * realClasses;
if (m_attrs.keep_top_k >= 0)
maxOutputBoxesPerBatch =
std::min(maxOutputBoxesPerBatch, static_cast<size_t>(m_attrs.keep_top_k));
}
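To make the padding bound concrete, here is the same computation with hypothetical numbers (not taken from the test data): 100 boxes, 5 classes, background_class = 0, nms_top_k = 20, keep_top_k = 50.
// realClasses = 5 - 1 = 4 (background class excluded)
// maxOutputBoxesPerClass = min(100, 20) = 20
// maxOutputBoxesPerBatch = min(20 * 4, 50) = 50
size_t numBoxes = 100, realClasses = 4;
int nmsTopK = 20, keepTopK = 50;
size_t perClass = nmsTopK >= 0 ? std::min(numBoxes, static_cast<size_t>(nmsTopK)) : numBoxes;  // 20
size_t perBatch = perClass * realClasses;                                                      // 80
if (keepTopK >= 0)
    perBatch = std::min(perBatch, static_cast<size_t>(keepTopK));                              // 50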
void MatrixNmsLayerTest::compare(const std::vector<ov::runtime::Tensor> &expectedOutputs,
const std::vector<ov::runtime::Tensor> &actualOutputs) {
auto batchIndex = -1;
size_t numBatches, maxOutputBoxesPerBatch;
GetOutputParams(numBatches, maxOutputBoxesPerBatch);
std::vector<int32_t> numPerBatch(numBatches);
for (int outputIndex = static_cast<int>(expectedOutputs.size()) - 1; outputIndex >= 0 ; outputIndex--) {
const auto& actual = actualOutputs[outputIndex];
const auto _dims = actual->getTensorDesc().getDims();
const auto _dims = actual.get_shape();
if (_dims.size() == 1 && _dims[0] == numBatches) {
batchIndex = outputIndex;
auto memory = InferenceEngine::as<InferenceEngine::MemoryBlob>(actual);
IE_ASSERT(memory);
const auto lockedMemory = memory->wmap();
const auto actualBuffer = lockedMemory.as<const uint8_t *>();
auto buffer = reinterpret_cast<const int32_t *>(actualBuffer);
auto buffer = reinterpret_cast<const int32_t*>(actual.data());
std::copy_n(buffer, numBatches, numPerBatch.begin());
}
}
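// Note: the output whose shape is [numBatches] is the selected_num output (valid detections per
// batch); its contents drive the per-batch slicing of the boxes/indices comparisons below.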
@@ -86,39 +147,30 @@ void MatrixNmsLayerTest::Compare(const std::vector<std::pair<ngraph::element::Ty
for (int outputIndex = static_cast<int>(expectedOutputs.size()) - 1; outputIndex >= 0 ; outputIndex--) {
const auto& expected = expectedOutputs[outputIndex];
const auto& actual = actualOutputs[outputIndex];
const auto actualBuffer = static_cast<uint8_t*>(actual.data());
const auto expectedBuffer = static_cast<uint8_t*>(expected.data());
//Compare Selected Outputs & Selected Indices
if (outputIndex != batchIndex) {
const auto &expectedBuffer = expected.second.data();
auto memory = InferenceEngine::as<InferenceEngine::MemoryBlob>(actual);
IE_ASSERT(memory);
const auto lockedMemory = memory->wmap();
const auto actualBuffer = lockedMemory.as<const uint8_t *>();
auto k = static_cast<float>(expected.first.size()) / actual->getTensorDesc().getPrecision().size();
// W/A for int4, uint4
if (expected.first == ngraph::element::Type_t::u4 || expected.first == ngraph::element::Type_t::i4) {
k /= 2;
}
if (outputIndex == 2) {
if (expected.second.size() != k * actual->byteSize())
if (expected.get_size() != actual.get_size())
throw std::runtime_error("Expected and actual size 3rd output have different size");
}
const auto &precision = actual->getTensorDesc().getPrecision();
const auto& precision = actual.get_element_type();
auto expected_offset = 0;
auto actual_offset = 0;
for (size_t i = 0; i < numPerBatch.size(); i++) {
auto validNums = numPerBatch[i];
switch (precision) {
case InferenceEngine::Precision::FP32: {
switch (expected.first) {
case ngraph::element::Type_t::f32:
case ov::element::f32: {
switch (expected.get_element_type()) {
case ov::element::f32:
LayerTestsUtils::LayerTestsCommon::Compare(
reinterpret_cast<const float *>(expectedBuffer) + expected_offset * 6,
reinterpret_cast<const float *>(actualBuffer) + actual_offset * 6, validNums * 6, 1e-5f);
break;
case ngraph::element::Type_t::f64:
case ov::element::f64:
LayerTestsUtils::LayerTestsCommon::Compare(
reinterpret_cast<const double *>(expectedBuffer) + expected_offset * 6,
reinterpret_cast<const float *>(actualBuffer) + actual_offset * 6, validNums *6, 1e-5f);
@@ -126,22 +178,23 @@ void MatrixNmsLayerTest::Compare(const std::vector<std::pair<ngraph::element::Ty
default:
break;
}
const auto fBuffer = lockedMemory.as<const float *>();
for (size_t tailing = validNums * 6; tailing < maxOutputBoxesPerBatch * 6; tailing++) {
ASSERT_TRUE(std::abs(fBuffer[(actual_offset * 6 + tailing)] - -1.f) < 1e-5)
<< "Invalid default value: " << fBuffer[i] << " at index: " << i;
if (m_outStaticShape) {
const auto fBuffer = static_cast<float*>(actual.data());
for (size_t tailing = validNums * 6; tailing < maxOutputBoxesPerBatch * 6; tailing++) {
ASSERT_TRUE(std::abs(fBuffer[(actual_offset * 6 + tailing)] - -1.f) < 1e-5)
<< "Invalid default value: " << fBuffer[i] << " at index: " << i;
}
}
break;
}
case InferenceEngine::Precision::I32: {
switch (expected.first) {
case ngraph::element::Type_t::i32:
case ov::element::i32: {
switch (expected.get_element_type()) {
case ov::element::i32:
LayerTestsUtils::LayerTestsCommon::Compare(
reinterpret_cast<const int32_t *>(expectedBuffer) + expected_offset,
reinterpret_cast<const int32_t *>(actualBuffer) + actual_offset, validNums, 0);
break;
case ngraph::element::Type_t::i64:
case ov::element::i64:
LayerTestsUtils::LayerTestsCommon::Compare(
reinterpret_cast<const int64_t *>(expectedBuffer) + expected_offset,
reinterpret_cast<const int32_t *>(actualBuffer) + actual_offset, validNums, 0);
@@ -149,46 +202,42 @@ void MatrixNmsLayerTest::Compare(const std::vector<std::pair<ngraph::element::Ty
default:
break;
}
const auto iBuffer = lockedMemory.as<const int *>();
for (size_t tailing = validNums; tailing < maxOutputBoxesPerBatch; tailing++) {
ASSERT_TRUE(iBuffer[actual_offset + tailing] == -1) << "Invalid default value: " << iBuffer[i] << " at index: " << i;
if (m_outStaticShape) {
const auto iBuffer = static_cast<int*>(actual.data());
for (size_t tailing = validNums; tailing < maxOutputBoxesPerBatch; tailing++) {
ASSERT_TRUE(iBuffer[actual_offset + tailing] == -1) << "Invalid default value: " << iBuffer[i] << " at index: " << i;
}
}
break;
}
default:
FAIL() << "Comparator for " << precision << " precision isn't supported";
}
expected_offset += validNums;
actual_offset += maxOutputBoxesPerBatch;
if (!m_outStaticShape) {
expected_offset += validNums;
actual_offset += validNums;
} else {
expected_offset += validNums;
actual_offset += maxOutputBoxesPerBatch;
}
}
} else {
const auto &expectedBuffer = expected.second.data();
auto memory = InferenceEngine::as<InferenceEngine::MemoryBlob>(actual);
IE_ASSERT(memory);
const auto lockedMemory = memory->wmap();
const auto actualBuffer = lockedMemory.as<const uint8_t *>();
auto k = static_cast<float>(expected.first.size()) / actual->getTensorDesc().getPrecision().size();
// W/A for int4, uint4
if (expected.first == ngraph::element::Type_t::u4 || expected.first == ngraph::element::Type_t::i4) {
k /= 2;
}
if (outputIndex == 2) {
if (expected.second.size() != k * actual->byteSize())
if (expected.get_size() != actual.get_size())
throw std::runtime_error("Expected and actual size 3rd output have different size");
}
const auto &precision = actual->getTensorDesc().getPrecision();
size_t size = expected.second.size() / (k * actual->getTensorDesc().getPrecision().size());
const auto& precision = actual.get_element_type();
size_t size = expected.get_size();
switch (precision) {
case InferenceEngine::Precision::I32: {
switch (expected.first) {
case ngraph::element::Type_t::i32:
case ov::element::i32: {
switch (expected.get_element_type()) {
case ov::element::i32:
LayerTestsUtils::LayerTestsCommon::Compare(
reinterpret_cast<const int32_t *>(expectedBuffer),
reinterpret_cast<const int32_t *>(actualBuffer), size, 0);
break;
case ngraph::element::Type_t::i64:
case ov::element::i64:
LayerTestsUtils::LayerTestsCommon::Compare(
reinterpret_cast<const int64_t *>(expectedBuffer),
reinterpret_cast<const int32_t *>(actualBuffer), size, 0);
@@ -206,45 +255,48 @@ void MatrixNmsLayerTest::Compare(const std::vector<std::pair<ngraph::element::Ty
}
void MatrixNmsLayerTest::SetUp() {
InputShapeParams inShapeParams;
std::vector<InputShape> shapes;
InputPrecisions inPrecisions;
op::v8::MatrixNms::Attributes attrs;
TopKParams topKParams;
ThresholdParams thresholdParams;
std::tie(inShapeParams, inPrecisions, attrs.sort_result_type, attrs.output_type, topKParams, thresholdParams,
attrs.background_class, attrs.normalized, attrs.decay_function, targetDevice) = this->GetParam();
std::tie(shapes, inPrecisions, m_attrs.sort_result_type, m_attrs.output_type, topKParams, thresholdParams,
m_attrs.background_class, m_attrs.normalized, m_attrs.decay_function, targetDevice) = this->GetParam();
std::tie(attrs.nms_top_k, attrs.keep_top_k) = topKParams;
std::tie(attrs.score_threshold, attrs.gaussian_sigma, attrs.post_threshold) = thresholdParams;
std::tie(numBatches, numBoxes, numClasses) = inShapeParams;
auto realClasses = numClasses;
if (attrs.background_class >=0 && attrs.background_class <= numClasses) {
realClasses = realClasses - 1;
}
std::tie(m_attrs.nms_top_k, m_attrs.keep_top_k) = topKParams;
std::tie(m_attrs.score_threshold, m_attrs.gaussian_sigma, m_attrs.post_threshold) = thresholdParams;
maxOutputBoxesPerClass = 0;
if (attrs.nms_top_k >= 0)
maxOutputBoxesPerClass = std::min(numBoxes, static_cast<size_t>(attrs.nms_top_k));
else
maxOutputBoxesPerClass = numBoxes;
init_input_shapes(shapes);
maxOutputBoxesPerBatch = maxOutputBoxesPerClass * realClasses;
if (attrs.keep_top_k >= 0)
maxOutputBoxesPerBatch =
std::min(maxOutputBoxesPerBatch, static_cast<size_t>(attrs.keep_top_k));
Precision paramsPrec, maxBoxPrec, thrPrec;
// input is dynamic shape -> output will be dynamic shape
// input is static shape -> output will be static shape
const auto inputDynamicParam = {shapes[0].first, shapes[1].first};
m_outStaticShape = std::any_of(inputDynamicParam.begin(), inputDynamicParam.end(), [](const ov::PartialShape& shape) {
return shape.rank() == 0;
});
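// Hedged note: static_shapes_to_test_representation() presumably leaves the PartialShape member
// empty (static rank 0), so rank() == 0 picks out the purely static configurations; the dynamic
// configurations always carry rank-3 partial shapes and therefore keep the outputs dynamic.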
ElementType paramsPrec, maxBoxPrec, thrPrec;
std::tie(paramsPrec, maxBoxPrec, thrPrec) = inPrecisions;
const std::vector<size_t> boxesShape{numBatches, numBoxes, 4}, scoresShape{numBatches, numClasses, numBoxes};
auto ngPrc = convertIE2nGraphPrc(paramsPrec);
auto params = builder::makeParams(ngPrc, {boxesShape, scoresShape});
auto paramOuts = helpers::convert2OutputVector(helpers::castOps2Nodes<op::Parameter>(params));
auto nms = std::make_shared<opset8::MatrixNms>(paramOuts[0], paramOuts[1], attrs);
auto nms_0_identity = std::make_shared<opset5::Multiply>(nms->output(0), opset5::Constant::create(element::f32, Shape{1}, {1}));
auto nms_1_identity = std::make_shared<opset5::Multiply>(nms->output(1), opset5::Constant::create(attrs.output_type, Shape{1}, {1}));
auto nms_2_identity = std::make_shared<opset5::Multiply>(nms->output(2), opset5::Constant::create(attrs.output_type, Shape{1}, {1}));
function = std::make_shared<Function>(OutputVector{nms_0_identity, nms_1_identity, nms_2_identity}, params, "NMS");
const auto params = ngraph::builder::makeDynamicParams(paramsPrec, inputDynamicShapes);
const auto paramOuts =
ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
auto nms = std::make_shared<opset8::MatrixNms>(paramOuts[0], paramOuts[1], m_attrs);
if (!m_outStaticShape) {
auto result = std::make_shared<opset5::Result>(nms);
function = std::make_shared<Function>(result, params, "MatrixNMS");
} else {
auto nms_0_identity = std::make_shared<opset5::Multiply>(nms->output(0), opset5::Constant::create(element::f32, Shape{1}, {1}));
auto nms_1_identity = std::make_shared<opset5::Multiply>(nms->output(1), opset5::Constant::create(m_attrs.output_type, Shape{1}, {1}));
auto nms_2_identity = std::make_shared<opset5::Multiply>(nms->output(2), opset5::Constant::create(m_attrs.output_type, Shape{1}, {1}));
OutputVector results = {
std::make_shared<opset5::Result>(nms_0_identity),
std::make_shared<opset5::Result>(nms_1_identity),
std::make_shared<opset5::Result>(nms_2_identity)
};
function = std::make_shared<Function>(results, params, "MatrixNMS");
}
}
} // namespace LayerTestsDefinitions
} // namespace subgraph
} // namespace test
} // namespace ov

View File

@@ -2,16 +2,22 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "ngraph_functions/builders.hpp"
#include "functional_test_utils/ov_tensor_utils.hpp"
#include "shared_test_classes/single_layer/multiclass_nms.hpp"
#include "shared_test_classes/base/layer_test_utils.hpp"
namespace LayerTestsDefinitions {
#include "functional_test_utils/plugin_cache.hpp"
namespace ov {
namespace test {
namespace subgraph {
using namespace ngraph;
using namespace InferenceEngine;
using namespace FuncTestUtils::PrecisionUtils;
std::string MulticlassNmsLayerTest::getTestCaseName(const testing::TestParamInfo<MulticlassNmsParams>& obj) {
InputShapeParams inShapeParams;
std::vector<InputShape> shapes;
InputPrecisions inPrecisions;
int32_t nmsTopK, backgroundClass, keepTopK;
element::Type outType;
@@ -23,12 +29,9 @@ std::string MulticlassNmsLayerTest::getTestCaseName(const testing::TestParamInfo
std::string targetDevice;
std::tie(inShapeParams, inPrecisions, nmsTopK, inFloatVar, backgroundClass, keepTopK, outType, sortResultType, inboolVar, targetDevice) = obj.param;
std::tie(shapes, inPrecisions, nmsTopK, inFloatVar, backgroundClass, keepTopK, outType, sortResultType, inboolVar, targetDevice) = obj.param;
size_t numBatches, numBoxes, numClasses;
std::tie(numBatches, numBoxes, numClasses) = inShapeParams;
Precision paramsPrec, maxBoxPrec, thrPrec;
ElementType paramsPrec, maxBoxPrec, thrPrec;
std::tie(paramsPrec, maxBoxPrec, thrPrec) = inPrecisions;
float iouThr, scoreThr, nmsEta;
@@ -38,8 +41,18 @@ std::string MulticlassNmsLayerTest::getTestCaseName(const testing::TestParamInfo
std::tie(sortResCB, normalized) = inboolVar;
std::ostringstream result;
result << "numBatches=" << numBatches << "_numBoxes=" << numBoxes << "_numClasses=" << numClasses << "_";
result << "paramsPrec=" << paramsPrec << "_maxBoxPrec=" << maxBoxPrec << "_thrPrec=" << thrPrec << "_";
result << "IS=(";
for (const auto& shape : shapes) {
result << CommonTestUtils::partialShape2str({shape.first}) << "_";
}
result << ")_TS=(";
for (const auto& shape : shapes) {
for (const auto& item : shape.second) {
result << CommonTestUtils::vec2str(item) << "_";
}
}
result << ")_paramsPrec=" << paramsPrec << "_maxBoxPrec=" << maxBoxPrec << "_thrPrec=" << thrPrec << "_";
result << "nmsTopK=" << nmsTopK << "_";
result << "iouThr=" << iouThr << "_scoreThr=" << scoreThr << "_backgroundClass=" << backgroundClass << "_";
result << "keepTopK=" << keepTopK << "_outType=" << outType << "_";
@@ -48,38 +61,86 @@ std::string MulticlassNmsLayerTest::getTestCaseName(const testing::TestParamInfo
return result.str();
}
void MulticlassNmsLayerTest::GenerateInputs() {
size_t it = 0;
for (const auto& input : cnnNetwork.getInputsInfo()) {
const auto& info = input.second;
Blob::Ptr blob;
void MulticlassNmsLayerTest::generate_inputs(const std::vector<ngraph::Shape>& targetInputStaticShapes) {
inputs.clear();
if (it == 1) {
blob = make_blob_with_precision(info->getTensorDesc());
blob->allocate();
CommonTestUtils::fill_data_random_float<Precision::FP32>(blob, 1, 0, 1000);
const auto& funcInputs = function->inputs();
for (int i = 0; i < funcInputs.size(); ++i) {
const auto& funcInput = funcInputs[i];
ov::runtime::Tensor tensor;
if (i == 1) {
tensor = ov::runtime::Tensor(funcInput.get_element_type(), targetInputStaticShapes[i]);
const size_t range = 1;
const size_t startFrom = 0;
const size_t k = 1000;
const int seed = 1;
std::default_random_engine random(seed);
std::uniform_int_distribution<int32_t> distribution(k * startFrom, k * (startFrom + range));
auto *dataPtr = tensor.data<float>();
for (size_t i = 0; i < tensor.get_size(); i++) {
auto value = static_cast<float>(distribution(random));
dataPtr[i] = value / static_cast<float>(k);
}
} else {
blob = GenerateInput(*info);
tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i]);
}
inputs.push_back(blob);
it++;
inputs.insert({funcInput.get_node_shared_ptr(), tensor});
}
}
void MulticlassNmsLayerTest::Compare(const std::vector<std::pair<ngraph::element::Type, std::vector<std::uint8_t>>>& expectedOutputs,
const std::vector<Blob::Ptr>& actualOutputs) {
void MulticlassNmsLayerTest::GetOutputParams(size_t& numBatches, size_t& maxOutputBoxesPerBatch) {
size_t it = 0;
size_t numBoxes = 0, numClasses = 0;
const auto& funcInputs = function->inputs();
for (int i = 0; i < funcInputs.size(); ++i) {
const auto& funcInput = funcInputs[i];
const auto& dims = inputs[funcInput.get_node_shared_ptr()].get_shape();
if (it == 1) {
numClasses = dims[1];
} else {
numBatches = dims[0];
numBoxes = dims[1];
}
it++;
}
ASSERT_TRUE(numBatches > 0 && numBoxes > 0 && numClasses > 0)
<< "Expected numBatches, numBoxes, numClasses > 0, got:" << numBatches << ", " << numBoxes << ", " << numClasses;
auto realClasses = numClasses;
if (m_attrs.background_class >= 0 && m_attrs.background_class < numClasses) {
realClasses = realClasses - 1;
}
size_t maxOutputBoxesPerClass = 0;
if (m_attrs.nms_top_k >= 0)
maxOutputBoxesPerClass = std::min(numBoxes, static_cast<size_t>(m_attrs.nms_top_k));
else
maxOutputBoxesPerClass = numBoxes;
maxOutputBoxesPerBatch = maxOutputBoxesPerClass * realClasses;
if (m_attrs.keep_top_k >= 0)
maxOutputBoxesPerBatch =
std::min(maxOutputBoxesPerBatch, static_cast<size_t>(m_attrs.keep_top_k));
}
void MulticlassNmsLayerTest::compare(const std::vector<ov::runtime::Tensor> &expectedOutputs,
const std::vector<ov::runtime::Tensor> &actualOutputs) {
auto batchIndex = -1;
size_t numBatches, maxOutputBoxesPerBatch;
GetOutputParams(numBatches, maxOutputBoxesPerBatch);
std::vector<int32_t> numPerBatch(numBatches);
for (int outputIndex = static_cast<int>(expectedOutputs.size()) - 1; outputIndex >= 0; outputIndex--) {
const auto& actual = actualOutputs[outputIndex];
const auto _dims = actual->getTensorDesc().getDims();
const auto _dims = actual.get_shape();
if (_dims.size() == 1 && _dims[0] == numBatches) {
batchIndex = outputIndex;
auto memory = InferenceEngine::as<InferenceEngine::MemoryBlob>(actual);
IE_ASSERT(memory);
const auto lockedMemory = memory->wmap();
const auto actualBuffer = lockedMemory.as<const uint8_t*>();
auto buffer = reinterpret_cast<const int32_t*>(actualBuffer);
auto buffer = reinterpret_cast<const int32_t*>(actual.data());
std::copy_n(buffer, numBatches, numPerBatch.begin());
}
}
@@ -87,39 +148,30 @@ void MulticlassNmsLayerTest::Compare(const std::vector<std::pair<ngraph::element
for (int outputIndex = static_cast<int>(expectedOutputs.size()) - 1; outputIndex >= 0; outputIndex--) {
const auto& expected = expectedOutputs[outputIndex];
const auto& actual = actualOutputs[outputIndex];
const auto actualBuffer = static_cast<uint8_t*>(actual.data());
const auto expectedBuffer = static_cast<uint8_t*>(expected.data());
// Compare Selected Outputs & Selected Indices
if (outputIndex != batchIndex) {
const auto& expectedBuffer = expected.second.data();
auto memory = InferenceEngine::as<InferenceEngine::MemoryBlob>(actual);
IE_ASSERT(memory);
const auto lockedMemory = memory->wmap();
const auto actualBuffer = lockedMemory.as<const uint8_t*>();
auto k = static_cast<float>(expected.first.size()) / actual->getTensorDesc().getPrecision().size();
// W/A for int4, uint4
if (expected.first == ngraph::element::Type_t::u4 || expected.first == ngraph::element::Type_t::i4) {
k /= 2;
}
if (outputIndex == 2) {
if (expected.second.size() != k * actual->byteSize())
if (expected.get_size() != actual.get_size())
throw std::runtime_error("Expected and actual size 3rd output have different "
"size");
}
const auto& precision = actual->getTensorDesc().getPrecision();
const auto& precision = actual.get_element_type();
auto expected_offset = 0;
auto actual_offset = 0;
for (size_t i = 0; i < numPerBatch.size(); i++) {
auto validNums = numPerBatch[i];
switch (precision) {
case InferenceEngine::Precision::FP32: {
switch (expected.first) {
case ngraph::element::Type_t::f32:
case ov::element::f32: {
switch (expected.get_element_type()) {
case ov::element::f32:
LayerTestsUtils::LayerTestsCommon::Compare(reinterpret_cast<const float*>(expectedBuffer) + expected_offset * 6,
reinterpret_cast<const float*>(actualBuffer) + actual_offset * 6, validNums * 6, 1e-5f);
break;
case ngraph::element::Type_t::f64:
case ov::element::f64:
LayerTestsUtils::LayerTestsCommon::Compare(reinterpret_cast<const double*>(expectedBuffer) + expected_offset * 6,
reinterpret_cast<const float*>(actualBuffer) + actual_offset * 6, validNums * 6, 1e-5f);
break;
@@ -127,66 +179,64 @@ void MulticlassNmsLayerTest::Compare(const std::vector<std::pair<ngraph::element
break;
}
const auto fBuffer = lockedMemory.as<const float*>();
for (size_t tailing = validNums * 6; tailing < maxOutputBoxesPerBatch * 6; tailing++) {
ASSERT_TRUE(std::abs(fBuffer[(actual_offset * 6 + tailing)] - -1.f) < 1e-5)
<< "Invalid default value: " << fBuffer[i] << " at index: " << i;
if (m_outStaticShape) {
const auto fBuffer = static_cast<float*>(actual.data());
for (size_t tailing = validNums * 6; tailing < maxOutputBoxesPerBatch * 6; tailing++) {
ASSERT_TRUE(std::abs(fBuffer[(actual_offset * 6 + tailing)] - -1.f) < 1e-5)
<< "Invalid default value: " << fBuffer[i] << " at index: " << i;
}
}
break;
}
case InferenceEngine::Precision::I32: {
switch (expected.first) {
case ngraph::element::Type_t::i32:
case ov::element::i32: {
switch (expected.get_element_type()) {
case ov::element::i32:
LayerTestsUtils::LayerTestsCommon::Compare(reinterpret_cast<const int32_t*>(expectedBuffer) + expected_offset,
reinterpret_cast<const int32_t*>(actualBuffer) + actual_offset, validNums, 0);
break;
case ngraph::element::Type_t::i64:
case ov::element::i64:
LayerTestsUtils::LayerTestsCommon::Compare(reinterpret_cast<const int64_t*>(expectedBuffer) + expected_offset,
reinterpret_cast<const int32_t*>(actualBuffer) + actual_offset, validNums, 0);
break;
default:
break;
}
const auto iBuffer = lockedMemory.as<const int*>();
for (size_t tailing = validNums; tailing < maxOutputBoxesPerBatch; tailing++) {
ASSERT_TRUE(iBuffer[actual_offset + tailing] == -1) << "Invalid default value: " << iBuffer[i] << " at index: " << i;
if (m_outStaticShape) {
const auto iBuffer = static_cast<int*>(actual.data());
for (size_t tailing = validNums; tailing < maxOutputBoxesPerBatch; tailing++) {
ASSERT_TRUE(iBuffer[actual_offset + tailing] == -1) << "Invalid default value: " << iBuffer[i] << " at index: " << i;
}
}
break;
}
default:
FAIL() << "Comparator for " << precision << " precision isn't supported";
}
expected_offset += validNums;
actual_offset += maxOutputBoxesPerBatch;
if (!m_outStaticShape) {
expected_offset += validNums;
actual_offset += validNums;
} else {
expected_offset += validNums;
actual_offset += maxOutputBoxesPerBatch;
}
}
} else {
const auto& expectedBuffer = expected.second.data();
auto memory = InferenceEngine::as<InferenceEngine::MemoryBlob>(actual);
IE_ASSERT(memory);
const auto lockedMemory = memory->wmap();
const auto actualBuffer = lockedMemory.as<const uint8_t*>();
auto k = static_cast<float>(expected.first.size()) / actual->getTensorDesc().getPrecision().size();
// W/A for int4, uint4
if (expected.first == ngraph::element::Type_t::u4 || expected.first == ngraph::element::Type_t::i4) {
k /= 2;
}
if (outputIndex == 2) {
if (expected.second.size() != k * actual->byteSize())
if (expected.get_size() != actual.get_size())
throw std::runtime_error("Expected and actual size 3rd output have different "
"size");
}
const auto& precision = actual->getTensorDesc().getPrecision();
size_t size = expected.second.size() / (k * actual->getTensorDesc().getPrecision().size());
const auto& precision = actual.get_element_type();
size_t size = expected.get_size();
switch (precision) {
case InferenceEngine::Precision::I32: {
switch (expected.first) {
case ngraph::element::Type_t::i32:
case ov::element::i32: {
switch (expected.get_element_type()) {
case ov::element::i32:
LayerTestsUtils::LayerTestsCommon::Compare(reinterpret_cast<const int32_t*>(expectedBuffer), reinterpret_cast<const int32_t*>(actualBuffer),
size, 0);
break;
case ngraph::element::Type_t::i64:
case ov::element::i64:
LayerTestsUtils::LayerTestsCommon::Compare(reinterpret_cast<const int64_t*>(expectedBuffer), reinterpret_cast<const int32_t*>(actualBuffer),
size, 0);
break;
@@ -203,9 +253,8 @@ void MulticlassNmsLayerTest::Compare(const std::vector<std::pair<ngraph::element
}
void MulticlassNmsLayerTest::SetUp() {
InputShapeParams inShapeParams;
std::vector<InputShape> shapes;
InputPrecisions inPrecisions;
op::v8::MulticlassNms::Attributes attrs;
size_t maxOutBoxesPerClass, backgroundClass, keepTopK;
element::Type outType;
@@ -214,27 +263,19 @@ void MulticlassNmsLayerTest::SetUp() {
InputfloatVar inFloatVar;
InputboolVar inboolVar;
std::tie(inShapeParams, inPrecisions, maxOutBoxesPerClass, inFloatVar, backgroundClass, keepTopK, outType, sortResultType, inboolVar, targetDevice) =
std::tie(shapes, inPrecisions, maxOutBoxesPerClass, inFloatVar, backgroundClass, keepTopK, outType, sortResultType, inboolVar, targetDevice) =
this->GetParam();
// size_t numBatches, numBoxes, numClasses;
std::tie(numBatches, numBoxes, numClasses) = inShapeParams;
auto realClasses = numClasses;
if (backgroundClass >= 0 && backgroundClass <= numClasses) {
realClasses = realClasses - 1;
}
init_input_shapes(shapes);
maxOutputBoxesPerClass = 0;
if (maxOutBoxesPerClass >= 0)
maxOutputBoxesPerClass = std::min(numBoxes, static_cast<size_t>(maxOutBoxesPerClass));
else
maxOutputBoxesPerClass = numBoxes;
// input is dynamic shape -> output will be dynamic shape
// input is static shape -> output will be static shape
const auto inputDynamicParam = {shapes[0].first, shapes[1].first};
m_outStaticShape = std::any_of(inputDynamicParam.begin(), inputDynamicParam.end(), [](const ov::PartialShape& shape) {
return shape.rank() == 0;
});
maxOutputBoxesPerBatch = maxOutputBoxesPerClass * realClasses;
if (keepTopK >= 0)
maxOutputBoxesPerBatch = std::min(maxOutputBoxesPerBatch, static_cast<size_t>(keepTopK));
Precision paramsPrec, maxBoxPrec, thrPrec;
ElementType paramsPrec, maxBoxPrec, thrPrec;
std::tie(paramsPrec, maxBoxPrec, thrPrec) = inPrecisions;
float iouThr, scoreThr, nmsEta;
@@ -243,28 +284,39 @@ void MulticlassNmsLayerTest::SetUp() {
bool sortResCB, normalized;
std::tie(sortResCB, normalized) = inboolVar;
const std::vector<size_t> boxesShape {numBatches, numBoxes, 4}, scoresShape {numBatches, numClasses, numBoxes};
auto ngPrc = convertIE2nGraphPrc(paramsPrec);
auto params = builder::makeParams(ngPrc, {boxesShape, scoresShape});
auto paramOuts = helpers::convert2OutputVector(helpers::castOps2Nodes<op::Parameter>(params));
const auto params = ngraph::builder::makeDynamicParams(paramsPrec, inputDynamicShapes);
const auto paramOuts =
ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
attrs.iou_threshold = iouThr;
attrs.score_threshold = scoreThr;
attrs.nms_eta = nmsEta;
attrs.sort_result_type = sortResultType;
attrs.sort_result_across_batch = sortResCB;
attrs.output_type = outType;
attrs.nms_top_k = maxOutBoxesPerClass;
attrs.keep_top_k = keepTopK;
attrs.background_class = backgroundClass;
attrs.normalized = normalized;
m_attrs.iou_threshold = iouThr;
m_attrs.score_threshold = scoreThr;
m_attrs.nms_eta = nmsEta;
m_attrs.sort_result_type = sortResultType;
m_attrs.sort_result_across_batch = sortResCB;
m_attrs.output_type = outType;
m_attrs.nms_top_k = maxOutBoxesPerClass;
m_attrs.keep_top_k = keepTopK;
m_attrs.background_class = backgroundClass;
m_attrs.normalized = normalized;
auto nms = std::make_shared<opset8::MulticlassNms>(paramOuts[0], paramOuts[1], attrs);
auto nms = std::make_shared<opset8::MulticlassNms>(paramOuts[0], paramOuts[1], m_attrs);
auto nms_0_identity = std::make_shared<opset5::Multiply>(nms->output(0), opset5::Constant::create(ngPrc, Shape {1}, {1}));
auto nms_1_identity = std::make_shared<opset5::Multiply>(nms->output(1), opset5::Constant::create(outType, Shape {1}, {1}));
auto nms_2_identity = std::make_shared<opset5::Multiply>(nms->output(2), opset5::Constant::create(outType, Shape {1}, {1}));
function = std::make_shared<Function>(OutputVector {nms_0_identity, nms_1_identity, nms_2_identity}, params, "MulticlassNMS");
if (!m_outStaticShape) {
auto result = std::make_shared<opset5::Result>(nms);
function = std::make_shared<Function>(result, params, "MulticlassNMS");
} else {
auto nms_0_identity = std::make_shared<opset5::Multiply>(nms->output(0), opset5::Constant::create(paramsPrec, Shape {1}, {1}));
auto nms_1_identity = std::make_shared<opset5::Multiply>(nms->output(1), opset5::Constant::create(outType, Shape {1}, {1}));
auto nms_2_identity = std::make_shared<opset5::Multiply>(nms->output(2), opset5::Constant::create(outType, Shape {1}, {1}));
OutputVector results = {
std::make_shared<opset5::Result>(nms_0_identity),
std::make_shared<opset5::Result>(nms_1_identity),
std::make_shared<opset5::Result>(nms_2_identity)
};
function = std::make_shared<Function>(results, params, "MulticlassNMS");
}
}
} // namespace LayerTestsDefinitions
} // namespace subgraph
} // namespace test
} // namespace ov

View File

@@ -30,6 +30,8 @@ bool fuse_type_to_convert(const std::shared_ptr<ngraph::Node>& node, ngraph::ele
bool fuse_type_to_nms3(const std::shared_ptr<ngraph::Node>& node, ngraph::element::Type to, size_t idx);
bool fuse_type_to_nms4(const std::shared_ptr<ngraph::Node>& node, ngraph::element::Type to, size_t idx);
bool fuse_type_to_nms5(const std::shared_ptr<ngraph::Node>& node, ngraph::element::Type to, size_t idx);
bool fuse_type_to_matrix_nms(const std::shared_ptr<ngraph::Node>& node, ngraph::element::Type to, size_t idx);
bool fuse_type_to_multiclass_nms(const std::shared_ptr<ngraph::Node>& node, ngraph::element::Type to, size_t idx);
bool fuse_type_to_topk(const std::shared_ptr<ngraph::Node>& node, ngraph::element::Type to, size_t idx);
bool fuse_type_to_maxpool(const std::shared_ptr<ngraph::Node>& node, ngraph::element::Type to, size_t idx);
bool fuse_type_to_nonzero(const std::shared_ptr<ngraph::Node>& node, ngraph::element::Type to, size_t idx);
@@ -253,6 +255,8 @@ bool ngraph::pass::ConvertPrecision::run_on_function(std::shared_ptr<ngraph::Fun
{opset3::NonMaxSuppression::get_type_info_static(), fuse_type_to_nms3},
{opset4::NonMaxSuppression::get_type_info_static(), fuse_type_to_nms4},
{opset5::NonMaxSuppression::get_type_info_static(), fuse_type_to_nms5},
{opset8::MatrixNms::get_type_info_static(), fuse_type_to_matrix_nms},
{opset8::MulticlassNms::get_type_info_static(), fuse_type_to_multiclass_nms},
{opset6::CTCGreedyDecoderSeqLen::get_type_info_static(), fuse_type_to_ctc_greedy_decoder_seq_len},
{opset4::TopK::get_type_info_static(), fuse_type_to_topk},
{opset8::MaxPool::get_type_info_static(), fuse_type_to_maxpool},
@@ -385,6 +389,34 @@ bool fuse_type_to_nms5(const std::shared_ptr<ngraph::Node>& node, ngraph::elemen
return true;
}
bool fuse_type_to_matrix_nms(const std::shared_ptr<ngraph::Node>& node, ngraph::element::Type to, size_t idx) {
auto nms = ov::as_type_ptr<opset8::MatrixNms>(node);
if (!nms) {
return false;
}
if ((idx == 1 || idx == 2) && (to == element::i32 || to == element::i64)) {
nms->set_output_type(to);
return true;
}
return false;
}
bool fuse_type_to_multiclass_nms(const std::shared_ptr<ngraph::Node>& node, ngraph::element::Type to, size_t idx) {
auto nms = ov::as_type_ptr<opset8::MulticlassNms>(node);
if (!nms) {
return false;
}
if ((idx == 1 || idx == 2) && (to == element::i32 || to == element::i64)) {
nms->set_output_type(to);
return true;
}
return false;
}
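A minimal usage sketch of the new callback, invoked directly on a freshly built node (illustrative only; the parameter shapes and values are hypothetical, and set_output_type is assumed to update the attribute the plugin later reads back). The MatrixNms counterpart above behaves the same way.
// Build a MulticlassNms whose index/count outputs are i64, then request i32 for output 1.
auto boxes = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f32, ngraph::PartialShape{1, 10, 4});
auto scores = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f32, ngraph::PartialShape{1, 3, 10});
ngraph::op::v8::MulticlassNms::Attributes attrs;
attrs.output_type = ngraph::element::i64;
auto nms = std::make_shared<ngraph::opset8::MulticlassNms>(boxes, scores, attrs);
bool changed = fuse_type_to_multiclass_nms(nms, ngraph::element::i32, 1);  // true: idx 1 with i32 is accepted
bool ignored = fuse_type_to_multiclass_nms(nms, ngraph::element::f16, 1);  // false: unsupported target type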
bool fuse_type_to_topk(const std::shared_ptr<ngraph::Node>& node, ngraph::element::Type to, size_t idx) {
if (auto topk = ov::as_type_ptr<opset4::TopK>(node)) {
if (idx == 1 && (to == element::i32 || to == element::i64)) {