[CPU] Added dynamism support for TensorIterator (#8879)

This commit is contained in:
Alexandra Sidorova
2021-12-24 15:08:42 +03:00
committed by GitHub
parent d1fd0d259e
commit 91945ba122
10 changed files with 1180 additions and 204 deletions

View File

@@ -77,7 +77,6 @@ xfail_issue_48052 = xfail_test(reason="Dropout op is not supported in traning mo
xfail_issue_45180 = xfail_test(reason="RuntimeError: Unsupported dynamic op: ReduceSum")
xfail_issue_44851 = xfail_test(reason="Expected: Unsupported dynamic op: Broadcast")
xfail_issue_44858 = xfail_test(reason="Expected: Unsupported dynamic op: Unsqueeze")
xfail_issue_44956 = xfail_test(reason="Expected: Unsupported dynamic op: Loop")
xfail_issue_44957 = xfail_test(reason="Expected: Unsupported dynamic op: NonZero")
xfail_issue_44958 = xfail_test(reason="Expected: Unsupported dynamic op: Interpolate")
xfail_issue_44965 = xfail_test(reason="Expected: RuntimeError: value info has no element")

View File

@@ -27,7 +27,6 @@ from tests import (
xfail_issue_39658,
xfail_issue_39662,
xfail_issue_44858,
xfail_issue_44956,
xfail_issue_44965,
xfail_issue_44968,
xfail_issue_45180,
@@ -265,12 +264,6 @@ tests_expected_to_fail = [
"OnnxBackendNodeModelTest.test_unsqueeze_two_axes_cpu",
"OnnxBackendNodeModelTest.test_unsqueeze_unsorted_axes_cpu",
),
(
xfail_issue_44956,
"OnnxBackendNodeModelTest.test_loop11_cpu",
"OnnxBackendNodeModelTest.test_range_int32_type_negative_delta_expanded_cpu",
"OnnxBackendNodeModelTest.test_range_float_type_positive_delta_expanded_cpu",
),
(
xfail_issue_44965,
"OnnxBackendNodeModelTest.test_loop13_seq_cpu",

View File

@@ -89,7 +89,6 @@ xfail_issue_48052 = xfail_test(reason="Dropout op is not supported in traning mo
xfail_issue_45180 = xfail_test(reason="RuntimeError: Unsupported dynamic op: ReduceSum")
xfail_issue_44851 = xfail_test(reason="Expected: Unsupported dynamic op: Broadcast")
xfail_issue_44858 = xfail_test(reason="Expected: Unsupported dynamic op: Unsqueeze")
xfail_issue_44956 = xfail_test(reason="Expected: Unsupported dynamic op: Loop")
xfail_issue_44957 = xfail_test(reason="Expected: Unsupported dynamic op: NonZero")
xfail_issue_44958 = xfail_test(reason="Expected: Unsupported dynamic op: Interpolate")
xfail_issue_44965 = xfail_test(reason="Expected: RuntimeError: value info has no element")

View File

@@ -27,7 +27,6 @@ from tests_compatibility import (
xfail_issue_39658,
xfail_issue_39662,
xfail_issue_44858,
xfail_issue_44956,
xfail_issue_44965,
xfail_issue_44968,
xfail_issue_45180,
@@ -265,12 +264,6 @@ tests_expected_to_fail = [
"OnnxBackendNodeModelTest.test_unsqueeze_two_axes_cpu",
"OnnxBackendNodeModelTest.test_unsqueeze_unsorted_axes_cpu",
),
(
xfail_issue_44956,
"OnnxBackendNodeModelTest.test_loop11_cpu",
"OnnxBackendNodeModelTest.test_range_int32_type_negative_delta_expanded_cpu",
"OnnxBackendNodeModelTest.test_range_float_type_positive_delta_expanded_cpu",
),
(
xfail_issue_44965,
"OnnxBackendNodeModelTest.test_loop13_seq_cpu",

View File

@@ -468,6 +468,8 @@ std::string algToString(const Algorithm alg) {
CASE(MathSoftPlus);
CASE(MathSoftsign);
CASE(MathTan);
CASE(TensorIteratorCommon);
CASE(TensorIteratorLoop);
#undef CASE
return "Undefined";
}

View File

@@ -219,7 +219,11 @@ enum Algorithm {
MathSinh,
MathSoftPlus,
MathSoftsign,
MathTan
MathTan,
// TensorIterator
TensorIteratorCommon,
TensorIteratorLoop
};
extern const InferenceEngine::details::caseless_unordered_map<std::string, Type> type_to_name_tbl;

View File

@@ -6,26 +6,29 @@
#include <string>
#include <vector>
#include <map>
#include <mkldnn_extension_utils.h>
#include <ie_ngraph_utils.hpp>
#include <utils/general_utils.h>
#include "common/blocked_desc_creator.h"
#include "utils/ngraph_utils.hpp"
#include "transformations/utils/utils.hpp"
#include "common/cpu_memcpy.h"
using namespace mkldnn;
using namespace MKLDNNPlugin;
using namespace InferenceEngine;
using namespace InferenceEngine::details;
namespace MKLDNNPlugin {
static NodeConfig make_plain_config(const std::shared_ptr<ngraph::Node>& op) {
#define THROW_ERROR IE_THROW() << getTypeStr() << " layer with name '" << getName() << "' "
static NodeConfig make_plain_config(const std::shared_ptr<ov::Node>& op) {
NodeConfig config;
for (size_t i = 0; i < op->get_input_size(); i++) {
const auto &origShape = op->get_input_partial_shape(i);
const auto& shape = Shape(origShape.rank().get_length() == 0 ? ngraph::PartialShape{1} : origShape);
const auto& shape = Shape(origShape.rank().get_length() == 0 ? ov::PartialShape{1} : origShape);
const auto prec = InferenceEngine::details::convertPrecision(op->get_input_element_type(i));
PortConfig data_conf {};
@@ -36,7 +39,7 @@ static NodeConfig make_plain_config(const std::shared_ptr<ngraph::Node>& op) {
for (size_t i = 0; i < op->get_output_size(); i++) {
const auto &origShape = op->get_output_partial_shape(i);
const auto& shape = Shape(origShape.rank().get_length() == 0 ? ngraph::PartialShape{1} : origShape);
const auto& shape = Shape(origShape.rank().get_length() == 0 ? ov::PartialShape{1} : origShape);
const auto prec = InferenceEngine::details::convertPrecision(op->get_output_element_type(i));
PortConfig data_conf {};
@@ -49,6 +52,31 @@ static NodeConfig make_plain_config(const std::shared_ptr<ngraph::Node>& op) {
return config;
}
// Propagates a new memory descriptor to every memory object bound to one port.
// When several edges alias a single physical buffer, only the edge that actually
// owns the storage is reallocated; the remaining edges are redefined to share its data.
// Note: the descriptor is taken by const reference — the original signature copied
// the shared_ptr by value, paying an atomic refcount bump per call for no reason.
static void redefineToMemories(const std::vector<MKLDNNMemoryPtr>& to_mems, const std::shared_ptr<MemoryDesc>& new_desc) {
    const auto &currDesc = to_mems.front()->getDesc();
    if (currDesc.getShape().isDynamic() || currDesc.getShape().getStaticDims() != new_desc->getShape().getStaticDims()) {
        // WA [DS] : need to rewrite it. Updated copypaste is from MKLDNNNode::redefineOutputMemory
        // this path is necessary if there are several edges per one port
        // in this case edge memory share same physical memory
        // so we need to find which edge allocate memory, reallocate memory and share this memory between other edges
        size_t sharedEdgeNum = 0;
        for (size_t j = 0; j < to_mems.size(); j++) {
            if (!to_mems[j]->isUsedExternalStorage()) {
                sharedEdgeNum = j;
                break;
            }
        }

        // Reallocate the owning memory first, then hand its data pointer to the aliases.
        to_mems[sharedEdgeNum]->redefineDesc(new_desc);
        void *data = to_mems[sharedEdgeNum]->GetData();
        for (size_t j = 0; j < to_mems.size(); j++) {
            if (j == sharedEdgeNum)
                continue;
            to_mems[j]->redefineDesc(new_desc, data);
        }
    }
}
class PortIteratorHelper : public PortMapHelper {
public:
PortIteratorHelper(const MKLDNNMemoryPtr &from, const MKLDNNMemoryPtr &to, bool sliced_src,
@@ -101,7 +129,7 @@ public:
auto &chunk_mem = sliced_src ? mem_holder_src : mem_holder_dst;
chunk_mem.set_data_handle(static_cast<uint8_t *>(full_mem.get_data_handle()) +
chunk_offset_in_byte + chunk_stride_in_byte * iter);
chunk_offset_in_byte + chunk_stride_in_byte * iter);
reorder.execute(strm, mem_holder_src, mem_holder_dst);
}
@@ -136,7 +164,7 @@ public:
IterCountPortHelper(const MKLDNNMemoryPtr &to, const mkldnn::engine& eng) {
// Only scalar I32 tensor is supported
IE_ASSERT(to->GetDataType() == memory::data_type::s32);
IE_ASSERT(to->GetShape() == Shape(InferenceEngine::SizeVector{1}));
IE_ASSERT(to->GetShape() == Shape(VectorDims{1}));
mem_holder_dst = to->GetPrimitive();
}
@@ -171,8 +199,7 @@ class asIntCheck : public PortChecker {
public:
asIntCheck(const MKLDNNMemoryPtr &mem) {
IE_ASSERT(mem->GetDataType() == memory::data_type::s32);
const auto a = Shape(InferenceEngine::SizeVector{1});
IE_ASSERT(mem->GetShape() == a);
IE_ASSERT(mem->GetShape() == Shape(InferenceEngine::SizeVector{1}));
mem_holder = mem->GetPrimitive();
}
@@ -196,96 +223,115 @@ private:
int value;
};
} // namespace MKLDNNPlugin
static int getNumIteration(const std::shared_ptr<const ngraph::Node>& op, const std::vector<PortMap>& inputPortMap, const std::vector<PortMap>& outputPortMap) {
const auto isIterable = [](const PortMap& rule) { return rule.axis != -1; };
const auto getNumIterations = [](const PortMap& rule, const std::vector<size_t>& dimensions) -> int {
const auto axis = rule.axis;
if (axis < 0 || static_cast<std::size_t>(axis) >= dimensions.size()) {
IE_THROW() << R"(: Invalid "axis" value in an iteration component: )"
<< rule.axis << ", dimensions number = " << dimensions.size() << " (out of range)";
}
const auto space = dimensions[axis];
const int start = static_cast<int>((rule.start < 0 ? (space + 1) : 0) + rule.start);
const int end = static_cast<int>((rule.end < 0 ? (space + 1) : 0) + rule.end);
const auto stride = rule.stride;
if (stride == 0) {
IE_THROW() << R"(: Invalid "stride" value in an iteration component: )" << rule.stride << " (infinite loop)";
}
const auto step = std::abs(stride);
const auto src = stride < 0 ? end : start;
const auto dst = stride < 0 ? start : end;
const auto length = dst - src;
if (src < 0 || src >= dst || dst > static_cast<int64_t>(space) || length < step) {
IE_THROW() << R"(: Invalid "start"/"stride"/"end" values in an iteration component)"
<< ": \"start\" = " << rule.start << ", \"stride\" = " << rule.stride << ", \"end\" = " << rule.end;
}
if (length % step != 0) {
IE_THROW() << ": Each iteration must be the same size: length (" << length << ") is not divisible by step (" << step << ")";
}
return static_cast<int>(length / step);
};
int numIterations = 1;
bool isDefault = true;
for (const auto& rule : inputPortMap) {
if (!isIterable(rule)) {
continue;
}
if (rule.from < 0 || rule.from >= static_cast<int64_t>(op->get_input_size())) {
IE_THROW() << R"(: Invalid "from" value: "from" = )" << rule.from
<< " inputs number = " << op->get_input_size() << " (out of range)";
}
const auto currentNumIterations = getNumIterations(rule, op->get_input_shape(rule.from));
if (isDefault) {
isDefault = false;
numIterations = currentNumIterations;
} else if (numIterations != currentNumIterations) {
IE_THROW() << ": There are at least two different iterations numbers: " << numIterations << " and " << currentNumIterations;
}
}
for (const auto& rule : outputPortMap) {
if (!isIterable(rule)) {
continue;
}
if (rule.from < 0 || rule.from >= static_cast<int64_t>(op->get_output_size())) {
IE_THROW() << R"(: Invalid "from" value: "from" = )" << rule.from
<< " inputs number = " << op->get_output_size() << " (out of range)";
}
const auto currentNumIterations = getNumIterations(rule, op->get_output_shape(rule.from));
if (isDefault) {
isDefault = false;
numIterations = currentNumIterations;
} else if (numIterations != currentNumIterations) {
IE_THROW() << ": There are at least two different iterations numbers: " << numIterations << " and " << currentNumIterations;
}
}
return numIterations;
// DynamicBuffer accumulates the per-iteration output chunks of the loop body
// ("from" memory) so the concatenated result can later be transferred into the
// node output memories ("to"); map_rule describes the concatenation axis/stride.
DynamicBuffer::DynamicBuffer(const MKLDNNMemoryPtr &from, const std::vector<MKLDNNMemoryPtr> &to,
                             const PortMap &map_rule) : from(from), to(to), map_rule(map_rule) {
    // Cache the element byte size once — the data type of "from" does not change between iterations.
    elem_size = MKLDNNExtensionUtils::sizeOfDataType(from->GetDataType());
}
bool MKLDNNTensorIteratorNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
if (isDynamicNgraphNode(op)) {
errorMessage = "Doesn't support op with dynamic shapes";
return false;
}
// Per-iteration entry point: the first iteration allocates the initial buffer,
// every following iteration grows the buffer and appends the freshly produced chunk.
void DynamicBuffer::execute(const mkldnn::engine& eng, const int iter) {
    if (iter != 0) {
        move_buffer(create_buffer(eng));
        move_data();
    } else {
        init(eng);
    }
}
// Sets up the accumulation buffer before the first iteration: validates that the
// body produced exactly |stride| elements along the concatenation axis, caches the
// copy geometry (count/len) and copies the first chunk into a fresh buffer.
void DynamicBuffer::init(const mkldnn::engine& eng) {
    chunk_offset_in_byte = 0;
    buffer_offset_in_byte = 0;

    const auto axis = map_rule.axis;
    const auto stride = map_rule.stride;
    const auto abs_stride = std::abs(stride);

    auto src_mem = from->GetPrimitive();
    auto src_desc = src_mem.get_desc();
    auto dims = src_desc.dims();

    if (dims[axis] != abs_stride)
        IE_THROW() << "TensorIterator (Loop) has incorrect output shape[axis] after iteration for concatenation. " << abs_stride <<
        " is expected, but actual: " << dims[axis];

    // count: number of independent "rows" before the concatenation axis;
    // len:   byte length of one row past the axis (element size included).
    // Use size_t initial values so the products are accumulated in size_t —
    // std::accumulate's accumulator type is the init type, and a plain int literal
    // would truncate the running product for large tensors.
    count = std::accumulate(dims.begin(), dims.begin() + map_rule.axis, static_cast<size_t>(1), std::multiplies<size_t>());
    len = std::accumulate(dims.begin() + map_rule.axis + 1, dims.end(), static_cast<size_t>(elem_size), std::multiplies<size_t>());

    mem_holder_buffer.reset(new memory(src_desc, eng));
    copy(reinterpret_cast<const uint8_t*>(from->GetPtr()), get_ptr(*mem_holder_buffer.get()), 0, 0, 1, from->GetSize());
}
// Allocates a new, larger plain-layout buffer able to hold one more chunk along
// the concatenation axis, and advances the write offsets for the next copy:
// forward iteration moves the chunk offset forward, backward iteration shifts
// the whole accumulated block by one chunk instead.
std::shared_ptr<mkldnn::memory> DynamicBuffer::create_buffer(const mkldnn::engine& eng) {
    const auto axis = map_rule.axis;
    const auto stride = map_rule.stride;
    const auto abs_stride = std::abs(stride);

    const auto old_desc = mem_holder_buffer->get_desc();
    auto dims = old_desc.dims();

    if (from->getStaticDims()[axis] != abs_stride)
        IE_THROW() << "TensorIterator (Loop) has incorrect output shape[axis] after iteration for concatenation. " << abs_stride <<
        " is expected, but actual: " << from->getStaticDims()[axis];

    dims[axis] += abs_stride;
    mkldnn::memory::desc new_buffer_desc(dims, old_desc.data_type(), MKLDNNExtensionUtils::GetPlainFormatByRank(dims.size()));

    // Fix: stride is an integer step along the axis — compare against the int 0,
    // not the float literal 0.0f (the implicit int->float promotion was accidental).
    if (stride > 0) {
        chunk_offset_in_byte += new_buffer_desc.data.format_desc.blocking.strides[axis] * elem_size * abs_stride;
    } else {
        buffer_offset_in_byte = from->GetPrimitive().get_desc().data.format_desc.blocking.strides[axis] * elem_size * abs_stride;
    }

    return std::make_shared<mkldnn::memory>(new_buffer_desc, eng);
}
// Relocates the data accumulated so far from the current buffer into the bigger
// one (honouring the backward-iteration offset) and makes the new buffer active.
void DynamicBuffer::move_buffer(std::shared_ptr<mkldnn::memory> new_buffer) {
    const auto concat_axis = map_rule.axis;
    const auto old_row_bytes = mem_holder_buffer->get_desc().dims()[concat_axis] * len;
    const auto new_row_bytes = new_buffer->get_desc().dims()[concat_axis] * len;

    uint8_t* dst = get_ptr(*new_buffer) + buffer_offset_in_byte;
    copy(get_ptr(*mem_holder_buffer), dst, old_row_bytes, new_row_bytes, count, old_row_bytes);

    mem_holder_buffer = new_buffer;
}
// Appends the chunk produced by the last body iteration into the accumulation
// buffer at the current chunk offset.
void DynamicBuffer::move_data() {
    const auto concat_axis = map_rule.axis;
    const auto chunk_row_bytes = abs(map_rule.stride) * len;
    const auto buffer_row_bytes = mem_holder_buffer->get_desc().dims()[concat_axis] * len;

    const auto* src = reinterpret_cast<const uint8_t*>(from->GetPtr());
    copy(src, get_ptr(*mem_holder_buffer) + chunk_offset_in_byte,
         chunk_row_bytes, buffer_row_bytes, count, chunk_row_bytes);
}
// Publishes the fully concatenated result: redefines the node's output memories
// to the final (post-loop) buffer shape and copies the buffer content into them.
void DynamicBuffer::transfer(const MKLDNNNode* node) {
    // Clone the output descriptor with the accumulated buffer dims.
    const auto desc = node->getBaseMemDescAtOutputPort(map_rule.from)->cloneWithNewDims(
            MKLDNNExtensionUtils::convertToVectorDims(mem_holder_buffer->get_desc().dims()));
    redefineToMemories(to, desc);

    // redefineToMemories makes all "to" memories share one physical storage,
    // so a single copy into the front memory is sufficient.
    copy(get_ptr(*mem_holder_buffer.get()), reinterpret_cast<uint8_t*>(to.front()->GetPtr()), 0, 0, 1, to.front()->GetSize());
}
// Strided 2D copy: transfers `count` rows of `len` bytes, reading a row every
// `src_stride` bytes and writing one every `dst_stride` bytes. Rows are
// independent, so they are copied in parallel.
void DynamicBuffer::copy(const uint8_t* src, uint8_t* dst, const size_t src_stride, const size_t dst_stride, const size_t count, const size_t len) {
    parallel_for(count, [=](const size_t row) {
        cpu_memcpy(dst + row * dst_stride, src + row * src_stride, len);
    });
}
// Returns a pointer to the first payload byte of the primitive, skipping the
// descriptor's initial element offset (offset0), expressed in bytes.
uint8_t* DynamicBuffer::get_ptr(mkldnn::memory& prim) {
    const mkldnn::impl::memory_desc_wrapper wrapper(prim.get_desc().data);
    const auto payload_offset = wrapper.offset0() * wrapper.data_type_size();
    return static_cast<uint8_t*>(prim.get_data_handle()) + payload_offset;
}
} // namespace MKLDNNPlugin
bool MKLDNNTensorIteratorNode::isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept {
try {
if (!one_of(op->get_type_info(),
ngraph::op::v0::TensorIterator::get_type_info_static(),
ngraph::op::v5::Loop::get_type_info_static())) {
ov::op::v0::TensorIterator::get_type_info_static(),
ov::op::v5::Loop::get_type_info_static())) {
errorMessage = "Only opset1 TensorIterator or opset5 Loop operations are supported.";
return false;
}
@@ -295,7 +341,7 @@ bool MKLDNNTensorIteratorNode::isSupportedOperation(const std::shared_ptr<const
return true;
}
MKLDNNTensorIteratorNode::MKLDNNTensorIteratorNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) :
MKLDNNTensorIteratorNode::MKLDNNTensorIteratorNode(const std::shared_ptr<ov::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) :
MKLDNNNode(op, eng, cache), ngraphOp(op) {
std::string errorMessage;
if (!isSupportedOperation(op, errorMessage)) {
@@ -304,19 +350,18 @@ MKLDNNTensorIteratorNode::MKLDNNTensorIteratorNode(const std::shared_ptr<ngraph:
}
void MKLDNNTensorIteratorNode::getSupportedDescriptors() {
auto tiOp = std::dynamic_pointer_cast<ngraph::op::util::SubGraphOp>(ngraphOp);
if (tiOp == nullptr) {
IE_THROW() << "Can't cast TensorIterator node with name: " << getName() << " to ngraph::op::util::SubGraphOp";
auto tiOp = ov::as_type_ptr<const ov::op::util::SubGraphOp>(ngraphOp);
if (!tiOp) {
THROW_ERROR << "cannot be cast to ov::op::util::SubGraphOp";
}
const std::shared_ptr<const ngraph::Function> body = tiOp->get_function();
const std::shared_ptr<const ov::Model> body = tiOp->get_function();
sub_graph.CreateGraph(body, ext_mng, weightCache);
const auto &inMap = sub_graph.GetInputNodesMap();
for (const auto &param : tiOp->get_function()->get_parameters()) {
auto inNode = inMap.find(param->get_friendly_name());
if (inNode != inMap.end()) {
auto inMem = inNode->second->getChildEdgeAt(0)->getMemoryPtr();
input_mem.push_back(inMem);
input_mems.push_back(getToMemories(inNode->second.get(), 0));
}
}
@@ -337,20 +382,20 @@ void MKLDNNTensorIteratorNode::getSupportedDescriptors() {
std::string type_name = desc->get_type_info().name;
if (type_name == "ConcatOutputDescription") {
auto output_desc = ::ngraph::as_type_ptr<ngraph::op::util::SubGraphOp::ConcatOutputDescription>(desc);
auto output_desc = ov::as_type_ptr<const ov::op::util::SubGraphOp::ConcatOutputDescription>(desc);
IE_ASSERT(output_desc != nullptr);
outputPortMap.emplace_back(PortMap {
static_cast<int>(output_desc->m_output_index), static_cast<int>(body_output_idx),
static_cast<int>(output_desc->m_axis), static_cast<int>(output_desc->m_stride),
static_cast<int>(output_desc->m_start), static_cast<int>(output_desc->m_end),
static_cast<int>(output_desc->m_part_size)});
static_cast<int>(output_desc->m_output_index), static_cast<int>(body_output_idx),
static_cast<int>(output_desc->m_axis), static_cast<int>(output_desc->m_stride),
static_cast<int>(output_desc->m_start), static_cast<int>(output_desc->m_end),
static_cast<int>(output_desc->m_part_size)});
} else if (type_name == "BodyOutputDescription") {
auto output_desc = ::ngraph::as_type_ptr<ngraph::op::util::SubGraphOp::BodyOutputDescription>(desc);
auto output_desc = ov::as_type_ptr<const ov::op::util::SubGraphOp::BodyOutputDescription>(desc);
IE_ASSERT(output_desc != nullptr);
outputPortMap.emplace_back(PortMap {
static_cast<int>(output_desc->m_output_index), static_cast<int>(body_output_idx), -1, 1, 0, -1, 1});
static_cast<int>(output_desc->m_output_index), static_cast<int>(body_output_idx), -1, 1, 0, -1, 1});
} else {
IE_THROW() << "Incorrect type of the output description.";
}
@@ -360,31 +405,30 @@ void MKLDNNTensorIteratorNode::getSupportedDescriptors() {
for (const auto& desc : tiOp->get_input_descriptions()) {
auto body_input_index = desc->m_body_parameter_index;
if (const auto slice_desc = std::dynamic_pointer_cast<ngraph::op::util::SubGraphOp::SliceInputDescription>(desc)) {
if (auto slice_desc = ov::as_type_ptr<const ov::op::util::SubGraphOp::SliceInputDescription>(desc)) {
inputPortMap.emplace_back(PortMap {
static_cast<int>(slice_desc->m_input_index), static_cast<int>(body_input_index),
static_cast<int>(slice_desc->m_axis), static_cast<int>(slice_desc->m_stride),
static_cast<int>(slice_desc->m_start), static_cast<int>(slice_desc->m_end),
static_cast<int>(slice_desc->m_part_size)});
} else if (const auto merge_desc = std::dynamic_pointer_cast<ngraph::op::util::SubGraphOp::MergedInputDescription>(desc)) {
static_cast<int>(slice_desc->m_input_index), static_cast<int>(body_input_index),
static_cast<int>(slice_desc->m_axis), static_cast<int>(slice_desc->m_stride),
static_cast<int>(slice_desc->m_start), static_cast<int>(slice_desc->m_end),
static_cast<int>(slice_desc->m_part_size)});
} else if (auto merge_desc = ov::as_type_ptr<const ov::op::util::SubGraphOp::MergedInputDescription>(desc)) {
inputPortMap.emplace_back(PortMap {
static_cast<int>(merge_desc->m_input_index), static_cast<int>(body_input_index), -1, 1, 0, -1, 1});
static_cast<int>(merge_desc->m_input_index), static_cast<int>(body_input_index), -1, 1, 0, -1, 1});
auto body_output_idx = merge_desc->m_body_value_index;
backEdges.emplace_back(PortMap {
static_cast<int>(body_output_idx), static_cast<int>(body_input_index), -1, 1, 0, -1, 1});
} else if (const auto inv_desc = std::dynamic_pointer_cast<ngraph::op::util::SubGraphOp::InvariantInputDescription>(desc)) {
static_cast<int>(body_output_idx), static_cast<int>(body_input_index), -1, 1, 0, -1, 1});
} else if (auto inv_desc = ov::as_type_ptr<const ov::op::util::SubGraphOp::InvariantInputDescription>(desc)) {
inputPortMap.emplace_back(PortMap {
static_cast<int>(inv_desc->m_input_index), static_cast<int>(body_input_index), -1, 1, 0, -1, 1});
} else {
IE_THROW() << "Incorrect type of the input description.";
THROW_ERROR << "has incorrect type of the input description.";
}
}
n_iter = getNumIteration(ngraphOp, inputPortMap, outputPortMap);
if (const auto loopOp = std::dynamic_pointer_cast<const ngraph::op::v5::Loop>(ngraphOp)) {
if (auto loopOp = ov::as_type_ptr<const ov::op::v5::Loop>(ngraphOp)) {
algorithm = TensorIteratorLoop;
auto spec_port = loopOp->get_special_body_ports();
if (spec_port.current_iteration_input_idx != -1) {
loopBodyCurrentIterationIdx.push_back(spec_port.current_iteration_input_idx);
@@ -394,74 +438,60 @@ void MKLDNNTensorIteratorNode::getSupportedDescriptors() {
}
loopTripCountIdx = 0;
loopExecutionConditionIdx = 1;
} else if (auto ti = ov::as_type_ptr<const ov::op::v0::TensorIterator>(ngraphOp)) {
algorithm = TensorIteratorCommon;
} else {
THROW_ERROR << "isn't supported!";
}
config = make_plain_config(ngraphOp);
}
void MKLDNNTensorIteratorNode::initSupportedPrimitiveDescriptors() {
if (!supportedPrimitiveDescriptors.empty())
return;
supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown);
supportedPrimitiveDescriptors.emplace_back(make_plain_config(ngraphOp), impl_desc_type::unknown);
}
void MKLDNNTensorIteratorNode::createPrimitive() {
const auto &eng = getEngine();
for (auto map_rule : inputPortMap) {
auto &from_mem = getParentEdgesAtPort(map_rule.from)[0]->getMemoryPtr();
auto &to_mem = input_mem[map_rule.to];
if (map_rule.axis == -1)
first_mappers.emplace_back(new BackEdgePortHelper(from_mem, to_mem, eng));
else
before_mappers.emplace_back(new PortIteratorHelper(from_mem, to_mem, true, map_rule, eng));
}
for (auto map_rule : outputPortMap) {
auto &to_mem = getChildEdgesAtPort(map_rule.from)[0]->getMemoryPtr();
auto &from_mem = output_mem[map_rule.to];
if (map_rule.axis == -1)
last_mappers.emplace_back(new BackEdgePortHelper(from_mem, to_mem, eng));
else
after_mappers.emplace_back(new PortIteratorHelper(from_mem, to_mem, false, map_rule, eng));
}
for (auto map_rule : backEdges) {
auto from_mem = output_mem[map_rule.from];
auto to_mem = input_mem[map_rule.to];
before_mappers.emplace_back(new BackEdgePortHelper(from_mem, to_mem, eng));
}
// special purpose ports
for (auto idx : loopBodyCurrentIterationIdx) {
auto to_mem = input_mem[idx];
before_mappers.emplace_back(new IterCountPortHelper(to_mem, eng));
}
if (loopBodyConditionOutputIdx == -1) {
if (loopBodyConditionOutputIdx == -1)
continue_cond_check.reset(new staticValueCheck(true)); // always true
} else {
auto mem = output_mem[loopBodyConditionOutputIdx];
continue_cond_check.reset(new asBoolCheck(mem));
}
if (loopTripCountIdx == -1) {
trip_count_check.reset(new staticValueCheck(n_iter)); // use statically calculated num of iteration
} else {
auto mem = getParentEdgesAtPort(loopTripCountIdx)[0]->getMemoryPtr();
trip_count_check.reset(new asIntCheck(mem));
}
if (loopExecutionConditionIdx == -1) {
if (loopExecutionConditionIdx == -1)
initial_cond_check.reset(new staticValueCheck(true));
} else {
auto mem = getParentEdgesAtPort(loopExecutionConditionIdx)[0]->getMemoryPtr();
initial_cond_check.reset(new asBoolCheck(mem));
if (isDynamicNode())
prepareDynamicBuffers();
MKLDNNNode::createPrimitive();
}
// A Loop must also be re-prepared when its runtime trip count or execution
// condition inputs changed since the last prepareParams call; otherwise defer
// to the base-class shape-driven decision.
bool MKLDNNTensorIteratorNode::needPrepareParams() const {
    if (getAlgorithm() == TensorIteratorLoop) {
        const auto* tripCount = reinterpret_cast<const uint32_t*>(getParentEdgesAtPort(loopTripCountIdx).front()->getMemoryPtr()->GetPtr());
        const auto* execCond = reinterpret_cast<const uint8_t*>(getParentEdgesAtPort(loopExecutionConditionIdx).front()->getMemoryPtr()->GetPtr());

        const bool controlInputsChanged = (*tripCount != lastUsedTripCount) || (*execCond != lastUsedCond);
        if (controlInputsChanged)
            return true;
    }

    return MKLDNNNode::needPrepareParams();
}
// Rebuilds all shape-dependent state: resizes body inputs to the current input
// shapes and re-creates the port mappers and loop-control checkers.
void MKLDNNTensorIteratorNode::prepareParams() {
    reshapeSubgraphInput();
    // Mappers hold memory handles of the previous shapes, so rebuild them from scratch.
    first_mappers.clear();
    before_mappers.clear();
    back_mappers.clear();
    prepareInputPorts();
    prepareInitialCond();
    prepareContinueCond();
    prepareTripCount();
    // special purpose ports
    prepareLoopBodyCurrentIteration();
    // For a dynamic node output shapes are known only after execution, so output
    // ports and back edges are handled inside executeDynamicImpl instead.
    if (!isDynamicNode()) {
        prepareOutputPorts();
        prepareBackEdges();
    }
}
@@ -485,7 +515,7 @@ void MKLDNNTensorIteratorNode::execute(mkldnn::stream strm) {
continue_cond = continue_cond_check->getStatus();
// copy data from subgraph iteration to outputs
// or to next iteration inputs
// or to the next iteration inputs
for (auto &mapper : after_mappers)
mapper->execute(strm, i);
}
@@ -494,6 +524,266 @@ void MKLDNNTensorIteratorNode::execute(mkldnn::stream strm) {
mapper->execute(strm);
}
// Dynamic-shape execution path: runs the body until the trip count is exhausted
// or the continue-condition output turns false, accumulating concatenated
// outputs in DynamicBuffers and finally reshaping/filling the node outputs.
void MKLDNNTensorIteratorNode::executeDynamicImpl(mkldnn::stream strm) {
    const auto &eng = getEngine();
    sub_graph.ResetInferCount();
    bool continue_cond = initial_cond_check->getStatus();
    int max_num_iter = trip_count_check->getStatus();
    // One-time copies of whole-tensor inputs into the body.
    for (auto &mapper : first_mappers)
        mapper->execute(strm);
    // A dynamic run with a false initial condition or a zero trip count would
    // leave the output shapes undefined, so it is rejected outright.
    if (!continue_cond || max_num_iter == 0)
        THROW_ERROR << "has incorrect iteration count for dynamic execution";
    // use "i != max_num_iter" only to allow "-1" works like infinite loop
    for (int i = 0; i != max_num_iter && continue_cond; i++) {
        // copy data to subgraph iteration
        for (auto &mapper : before_mappers)
            mapper->execute(strm, i);
        for (auto &mapper : back_mappers)
            mapper->execute(strm, i);
        sub_graph.Infer();
        continue_cond = continue_cond_check->getStatus();
        // Accumulate this iteration's concatenated-output chunks.
        for (auto& buffer : buffers)
            buffer->execute(eng, i);
        // on the last iteration we shouldn't reshape body inputs and init back edges
        if ((i + 1 != max_num_iter) && continue_cond)
            prepareDynamicBackEdges();
    }
    reshapeAndFillOutput(strm);
}
/* *==============* Prepare reorders, edges between body and TI *==============* */
// Creates the mappers feeding the body inputs from the node inputs:
// whole-tensor inputs are copied once before the loop (first_mappers), sliced
// inputs are copied chunk-by-chunk before every iteration (before_mappers).
void MKLDNNTensorIteratorNode::prepareInputPorts() {
    const auto &eng = getEngine();
    for (const auto& rule : inputPortMap) {
        auto &src_mem = getParentEdgesAtPort(rule.from)[0]->getMemoryPtr();
        auto &dst_mem = input_mems[rule.to].front();  // first memory is enough to get common memory ptr

        if (rule.axis != -1) {
            before_mappers.emplace_back(std::make_shared<PortIteratorHelper>(src_mem, dst_mem, true, rule, eng));
        } else {
            first_mappers.emplace_back(std::make_shared<BackEdgePortHelper>(src_mem, dst_mem, eng));
        }
    }
}
// Creates the mappers writing body outputs to the node outputs: whole-tensor
// outputs are copied once after the loop (last_mappers), concatenated outputs
// are copied chunk-by-chunk after every iteration (after_mappers).
void MKLDNNTensorIteratorNode::prepareOutputPorts() {
    const auto &eng = getEngine();
    for (const auto& rule : outputPortMap) {
        auto &dst_mem = getChildEdgesAtPort(rule.from)[0]->getMemoryPtr();
        auto &src_mem = output_mem[rule.to];

        if (rule.axis != -1) {
            after_mappers.emplace_back(std::make_shared<PortIteratorHelper>(src_mem, dst_mem, false, rule, eng));
        } else {
            last_mappers.emplace_back(std::make_shared<BackEdgePortHelper>(src_mem, dst_mem, eng));
        }
    }
}
// Registers the static back-edge mappers: before every iteration the body
// output of the previous iteration is copied into the matching body input.
void MKLDNNTensorIteratorNode::prepareBackEdges() {
    const auto &eng = getEngine();
    for (const auto& edge : backEdges) {
        before_mappers.emplace_back(
                std::make_shared<BackEdgePortHelper>(output_mem[edge.from], input_mems[edge.to].front(), eng));
    }
}
// Rebuilds the dynamic back-edge mappers after an iteration changed the body
// shapes: body input memories are resized to the shape of the matching body
// output, then a copy mapper is registered for the next iteration.
void MKLDNNTensorIteratorNode::prepareDynamicBackEdges() {
    const auto &eng = getEngine();
    back_mappers.clear();
    for (const auto& map_rule : backEdges) {
        const auto& from_mem = output_mem[map_rule.from];
        // Fix: the vector of memory pointers was copied by value on every edge
        // (`auto to_mems = ...`); a const reference is sufficient here.
        const auto& to_mems = input_mems[map_rule.to];

        redefineToMemories(to_mems, from_mem->getDesc().clone());

        // first memory is enough to get common memory ptr
        back_mappers.emplace_back(std::make_shared<BackEdgePortHelper>(from_mem, to_mems.front(), eng));
    }
}
// Creates a DynamicBuffer for every concatenated (axis != -1) output so that
// per-iteration chunks can be accumulated while output shapes are dynamic.
void MKLDNNTensorIteratorNode::prepareDynamicBuffers() {
    for (const auto& rule : outputPortMap) {
        if (rule.axis == -1)
            continue;
        buffers.emplace_back(
                std::make_shared<DynamicBuffer>(output_mem[rule.to], getToMemories(this, rule.from), rule));
    }
}
// Registers mappers that write the current iteration number into the special
// "current iteration" body input(s) of a Loop operation.
void MKLDNNTensorIteratorNode::prepareLoopBodyCurrentIteration() {
    const auto &eng = getEngine();
    for (auto idx : loopBodyCurrentIterationIdx) {
        // first memory is enough to get common memory ptr
        before_mappers.emplace_back(std::make_shared<IterCountPortHelper>(input_mems[idx].front(), eng));
    }
}
// (Re)creates the continue-condition checker from the body condition output.
// For Loop (idx != -1) it is rebuilt on every prepareParams call, since the body
// output memory may have been reallocated between runs.
// NOTE(review): when loopBodyConditionOutputIdx == -1 and continue_cond_check is
// still null, output_mem is indexed with -1 — presumably the checker is expected
// to be pre-initialized (staticValueCheck(true)) for the plain TensorIterator
// case before this runs; verify against the callers / collapsed diff context.
void MKLDNNTensorIteratorNode::prepareContinueCond() {
    if (loopBodyConditionOutputIdx != -1 || !continue_cond_check) {
        auto mem = output_mem[loopBodyConditionOutputIdx];
        continue_cond_check.reset(new asBoolCheck(mem));
    }
}
// (Re)creates the initial execution-condition checker from the node input and
// caches its value so needPrepareParams can detect changes.
// NOTE(review): when loopExecutionConditionIdx == -1 and initial_cond_check is
// still null, getParentEdgesAtPort(-1) would be invalid — presumably the checker
// is pre-initialized (staticValueCheck(true)) for the plain TensorIterator case
// before this runs; verify against the callers / collapsed diff context.
void MKLDNNTensorIteratorNode::prepareInitialCond() {
    if (loopExecutionConditionIdx != -1 || !initial_cond_check) {
        auto mem = getParentEdgesAtPort(loopExecutionConditionIdx)[0]->getMemoryPtr();
        initial_cond_check.reset(new asBoolCheck(mem));
        lastUsedCond = initial_cond_check->getStatus();
    }
}
// Sets up the trip-count checker: either a runtime value read from the Loop's
// trip-count input, or a static value derived from the sliced port maps.
// The resulting count is cached for needPrepareParams change detection.
void MKLDNNTensorIteratorNode::prepareTripCount() {
    if (loopTripCountIdx != -1) {
        auto mem = getParentEdgesAtPort(loopTripCountIdx)[0]->getMemoryPtr();
        trip_count_check.reset(new asIntCheck(mem));
    } else {
        trip_count_check.reset(new staticValueCheck(getNumIteration(inputPortMap, outputPortMap)));
    }
    lastUsedTripCount = trip_count_check->getStatus();
}
/* *==============* *==============* *==============* *==============* *==============* */
// Resizes the body input memories to the current node input shapes; a sliced
// input gets the per-iteration chunk size (|stride|) along its iteration axis.
void MKLDNNTensorIteratorNode::reshapeSubgraphInput() {
    for (const auto& rule : inputPortMap) {
        auto &src_mem = getParentEdgesAtPort(rule.from)[0]->getMemoryPtr();
        auto &body_mems = input_mems[rule.to];

        auto body_dims = src_mem->getStaticDims();
        if (rule.axis != -1)
            body_dims[rule.axis] = abs(rule.stride);

        redefineToMemories(body_mems,
                           std::make_shared<CpuBlockedMemoryDesc>(body_mems.front()->getDesc().getPrecision(), Shape(body_dims)));
    }
}
// Finalizes outputs after a dynamic run: whole-tensor outputs (axis == -1) are
// resized to the final body output shape and copied; concatenated outputs are
// transferred from their DynamicBuffers.
void MKLDNNTensorIteratorNode::reshapeAndFillOutput(mkldnn::stream strm) {
    auto eng = strm.get_engine();
    for (auto map_rule : outputPortMap) {
        if (map_rule.axis == -1) {
            auto to_mems = getToMemories(this, map_rule.from);
            auto &from_mem = output_mem[map_rule.to];

            const auto desc = getBaseMemDescAtOutputPort(map_rule.from)->cloneWithNewDims(from_mem->getStaticDims());
            redefineToMemories(to_mems, desc);

            // Fix: the mapper was allocated with raw `new` and never deleted (leak on
            // every dynamic run). A smart pointer releases it once the copy is done.
            const auto mapper = std::make_shared<BackEdgePortHelper>(from_mem, to_mems.front(), eng);
            mapper->execute(strm);
        }
    }

    for (const auto& buffer : buffers) {  // const& avoids a shared_ptr refcount bump per buffer
        buffer->transfer(this);
    }
}
// Computes the iteration count implied by the sliced input/output port maps.
// All iterable ports must agree on the count, otherwise an error is thrown;
// returns 1 when no port is iterable.
int MKLDNNTensorIteratorNode::getNumIteration(const std::vector<PortMap>& inputPortMap, const std::vector<PortMap>& outputPortMap) const {
    const auto isIterable = [](const PortMap& rule) {
        return rule.axis != -1;
    };

    // Derives the iteration count for a single rule from its port dims:
    // normalizes negative start/end, validates axis/stride/bounds and checks that
    // the iterated range divides evenly by the stride.
    const auto getNumIterations = [this](const PortMap& rule, const std::vector<size_t>& dimensions) -> int {
        const auto axis = rule.axis;
        if (axis < 0 || static_cast<std::size_t>(axis) >= dimensions.size()) {
            THROW_ERROR << ": Invalid \"axis\" value in an iteration component: "
                        << rule.axis << ", dimensions number = " << dimensions.size() << " (out of range)";
        }
        const auto space = dimensions[axis];
        const int start = static_cast<int>((rule.start < 0 ? (space + 1) : 0) + rule.start);
        const int end = static_cast<int>((rule.end < 0 ? (space + 1) : 0) + rule.end);

        const auto stride = rule.stride;
        if (stride == 0) {
            THROW_ERROR << ": Invalid \"stride\" value in an iteration component: " << rule.stride << " (infinite loop)";
        }
        const auto step = std::abs(stride);

        const auto src = stride < 0 ? end : start;
        const auto dst = stride < 0 ? start : end;
        const auto length = dst - src;
        if (src < 0 || src >= dst || dst > static_cast<int64_t>(space) || length < step) {
            THROW_ERROR << ": Invalid \"start\",\"stride\",\"end\" values in an iteration component"
                        << ": \"start\" = " << rule.start << ", \"stride\" = " << rule.stride << ", \"end\" = " << rule.end;
        }

        if (length % step != 0) {
            THROW_ERROR << ": Each iteration must be the same size: length (" << length << ") is not divisible by step (" << step << ")";
        }

        return static_cast<int>(length / step);
    };

    int numIterations = 1;
    bool isDefault = true;
    for (const auto& rule : inputPortMap) {
        if (!isIterable(rule)) {
            continue;
        }

        // Fix: validate "from" BEFORE it is used as a port index — previously the
        // dims were fetched via getParentEdgesAtPort(rule.from) first, so an
        // out-of-range value crashed before this diagnostic could ever fire.
        if (rule.from < 0 || rule.from >= static_cast<int64_t>(inputShapes.size())) {
            THROW_ERROR << ": Invalid \"from\" value: \"from\" = " << rule.from
                        << " inputs number = " << inputShapes.size() << " (out of range)";
        }

        const auto& dims = getParentEdgesAtPort(rule.from)[0]->getMemoryPtr()->getStaticDims();
        const auto currentNumIterations = getNumIterations(rule, dims);
        if (isDefault) {
            isDefault = false;
            numIterations = currentNumIterations;
        } else if (numIterations != currentNumIterations) {
            THROW_ERROR << ": There are at least two different iterations numbers: " << numIterations << " and " << currentNumIterations;
        }
    }

    for (const auto& rule : outputPortMap) {
        if (!isIterable(rule)) {
            continue;
        }

        // Same ordering fix as above: bounds-check "from" before indexing by it.
        if (rule.from < 0 || rule.from >= static_cast<int64_t>(outputShapes.size())) {
            THROW_ERROR << ": Invalid \"from\" value: \"from\" = " << rule.from
                        << " inputs number = " << outputShapes.size() << " (out of range)";
        }

        const auto& dims = getBaseMemDescAtOutputPort(rule.from)->getShape().getDims();
        // A still-dynamic dim along the axis cannot contribute an iteration count.
        // Guard the axis index here as well: an invalid axis is reported by
        // getNumIterations below instead of reading out of bounds.
        if (rule.axis >= 0 && static_cast<std::size_t>(rule.axis) < dims.size() && dims[rule.axis] == Shape::UNDEFINED_DIM)
            continue;

        const auto currentNumIterations = getNumIterations(rule, dims);
        if (isDefault) {
            isDefault = false;
            numIterations = currentNumIterations;
        } else if (numIterations != currentNumIterations) {
            THROW_ERROR << ": There are at least two different iterations numbers: " << numIterations << " and " << currentNumIterations;
        }
    }

    return numIterations;
}
std::vector<MKLDNNMemoryPtr> MKLDNNTensorIteratorNode::getToMemories(const MKLDNNNode* node, const size_t port) const {
std::vector<MKLDNNMemoryPtr> memories;
for (auto edge : node->getChildEdgesAtPort(port))
memories.push_back(edge->getMemoryPtr());
return memories;
}
// The node counts as successfully created when its resolved type is TensorIterator.
bool MKLDNNTensorIteratorNode::created() const {
    return TensorIterator == getType();
}

View File

@@ -9,6 +9,7 @@
#include <string>
#include <memory>
#include <vector>
#include <common/memory_desc_wrapper.hpp>
namespace MKLDNNPlugin {
@@ -55,36 +56,102 @@ protected:
};
/**
* Class for storing intermediate output buffer state for dynamism when we don't know
* final output shape but we should concatenate output after each iteration
*/
class DynamicBuffer {
public:
    DynamicBuffer(const MKLDNNMemoryPtr &from, const std::vector<MKLDNNMemoryPtr> &to, const PortMap &map_rule);
    ~DynamicBuffer() = default;

    // Accumulates the data produced by the current iteration (read from `from`)
    // into the internal growing buffer.
    void execute(const mkldnn::engine& eng, const int iter);
    // Moves the accumulated buffer content into the node's output memories (`to`).
    void transfer(const MKLDNNNode* node);

private:
    void init(const mkldnn::engine& eng);

    /* methods for resize and refill buffer */
    std::shared_ptr<mkldnn::memory> create_buffer(const mkldnn::engine& eng);
    void move_buffer(std::shared_ptr<mkldnn::memory> new_buffer);
    void move_data();

    // Strided byte-wise copy: `count` chunks of `len` bytes each, advancing the
    // source/destination pointers by the respective strides between chunks.
    static void copy(const uint8_t* src, uint8_t* dst, const size_t src_stride, const size_t dst_stride, const size_t count, const size_t len);
    static uint8_t* get_ptr(mkldnn::memory& prim);

    size_t len = 1lu;        // chunk length in bytes (presumably per-iteration slice size — confirm in .cpp)
    size_t count = 1lu;      // number of chunks copied per iteration
    size_t elem_size = 0lu;  // size of a single element in bytes
    ptrdiff_t chunk_offset_in_byte = 0;
    ptrdiff_t buffer_offset_in_byte = 0;

    MKLDNNMemoryPtr from;             // per-iteration source memory
    std::vector<MKLDNNMemoryPtr> to;  // final destination memories (node outputs)
    PortMap map_rule;                 // slicing rule (axis/start/stride/end) this buffer serves

    std::shared_ptr<mkldnn::memory> mem_holder_buffer;  // growing intermediate buffer
};
class MKLDNNTensorIteratorNode : public MKLDNNNode {
public:
MKLDNNTensorIteratorNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
MKLDNNTensorIteratorNode(const std::shared_ptr<ov::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
static bool isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept;
void initSupportedPrimitiveDescriptors() override;
void getSupportedDescriptors() override;
void createPrimitive() override;
bool created() const override;
void execute(mkldnn::stream strm) override;
bool isExecutable() const override { return true; }
void setExtManager(const MKLDNNExtensionManager::Ptr& extMgr) { ext_mng = extMgr; }
protected:
// needShapeInfer() should return false
// because we cannot resolve the output dimensions before the inference is completed
bool needShapeInfer() const override { return false; };
bool needPrepareParams() const override;
void prepareParams() override;
void executeDynamicImpl(mkldnn::stream strm) override;
private:
int n_iter = 0;
void prepareInputPorts();
void prepareOutputPorts();
void prepareBackEdges();
void prepareDynamicBackEdges();
void prepareDynamicBuffers();
void prepareLoopBodyCurrentIteration();
void prepareContinueCond();
void prepareInitialCond();
void prepareTripCount();
/* Dynamic support */
void reshapeSubgraphInput();
void reshapeAndFillOutput(mkldnn::stream strm);
int getNumIteration(const std::vector<PortMap>& inputPortMap, const std::vector<PortMap>& outputPortMap) const;
// this method get all memory ptrs of childs of one port to redefine descs for them
std::vector<MKLDNNMemoryPtr> getToMemories(const MKLDNNNode* node, const size_t port) const;
MKLDNNExtensionManager::Ptr ext_mng;
MKLDNNGraph sub_graph;
std::vector<MKLDNNMemoryPtr> input_mem, output_mem;
std::vector<std::vector<MKLDNNMemoryPtr>> input_mems;
std::vector<MKLDNNMemoryPtr> output_mem;
std::vector<std::shared_ptr<PortMapHelper>>
first_mappers, /// < Applied once before loop
last_mappers, /// < Applied once after loop
before_mappers, /// < Applied before each iteration
after_mappers; /// < Applied after each iteration
after_mappers, /// < Applied after each iteration
back_mappers; /// < Applied before each iteration for dynamic shapes
std::shared_ptr<PortChecker>
trip_count_check, /// < Perform check of trip count value. value >= -1
initial_cond_check, /// < Perform check of initial continue condition value. value [0, 1]
continue_cond_check; /// < Perform check of continue condition value of body. value [0, 1]
initial_cond_check, /// < Perform check of initial continue condition value. value [0, 1]
continue_cond_check; /// < Perform check of continue condition value of body. value [0, 1]
std::vector<std::shared_ptr<DynamicBuffer>> buffers;
std::vector<PortMap> inputPortMap; //!< Input ports map
std::vector<PortMap> outputPortMap; //!< Output ports map
@@ -95,9 +162,10 @@ private:
int loopTripCountIdx = -1;
int loopExecutionConditionIdx = -1;
NodeConfig config;
int lastUsedTripCount = -1;
bool lastUsedCond = false;
const std::shared_ptr<ngraph::Node> ngraphOp;
const std::shared_ptr<ov::Node> ngraphOp;
};
} // namespace MKLDNNPlugin

View File

@@ -0,0 +1,471 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <shared_test_classes/single_layer/loop.hpp>
#include "shared_test_classes/base/ov_subgraph.hpp"
#include "ngraph_functions/builders.hpp"
#include "functional_test_utils/ov_tensor_utils.hpp"
using namespace InferenceEngine;
using namespace ov;
using namespace test;
using namespace ngraph::helpers;
namespace CPULayerTestsDefinitions {
// How a Loop body parameter is wired to the outer graph: either it keeps the
// same value on every iteration (INVARIANT) or it is updated through a back
// edge each iteration (MERGED).
enum LOOP_IN_TYPE {
    INVARIANT,
    MERGED
};

// Parameter tuple for the Loop CPU test suites.
using LoopParams = typename std::tuple<
        InputLayerType,             // TripCount is a constant?
        int64_t,                    // TripCount, -1 means infinity
        std::vector<InputShape>,    // InputShapes
        std::vector<LOOP_IN_TYPE>,  // Type
        ElementType>;               // Input element type
// Parameterized CPU test for the Loop operation with dynamic input shapes.
// The body sums all body parameters into Zo; the network outputs are the final
// body condition, the last Zo, and the concatenation of per-iteration Zo slices.
class LoopLayerCPUTest : public testing::WithParamInterface<LoopParams>,
                         virtual public SubgraphBaseTest {
public:
    // Builds a human-readable test name from the parameter tuple.
    static std::string getTestCaseName(testing::TestParamInfo<LoopParams> obj) {
        InputLayerType trip_count_type;
        int64_t trip_count;
        std::vector<InputShape> shapes;
        std::vector<LOOP_IN_TYPE> types;
        ElementType netType;
        std::tie(trip_count_type, trip_count, shapes, types, netType) = obj.param;

        std::ostringstream result;
        for (size_t i = 0; i < shapes.size(); i++) {
            result << "Input" << i << "_";
            result << "IS=" << CommonTestUtils::partialShape2str({shapes[i].first}) << "_";
            result << "TS=";
            for (const auto& item : shapes[i].second) {
                result << CommonTestUtils::vec2str(item) << "_";
            }
        }
        result << "types=";
        for (auto type : types)
            result << type << "_";
        result << "trip_count_type=" << trip_count_type << "_";
        result << "trip_count=" << trip_count << "_";
        result << "netType=" << netType;
        return result.str();
    }

protected:
    // Fills the request inputs: the optional leading "trip_count" scalar gets a
    // value in [1, 10]; every remaining body input gets random data of its
    // target static shape.
    void generate_inputs(const std::vector<ov::Shape>& targetInputStaticShapes) override {
        inputs.clear();
        const auto& funcInputs = function->inputs();

        // trip count
        int i = 0;
        if (funcInputs[i].get_node_shared_ptr()->get_friendly_name() == "trip_count") {
            const auto& funcInput = funcInputs[i];
            ov::runtime::Tensor tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(),
                                                                                 funcInput.get_shape(), 10, 1);
            inputs.insert({funcInput.get_node_shared_ptr(), tensor});
            i++;
        }

        // parameters for body
        for (; i < funcInputs.size(); ++i) {
            const auto& funcInput = funcInputs[i];
            ov::runtime::Tensor tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(),
                                                                                 targetInputStaticShapes[i], 15, 0, 32768);
            inputs.insert({funcInput.get_node_shared_ptr(), tensor});
        }
    }

    void SetUp() override {
        InputLayerType trip_count_type;
        int64_t trip_count;
        std::vector<InputShape> shapes;
        std::vector<LOOP_IN_TYPE> types;
        ElementType netType;
        std::tie(trip_count_type, trip_count, shapes, types, netType) = this->GetParam();

        targetDevice = CommonTestUtils::DEVICE_CPU;
        init_input_shapes(shapes);

        auto params = ngraph::builder::makeDynamicParams(netType, inputDynamicShapes);

        // Set up the cell body, a function from (Xi, Yi) -> (Zo)
        // Body parameters: one dynamic-rank parameter per network input
        const std::vector<ngraph::PartialShape> body_params_shapes(shapes.size(), ngraph::PartialShape::dynamic());
        ngraph::ParameterVector body_params;
        for (const auto &pshape : body_params_shapes) {
            body_params.emplace_back(std::make_shared<ngraph::opset1::Parameter>(netType, pshape));
        }

        auto body_condition_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::boolean, ngraph::Shape{1}, true);
        auto exec_condition = std::make_shared<ngraph::opset5::Constant>(ngraph::element::boolean, ngraph::Shape{1}, true);
        std::shared_ptr<ngraph::Node> trip_count_input;
        int shift = 0;
        if (trip_count_type == InputLayerType::PARAMETER) {
            // The trip count becomes the leading network parameter, so prepend a
            // scalar shape to every target-shape set to keep indices aligned.
            for (auto& target : targetStaticShapes)
                target.insert(target.begin(), ngraph::Shape{});
            trip_count_input = std::make_shared<ngraph::opset5::Parameter>(ngraph::element::i64, ngraph::Shape{1});
            trip_count_input->set_friendly_name("trip_count");
            params.insert(params.begin(), ov::as_type_ptr<ngraph::opset5::Parameter>(trip_count_input));
            shift++;
        } else {
            trip_count_input = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, trip_count);
        }

        // Body: Zo = sum of all body parameters
        std::shared_ptr<ngraph::Node> Zo = body_params[0];
        for (int i = 1; i < body_params.size(); ++i) {
            Zo = std::make_shared<ngraph::op::v1::Add>(body_params[i], Zo);
        }

        auto body = std::make_shared<ov::Model>(ngraph::OutputVector{body_condition_const, Zo},
                                                body_params);

        auto loop = std::make_shared<ngraph::opset5::Loop>(trip_count_input, exec_condition);
        loop->set_function(body);
        // body output 0 is the per-iteration continue condition; no current-iteration input
        loop->set_special_body_ports(ngraph::opset5::Loop::SpecialBodyPorts{-1, 0});

        for (int i = 0; i < body_params.size(); ++i) {
            if (types[i] == LOOP_IN_TYPE::INVARIANT) {
                loop->set_invariant_input(body_params[i], params[shift + i]);
            } else if (types[i] == LOOP_IN_TYPE::MERGED) {
                // todo: support several merged inputs
                // now supported only one in this sample
                loop->set_merged_input(body_params[i], params[shift + i], Zo);
            }
        }

        // Output 0 is last Zo
        auto out0 = loop->get_iter_value(body_condition_const, -1);
        auto out1 = loop->get_iter_value(Zo, -1);

        // Output 1 is concat of Zos
        // start=0, stride=1, part_size=1, end=-1, axis=1
        auto out2 = loop->get_concatenated_slices(Zo, 0, 1, 1, -1, 1);

        auto result0 = std::make_shared<ngraph::opset5::Result>(out0);
        auto result1 = std::make_shared<ngraph::opset5::Result>(out1);
        auto result2 = std::make_shared<ngraph::opset5::Result>(out2);
        function = std::make_shared<ov::Model>(ngraph::ResultVector{result0, result1, result2}, params, "loop");
    }
};
// Loop test driven purely by the body's continue condition (trip count = -1).
class LoopWhileLayerCPUTest : public LoopLayerCPUTest {
protected:
    // body:
    // while (i < 10)
    //     x += 2
    //     i += 2
    void SetUp() override {
        InputLayerType trip_count_type;
        int64_t trip_count;
        std::vector<InputShape> shapes;
        std::vector<LOOP_IN_TYPE> types;
        std::tie(trip_count_type, trip_count, shapes, types, inType) = this->GetParam();

        targetDevice = CommonTestUtils::DEVICE_CPU;
        init_input_shapes(shapes);
        // Prepend a scalar shape for the leading "trip_count" parameter added below.
        for (auto& target : targetStaticShapes)
            target.insert(target.begin(), ngraph::Shape{});
        auto params = ngraph::builder::makeDynamicParams(inType, inputDynamicShapes);

        // Body parameters: a scalar i64 counter plus one dynamic-rank parameter per input
        const std::vector<ngraph::PartialShape> body_params_shapes(shapes.size(), ngraph::PartialShape::dynamic());
        ngraph::ParameterVector body_params = { std::make_shared<ngraph::opset1::Parameter>(ngraph::element::i64, ngraph::Shape{}) };
        for (const auto &pshape : body_params_shapes) {
            body_params.emplace_back(std::make_shared<ngraph::opset1::Parameter>(inType, pshape));
        }

        auto exec_condition = std::make_shared<ngraph::opset5::Constant>(ngraph::element::boolean, ngraph::Shape{}, true);
        auto trip_count_input = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::i64, ngraph::Shape{});
        trip_count_input->set_friendly_name("trip_count");
        params.insert(params.begin(), trip_count_input);

        // Body: continue while (i < 10); each iteration computes i += 2 and x += 2
        auto const_body_cond = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{}, 10);
        auto const_body_step = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{}, 2);
        auto less = std::make_shared<ngraph::opset5::Less>(body_params[0], const_body_cond);
        auto exec_idx = std::make_shared<ngraph::opset5::Add>(body_params[0], const_body_step);

        auto node_const = std::make_shared<ngraph::opset5::Constant>(inType, ngraph::Shape{}, 2);
        auto node = std::make_shared<ngraph::opset5::Add>(body_params[1], node_const);

        // reference ngraph function is resized by input static shapes in tests but
        // loop with pad in body has different input shape in each infer request so tests don't support it.
        // Alternative - eltwise instead of pad
        // const std::vector<int64_t> begin(inputDynamicShapes[0].rank().get_length(), 1);
        // const std::vector<int64_t> end(inputDynamicShapes[0].rank().get_length(), 0);
        // auto node = ngraph::builder::makePad(body_params[1], begin, end, .0f, PadMode::CONSTANT);

        auto body = std::make_shared<ov::Model>(ngraph::OutputVector{less, exec_idx, node}, body_params);

        auto loop = std::make_shared<ngraph::opset5::Loop>(params[0], exec_condition);
        loop->set_function(body);
        // body output 0 ("less") is the per-iteration continue condition
        loop->set_special_body_ports(ngraph::opset5::Loop::SpecialBodyPorts{-1, 0});

        loop->set_merged_input(body_params[0], params[0], exec_idx);
        loop->set_merged_input(body_params[1], params[1], node);

        auto out0 = loop->get_iter_value(exec_idx, -1);
        auto out1 = loop->get_iter_value(node, -1);

        auto result0 = std::make_shared<ngraph::opset5::Result>(out0);
        auto result1 = std::make_shared<ngraph::opset5::Result>(out1);
        function = std::make_shared<ov::Model>(ngraph::ResultVector{ result0, result1 }, params, "loop");
    }
};
// Loop whose body produces outputs of different shapes:
//
//          parameter <------ back edge -------
//            |    |                           |
//     StridedSlice    Add ----- Constant      |
//            |         |______________________|
//            |         |
//     ConcatOutput   Output
class LoopForDiffShapesLayerCPUTest : public LoopLayerCPUTest {
protected:
    void SetUp() override {
        InputLayerType trip_count_type;
        int64_t trip_count;
        std::vector<InputShape> shapes;
        std::vector<LOOP_IN_TYPE> types;
        std::tie(trip_count_type, trip_count, shapes, types, inType) = this->GetParam();

        targetDevice = CommonTestUtils::DEVICE_CPU;
        init_input_shapes(shapes);

        auto params = ngraph::builder::makeDynamicParams(inType, inputDynamicShapes);

        // Set up the cell body, a function from (Xi, Yi) -> (Zo)
        // Body parameters
        const std::vector<ngraph::PartialShape> body_params_shapes(shapes.size(), ngraph::PartialShape::dynamic());
        ngraph::ParameterVector body_params;
        for (const auto &pshape : body_params_shapes) {
            body_params.emplace_back(std::make_shared<ngraph::opset1::Parameter>(inType, pshape));
        }

        auto body_condition_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::boolean, ngraph::Shape{1}, true);
        auto exec_condition = std::make_shared<ngraph::opset5::Constant>(ngraph::element::boolean, ngraph::Shape{1}, true);
        std::shared_ptr<ngraph::Node> trip_count_input;
        int shift = 0;
        if (trip_count_type == InputLayerType::PARAMETER) {
            // The trip count becomes the leading network parameter, so prepend a
            // scalar shape to every target-shape set to keep indices aligned.
            for (auto& target : targetStaticShapes)
                target.insert(target.begin(), ngraph::Shape{});
            trip_count_input = std::make_shared<ngraph::opset5::Parameter>(ngraph::element::i64, ngraph::Shape{1});
            trip_count_input->set_friendly_name("trip_count");
            params.insert(params.begin(), ov::as_type_ptr<ngraph::opset5::Parameter>(trip_count_input));
            shift++;
        } else {
            trip_count_input = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, trip_count);
        }

        // Body: a slice of the input (for the concat output) and input + 0.5 (back edge)
        const auto axis = 1;
        auto s = ngraph::builder::makeSlice(body_params[0], {0}, {1}, {1}, {axis}, inType);
        auto constant = ngraph::builder::makeConstant(inType, std::vector<size_t>{1}, std::vector<float>{0.5});
        auto eltwise = std::make_shared<ov::op::v1::Add>(body_params[0], constant);
        auto body = std::make_shared<ov::Model>(ngraph::OutputVector{body_condition_const, s, eltwise}, body_params);

        auto loop = std::make_shared<ngraph::opset5::Loop>(trip_count_input, exec_condition);
        loop->set_function(body);
        loop->set_special_body_ports(ngraph::opset5::Loop::SpecialBodyPorts{-1, 0});

        loop->set_merged_input(body_params[0], params[shift], eltwise);

        // Output 0 is last Zo
        auto out0 = loop->get_iter_value(body_condition_const, -1);
        auto out1 = loop->get_iter_value(eltwise, -1);

        // Output 1 is concat of Zos
        // start=0, stride=1, part_size=1, end=-1, axis=1
        auto out2 = loop->get_concatenated_slices(s, 0, 1, 1, -1, 1);

        auto result0 = std::make_shared<ngraph::opset5::Result>(out0);
        auto result1 = std::make_shared<ngraph::opset5::Result>(out1);
        auto result2 = std::make_shared<ngraph::opset5::Result>(out2);
        function = std::make_shared<ov::Model>(ngraph::ResultVector{result0, result1, result2}, params, "loop");
    }
};
// Run each parameterized fixture and compare plugin results against the reference.
TEST_P(LoopLayerCPUTest, CompareWithRefs) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()
    run();
}

TEST_P(LoopWhileLayerCPUTest, CompareWithRefs) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()
    run();
}

TEST_P(LoopForDiffShapesLayerCPUTest, CompareWithRefs) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()
    run();
}
namespace {

// Element types every Loop suite is instantiated with.
const std::vector<ElementType> inputPrecisions = {
        ElementType::f32,
        ElementType::bf16,
        ElementType::i8
};

std::vector<InputLayerType> trip_count_type { InputLayerType::CONSTANT, InputLayerType::PARAMETER };
std::vector<int64_t> trip_count { 1, 5 }; // works only if trip_count_type is constant

// dim[axis] = 1 because loop supports concatenation only with stride = part_size = 1
// first loop suite is tested with output concatenation
std::vector<std::vector<InputShape>> inputs = {
    { //first test suite
        { //dynamic shape for first input
            {-1, 1, -1},
            { // target static shapes
                {10, 1, 10},
                {1, 1, 1},
                {1, 1, 1},
                {5, 1, 3}
            }
        },
        { //dynamic shape for second input
            {-1, -1, -1},
            { // target static shapes
                {1, 1, 1},
                {5, 1, 2},
                {5, 1, 2},
                {5, 1, 3}
            }
        },
        { //dynamic shape for third input
            {-1, 1, -1},
            { // target static shapes
                {10, 1, 10},
                {5, 1, 2},
                {5, 1, 2},
                {5, 1, 3}
            }
        }
    },
    { //second test suite
        { //dynamic shape for first input
            {{1, 10}, 1, {1, 10}},
            { // target static shapes
                {8, 1, 8},
                {1, 1, 1},
                {1, 1, 1},
                {1, 1, 1}
            }
        },
        { //dynamic shape for second input
            {{1, 8}, 1, {1, 8}},
            { // target static shapes
                {8, 1, 8},
                {1, 1, 1},
                {1, 1, 1},
                {5, 1, 3}
            }
        },
        { //dynamic shape for third input
            {{1, 10}, -1, {1, 10}},
            { // target static shapes
                {8, 1, 8},
                {1, 1, 1},
                {1, 1, 1},
                {5, 1, 3}
            }
        }
    },
};

// Wiring for the three inputs of the common suite: two invariant, one merged (back edge).
std::vector<LOOP_IN_TYPE> types = {
    LOOP_IN_TYPE::INVARIANT, LOOP_IN_TYPE::INVARIANT, LOOP_IN_TYPE::MERGED
};

INSTANTIATE_TEST_SUITE_P(smoke_LoopForCommon, LoopLayerCPUTest,
                         ::testing::Combine(
                                 ::testing::ValuesIn(trip_count_type),
                                 ::testing::ValuesIn(trip_count),
                                 ::testing::ValuesIn(inputs),
                                 ::testing::Values(types),
                                 ::testing::ValuesIn(inputPrecisions)),
                         LoopLayerCPUTest::getTestCaseName);

std::vector<std::vector<InputShape>> inputs_2 = {
    { //first test suite
        { //dynamic shape
            {-1, -1},
            { // target static shapes
                {10, 10},
                {1, 1},
                {1, 1},
                {5, 3}
            }
        },
    },
    { //second test suite
        { //dynamic shape
            {{1, 10}, {1, 10}},
            { // target static shapes
                {5, 2},
                {2, 5},
                {5, 5},
                {5, 5}
            }
        },
    }
};

// The "while" suite is condition-driven: trip count is a constant -1 (infinity).
INSTANTIATE_TEST_SUITE_P(smoke_LoopWhileCommon, LoopWhileLayerCPUTest,
                         ::testing::Combine(
                                 ::testing::Values(trip_count_type[0]),
                                 ::testing::Values(-1),
                                 ::testing::ValuesIn(inputs_2),
                                 ::testing::Values(std::vector<LOOP_IN_TYPE>{}),
                                 ::testing::ValuesIn(inputPrecisions)),
                         LoopWhileLayerCPUTest::getTestCaseName);

std::vector<std::vector<InputShape>> inputs_3 = {
    { // first test suite
        {
            {-1, -1, -1},
            { // target static shapes
                {10, 1, 10},
                {1, 10, 1},
                {1, 10, 1},
                {2, 2, 2},
            }
        },
    },
    { // second test suite
        {
            {{0, 10}, {0, 10}, {0, 10}},
            { // target static shapes
                {10, 5, 10},
                {1, 10, 1},
                {1, 10, 1},
                {2, 1, 2},
            }
        },
    },
};

INSTANTIATE_TEST_SUITE_P(smoke_LoopForDiffShapesConcat, LoopForDiffShapesLayerCPUTest,
                         ::testing::Combine(
                                 ::testing::ValuesIn(trip_count_type),
                                 ::testing::ValuesIn(trip_count),
                                 ::testing::ValuesIn(inputs_3),
                                 ::testing::Values(std::vector<LOOP_IN_TYPE>{}),
                                 ::testing::ValuesIn(inputPrecisions)),
                         LoopLayerCPUTest::getTestCaseName);

} // namespace
} // namespace CPULayerTestsDefinitions

View File

@@ -0,0 +1,157 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <shared_test_classes/single_layer/tensor_iterator.hpp>
#include "shared_test_classes/base/ov_subgraph.hpp"
#include "ngraph_functions/builders.hpp"
#include "functional_test_utils/ov_tensor_utils.hpp"
using namespace InferenceEngine;
using namespace ov;
using namespace test;
namespace CPULayerTestsDefinitions {
// Parameter tuple for the TensorIterator CPU test suite.
using TensorIteratorParams = typename std::tuple<
        std::vector<InputShape>,                 // Input shapes
        ngraph::op::RecurrentSequenceDirection,  // Direction
        ElementType>;                            // element type
// Parameterized CPU test for TensorIterator with dynamic input shapes.
// The body computes add(tanh(x0_slice), relu(x1_slice)) per sequence step and
// the iterator concatenates the per-step results along the sequence axis.
class TensorIteratorCPUTest : public testing::WithParamInterface<TensorIteratorParams>,
                              virtual public SubgraphBaseTest {
public:
    // Builds a human-readable test name from the parameter tuple.
    static std::string getTestCaseName(testing::TestParamInfo<TensorIteratorParams> obj) {
        std::vector<InputShape> shapes;
        ngraph::op::RecurrentSequenceDirection direction;
        ElementType inType;
        std::tie(shapes, direction, inType) = obj.param;

        std::ostringstream result;
        for (size_t i = 0; i < shapes.size(); i++) {
            result << "Input" << i << "_";
            result << "IS=" << CommonTestUtils::partialShape2str({shapes[i].first}) << "_";
            result << "TS=";
            for (const auto& item : shapes[i].second) {
                result << CommonTestUtils::vec2str(item) << "_";
            }
        }
        result << "direction=" << direction << "_";
        result << "netPRC=" << inType << "_";
        return result.str();
    }

protected:
    void SetUp() override {
        std::vector<InputShape> shapes;
        ngraph::op::RecurrentSequenceDirection direction;
        ElementType inType;
        std::tie(shapes, direction, inType) = this->GetParam();

        targetDevice = CommonTestUtils::DEVICE_CPU;
        init_input_shapes({shapes});

        const size_t sequence_axis = 1;
        auto tensor_iterator = std::make_shared<ngraph::opset5::TensorIterator>();
        auto params = ngraph::builder::makeDynamicParams(inType, inputDynamicShapes);
        ngraph::ParameterVector body_params;
        for (size_t i = 0; i < shapes.size(); i++) {
            // Each body parameter sees a single slice along the sequence axis.
            ngraph::PartialShape shape = shapes[i].first;
            shape[sequence_axis] = 1;
            auto paramNode = std::make_shared<ngraph::opset1::Parameter>(inType, shape);
            body_params.push_back(paramNode);
        }

        auto tanh = ngraph::builder::makeActivation(body_params[0], inType, ngraph::helpers::Tanh);
        auto relu = ngraph::builder::makeActivation(body_params[1], inType, ngraph::helpers::Relu);
        auto add = std::make_shared<ngraph::opset1::Add>(tanh, relu);

        auto body = std::make_shared<ov::Model>(ngraph::OutputVector{add}, body_params, "body");
        tensor_iterator->set_function(body);

        // Slice inputs / concatenate outputs in the direction under test
        // (start/stride/end signs flip between FORWARD and REVERSE).
        if (direction == ngraph::op::RecurrentSequenceDirection::FORWARD) {
            tensor_iterator->set_sliced_input(body_params[0], params[0], 0, 1, 1, -1, sequence_axis);
            tensor_iterator->set_sliced_input(body_params[1], params[1], 0, 1, 1, -1, sequence_axis);
            tensor_iterator->get_concatenated_slices(add, 0, 1, 1, -1, sequence_axis);
        } else if (direction == ngraph::op::RecurrentSequenceDirection::REVERSE) {
            tensor_iterator->set_sliced_input(body_params[0], params[0], -1, -1, 1, 0, sequence_axis);
            tensor_iterator->set_sliced_input(body_params[1], params[1], -1, -1, 1, 0, sequence_axis);
            tensor_iterator->get_concatenated_slices(add, -1, -1, 1, 0, sequence_axis);
        } else {
            NGRAPH_CHECK(false, "Bidirectional case is not supported.");
        }

        function = std::make_shared<ov::Model>(ngraph::OutputVector{tensor_iterator->output(0)}, params);
    }
};
// Run the parameterized fixture and compare plugin results against the reference.
TEST_P(TensorIteratorCPUTest, CompareWithRefs) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()
    run();
}
namespace {

// Element types the TensorIterator suite is instantiated with.
const std::vector<ElementType> inputPrecisions = {
        ElementType::f32,
        ElementType::bf16,
        ElementType::i8
};

std::vector<ngraph::op::RecurrentSequenceDirection> direction = {ngraph::op::RecurrentSequenceDirection::FORWARD,
                                                                 ngraph::op::RecurrentSequenceDirection::REVERSE};

std::vector<std::vector<InputShape>> inputs = {
    { //first test suite
        { //dynamic shape for first input
            {-1, -1, -1},
            { // target static shapes
                {10, 12, 10},
                {10, 8, 10},
                {1, 8, 2},
                {5, 3, 3}
            }
        },
        { //dynamic shape for second input
            {-1, -1, -1},
            { // target static shapes
                {1, 12, 1},
                {1, 8, 1},
                {5, 8, 2},
                {5, 3, 3}
            }
        },
    },
    { //second test suite
        { //dynamic shape for first input
            {{1, 12}, 5, {1, 12}},
            { // target static shapes
                {1, 5, 1},
                {5, 5, 5},
                {1, 5, 1},
                {5, 5, 5}
            }
        },
        { //dynamic shape for second input
            {{1, 12}, 5, {1, 12}},
            { // target static shapes
                {1, 5, 1},
                {1, 5, 1},
                {5, 5, 1},
                {5, 5, 5}
            }
        },
    }
};

INSTANTIATE_TEST_SUITE_P(smoke_TensorIteratorSimple, TensorIteratorCPUTest,
                         ::testing::Combine(
                                 ::testing::ValuesIn(inputs),
                                 ::testing::ValuesIn(direction),
                                 ::testing::ValuesIn(inputPrecisions)),
                         TensorIteratorCPUTest::getTestCaseName);

} // namespace
} // namespace CPULayerTestsDefinitions