Reference implementations for Loop and TensorIterator ops (#2978)

* Loop op ngraph implementation, update IE IR Reader and ngraph to cnn converter

* refactoring SubGraphOp class

* type prop unit tests

* ngraph code style

* update comment

* single layer tests for Loop operation

* fix file name

* Add SpecialBodyPorts attribute in Loop op, update single layer tests

* first debug version

* more tests

* missing test file

* removed unneeded shapes from test data

* move test data to new folder

* shape infer tests

* Added execution tests

* add several new test cases, strict checks in Loop impl, temporarily disable single layer tests

* ngraph codestyle, refactoring, clone_new_args test

* resolve review remarks

* fix build

* fix tests

* more execution tests

* add a new constructor of Loop op, resolve review remarks

* execution tests

* sync with current version

* handle scalars and more tests

* scalar test enabled

* loop reference impl

* bug fixes in tests, onnx importer part and in the ref implementation of the Loop op

* applied remarks

* handle unsupported cases

* rewrite unit tests

* update INTERPRETER manifest

* is_termination_condition_always_true simplification

* [TEST] update python models tests

* review remarks

* added xfail to tiny_yolov3

* missing model test

* revert test data

* fixed numbers of failing tests

* fixed failed test description

* fix test message

* fix xfail test

* reference implementation for ngraph::function

* update loop reference implementation

* Refactor loop reference implementation

* ngraph codestyle

* Refactoring

* Submodule update

* Skip check for Reduce ops in mkl for scalar cases, support for yolov3

* fix ngraph reader tests

* revert ceiling op, renaming

* Add alias (Ceiling) for Ceil op in mkl

* delete xfails

* fix build

* single layer tests for tensor iterator

* Refactor TensorIterator and Loop ref impls

* revert dynamic tensor creation, disable some dynamic test cases

* fix warning

* Resolve review remarks

* revert Predefined values in Loop tests

Co-authored-by: Mateusz Bencer <mateusz.bencer@intel.com>
Ivan Tikhonov 2020-11-10 15:49:59 +03:00 committed by GitHub
parent b6e2cd692b
commit c309bb77d2
49 changed files with 1199 additions and 65 deletions

View File

@ -20,6 +20,7 @@ MKLDNN_EXTENSION_NODE(MathImpl, Asinh);
MKLDNN_EXTENSION_NODE(MathImpl, Atan);
MKLDNN_EXTENSION_NODE(MathImpl, Atanh);
MKLDNN_EXTENSION_NODE(MathImpl, Ceil);
MKLDNN_EXTENSION_NODE(MathImpl, Ceiling);
MKLDNN_EXTENSION_NODE(MathImpl, Cos);
MKLDNN_EXTENSION_NODE(MathImpl, Cosh);
MKLDNN_EXTENSION_NODE(MathImpl, Erf);

View File

@ -68,6 +68,7 @@ public:
else if (math_func == "Atan") mathFunction = Math::Atan;
else if (math_func == "Atanh") mathFunction = Math::Atanh;
else if (math_func == "Ceil") mathFunction = Math::Ceil;
else if (math_func == "Ceiling") mathFunction = Math::Ceil;
else if (math_func == "Cos") mathFunction = Math::Cos;
else if (math_func == "Cosh") mathFunction = Math::Cosh;
else if (math_func == "Floor") mathFunction = Math::Floor;
@ -276,6 +277,7 @@ REG_FACTORY_FOR(MathImpl, Asinh);
REG_FACTORY_FOR(MathImpl, Atan);
REG_FACTORY_FOR(MathImpl, Atanh);
REG_FACTORY_FOR(MathImpl, Ceil);
REG_FACTORY_FOR(MathImpl, Ceiling);
REG_FACTORY_FOR(MathImpl, Cos);
REG_FACTORY_FOR(MathImpl, Cosh);
REG_FACTORY_FOR(MathImpl, Erf);

View File

@ -1264,7 +1264,10 @@ void MKLDNNReduceNode::getSupportedDescriptors() {
if (getParentEdgeAt(REDUCE_DATA)->getDims().ndims() != getChildEdgeAt(0)->getDims().ndims())
THROW_IE_EXCEPTION << "Reduce layer with name " << getName() << "gets incorrect number of input/output dimensions!";
} else {
if (getParentEdgeAt(REDUCE_DATA)->getDims().ndims() <= getChildEdgeAt(0)->getDims().ndims())
// In fact, after the Reduce operation the output shape must be a scalar if the input was 1d.
// But for now, a 0d tensor (scalar) is emulated as a 1d tensor. Skip the check in such cases.
bool is_emulated_0d_as_1d = getParentEdgeAt(REDUCE_DATA)->getDims().ndims() == 1 && getChildEdgeAt(0)->getDims().ndims() == 1;
if (getParentEdgeAt(REDUCE_DATA)->getDims().ndims() <= getChildEdgeAt(0)->getDims().ndims() && !is_emulated_0d_as_1d)
THROW_IE_EXCEPTION << "Reduce layer with name " << getName() << "gets incorrect number of input/output dimensions!";
}

View File

@ -1,4 +1,4 @@
// Copyright (C) 2019 Intel Corporation
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

View File

@ -1,4 +1,4 @@
// Copyright (C) 2019 Intel Corporation
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
@ -10,7 +10,7 @@
using namespace LayerTestsDefinitions;
namespace {
// without clip values increase rapidly, so use only seq_lenghts = 2
// output values increase rapidly without clip, so use only seq_lenghts = 2
std::vector<size_t> seq_lengths_zero_clip{2};
std::vector<size_t> seq_lengths_clip_non_zero{20};
std::vector<size_t> batch{1, 10};

View File

@ -1,4 +1,4 @@
// Copyright (C) 2019 Intel Corporation
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

View File

@ -1,4 +1,4 @@
// Copyright (C) 2019 Intel Corporation
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
@ -10,7 +10,7 @@
using namespace LayerTestsDefinitions;
namespace {
// without clip values increase rapidly, so use only seq_lenghts = 2
// output values increase rapidly without clip, so use only seq_lenghts = 2
std::vector<size_t> seq_lengths_zero_clip{2};
std::vector<size_t> seq_lengths_clip_non_zero{20};
std::vector<size_t> batch{1, 10};

View File

@ -27,6 +27,12 @@ const std::vector<std::vector<size_t>> inputShapes = {
std::vector<size_t>{3, 5, 7, 9},
};
const std::vector<std::vector<size_t>> inputShapesOneAxis = {
std::vector<size_t>{10, 20, 30, 40},
std::vector<size_t>{3, 5, 7, 9},
std::vector<size_t>{10},
};
const std::vector<std::vector<int>> axes = {
{0},
{1},
@ -71,7 +77,7 @@ const auto paramsOneAxis = testing::Combine(
testing::Values(InferenceEngine::Precision::UNSPECIFIED),
testing::Values(InferenceEngine::Precision::UNSPECIFIED),
testing::Values(InferenceEngine::Layout::ANY),
testing::ValuesIn(inputShapes),
testing::ValuesIn(inputShapesOneAxis),
testing::Values(CommonTestUtils::DEVICE_CPU)
);

View File

@ -1,4 +1,4 @@
// Copyright (C) 2019 Intel Corporation
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

View File

@ -1,4 +1,4 @@
// Copyright (C) 2019 Intel Corporation
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
@ -10,7 +10,7 @@
using namespace LayerTestsDefinitions;
namespace {
// without clip values increase rapidly, so use only seq_lenghts = 2
// output values increase rapidly without clip, so use only seq_lenghts = 2
std::vector<size_t> seq_lengths_zero_clip{2};
std::vector<size_t> seq_lengths_clip_non_zero{20};
std::vector<size_t> batch{1, 10};

View File

@ -0,0 +1,58 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include <ngraph/op/util/attr_types.hpp>
#include "single_layer_tests/tensor_iterator.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
namespace {
// output values increase rapidly without clip, so use only seq_lenghts = 2
std::vector<bool> should_decompose = {true, false};
std::vector<size_t> seq_lengths_zero_clip{2};
std::vector<size_t> seq_lengths_clip_non_zero{20};
std::vector<size_t> batch{1, 10};
std::vector<size_t> hidden_size{1, 10};
std::vector<size_t> input_size{10};
std::vector<ngraph::helpers::TensorIteratorBody> body_type
= {ngraph::helpers::TensorIteratorBody::LSTM, ngraph::helpers::TensorIteratorBody::RNN,
ngraph::helpers::TensorIteratorBody::GRU};
std::vector<float> clip{0.f};
std::vector<float> clip_non_zeros{0.7f};
std::vector<ngraph::op::RecurrentSequenceDirection> direction = {ngraph::op::RecurrentSequenceDirection::FORWARD,
ngraph::op::RecurrentSequenceDirection::REVERSE};
std::vector<InferenceEngine::Precision> netPrecisions = {InferenceEngine::Precision::FP32,
InferenceEngine::Precision::FP16};
INSTANTIATE_TEST_CASE_P(smoke_TensorIteratorCommon, TensorIteratorTest,
::testing::Combine(
::testing::ValuesIn(should_decompose),
::testing::ValuesIn(seq_lengths_zero_clip),
::testing::ValuesIn(batch),
::testing::ValuesIn(hidden_size),
::testing::ValuesIn(input_size),
::testing::ValuesIn(clip),
::testing::ValuesIn(body_type),
::testing::ValuesIn(direction),
::testing::ValuesIn(netPrecisions),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
TensorIteratorTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_TensorIteratorCommonClip, TensorIteratorTest,
::testing::Combine(
::testing::ValuesIn(should_decompose),
::testing::ValuesIn(seq_lengths_clip_non_zero),
::testing::ValuesIn(batch),
::testing::ValuesIn(hidden_size),
::testing::ValuesIn(input_size),
::testing::ValuesIn(clip_non_zeros),
::testing::ValuesIn(body_type),
::testing::ValuesIn(direction),
::testing::ValuesIn(netPrecisions),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
TensorIteratorTest::getTestCaseName);
} // namespace

View File

@ -1,4 +1,4 @@
// Copyright (C) 2019 Intel Corporation
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

View File

@ -1,4 +1,4 @@
// Copyright (C) 2019 Intel Corporation
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

View File

@ -61,7 +61,7 @@ class StaticShapeLoopTest : public testing::WithParamInterface<StaticShapeLoopPa
public:
static std::string getTestCaseName(const testing::TestParamInfo<StaticShapeLoopParams> &obj);
InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo &info) const override;
std::vector<std::vector<std::uint8_t>> CalculateRefs() override;
std::vector<std::vector<std::uint8_t>> PredefinedRefs();
private:
bool static_iter_num; // trip count provided by constant node
@ -100,7 +100,7 @@ protected:
return LayerTestsCommon::GenerateInput(info);
}
std::vector<std::vector<std::uint8_t>> CalculateRefs() override {
std::vector<std::vector<std::uint8_t>> PredefinedRefs() {
if (outputGens.empty())
return LayerTestsCommon::CalculateRefs();

View File

@ -1,4 +1,4 @@
// Copyright (C) 2019 Intel Corporation
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

View File

@ -1,4 +1,4 @@
// Copyright (C) 2019 Intel Corporation
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

View File

@ -1,4 +1,4 @@
// Copyright (C) 2019 Intel Corporation
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

View File

@ -1,4 +1,4 @@
// Copyright (C) 2019 Intel Corporation
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

View File

@ -0,0 +1,39 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <tuple>
#include <string>
#include <vector>
#include <memory>
#include <ngraph/op/util/attr_types.hpp>
#include "functional_test_utils/layer_test_utils.hpp"
#include "ngraph_functions/builders.hpp"
#include "ngraph_functions/utils/ngraph_helpers.hpp"
namespace LayerTestsDefinitions {
using TensorIteratorParams = typename std::tuple<
bool, // using unroll tensor iterator transformation
size_t, // seq_lengths
size_t, // batch
size_t, // hidden size
size_t, // input size
float, // clip
ngraph::helpers::TensorIteratorBody, // body type
ngraph::op::RecurrentSequenceDirection, // direction
InferenceEngine::Precision, // Network precision
std::string>; // Device name
class TensorIteratorTest : public testing::WithParamInterface<TensorIteratorParams>,
virtual public LayerTestsUtils::LayerTestsCommon {
public:
static std::string getTestCaseName(const testing::TestParamInfo<TensorIteratorParams> &obj);
protected:
void SetUp() override;
};
} // namespace LayerTestsDefinitions

View File

@ -1,4 +1,4 @@
// Copyright (C) 2019 Intel Corporation
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
@ -36,6 +36,10 @@ std::string GRUCellTest::getTestCaseName(const testing::TestParamInfo<GRUCellPar
std::string targetDevice;
std::tie(should_decompose, batch, hidden_size, input_size, activations, clip,
linear_before_reset, netPrecision, targetDevice) = obj.param;
inputShapes = {
{{batch, input_size}, {batch, hidden_size}, {3 * hidden_size, input_size},
{3 * hidden_size, hidden_size}, {(linear_before_reset? 4 : 3) * hidden_size}},
};
std::ostringstream result;
result << "decomposition" << should_decompose << "_";
result << "batch=" << batch << "_";

View File

@ -1,4 +1,4 @@
// Copyright (C) 2019 Intel Corporation
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
@ -42,7 +42,7 @@ namespace LayerTestsDefinitions {
{3 * hidden_size, hidden_size}, {(linear_before_reset ? 4 : 3) * hidden_size}},
};
std::ostringstream result;
result << "seq_lenghts" << seq_lenghts << "_";
result << "seq_lenghts=" << seq_lenghts << "_";
result << "batch=" << batch << "_";
result << "hidden_size=" << hidden_size << "_";
result << "input_size=" << input_size << "_";

View File

@ -53,7 +53,6 @@ namespace LayerTestsDefinitions {
void LoopTest::SetUp() {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
SetRefMode(LayerTestsUtils::IE);
bool execute_first_iteration;
bool is_body_condition_const;
bool body_condition; // works only if is_body_condition_const ==
@ -161,8 +160,6 @@ namespace LayerTestsDefinitions {
void StaticShapeLoopTest::SetUp() {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
SetRefMode(LayerTestsUtils::IE);
auto args_papck = std::tie(static_iter_num, max_iter_num, dynamic_exit, axis);
std::tie(
static_continue_cond,
@ -261,7 +258,7 @@ namespace LayerTestsDefinitions {
}
// Predefined ref output
std::vector<std::vector<std::uint8_t>> StaticShapeLoopTest::CalculateRefs() {
std::vector<std::vector<std::uint8_t>> StaticShapeLoopTest::PredefinedRefs() {
bool auto_concat_out = (axis != -1);
const auto n_iter = actual_n_iter();
@ -293,6 +290,23 @@ namespace LayerTestsDefinitions {
Run();
}
TEST_P(StaticShapeLoopTest, CompareWithPredefinedRefs) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
LoadNetwork();
Infer();
auto expectedOutputs = PredefinedRefs(); // use predefined refs instead of CalculateRefs function
const auto& actualOutputs = GetOutputs();
if (expectedOutputs.empty()) {
return;
}
IE_ASSERT(actualOutputs.size() == expectedOutputs.size())
<< "nGraph interpreter has " << expectedOutputs.size() << " outputs, while IE " << actualOutputs.size();
Compare(expectedOutputs, actualOutputs);
}
TEST_P(TrivialLoopTest, PassThroughBody) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
InferenceEngine::Precision iePrc;

View File

@ -1,4 +1,4 @@
// Copyright (C) 2019 Intel Corporation
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

View File

@ -1,4 +1,4 @@
// Copyright (C) 2019 Intel Corporation
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
@ -41,7 +41,7 @@ namespace LayerTestsDefinitions {
{4 * hidden_size, hidden_size}, {4 * hidden_size}},
};
std::ostringstream result;
result << "seq_lenghts" << seq_lenghts << "_";
result << "seq_lenghts=" << seq_lenghts << "_";
result << "batch=" << batch << "_";
result << "hidden_size=" << hidden_size << "_";
result << "input_size=" << input_size << "_";

View File

@ -1,4 +1,4 @@
// Copyright (C) 2019 Intel Corporation
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

View File

@ -1,4 +1,4 @@
// Copyright (C) 2019 Intel Corporation
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
@ -41,7 +41,7 @@ namespace LayerTestsDefinitions {
{hidden_size, hidden_size}, {hidden_size}},
};
std::ostringstream result;
result << "seq_lenghts" << seq_lenghts << "_";
result << "seq_lenghts=" << seq_lenghts << "_";
result << "batch=" << batch << "_";
result << "hidden_size=" << hidden_size << "_";
result << "input_size=" << input_size << "_";

View File

@ -0,0 +1,226 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <tuple>
#include <string>
#include <vector>
#include <memory>
#include <functional>
#include "ie_core.hpp"
#include "common_test_utils/common_utils.hpp"
#include "functional_test_utils/blob_utils.hpp"
#include "functional_test_utils/precision_utils.hpp"
#include "functional_test_utils/plugin_cache.hpp"
#include "functional_test_utils/skip_tests_config.hpp"
#include "single_layer_tests/tensor_iterator.hpp"
#include <transformations/control_flow/unroll_tensor_iterator.hpp>
namespace LayerTestsDefinitions {
std::string TensorIteratorTest::getTestCaseName(const testing::TestParamInfo<TensorIteratorParams> &obj) {
bool should_decompose;
size_t seq_lenghts;
size_t batch;
size_t hidden_size;
size_t input_size;
ngraph::helpers::TensorIteratorBody ti_body;
float clip;
ngraph::op::RecurrentSequenceDirection direction;
InferenceEngine::Precision netPrecision;
std::string targetDevice;
std::tie(should_decompose, seq_lenghts, batch, hidden_size, input_size, clip, ti_body, direction, netPrecision,
targetDevice) = obj.param;
std::vector<std::vector<size_t>> inputShapes = {};
switch (ti_body) {
case ngraph::helpers::TensorIteratorBody::LSTM:
inputShapes = {
{{batch, input_size}, {batch, hidden_size}, {batch, hidden_size}, {4 * hidden_size, input_size},
{4 * hidden_size, hidden_size}, {4 * hidden_size}},
};
break;
case ngraph::helpers::TensorIteratorBody::GRU:
inputShapes = {
{{batch, input_size}, {batch, hidden_size}, {3 * hidden_size, input_size},
{3 * hidden_size, hidden_size}, {3 * hidden_size}},
};
break;
case ngraph::helpers::TensorIteratorBody::RNN:
inputShapes = {{batch, input_size}, {batch, hidden_size},
{hidden_size, input_size}, {hidden_size, hidden_size}, {hidden_size}};
break;
}
std::ostringstream result;
result << "unrolling=" << should_decompose << "_";
result << "seq_lenghts=" << seq_lenghts << "_";
result << "batch=" << batch << "_";
result << "hidden_size=" << hidden_size << "_";
result << "input_size=" << input_size << "_";
result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_";
result << "TensorIteratorBody=" << ti_body << "_";
result << "direction=" << direction << "_";
result << "clip=" << clip << "_";
result << "netPRC=" << netPrecision.name() << "_";
result << "targetDevice=" << targetDevice << "_";
return result.str();
}
void TensorIteratorTest::SetUp() {
size_t seq_lenghts;
bool should_decompose;
size_t batch;
size_t hidden_size;
size_t input_size;
ngraph::helpers::TensorIteratorBody ti_body;
float clip;
ngraph::op::RecurrentSequenceDirection direction;
InferenceEngine::Precision netPrecision;
std::tie(should_decompose, seq_lenghts, batch, hidden_size, input_size, clip, ti_body, direction, netPrecision,
targetDevice) = this->GetParam();
std::vector<std::vector<size_t>> inputShapes;
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
auto tensor_iterator = std::make_shared<ngraph::opset5::TensorIterator>();
// Each case consists of 3 steps:
// 1. Create TensorIterator body.
// 2. Set PortMap
// 3. Create outer function
auto axis = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, std::vector<int64_t>{1});
switch (ti_body) {
case ngraph::helpers::TensorIteratorBody::LSTM: {
inputShapes = {
{{batch, seq_lenghts, input_size}, {batch, hidden_size}, {batch, hidden_size}, {4 * hidden_size, input_size},
{4 * hidden_size, hidden_size}, {4 * hidden_size}},
};
auto outer_params = ngraph::builder::makeParams(ngPrc, {inputShapes[0], inputShapes[1], inputShapes[2]});
// 1. Create TensorIterator body.
inputShapes[0][1] = 1; // sliced dimension
auto body_params = ngraph::builder::makeParams(ngPrc, {inputShapes[0], inputShapes[1], inputShapes[2]});
auto squeeze = std::make_shared<ngraph::opset5::Squeeze>(body_params[0], axis);
std::vector<ngraph::Shape> WRB = {inputShapes[3], inputShapes[4], inputShapes[5]};
ngraph::OutputVector out_vector = {squeeze, body_params[1], body_params[2]};
auto lstm_cell = ngraph::builder::makeLSTM(out_vector, WRB, hidden_size, {"sigmoid", "tanh", "tanh"}, {}, {}, clip);
auto unsqueeze = std::make_shared<ngraph::opset5::Unsqueeze>(lstm_cell->output(0), axis);
ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(unsqueeze),
std::make_shared<ngraph::opset1::Result>(lstm_cell->output(0)),
std::make_shared<ngraph::opset1::Result>(lstm_cell->output(1))};
auto body = std::make_shared<ngraph::Function>(results, body_params, "lstm_cell");
tensor_iterator->set_function(body);
// 2. Set PortMap
if (direction == ngraph::op::RecurrentSequenceDirection::FORWARD) {
tensor_iterator->set_sliced_input(body_params[0], outer_params[0], 0, 1, 1, -1, 1);
tensor_iterator->get_concatenated_slices(results[0], 0, 1, 1, -1, 1);
} else if (direction == ngraph::op::RecurrentSequenceDirection::REVERSE) {
tensor_iterator->set_sliced_input(body_params[0], outer_params[0], -1, -1, 1, 0, 1);
tensor_iterator->get_concatenated_slices(results[0], -1, -1, 1, 0, 1);
} else {
NGRAPH_CHECK(false, "Bidirectional case is not supported.");
}
tensor_iterator->set_invariant_input(body_params[1], outer_params[1]);
tensor_iterator->set_invariant_input(body_params[2], outer_params[2]);
tensor_iterator->get_iter_value(results[1]);
tensor_iterator->get_iter_value(results[2]);
// 3. Outer function
function = std::make_shared<ngraph::Function>(ngraph::OutputVector{tensor_iterator->output(0), tensor_iterator->output(1),
tensor_iterator->output(2)}, outer_params);
break;
}
case ngraph::helpers::TensorIteratorBody::GRU: {
inputShapes = {
{{batch, seq_lenghts, input_size}, {batch, hidden_size}, {3 * hidden_size, input_size},
{3 * hidden_size, hidden_size}, {3 * hidden_size}},
};
auto outer_params = ngraph::builder::makeParams(ngPrc, {inputShapes[0], inputShapes[1]});
// 1. Create TensorIterator body.
inputShapes[0][1] = 1; // sliced dimension
auto body_params = ngraph::builder::makeParams(ngPrc, {inputShapes[0], inputShapes[1]});
std::vector<ngraph::Shape> WRB = {inputShapes[2], inputShapes[3], inputShapes[4]};
auto squeeze = std::make_shared<ngraph::opset5::Squeeze>(body_params[0], axis);
ngraph::OutputVector out_vector = {squeeze, body_params[1]};
auto gru_cell = ngraph::builder::makeGRU(out_vector, WRB, hidden_size, {"sigmoid", "tanh"},
{}, {}, clip, false);
auto unsqueeze = std::make_shared<ngraph::opset5::Unsqueeze>(gru_cell->output(0), axis);
ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(gru_cell->output(0)),
std::make_shared<ngraph::opset1::Result>(unsqueeze)};
auto body = std::make_shared<ngraph::Function>(results, body_params, "gru_cell");
tensor_iterator->set_function(body);
// 2. Set PortMap
if (direction == ngraph::op::RecurrentSequenceDirection::FORWARD) {
tensor_iterator->set_sliced_input(body_params[0], outer_params[0], 0, 1, 1, -1, 1);
tensor_iterator->get_concatenated_slices(results[1], 0, 1, 1, -1, 1);
} else if (direction == ngraph::op::RecurrentSequenceDirection::REVERSE) {
tensor_iterator->set_sliced_input(body_params[0], outer_params[0], -1, -1, 1, 0, 1);
tensor_iterator->get_concatenated_slices(results[1], -1, -1, 1, 0, 1);
} else {
NGRAPH_CHECK(false, "Bidirectional case is not supported.");
}
tensor_iterator->set_invariant_input(body_params[1], outer_params[1]);
tensor_iterator->get_iter_value(results[0]);
// 3. Outer function
function = std::make_shared<ngraph::Function>(ngraph::OutputVector{tensor_iterator->output(0), tensor_iterator->output(1)}, outer_params);
break;
}
case ngraph::helpers::TensorIteratorBody::RNN: {
inputShapes = {{batch, seq_lenghts, input_size},
{batch, hidden_size},
{hidden_size, input_size},
{hidden_size, hidden_size},
{hidden_size}};
auto outer_params = ngraph::builder::makeParams(ngPrc, {inputShapes[0], inputShapes[1]});
// 1. Create TensorIterator body.
inputShapes[0][1] = 1; // sliced dimension
auto body_params = ngraph::builder::makeParams(ngPrc, {inputShapes[0], inputShapes[1]});
std::vector<ngraph::Shape> WRB = {inputShapes[2], inputShapes[3], inputShapes[4]};
auto squeeze = std::make_shared<ngraph::opset5::Squeeze>(body_params[0], axis);
ngraph::OutputVector out_vector = {squeeze, body_params[1]};
auto rnn_cell = ngraph::builder::makeRNN(out_vector, WRB, hidden_size, {"tanh"}, {}, {}, clip);
auto unsqueeze = std::make_shared<ngraph::opset5::Unsqueeze>(rnn_cell->output(0), axis);
ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(rnn_cell),
std::make_shared<ngraph::opset1::Result>(unsqueeze)};
auto body = std::make_shared<ngraph::Function>(results, body_params, "rnn_cell");
tensor_iterator->set_function(body);
// 2. Set PortMap
if (direction == ngraph::op::RecurrentSequenceDirection::FORWARD) {
tensor_iterator->set_sliced_input(body_params[0], outer_params[0], 0, 1, 1, -1, 1);
tensor_iterator->get_concatenated_slices(results[1], 0, 1, 1, -1, 1);
} else if (direction == ngraph::op::RecurrentSequenceDirection::REVERSE) {
tensor_iterator->set_sliced_input(body_params[0], outer_params[0], -1, -1, 1, 0, 1);
tensor_iterator->get_concatenated_slices(results[1], -1, -1, 1, 0, 1);
} else {
NGRAPH_CHECK(false, "Bidirectional case is not supported.");
}
tensor_iterator->set_invariant_input(body_params[1], outer_params[1]);
tensor_iterator->get_iter_value(results[0]);
// 3. Outer function
function = std::make_shared<ngraph::Function>(ngraph::OutputVector{tensor_iterator->output(0), tensor_iterator->output(1)}, outer_params);
break;
}
}
if (should_decompose) {
ngraph::pass::Manager m;
m.register_pass<ngraph::pass::UnrollTensorIterator>();
m.run_passes(function);
}
}
TEST_P(TensorIteratorTest, CompareWithRefs) {
Run();
};
} // namespace LayerTestsDefinitions

View File

@ -182,6 +182,13 @@ enum class PadMode {
SYMMETRIC,
};
enum class TensorIteratorBody {
RNN,
GRU,
LSTM,
// CNN todo: implement
};
std::ostream &operator<<(std::ostream &os, const ReductionType &m);
std::ostream &operator<<(std::ostream &os, const PadMode &m);
@ -258,5 +265,7 @@ std::ostream& operator<<(std::ostream & os, ngraph::op::v4::Interpolate::Nearest
std::ostream& operator<<(std::ostream & os, ngraph::op::v4::Interpolate::ShapeCalcMode type);
std::ostream& operator<<(std::ostream & os, TensorIteratorBody type);
} // namespace helpers
} // namespace ngraph

View File

@ -729,5 +729,21 @@ std::ostream& operator<<(std::ostream & os, ngraph::op::v4::Interpolate::ShapeCa
return os;
}
std::ostream& operator<<(std::ostream & os, TensorIteratorBody type) {
switch (type) {
case TensorIteratorBody::LSTM:
os << "LSTM";
break;
case TensorIteratorBody::RNN:
os << "RNN";
break;
case TensorIteratorBody::GRU:
os << "GRU";
break;
default:
throw std::runtime_error("NOT_SUPPORTED_OP_TYPE");
}
return os;
}
} // namespace helpers
} // namespace ngraph

View File

@ -96,6 +96,9 @@ namespace ngraph
std::shared_ptr<Node>
clone_with_new_inputs(const OutputVector& new_args) const override;
bool evaluate(const HostTensorVector& outputs,
const HostTensorVector& inputs) const override;
private:
SpecialBodyPorts m_special_body_ports;
int64_t m_num_iterations = -1; // -1 means infinity

View File

@ -341,6 +341,10 @@ namespace ngraph
std::vector<std::shared_ptr<op::util::SubGraphOp::OutputDescription>>
m_output_descriptions;
};
using InputDescriptionPtr = std::shared_ptr<util::SubGraphOp::InputDescription>;
using OutputDescriptionPtr = std::shared_ptr<util::SubGraphOp::OutputDescription>;
using InputDescriptionVector = std::vector<InputDescriptionPtr>;
using OutputDescriptionVector = std::vector<OutputDescriptionPtr>;
}
}
template class NGRAPH_API FactoryRegistry<op::util::SubGraphOp::InputDescription>;

View File

@ -0,0 +1,35 @@
//*****************************************************************************
// Copyright 2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <cmath>
#include <cstddef>
#include <vector>
#include "ngraph/function.hpp"
namespace ngraph
{
namespace runtime
{
namespace reference
{
void function(const std::shared_ptr<Function>& function,
const HostTensorVector& inputs,
HostTensorVector& outputs);
}
}
}
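
For illustration only (not part of the diff): a minimal sketch of how this new helper might be called, assuming a trivial x + y graph and the header paths added by this change.

#include <memory>
#include <vector>
#include <ngraph/opsets/opset5.hpp>
#include <ngraph/runtime/host_tensor.hpp>
#include <ngraph/runtime/reference/function.hpp>

void reference_function_example()
{
    using namespace ngraph;
    // Build f(x, y) = x + y.
    auto x = std::make_shared<opset5::Parameter>(element::f32, Shape{2, 2});
    auto y = std::make_shared<opset5::Parameter>(element::f32, Shape{2, 2});
    auto add = std::make_shared<opset5::Add>(x, y);
    auto f = std::make_shared<Function>(OutputVector{add}, ParameterVector{x, y});

    // Prepare input host tensors.
    std::vector<float> a{1, 2, 3, 4}, b{5, 6, 7, 8};
    auto ta = std::make_shared<runtime::HostTensor>(element::f32, Shape{2, 2});
    auto tb = std::make_shared<runtime::HostTensor>(element::f32, Shape{2, 2});
    ta->write(a.data(), a.size() * sizeof(float));
    tb->write(b.data(), b.size() * sizeof(float));

    // The helper allocates one output tensor per Result and evaluates the body.
    HostTensorVector outputs;
    runtime::reference::function(f, {ta, tb}, outputs);
    // outputs[0] now holds {6, 8, 10, 12}.
}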

View File

@ -0,0 +1,36 @@
//*****************************************************************************
// Copyright 2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <cmath>
#include <ngraph/opsets/opset5.hpp>
namespace ngraph
{
namespace runtime
{
namespace reference
{
void loop(const std::shared_ptr<Function>& body,
const op::util::OutputDescriptionVector& out_descs,
const op::util::InputDescriptionVector& input_descs,
const opset5::Loop::SpecialBodyPorts& special_ports,
const HostTensorVector& out,
const HostTensorVector& args);
}
}
}

View File

@ -0,0 +1,41 @@
//*****************************************************************************
// Copyright 2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <cmath>
#include <ngraph/opsets/opset5.hpp>
namespace ngraph
{
namespace runtime
{
namespace reference
{
using custom_evaluate_function =
std::function<void(const std::shared_ptr<ngraph::Function>& function,
const HostTensorVector& inputs,
HostTensorVector& outputs)>;
void tensor_iterator(uint64_t num_iterations,
const std::shared_ptr<Function>& body,
const op::util::OutputDescriptionVector& out_descs,
const op::util::InputDescriptionVector& input_descs,
const HostTensorVector& out,
const HostTensorVector& args,
const custom_evaluate_function& evaluate = nullptr);
}
}
}
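
The custom_evaluate_function hook above lets a caller substitute its own executor for the body function (the INTERPRETER backend does exactly that later in this change). A minimal sketch of such a callback, assuming it simply delegates to the reference evaluator:

// Sketch only: a body evaluator that falls back to the reference implementation.
ngraph::runtime::reference::custom_evaluate_function my_evaluator =
    [](const std::shared_ptr<ngraph::Function>& body,
       const ngraph::HostTensorVector& inputs,
       ngraph::HostTensorVector& outputs) -> void {
        ngraph::runtime::reference::function(body, inputs, outputs);
    };
// Pass it as the last argument of runtime::reference::tensor_iterator(...);
// when it is omitted, the body is evaluated by reference::function directly.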

View File

@ -0,0 +1,147 @@
//*****************************************************************************
// Copyright 2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <cstring>
#include "ngraph/opsets/opset5.hpp"
#include "ngraph/runtime/reference/function.hpp"
#include "ngraph/runtime/host_tensor.hpp"
#include "ngraph/runtime/reference/concat.hpp"
#include "ngraph/runtime/tensor.hpp"
namespace ngraph
{
namespace runtime
{
namespace reference
{
static bool call(const HostTensorVector& func_outputs,
const HostTensorVector& func_inputs,
const std::shared_ptr<ngraph::Function>& function)
{
// map function params -> HostTensor
std::unordered_map<descriptor::Tensor*, std::shared_ptr<HostTensor>> tensor_map;
size_t input_count = 0;
for (const auto& param : function->get_parameters())
{
for (size_t i = 0; i < param->get_output_size(); ++i)
{
descriptor::Tensor* tensor = &param->output(i).get_tensor();
tensor_map.insert({tensor, func_inputs[input_count++]});
}
}
// map function outputs -> HostTensor
for (size_t output_count = 0; output_count < function->get_results().size();
++output_count)
{
auto output = function->get_results()[output_count];
descriptor::Tensor* tensor = &output->get_output_tensor(0);
tensor_map.insert({tensor, func_outputs[output_count]});
}
// for each ordered op in the graph
for (const auto& op : function->get_ordered_ops())
{
if (op::is_parameter(op))
{
continue;
}
// get op inputs from map
std::vector<std::shared_ptr<HostTensor>> op_inputs;
for (auto input : op->inputs())
{
descriptor::Tensor* tensor = &input.get_tensor();
op_inputs.push_back(tensor_map.at(tensor));
}
// get op outputs from map or create
std::vector<std::shared_ptr<HostTensor>> op_outputs;
for (size_t i = 0; i < op->get_output_size(); ++i)
{
descriptor::Tensor* tensor = &op->output(i).get_tensor();
std::shared_ptr<HostTensor> host_tensor;
auto it = tensor_map.find(tensor);
if (it == tensor_map.end())
{
host_tensor = std::make_shared<HostTensor>(op->output(i));
tensor_map.insert({tensor, host_tensor});
}
else
{
host_tensor = it->second;
}
op_outputs.push_back(host_tensor);
}
op->validate_and_infer_types();
if (!op->evaluate(op_outputs, op_inputs))
{
throw ngraph_error("Evaluate function is not implemented.");
}
}
return true;
}
void function(const std::shared_ptr<ngraph::Function>& function,
const HostTensorVector& inputs,
HostTensorVector& outputs)
{
const auto& parameters = function->get_parameters();
const auto& parametersNumber = parameters.size();
const auto& inputsNumber = inputs.size();
NGRAPH_CHECK(parametersNumber == inputsNumber,
"Got function (",
function->get_friendly_name(),
") with ",
parametersNumber,
" parameters, but ",
inputsNumber,
" input blobs");
for (const auto& parameter : parameters)
{
const auto& parameterIndex = function->get_parameter_index(parameter);
const auto& parameterShape = parameter->get_shape();
const auto& parameterType = parameter->get_element_type();
const auto& parameterSize = shape_size(parameterShape) * parameterType.size();
const auto& input = inputs[parameterIndex];
const auto& inputSize = input->get_size_in_bytes();
NGRAPH_CHECK(parameterSize == inputSize,
"Got parameter (",
parameter->get_friendly_name(),
") of size ",
parameterSize,
" bytes, but corresponding input with index ",
parameterIndex,
" has ",
inputSize,
" bytes");
}
const auto& results = function->get_results();
outputs.reserve(results.size());
for (size_t i = 0; i < results.size(); ++i)
{
outputs.push_back(std::make_shared<HostTensor>());
}
call(outputs, inputs, function);
}
}
}
}

View File

@ -0,0 +1,227 @@
//*****************************************************************************
// Copyright 2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "runtime/reference/loop.hpp"
#include "runtime/reference/concat.hpp"
#include "runtime/reference/function.hpp"
namespace ngraph
{
namespace runtime
{
namespace reference
{
void loop(const std::shared_ptr<Function>& func,
const op::util::OutputDescriptionVector& out_descs,
const op::util::InputDescriptionVector& input_descs,
const opset5::Loop::SpecialBodyPorts& special_ports,
const HostTensorVector& out,
const HostTensorVector& args)
{
const auto& cur_iter_idx = special_ports.current_iteration_input_idx;
auto val =
std::find_if(input_descs.begin(),
input_descs.end(),
[&cur_iter_idx](const op::util::InputDescriptionPtr& in_desc) {
return in_desc->m_body_parameter_index == cur_iter_idx;
});
bool cur_iter_initial_value_exist = val != input_descs.end();
bool cur_iter_back_edge_exist = false;
// If the current_iteration input exists but its initial value is not provided, we
// should allocate input_descs.size() + 1 inputs and set the default value (0) for
// the current_iteration input.
int64_t inputs_count =
input_descs.size() + (cur_iter_idx >= 0 ? !cur_iter_initial_value_exist : 0);
HostTensorVector inputs_to_body;
for (int64_t i = 0; i < inputs_count; ++i)
inputs_to_body.push_back(
std::make_shared<HostTensor>(element::dynamic, PartialShape::dynamic()));
if (cur_iter_idx >= 0 && !cur_iter_initial_value_exist)
{
const auto& cur_iter = func->get_parameters().at(cur_iter_idx);
if (cur_iter->get_partial_shape().is_dynamic())
{
cur_iter->set_partial_shape(Shape{1});
cur_iter->validate_and_infer_types();
}
auto init = std::make_shared<opset5::Constant>(
func->get_parameters().at(cur_iter_idx)->get_element_type(),
func->get_parameters().at(cur_iter_idx)->get_shape(),
0);
inputs_to_body.at(cur_iter_idx)->initialize(init);
// reinterpret_cast<int64_t*>(inputs_to_body.at(cur_iter_idx).data())[0] = 0;
}
// Port map processing: inputs and back edges
struct BackEdge
{
uint64_t param_idx;
uint64_t result_idx;
};
std::vector<BackEdge> back_edges;
for (const auto& desc : input_descs)
{
inputs_to_body[desc->m_body_parameter_index] = args[desc->m_input_index];
if (const auto& merged_desc =
std::dynamic_pointer_cast<opset5::Loop::MergedInputDescription>(desc))
{
back_edges.push_back(
{merged_desc->m_body_parameter_index, merged_desc->m_body_value_index});
cur_iter_back_edge_exist |=
merged_desc->m_body_parameter_index == cur_iter_idx;
}
}
// Get TripCount
int64_t trip_count = 0;
if (args[0]->get_element_type() == ngraph::element::i32)
{
auto* trip_count_p = args[0]->get_data_ptr<int32_t>();
trip_count = trip_count_p[0];
}
else if (args[0]->get_element_type() == ngraph::element::i64)
{
auto* trip_count_p = args[0]->get_data_ptr<int64_t>();
trip_count = trip_count_p[0];
}
else
{
NGRAPH_CHECK(
false,
"Unsupported element type for trip_count input. Expected int32 or int64.");
}
NGRAPH_CHECK(trip_count != 0, "Zero count of iteration not supported");
// Loop iterations
auto exec_condition = args[1]->get_data_ptr<bool>();
if (exec_condition[0])
{
// Find all ConcatOutputDescription
std::vector<std::shared_ptr<opset5::Loop::ConcatOutputDescription>>
concat_outputs;
for (const auto& desc : out_descs)
{
if (const auto& concat_desc =
std::dynamic_pointer_cast<opset5::Loop::ConcatOutputDescription>(
desc))
{
concat_outputs.push_back(concat_desc);
}
}
// Allocate vectors to store output values
std::vector<HostTensorVector> values_to_concat(concat_outputs.size());
HostTensorVector body_outputs;
// A negative value means an infinite number of iterations
trip_count = trip_count >= 0 ? trip_count : std::numeric_limits<int64_t>::max();
for (int64_t cur_iter = 0; cur_iter < trip_count; ++cur_iter)
{
// Evaluate body
body_outputs.clear();
reference::function(func, inputs_to_body, body_outputs);
// Store values for later concatenation
for (size_t i = 0; i < values_to_concat.size(); ++i)
{
values_to_concat[i].push_back(
body_outputs[concat_outputs[i]->m_body_value_index]);
}
// Check execution condition
bool body_exec_condition;
body_outputs[special_ports.body_condition_output_idx]->read(
&body_exec_condition, sizeof(bool));
if (!body_exec_condition)
break;
// If there are no rules for calculating the current iteration, just
// increment it.
if (cur_iter_idx >= 0 && !cur_iter_back_edge_exist)
{
const auto& cur_iter_param = func->get_parameters().at(cur_iter_idx);
int64_t iter_num = cur_iter + 1;
if (cur_iter_param->get_element_type() == element::i64)
inputs_to_body.at(cur_iter_idx)
->write(&iter_num, cur_iter_param->get_element_type().size());
else if (cur_iter_param->get_element_type() == element::i32)
{
int32_t iter_num_i32 = static_cast<int32_t>(iter_num);
inputs_to_body.at(cur_iter_idx)
->write(&iter_num_i32,
cur_iter_param->get_element_type().size());
}
else
NGRAPH_CHECK(false,
"Unsupported element type for current iteration "
"input. Expected int32 or int64.");
}
// Back-edge processing
for (auto& back_edge : back_edges)
{
inputs_to_body[back_edge.param_idx] =
body_outputs[back_edge.result_idx];
}
}
for (const auto& desc : out_descs)
{
if (const auto& body_desc =
std::dynamic_pointer_cast<opset5::Loop::BodyOutputDescription>(
desc))
{
out[body_desc->m_output_index]->write(
body_outputs[body_desc->m_body_value_index]->get_data_ptr(),
body_outputs[body_desc->m_body_value_index]->get_size_in_bytes());
}
}
// Concatenate and copy all values stored in values_to_concat vector to outputs
for (size_t i = 0; i < concat_outputs.size(); ++i)
{
const auto& concat_desc = concat_outputs[i];
auto shape =
func->get_results().at(concat_desc->m_body_value_index)->get_shape();
std::vector<Shape> shapes_to_concat(values_to_concat[i].size(), shape);
shape.at(concat_desc->m_axis) = values_to_concat[i].size();
out[concat_desc->m_output_index]->set_shape(shape);
std::vector<const char*> pointers_on_values;
pointers_on_values.reserve(values_to_concat[i].size());
for (const auto& vec : values_to_concat[i])
{
pointers_on_values.push_back(vec->get_data_ptr<char>());
}
reference::concat(
pointers_on_values,
out[concat_desc->m_output_index]->get_data_ptr<char>(),
shapes_to_concat,
shape,
concat_desc->m_axis,
out[concat_desc->m_output_index]->get_element_type().size());
}
}
else
{
NGRAPH_CHECK(
false,
"ExecutionCondition is false. Zero count of iteration not supported.");
}
}
}
}
}

View File

@ -0,0 +1,181 @@
//*****************************************************************************
// Copyright 2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "runtime/reference/tensor_iterator.hpp"
#include "runtime/reference/concat.hpp"
#include "runtime/reference/function.hpp"
#include "runtime/reference/split.hpp"
namespace ngraph
{
namespace runtime
{
namespace reference
{
void tensor_iterator(uint64_t num_iterations,
const std::shared_ptr<Function>& func,
const op::util::OutputDescriptionVector& out_descs,
const op::util::InputDescriptionVector& input_descs,
const HostTensorVector& out,
const HostTensorVector& args,
const custom_evaluate_function& evaluate)
{
HostTensorVector inputs_to_body;
for (int64_t i = 0; i < input_descs.size(); ++i)
inputs_to_body.push_back(
std::make_shared<HostTensor>(element::dynamic, PartialShape::dynamic()));
// Port map processing: inputs and back edges
struct BackEdge
{
uint64_t param_idx;
uint64_t result_idx;
};
std::vector<BackEdge> back_edges;
for (const auto& desc : input_descs)
{
inputs_to_body[desc->m_body_parameter_index] = args[desc->m_input_index];
if (const auto& merged_desc =
std::dynamic_pointer_cast<opset5::Loop::MergedInputDescription>(desc))
{
back_edges.push_back(
{merged_desc->m_body_parameter_index, merged_desc->m_body_value_index});
}
}
// Find all ConcatOutputDescription
std::vector<std::shared_ptr<opset5::TensorIterator::ConcatOutputDescription>>
concat_outputs;
for (const auto& desc : out_descs)
{
if (const auto& concat_desc = std::dynamic_pointer_cast<
opset5::TensorIterator::ConcatOutputDescription>(desc))
{
concat_outputs.push_back(concat_desc);
}
}
// Slicing
std::vector<std::shared_ptr<opset5::TensorIterator::SliceInputDescription>>
slice_inputs;
std::vector<HostTensorVector> sliced_values;
int slice_in_idx = 0;
for (const auto& desc : input_descs)
{
if (const auto& slice_desc = std::dynamic_pointer_cast<
opset5::TensorIterator::SliceInputDescription>(desc))
{
const auto el_size =
args[slice_desc->m_input_index]->get_element_type().size();
slice_inputs.push_back(slice_desc);
auto shape = args[slice_desc->m_input_index]->get_shape();
shape.at(slice_desc->m_axis) = 1;
sliced_values.emplace_back(HostTensorVector());
for (int i = 0; i < num_iterations; ++i)
{
sliced_values.back().emplace_back(std::make_shared<HostTensor>(
args[slice_desc->m_input_index]->get_element_type(), shape));
}
std::vector<char*> pointers_to_data(num_iterations);
for (size_t j = 0; j < pointers_to_data.size(); ++j)
{
pointers_to_data[j] =
sliced_values[slice_in_idx][j]->get_data_ptr<char>();
}
reference::split(args[slice_desc->m_input_index]->get_data_ptr<char>(),
args[slice_desc->m_input_index]->get_shape(),
el_size,
slice_desc->m_axis,
num_iterations,
pointers_to_data.data());
slice_in_idx++;
}
}
// Allocate vectors to store output values
std::vector<HostTensorVector> values_to_concat(concat_outputs.size());
HostTensorVector body_outputs;
for (int64_t cur_iter = 0; cur_iter < num_iterations; ++cur_iter)
{
// Copy new values for sliced inputs
for (size_t i = 0; i < slice_inputs.size(); ++i)
{
inputs_to_body[slice_inputs[i]->m_body_parameter_index] =
sliced_values[i][cur_iter];
}
// Evaluate body
if (!evaluate)
{
reference::function(func, inputs_to_body, body_outputs);
}
else
{
evaluate(func, inputs_to_body, body_outputs);
}
// Store values for later concatenation
for (size_t i = 0; i < values_to_concat.size(); ++i)
{
values_to_concat[i].push_back(
body_outputs[concat_outputs[i]->m_body_value_index]);
}
// Back-edge processing
for (auto& back_edge : back_edges)
{
inputs_to_body[back_edge.param_idx] = body_outputs[back_edge.result_idx];
}
}
for (const auto& desc : out_descs)
{
if (const auto& body_desc = std::dynamic_pointer_cast<
opset5::TensorIterator::BodyOutputDescription>(desc))
{
// Copy output values from the last iteration
out[body_desc->m_output_index]->write(
body_outputs[body_desc->m_body_value_index]->get_data_ptr(),
body_outputs[body_desc->m_body_value_index]->get_size_in_bytes());
}
}
// Concatenate and copy all values stored in values_to_concat vector to outputs
for (size_t i = 0; i < concat_outputs.size(); ++i)
{
const auto& concat_desc = concat_outputs[i];
auto shape =
func->get_results().at(concat_desc->m_body_value_index)->get_shape();
std::vector<Shape> shapes_to_concat(values_to_concat[i].size(), shape);
shape.at(concat_desc->m_axis) = values_to_concat[i].size();
out[concat_desc->m_output_index]->set_shape(shape);
std::vector<const char*> pointers_on_values;
pointers_on_values.reserve(values_to_concat[i].size());
for (const auto& vec : values_to_concat[i])
{
pointers_on_values.push_back(vec->get_data_ptr<char>());
}
reference::concat(pointers_on_values,
out[concat_desc->m_output_index]->get_data_ptr<char>(),
shapes_to_concat,
shape,
concat_desc->m_axis,
out[concat_desc->m_output_index]->get_element_type().size());
}
}
}
}
}

View File

@ -15,11 +15,14 @@
//*****************************************************************************
#include "ngraph/op/loop.hpp"
#include "itt.hpp"
#include "ngraph/factory.hpp"
#include "ngraph/graph_util.hpp"
#include "ngraph/opsets/opset5.hpp"
#include "ngraph/specialize_function.hpp"
#include "ngraph/runtime/reference/loop.hpp"
using namespace std;
using namespace ngraph;
@ -380,3 +383,11 @@ Output<Node> op::v5::Loop::get_concatenated_slices(const Output<Node>& value,
"{-1}");
return SubGraphOp::get_concatenated_slices(value, start, stride, part_size, end, axis);
}
bool op::v5::Loop::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const
{
OV_ITT_SCOPED_TASK(itt::domains::nGraphOp, "op::v5::Loop::evaluate");
runtime::reference::loop(
m_body, m_output_descriptions, m_input_descriptions, m_special_body_ports, outputs, inputs);
return true;
}
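
With evaluate() in place, an already-configured Loop node can be folded on host tensors. A rough sketch; the tensor and node names below are placeholders for illustration, not identifiers from this diff:

// Sketch only: `loop` is assumed to be an opset5::Loop that already has its body
// (set_function), port map and SpecialBodyPorts configured; by convention its
// first two inputs are the trip count and the execution condition.
ngraph::HostTensorVector loop_inputs{trip_count_tensor, exec_cond_tensor, data_tensor};
ngraph::HostTensorVector loop_outputs;
for (size_t i = 0; i < loop->get_output_size(); ++i)
    loop_outputs.push_back(std::make_shared<ngraph::runtime::HostTensor>());
loop->evaluate(loop_outputs, loop_inputs); // dispatches to runtime::reference::loop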

View File

@ -143,7 +143,8 @@ namespace ngraph
const auto concat_axis_const =
ngraph::op::Constant::create(ngraph::element::i64, {1}, {concat_axis});
// provide scalar handling for scan outputs
for (int i = loop_carried_dependencies.size() + 1; i < body_outputs.size(); ++i)
for (size_t i = loop_carried_dependencies.size() + 1; i < body_outputs.size();
++i)
{
auto body_output_shape = body_outputs[i].get_partial_shape();
if (body_output_shape.is_static() &&

View File

@ -98,8 +98,6 @@ xfail_issue_36478 = xfail_test(reason="RuntimeError: [NOT_IMPLEMENTED] Input ima
"not supported yet...")
xfail_issue_36480 = xfail_test(reason="RuntimeError: [NOT_FOUND] Unsupported property dummy_option "
"by CPU plugin")
xfail_issue_36483 = xfail_test(reason="RuntimeError: Unsupported primitive of type: "
"Ceiling name: <value>")
xfail_issue_36485 = xfail_test(reason="RuntimeError: Check 'm_group >= 1' failed at "
"/openvino/ngraph/core/src/op/shuffle_channels.cpp:77:")
xfail_issue_36486 = xfail_test(reason="RuntimeError: HardSigmoid operation should be converted "

View File

@ -20,7 +20,7 @@ import ngraph as ng
from ngraph.impl import AxisSet, Function, Shape, Type
from ngraph.impl.op import Constant, Parameter
from tests.runtime import get_runtime
from tests import xfail_issue_36483, xfail_issue_34323
from tests import xfail_issue_34323
def binary_op(op_str, a, b):
@ -370,7 +370,6 @@ def test_atanh():
unary_op_exec(op_str, input_list)
@xfail_issue_36483
def test_ceiling():
input_list = [0.5, 0, 0.4, 0.5]
op_str = "Ceiling"

View File

@ -19,7 +19,7 @@ import pytest
import ngraph as ng
from ngraph.impl import Shape, Type
from tests.test_ngraph.util import run_op_node
from tests import xfail_issue_35929, xfail_issue_36483
from tests import xfail_issue_35929
@xfail_issue_35929
@ -67,8 +67,8 @@ def test_unary_op_array(ng_api_fn, numpy_fn, range_start, range_end):
pytest.param(ng.acos, np.arccos, np.float32(-0.5)),
pytest.param(ng.asin, np.arcsin, np.float32(-0.5)),
pytest.param(ng.atan, np.arctan, np.float32(-0.5)),
pytest.param(ng.ceiling, np.ceil, np.float32(1.5), marks=xfail_issue_36483),
pytest.param(ng.ceil, np.ceil, np.float32(1.5), marks=xfail_issue_36483),
pytest.param(ng.ceiling, np.ceil, np.float32(1.5)),
pytest.param(ng.ceil, np.ceil, np.float32(1.5)),
pytest.param(ng.cos, np.cos, np.float32(np.pi / 4.0)),
pytest.param(ng.cosh, np.cosh, np.float32(np.pi / 4.0)),
pytest.param(ng.exp, np.exp, np.float32(1.5)),

View File

@ -38,7 +38,6 @@ from tests import (BACKEND_NAME,
xfail_issue_33616,
xfail_issue_38086,
xfail_issue_38087,
xfail_issue_36483,
xfail_issue_34323,
xfail_issue_35915,
xfail_issue_34310,
@ -205,9 +204,6 @@ tests_expected_to_fail = [
"OnnxBackendNodeModelTest.test_quantizelinear_cpu"),
(xfail_issue_38087,
"OnnxBackendNodeModelTest.test_convtranspose_1d_cpu"),
(xfail_issue_36483,
"OnnxBackendNodeModelTest.test_ceil_cpu",
"OnnxBackendNodeModelTest.test_ceil_example_cpu"),
(xfail_issue_34323,
"OnnxBackendNodeModelTest.test_constant_cpu",
"OnnxBackendNodeModelTest.test_eyelike_populate_off_main_diagonal_cpu",

View File

@ -53,6 +53,13 @@ shared_ptr<runtime::Tensor>
return make_shared<runtime::HostTensor>(type, shape);
}
shared_ptr<runtime::Tensor>
runtime::interpreter::INTBackend::create_dynamic_tensor(const element::Type& type,
const PartialShape& pshape)
{
return make_shared<runtime::HostTensor>(type, pshape);
}
shared_ptr<runtime::Tensor> runtime::interpreter::INTBackend::create_tensor(
const element::Type& type, const Shape& shape, void* memory_pointer)
{

View File

@ -56,6 +56,8 @@ public:
create_tensor(const element::Type& type, const Shape& shape, void* memory_pointer) override;
std::shared_ptr<Tensor> create_tensor(const element::Type& type, const Shape& shape) override;
std::shared_ptr<Tensor> create_dynamic_tensor(const element::Type& type,
const PartialShape& shape) override;
std::shared_ptr<Executable> compile(std::shared_ptr<Function> function,
bool enable_performance_data = false) override;

View File

@ -284,7 +284,7 @@ bool runtime::interpreter::INTExecutable::call(const vector<shared_ptr<runtime::
// convert inputs to HostTensor
vector<shared_ptr<HostTensor>> func_inputs;
for (auto tensor : inputs)
for (const auto& tensor : inputs)
{
auto host_tensor = static_pointer_cast<runtime::HostTensor>(tensor);
func_inputs.push_back(host_tensor);
@ -296,7 +296,7 @@ bool runtime::interpreter::INTExecutable::call(const vector<shared_ptr<runtime::
// convert outputs to HostTensor
vector<shared_ptr<HostTensor>> func_outputs;
for (auto tensor : outputs)
for (const auto& tensor : outputs)
{
auto host_tensor = static_pointer_cast<runtime::HostTensor>(tensor);
func_outputs.push_back(host_tensor);
@ -305,7 +305,7 @@ bool runtime::interpreter::INTExecutable::call(const vector<shared_ptr<runtime::
// map function params -> HostTensor
unordered_map<descriptor::Tensor*, shared_ptr<HostTensor>> tensor_map;
size_t input_count = 0;
for (auto param : get_parameters())
for (const auto& param : get_parameters())
{
for (size_t i = 0; i < param->get_output_size(); ++i)
{
@ -327,7 +327,7 @@ bool runtime::interpreter::INTExecutable::call(const vector<shared_ptr<runtime::
}
// for each ordered op in the graph
for (auto op : m_nodes)
for (const auto& op : m_nodes)
{
event::Duration d2(op->description(), "Interpreter");
if (op::is_parameter(op))
@ -387,7 +387,7 @@ bool runtime::interpreter::INTExecutable::call(const vector<shared_ptr<runtime::
}
if (!op->evaluate(op_outputs, op_inputs))
{
generate_calls(type, *op.get(), op_outputs, op_inputs);
generate_calls(type, *op, op_outputs, op_inputs);
}
if (m_performance_counters_enabled)
{

View File

@ -98,6 +98,7 @@
#include "ngraph/runtime/reference/sum.hpp"
#include "ngraph/runtime/reference/tan.hpp"
#include "ngraph/runtime/reference/tanh.hpp"
#include "ngraph/runtime/reference/tensor_iterator.hpp"
#include "ngraph/runtime/reference/topk.hpp"
#include "ngraph/runtime/tensor.hpp"
#include "op/avg_pool.hpp"
@ -1235,6 +1236,81 @@ protected:
args[0]->get_data_ptr<const T>(), out[0]->get_data_ptr<T>(), element_count);
break;
}
case OP_TYPEID::TensorIterator:
{
auto ti = dynamic_cast<const op::v0::TensorIterator&>(node);
reference::custom_evaluate_function evaluate =
[](const std::shared_ptr<ngraph::Function>& function,
const HostTensorVector& inputs,
HostTensorVector& outputs) -> void {
const auto& parameters = function->get_parameters();
const auto& parametersNumber = parameters.size();
const auto& inputsNumber = inputs.size();
NGRAPH_CHECK(parametersNumber == inputsNumber,
"Got function (",
function->get_friendly_name(),
") with ",
parametersNumber,
" parameters, but ",
inputsNumber,
" input blobs");
auto inputTensors = std::vector<std::shared_ptr<runtime::Tensor>>{};
for (const auto& parameter : parameters)
{
const auto& parameterIndex = function->get_parameter_index(parameter);
const auto& parameterShape = parameter->get_shape();
const auto& parameterType = parameter->get_element_type();
const auto& parameterSize = shape_size(parameterShape) * parameterType.size();
const auto& input = inputs[parameterIndex];
const auto& inputSize = input->get_size_in_bytes();
NGRAPH_CHECK(parameterSize == inputSize,
"Got parameter (",
parameter->get_friendly_name(),
") of size ",
parameterSize,
" bytes, but corresponding input with index ",
parameterIndex,
" has ",
inputSize,
" bytes");
auto tensor =
std::make_shared<runtime::HostTensor>(parameterType, parameterShape);
tensor->write(input->get_data_ptr(), parameterSize);
inputTensors.push_back(tensor);
}
const auto& results = function->get_results();
std::vector<std::shared_ptr<ngraph::runtime::Tensor>> outputTensors;
outputTensors.reserve(results.size());
for (size_t i = 0; i < results.size(); ++i)
{
outputTensors.push_back(std::make_shared<HostTensor>());
}
runtime::Backend::set_backend_shared_library_search_directory("");
auto backend = runtime::Backend::create("INTERPRETER");
auto handle = backend->compile(function);
handle->call_with_validate(outputTensors, inputTensors);
outputs.reserve(outputTensors.size());
for (const auto& tensor : outputTensors)
{
auto host_tensor = static_pointer_cast<runtime::HostTensor>(tensor);
outputs.push_back(host_tensor);
}
};
reference::tensor_iterator(ti.get_num_iterations(),
ti.get_function(),
ti.get_output_descriptions(),
ti.get_input_descriptions(),
out,
args,
evaluate);
break;
}
case OP_TYPEID::DetectionOutput_v0:
{
const op::DetectionOutput* detOut = static_cast<const op::DetectionOutput*>(&node);
@ -1378,7 +1454,6 @@ protected:
case OP_TYPEID::ShuffleChannels:
case OP_TYPEID::SpaceToDepth:
case OP_TYPEID::SquaredDifference:
case OP_TYPEID::TensorIterator:
case OP_TYPEID::Tile:
case OP_TYPEID::UnknownOp:
throw unsupported_op("Unsupported op '" + node.description() + "'");
@ -1397,6 +1472,7 @@ protected:
case OP_TYPEID::LogicalAnd_v1:
case OP_TYPEID::LogicalOr_v1:
case OP_TYPEID::LogicalXor_v1:
case OP_TYPEID::Loop_v5:
case OP_TYPEID::MatMul:
case OP_TYPEID::Maximum:
case OP_TYPEID::Minimum:

View File

@ -59,11 +59,12 @@ NGRAPH_OP(LSTMCell, op::v4)
#define ID_SUFFIX(NAME) NAME##_v5
NGRAPH_OP(GatherND, op::v5)
NGRAPH_OP(LSTMSequence, op::v5)
NGRAPH_OP(GRUSequence, op::v5)
NGRAPH_OP(RNNSequence, op::v5)
NGRAPH_OP(BatchNormInference, op::v5)
NGRAPH_OP(Round, op::v5)
NGRAPH_OP(LogSoftmax, op::v5)
NGRAPH_OP(Loop, op::v5)
NGRAPH_OP(LSTMSequence, op::v5)
NGRAPH_OP(NonMaxSuppression, op::v5)
NGRAPH_OP(RNNSequence, op::v5)
NGRAPH_OP(Round, op::v5)
#undef ID_SUFFIX

View File

@ -133,9 +133,6 @@ onnx_controlflow_loop_2d_no_identity_termination_cond
onnx_controlflow_loop_2d_const_no_identity_termination_cond
onnx_controlflow_loop_2d_both_cond_and_trip_count_as_inputs
#dynamic trip count
onnx_controlflow_loop_2d_trip_count_dynamic
# Input body shape is changed during Loop iterations
# Exception is thrown during Loop shape inference
# Is it expected?
@ -144,13 +141,7 @@ onnx_controlflow_loop_concat_values
# Infinitive Loop is not supported
onnx_controlflow_loop_infinite
# Loop is not supported yet by INTERPRETER backend
onnx_controlflow_loop_2d_add
onnx_controlflow_loop_2d_no_identity_termination_cond_false
onnx_controlflow_loop_add_initializer_from_parent_scope
onnx_controlflow_loop_add_node_from_parent_scope
onnx_controlflow_loop_add_value_the_same_node_from_parent_and_subgraph
onnx_controlflow_loop_scalars
onnx_controlflow_loop_2d_add_const_cond
# Dynamic shape support?
onnx_controlflow_loop_2d_trip_count_dynamic
onnx_controlflow_loop_no_variadic_inputs_and_outputs
onnx_controlflow_loop_power
onnx_controlflow_loop_power