Reference implementations for Loop and TensorIterator ops (#2978)

* Loop op nGraph implementation; update IE IR Reader and the nGraph-to-CNNNetwork converter

* refactoring SubGraphOp class

* type prop unit tests

* ngraph code style

* update comment

* single layer tests for Loop operation

* fix file name

* Add SpecialBodyPorts attribute in Loop op, update single layer tests

* first debug version

* more tests

* missing test file

* removed unneeded shapes from test data

* move test data to new folder

* shape infer tests

* Added execution tests

* add several new test cases, strict checks in Loop impl, temporarily disable single layer tests

* ngraph codestyle, refactoring, clone_new_args test

* resolve review remarks

* fix build

* fix tests

* more execution tests

* add a new constructor of Loop op, resolve review remarks

* execution tests

* sync with current version

* handle scalars and more tests

* scalar test enabled

* loop reference impl

* bug fixes in tests, onnx importer part and in the ref implementation of the Loop op

* applied remarks

* handle unsupported cases

* rewrite unit tests

* update INTERPRETER manifest

* is_termination_condition_always_true simplification

* [TEST] update python models tests

* review remarks

* added xfail to tiny_yolov3

* missing model test

* revert test data

* fixed numbers of failing tests

* fixed failed test description

* fix test message

* fix xfail test

* reference implementation for ngraph::function

* update loop reference implementation

* Refactor loop reference implementation

* ngraph codestyle

* Refactoring

* Submodule update

* Skip check for Reduce ops in MKLDNN plugin for scalar cases, support for yolov3

* fix ngraph reader tests

* revert ceiling op, renaming

* Add alias (Ceiling) for Ceil op in MKLDNN

* delete xfails

* fix build

* single layer tests for TensorIterator

* Refactor TensorIterator and Loop ref impls

* revert dynamic tensor creation, disable some dynamic test cases

* fix warning

* Resolve review remarks

* revert Predefined values in Loop tests

Co-authored-by: Mateusz Bencer <mateusz.bencer@intel.com>
Ivan Tikhonov 2020-11-10 15:49:59 +03:00 committed by GitHub
parent b6e2cd692b
commit c309bb77d2
49 changed files with 1199 additions and 65 deletions

View File

@@ -20,6 +20,7 @@ MKLDNN_EXTENSION_NODE(MathImpl, Asinh);
 MKLDNN_EXTENSION_NODE(MathImpl, Atan);
 MKLDNN_EXTENSION_NODE(MathImpl, Atanh);
 MKLDNN_EXTENSION_NODE(MathImpl, Ceil);
+MKLDNN_EXTENSION_NODE(MathImpl, Ceiling);
 MKLDNN_EXTENSION_NODE(MathImpl, Cos);
 MKLDNN_EXTENSION_NODE(MathImpl, Cosh);
 MKLDNN_EXTENSION_NODE(MathImpl, Erf);

View File

@@ -68,6 +68,7 @@ public:
         else if (math_func == "Atan") mathFunction = Math::Atan;
         else if (math_func == "Atanh") mathFunction = Math::Atanh;
         else if (math_func == "Ceil") mathFunction = Math::Ceil;
+        else if (math_func == "Ceiling") mathFunction = Math::Ceil;
         else if (math_func == "Cos") mathFunction = Math::Cos;
         else if (math_func == "Cosh") mathFunction = Math::Cosh;
         else if (math_func == "Floor") mathFunction = Math::Floor;
@@ -276,6 +277,7 @@ REG_FACTORY_FOR(MathImpl, Asinh);
 REG_FACTORY_FOR(MathImpl, Atan);
 REG_FACTORY_FOR(MathImpl, Atanh);
 REG_FACTORY_FOR(MathImpl, Ceil);
+REG_FACTORY_FOR(MathImpl, Ceiling);
 REG_FACTORY_FOR(MathImpl, Cos);
 REG_FACTORY_FOR(MathImpl, Cosh);
 REG_FACTORY_FOR(MathImpl, Erf);

View File

@@ -1264,7 +1264,10 @@ void MKLDNNReduceNode::getSupportedDescriptors() {
     if (getParentEdgeAt(REDUCE_DATA)->getDims().ndims() != getChildEdgeAt(0)->getDims().ndims())
         THROW_IE_EXCEPTION << "Reduce layer with name " << getName() << "gets incorrect number of input/output dimensions!";
 } else {
-    if (getParentEdgeAt(REDUCE_DATA)->getDims().ndims() <= getChildEdgeAt(0)->getDims().ndims())
+    // In fact, after the Reduce operation, the shape must be a scalar if the previous one was 1d.
+    // But for now, 0d tensor (scalar) is emulated as 1d tensor. Skip checking in such cases.
+    bool is_emulated_0d_as_1d = getParentEdgeAt(REDUCE_DATA)->getDims().ndims() == 1 && getChildEdgeAt(0)->getDims().ndims() == 1;
+    if (getParentEdgeAt(REDUCE_DATA)->getDims().ndims() <= getChildEdgeAt(0)->getDims().ndims() && !is_emulated_0d_as_1d)
         THROW_IE_EXCEPTION << "Reduce layer with name " << getName() << "gets incorrect number of input/output dimensions!";
 }

View File

@@ -1,4 +1,4 @@
-// Copyright (C) 2019 Intel Corporation
+// Copyright (C) 2020 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //

View File

@@ -1,4 +1,4 @@
-// Copyright (C) 2019 Intel Corporation
+// Copyright (C) 2020 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
@@ -10,7 +10,7 @@
 using namespace LayerTestsDefinitions;
 namespace {
-// without clip values increase rapidly, so use only seq_lenghts = 2
+// output values increase rapidly without clip, so use only seq_lenghts = 2
 std::vector<size_t> seq_lengths_zero_clip{2};
 std::vector<size_t> seq_lengths_clip_non_zero{20};
 std::vector<size_t> batch{1, 10};

View File

@@ -1,4 +1,4 @@
-// Copyright (C) 2019 Intel Corporation
+// Copyright (C) 2020 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //

View File

@@ -1,4 +1,4 @@
-// Copyright (C) 2019 Intel Corporation
+// Copyright (C) 2020 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
@@ -10,7 +10,7 @@
 using namespace LayerTestsDefinitions;
 namespace {
-// without clip values increase rapidly, so use only seq_lenghts = 2
+// output values increase rapidly without clip, so use only seq_lenghts = 2
 std::vector<size_t> seq_lengths_zero_clip{2};
 std::vector<size_t> seq_lengths_clip_non_zero{20};
 std::vector<size_t> batch{1, 10};

View File

@@ -27,6 +27,12 @@ const std::vector<std::vector<size_t>> inputShapes = {
         std::vector<size_t>{3, 5, 7, 9},
 };
+const std::vector<std::vector<size_t>> inputShapesOneAxis = {
+        std::vector<size_t>{10, 20, 30, 40},
+        std::vector<size_t>{3, 5, 7, 9},
+        std::vector<size_t>{10},
+};
 const std::vector<std::vector<int>> axes = {
         {0},
         {1},
@@ -71,7 +77,7 @@ const auto paramsOneAxis = testing::Combine(
         testing::Values(InferenceEngine::Precision::UNSPECIFIED),
         testing::Values(InferenceEngine::Precision::UNSPECIFIED),
         testing::Values(InferenceEngine::Layout::ANY),
-        testing::ValuesIn(inputShapes),
+        testing::ValuesIn(inputShapesOneAxis),
         testing::Values(CommonTestUtils::DEVICE_CPU)
 );

View File

@@ -1,4 +1,4 @@
-// Copyright (C) 2019 Intel Corporation
+// Copyright (C) 2020 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //

View File

@@ -1,4 +1,4 @@
-// Copyright (C) 2019 Intel Corporation
+// Copyright (C) 2020 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
@@ -10,7 +10,7 @@
 using namespace LayerTestsDefinitions;
 namespace {
-// without clip values increase rapidly, so use only seq_lenghts = 2
+// output values increase rapidly without clip, so use only seq_lenghts = 2
 std::vector<size_t> seq_lengths_zero_clip{2};
 std::vector<size_t> seq_lengths_clip_non_zero{20};
 std::vector<size_t> batch{1, 10};

View File

@@ -0,0 +1,58 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include <ngraph/op/util/attr_types.hpp>
#include "single_layer_tests/tensor_iterator.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
namespace {
// output values increase rapidly without clip, so use only seq_lenghts = 2
std::vector<bool> should_decompose = {true, false};
std::vector<size_t> seq_lengths_zero_clip{2};
std::vector<size_t> seq_lengths_clip_non_zero{20};
std::vector<size_t> batch{1, 10};
std::vector<size_t> hidden_size{1, 10};
std::vector<size_t> input_size{10};
std::vector<ngraph::helpers::TensorIteratorBody> body_type
= {ngraph::helpers::TensorIteratorBody::LSTM, ngraph::helpers::TensorIteratorBody::RNN,
ngraph::helpers::TensorIteratorBody::GRU};
std::vector<float> clip{0.f};
std::vector<float> clip_non_zeros{0.7f};
std::vector<ngraph::op::RecurrentSequenceDirection> direction = {ngraph::op::RecurrentSequenceDirection::FORWARD,
ngraph::op::RecurrentSequenceDirection::REVERSE};
std::vector<InferenceEngine::Precision> netPrecisions = {InferenceEngine::Precision::FP32,
InferenceEngine::Precision::FP16};
INSTANTIATE_TEST_CASE_P(smoke_TensorIteratorCommon, TensorIteratorTest,
::testing::Combine(
::testing::ValuesIn(should_decompose),
::testing::ValuesIn(seq_lengths_zero_clip),
::testing::ValuesIn(batch),
::testing::ValuesIn(hidden_size),
::testing::ValuesIn(input_size),
::testing::ValuesIn(clip),
::testing::ValuesIn(body_type),
::testing::ValuesIn(direction),
::testing::ValuesIn(netPrecisions),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
TensorIteratorTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_TensorIteratorCommonClip, TensorIteratorTest,
::testing::Combine(
::testing::ValuesIn(should_decompose),
::testing::ValuesIn(seq_lengths_clip_non_zero),
::testing::ValuesIn(batch),
::testing::ValuesIn(hidden_size),
::testing::ValuesIn(input_size),
::testing::ValuesIn(clip_non_zeros),
::testing::ValuesIn(body_type),
::testing::ValuesIn(direction),
::testing::ValuesIn(netPrecisions),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
TensorIteratorTest::getTestCaseName);
} // namespace

View File

@@ -1,4 +1,4 @@
-// Copyright (C) 2019 Intel Corporation
+// Copyright (C) 2020 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //

View File

@@ -1,4 +1,4 @@
-// Copyright (C) 2019 Intel Corporation
+// Copyright (C) 2020 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //

View File

@@ -61,7 +61,7 @@ class StaticShapeLoopTest : public testing::WithParamInterface<StaticShapeLoopPa
 public:
     static std::string getTestCaseName(const testing::TestParamInfo<StaticShapeLoopParams> &obj);
     InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo &info) const override;
-    std::vector<std::vector<std::uint8_t>> CalculateRefs() override;
+    std::vector<std::vector<std::uint8_t>> PredefinedRefs();
 private:
     bool static_iter_num; // trip count provided by constant node
@@ -100,7 +100,7 @@ protected:
         return LayerTestsCommon::GenerateInput(info);
     }
-    std::vector<std::vector<std::uint8_t>> CalculateRefs() override {
+    std::vector<std::vector<std::uint8_t>> PredefinedRefs() {
         if (outputGens.empty())
             return LayerTestsCommon::CalculateRefs();

View File

@@ -1,4 +1,4 @@
-// Copyright (C) 2019 Intel Corporation
+// Copyright (C) 2020 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //

View File

@@ -1,4 +1,4 @@
-// Copyright (C) 2019 Intel Corporation
+// Copyright (C) 2020 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //

View File

@@ -1,4 +1,4 @@
-// Copyright (C) 2019 Intel Corporation
+// Copyright (C) 2020 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //

View File

@@ -1,4 +1,4 @@
-// Copyright (C) 2019 Intel Corporation
+// Copyright (C) 2020 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //

View File

@@ -0,0 +1,39 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <tuple>
#include <string>
#include <vector>
#include <memory>
#include <ngraph/op/util/attr_types.hpp>
#include "functional_test_utils/layer_test_utils.hpp"
#include "ngraph_functions/builders.hpp"
#include "ngraph_functions/utils/ngraph_helpers.hpp"
namespace LayerTestsDefinitions {
using TensorIteratorParams = typename std::tuple<
bool, // using unroll tensor iterator transformation
size_t, // seq_lengths
size_t, // batch
size_t, // hidden size
size_t, // input size
float, // clip
ngraph::helpers::TensorIteratorBody, // body type
ngraph::op::RecurrentSequenceDirection, // direction
InferenceEngine::Precision, // Network precision
std::string>; // Device name
class TensorIteratorTest : public testing::WithParamInterface<TensorIteratorParams>,
virtual public LayerTestsUtils::LayerTestsCommon {
public:
static std::string getTestCaseName(const testing::TestParamInfo<TensorIteratorParams> &obj);
protected:
void SetUp() override;
};
} // namespace LayerTestsDefinitions

View File

@@ -1,4 +1,4 @@
-// Copyright (C) 2019 Intel Corporation
+// Copyright (C) 2020 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
@@ -36,6 +36,10 @@ std::string GRUCellTest::getTestCaseName(const testing::TestParamInfo<GRUCellPar
     std::string targetDevice;
     std::tie(should_decompose, batch, hidden_size, input_size, activations, clip,
              linear_before_reset, netPrecision, targetDevice) = obj.param;
+    inputShapes = {
+            {{batch, input_size}, {batch, hidden_size}, {3 * hidden_size, input_size},
+             {3 * hidden_size, hidden_size}, {(linear_before_reset? 4 : 3) * hidden_size}},
+    };
     std::ostringstream result;
     result << "decomposition" << should_decompose << "_";
     result << "batch=" << batch << "_";

View File

@@ -1,4 +1,4 @@
-// Copyright (C) 2019 Intel Corporation
+// Copyright (C) 2020 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
@@ -42,7 +42,7 @@ namespace LayerTestsDefinitions {
             {3 * hidden_size, hidden_size}, {(linear_before_reset ? 4 : 3) * hidden_size}},
     };
     std::ostringstream result;
-    result << "seq_lenghts" << seq_lenghts << "_";
+    result << "seq_lenghts=" << seq_lenghts << "_";
     result << "batch=" << batch << "_";
     result << "hidden_size=" << hidden_size << "_";
     result << "input_size=" << input_size << "_";

View File

@@ -53,7 +53,6 @@ namespace LayerTestsDefinitions {
     void LoopTest::SetUp() {
         SKIP_IF_CURRENT_TEST_IS_DISABLED()
-        SetRefMode(LayerTestsUtils::IE);
         bool execute_first_iteration;
         bool is_body_condition_const;
         bool body_condition; // works only if is_body_condition_const ==
@@ -161,8 +160,6 @@ namespace LayerTestsDefinitions {
     void StaticShapeLoopTest::SetUp() {
        SKIP_IF_CURRENT_TEST_IS_DISABLED()
-       SetRefMode(LayerTestsUtils::IE);
        auto args_papck = std::tie(static_iter_num, max_iter_num, dynamic_exit, axis);
        std::tie(
            static_continue_cond,
@@ -261,7 +258,7 @@ namespace LayerTestsDefinitions {
    }
    // Predefined ref output
-   std::vector<std::vector<std::uint8_t>> StaticShapeLoopTest::CalculateRefs() {
+   std::vector<std::vector<std::uint8_t>> StaticShapeLoopTest::PredefinedRefs() {
        bool auto_concat_out = (axis != -1);
        const auto n_iter = actual_n_iter();
@@ -293,6 +290,23 @@ namespace LayerTestsDefinitions {
        Run();
    }
+   TEST_P(StaticShapeLoopTest, CompareWithPredefinedRefs) {
+       SKIP_IF_CURRENT_TEST_IS_DISABLED()
+       LoadNetwork();
+       Infer();
+       auto expectedOutputs = PredefinedRefs();  // use predefined refs instead of CalculateRefs function
+       const auto& actualOutputs = GetOutputs();
+       if (expectedOutputs.empty()) {
+           return;
+       }
+       IE_ASSERT(actualOutputs.size() == expectedOutputs.size())
+           << "nGraph interpreter has " << expectedOutputs.size() << " outputs, while IE " << actualOutputs.size();
+       Compare(expectedOutputs, actualOutputs);
+   }
 TEST_P(TrivialLoopTest, PassThroughBody) {
     SKIP_IF_CURRENT_TEST_IS_DISABLED()
     InferenceEngine::Precision iePrc;

View File

@@ -1,4 +1,4 @@
-// Copyright (C) 2019 Intel Corporation
+// Copyright (C) 2020 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //

View File

@@ -1,4 +1,4 @@
-// Copyright (C) 2019 Intel Corporation
+// Copyright (C) 2020 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
@@ -41,7 +41,7 @@ namespace LayerTestsDefinitions {
             {4 * hidden_size, hidden_size}, {4 * hidden_size}},
     };
     std::ostringstream result;
-    result << "seq_lenghts" << seq_lenghts << "_";
+    result << "seq_lenghts=" << seq_lenghts << "_";
     result << "batch=" << batch << "_";
     result << "hidden_size=" << hidden_size << "_";
     result << "input_size=" << input_size << "_";

View File

@@ -1,4 +1,4 @@
-// Copyright (C) 2019 Intel Corporation
+// Copyright (C) 2020 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //

View File

@@ -1,4 +1,4 @@
-// Copyright (C) 2019 Intel Corporation
+// Copyright (C) 2020 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
@@ -41,7 +41,7 @@ namespace LayerTestsDefinitions {
             {hidden_size, hidden_size}, {hidden_size}},
     };
     std::ostringstream result;
-    result << "seq_lenghts" << seq_lenghts << "_";
+    result << "seq_lenghts=" << seq_lenghts << "_";
     result << "batch=" << batch << "_";
     result << "hidden_size=" << hidden_size << "_";
     result << "input_size=" << input_size << "_";

View File

@@ -0,0 +1,226 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <tuple>
#include <string>
#include <vector>
#include <memory>
#include <functional>
#include "ie_core.hpp"
#include "common_test_utils/common_utils.hpp"
#include "functional_test_utils/blob_utils.hpp"
#include "functional_test_utils/precision_utils.hpp"
#include "functional_test_utils/plugin_cache.hpp"
#include "functional_test_utils/skip_tests_config.hpp"
#include "single_layer_tests/tensor_iterator.hpp"
#include <transformations/control_flow/unroll_tensor_iterator.hpp>
namespace LayerTestsDefinitions {
std::string TensorIteratorTest::getTestCaseName(const testing::TestParamInfo<TensorIteratorParams> &obj) {
bool should_decompose;
size_t seq_lenghts;
size_t batch;
size_t hidden_size;
size_t input_size;
ngraph::helpers::TensorIteratorBody ti_body;
float clip;
ngraph::op::RecurrentSequenceDirection direction;
InferenceEngine::Precision netPrecision;
std::string targetDevice;
std::tie(should_decompose, seq_lenghts, batch, hidden_size, input_size, clip, ti_body, direction, netPrecision,
targetDevice) = obj.param;
std::vector<std::vector<size_t>> inputShapes = {};
switch (ti_body) {
case ngraph::helpers::TensorIteratorBody::LSTM:
inputShapes = {
{{batch, input_size}, {batch, hidden_size}, {batch, hidden_size}, {4 * hidden_size, input_size},
{4 * hidden_size, hidden_size}, {4 * hidden_size}},
};
break;
case ngraph::helpers::TensorIteratorBody::GRU:
inputShapes = {
{{batch, input_size}, {batch, hidden_size}, {3 * hidden_size, input_size},
{3 * hidden_size, hidden_size}, {3 * hidden_size}},
};
break;
case ngraph::helpers::TensorIteratorBody::RNN:
inputShapes = {{batch, input_size}, {batch, hidden_size},
{hidden_size, input_size}, {hidden_size, hidden_size}, {hidden_size}};
break;
}
std::ostringstream result;
result << "unrolling=" << should_decompose << "_";
result << "seq_lenghts=" << seq_lenghts << "_";
result << "batch=" << batch << "_";
result << "hidden_size=" << hidden_size << "_";
result << "input_size=" << input_size << "_";
result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_";
result << "TensorIteratorBody=" << ti_body << "_";
result << "direction=" << direction << "_";
result << "clip=" << clip << "_";
result << "netPRC=" << netPrecision.name() << "_";
result << "targetDevice=" << targetDevice << "_";
return result.str();
}
void TensorIteratorTest::SetUp() {
size_t seq_lenghts;
bool should_decompose;
size_t batch;
size_t hidden_size;
size_t input_size;
ngraph::helpers::TensorIteratorBody ti_body;
float clip;
ngraph::op::RecurrentSequenceDirection direction;
InferenceEngine::Precision netPrecision;
std::tie(should_decompose, seq_lenghts, batch, hidden_size, input_size, clip, ti_body, direction, netPrecision,
targetDevice) = this->GetParam();
std::vector<std::vector<size_t>> inputShapes;
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
auto tensor_iterator = std::make_shared<ngraph::opset5::TensorIterator>();
// Each case consist of 3 steps:
// 1. Create TensorIterator body.
// 2. Set PortMap
// 3. Create outer function
auto axis = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, std::vector<int64_t>{1});
switch (ti_body) {
case ngraph::helpers::TensorIteratorBody::LSTM: {
inputShapes = {
{{batch, seq_lenghts, input_size}, {batch, hidden_size}, {batch, hidden_size}, {4 * hidden_size, input_size},
{4 * hidden_size, hidden_size}, {4 * hidden_size}},
};
auto outer_params = ngraph::builder::makeParams(ngPrc, {inputShapes[0], inputShapes[1], inputShapes[2]});
// 1. Create TensorIterator body.
inputShapes[0][1] = 1; // sliced dimension
auto body_params = ngraph::builder::makeParams(ngPrc, {inputShapes[0], inputShapes[1], inputShapes[2]});
auto squeeze = std::make_shared<ngraph::opset5::Squeeze>(body_params[0], axis);
std::vector<ngraph::Shape> WRB = {inputShapes[3], inputShapes[4], inputShapes[5]};
ngraph::OutputVector out_vector = {squeeze, body_params[1], body_params[2]};
auto lstm_cell = ngraph::builder::makeLSTM(out_vector, WRB, hidden_size, {"sigmoid", "tanh", "tanh"}, {}, {}, clip);
auto unsqueeze = std::make_shared<ngraph::opset5::Unsqueeze>(lstm_cell->output(0), axis);
ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(unsqueeze),
std::make_shared<ngraph::opset1::Result>(lstm_cell->output(0)),
std::make_shared<ngraph::opset1::Result>(lstm_cell->output(1))};
auto body = std::make_shared<ngraph::Function>(results, body_params, "lstm_cell");
tensor_iterator->set_function(body);
// 2. Set PortMap
if (direction == ngraph::op::RecurrentSequenceDirection::FORWARD) {
tensor_iterator->set_sliced_input(body_params[0], outer_params[0], 0, 1, 1, -1, 1);
tensor_iterator->get_concatenated_slices(results[0], 0, 1, 1, -1, 1);
} else if (direction == ngraph::op::RecurrentSequenceDirection::REVERSE) {
tensor_iterator->set_sliced_input(body_params[0], outer_params[0], -1, -1, 1, 0, 1);
tensor_iterator->get_concatenated_slices(results[0], -1, -1, 1, 0, 1);
} else {
NGRAPH_CHECK(false, "Bidirectional case is not supported.");
}
tensor_iterator->set_invariant_input(body_params[1], outer_params[1]);
tensor_iterator->set_invariant_input(body_params[2], outer_params[2]);
tensor_iterator->get_iter_value(results[1]);
tensor_iterator->get_iter_value(results[2]);
// 3. Outer function
function = std::make_shared<ngraph::Function>(ngraph::OutputVector{tensor_iterator->output(0), tensor_iterator->output(1),
tensor_iterator->output(2)}, outer_params);
break;
}
case ngraph::helpers::TensorIteratorBody::GRU: {
inputShapes = {
{{batch, seq_lenghts, input_size}, {batch, hidden_size}, {3 * hidden_size, input_size},
{3 * hidden_size, hidden_size}, {3 * hidden_size}},
};
auto outer_params = ngraph::builder::makeParams(ngPrc, {inputShapes[0], inputShapes[1]});
// 1. Create TensorIterator body.
inputShapes[0][1] = 1; // sliced dimension
auto body_params = ngraph::builder::makeParams(ngPrc, {inputShapes[0], inputShapes[1]});
std::vector<ngraph::Shape> WRB = {inputShapes[2], inputShapes[3], inputShapes[4]};
auto squeeze = std::make_shared<ngraph::opset5::Squeeze>(body_params[0], axis);
ngraph::OutputVector out_vector = {squeeze, body_params[1]};
auto gru_cell = ngraph::builder::makeGRU(out_vector, WRB, hidden_size, {"sigmoid", "tanh"},
{}, {}, clip, false);
auto unsqueeze = std::make_shared<ngraph::opset5::Unsqueeze>(gru_cell->output(0), axis);
ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(gru_cell->output(0)),
std::make_shared<ngraph::opset1::Result>(unsqueeze)};
auto body = std::make_shared<ngraph::Function>(results, body_params, "gru_cell");
tensor_iterator->set_function(body);
// 2. Set PortMap
if (direction == ngraph::op::RecurrentSequenceDirection::FORWARD) {
tensor_iterator->set_sliced_input(body_params[0], outer_params[0], 0, 1, 1, -1, 1);
tensor_iterator->get_concatenated_slices(results[1], 0, 1, 1, -1, 1);
} else if (direction == ngraph::op::RecurrentSequenceDirection::REVERSE) {
tensor_iterator->set_sliced_input(body_params[0], outer_params[0], -1, -1, 1, 0, 1);
tensor_iterator->get_concatenated_slices(results[1], -1, -1, 1, 0, 1);
} else {
NGRAPH_CHECK(false, "Bidirectional case is not supported.");
}
tensor_iterator->set_invariant_input(body_params[1], outer_params[1]);
tensor_iterator->get_iter_value(results[0]);
// 3. Outer function
function = std::make_shared<ngraph::Function>(ngraph::OutputVector{tensor_iterator->output(0), tensor_iterator->output(1)}, outer_params);
break;
}
case ngraph::helpers::TensorIteratorBody::RNN: {
inputShapes = {{batch, seq_lenghts, input_size},
{batch, hidden_size},
{hidden_size, input_size},
{hidden_size, hidden_size},
{hidden_size}};
auto outer_params = ngraph::builder::makeParams(ngPrc, {inputShapes[0], inputShapes[1]});
// 1. Create TensorIterator body.
inputShapes[0][1] = 1; // sliced dimension
auto body_params = ngraph::builder::makeParams(ngPrc, {inputShapes[0], inputShapes[1]});
std::vector<ngraph::Shape> WRB = {inputShapes[2], inputShapes[3], inputShapes[4]};
auto squeeze = std::make_shared<ngraph::opset5::Squeeze>(body_params[0], axis);
ngraph::OutputVector out_vector = {squeeze, body_params[1]};
auto rnn_cell = ngraph::builder::makeRNN(out_vector, WRB, hidden_size, {"tanh"}, {}, {}, clip);
auto unsqueeze = std::make_shared<ngraph::opset5::Unsqueeze>(rnn_cell->output(0), axis);
ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(rnn_cell),
std::make_shared<ngraph::opset1::Result>(unsqueeze)};
auto body = std::make_shared<ngraph::Function>(results, body_params, "rnn_cell");
tensor_iterator->set_function(body);
// 2. Set PortMap
if (direction == ngraph::op::RecurrentSequenceDirection::FORWARD) {
tensor_iterator->set_sliced_input(body_params[0], outer_params[0], 0, 1, 1, -1, 1);
tensor_iterator->get_concatenated_slices(results[1], 0, 1, 1, -1, 1);
} else if (direction == ngraph::op::RecurrentSequenceDirection::REVERSE) {
tensor_iterator->set_sliced_input(body_params[0], outer_params[0], -1, -1, 1, 0, 1);
tensor_iterator->get_concatenated_slices(results[1], -1, -1, 1, 0, 1);
} else {
NGRAPH_CHECK(false, "Bidirectional case is not supported.");
}
tensor_iterator->set_invariant_input(body_params[1], outer_params[1]);
tensor_iterator->get_iter_value(results[0]);
// 3. Outer function
function = std::make_shared<ngraph::Function>(ngraph::OutputVector{tensor_iterator->output(0), tensor_iterator->output(1)}, outer_params);
break;
}
}
if (should_decompose) {
ngraph::pass::Manager m;
m.register_pass<ngraph::pass::UnrollTensorIterator>();
m.run_passes(function);
}
}
TEST_P(TensorIteratorTest, CompareWithRefs) {
Run();
};
} // namespace LayerTestsDefinitions

View File

@@ -182,6 +182,13 @@ enum class PadMode {
     SYMMETRIC,
 };
+enum class TensorIteratorBody {
+    RNN,
+    GRU,
+    LSTM,
+    // CNN todo: implement
+};
 std::ostream &operator<<(std::ostream &os, const ReductionType &m);
 std::ostream &operator<<(std::ostream &os, const PadMode &m);
@@ -258,5 +265,7 @@ std::ostream& operator<<(std::ostream & os, ngraph::op::v4::Interpolate::Nearest
 std::ostream& operator<<(std::ostream & os, ngraph::op::v4::Interpolate::ShapeCalcMode type);
+std::ostream& operator<<(std::ostream & os, TensorIteratorBody type);
 } // namespace helpers
 } // namespace ngraph

View File

@@ -729,5 +729,21 @@ std::ostream& operator<<(std::ostream & os, ngraph::op::v4::Interpolate::ShapeCa
     return os;
 }
+std::ostream& operator<<(std::ostream & os, TensorIteratorBody type) {
+    switch (type) {
+        case TensorIteratorBody::LSTM:
+            os << "LSTM";
+            break;
+        case TensorIteratorBody::RNN:
+            os << "RNN";
+            break;
+        case TensorIteratorBody::GRU:
+            os << "GRU";
+            break;
+        default:
+            throw std::runtime_error("NOT_SUPPORTED_OP_TYPE");
+    }
+    return os;
+}
 } // namespace helpers
 } // namespace ngraph

View File

@@ -96,6 +96,9 @@ namespace ngraph
                 std::shared_ptr<Node>
                     clone_with_new_inputs(const OutputVector& new_args) const override;
+                bool evaluate(const HostTensorVector& outputs,
+                              const HostTensorVector& inputs) const override;
             private:
                 SpecialBodyPorts m_special_body_ports;
                 int64_t m_num_iterations = -1; // -1 means infinity

View File

@@ -341,6 +341,10 @@ namespace ngraph
                 std::vector<std::shared_ptr<op::util::SubGraphOp::OutputDescription>>
                     m_output_descriptions;
             };
+            using InputDescriptionPtr = std::shared_ptr<util::SubGraphOp::InputDescription>;
+            using OutputDescriptionPtr = std::shared_ptr<util::SubGraphOp::OutputDescription>;
+            using InputDescriptionVector = std::vector<InputDescriptionPtr>;
+            using OutputDescriptionVector = std::vector<OutputDescriptionPtr>;
         }
     }
     template class NGRAPH_API FactoryRegistry<op::util::SubGraphOp::InputDescription>;

View File

@@ -0,0 +1,35 @@
//*****************************************************************************
// Copyright 2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <cmath>
#include <cstddef>
#include <vector>
#include "ngraph/function.hpp"
namespace ngraph
{
namespace runtime
{
namespace reference
{
void function(const std::shared_ptr<Function>& function,
const HostTensorVector& inputs,
HostTensorVector& outputs);
}
}
}

View File

@@ -0,0 +1,36 @@
//*****************************************************************************
// Copyright 2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <cmath>
#include <ngraph/opsets/opset5.hpp>
namespace ngraph
{
namespace runtime
{
namespace reference
{
void loop(const std::shared_ptr<Function>& body,
const op::util::OutputDescriptionVector& out_descs,
const op::util::InputDescriptionVector& input_descs,
const opset5::Loop::SpecialBodyPorts& special_ports,
const HostTensorVector& out,
const HostTensorVector& args);
}
}
}
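
For orientation only, here is a minimal sketch (not part of this change) of how the reference kernel declared above could be driven from an already constructed op::v5::Loop. The helper name and the include paths are assumptions; the argument layout mirrors the implementation added later in this change, where args[0] carries the trip count and args[1] the execution condition.

#include <memory>
#include <ngraph/opsets/opset5.hpp>
#include <ngraph/runtime/host_tensor.hpp>
#include <ngraph/runtime/reference/loop.hpp>

// Hypothetical helper: evaluate an already-built op::v5::Loop with the reference
// kernel above. `inputs` holds the Loop op's input tensors in order, so inputs[0]
// is the trip count and inputs[1] is the execution condition; `outputs` must
// contain one HostTensor per Loop output.
void evaluate_loop_with_reference(const std::shared_ptr<ngraph::opset5::Loop>& loop_op,
                                  const ngraph::HostTensorVector& outputs,
                                  const ngraph::HostTensorVector& inputs)
{
    ngraph::runtime::reference::loop(loop_op->get_function(),
                                     loop_op->get_output_descriptions(),
                                     loop_op->get_input_descriptions(),
                                     loop_op->get_special_body_ports(),
                                     outputs,
                                     inputs);
}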

View File

@@ -0,0 +1,41 @@
//*****************************************************************************
// Copyright 2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <cmath>
#include <ngraph/opsets/opset5.hpp>
namespace ngraph
{
namespace runtime
{
namespace reference
{
using custom_evaluate_function =
std::function<void(const std::shared_ptr<ngraph::Function>& function,
const HostTensorVector& inputs,
HostTensorVector& outputs)>;
void tensor_iterator(uint64_t num_iterations,
const std::shared_ptr<Function>& body,
const op::util::OutputDescriptionVector& out_descs,
const op::util::InputDescriptionVector& input_descs,
const HostTensorVector& out,
const HostTensorVector& args,
const custom_evaluate_function& evaluate = nullptr);
}
}
}
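
A small sketch (an assumption, not part of the diff) of how the custom_evaluate_function hook declared above might be supplied; this one simply forwards to the generic reference::function evaluator, which is also what happens when no hook is passed.

#include <ngraph/runtime/reference/function.hpp>
#include <ngraph/runtime/reference/tensor_iterator.hpp>

// Hypothetical hook: a backend can plug in its own body evaluator. This one just
// forwards to the generic reference evaluator, matching the default behaviour.
static const ngraph::runtime::reference::custom_evaluate_function forward_to_reference =
    [](const std::shared_ptr<ngraph::Function>& body,
       const ngraph::HostTensorVector& inputs,
       ngraph::HostTensorVector& outputs) {
        ngraph::runtime::reference::function(body, inputs, outputs);
    };
// forward_to_reference would then be passed as the optional last argument of
// runtime::reference::tensor_iterator(...).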

View File

@@ -0,0 +1,147 @@
//*****************************************************************************
// Copyright 2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <cstring>
#include "ngraph/opsets/opset5.hpp"
#include "ngraph/runtime/reference/function.hpp"
#include "ngraph/runtime/host_tensor.hpp"
#include "ngraph/runtime/reference/concat.hpp"
#include "ngraph/runtime/tensor.hpp"
namespace ngraph
{
namespace runtime
{
namespace reference
{
static bool call(const HostTensorVector& func_outputs,
const HostTensorVector& func_inputs,
const std::shared_ptr<ngraph::Function>& function)
{
// map function params -> HostTensor
std::unordered_map<descriptor::Tensor*, std::shared_ptr<HostTensor>> tensor_map;
size_t input_count = 0;
for (const auto& param : function->get_parameters())
{
for (size_t i = 0; i < param->get_output_size(); ++i)
{
descriptor::Tensor* tensor = &param->output(i).get_tensor();
tensor_map.insert({tensor, func_inputs[input_count++]});
}
}
// map function outputs -> HostTensor
for (size_t output_count = 0; output_count < function->get_results().size();
++output_count)
{
auto output = function->get_results()[output_count];
descriptor::Tensor* tensor = &output->get_output_tensor(0);
tensor_map.insert({tensor, func_outputs[output_count]});
}
// for each ordered op in the graph
for (const auto& op : function->get_ordered_ops())
{
if (op::is_parameter(op))
{
continue;
}
// get op inputs from map
std::vector<std::shared_ptr<HostTensor>> op_inputs;
for (auto input : op->inputs())
{
descriptor::Tensor* tensor = &input.get_tensor();
op_inputs.push_back(tensor_map.at(tensor));
}
// get op outputs from map or create
std::vector<std::shared_ptr<HostTensor>> op_outputs;
for (size_t i = 0; i < op->get_output_size(); ++i)
{
descriptor::Tensor* tensor = &op->output(i).get_tensor();
std::shared_ptr<HostTensor> host_tensor;
auto it = tensor_map.find(tensor);
if (it == tensor_map.end())
{
host_tensor = std::make_shared<HostTensor>(op->output(i));
tensor_map.insert({tensor, host_tensor});
}
else
{
host_tensor = it->second;
}
op_outputs.push_back(host_tensor);
}
op->validate_and_infer_types();
if (!op->evaluate(op_outputs, op_inputs))
{
throw ngraph_error("Evaluate function is not implemented.");
}
}
return true;
}
void function(const std::shared_ptr<ngraph::Function>& function,
const HostTensorVector& inputs,
HostTensorVector& outputs)
{
const auto& parameters = function->get_parameters();
const auto& parametersNumber = parameters.size();
const auto& inputsNumber = inputs.size();
NGRAPH_CHECK(parametersNumber == inputsNumber,
"Got function (",
function->get_friendly_name(),
") with ",
parametersNumber,
" parameters, but ",
inputsNumber,
" input blobs");
for (const auto& parameter : parameters)
{
const auto& parameterIndex = function->get_parameter_index(parameter);
const auto& parameterShape = parameter->get_shape();
const auto& parameterType = parameter->get_element_type();
const auto& parameterSize = shape_size(parameterShape) * parameterType.size();
const auto& input = inputs[parameterIndex];
const auto& inputSize = input->get_size_in_bytes();
NGRAPH_CHECK(parameterSize == inputSize,
"Got parameter (",
parameter->get_friendly_name(),
") of size ",
parameterSize,
" bytes, but corresponding input with index ",
parameterIndex,
" has ",
inputSize,
" bytes");
}
const auto& results = function->get_results();
outputs.reserve(results.size());
for (size_t i = 0; i < results.size(); ++i)
{
outputs.push_back(std::make_shared<HostTensor>());
}
call(outputs, inputs, function);
}
}
}
}
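
As a usage illustration only (not part of this change), the evaluator above can be fed HostTensors directly. A minimal sketch for a two-input Add graph follows; all names here are chosen for the example.

#include <memory>
#include <vector>
#include <ngraph/opsets/opset5.hpp>
#include <ngraph/runtime/host_tensor.hpp>
#include <ngraph/runtime/reference/function.hpp>

int main()
{
    using namespace ngraph;
    // Tiny graph: out = a + b
    auto a = std::make_shared<opset5::Parameter>(element::f32, Shape{2});
    auto b = std::make_shared<opset5::Parameter>(element::f32, Shape{2});
    auto add = std::make_shared<opset5::Add>(a, b);
    auto f = std::make_shared<Function>(OutputVector{add}, ParameterVector{a, b});

    // Inputs are plain HostTensors, given in parameter order
    HostTensorVector inputs = {std::make_shared<runtime::HostTensor>(element::f32, Shape{2}),
                               std::make_shared<runtime::HostTensor>(element::f32, Shape{2})};
    std::vector<float> x{1.f, 2.f}, y{3.f, 4.f};
    inputs[0]->write(x.data(), x.size() * sizeof(float));
    inputs[1]->write(y.data(), y.size() * sizeof(float));

    // Output tensors are allocated by the evaluator itself
    HostTensorVector outputs;
    runtime::reference::function(f, inputs, outputs);
    return 0;
}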

View File

@@ -0,0 +1,227 @@
//*****************************************************************************
// Copyright 2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "runtime/reference/loop.hpp"
#include "runtime/reference/concat.hpp"
#include "runtime/reference/function.hpp"
namespace ngraph
{
namespace runtime
{
namespace reference
{
void loop(const std::shared_ptr<Function>& func,
const op::util::OutputDescriptionVector& out_descs,
const op::util::InputDescriptionVector& input_descs,
const opset5::Loop::SpecialBodyPorts& special_ports,
const HostTensorVector& out,
const HostTensorVector& args)
{
const auto& cur_iter_idx = special_ports.current_iteration_input_idx;
auto val =
std::find_if(input_descs.begin(),
input_descs.end(),
[&cur_iter_idx](const op::util::InputDescriptionPtr& in_desc) {
return in_desc->m_body_parameter_index == cur_iter_idx;
});
bool cur_iter_initial_value_exist = val != input_descs.end();
bool cur_iter_back_edge_exist = false;
// If current_iteration_input is exist and initial value is not provided, we
// should allocate input_descs.size() + 1 inputs and set default value (0) for
// current_iteration input.
int64_t inputs_count =
input_descs.size() + (cur_iter_idx >= 0 ? !cur_iter_initial_value_exist : 0);
HostTensorVector inputs_to_body;
for (int64_t i = 0; i < inputs_count; ++i)
inputs_to_body.push_back(
std::make_shared<HostTensor>(element::dynamic, PartialShape::dynamic()));
if (cur_iter_idx >= 0 && !cur_iter_initial_value_exist)
{
const auto& cur_iter = func->get_parameters().at(cur_iter_idx);
if (cur_iter->get_partial_shape().is_dynamic())
{
cur_iter->set_partial_shape(Shape{1});
cur_iter->validate_and_infer_types();
}
auto init = std::make_shared<opset5::Constant>(
func->get_parameters().at(cur_iter_idx)->get_element_type(),
func->get_parameters().at(cur_iter_idx)->get_shape(),
0);
inputs_to_body.at(cur_iter_idx)->initialize(init);
// reinterpret_cast<int64_t*>(inputs_to_body.at(cur_iter_idx).data())[0] = 0;
}
// Port map processing: inputs and back edges
struct BackEdge
{
uint64_t param_idx;
uint64_t result_idx;
};
std::vector<BackEdge> back_edges;
for (const auto& desc : input_descs)
{
inputs_to_body[desc->m_body_parameter_index] = args[desc->m_input_index];
if (const auto& merged_desc =
std::dynamic_pointer_cast<opset5::Loop::MergedInputDescription>(desc))
{
back_edges.push_back(
{merged_desc->m_body_parameter_index, merged_desc->m_body_value_index});
cur_iter_back_edge_exist |=
merged_desc->m_body_parameter_index == cur_iter_idx;
}
}
// Get TripCount
int64_t trip_count = 0;
if (args[0]->get_element_type() == ngraph::element::i32)
{
auto* trip_count_p = args[0]->get_data_ptr<int32_t>();
trip_count = trip_count_p[0];
}
else if (args[0]->get_element_type() == ngraph::element::i64)
{
auto* trip_count_p = args[0]->get_data_ptr<int64_t>();
trip_count = trip_count_p[0];
}
else
{
NGRAPH_CHECK(
false,
"Unsupported element type for trip_count input. Expected int32 or int64.");
}
NGRAPH_CHECK(trip_count != 0, "Zero count of iteration not supported");
// Loop iterations
auto exec_condition = args[1]->get_data_ptr<bool>();
if (exec_condition[0])
{
// Find all ConcatOutputDescription
std::vector<std::shared_ptr<opset5::Loop::ConcatOutputDescription>>
concat_outputs;
for (const auto& desc : out_descs)
{
if (const auto& concat_desc =
std::dynamic_pointer_cast<opset5::Loop::ConcatOutputDescription>(
desc))
{
concat_outputs.push_back(concat_desc);
}
}
// Allocate vectors for store output values
std::vector<HostTensorVector> values_to_concat(concat_outputs.size());
HostTensorVector body_outputs;
// Negative value means infinity count of iterations
trip_count = trip_count >= 0 ? trip_count : std::numeric_limits<int64_t>::max();
for (int64_t cur_iter = 0; cur_iter < trip_count; ++cur_iter)
{
// Evaluate body
body_outputs.clear();
reference::function(func, inputs_to_body, body_outputs);
// Store values for later concatenation
for (size_t i = 0; i < values_to_concat.size(); ++i)
{
values_to_concat[i].push_back(
body_outputs[concat_outputs[i]->m_body_value_index]);
}
// Check execution condition
bool body_exec_condition;
body_outputs[special_ports.body_condition_output_idx]->read(
&body_exec_condition, sizeof(bool));
if (!body_exec_condition)
break;
// If there are no rules for calculating the current iteration, just
// increment it.
if (cur_iter_idx >= 0 && !cur_iter_back_edge_exist)
{
const auto& cur_iter_param = func->get_parameters().at(cur_iter_idx);
int64_t iter_num = cur_iter + 1;
if (cur_iter_param->get_element_type() == element::i64)
inputs_to_body.at(cur_iter_idx)
->write(&iter_num, cur_iter_param->get_element_type().size());
else if (cur_iter_param->get_element_type() == element::i32)
{
int32_t iter_num_i32 = static_cast<int32_t>(iter_num);
inputs_to_body.at(cur_iter_idx)
->write(&iter_num_i32,
cur_iter_param->get_element_type().size());
}
else
NGRAPH_CHECK(false,
"Unsupported element type for current iteration "
"input. Expected int32 or int64.");
}
// Back-edge processing
for (auto& back_edge : back_edges)
{
inputs_to_body[back_edge.param_idx] =
body_outputs[back_edge.result_idx];
}
}
for (const auto& desc : out_descs)
{
if (const auto& body_desc =
std::dynamic_pointer_cast<opset5::Loop::BodyOutputDescription>(
desc))
{
out[body_desc->m_output_index]->write(
body_outputs[body_desc->m_body_value_index]->get_data_ptr(),
body_outputs[body_desc->m_body_value_index]->get_size_in_bytes());
}
}
// Concatenate and copy all values stored in values_to_concat vector to outputs
for (size_t i = 0; i < concat_outputs.size(); ++i)
{
const auto& concat_desc = concat_outputs[i];
auto shape =
func->get_results().at(concat_desc->m_body_value_index)->get_shape();
std::vector<Shape> shapes_to_concat(values_to_concat[i].size(), shape);
shape.at(concat_desc->m_axis) = values_to_concat[i].size();
out[concat_desc->m_output_index]->set_shape(shape);
std::vector<const char*> pointers_on_values;
pointers_on_values.reserve(values_to_concat[i].size());
for (const auto& vec : values_to_concat[i])
{
pointers_on_values.push_back(vec->get_data_ptr<char>());
}
reference::concat(
pointers_on_values,
out[concat_desc->m_output_index]->get_data_ptr<char>(),
shapes_to_concat,
shape,
concat_desc->m_axis,
out[concat_desc->m_output_index]->get_element_type().size());
}
}
else
{
NGRAPH_CHECK(
false,
"ExecutionCondition is false. Zero count of iteration not supported.");
}
}
}
}
}

View File

@@ -0,0 +1,181 @@
//*****************************************************************************
// Copyright 2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "runtime/reference/tensor_iterator.hpp"
#include "runtime/reference/concat.hpp"
#include "runtime/reference/function.hpp"
#include "runtime/reference/split.hpp"
namespace ngraph
{
namespace runtime
{
namespace reference
{
void tensor_iterator(uint64_t num_iterations,
const std::shared_ptr<Function>& func,
const op::util::OutputDescriptionVector& out_descs,
const op::util::InputDescriptionVector& input_descs,
const HostTensorVector& out,
const HostTensorVector& args,
const custom_evaluate_function& evaluate)
{
HostTensorVector inputs_to_body;
for (int64_t i = 0; i < input_descs.size(); ++i)
inputs_to_body.push_back(
std::make_shared<HostTensor>(element::dynamic, PartialShape::dynamic()));
// Port map processing: inputs and back edges
struct BackEdge
{
uint64_t param_idx;
uint64_t result_idx;
};
std::vector<BackEdge> back_edges;
for (const auto& desc : input_descs)
{
inputs_to_body[desc->m_body_parameter_index] = args[desc->m_input_index];
if (const auto& merged_desc =
std::dynamic_pointer_cast<opset5::Loop::MergedInputDescription>(desc))
{
back_edges.push_back(
{merged_desc->m_body_parameter_index, merged_desc->m_body_value_index});
}
}
// Find all ConcatOutputDescription
std::vector<std::shared_ptr<opset5::TensorIterator::ConcatOutputDescription>>
concat_outputs;
for (const auto& desc : out_descs)
{
if (const auto& concat_desc = std::dynamic_pointer_cast<
opset5::TensorIterator::ConcatOutputDescription>(desc))
{
concat_outputs.push_back(concat_desc);
}
}
// Slicing
std::vector<std::shared_ptr<opset5::TensorIterator::SliceInputDescription>>
slice_inputs;
std::vector<HostTensorVector> sliced_values;
int slice_in_idx = 0;
for (const auto& desc : input_descs)
{
if (const auto& slice_desc = std::dynamic_pointer_cast<
opset5::TensorIterator::SliceInputDescription>(desc))
{
const auto el_size =
args[slice_desc->m_input_index]->get_element_type().size();
slice_inputs.push_back(slice_desc);
auto shape = args[slice_desc->m_input_index]->get_shape();
shape.at(slice_desc->m_axis) = 1;
sliced_values.emplace_back(HostTensorVector());
for (int i = 0; i < num_iterations; ++i)
{
sliced_values.back().emplace_back(std::make_shared<HostTensor>(
args[slice_desc->m_input_index]->get_element_type(), shape));
}
std::vector<char*> pointers_to_data(num_iterations);
for (size_t j = 0; j < pointers_to_data.size(); ++j)
{
pointers_to_data[j] =
sliced_values[slice_in_idx][j]->get_data_ptr<char>();
}
reference::split(args[slice_desc->m_input_index]->get_data_ptr<char>(),
args[slice_desc->m_input_index]->get_shape(),
el_size,
slice_desc->m_axis,
num_iterations,
pointers_to_data.data());
slice_in_idx++;
}
}
// Allocate vectors for store output values
std::vector<HostTensorVector> values_to_concat(concat_outputs.size());
HostTensorVector body_outputs;
for (int64_t cur_iter = 0; cur_iter < num_iterations; ++cur_iter)
{
// Copy new values for sliced inputs
for (size_t i = 0; i < slice_inputs.size(); ++i)
{
inputs_to_body[slice_inputs[i]->m_body_parameter_index] =
sliced_values[i][cur_iter];
}
// Evaluate body
if (!evaluate)
{
reference::function(func, inputs_to_body, body_outputs);
}
else
{
evaluate(func, inputs_to_body, body_outputs);
}
// Store values for later concatenation
for (size_t i = 0; i < values_to_concat.size(); ++i)
{
values_to_concat[i].push_back(
body_outputs[concat_outputs[i]->m_body_value_index]);
}
// Back-edge processing
for (auto& back_edge : back_edges)
{
inputs_to_body[back_edge.param_idx] = body_outputs[back_edge.result_idx];
}
}
for (const auto& desc : out_descs)
{
if (const auto& body_desc = std::dynamic_pointer_cast<
opset5::TensorIterator::BodyOutputDescription>(desc))
{
// Copy output values from the last iteration
out[body_desc->m_output_index]->write(
body_outputs[body_desc->m_body_value_index]->get_data_ptr(),
body_outputs[body_desc->m_body_value_index]->get_size_in_bytes());
}
}
// Concatenate and copy all values stored in values_to_concat vector to outputs
for (size_t i = 0; i < concat_outputs.size(); ++i)
{
const auto& concat_desc = concat_outputs[i];
auto shape =
func->get_results().at(concat_desc->m_body_value_index)->get_shape();
std::vector<Shape> shapes_to_concat(values_to_concat[i].size(), shape);
shape.at(concat_desc->m_axis) = values_to_concat[i].size();
out[concat_desc->m_output_index]->set_shape(shape);
std::vector<const char*> pointers_on_values;
pointers_on_values.reserve(values_to_concat[i].size());
for (const auto& vec : values_to_concat[i])
{
pointers_on_values.push_back(vec->get_data_ptr<char>());
}
reference::concat(pointers_on_values,
out[concat_desc->m_output_index]->get_data_ptr<char>(),
shapes_to_concat,
shape,
concat_desc->m_axis,
out[concat_desc->m_output_index]->get_element_type().size());
}
}
}
}
}

View File

@@ -15,11 +15,14 @@
 //*****************************************************************************
 #include "ngraph/op/loop.hpp"
+#include "itt.hpp"
 #include "ngraph/factory.hpp"
 #include "ngraph/graph_util.hpp"
 #include "ngraph/opsets/opset5.hpp"
 #include "ngraph/specialize_function.hpp"
+#include "ngraph/runtime/reference/loop.hpp"
 using namespace std;
 using namespace ngraph;
@@ -380,3 +383,11 @@ Output<Node> op::v5::Loop::get_concatenated_slices(const Output<Node>& value,
                  "{-1}");
     return SubGraphOp::get_concatenated_slices(value, start, stride, part_size, end, axis);
 }
+bool op::v5::Loop::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const
+{
+    OV_ITT_SCOPED_TASK(itt::domains::nGraphOp, "op::v5::Loop::evaluate");
+    runtime::reference::loop(
+        m_body, m_output_descriptions, m_input_descriptions, m_special_body_ports, outputs, inputs);
+    return true;
+}

View File

@@ -143,7 +143,8 @@ namespace ngraph
                 const auto concat_axis_const =
                     ngraph::op::Constant::create(ngraph::element::i64, {1}, {concat_axis});
                 // provide scalar handing for scan outputs
-                for (int i = loop_carried_dependencies.size() + 1; i < body_outputs.size(); ++i)
+                for (size_t i = loop_carried_dependencies.size() + 1; i < body_outputs.size();
+                     ++i)
                 {
                     auto body_output_shape = body_outputs[i].get_partial_shape();
                     if (body_output_shape.is_static() &&

View File

@@ -98,8 +98,6 @@ xfail_issue_36478 = xfail_test(reason="RuntimeError: [NOT_IMPLEMENTED] Input ima
                                       "not supported yet...")
 xfail_issue_36480 = xfail_test(reason="RuntimeError: [NOT_FOUND] Unsupported property dummy_option "
                                       "by CPU plugin")
-xfail_issue_36483 = xfail_test(reason="RuntimeError: Unsupported primitive of type: "
-                                      "Ceiling name: <value>")
 xfail_issue_36485 = xfail_test(reason="RuntimeError: Check 'm_group >= 1' failed at "
                                       "/openvino/ngraph/core/src/op/shuffle_channels.cpp:77:")
 xfail_issue_36486 = xfail_test(reason="RuntimeError: HardSigmoid operation should be converted "

View File

@@ -20,7 +20,7 @@ import ngraph as ng
 from ngraph.impl import AxisSet, Function, Shape, Type
 from ngraph.impl.op import Constant, Parameter
 from tests.runtime import get_runtime
-from tests import xfail_issue_36483, xfail_issue_34323
+from tests import xfail_issue_34323
 def binary_op(op_str, a, b):
@@ -370,7 +370,6 @@ def test_atanh():
     unary_op_exec(op_str, input_list)
-@xfail_issue_36483
 def test_ceiling():
     input_list = [0.5, 0, 0.4, 0.5]
     op_str = "Ceiling"

@@ -19,7 +19,7 @@ import pytest
import ngraph as ng
from ngraph.impl import Shape, Type
from tests.test_ngraph.util import run_op_node
-from tests import xfail_issue_35929, xfail_issue_36483
+from tests import xfail_issue_35929


@xfail_issue_35929
@@ -67,8 +67,8 @@ def test_unary_op_array(ng_api_fn, numpy_fn, range_start, range_end):
        pytest.param(ng.acos, np.arccos, np.float32(-0.5)),
        pytest.param(ng.asin, np.arcsin, np.float32(-0.5)),
        pytest.param(ng.atan, np.arctan, np.float32(-0.5)),
-       pytest.param(ng.ceiling, np.ceil, np.float32(1.5), marks=xfail_issue_36483),
-       pytest.param(ng.ceil, np.ceil, np.float32(1.5), marks=xfail_issue_36483),
+       pytest.param(ng.ceiling, np.ceil, np.float32(1.5)),
+       pytest.param(ng.ceil, np.ceil, np.float32(1.5)),
        pytest.param(ng.cos, np.cos, np.float32(np.pi / 4.0)),
        pytest.param(ng.cosh, np.cosh, np.float32(np.pi / 4.0)),
        pytest.param(ng.exp, np.exp, np.float32(1.5)),

@@ -38,7 +38,6 @@ from tests import (BACKEND_NAME,
                   xfail_issue_33616,
                   xfail_issue_38086,
                   xfail_issue_38087,
-                  xfail_issue_36483,
                   xfail_issue_34323,
                   xfail_issue_35915,
                   xfail_issue_34310,
@@ -205,9 +204,6 @@ tests_expected_to_fail = [
     "OnnxBackendNodeModelTest.test_quantizelinear_cpu"),
    (xfail_issue_38087,
     "OnnxBackendNodeModelTest.test_convtranspose_1d_cpu"),
-   (xfail_issue_36483,
-    "OnnxBackendNodeModelTest.test_ceil_cpu",
-    "OnnxBackendNodeModelTest.test_ceil_example_cpu"),
    (xfail_issue_34323,
     "OnnxBackendNodeModelTest.test_constant_cpu",
     "OnnxBackendNodeModelTest.test_eyelike_populate_off_main_diagonal_cpu",

@@ -53,6 +53,13 @@ shared_ptr<runtime::Tensor>
    return make_shared<runtime::HostTensor>(type, shape);
}

+shared_ptr<runtime::Tensor>
+    runtime::interpreter::INTBackend::create_dynamic_tensor(const element::Type& type,
+                                                            const PartialShape& pshape)
+{
+    return make_shared<runtime::HostTensor>(type, pshape);
+}
+
shared_ptr<runtime::Tensor> runtime::interpreter::INTBackend::create_tensor(
    const element::Type& type, const Shape& shape, void* memory_pointer)
{

@@ -56,6 +56,8 @@ public:
        create_tensor(const element::Type& type, const Shape& shape, void* memory_pointer) override;
    std::shared_ptr<Tensor> create_tensor(const element::Type& type, const Shape& shape) override;
+    std::shared_ptr<Tensor> create_dynamic_tensor(const element::Type& type,
+                                                  const PartialShape& shape) override;

    std::shared_ptr<Executable> compile(std::shared_ptr<Function> function,
                                        bool enable_performance_data = false) override;

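Editor's note: a short, hedged usage sketch of the new create_dynamic_tensor() entry point; the variable names are illustrative and `using namespace ngraph;` is assumed.

```cpp
auto backend = runtime::Backend::create("INTERPRETER");
// Rank-2 tensor whose first dimension is not known until execution.
auto result = backend->create_dynamic_tensor(element::f32,
                                             PartialShape{Dimension::dynamic(), 4});
// A compiled Executable (not shown) fixes the concrete shape when it writes
// this output during call_with_validate().
```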
@@ -284,7 +284,7 @@ bool runtime::interpreter::INTExecutable::call(const vector<shared_ptr<runtime::
    // convert inputs to HostTensor
    vector<shared_ptr<HostTensor>> func_inputs;
-   for (auto tensor : inputs)
+   for (const auto& tensor : inputs)
    {
        auto host_tensor = static_pointer_cast<runtime::HostTensor>(tensor);
        func_inputs.push_back(host_tensor);
@@ -296,7 +296,7 @@ bool runtime::interpreter::INTExecutable::call(const vector<shared_ptr<runtime::
    // convert outputs to HostTensor
    vector<shared_ptr<HostTensor>> func_outputs;
-   for (auto tensor : outputs)
+   for (const auto& tensor : outputs)
    {
        auto host_tensor = static_pointer_cast<runtime::HostTensor>(tensor);
        func_outputs.push_back(host_tensor);
@@ -305,7 +305,7 @@ bool runtime::interpreter::INTExecutable::call(const vector<shared_ptr<runtime::
    // map function params -> HostTensor
    unordered_map<descriptor::Tensor*, shared_ptr<HostTensor>> tensor_map;
    size_t input_count = 0;
-   for (auto param : get_parameters())
+   for (const auto& param : get_parameters())
    {
        for (size_t i = 0; i < param->get_output_size(); ++i)
        {
@@ -327,7 +327,7 @@ bool runtime::interpreter::INTExecutable::call(const vector<shared_ptr<runtime::
    }

    // for each ordered op in the graph
-   for (auto op : m_nodes)
+   for (const auto& op : m_nodes)
    {
        event::Duration d2(op->description(), "Interpreter");
        if (op::is_parameter(op))
@@ -387,7 +387,7 @@ bool runtime::interpreter::INTExecutable::call(const vector<shared_ptr<runtime::
        }
        if (!op->evaluate(op_outputs, op_inputs))
        {
-           generate_calls(type, *op.get(), op_outputs, op_inputs);
+           generate_calls(type, *op, op_outputs, op_inputs);
        }
        if (m_performance_counters_enabled)
        {

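Editor's note: the loop-header edits above are purely about avoiding shared_ptr copies, and `*op` names the same object as `*op.get()`. A small, self-contained illustration (not code from this file):

```cpp
#include <memory>
#include <vector>

void visit(const std::vector<std::shared_ptr<int>>& nodes)
{
    // A const reference avoids an atomic ref-count bump on every iteration.
    for (const auto& node : nodes)
    {
        int& a = *node;       // shared_ptr::operator* is defined as *get(),
        int& b = *node.get(); // so both references denote the same object.
        (void)a;
        (void)b;
    }
}
```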
@@ -98,6 +98,7 @@
#include "ngraph/runtime/reference/sum.hpp"
#include "ngraph/runtime/reference/tan.hpp"
#include "ngraph/runtime/reference/tanh.hpp"
+#include "ngraph/runtime/reference/tensor_iterator.hpp"
#include "ngraph/runtime/reference/topk.hpp"
#include "ngraph/runtime/tensor.hpp"
#include "op/avg_pool.hpp"
@@ -1235,6 +1236,81 @@ protected:
    args[0]->get_data_ptr<const T>(), out[0]->get_data_ptr<T>(), element_count);
    break;
}
case OP_TYPEID::TensorIterator:
{
auto ti = dynamic_cast<const op::v0::TensorIterator&>(node);
reference::custom_evaluate_function evaluate =
[](const std::shared_ptr<ngraph::Function>& function,
const HostTensorVector& inputs,
HostTensorVector& outputs) -> void {
const auto& parameters = function->get_parameters();
const auto& parametersNumber = parameters.size();
const auto& inputsNumber = inputs.size();
NGRAPH_CHECK(parametersNumber == inputsNumber,
"Got function (",
function->get_friendly_name(),
") with ",
parametersNumber,
" parameters, but ",
inputsNumber,
" input blobs");
auto inputTensors = std::vector<std::shared_ptr<runtime::Tensor>>{};
for (const auto& parameter : parameters)
{
const auto& parameterIndex = function->get_parameter_index(parameter);
const auto& parameterShape = parameter->get_shape();
const auto& parameterType = parameter->get_element_type();
const auto& parameterSize = shape_size(parameterShape) * parameterType.size();
const auto& input = inputs[parameterIndex];
const auto& inputSize = input->get_size_in_bytes();
NGRAPH_CHECK(parameterSize == inputSize,
"Got parameter (",
parameter->get_friendly_name(),
") of size ",
parameterSize,
" bytes, but corresponding input with index ",
parameterIndex,
" has ",
inputSize,
" bytes");
auto tensor =
std::make_shared<runtime::HostTensor>(parameterType, parameterShape);
tensor->write(input->get_data_ptr(), parameterSize);
inputTensors.push_back(tensor);
}
const auto& results = function->get_results();
std::vector<std::shared_ptr<ngraph::runtime::Tensor>> outputTensors;
outputTensors.reserve(results.size());
for (size_t i = 0; i < results.size(); ++i)
{
outputTensors.push_back(std::make_shared<HostTensor>());
}
runtime::Backend::set_backend_shared_library_search_directory("");
auto backend = runtime::Backend::create("INTERPRETER");
auto handle = backend->compile(function);
handle->call_with_validate(outputTensors, inputTensors);
outputs.reserve(outputTensors.size());
for (const auto& tensor : outputTensors)
{
auto host_tensor = static_pointer_cast<runtime::HostTensor>(tensor);
outputs.push_back(host_tensor);
}
};
reference::tensor_iterator(ti.get_num_iterations(),
ti.get_function(),
ti.get_output_descriptions(),
ti.get_input_descriptions(),
out,
args,
evaluate);
break;
}
case OP_TYPEID::DetectionOutput_v0:
{
    const op::DetectionOutput* detOut = static_cast<const op::DetectionOutput*>(&node);
@@ -1378,7 +1454,6 @@ protected:
case OP_TYPEID::ShuffleChannels:
case OP_TYPEID::SpaceToDepth:
case OP_TYPEID::SquaredDifference:
-case OP_TYPEID::TensorIterator:
case OP_TYPEID::Tile:
case OP_TYPEID::UnknownOp:
    throw unsupported_op("Unsupported op '" + node.description() + "'");
@@ -1397,6 +1472,7 @@ protected:
case OP_TYPEID::LogicalAnd_v1:
case OP_TYPEID::LogicalOr_v1:
case OP_TYPEID::LogicalXor_v1:
+case OP_TYPEID::Loop_v5:
case OP_TYPEID::MatMul:
case OP_TYPEID::Maximum:
case OP_TYPEID::Minimum:

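Editor's note: with the TensorIterator case above in place, a graph containing a TensorIterator can be run end-to-end on the INTERPRETER backend. A hedged sketch of the call sequence, using only API calls that already appear in this patch; `ti_function` and the shapes are placeholders:

```cpp
// `ti_function` is assumed to be a std::shared_ptr<ngraph::Function> whose body
// contains a TensorIterator slicing axis 1 of a {1, 10, 4} input.
auto backend = runtime::Backend::create("INTERPRETER");
auto executable = backend->compile(ti_function);

auto input = backend->create_tensor(element::f32, Shape{1, 10, 4});
auto output = backend->create_tensor(element::f32, Shape{1, 10, 4});
executable->call_with_validate({output}, {input});
```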
@@ -59,11 +59,12 @@ NGRAPH_OP(LSTMCell, op::v4)
#define ID_SUFFIX(NAME) NAME##_v5
NGRAPH_OP(GatherND, op::v5)
-NGRAPH_OP(LSTMSequence, op::v5)
NGRAPH_OP(GRUSequence, op::v5)
-NGRAPH_OP(RNNSequence, op::v5)
NGRAPH_OP(BatchNormInference, op::v5)
-NGRAPH_OP(Round, op::v5)
NGRAPH_OP(LogSoftmax, op::v5)
+NGRAPH_OP(Loop, op::v5)
+NGRAPH_OP(LSTMSequence, op::v5)
NGRAPH_OP(NonMaxSuppression, op::v5)
+NGRAPH_OP(RNNSequence, op::v5)
+NGRAPH_OP(Round, op::v5)
#undef ID_SUFFIX

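Editor's note: the hunk above mostly re-alphabetizes the v5 block and registers Loop so that OP_TYPEID::Loop_v5 exists for the interpreter switch. For readers unfamiliar with the mechanism, here is a hedged sketch of the X-macro technique such a table relies on; HYPOTHETICAL_OP_TABLE, AS_ENUM and AS_PAIR are stand-ins, not the real macros or headers.

```cpp
#include <map>
#include <string>

// Stand-in for the real opset table (one X(...) entry per operation).
#define HYPOTHETICAL_OP_TABLE(X) \
    X(Loop)                      \
    X(LSTMSequence)              \
    X(Round)

// Expand the table once to generate enum values carrying the _v5 suffix...
enum class OP_TYPEID
{
#define AS_ENUM(NAME) NAME##_v5,
    HYPOTHETICAL_OP_TABLE(AS_ENUM)
#undef AS_ENUM
    UnknownOp
};

// ...and a second time to build the name -> id lookup used for dispatch.
static const std::map<std::string, OP_TYPEID> typeid_map = {
#define AS_PAIR(NAME) {#NAME, OP_TYPEID::NAME##_v5},
    HYPOTHETICAL_OP_TABLE(AS_PAIR)
#undef AS_PAIR
};
```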
@@ -133,9 +133,6 @@ onnx_controlflow_loop_2d_no_identity_termination_cond
onnx_controlflow_loop_2d_const_no_identity_termination_cond
onnx_controlflow_loop_2d_both_cond_and_trip_count_as_inputs

-#dynamic trip count
-onnx_controlflow_loop_2d_trip_count_dynamic
-
# Input body shape is changed during Loop iterations
# Exception is throw during Loop shape inference
# Is it expected?
@@ -144,13 +141,7 @@ onnx_controlflow_loop_concat_values
# Infinitive Loop is not supported
onnx_controlflow_loop_infinite

-# Loop is not supported yet by INTERPRETER backend
-onnx_controlflow_loop_2d_add
-onnx_controlflow_loop_2d_no_identity_termination_cond_false
-onnx_controlflow_loop_add_initializer_from_parent_scope
-onnx_controlflow_loop_add_node_from_parent_scope
-onnx_controlflow_loop_add_value_the_same_node_from_parent_and_subgraph
-onnx_controlflow_loop_scalars
-onnx_controlflow_loop_2d_add_const_cond
+# Dynamic shape support?
+onnx_controlflow_loop_2d_trip_count_dynamic
onnx_controlflow_loop_no_variadic_inputs_and_outputs
onnx_controlflow_loop_power