From c1608628d42fbc0d70d94abb7c5cff27c5469a24 Mon Sep 17 00:00:00 2001 From: Ivan Tikhonov Date: Mon, 7 Jun 2021 15:13:41 +0300 Subject: [PATCH] LowLatency v2 ngraph transformation (#5160) * LowLatency 2.0: transformation and unit tests * low latency 2.0: unit tests * documentation and ngraph codestyle * update CNN Interface of LowLatency transformation * fix build on Windows * fix build on Windows * investigation of a failed build on Win OS * ngraph codestyle * fix build (werrors) * New unit tests, refactoring * update functional tests for Memory * update LowLatency functional tests * extend Memory tests to cover LowLatency v2 transformation * clean up, code style * fix unit tests * update and fix unit tests, add feature to apply LLTv2 after LLTv1 * update docs, refactoring * add several gna tests to skip config * fix python api tests * update python api, rename LowLatency_v2 to LowLatency2 * deprecate LowLatency v1 * Deprecate LowLatency v1 in IE * fix wrong merge, codestyle * resolve review comments * fix python test * update skip config * apply online review notes, fix unit tests * clean up, code style * fix docs * Use debug_messages instead of exceptions in llt v2 * fix unit tests * Resolve review remarks --- .../offline_transformations_api.pyx | 4 +- .../offline_transformations_api_impl.cpp | 11 +- .../offline_transformations_api_impl.hpp | 2 +- .../offline_transformations_api_impl_defs.pxd | 5 +- .../python/tests/test_offline_api.py | 2 +- .../include/ie_transformations.hpp | 36 + .../inference_engine/ie_transformations.cpp | 10 + .../transformations/low_latency_test.cpp | 10 + .../transformations/low_latency_v2_test.cpp | 829 ++++++++++++++++++ .../single_layer_tests/memory.cpp | 8 + .../subgraph_tests/memory_LSTMCell.cpp | 45 + .../subgraph_tests/multiple_LSTMCell.cpp | 10 + .../single_layer_tests/memory.cpp | 12 +- .../skip_tests_config.cpp | 8 + .../subgraph_tests/memory_LSTMCell.cpp | 9 + .../subgraph_tests/multiple_LSTMCell.cpp | 10 + 
.../include/subgraph_tests/basic_lstm.hpp | 2 +- .../subgraph_tests/memory_LSTMCell.hpp | 8 - .../subgraph_tests/multiple_LSTMCell.hpp | 8 - .../single_layer/memory.hpp | 9 + .../subgraph/memory_LSTMCell.hpp | 8 +- .../subgraph/multiple_LSTMCell.hpp | 8 +- .../src/single_layer/memory.cpp | 116 ++- .../src/subgraph/memory_LSTMCell.cpp | 281 +++--- .../src/subgraph/multiple_LSTMCell.cpp | 442 +++++----- .../ngraph_functions/utils/ngraph_helpers.hpp | 11 + .../src/utils/ngraph_helpers.cpp | 27 + .../mo/back/offline_transformations.py | 2 +- model-optimizer/mo/utils/cli_parser.py | 17 +- .../unit_tests/mo/utils/cli_parser_test.py | 45 +- .../core/include/ngraph/pass/low_latency.hpp | 44 +- ngraph/core/src/op/tensor_iterator.cpp | 1 - ngraph/core/src/pass/low_latency.cpp | 203 ++++- 33 files changed, 1811 insertions(+), 432 deletions(-) create mode 100644 inference-engine/tests/functional/inference_engine/transformations/low_latency_v2_test.cpp create mode 100644 inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/memory_LSTMCell.cpp diff --git a/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api.pyx b/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api.pyx index bd101280fcb..266c1dc94d9 100644 --- a/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api.pyx +++ b/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api.pyx @@ -17,8 +17,8 @@ def ApplyPOTTransformations(IENetwork network, string device): C.ApplyPOTTransformations(network.impl, device) -def ApplyLowLatencyTransformation(IENetwork network, int64_t num_iterations=1): - C.ApplyLowLatencyTransformation(network.impl, num_iterations) +def ApplyLowLatencyTransformation(IENetwork network, bool use_const_initializer = True): + C.ApplyLowLatencyTransformation(network.impl, use_const_initializer) def 
ApplyPruningTransformation(IENetwork network): diff --git a/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl.cpp b/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl.cpp index b9ff879da8c..183deaccfb3 100644 --- a/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl.cpp +++ b/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl.cpp @@ -26,16 +26,9 @@ void InferenceEnginePython::ApplyPOTTransformations(InferenceEnginePython::IENet manager.run_passes(network.actual->getFunction()); } -void InferenceEnginePython::ApplyLowLatencyTransformation(InferenceEnginePython::IENetwork network, int64_t num_iterations) { +void InferenceEnginePython::ApplyLowLatencyTransformation(InferenceEnginePython::IENetwork network, bool use_const_initializer) { ngraph::pass::Manager manager; - // TODO: pass num_iterations to LowLatency - manager.register_pass(); - manager.register_pass(); - - auto pass_config = manager.get_pass_config(); - pass_config->set_callback([](const std::shared_ptr& node) -> bool { - return node->get_rt_info().count("UNROLL_TI") == 0; - }); + manager.register_pass(use_const_initializer); manager.run_passes(network.actual->getFunction()); } diff --git a/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl.hpp b/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl.hpp index 504388e4afc..3941c48a50c 100644 --- a/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl.hpp +++ b/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl.hpp @@ -15,7 +15,7 @@ void ApplyMOCTransformations(InferenceEnginePython::IENetwork network, bool cf); void 
ApplyPOTTransformations(InferenceEnginePython::IENetwork network, std::string device); -void ApplyLowLatencyTransformation(InferenceEnginePython::IENetwork network, int64_t num_iterations); +void ApplyLowLatencyTransformation(InferenceEnginePython::IENetwork network, bool use_const_initializer = true); void ApplyPruningTransformation(InferenceEnginePython::IENetwork network); diff --git a/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl_defs.pxd b/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl_defs.pxd index 726880e9353..551e56c27a8 100644 --- a/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl_defs.pxd +++ b/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl_defs.pxd @@ -3,7 +3,6 @@ from libcpp cimport bool from libcpp.string cimport string -from libc.stdint cimport int64_t from ..inference_engine.ie_api_impl_defs cimport IENetwork @@ -12,10 +11,10 @@ cdef extern from "offline_transformations_api_impl.hpp" namespace "InferenceEngi cdef void ApplyPOTTransformations(IENetwork network, string device) - cdef void ApplyLowLatencyTransformation(IENetwork network, int64_t num_iterations) + cdef void ApplyLowLatencyTransformation(IENetwork network, bool use_const_initializer) cdef void ApplyPruningTransformation(IENetwork network) cdef void GenerateMappingFile(IENetwork network, string path, bool extract_names) - cdef void CheckAPI() \ No newline at end of file + cdef void CheckAPI() diff --git a/inference-engine/ie_bridges/python/tests/test_offline_api.py b/inference-engine/ie_bridges/python/tests/test_offline_api.py index b5565c04bb4..0bba0951c27 100644 --- a/inference-engine/ie_bridges/python/tests/test_offline_api.py +++ b/inference-engine/ie_bridges/python/tests/test_offline_api.py @@ -49,4 +49,4 @@ def test_pruning_transformations(): f = 
ng.function_from_cnn(net) assert f != None - assert len(f.get_ops()) == 3 \ No newline at end of file + assert len(f.get_ops()) == 3 diff --git a/inference-engine/include/ie_transformations.hpp b/inference-engine/include/ie_transformations.hpp index 6691fa74dae..0867b02972c 100644 --- a/inference-engine/include/ie_transformations.hpp +++ b/inference-engine/include/ie_transformations.hpp @@ -52,5 +52,41 @@ namespace InferenceEngine { * @param network A network to apply LowLatency transformation * * */ + +INFERENCE_ENGINE_DEPRECATED("This transformation will be removed in 2023.1. " + "Use InferenceEngine::lowLatency2 instead.") INFERENCE_ENGINE_API_CPP(void) LowLatency(InferenceEngine::CNNNetwork& network); + + +/** + * @brief The transformation finds all TensorIterator/Loop layers in the network, + * processes all back edges that describe a connection between Result and Parameter + * of the TensorIterator/Loop bodies, and inserts ReadValue and Assign layers at the + * input and output corresponding to this back edge. + * Supported platforms: CPU, GNA. + * + * The example below describes the changes made by the transformation + * [] - TensorIterator body + * () - new layer + * BE - back-edge + * + * before applying the transformation: + * -> input1[BE_1 -> Parameter -> Layers ... -> Result -> BE_1 ]output1-> + * + * after applying the transformation: + * ->(ReadValue)-> input1[BE_1 ->Parameter->Layers ...->Result->BE_1]output1 ->(Assign) + * \ + * ->... + * After applying the transformation, the resulting network can be inferred + * step by step, the states will be stored between inferences. + * @param network A network to apply LowLatency transformation + * @param use_const_initializer Changes the type of the initializing subgraph for ReadValue operations. + If "true", then the transformation inserts Constant before ReadValue operation. + If "false", then the transformation leaves the existing initializing subgraph for ReadValue operation. + * Loop operation by a given number. 
Does not affect TensorIterators. + * * + */ +INFERENCE_ENGINE_API_CPP(void) lowLatency2(InferenceEngine::CNNNetwork& network, + bool use_const_initializer = true); + } // namespace InferenceEngine diff --git a/inference-engine/src/inference_engine/ie_transformations.cpp b/inference-engine/src/inference_engine/ie_transformations.cpp index 15360ae97ea..2a87671ce25 100644 --- a/inference-engine/src/inference_engine/ie_transformations.cpp +++ b/inference-engine/src/inference_engine/ie_transformations.cpp @@ -11,6 +11,16 @@ using namespace InferenceEngine; void InferenceEngine::LowLatency(InferenceEngine::CNNNetwork &network) { auto function = network.getFunction(); ngraph::pass::Manager manager; + NGRAPH_SUPPRESS_DEPRECATED_START manager.register_pass(); + NGRAPH_SUPPRESS_DEPRECATED_END + manager.run_passes(function); +} + +void InferenceEngine::lowLatency2(InferenceEngine::CNNNetwork &network, + bool use_const_initializer) { + auto function = network.getFunction(); + ngraph::pass::Manager manager; + manager.register_pass(use_const_initializer); manager.run_passes(function); } diff --git a/inference-engine/tests/functional/inference_engine/transformations/low_latency_test.cpp b/inference-engine/tests/functional/inference_engine/transformations/low_latency_test.cpp index 69cb89991a3..2d1594d3841 100644 --- a/inference-engine/tests/functional/inference_engine/transformations/low_latency_test.cpp +++ b/inference-engine/tests/functional/inference_engine/transformations/low_latency_test.cpp @@ -68,7 +68,9 @@ TEST(TransformationTests, LowLatencyLSTM) { ngraph::pass::Manager manager; manager.register_pass(); + NGRAPH_SUPPRESS_DEPRECATED_START manager.register_pass(); + NGRAPH_SUPPRESS_DEPRECATED_END manager.register_pass(); manager.run_passes(f); } @@ -149,7 +151,9 @@ TEST(TransformationTests, LowLatencyGRU) { ngraph::pass::Manager manager; manager.register_pass(); + NGRAPH_SUPPRESS_DEPRECATED_START manager.register_pass(); + NGRAPH_SUPPRESS_DEPRECATED_END 
manager.register_pass(); manager.run_passes(f); @@ -227,7 +231,9 @@ TEST(TransformationTests, LowLatencyRNN) { ngraph::pass::Manager manager; manager.register_pass(); + NGRAPH_SUPPRESS_DEPRECATED_START manager.register_pass(); + NGRAPH_SUPPRESS_DEPRECATED_END manager.register_pass(); manager.run_passes(f); @@ -317,7 +323,9 @@ TEST(TransformationTests, LowLatencyLSTMReshape) { ngraph::pass::Manager manager; manager.register_pass(); + NGRAPH_SUPPRESS_DEPRECATED_START manager.register_pass(); + NGRAPH_SUPPRESS_DEPRECATED_END manager.register_pass(); manager.run_passes(f); } @@ -413,7 +421,9 @@ TEST(TransformationTests, LowLatencyLSTM_Loop) { ngraph::pass::Manager manager; manager.register_pass(); + NGRAPH_SUPPRESS_DEPRECATED_START manager.register_pass(); + NGRAPH_SUPPRESS_DEPRECATED_END manager.register_pass(); manager.run_passes(f); } diff --git a/inference-engine/tests/functional/inference_engine/transformations/low_latency_v2_test.cpp b/inference-engine/tests/functional/inference_engine/transformations/low_latency_v2_test.cpp new file mode 100644 index 00000000000..5c66e8109ef --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/transformations/low_latency_v2_test.cpp @@ -0,0 +1,829 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +#include "common_test_utils/ngraph_test_utils.hpp" + +using namespace testing; +using namespace ngraph; +using namespace opset7; +using namespace std; + +Output create_init_subgraph(const Output& in_node) { + auto const_zero = make_shared(in_node.get_element_type(), Shape{1}, 0); + auto shape_of = make_shared(in_node); + auto broadcast = make_shared(const_zero, shape_of); + return broadcast->output(0); +} + +Output insert_identity(const Output& in_node) { + auto axis_1 = Constant::create(element::i64, Shape{1}, {1}); + auto identity_1 = std::make_shared(in_node, 
axis_1); + return std::make_shared(identity_1, axis_1); +} + +std::shared_ptr createLSTMBody(const std::shared_ptr& Xi, + const std::shared_ptr& H_t, + const std::shared_ptr& C_t, + bool is_loop = false) { + // Body + auto axis = Constant::create(element::i64, Shape{}, {0}); + auto squeeze = std::make_shared(Xi, axis); + + auto w_val = std::vector(512 * 16, 0); + auto r_val = std::vector(512 * 128, 0); + auto b_val = std::vector(512, 0); + auto W = Constant::create(element::f32, Shape{512, 16}, w_val); + auto R = Constant::create(element::f32, Shape{512, 128}, r_val); + auto B = Constant::create(element::f32, Shape{512}, b_val); + + auto lstm_cell = std::make_shared(squeeze, H_t, C_t, W, R, B, 128); + auto res_1 = std::make_shared(lstm_cell->output(0)); + auto unsqueeze = std::make_shared(lstm_cell->output(0), axis); + auto res_2 = std::make_shared(unsqueeze); + auto res_3 = std::make_shared(lstm_cell->output(1)); + + auto func = std::make_shared(OutputVector{res_1, res_2, res_3}, + ParameterVector{Xi, H_t, C_t}); + if (is_loop) { + auto body_condition = std::make_shared( + element::boolean, Shape{1}, true); + auto cond_res = std::make_shared(body_condition); + func->add_results({cond_res}); + } + return func; +} + +TEST(TransformationTests, LowLatency2_LSTM) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto X = std::make_shared(element::f32, Shape{1, 1, 16}); + auto H_init = std::make_shared(element::f32, Shape{1, 128}); + auto C_init = std::make_shared(element::f32, Shape{1, 128}); + + auto Xi = std::make_shared(element::f32, Shape{1, 1, 16}); + auto H_t = std::make_shared(element::f32, Shape{1, 128}); + auto C_t = std::make_shared(element::f32, Shape{1, 128}); + + // Body + auto body = createLSTMBody(Xi, H_t, C_t); + auto results = body->get_results(); + + auto tensor_iterator = std::make_shared(); + tensor_iterator->set_body(body); + tensor_iterator->set_friendly_name("LSTMTensorIterator"); + + tensor_iterator->set_merged_input(C_t, C_init, 
results[2]); + tensor_iterator->set_sliced_input(Xi, X, 0, 1, 1, -1, 0); + tensor_iterator->set_merged_input(H_t, H_init, results[0]); + + tensor_iterator->get_iter_value(results[0], -1); + tensor_iterator->get_concatenated_slices(results[1], 0, 1, 1, -1, 0); + + auto res_ti_1 = std::make_shared(tensor_iterator->output(1)); + auto res_ti_2 = std::make_shared(tensor_iterator->output(0)); + f = std::make_shared(NodeVector{res_ti_1, res_ti_2}, + ParameterVector{X, H_init, C_init}); + + pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + manager.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + { + auto Xi = std::make_shared(element::f32, Shape{1, 1, 16}); + auto H_t = std::make_shared(element::f32, Shape{1, 128}); + auto C_t = std::make_shared(element::f32, Shape{1, 128}); + + const std::string variable_name_H("LSTMTensorIterator/variable0"); + const std::string variable_name_C("LSTMTensorIterator/variable1"); + auto variable_H = std::make_shared(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_H}); + auto variable_C = std::make_shared(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_C}); + auto read_value_H = std::make_shared(create_init_subgraph(H_t), variable_H); + auto read_value_C = std::make_shared(create_init_subgraph(C_t), variable_C); + // Body + auto axis = Constant::create(element::i64, Shape{}, {0}); + auto squeeze = std::make_shared(Xi, axis); + + auto w_val = std::vector(512 * 16, 0); + auto r_val = std::vector(512 * 128, 0); + auto b_val = std::vector(512, 0); + auto W = Constant::create(element::f32, Shape{512, 16}, w_val); + auto R = Constant::create(element::f32, Shape{512, 128}, r_val); + auto B = Constant::create(element::f32, Shape{512}, b_val); + + auto lstm_cell = std::make_shared(squeeze, read_value_H, read_value_C, W, R, B, 128); + auto assign_H = std::make_shared(lstm_cell->output(0), variable_H); + auto assign_C = std::make_shared(lstm_cell->output(1), variable_C); 
+ auto unsqueeze = std::make_shared(lstm_cell->output(0), axis); + auto res_2 = std::make_shared(insert_identity(unsqueeze)); + auto res_1 = std::make_shared(insert_identity(lstm_cell->output(0))); + f_ref = std::make_shared(OutputVector{res_1, res_2}, ParameterVector{Xi, H_t, C_t}); + f_ref->add_sinks({assign_C, assign_H}); + assign_H->add_control_dependency(read_value_H); + assign_C->add_control_dependency(read_value_C); + } + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(TransformationTests, LowLatency2_GRU) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto X = std::make_shared(element::f32, Shape{1, 1, 16}); + auto Y = std::make_shared(element::f32, Shape{1, 128}); + + auto Xi = std::make_shared(element::f32, Shape{1, 1, 16}); + auto Yi = std::make_shared(element::f32, Shape{1, 128}); + + // Body + auto axis = Constant::create(element::i64, Shape{}, {0}); + auto squeeze = std::make_shared(Xi, axis); + + auto w_val = std::vector(384 * 16, 0); + auto r_val = std::vector(384 * 128, 0); + auto b_val = std::vector(384, 0); + auto W = Constant::create(element::f32, Shape{384, 16}, w_val); + auto R = Constant::create(element::f32, Shape{384, 128}, r_val); + auto B = Constant::create(element::f32, Shape{384}, b_val); + + auto gru_cell = std::make_shared(squeeze, Yi, W, R, B, 128); + auto res_1 = std::make_shared(gru_cell); + auto unsqueeze = std::make_shared(gru_cell, axis); + auto res_2 = std::make_shared(unsqueeze); + auto body = std::make_shared(OutputVector{res_1, res_2}, ParameterVector{Xi, Yi}); + + auto tensor_iterator = std::make_shared(); + tensor_iterator->set_body(body); + + tensor_iterator->set_sliced_input(Xi, X, 0, 1, 1, -1, 0); + tensor_iterator->set_merged_input(Yi, Y, res_1); + + auto out0 = tensor_iterator->get_iter_value(res_1, -1); + auto out1 = tensor_iterator->get_concatenated_slices(res_2, 0, 1, 1, -1, 0); + + auto res_ti_1 = std::make_shared(tensor_iterator->output(1)); + f = 
std::make_shared(NodeVector{res_ti_1}, ParameterVector{X, Y}); + + pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + + manager.run_passes(f); + + ASSERT_NO_THROW(check_rt_info(f)); + } + { + auto Xi = std::make_shared(element::f32, Shape{1, 1, 16}); + auto H_t = std::make_shared(element::f32, Shape{1, 128}); + + const std::string variable_name_H("GRUTensorIterator/variable0"); + auto variable_H = std::make_shared(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_H}); + auto read_value_H = std::make_shared(create_init_subgraph(H_t), variable_H); + // Body + auto axis = Constant::create(element::i64, Shape{}, {0}); + auto squeeze = std::make_shared(Xi, axis); + + auto w_val = std::vector(384 * 16, 0); + auto r_val = std::vector(384 * 128, 0); + auto b_val = std::vector(384, 0); + auto W = Constant::create(element::f32, Shape{384, 16}, w_val); + auto R = Constant::create(element::f32, Shape{384, 128}, r_val); + auto B = Constant::create(element::f32, Shape{384}, b_val); + + auto rnn_cell = std::make_shared(squeeze, read_value_H, W, R, B, 128); + auto assign_H = std::make_shared(rnn_cell->output(0), variable_H); + auto res_1 = std::make_shared(assign_H); + auto unsqueeze = std::make_shared(rnn_cell->output(0), axis); + auto res_2 = std::make_shared(insert_identity(unsqueeze)); + f_ref = std::make_shared(ResultVector {res_2}, ParameterVector{Xi, H_t}); + f_ref->add_sinks({assign_H}); + assign_H->add_control_dependency(read_value_H); + } + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(TransformationTests, LowLatency2_RNN) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto X = std::make_shared(element::f32, Shape{1, 1, 16}); + auto Y = std::make_shared(element::f32, Shape{1, 128}); + + auto Xi = std::make_shared(element::f32, Shape{1, 1, 16}); + auto Yi = std::make_shared(element::f32, Shape{1, 128}); + + // Body + auto axis = Constant::create(element::i64, Shape{}, {0}); 
+ auto squeeze = std::make_shared(Xi, axis); + + auto w_val = std::vector(128 * 16, 0); + auto r_val = std::vector(128 * 128, 0); + auto b_val = std::vector(128, 0); + auto W = Constant::create(element::f32, Shape{128, 16}, w_val); + auto R = Constant::create(element::f32, Shape{128, 128}, r_val); + auto B = Constant::create(element::f32, Shape{128}, b_val); + + auto rnn_cell = std::make_shared(squeeze, Yi, W, R, B, 128); + auto res_1 = std::make_shared(rnn_cell); + auto unsqueeze = std::make_shared(rnn_cell, axis); + auto res_2 = std::make_shared(unsqueeze); + auto body = std::make_shared(OutputVector{res_1, res_2}, ParameterVector{Xi, + Yi}); + + auto tensor_iterator = std::make_shared(); + tensor_iterator->set_body(body); + + tensor_iterator->set_sliced_input(Xi, X, 0, 1, 1, -1, 0); + tensor_iterator->set_merged_input(Yi, Y, res_1); + + auto out0 = tensor_iterator->get_iter_value(res_1, -1); + auto out1 = tensor_iterator->get_concatenated_slices(res_2, 0, 1, 1, -1, 0); + + auto res_ti_1 = std::make_shared(tensor_iterator->output(1)); + f = std::make_shared(NodeVector{res_ti_1}, ParameterVector{X, Y}); + + pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + + manager.run_passes(f); + + ASSERT_NO_THROW(check_rt_info(f)); + } + { + auto Xi = std::make_shared(element::f32, Shape{1, 1, 16}); + auto H_t = std::make_shared(element::f32, Shape{1, 128}); + + const std::string variable_name_H("RNNTensorIterator/variable0"); + auto variable_H = std::make_shared(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_H}); + auto read_value_H = std::make_shared(create_init_subgraph(H_t), variable_H); + // Body + auto axis = Constant::create(element::i64, Shape{}, {0}); + auto squeeze = std::make_shared(Xi, axis); + + auto w_val = std::vector(128 * 16, 0); + auto r_val = std::vector(128 * 128, 0); + auto b_val = std::vector(128, 0); + auto W = Constant::create(element::f32, Shape{128, 16}, w_val); + auto R = 
Constant::create(element::f32, Shape{128, 128}, r_val); + auto B = Constant::create(element::f32, Shape{128}, b_val); + + auto rnn_cell = std::make_shared(squeeze, read_value_H, W, R, B, 128); + auto assign_H = std::make_shared(rnn_cell->output(0), variable_H); + auto res_1 = std::make_shared(assign_H); + auto unsqueeze = std::make_shared(rnn_cell->output(0), axis); + auto res_2 = std::make_shared(insert_identity(unsqueeze)); + f_ref = std::make_shared(ResultVector{res_2}, ParameterVector{Xi, H_t}); + f_ref->add_sinks({assign_H}); + assign_H->add_control_dependency(read_value_H); + } + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(TransformationTests, LowLatency2_LSTMReshape) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto X = std::make_shared(element::f32, Shape{2, 1, 16}); + auto H = std::make_shared(element::f32, Shape{1, 128}); + auto C = std::make_shared(element::f32, Shape{1, 128}); + + auto Xi = std::make_shared(element::f32, Shape{1, 1, 16}); + auto H_t = std::make_shared(element::f32, Shape{1, 128}); + auto C_t = std::make_shared(element::f32, Shape{1, 128}); + + // Body + auto body = createLSTMBody(Xi, H_t, C_t); + auto results = body->get_results(); + + auto tensor_iterator = std::make_shared(); + tensor_iterator->set_body(body); + + tensor_iterator->set_merged_input(C_t, C, results[2]); + tensor_iterator->set_sliced_input(Xi, X, 0, 1, 1, -1, 0); + tensor_iterator->set_merged_input(H_t, H, results[0]); + + auto out0 = tensor_iterator->get_iter_value(results[0], -1); + auto out1 = tensor_iterator->get_concatenated_slices(results[1], 0, 1, 1, -1, 0); + + auto res_ti_1 = std::make_shared(tensor_iterator->output(1)); + auto res_ti_2 = std::make_shared(tensor_iterator->output(0)); + f = std::make_shared(NodeVector{res_ti_1, res_ti_2}, ParameterVector{X, H, + C}); + + // Reshape + // change the number of iteration of TI. 
2 -> 1 + auto new_X = std::make_shared(element::f32, Shape{1, 1, 16}); + f->replace_parameter(0, new_X); + f->validate_nodes_and_infer_types(); + + pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + + manager.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + { + auto Xi = std::make_shared(element::f32, Shape{1, 1, 16}); + auto H_t = std::make_shared(element::f32, Shape{1, 128}); + auto C_t = std::make_shared(element::f32, Shape{1, 128}); + + const std::string variable_name_H("LSTMTensorIterator/variable0"); + const std::string variable_name_C("LSTMTensorIterator/variable1"); + auto variable_H = std::make_shared(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_H}); + auto variable_C = std::make_shared(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_C}); + auto read_value_H = std::make_shared(create_init_subgraph(H_t), variable_H); + auto read_value_C = std::make_shared(create_init_subgraph(C_t), variable_C); + // Body + auto axis = Constant::create(element::i64, Shape{}, {0}); + auto squeeze = std::make_shared(Xi, axis); + + auto w_val = std::vector(512 * 16, 0); + auto r_val = std::vector(512 * 128, 0); + auto b_val = std::vector(512, 0); + auto W = Constant::create(element::f32, Shape{512, 16}, w_val); + auto R = Constant::create(element::f32, Shape{512, 128}, r_val); + auto B = Constant::create(element::f32, Shape{512}, b_val); + + auto lstm_cell = std::make_shared(squeeze, read_value_H, read_value_C, W, R, B, 128); + auto assign_H = std::make_shared(lstm_cell->output(0), variable_H); + auto assign_C = std::make_shared(lstm_cell->output(1), variable_C); + auto unsqueeze = std::make_shared(lstm_cell->output(0), axis); + auto res_2 = std::make_shared(insert_identity(unsqueeze)); + auto res_1 = std::make_shared(insert_identity(lstm_cell->output(0))); + f_ref = std::make_shared(OutputVector{res_1, res_2}, ParameterVector{Xi, H_t, C_t}); + f_ref->add_sinks({assign_C, assign_H}); + 
assign_H->add_control_dependency(read_value_H); + assign_C->add_control_dependency(read_value_C); + } + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(TransformationTests, LowLatency2_LSTM_Loop) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto X = std::make_shared(element::f32, Shape{1, 1, 16}); + auto H_init = std::make_shared(element::f32, Shape{1, 128}); + auto C_init = std::make_shared(element::f32, Shape{1, 128}); + + auto Xi = std::make_shared(element::f32, Shape{1, 1, 16}); + auto H_t = std::make_shared(element::f32, Shape{1, 128}); + auto C_t = std::make_shared(element::f32, Shape{1, 128}); + + // Body + auto axis = Constant::create(element::i64, Shape{}, {0}); + auto squeeze = std::make_shared(Xi, axis); + + // Body + auto body = createLSTMBody(Xi, H_t, C_t, true); + auto results = body->get_results(); + + auto trip_count = + std::make_shared(element::i64, Shape{}, 1); + auto exec_condition = + std::make_shared(element::boolean, Shape{}, true); + auto loop = std::make_shared(trip_count, exec_condition); + loop->set_special_body_ports({-1, 3}); + loop->set_function(body); + loop->set_friendly_name("LSTMLoop"); + + loop->set_merged_input(C_t, C_init, results[2]); + loop->set_sliced_input(Xi, X, 0, 1, 1, -1, 0); + loop->set_merged_input(H_t, H_init, results[0]); + + auto out0 = loop->get_iter_value(results[0], -1); + auto out1 = loop->get_concatenated_slices(results[1], 0, 1, 1, -1, 0); + + auto res_ti_1 = std::make_shared(loop->output(1)); + auto res_ti_2 = std::make_shared(loop->output(0)); + f = std::make_shared(NodeVector{res_ti_1, res_ti_2}, + ParameterVector{X, H_init, C_init}); + + pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + + manager.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + { + auto Xi = std::make_shared(element::f32, Shape{1, 1, 16}); + auto H_t = std::make_shared(element::f32, Shape{1, 128}); + auto C_t = std::make_shared(element::f32, Shape{1, 
128}); + + const std::string variable_name_H("LSTMTensorIterator/variable0"); + const std::string variable_name_C("LSTMTensorIterator/variable1"); + auto variable_H = std::make_shared(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_H}); + auto variable_C = std::make_shared(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_C}); + auto read_value_H = std::make_shared(create_init_subgraph(H_t), variable_H); + auto read_value_C = std::make_shared(create_init_subgraph(C_t), variable_C); + // Body + auto axis = Constant::create(element::i64, Shape{}, {0}); + auto squeeze = std::make_shared(Xi, axis); + + auto w_val = std::vector(512 * 16, 0); + auto r_val = std::vector(512 * 128, 0); + auto b_val = std::vector(512, 0); + auto W = Constant::create(element::f32, Shape{512, 16}, w_val); + auto R = Constant::create(element::f32, Shape{512, 128}, r_val); + auto B = Constant::create(element::f32, Shape{512}, b_val); + + auto lstm_cell = std::make_shared(squeeze, read_value_H, read_value_C, W, R, B, 128); + auto assign_H = std::make_shared(lstm_cell->output(0), variable_H); + auto assign_C = std::make_shared(lstm_cell->output(1), variable_C); + auto unsqueeze = std::make_shared(lstm_cell->output(0), axis); + auto res_2 = std::make_shared(insert_identity(unsqueeze)); + auto res_1 = std::make_shared(insert_identity(lstm_cell->output(0))); + f_ref = std::make_shared(OutputVector{res_1, res_2}, ParameterVector{Xi, H_t, C_t}); + f_ref->add_sinks({assign_C, assign_H}); + assign_H->add_control_dependency(read_value_H); + assign_C->add_control_dependency(read_value_C); + } + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(TransformationTests, LowLatency2_LSTM_several_iterations) { + constexpr int ITER_CNT = 5; + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto X = std::make_shared(element::f32, Shape{ITER_CNT, 1, 16}); + auto H = std::make_shared(element::f32, Shape{1, 128}); + auto C = 
std::make_shared(element::f32, Shape{1, 128}); + + auto Xi = std::make_shared(element::f32, Shape{1, 1, 16}); + auto H_t = std::make_shared(element::f32, Shape{1, 128}); + auto C_t = std::make_shared(element::f32, Shape{1, 128}); + + // Body + auto body = createLSTMBody(Xi, H_t, C_t); + auto results = body->get_results(); + + auto tensor_iterator = std::make_shared(); + tensor_iterator->set_body(body); + + tensor_iterator->set_merged_input(C_t, C, results[2]); + tensor_iterator->set_sliced_input(Xi, X, 0, 1, 1, -1, 0); + tensor_iterator->set_merged_input(H_t, H, results[0]); + + auto out0 = tensor_iterator->get_iter_value(results[0], -1); + auto out1 = tensor_iterator->get_concatenated_slices(results[1], 0, 1, 1, -1, 0); + + auto res_ti_1 = std::make_shared(tensor_iterator->output(1)); + auto res_ti_2 = std::make_shared(tensor_iterator->output(0)); + f = std::make_shared(NodeVector{res_ti_1, res_ti_2}, ParameterVector{X, H, + C}); + + pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + + manager.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + + // TensorIterator not unrolled. 
+ { + auto X = std::make_shared(element::f32, Shape{ITER_CNT, 1, 16}); + auto H = std::make_shared(element::f32, Shape{1, 128}); + auto C = std::make_shared(element::f32, Shape{1, 128}); + + const std::string variable_name_H("LSTMTensorIterator/variable0"); + const std::string variable_name_C("LSTMTensorIterator/variable1"); + auto variable_H = std::make_shared(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_H}); + auto variable_C = std::make_shared(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_C}); + auto read_value_H = std::make_shared(create_init_subgraph(H), variable_H); + auto read_value_C = std::make_shared(create_init_subgraph(C), variable_C); + + // Body + + auto Xi = std::make_shared(element::f32, Shape{1, 1, 16}); + auto H_t = std::make_shared(element::f32, Shape{1, 128}); + auto C_t = std::make_shared(element::f32, Shape{1, 128}); + + // Body + auto axis = Constant::create(element::i64, Shape{}, {0}); + auto squeeze = std::make_shared(Xi, axis); + + auto w_val = std::vector(512 * 16, 0); + auto r_val = std::vector(512 * 128, 0); + auto b_val = std::vector(512, 0); + auto W = Constant::create(element::f32, Shape{512, 16}, w_val); + auto R = Constant::create(element::f32, Shape{512, 128}, r_val); + auto B = Constant::create(element::f32, Shape{512}, b_val); + + auto lstm_cell = std::make_shared(squeeze, H_t, C_t, W, R, B, 128); + auto res_1 = std::make_shared(lstm_cell->output(0)); + auto unsqueeze = std::make_shared(lstm_cell, axis); + auto res_2 = std::make_shared(unsqueeze); + auto res_3 = std::make_shared(lstm_cell->output(1)); + auto body = std::make_shared(OutputVector{res_1, res_2, res_3}, + ParameterVector{Xi, H_t, C_t}); + + auto tensor_iterator = std::make_shared(); + tensor_iterator->set_body(body); + + tensor_iterator->set_merged_input(C_t, read_value_C, res_3); + tensor_iterator->set_sliced_input(Xi, X, 0, 1, 1, -1, 0); + tensor_iterator->set_merged_input(H_t, read_value_H, res_1); + + auto out0 = 
tensor_iterator->get_iter_value(res_1, -1); + auto out1 = tensor_iterator->get_concatenated_slices(res_2, 0, 1, 1, -1, 0); + auto out2 = tensor_iterator->get_iter_value(res_3, -1); + + auto assign_H = std::make_shared(out0, variable_H); + auto assign_C = std::make_shared(out2, variable_C); + auto outer_res_2 = std::make_shared(out1); + auto outer_res_1 = std::make_shared(out0); + f_ref = std::make_shared(OutputVector{outer_res_1, outer_res_2}, ParameterVector{X, H, C}); + f_ref->add_sinks({assign_C, assign_H}); + assign_H->add_control_dependency(read_value_H); + assign_C->add_control_dependency(read_value_C); + } + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(TransformationTests, LowLatency2_LSTM_Loop_Reshape) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto X = std::make_shared(element::f32, Shape{10, 1, 16}); + auto H_init = std::make_shared(element::f32, Shape{1, 128}); + auto C_init = std::make_shared(element::f32, Shape{1, 128}); + + auto Xi = std::make_shared(element::f32, Shape{1, 1, 16}); + auto H_t = std::make_shared(element::f32, Shape{1, 128}); + auto C_t = std::make_shared(element::f32, Shape{1, 128}); + + // Body + auto body = createLSTMBody(Xi, H_t, C_t, true); + auto results = body->get_results(); + + auto shape_of = std::make_shared(X); + const auto trip_count = std::make_shared(shape_of, Constant::create(ngraph::element::i64, {1}, {0}), + Constant::create(ngraph::element::i64, {1}, {0})); + auto exec_condition = + std::make_shared(element::boolean, Shape{}, true); + auto loop = std::make_shared(trip_count, exec_condition); + loop->set_special_body_ports({-1, 3}); + loop->set_function(body); + loop->set_friendly_name("LSTMLoop"); + + loop->set_merged_input(C_t, C_init, results[2]); + loop->set_sliced_input(Xi, X, 0, 1, 1, -1, 0); + loop->set_merged_input(H_t, H_init, results[0]); + + auto out0 = loop->get_iter_value(results[0], -1); + auto out1 = loop->get_concatenated_slices(results[1], 0, 1, 
1, -1, 0); + + auto res_ti_1 = std::make_shared(loop->output(1)); + auto res_ti_2 = std::make_shared(loop->output(0)); + f = std::make_shared(NodeVector{res_ti_1, res_ti_2}, + ParameterVector{X, H_init, C_init}); + + // Reshape + // change the number of iteration of Loop. 10 -> 1 + auto new_X = std::make_shared(element::f32, Shape{1, 1, 16}); + f->replace_parameter(0, new_X); + f->validate_nodes_and_infer_types(); + + pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + + manager.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + { + auto Xi = std::make_shared(element::f32, Shape{1, 1, 16}); + auto H_t = std::make_shared(element::f32, Shape{1, 128}); + auto C_t = std::make_shared(element::f32, Shape{1, 128}); + + const std::string variable_name_H("LSTMTensorIterator/variable0"); + const std::string variable_name_C("LSTMTensorIterator/variable1"); + auto variable_H = std::make_shared(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_H}); + auto variable_C = std::make_shared(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_C}); + auto read_value_H = std::make_shared(create_init_subgraph(H_t), variable_H); + auto read_value_C = std::make_shared(create_init_subgraph(C_t), variable_C); + // Body + auto axis = Constant::create(element::i64, Shape{}, {0}); + auto squeeze = std::make_shared(Xi, axis); + + auto w_val = std::vector(512 * 16, 0); + auto r_val = std::vector(512 * 128, 0); + auto b_val = std::vector(512, 0); + auto W = Constant::create(element::f32, Shape{512, 16}, w_val); + auto R = Constant::create(element::f32, Shape{512, 128}, r_val); + auto B = Constant::create(element::f32, Shape{512}, b_val); + + auto lstm_cell = std::make_shared(squeeze, read_value_H, read_value_C, W, R, B, 128); + auto assign_H = std::make_shared(lstm_cell->output(0), variable_H); + auto assign_C = std::make_shared(lstm_cell->output(1), variable_C); + auto unsqueeze = std::make_shared(lstm_cell->output(0), axis); 
+ auto res_2 = std::make_shared(insert_identity(unsqueeze)); + auto res_1 = std::make_shared(insert_identity(lstm_cell->output(0))); + f_ref = std::make_shared(OutputVector{res_1, res_2}, ParameterVector{Xi, H_t, C_t}); + f_ref->add_sinks({assign_C, assign_H}); + assign_H->add_control_dependency(read_value_H); + assign_C->add_control_dependency(read_value_C); + } + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} + + +TEST(TransformationTests, LowLatency2_LSTM_Loop_several_iterations) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto X = std::make_shared(element::f32, Shape{10, 1, 16}); + auto H_init = std::make_shared(element::f32, Shape{1, 128}); + auto C_init = std::make_shared(element::f32, Shape{1, 128}); + + auto Xi = std::make_shared(element::f32, Shape{1, 1, 16}); + auto H_t = std::make_shared(element::f32, Shape{1, 128}); + auto C_t = std::make_shared(element::f32, Shape{1, 128}); + + // Body + auto body = createLSTMBody(Xi, H_t, C_t, true); + auto results = body->get_results(); + + auto trip_count = + std::make_shared(element::i64, Shape{}, 10); + auto exec_condition = + std::make_shared(element::boolean, Shape{}, true); + auto loop = std::make_shared(trip_count, exec_condition); + loop->set_special_body_ports({-1, 3}); + loop->set_function(body); + loop->set_friendly_name("LSTMLoop"); + + loop->set_merged_input(C_t, C_init, results[2]); + loop->set_sliced_input(Xi, X, 0, 1, 1, -1, 0); + loop->set_merged_input(H_t, H_init, results[0]); + + auto out0 = loop->get_iter_value(results[0], -1); + auto out1 = loop->get_concatenated_slices(results[1], 0, 1, 1, -1, 0); + + auto res_ti_1 = std::make_shared(loop->output(1)); + auto res_ti_2 = std::make_shared(loop->output(0)); + f = std::make_shared(NodeVector{res_ti_1, res_ti_2}, + ParameterVector{X, H_init, C_init}); + + pass::Manager manager; + manager.register_pass(); + manager.register_pass(true); + + manager.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + { 
+ auto X = std::make_shared(element::f32, Shape{10, 1, 16}); + auto H = std::make_shared(element::f32, Shape{1, 128}); + auto C = std::make_shared(element::f32, Shape{1, 128}); + + const std::string variable_name_H("LSTMTensorIterator/variable0"); + const std::string variable_name_C("LSTMTensorIterator/variable1"); + auto variable_H = std::make_shared(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_H}); + auto variable_C = std::make_shared(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_C}); + auto read_value_H = std::make_shared(create_init_subgraph(H), variable_H); + auto read_value_C = std::make_shared(create_init_subgraph(C), variable_C); + + // Body + auto Xi = std::make_shared(element::f32, Shape{1, 1, 16}); + auto H_t = std::make_shared(element::f32, Shape{1, 128}); + auto C_t = std::make_shared(element::f32, Shape{1, 128}); + + // Body + auto axis = Constant::create(element::i64, Shape{}, {0}); + auto squeeze = std::make_shared(Xi, axis); + + auto w_val = std::vector(512 * 16, 0); + auto r_val = std::vector(512 * 128, 0); + auto b_val = std::vector(512, 0); + auto W = Constant::create(element::f32, Shape{512, 16}, w_val); + auto R = Constant::create(element::f32, Shape{512, 128}, r_val); + auto B = Constant::create(element::f32, Shape{512}, b_val); + + auto lstm_cell = std::make_shared(squeeze, H_t, C_t, W, R, B, 128); + auto res_1 = std::make_shared(lstm_cell->output(0)); + auto unsqueeze = std::make_shared(lstm_cell->output(0), axis); + auto res_2 = std::make_shared(unsqueeze); + auto res_3 = std::make_shared(lstm_cell->output(1)); + auto body_condition = std::make_shared( + element::boolean, Shape{1}, true); + auto body = std::make_shared(OutputVector{res_1, res_2, res_3, body_condition}, + ParameterVector{Xi, H_t, C_t}); + + auto trip_count = + std::make_shared(element::i64, Shape{}, 10); + auto exec_condition = + std::make_shared(element::boolean, Shape{}, true); + auto loop = std::make_shared(trip_count, 
exec_condition); + loop->set_special_body_ports({-1, 3}); + loop->set_function(body); + loop->set_friendly_name("LSTMLoop"); + + loop->set_merged_input(C_t, read_value_C, res_3); + loop->set_sliced_input(Xi, X, 0, 1, 1, -1, 0); + loop->set_merged_input(H_t, read_value_H, res_1); + + auto out0 = loop->get_iter_value(res_1, -1); + auto out1 = loop->get_concatenated_slices(res_2, 0, 1, 1, -1, 0); + auto out3 = loop->get_iter_value(res_3, -1); + + auto assign_H = std::make_shared(out0, variable_H); + auto assign_C = std::make_shared(out3, variable_C); + auto outer_res_2 = std::make_shared(out1); + auto outer_res_1 = std::make_shared(out0); + f_ref = std::make_shared(OutputVector{outer_res_1, outer_res_2}, ParameterVector{X, H, C}); + f_ref->add_sinks({assign_C, assign_H}); + assign_H->add_control_dependency(read_value_H); + assign_C->add_control_dependency(read_value_C); + } + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(TransformationTests, LowLatencyLSTM_LLTv1_LLTv2) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto X = std::make_shared(element::f32, Shape{1, 1, 16}); + auto H_init = std::make_shared(element::f32, Shape{1, 128}); + auto C_init = std::make_shared(element::f32, Shape{1, 128}); + + auto Xi = std::make_shared(element::f32, Shape{1, 1, 16}); + auto H_t = std::make_shared(element::f32, Shape{1, 128}); + auto C_t = std::make_shared(element::f32, Shape{1, 128}); + + // Body + auto axis = Constant::create(element::i64, Shape{}, {0}); + auto squeeze = std::make_shared(Xi, axis); + + auto w_val = std::vector(512 * 16, 0); + auto r_val = std::vector(512 * 128, 0); + auto b_val = std::vector(512, 0); + auto W = Constant::create(element::f32, Shape{512, 16}, w_val); + auto R = Constant::create(element::f32, Shape{512, 128}, r_val); + auto B = Constant::create(element::f32, Shape{512}, b_val); + + auto lstm_cell = std::make_shared(squeeze, H_t, C_t, W, R, B, 128); + auto res_1 = 
std::make_shared(lstm_cell->output(0)); + auto unsqueeze = std::make_shared(lstm_cell->output(0), axis); + auto res_2 = std::make_shared(unsqueeze); + auto res_3 = std::make_shared(lstm_cell->output(1)); + auto body = std::make_shared(OutputVector{res_1, res_2, res_3}, ParameterVector{Xi, H_t, C_t}); + + auto tensor_iterator = std::make_shared(); + tensor_iterator->set_body(body); + tensor_iterator->set_friendly_name("LSTMTensorIterator"); + + tensor_iterator->set_merged_input(C_t, C_init, res_3); + tensor_iterator->set_sliced_input(Xi, X, 0, 1, 1, -1, 0); + tensor_iterator->set_merged_input(H_t, H_init, res_1); + + auto out0 = tensor_iterator->get_iter_value(res_1, -1); + auto out1 = tensor_iterator->get_concatenated_slices(res_2, 0, 1, 1, -1, 0); + + auto res_ti_1 = std::make_shared(tensor_iterator->output(1)); + auto res_ti_2 = std::make_shared(tensor_iterator->output(0)); + f = std::make_shared(NodeVector{res_ti_1, res_ti_2}, + ParameterVector{X, H_init, C_init}); + + auto f_2 = ngraph::clone_function(*f); + pass::Manager manager_2; + manager_2.register_pass(); + NGRAPH_SUPPRESS_DEPRECATED_START + manager_2.register_pass(); + NGRAPH_SUPPRESS_DEPRECATED_END + EXPECT_NO_THROW(manager_2.run_passes(f_2)); + + pass::Manager manager; + manager.register_pass(); + NGRAPH_SUPPRESS_DEPRECATED_START + manager.register_pass(); + NGRAPH_SUPPRESS_DEPRECATED_END + // LLT v2 doesn't insert Assign/ReadValue ops, they are already inserted + // but unrolls TI/Loop + manager.register_pass(); + + EXPECT_NO_THROW(manager.run_passes(f)); + } +} diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/memory.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/memory.cpp index 750a0e4af5e..9ab20c3eda4 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/memory.cpp +++ 
b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/memory.cpp @@ -10,6 +10,13 @@ using namespace LayerTestsDefinitions; namespace { +std::vector transformation { + ngraph::helpers::MemoryTransformation::NONE, + ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2, + ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API, + ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_ORIGINAL_INIT, +}; + const std::vector inShapes = { {3}, {100, 100}, @@ -27,6 +34,7 @@ const std::vector iterationCount { INSTANTIATE_TEST_CASE_P(smoke_MemoryTest, MemoryTest, ::testing::Combine( + ::testing::ValuesIn(transformation), ::testing::ValuesIn(iterationCount), ::testing::ValuesIn(inShapes), ::testing::ValuesIn(inputPrecisions), diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/memory_LSTMCell.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/memory_LSTMCell.cpp new file mode 100644 index 00000000000..8e975434347 --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/memory_LSTMCell.cpp @@ -0,0 +1,45 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include "common_test_utils/test_constants.hpp" + +namespace SubgraphTestsDefinitions { + std::vector transformation { + ngraph::helpers::MemoryTransformation::NONE, + ngraph::helpers::MemoryTransformation::LOW_LATENCY, + ngraph::helpers::MemoryTransformation::LOW_LATENCY_REGULAR_API, + ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2, + ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API + }; + + std::vector input_sizes = { + 80, + 32, + 64, + 100, + 25 + }; + + std::vector hidden_sizes = { + 128, + 200, + 300, + 24, + 32, + }; + + std::map additional_config = { + }; + + INSTANTIATE_TEST_CASE_P(smoke_MemoryLSTMCellTest, MemoryLSTMCellTest, + ::testing::Combine( + 
::testing::ValuesIn(transformation), + ::testing::Values(CommonTestUtils::DEVICE_CPU), + ::testing::Values(InferenceEngine::Precision::FP32), + ::testing::ValuesIn(input_sizes), + ::testing::ValuesIn(hidden_sizes), + ::testing::Values(additional_config)), + MemoryLSTMCellTest::getTestCaseName); +} // namespace SubgraphTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/multiple_LSTMCell.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/multiple_LSTMCell.cpp index 3468d2b417f..4c18ee6ea05 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/multiple_LSTMCell.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/multiple_LSTMCell.cpp @@ -7,6 +7,15 @@ namespace SubgraphTestsDefinitions { namespace { + +std::vector transformation { + ngraph::helpers::MemoryTransformation::NONE, + ngraph::helpers::MemoryTransformation::LOW_LATENCY, + ngraph::helpers::MemoryTransformation::LOW_LATENCY_REGULAR_API, + ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2, + ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API +}; + std::vector input_sizes = { 80, 32, @@ -28,6 +37,7 @@ std::map additional_config = { INSTANTIATE_TEST_CASE_P(MultipleLSTMCellTest, MultipleLSTMCellTest, ::testing::Combine( + ::testing::ValuesIn(transformation), ::testing::Values(CommonTestUtils::DEVICE_CPU), ::testing::Values(InferenceEngine::Precision::FP32), ::testing::ValuesIn(input_sizes), diff --git a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/single_layer_tests/memory.cpp b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/single_layer_tests/memory.cpp index 2ab1357f674..c04b76705cc 100644 --- a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/single_layer_tests/memory.cpp +++ 
b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/single_layer_tests/memory.cpp @@ -10,9 +10,17 @@ using namespace LayerTestsDefinitions; namespace { +std::vector transformation { + ngraph::helpers::MemoryTransformation::NONE, + ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2, + ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API, + ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_ORIGINAL_INIT +}; + const std::vector inShapes = { {1, 1}, - {1, 2} + {1, 2}, + {1, 10} }; const std::vector inputPrecisions = { @@ -22,11 +30,13 @@ const std::vector inputPrecisions = { const std::vector iterationCount { 1, 3, + 4, 10 }; INSTANTIATE_TEST_CASE_P(smoke_MemoryTest, MemoryTest, ::testing::Combine( + ::testing::ValuesIn(transformation), ::testing::ValuesIn(iterationCount), ::testing::ValuesIn(inShapes), ::testing::ValuesIn(inputPrecisions), diff --git a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_config.cpp b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_config.cpp index 9f2c05ab6d1..cb4cc459a95 100644 --- a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_config.cpp +++ b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_config.cpp @@ -64,5 +64,13 @@ std::vector disabledTestPatterns() { R"(.*CachingSupport.*_batch2_.*)", // TODO: Issue 51525 R"(.*CachingSupport.*KSOFunction.*)", + // TODO: Issue 57363 (Param -> Result subgraphs) + R"(.*smoke_MemoryTest.*LOW_LATENCY.*iteration_count=1_.*)", + // TODO: Issue 57368 (accuracy) + R"(.*smoke_MemoryTest.*LOW_LATENCY.*IS=\(1.10\).*)", + R"(.*smoke_MemoryTest.*iteration_count=3.*IS=\(1.10\).*)", + R"(.*smoke_MemoryTest.*iteration_count=4.*IS=\(1.10\).*)", + R"(.*smoke_MemoryTest.*iteration_count=10.*IS=\(1.10\).*)", + R"(.*smoke_MemoryTest.*LOW_LATENCY.*iteration_count=10.*IS=\(1.2\).*)", }; } diff --git 
a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/memory_LSTMCell.cpp b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/memory_LSTMCell.cpp index a8d651f6c80..5818a40d4ec 100644 --- a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/memory_LSTMCell.cpp +++ b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/memory_LSTMCell.cpp @@ -6,6 +6,14 @@ #include "common_test_utils/test_constants.hpp" namespace SubgraphTestsDefinitions { + std::vector transformation { + ngraph::helpers::MemoryTransformation::NONE, + ngraph::helpers::MemoryTransformation::LOW_LATENCY, + ngraph::helpers::MemoryTransformation::LOW_LATENCY_REGULAR_API, + ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2, + ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API + }; + std::vector input_sizes = { 80, 32, @@ -30,6 +38,7 @@ namespace SubgraphTestsDefinitions { INSTANTIATE_TEST_CASE_P(smoke_MemoryLSTMCellTest, MemoryLSTMCellTest, ::testing::Combine( + ::testing::ValuesIn(transformation), ::testing::Values(CommonTestUtils::DEVICE_GNA), ::testing::Values(InferenceEngine::Precision::FP32), ::testing::ValuesIn(input_sizes), diff --git a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/multiple_LSTMCell.cpp b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/multiple_LSTMCell.cpp index 2d94617991d..d59c023773b 100644 --- a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/multiple_LSTMCell.cpp +++ b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/multiple_LSTMCell.cpp @@ -7,6 +7,15 @@ namespace SubgraphTestsDefinitions { namespace { + +std::vector transformation { + ngraph::helpers::MemoryTransformation::NONE, + ngraph::helpers::MemoryTransformation::LOW_LATENCY, + 
ngraph::helpers::MemoryTransformation::LOW_LATENCY_REGULAR_API, + ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2, + ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API +}; + std::vector input_sizes = { 80, 32, @@ -31,6 +40,7 @@ std::map additional_config = { INSTANTIATE_TEST_CASE_P(MultipleLSTMCellTest, MultipleLSTMCellTest, ::testing::Combine( + ::testing::ValuesIn(transformation), ::testing::Values(CommonTestUtils::DEVICE_GNA), ::testing::Values(InferenceEngine::Precision::FP32), ::testing::ValuesIn(input_sizes), diff --git a/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/basic_lstm.hpp b/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/basic_lstm.hpp index 17bf8d31cf0..7ae1d21a886 100644 --- a/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/basic_lstm.hpp +++ b/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/basic_lstm.hpp @@ -39,7 +39,7 @@ TEST_P(Basic_LSTM_S, CompareWithRefImpl_LowLatencyTransformation) { // Apply LowLatency and UnrollTensorIterator transformations ngraph::pass::Manager manager; - manager.register_pass(); // LowLatency enables UnrollTI + manager.register_pass(); // LowLatency enables UnrollTI manager.run_passes(function); LoadNetwork(); IE_SUPPRESS_DEPRECATED_START diff --git a/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/memory_LSTMCell.hpp b/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/memory_LSTMCell.hpp index 947f0fa1e1a..a6f250301b6 100644 --- a/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/memory_LSTMCell.hpp +++ b/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/memory_LSTMCell.hpp @@ -12,12 +12,4 @@ TEST_P(MemoryLSTMCellTest, CompareWithRefs) { Run(); }; -TEST_P(MemoryLSTMCellTest, CompareWithRefs_LowLatencyTransformation) { - RunLowLatency(); -}; - -TEST_P(MemoryLSTMCellTest, CompareWithRefs_LowLatencyRegularAPITransformation) 
{ - RunLowLatency(true); -}; - } // namespace SubgraphTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/multiple_LSTMCell.hpp b/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/multiple_LSTMCell.hpp index 5dd18ff4463..0135c09e170 100644 --- a/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/multiple_LSTMCell.hpp +++ b/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/multiple_LSTMCell.hpp @@ -12,12 +12,4 @@ TEST_P(MultipleLSTMCellTest, CompareWithRefs) { Run(); }; -TEST_P(MultipleLSTMCellTest, CompareWithRefs_LowLatencyTransformation) { - RunLowLatency(); -}; - -TEST_P(MultipleLSTMCellTest, CompareWithRefs_LowLatencyRegularAPITransformation) { - RunLowLatency(true); -}; - } // namespace SubgraphTestsDefinitions diff --git a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/memory.hpp b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/memory.hpp index ca16e30148e..59ad6c54e5a 100644 --- a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/memory.hpp +++ b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/memory.hpp @@ -14,6 +14,7 @@ namespace LayerTestsDefinitions { using MemoryTestParams = std::tuple< + ngraph::helpers::MemoryTransformation, // Apply Memory transformation int64_t, // iterationCount InferenceEngine::SizeVector, // inputShape InferenceEngine::Precision, // netPrecision @@ -28,9 +29,17 @@ protected: std::vector>> CalculateRefs() override; void SetUp() override; private: + void CreateTIFunc(); + void CreateCommonFunc(); + void ApplyLowLatency(); + InferenceEngine::Precision netPrecision; ngraph::EvaluationContext eval_context; + ngraph::helpers::MemoryTransformation transformation; + int64_t iteration_count; + ngraph::element::Type ngPrc; + 
InferenceEngine::SizeVector inputShape; }; } // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/memory_LSTMCell.hpp b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/memory_LSTMCell.hpp index 489431e2a9b..cc0aeb26e2f 100644 --- a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/memory_LSTMCell.hpp +++ b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/memory_LSTMCell.hpp @@ -10,6 +10,7 @@ namespace SubgraphTestsDefinitions { typedef std::tuple< + ngraph::helpers::MemoryTransformation, // Apply Memory transformation std::string, // Target device name InferenceEngine::Precision, // Network precision size_t, // Input size @@ -21,9 +22,13 @@ class MemoryLSTMCellTest : public LayerTestsUtils::LayerTestsCommon, public testing::WithParamInterface { private: // you have to Unroll TI manually and remove memory untill ngraph supports it + // since we are switching models, we need to generate and save weights, biases and inputs in SetUp void switchToNgraphFriendlyModel(); void CreatePureTensorIteratorModel(); - // since we switching models we need to generate and save weights biases and inputs in SetUp + void InitMemory(); + void ApplyLowLatency(); + + ngraph::helpers::MemoryTransformation transformation; std::vector input_bias; std::vector input_weights; std::vector hidden_memory_init; @@ -34,7 +39,6 @@ private: protected: void SetUp() override; void Run() override; - void RunLowLatency(bool regular_api = false); public: static std::string getTestCaseName(const testing::TestParamInfo &obj); }; diff --git a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/multiple_LSTMCell.hpp b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/multiple_LSTMCell.hpp index 7c1e72a7bb7..7932ba39a07 100644
--- a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/multiple_LSTMCell.hpp +++ b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/multiple_LSTMCell.hpp @@ -10,6 +10,7 @@ namespace SubgraphTestsDefinitions { typedef std::tuple< + ngraph::helpers::MemoryTransformation, // Apply Memory transformation std::string, // Target device name InferenceEngine::Precision, // Network precision size_t, // Input size @@ -21,9 +22,12 @@ class MultipleLSTMCellTest : public LayerTestsUtils::LayerTestsCommon, public testing::WithParamInterface { private: // you have to Unroll TI manually and remove memory untill ngraph supports it + // since we are switching models, we need to generate and save weights, biases and inputs in SetUp void switchToNgraphFriendlyModel(); void CreatePureTensorIteratorModel(); - // since we switching models we need to generate and save weights biases and inputs in SetUp + void InitMemory(); + void ApplyLowLatency(); + size_t hiddenSize; std::vector input_bias; std::vector input_weights; @@ -33,10 +37,10 @@ private: std::vector weights_2_vals; std::vector reccurrenceWeights_vals; std::vector bias_vals; + ngraph::helpers::MemoryTransformation transformation; protected: void SetUp() override; void Run() override; - void RunLowLatency(bool regular_api = false); public: static std::string getTestCaseName(const testing::TestParamInfo &obj); }; diff --git a/inference-engine/tests/functional/shared_test_classes/src/single_layer/memory.cpp b/inference-engine/tests/functional/shared_test_classes/src/single_layer/memory.cpp index 0984a4dbeab..c059768c4a2 100644 --- a/inference-engine/tests/functional/shared_test_classes/src/single_layer/memory.cpp +++ b/inference-engine/tests/functional/shared_test_classes/src/single_layer/memory.cpp @@ -3,10 +3,18 @@ // #include +#include +#include +#include +#include #include "ngraph/opsets/opset7.hpp" #include "ngraph_functions/builders.hpp" +#include
"ngraph/pass/low_latency.hpp" #include "shared_test_classes/single_layer/memory.hpp" +using namespace ngraph; +using namespace opset7; + namespace LayerTestsDefinitions { std::string MemoryTest::getTestCaseName(const testing::TestParamInfo &obj) { @@ -14,9 +22,11 @@ namespace LayerTestsDefinitions { InferenceEngine::Precision netPrecision; InferenceEngine::SizeVector inputShape; std::string targetDevice; - std::tie(iteration_count, inputShape, netPrecision, targetDevice) = obj.param; + ngraph::helpers::MemoryTransformation transformation; + std::tie(transformation, iteration_count, inputShape, netPrecision, targetDevice) = obj.param; std::ostringstream result; + result << "transformation=" << transformation << "_"; result << "iteration_count=" << iteration_count << "_"; result << "IS=" << CommonTestUtils::vec2str(inputShape) << "_"; result << "netPRC=" << netPrecision.name() << "_"; @@ -26,20 +36,17 @@ namespace LayerTestsDefinitions { } void MemoryTest::SetUp() { - using namespace ngraph; - InferenceEngine::SizeVector inputShape; - std::tie(iteration_count, inputShape, netPrecision, targetDevice) = this->GetParam(); - auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); + std::tie(transformation, iteration_count, inputShape, netPrecision, targetDevice) = this->GetParam(); + ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); - auto param = ngraph::builder::makeParams(ngPrc, {inputShape}); - auto variable = std::make_shared(VariableInfo{PartialShape::dynamic(), element::dynamic, "v0"}); - auto read_value = std::make_shared(param.at(0), variable); - auto add = std::make_shared(read_value, param.at(0)); - auto assign = std::make_shared(add, variable); - auto res = std::make_shared(add); - function = std::make_shared(ResultVector{res}, SinkVector{assign}, param, "TestMemory"); + if (transformation == ngraph::helpers::MemoryTransformation::NONE) { + CreateCommonFunc(); + } else { + CreateTIFunc(); + ApplyLowLatency(); + } 
- auto hostTensor = std::make_shared(ngPrc, inputShape); + auto hostTensor = std::make_shared(ngPrc, inputShape); auto variable_context = std::make_shared>(VariableContext()); auto variable_value = std::make_shared(hostTensor); variable_context->get().set_variable_value(function->get_variable_by_id("v0"), variable_value); @@ -48,6 +55,7 @@ namespace LayerTestsDefinitions { void MemoryTest::Run() { + SKIP_IF_CURRENT_TEST_IS_DISABLED() using namespace LayerTestsUtils; auto crashHandler = [](int errCode) { auto &s = Summary::getInstance(); @@ -68,7 +76,13 @@ namespace LayerTestsDefinitions { } try { - LoadNetwork(); + if (transformation != ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API) { + LoadNetwork(); + } else { + CoreConfiguration(this); + ConfigureNetwork(); + executableNetwork = core->LoadNetwork(cnnNetwork, targetDevice, configuration); + } GenerateInputs(); for (int64_t i = 0; i < iteration_count; ++i) { Infer(); @@ -88,12 +102,12 @@ namespace LayerTestsDefinitions { } } - std::vector>> MemoryTest::CalculateRefs() { + std::vector>> MemoryTest::CalculateRefs() { using namespace ngraph; function->validate_nodes_and_infer_types(); auto referenceInputs = std::vector>(inputs.size()); - auto refInputsTypes = std::vector(inputs.size()); + auto refInputsTypes = std::vector(inputs.size()); HostTensorVector inputTensors; for (auto & input : inputs) { const auto &dataSize = input->byteSize(); @@ -104,17 +118,25 @@ namespace LayerTestsDefinitions { const auto lockedMemory = memory->wmap(); const auto buffer = lockedMemory.as(); - auto hostTensor = std::make_shared(FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(tensorDesc.getPrecision()), + auto hostTensor = std::make_shared(FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(tensorDesc.getPrecision()), tensorDesc.getDims()); hostTensor->write(buffer, dataSize); inputTensors.push_back(hostTensor); } + // evaluate method is not implemented for TI op. 
+ ngraph::pass::Manager manager; + manager.register_pass(); + manager.run_passes(function); + const auto &outInfo = executableNetwork.GetOutputsInfo(); - HostTensorVector outputTensors(outInfo.size(), std::make_shared()); + HostTensorVector outputTensors(outInfo.size()); + for (auto& outTensor : outputTensors) { + outTensor = std::make_shared(); + } function->evaluate(outputTensors, inputTensors, eval_context); - std::vector>> outputs(outInfo.size()); + std::vector>> outputs(outInfo.size()); for (size_t idx = 0; idx < outInfo.size(); ++idx) { outputs[idx].first = outputTensors[idx]->get_element_type(); outputs[idx].second.resize(outputTensors[idx]->get_size_in_bytes()); @@ -123,5 +145,61 @@ namespace LayerTestsDefinitions { return outputs; } + void MemoryTest::CreateTIFunc() { + auto param = builder::makeParams(ngPrc, {inputShape}).at(0); + std::vector> shape = {{static_cast(iteration_count), 1}}; + auto iter_count = builder::makeParams(ngPrc, shape).at(0); + + // Body + auto X = builder::makeParams(ngPrc, {inputShape}).at(0); + auto Y = builder::makeParams(ngPrc, {inputShape}).at(0); + auto Iter = builder::makeParams(ngPrc, {Shape{1, 1}}).at(0); + auto add = std::make_shared(X, Y); + auto res = std::make_shared(add); + auto Iter_res = std::make_shared(Iter); + auto body = std::make_shared(OutputVector{res, Iter_res}, ParameterVector {X, Y, Iter}); + + // TI construction + auto tensor_iterator = std::make_shared(); + tensor_iterator->set_body(body); + + tensor_iterator->set_merged_input(X, param, res); + tensor_iterator->set_invariant_input(Y, param); + tensor_iterator->set_sliced_input(Iter, iter_count, 0, 1, 1, -1, 0); + + auto output = tensor_iterator->get_iter_value(res, -1); + auto output_iter = tensor_iterator->get_concatenated_slices(Iter_res, 0, 1, 1, -1, 0); + function = std::make_shared(OutputVector{output, output_iter}, + ParameterVector{param, iter_count}, + "PureTI"); + } + + void MemoryTest::CreateCommonFunc() { + auto param = 
builder::makeParams(ngPrc, {inputShape}); + auto variable = std::make_shared(VariableInfo{PartialShape::dynamic(), element::dynamic, "v0"}); + auto read_value = std::make_shared(param.at(0), variable); + auto add = std::make_shared(read_value, param.at(0)); + auto assign = std::make_shared(add, variable); + auto res = std::make_shared(add); + function = std::make_shared(ResultVector{res}, SinkVector{assign}, param, "TestMemory"); + } + + void MemoryTest::ApplyLowLatency() { + if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2) { + function->validate_nodes_and_infer_types(); + pass::Manager manager; + manager.register_pass(); + manager.run_passes(function); + } else if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_ORIGINAL_INIT) { + function->validate_nodes_and_infer_types(); + pass::Manager manager; + manager.register_pass(false); + manager.run_passes(function); + } else if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API) { + cnnNetwork = InferenceEngine::CNNNetwork{function}; + InferenceEngine::lowLatency2(cnnNetwork, iteration_count); + } + } + } // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/shared_test_classes/src/subgraph/memory_LSTMCell.cpp b/inference-engine/tests/functional/shared_test_classes/src/subgraph/memory_LSTMCell.cpp index a8d5c067334..bac0c293add 100644 --- a/inference-engine/tests/functional/shared_test_classes/src/subgraph/memory_LSTMCell.cpp +++ b/inference-engine/tests/functional/shared_test_classes/src/subgraph/memory_LSTMCell.cpp @@ -9,6 +9,9 @@ #include "ngraph_functions/builders.hpp" #include "shared_test_classes/subgraph/memory_LSTMCell.hpp" +using namespace ngraph; +using namespace opset7; + namespace SubgraphTestsDefinitions { std::string MemoryLSTMCellTest::getTestCaseName(const testing::TestParamInfo &obj) { @@ -17,9 +20,11 @@ namespace SubgraphTestsDefinitions { size_t inputSize; size_t hiddenSize; std::map 
config; - std::tie(targetDevice, netPrecision, inputSize, hiddenSize, config) = obj.param; + ngraph::helpers::MemoryTransformation transformation; + std::tie(transformation, targetDevice, netPrecision, inputSize, hiddenSize, config) = obj.param; std::ostringstream result; + result << "transformation=" << transformation << "_"; result << "netPrecision=" << netPrecision.name() << "_"; result << "IS=" << inputSize << "_"; result << "HS=" << hiddenSize << "_"; @@ -34,7 +39,7 @@ namespace SubgraphTestsDefinitions { InferenceEngine::Precision netPrecision; std::map config; size_t inputSize; - std::tie(targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam(); + std::tie(transformation, targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam(); configuration.insert(config.begin(), config.end()); auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); @@ -51,49 +56,53 @@ namespace SubgraphTestsDefinitions { reccurrenceWeights_vals = CommonTestUtils::generate_float_numbers(4 * hiddenSize * hiddenSize, -0.1f, 0.1f); bias_vals = CommonTestUtils::generate_float_numbers(4 * hiddenSize, -0.2f, 0.1f); - auto input_parameter = ngraph::builder::makeParams(ngPrc, {input_dims}); + auto input_parameter = builder::makeParams(ngPrc, {input_dims}); - auto input_add_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_bias); - auto add = ngraph::builder::makeEltwise(input_parameter[0], input_add_const, ngraph::helpers::EltwiseTypes::ADD); + auto input_add_const = builder::makeConstant(ngPrc, input_dims, input_bias); + auto add = builder::makeEltwise(input_parameter[0], input_add_const, helpers::EltwiseTypes::ADD); - auto input_mul_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_weights); - auto mul = ngraph::builder::makeEltwise(add, input_mul_const, ngraph::helpers::EltwiseTypes::MULTIPLY); + auto input_mul_const = builder::makeConstant(ngPrc, input_dims, input_weights); + auto mul = 
builder::makeEltwise(add, input_mul_const, helpers::EltwiseTypes::MULTIPLY); - auto unsqueeze_input_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto unsqueeze_input = std::make_shared(mul, unsqueeze_input_const); + auto unsqueeze_input_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto unsqueeze_input = std::make_shared(mul, unsqueeze_input_const); - auto permute_in_params = std::make_shared(ngraph::element::i64, ngraph::Shape{3}, ngraph::Shape{{1, 0, 2}}); - auto permute_in = std::make_shared(unsqueeze_input, permute_in_params); + auto permute_in_params = std::make_shared(element::i64, Shape{3}, Shape{{1, 0, 2}}); + auto permute_in = std::make_shared(unsqueeze_input, permute_in_params); - auto cell_memory_constant = ngraph::builder::makeConstant(ngPrc, cell_memory_dims, cell_memory_init); - auto cell_memory_read = std::make_shared(cell_memory_constant, "cell_memory"); + auto cell_memory_constant = builder::makeConstant(ngPrc, cell_memory_dims, cell_memory_init); + auto var_cell = + std::make_shared(VariableInfo{PartialShape::dynamic(), element::dynamic, "cell_state_1"}); + auto var_hidden = + std::make_shared(VariableInfo{PartialShape::dynamic(), element::dynamic, "hidden_state_1"}); + auto cell_memory_read = std::make_shared(cell_memory_constant, var_cell); - auto hidden_memory_constant = ngraph::builder::makeConstant(ngPrc, hidden_memory_dims, hidden_memory_init); - auto hidden_memory_read = std::make_shared(hidden_memory_constant, "hidden_memory"); + auto hidden_memory_constant = builder::makeConstant(ngPrc, hidden_memory_dims, hidden_memory_init); + auto hidden_memory_read = std::make_shared(hidden_memory_constant, var_hidden); // Body - inputs - auto X = std::make_shared(ngPrc, ngraph::Shape{1, 1, inputSize}); - auto H_t = std::make_shared(ngPrc, ngraph::Shape{1, hiddenSize}); - auto C_t = std::make_shared(ngPrc, ngraph::Shape{1, hiddenSize}); + auto X = std::make_shared(ngPrc, Shape{1, 1, inputSize}); 
+ auto H_t = std::make_shared(ngPrc, Shape{1, hiddenSize}); + auto C_t = std::make_shared(ngPrc, Shape{1, hiddenSize}); // Body - layers - auto squeeze_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto squeeze = std::make_shared(X, squeeze_const); + auto squeeze_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto squeeze = std::make_shared(X, squeeze_const); - auto weightsNode = ngraph::builder::makeConstant(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals); - auto reccurrenceWeightsNode = ngraph::builder::makeConstant(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals); - auto biasNode = ngraph::builder::makeConstant(ngPrc, {4 * hiddenSize}, bias_vals); - auto lstm = std::make_shared(squeeze, H_t, C_t, weightsNode, reccurrenceWeightsNode, biasNode, hiddenSize); + auto weightsNode = builder::makeConstant(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals); + auto reccurrenceWeightsNode = builder::makeConstant(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals); + auto biasNode = builder::makeConstant(ngPrc, {4 * hiddenSize}, bias_vals); + auto lstm = std::make_shared(squeeze, H_t, C_t, weightsNode, reccurrenceWeightsNode, biasNode, hiddenSize); - auto unsqueeze_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto unsqueeze = std::make_shared(lstm->output(0), unsqueeze_const); + auto unsqueeze_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto unsqueeze = std::make_shared(lstm->output(0), unsqueeze_const); // body - outputs auto H_o = lstm->output(0); auto C_o = lstm->output(1); auto unsqueeze_o = unsqueeze->output(0); - auto body = std::make_shared(ngraph::OutputVector{unsqueeze_o, H_o, C_o}, ngraph::ParameterVector {X, H_t, C_t}); + auto body = std::make_shared(OutputVector{unsqueeze_o, H_o, C_o}, ParameterVector {X, H_t, C_t}); // TI construction - auto tensor_iterator = std::make_shared(); + auto tensor_iterator = 
std::make_shared(); tensor_iterator->set_body(body); tensor_iterator->set_invariant_input(X, permute_in); tensor_iterator->set_merged_input(H_t, hidden_memory_read, H_o); @@ -107,27 +116,27 @@ namespace SubgraphTestsDefinitions { out_hidden.get_tensor().set_element_type(ngPrc); out_cell.get_tensor().set_element_type(ngPrc); - auto cell_memory_write = std::make_shared(out_cell, "cell_memory"); - auto hidden_memory_write = std::make_shared(out_hidden, "hidden_memory"); + auto cell_memory_write = std::make_shared(out_cell, var_cell); + auto hidden_memory_write = std::make_shared(out_hidden, var_hidden); - auto final_reshape_pattern = std::make_shared(ngraph::element::i64, ngraph::Shape{4}, + auto final_reshape_pattern = std::make_shared(element::i64, Shape{4}, std::vector({1, 1, 1, hiddenSize})); - auto final_reshape = std::make_shared(out_unsqueeze, final_reshape_pattern, false); + auto final_reshape = std::make_shared(out_unsqueeze, final_reshape_pattern, false); cell_memory_write->add_control_dependency(cell_memory_read); - final_reshape->add_control_dependency(cell_memory_write); - hidden_memory_write->add_control_dependency(hidden_memory_read); - final_reshape->add_control_dependency(hidden_memory_write); - function = std::make_shared(final_reshape, input_parameter, "TI_with_memory"); + function = std::make_shared(OutputVector{final_reshape}, + SinkVector{cell_memory_write, hidden_memory_write}, + input_parameter, + "TI_with_memory"); } void MemoryLSTMCellTest::switchToNgraphFriendlyModel() { InferenceEngine::Precision netPrecision; std::map config; size_t inputSize; - std::tie(targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam(); + std::tie(transformation, targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam(); auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); std::vector input_dims { 1, inputSize }; @@ -135,46 +144,46 @@ namespace SubgraphTestsDefinitions { std::vector 
hidden_memory_dims {1, hiddenSize}; std::vector cell_memory_dims {1, hiddenSize}; - auto input_parameter = ngraph::builder::makeParams(ngPrc, {input_dims}); + auto input_parameter = builder::makeParams(ngPrc, {input_dims}); - auto input_add_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_bias); - auto add = ngraph::builder::makeEltwise(input_parameter[0], input_add_const, ngraph::helpers::EltwiseTypes::ADD); + auto input_add_const = builder::makeConstant(ngPrc, input_dims, input_bias); + auto add = builder::makeEltwise(input_parameter[0], input_add_const, helpers::EltwiseTypes::ADD); - auto input_mul_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_weights); - auto mul = ngraph::builder::makeEltwise(add, input_mul_const, ngraph::helpers::EltwiseTypes::MULTIPLY); + auto input_mul_const = builder::makeConstant(ngPrc, input_dims, input_weights); + auto mul = builder::makeEltwise(add, input_mul_const, helpers::EltwiseTypes::MULTIPLY); - auto unsqueeze_input_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto unsqueeze_input = std::make_shared(mul, unsqueeze_input_const); + auto unsqueeze_input_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto unsqueeze_input = std::make_shared(mul, unsqueeze_input_const); - auto cell_memory_constant = ngraph::builder::makeConstant(ngPrc, cell_memory_dims, cell_memory_init); + auto cell_memory_constant = builder::makeConstant(ngPrc, cell_memory_dims, cell_memory_init); - auto hidden_memory_constant = ngraph::builder::makeConstant(ngPrc, hidden_memory_dims, hidden_memory_init); + auto hidden_memory_constant = builder::makeConstant(ngPrc, hidden_memory_dims, hidden_memory_init); // Body - layers - auto squeeze_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto squeeze = std::make_shared(unsqueeze_input, squeeze_const); + auto squeeze_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto squeeze = 
std::make_shared(unsqueeze_input, squeeze_const); - auto weightsNode = ngraph::builder::makeConstant(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals); - auto reccurrenceWeightsNode = ngraph::builder::makeConstant(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals); - auto biasNode = ngraph::builder::makeConstant(ngPrc, {4 * hiddenSize}, bias_vals); - auto lstm = std::make_shared(squeeze, hidden_memory_constant, cell_memory_constant, weightsNode, + auto weightsNode = builder::makeConstant(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals); + auto reccurrenceWeightsNode = builder::makeConstant(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals); + auto biasNode = builder::makeConstant(ngPrc, {4 * hiddenSize}, bias_vals); + auto lstm = std::make_shared(squeeze, hidden_memory_constant, cell_memory_constant, weightsNode, reccurrenceWeightsNode, biasNode, hiddenSize); - auto unsqueeze_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto unsqueeze = std::make_shared(lstm->output(0), unsqueeze_const); + auto unsqueeze_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto unsqueeze = std::make_shared(lstm->output(0), unsqueeze_const); - auto final_reshape_pattern = std::make_shared(ngraph::element::i64, - ngraph::Shape{4}, std::vector({1, 1, 1, hiddenSize})); - auto final_reshape = std::make_shared(unsqueeze, final_reshape_pattern, false); + auto final_reshape_pattern = std::make_shared(element::i64, + Shape{4}, std::vector({1, 1, 1, hiddenSize})); + auto final_reshape = std::make_shared(unsqueeze, final_reshape_pattern, false); - function = std::make_shared(final_reshape, input_parameter, "TI_unrolled_without_memory"); + function = std::make_shared(final_reshape, input_parameter, "TI_unrolled_without_memory"); } void MemoryLSTMCellTest::CreatePureTensorIteratorModel() { InferenceEngine::Precision netPrecision; std::map config; size_t inputSize; - std::tie(targetDevice, netPrecision, inputSize, 
hiddenSize, config) = this->GetParam(); + std::tie(transformation, targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam(); auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); std::vector input_dims { 1, inputSize }; @@ -182,49 +191,49 @@ namespace SubgraphTestsDefinitions { std::vector hidden_memory_dims {1, hiddenSize}; std::vector cell_memory_dims {1, hiddenSize}; - auto input_parameter = ngraph::builder::makeParams(ngPrc, {input_dims}); + auto input_parameter = builder::makeParams(ngPrc, {input_dims}); - auto input_add_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_bias); - auto add = ngraph::builder::makeEltwise(input_parameter[0], input_add_const, ngraph::helpers::EltwiseTypes::ADD); + auto input_add_const = builder::makeConstant(ngPrc, input_dims, input_bias); + auto add = builder::makeEltwise(input_parameter[0], input_add_const, helpers::EltwiseTypes::ADD); - auto input_mul_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_weights); - auto mul = ngraph::builder::makeEltwise(add, input_mul_const, ngraph::helpers::EltwiseTypes::MULTIPLY); + auto input_mul_const = builder::makeConstant(ngPrc, input_dims, input_weights); + auto mul = builder::makeEltwise(add, input_mul_const, helpers::EltwiseTypes::MULTIPLY); - auto unsqueeze_input_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto unsqueeze_input = std::make_shared(mul, unsqueeze_input_const); + auto unsqueeze_input_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto unsqueeze_input = std::make_shared(mul, unsqueeze_input_const); - auto permute_in_params = std::make_shared(ngraph::element::i64, ngraph::Shape{3}, ngraph::Shape{{1, 0, 2}}); - auto permute_in = std::make_shared(unsqueeze_input, permute_in_params); + auto permute_in_params = std::make_shared(element::i64, Shape{3}, Shape{{1, 0, 2}}); + auto permute_in = std::make_shared(unsqueeze_input, permute_in_params); - auto 
cell_memory_constant = ngraph::builder::makeConstant(ngPrc, cell_memory_dims, cell_memory_init); + auto cell_memory_constant = builder::makeConstant(ngPrc, cell_memory_dims, cell_memory_init); - auto hidden_memory_constant = ngraph::builder::makeConstant(ngPrc, hidden_memory_dims, hidden_memory_init); + auto hidden_memory_constant = builder::makeConstant(ngPrc, hidden_memory_dims, hidden_memory_init); // Body - inputs - auto X = std::make_shared(ngPrc, ngraph::Shape{1, 1, inputSize}); - auto H_t = std::make_shared(ngPrc, ngraph::Shape{1, hiddenSize}); - auto C_t = std::make_shared(ngPrc, ngraph::Shape{1, hiddenSize}); + auto X = std::make_shared(ngPrc, Shape{1, 1, inputSize}); + auto H_t = std::make_shared(ngPrc, Shape{1, hiddenSize}); + auto C_t = std::make_shared(ngPrc, Shape{1, hiddenSize}); H_t->set_friendly_name("hidden_state_1"); C_t->set_friendly_name("cell_state_1"); // Body - layers - auto squeeze_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto squeeze = std::make_shared(X, squeeze_const); + auto squeeze_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto squeeze = std::make_shared(X, squeeze_const); - auto weightsNode = ngraph::builder::makeConstant(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals); - auto reccurrenceWeightsNode = ngraph::builder::makeConstant(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals); - auto biasNode = ngraph::builder::makeConstant(ngPrc, {4 * hiddenSize}, bias_vals); - auto lstm = std::make_shared(squeeze, H_t, C_t, weightsNode, reccurrenceWeightsNode, biasNode, hiddenSize); + auto weightsNode = builder::makeConstant(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals); + auto reccurrenceWeightsNode = builder::makeConstant(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals); + auto biasNode = builder::makeConstant(ngPrc, {4 * hiddenSize}, bias_vals); + auto lstm = std::make_shared(squeeze, H_t, C_t, weightsNode, reccurrenceWeightsNode, biasNode, 
hiddenSize); - auto unsqueeze_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto unsqueeze = std::make_shared(lstm->output(0), unsqueeze_const); + auto unsqueeze_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto unsqueeze = std::make_shared(lstm->output(0), unsqueeze_const); // body - outputs auto H_o = lstm->output(0); auto C_o = lstm->output(1); auto unsqueeze_o = unsqueeze->output(0); - auto body = std::make_shared(ngraph::OutputVector{unsqueeze_o, H_o, C_o}, ngraph::ParameterVector {X, H_t, C_t}); + auto body = std::make_shared(OutputVector{unsqueeze_o, H_o, C_o}, ParameterVector {X, H_t, C_t}); // TI construction - auto tensor_iterator = std::make_shared(); + auto tensor_iterator = std::make_shared(); tensor_iterator->set_body(body); tensor_iterator->set_sliced_input(X, permute_in, 0, 1, 1, -1, 0); tensor_iterator->set_merged_input(H_t, hidden_memory_constant, H_o); @@ -237,56 +246,35 @@ namespace SubgraphTestsDefinitions { out_hidden.get_tensor().set_element_type(ngPrc); out_cell.get_tensor().set_element_type(ngPrc); - auto final_reshape_pattern = std::make_shared(ngraph::element::i64, ngraph::Shape{4}, + auto final_reshape_pattern = std::make_shared(element::i64, Shape{4}, std::vector({1, 1, 1, hiddenSize})); - auto final_reshape = std::make_shared(out_unsqueeze, final_reshape_pattern, false); + auto final_reshape = std::make_shared(out_unsqueeze, final_reshape_pattern, false); - function = std::make_shared(final_reshape, input_parameter, "PureTI"); + function = std::make_shared(final_reshape, input_parameter, "PureTI"); } void MemoryLSTMCellTest::Run() { SKIP_IF_CURRENT_TEST_IS_DISABLED() - - IE_SUPPRESS_DEPRECATED_START - LoadNetwork(); - auto states = executableNetwork.QueryState(); - for (auto& state : states) { - auto name = state.GetName(); - if (name == "cell_memory") { - auto blob = FuncTestUtils::createAndFillBlobWithFloatArray(state.GetLastState()->getTensorDesc(), - cell_memory_init.data(), 
cell_memory_init.size()); - state.SetState(blob); - } else if (name == "hidden_memory") { - auto blob = FuncTestUtils::createAndFillBlobWithFloatArray(state.GetLastState()->getTensorDesc(), - hidden_memory_init.data(), hidden_memory_init.size()); - state.SetState(blob); - } else { - GTEST_FAIL() << "unknown memory state"; - } + if (transformation != ngraph::helpers::MemoryTransformation::NONE) { + ApplyLowLatency(); + } else { + LoadNetwork(); } - IE_SUPPRESS_DEPRECATED_END + + InitMemory(); GenerateInputs(); Infer(); - switchToNgraphFriendlyModel(); + + // Calculate ref values + if (transformation == ngraph::helpers::MemoryTransformation::NONE) { + switchToNgraphFriendlyModel(); + } else { + CreatePureTensorIteratorModel(); + } Validate(); } - void MemoryLSTMCellTest::RunLowLatency(bool regular_api) { - SKIP_IF_CURRENT_TEST_IS_DISABLED() - - CreatePureTensorIteratorModel(); - if (regular_api) { - cnnNetwork = InferenceEngine::CNNNetwork{function}; - InferenceEngine::LowLatency(cnnNetwork); - ConfigureNetwork(); - executableNetwork = core->LoadNetwork(static_cast(cnnNetwork), targetDevice, configuration); - } else { - // Apply LowLatency (insert Assigns/ReadValues) and UnrollTensorIterator - ngraph::pass::Manager manager; - manager.register_pass(); // LowLatency enables UnrollTI - manager.run_passes(function); - LoadNetwork(); - } + void MemoryLSTMCellTest::InitMemory() { IE_SUPPRESS_DEPRECATED_START auto states = executableNetwork.QueryState(); for (auto& state : states) { @@ -304,13 +292,52 @@ namespace SubgraphTestsDefinitions { } } IE_SUPPRESS_DEPRECATED_END - GenerateInputs(); - Infer(); + } + void MemoryLSTMCellTest::ApplyLowLatency() { + // Calculate values after LowLatency transformation CreatePureTensorIteratorModel(); - ngraph::pass::Manager manager_2; - manager_2.register_pass(); - manager_2.run_passes(function); - Validate(); + if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY) { + function->validate_nodes_and_infer_types(); + // 
Apply LowLatency (insert Assigns/ReadValues) and UnrollTensorIterator + pass::Manager manager; + NGRAPH_SUPPRESS_DEPRECATED_START + manager.register_pass(); + NGRAPH_SUPPRESS_DEPRECATED_END // LowLatency enables UnrollTI + manager.run_passes(function); + bool ti_found = helpers::is_tensor_iterator_exist(function); + EXPECT_EQ(ti_found, true); + LoadNetwork(); + } else if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2) { + function->validate_nodes_and_infer_types(); + // Apply LowLatency (insert Assigns/ReadValues) and UnrollTensorIterator + + pass::Manager manager; + manager.register_pass(); + manager.run_passes(function); + bool ti_found = helpers::is_tensor_iterator_exist(function); + EXPECT_EQ(ti_found, false); + LoadNetwork(); + } else if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY_REGULAR_API) { + cnnNetwork = InferenceEngine::CNNNetwork{function}; + IE_SUPPRESS_DEPRECATED_START + InferenceEngine::LowLatency(cnnNetwork); + IE_SUPPRESS_DEPRECATED_END + + bool ti_found = helpers::is_tensor_iterator_exist(cnnNetwork.getFunction()); + EXPECT_EQ(ti_found, true); + + ConfigureNetwork(); + executableNetwork = core->LoadNetwork(cnnNetwork, targetDevice, configuration); + } else if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API) { + cnnNetwork = InferenceEngine::CNNNetwork{function}; + InferenceEngine::lowLatency2(cnnNetwork); + + bool ti_found = helpers::is_tensor_iterator_exist(cnnNetwork.getFunction()); + EXPECT_EQ(ti_found, false); + + ConfigureNetwork(); + executableNetwork = core->LoadNetwork(cnnNetwork, targetDevice, configuration); + } } } // namespace SubgraphTestsDefinitions diff --git a/inference-engine/tests/functional/shared_test_classes/src/subgraph/multiple_LSTMCell.cpp b/inference-engine/tests/functional/shared_test_classes/src/subgraph/multiple_LSTMCell.cpp index d854f704930..09f8020df41 100644 --- 
a/inference-engine/tests/functional/shared_test_classes/src/subgraph/multiple_LSTMCell.cpp +++ b/inference-engine/tests/functional/shared_test_classes/src/subgraph/multiple_LSTMCell.cpp @@ -2,16 +2,19 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/opsets/opset5.hpp" +#include "ie_transformations.hpp" +#include "ngraph/opsets/opset7.hpp" +#include "ngraph/op/util/variable_context.hpp" #include "ngraph/pass/low_latency.hpp" -#include "ie_transformations.hpp" -#include "transformations/control_flow/unroll_tensor_iterator.hpp" - #include "ngraph_functions/builders.hpp" +#include "ngraph_functions/utils/ngraph_helpers.hpp" #include "shared_test_classes/subgraph/multiple_LSTMCell.hpp" +using namespace ngraph; +using namespace opset7; + namespace SubgraphTestsDefinitions { std::string MultipleLSTMCellTest::getTestCaseName(const testing::TestParamInfo &obj) { std::string targetDevice; @@ -19,9 +22,11 @@ std::string MultipleLSTMCellTest::getTestCaseName(const testing::TestParamInfo config; - std::tie(targetDevice, netPrecision, inputSize, hiddenSize, config) = obj.param; + ngraph::helpers::MemoryTransformation transformation; + std::tie(transformation, targetDevice, netPrecision, inputSize, hiddenSize, config) = obj.param; std::ostringstream result; + result << "transformation=" << transformation << "_"; result << "netPrecision=" << netPrecision.name() << "_"; result << "IS=" << inputSize << "_"; result << "HS=" << hiddenSize << "_"; @@ -33,7 +38,7 @@ void MultipleLSTMCellTest::SetUp() { InferenceEngine::Precision netPrecision; std::map config; size_t inputSize; - std::tie(targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam(); + std::tie(transformation, targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam(); configuration.insert(config.begin(), config.end()); auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); @@ -51,51 +56,55 @@ void MultipleLSTMCellTest::SetUp() { 
reccurrenceWeights_vals = CommonTestUtils::generate_float_numbers(4 * hiddenSize * hiddenSize, -0.1f, 0.1f); bias_vals = CommonTestUtils::generate_float_numbers(4 * hiddenSize, -0.25f, 0.15f); - auto input_parameter = ngraph::builder::makeParams(ngPrc, {input_dims}); + auto input_parameter = builder::makeParams(ngPrc, {input_dims}); - auto input_add_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_bias); - auto add = ngraph::builder::makeEltwise(input_parameter[0], input_add_const, ngraph::helpers::EltwiseTypes::ADD); + auto input_add_const = builder::makeConstant(ngPrc, input_dims, input_bias); + auto add = builder::makeEltwise(input_parameter[0], input_add_const, helpers::EltwiseTypes::ADD); - auto input_mul_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_weights); - auto mul = ngraph::builder::makeEltwise(add, input_mul_const, ngraph::helpers::EltwiseTypes::MULTIPLY); + auto input_mul_const = builder::makeConstant(ngPrc, input_dims, input_weights); + auto mul = builder::makeEltwise(add, input_mul_const, helpers::EltwiseTypes::MULTIPLY); - auto unsqueeze_input_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto unsqueeze_input = std::make_shared(mul, unsqueeze_input_const); + auto unsqueeze_input_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto unsqueeze_input = std::make_shared(mul, unsqueeze_input_const); - auto permute_in_params = std::make_shared(ngraph::element::i64, ngraph::Shape{3}, ngraph::Shape{{1, 0, 2}}); - auto permute_in = std::make_shared(unsqueeze_input, permute_in_params); + auto permute_in_params = std::make_shared(element::i64, Shape{3}, Shape{{1, 0, 2}}); + auto permute_in = std::make_shared(unsqueeze_input, permute_in_params); - auto cell_memory_constant = ngraph::builder::makeConstant(ngPrc, cell_memory_dims, cell_memory_init); - auto cell_memory_read = std::make_shared(cell_memory_constant, "cell_memory"); + auto cell_memory_constant = 
builder::makeConstant(ngPrc, cell_memory_dims, cell_memory_init); + auto var_cell = + std::make_shared(VariableInfo{PartialShape::dynamic(), element::dynamic, "cell_state_1"}); + auto var_hidden = + std::make_shared(VariableInfo{PartialShape::dynamic(), element::dynamic, "hidden_state_1"}); + auto cell_memory_read = std::make_shared(cell_memory_constant, var_cell); cell_memory_read->set_friendly_name("cell_memory"); - auto hidden_memory_constant = ngraph::builder::makeConstant(ngPrc, hidden_memory_dims, hidden_memory_init); - auto hidden_memory_read = std::make_shared(hidden_memory_constant, "hidden_memory"); + auto hidden_memory_constant = builder::makeConstant(ngPrc, hidden_memory_dims, hidden_memory_init); + auto hidden_memory_read = std::make_shared(hidden_memory_constant, var_hidden); hidden_memory_read->set_friendly_name("hidden_memory"); // Body - inputs - auto X = std::make_shared(ngPrc, ngraph::Shape{1, 1, inputSize}); - auto H_t = std::make_shared(ngPrc, ngraph::Shape{1, hiddenSize}); - auto C_t = std::make_shared(ngPrc, ngraph::Shape{1, hiddenSize}); + auto X = std::make_shared(ngPrc, Shape{1, 1, inputSize}); + auto H_t = std::make_shared(ngPrc, Shape{1, hiddenSize}); + auto C_t = std::make_shared(ngPrc, Shape{1, hiddenSize}); // Body - layers - auto squeeze_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto squeeze = std::make_shared(X, squeeze_const); + auto squeeze_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto squeeze = std::make_shared(X, squeeze_const); - auto weightsNode = ngraph::builder::makeConstant(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals); - auto reccurrenceWeightsNode = ngraph::builder::makeConstant(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals); - auto biasNode = ngraph::builder::makeConstant(ngPrc, {4 * hiddenSize}, bias_vals); - auto lstm = std::make_shared(squeeze, H_t, C_t, weightsNode, reccurrenceWeightsNode, biasNode, hiddenSize); + auto weightsNode 
= builder::makeConstant(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals); + auto reccurrenceWeightsNode = builder::makeConstant(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals); + auto biasNode = builder::makeConstant(ngPrc, {4 * hiddenSize}, bias_vals); + auto lstm = std::make_shared(squeeze, H_t, C_t, weightsNode, reccurrenceWeightsNode, biasNode, hiddenSize); - auto unsqueeze_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto unsqueeze = std::make_shared(lstm->output(0), unsqueeze_const); + auto unsqueeze_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto unsqueeze = std::make_shared(lstm->output(0), unsqueeze_const); // body - outputs auto H_o = lstm->output(0); auto C_o = lstm->output(1); auto unsqueeze_o = unsqueeze->output(0); - auto body = std::make_shared(ngraph::OutputVector{unsqueeze_o, H_o, C_o}, ngraph::ParameterVector {X, H_t, C_t}); + auto body = std::make_shared(OutputVector{unsqueeze_o, H_o, C_o}, ParameterVector {X, H_t, C_t}); // TI construction - auto tensor_iterator = std::make_shared(); + auto tensor_iterator = std::make_shared(); tensor_iterator->set_body(body); tensor_iterator->set_invariant_input(X, permute_in); tensor_iterator->set_merged_input(H_t, hidden_memory_read, H_o); @@ -108,49 +117,53 @@ void MultipleLSTMCellTest::SetUp() { out_hidden.get_tensor().set_element_type(ngPrc); out_cell.get_tensor().set_element_type(ngPrc); - auto cell_memory_write = std::make_shared(out_cell, "cell_memory"); - auto hidden_memory_write = std::make_shared(out_hidden, "hidden_memory"); + auto cell_memory_write = std::make_shared(out_cell, var_cell); + auto hidden_memory_write = std::make_shared(out_hidden, var_hidden); - auto first_reshape_pattern = std::make_shared(ngraph::element::i64, - ngraph::Shape{4}, std::vector({1, 1, 1, hiddenSize})); - auto first_reshape = std::make_shared(out_unsqueeze, first_reshape_pattern, false); + auto first_reshape_pattern = 
std::make_shared(element::i64, + Shape{4}, std::vector({1, 1, 1, hiddenSize})); + auto first_reshape = std::make_shared(out_unsqueeze, first_reshape_pattern, false); // End of TI 1 - auto inbetween_squeeze_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto inbetween_squeeze = std::make_shared(first_reshape, inbetween_squeeze_const); + auto inbetween_squeeze_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto inbetween_squeeze = std::make_shared(first_reshape, inbetween_squeeze_const); // Second TI - auto cell_memory_2_constant = ngraph::builder::makeConstant(ngPrc, cell_memory_dims, cell_memory_init); - auto cell_memory_2_read = std::make_shared(cell_memory_2_constant, "cell_memory_2"); + auto var_cell_2 = + std::make_shared(VariableInfo{PartialShape::dynamic(), element::dynamic, "cell_state_2"}); + auto var_hidden_2 = + std::make_shared(VariableInfo{PartialShape::dynamic(), element::dynamic, "hidden_state_2"}); + auto cell_memory_2_constant = builder::makeConstant(ngPrc, cell_memory_dims, cell_memory_init); + auto cell_memory_2_read = std::make_shared(cell_memory_2_constant, var_cell_2); cell_memory_2_read->set_friendly_name("cell_memory_2"); - auto hidden_memory_2_constant = ngraph::builder::makeConstant(ngPrc, hidden_memory_dims, hidden_memory_init); - auto hidden_memory_2_read = std::make_shared(hidden_memory_2_constant, "hidden_memory_2"); + auto hidden_memory_2_constant = builder::makeConstant(ngPrc, hidden_memory_dims, hidden_memory_init); + auto hidden_memory_2_read = std::make_shared(hidden_memory_2_constant, var_hidden_2); hidden_memory_2_read->set_friendly_name("hidden_memory_2"); // Body - inputs - auto X_2 = std::make_shared(ngPrc, ngraph::Shape{1, 1, hiddenSize}); - auto H_t_2 = std::make_shared(ngPrc, ngraph::Shape{1, hiddenSize}); - auto C_t_2 = std::make_shared(ngPrc, ngraph::Shape{1, hiddenSize}); + auto X_2 = std::make_shared(ngPrc, Shape{1, 1, hiddenSize}); + auto H_t_2 = 
std::make_shared(ngPrc, Shape{1, hiddenSize}); + auto C_t_2 = std::make_shared(ngPrc, Shape{1, hiddenSize}); // Body - layers - auto squeeze_2_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto squeeze_2 = std::make_shared(X_2, squeeze_2_const); + auto squeeze_2_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto squeeze_2 = std::make_shared(X_2, squeeze_2_const); - auto weightsNode_2 = ngraph::builder::makeConstant(ngPrc, { 4 * hiddenSize, hiddenSize }, weights_2_vals); - auto reccurrenceWeightsNode_2 = ngraph::builder::makeConstant(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals); - auto biasNode_2 = ngraph::builder::makeConstant(ngPrc, {4 * hiddenSize}, bias_vals); - auto lstm_2 = std::make_shared(squeeze_2, H_t_2, C_t_2, weightsNode_2, reccurrenceWeightsNode_2, biasNode_2, hiddenSize); + auto weightsNode_2 = builder::makeConstant(ngPrc, { 4 * hiddenSize, hiddenSize }, weights_2_vals); + auto reccurrenceWeightsNode_2 = builder::makeConstant(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals); + auto biasNode_2 = builder::makeConstant(ngPrc, {4 * hiddenSize}, bias_vals); + auto lstm_2 = std::make_shared(squeeze_2, H_t_2, C_t_2, weightsNode_2, reccurrenceWeightsNode_2, biasNode_2, hiddenSize); - auto unsqueeze_2_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto unsqueeze_2 = std::make_shared(lstm_2->output(0), unsqueeze_2_const); + auto unsqueeze_2_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto unsqueeze_2 = std::make_shared(lstm_2->output(0), unsqueeze_2_const); // body - outputs auto H_o_2 = lstm_2->output(0); auto C_o_2 = lstm_2->output(1); auto unsqueeze_o_2 = unsqueeze_2->output(0); - auto body_2 = std::make_shared(ngraph::OutputVector{unsqueeze_o_2, H_o_2, C_o_2}, ngraph::ParameterVector {X_2, H_t_2, C_t_2}); + auto body_2 = std::make_shared(OutputVector{unsqueeze_o_2, H_o_2, C_o_2}, ParameterVector {X_2, H_t_2, 
C_t_2}); // TI construction - auto tensor_iterator_2 = std::make_shared(); + auto tensor_iterator_2 = std::make_shared(); tensor_iterator_2->set_body(body_2); tensor_iterator_2->set_invariant_input(X_2, inbetween_squeeze); tensor_iterator_2->set_merged_input(H_t_2, hidden_memory_2_read, H_o_2); @@ -163,33 +176,28 @@ void MultipleLSTMCellTest::SetUp() { out_hidden_2.get_tensor().set_element_type(ngPrc); out_cell_2.get_tensor().set_element_type(ngPrc); - auto cell_memory_2_write = std::make_shared(out_cell_2, "cell_memory_2"); - auto hidden_memory_2_write = std::make_shared(out_hidden_2, "hidden_memory_2"); + auto cell_memory_2_write = std::make_shared(out_cell_2, var_cell_2); + auto hidden_memory_2_write = std::make_shared(out_hidden_2, var_hidden_2); - auto final_reshape_pattern = std::make_shared(ngraph::element::i64, - ngraph::Shape{4}, std::vector({1, 1, 1, hiddenSize})); - auto final_reshape = std::make_shared(out_unsqueeze_2, final_reshape_pattern, false); + auto final_reshape_pattern = std::make_shared(element::i64, Shape{4}, std::vector({1, 1, 1, hiddenSize})); + auto final_reshape = std::make_shared(out_unsqueeze_2, final_reshape_pattern, false); cell_memory_write->add_control_dependency(cell_memory_read); - final_reshape->add_control_dependency(cell_memory_write); - hidden_memory_write->add_control_dependency(hidden_memory_read); - final_reshape->add_control_dependency(hidden_memory_write); - cell_memory_2_write->add_control_dependency(cell_memory_2_read); - final_reshape->add_control_dependency(cell_memory_2_write); - hidden_memory_2_write->add_control_dependency(hidden_memory_2_read); - final_reshape->add_control_dependency(hidden_memory_2_write); - function = std::make_shared(final_reshape, input_parameter, "TI_with_memory"); + function = std::make_shared(OutputVector {final_reshape}, + SinkVector{cell_memory_write, hidden_memory_write, cell_memory_2_write, hidden_memory_2_write}, + input_parameter, + "TI_with_memory"); } void 
MultipleLSTMCellTest::switchToNgraphFriendlyModel() { InferenceEngine::Precision netPrecision; std::map config; size_t inputSize; - std::tie(targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam(); + std::tie(transformation, targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam(); auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); std::vector input_dims { 1, inputSize }; @@ -197,72 +205,72 @@ void MultipleLSTMCellTest::switchToNgraphFriendlyModel() { std::vector hidden_memory_dims {1, hiddenSize}; std::vector cell_memory_dims {1, hiddenSize}; - auto input_parameter = ngraph::builder::makeParams(ngPrc, {input_dims}); + auto input_parameter = builder::makeParams(ngPrc, {input_dims}); - auto input_add_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_bias); - auto add = ngraph::builder::makeEltwise(input_parameter[0], input_add_const, ngraph::helpers::EltwiseTypes::ADD); + auto input_add_const = builder::makeConstant(ngPrc, input_dims, input_bias); + auto add = builder::makeEltwise(input_parameter[0], input_add_const, helpers::EltwiseTypes::ADD); - auto input_mul_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_weights); - auto mul = ngraph::builder::makeEltwise(add, input_mul_const, ngraph::helpers::EltwiseTypes::MULTIPLY); + auto input_mul_const = builder::makeConstant(ngPrc, input_dims, input_weights); + auto mul = builder::makeEltwise(add, input_mul_const, helpers::EltwiseTypes::MULTIPLY); - auto unsqueeze_input_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto unsqueeze_input = std::make_shared(mul, unsqueeze_input_const); + auto unsqueeze_input_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto unsqueeze_input = std::make_shared(mul, unsqueeze_input_const); // Body 1 - layers - auto cell_memory_constant = ngraph::builder::makeConstant(ngPrc, cell_memory_dims, cell_memory_init); + auto cell_memory_constant = 
builder::makeConstant(ngPrc, cell_memory_dims, cell_memory_init); - auto hidden_memory_constant = ngraph::builder::makeConstant(ngPrc, hidden_memory_dims, hidden_memory_init); + auto hidden_memory_constant = builder::makeConstant(ngPrc, hidden_memory_dims, hidden_memory_init); - auto squeeze_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto squeeze = std::make_shared(unsqueeze_input, squeeze_const); + auto squeeze_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto squeeze = std::make_shared(unsqueeze_input, squeeze_const); - auto weightsNode = ngraph::builder::makeConstant(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals); - auto reccurrenceWeightsNode = ngraph::builder::makeConstant(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals); - auto biasNode = ngraph::builder::makeConstant(ngPrc, {4 * hiddenSize}, bias_vals); - auto lstm = std::make_shared(squeeze, hidden_memory_constant, cell_memory_constant, weightsNode, + auto weightsNode = builder::makeConstant(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals); + auto reccurrenceWeightsNode = builder::makeConstant(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals); + auto biasNode = builder::makeConstant(ngPrc, {4 * hiddenSize}, bias_vals); + auto lstm = std::make_shared(squeeze, hidden_memory_constant, cell_memory_constant, weightsNode, reccurrenceWeightsNode, biasNode, hiddenSize); - auto unsqueeze_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto unsqueeze = std::make_shared(lstm->output(0), unsqueeze_const); + auto unsqueeze_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto unsqueeze = std::make_shared(lstm->output(0), unsqueeze_const); - auto first_reshape_pattern = std::make_shared(ngraph::element::i64, - ngraph::Shape{4}, std::vector({1, 1, 1, hiddenSize})); - auto first_reshape = std::make_shared(unsqueeze, first_reshape_pattern, false); + auto first_reshape_pattern = 
std::make_shared(element::i64, + Shape{4}, std::vector({1, 1, 1, hiddenSize})); + auto first_reshape = std::make_shared(unsqueeze, first_reshape_pattern, false); // Body 1 - end - auto inbetween_squeeze_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto inbetween_squeeze = std::make_shared(first_reshape, inbetween_squeeze_const); + auto inbetween_squeeze_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto inbetween_squeeze = std::make_shared(first_reshape, inbetween_squeeze_const); // Body 2 - layers - auto cell_memory_2_constant = ngraph::builder::makeConstant(ngPrc, cell_memory_dims, cell_memory_init); + auto cell_memory_2_constant = builder::makeConstant(ngPrc, cell_memory_dims, cell_memory_init); - auto hidden_memory_2_constant = ngraph::builder::makeConstant(ngPrc, hidden_memory_dims, hidden_memory_init); + auto hidden_memory_2_constant = builder::makeConstant(ngPrc, hidden_memory_dims, hidden_memory_init); - auto squeeze_2_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto squeeze_2 = std::make_shared(inbetween_squeeze, squeeze_2_const); + auto squeeze_2_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto squeeze_2 = std::make_shared(inbetween_squeeze, squeeze_2_const); - auto weightsNode_2 = ngraph::builder::makeConstant(ngPrc, { 4 * hiddenSize, hiddenSize }, weights_2_vals); - auto reccurrenceWeightsNode_2 = ngraph::builder::makeConstant(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals); - auto biasNode_2 = ngraph::builder::makeConstant(ngPrc, {4 * hiddenSize}, bias_vals); - auto lstm_2 = std::make_shared(squeeze_2, hidden_memory_2_constant, cell_memory_2_constant, weightsNode_2, + auto weightsNode_2 = builder::makeConstant(ngPrc, { 4 * hiddenSize, hiddenSize }, weights_2_vals); + auto reccurrenceWeightsNode_2 = builder::makeConstant(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals); + auto biasNode_2 = 
builder::makeConstant(ngPrc, {4 * hiddenSize}, bias_vals); + auto lstm_2 = std::make_shared(squeeze_2, hidden_memory_2_constant, cell_memory_2_constant, weightsNode_2, reccurrenceWeightsNode_2, biasNode_2, hiddenSize); - auto unsqueeze_2_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto unsqueeze_2 = std::make_shared(lstm_2->output(0), unsqueeze_2_const); + auto unsqueeze_2_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto unsqueeze_2 = std::make_shared(lstm_2->output(0), unsqueeze_2_const); - auto final_reshape_pattern = std::make_shared(ngraph::element::i64, - ngraph::Shape{4}, std::vector({1, 1, 1, hiddenSize})); - auto final_reshape = std::make_shared(unsqueeze_2, final_reshape_pattern, false); + auto final_reshape_pattern = std::make_shared(element::i64, + Shape{4}, std::vector({1, 1, 1, hiddenSize})); + auto final_reshape = std::make_shared(unsqueeze_2, final_reshape_pattern, false); // Body 2 - end - function = std::make_shared(final_reshape, input_parameter, "TI_unrolled_without_memory"); + function = std::make_shared(final_reshape, input_parameter, "TI_unrolled_without_memory"); } void MultipleLSTMCellTest::CreatePureTensorIteratorModel() { InferenceEngine::Precision netPrecision; std::map config; size_t inputSize; - std::tie(targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam(); + std::tie(transformation, targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam(); auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); std::vector input_dims { 1, inputSize }; @@ -270,49 +278,49 @@ void MultipleLSTMCellTest::CreatePureTensorIteratorModel() { std::vector hidden_memory_dims {1, hiddenSize}; std::vector cell_memory_dims {1, hiddenSize}; - auto input_parameter = ngraph::builder::makeParams(ngPrc, {input_dims}); + auto input_parameter = builder::makeParams(ngPrc, {input_dims}); - auto input_add_const = ngraph::builder::makeConstant(ngPrc, 
input_dims, input_bias); - auto add = ngraph::builder::makeEltwise(input_parameter[0], input_add_const, ngraph::helpers::EltwiseTypes::ADD); + auto input_add_const = builder::makeConstant(ngPrc, input_dims, input_bias); + auto add = builder::makeEltwise(input_parameter[0], input_add_const, helpers::EltwiseTypes::ADD); - auto input_mul_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_weights); - auto mul = ngraph::builder::makeEltwise(add, input_mul_const, ngraph::helpers::EltwiseTypes::MULTIPLY); + auto input_mul_const = builder::makeConstant(ngPrc, input_dims, input_weights); + auto mul = builder::makeEltwise(add, input_mul_const, helpers::EltwiseTypes::MULTIPLY); - auto unsqueeze_input_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto unsqueeze_input = std::make_shared(mul, unsqueeze_input_const); + auto unsqueeze_input_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto unsqueeze_input = std::make_shared(mul, unsqueeze_input_const); - auto permute_in_params = std::make_shared(ngraph::element::i64, ngraph::Shape{3}, ngraph::Shape{{1, 0, 2}}); - auto permute_in = std::make_shared(unsqueeze_input, permute_in_params); + auto permute_in_params = std::make_shared(element::i64, Shape{3}, Shape{{1, 0, 2}}); + auto permute_in = std::make_shared(unsqueeze_input, permute_in_params); - auto cell_memory_constant = ngraph::builder::makeConstant(ngPrc, cell_memory_dims, cell_memory_init); + auto cell_memory_constant = builder::makeConstant(ngPrc, cell_memory_dims, cell_memory_init); - auto hidden_memory_constant = ngraph::builder::makeConstant(ngPrc, hidden_memory_dims, hidden_memory_init); + auto hidden_memory_constant = builder::makeConstant(ngPrc, hidden_memory_dims, hidden_memory_init); // Body - inputs - auto X = std::make_shared(ngPrc, ngraph::Shape{1, 1, inputSize}); - auto H_t = std::make_shared(ngPrc, ngraph::Shape{1, hiddenSize}); - auto C_t = std::make_shared(ngPrc, ngraph::Shape{1, hiddenSize}); 
+ auto X = std::make_shared(ngPrc, Shape{1, 1, inputSize}); + auto H_t = std::make_shared(ngPrc, Shape{1, hiddenSize}); + auto C_t = std::make_shared(ngPrc, Shape{1, hiddenSize}); H_t->set_friendly_name("hidden_state_1"); C_t->set_friendly_name("cell_state_1"); // Body - layers - auto squeeze_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto squeeze = std::make_shared(X, squeeze_const); + auto squeeze_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto squeeze = std::make_shared(X, squeeze_const); - auto weightsNode = ngraph::builder::makeConstant(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals); - auto reccurrenceWeightsNode = ngraph::builder::makeConstant(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals); - auto biasNode = ngraph::builder::makeConstant(ngPrc, {4 * hiddenSize}, bias_vals); - auto lstm = std::make_shared(squeeze, H_t, C_t, weightsNode, reccurrenceWeightsNode, biasNode, hiddenSize); + auto weightsNode = builder::makeConstant(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals); + auto reccurrenceWeightsNode = builder::makeConstant(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals); + auto biasNode = builder::makeConstant(ngPrc, {4 * hiddenSize}, bias_vals); + auto lstm = std::make_shared(squeeze, H_t, C_t, weightsNode, reccurrenceWeightsNode, biasNode, hiddenSize); - auto unsqueeze_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto unsqueeze = std::make_shared(lstm->output(0), unsqueeze_const); + auto unsqueeze_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto unsqueeze = std::make_shared(lstm->output(0), unsqueeze_const); // body - outputs auto H_o = lstm->output(0); auto C_o = lstm->output(1); auto unsqueeze_o = unsqueeze->output(0); - auto body = std::make_shared(ngraph::OutputVector{unsqueeze_o, H_o, C_o}, ngraph::ParameterVector {X, H_t, C_t}); + auto body = std::make_shared(OutputVector{unsqueeze_o, H_o, 
C_o}, ParameterVector {X, H_t, C_t}); // TI construction - auto tensor_iterator = std::make_shared(); + auto tensor_iterator = std::make_shared(); tensor_iterator->set_body(body); tensor_iterator->set_sliced_input(X, permute_in, 0, 1, 1, -1, 0); tensor_iterator->set_merged_input(H_t, hidden_memory_constant, H_o); @@ -326,44 +334,44 @@ void MultipleLSTMCellTest::CreatePureTensorIteratorModel() { out_cell.get_tensor().set_element_type(ngPrc); tensor_iterator->validate_and_infer_types(); - auto first_reshape_pattern = std::make_shared(ngraph::element::i64, - ngraph::Shape{4}, std::vector({1, 1, 1, hiddenSize})); - auto first_reshape = std::make_shared(out_unsqueeze, first_reshape_pattern, false); + auto first_reshape_pattern = std::make_shared(element::i64, + Shape{4}, std::vector({1, 1, 1, hiddenSize})); + auto first_reshape = std::make_shared(out_unsqueeze, first_reshape_pattern, false); // End of TI 1 - auto inbetween_squeeze_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto inbetween_squeeze = std::make_shared(first_reshape, inbetween_squeeze_const); + auto inbetween_squeeze_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto inbetween_squeeze = std::make_shared(first_reshape, inbetween_squeeze_const); // Second TI - auto cell_memory_2_constant = ngraph::builder::makeConstant(ngPrc, cell_memory_dims, cell_memory_init); + auto cell_memory_2_constant = builder::makeConstant(ngPrc, cell_memory_dims, cell_memory_init); - auto hidden_memory_2_constant = ngraph::builder::makeConstant(ngPrc, hidden_memory_dims, hidden_memory_init); + auto hidden_memory_2_constant = builder::makeConstant(ngPrc, hidden_memory_dims, hidden_memory_init); // Body - inputs - auto X_2 = std::make_shared(ngPrc, ngraph::Shape{1, 1, hiddenSize}); - auto H_t_2 = std::make_shared(ngPrc, ngraph::Shape{1, hiddenSize}); - auto C_t_2 = std::make_shared(ngPrc, ngraph::Shape{1, hiddenSize}); + auto X_2 = std::make_shared(ngPrc, Shape{1, 1, 
hiddenSize}); + auto H_t_2 = std::make_shared(ngPrc, Shape{1, hiddenSize}); + auto C_t_2 = std::make_shared(ngPrc, Shape{1, hiddenSize}); H_t_2->set_friendly_name("hidden_state_2"); C_t_2->set_friendly_name("cell_state_2"); // Body - layers - auto squeeze_2_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto squeeze_2 = std::make_shared(X_2, squeeze_2_const); + auto squeeze_2_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto squeeze_2 = std::make_shared(X_2, squeeze_2_const); - auto weightsNode_2 = ngraph::builder::makeConstant(ngPrc, { 4 * hiddenSize, hiddenSize }, weights_2_vals); - auto reccurrenceWeightsNode_2 = ngraph::builder::makeConstant(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals); - auto biasNode_2 = ngraph::builder::makeConstant(ngPrc, {4 * hiddenSize}, bias_vals); - auto lstm_2 = std::make_shared(squeeze_2, H_t_2, C_t_2, weightsNode_2, reccurrenceWeightsNode_2, biasNode_2, hiddenSize); + auto weightsNode_2 = builder::makeConstant(ngPrc, { 4 * hiddenSize, hiddenSize }, weights_2_vals); + auto reccurrenceWeightsNode_2 = builder::makeConstant(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals); + auto biasNode_2 = builder::makeConstant(ngPrc, {4 * hiddenSize}, bias_vals); + auto lstm_2 = std::make_shared(squeeze_2, H_t_2, C_t_2, weightsNode_2, reccurrenceWeightsNode_2, biasNode_2, hiddenSize); - auto unsqueeze_2_const = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes); - auto unsqueeze_2 = std::make_shared(lstm_2->output(0), unsqueeze_2_const); + auto unsqueeze_2_const = std::make_shared(element::i64, Shape{1}, squeeze_axes); + auto unsqueeze_2 = std::make_shared(lstm_2->output(0), unsqueeze_2_const); // body - outputs auto H_o_2 = lstm_2->output(0); auto C_o_2 = lstm_2->output(1); auto unsqueeze_o_2 = unsqueeze_2->output(0); - auto body_2 = std::make_shared(ngraph::OutputVector{unsqueeze_o_2, H_o_2, C_o_2}, ngraph::ParameterVector {X_2, H_t_2, 
C_t_2}); + auto body_2 = std::make_shared(OutputVector{unsqueeze_o_2, H_o_2, C_o_2}, ParameterVector {X_2, H_t_2, C_t_2}); // TI construction - auto tensor_iterator_2 = std::make_shared(); + auto tensor_iterator_2 = std::make_shared(); tensor_iterator_2->set_body(body_2); tensor_iterator_2->set_sliced_input(X_2, inbetween_squeeze, 0, 1, 1, -1, 0); tensor_iterator_2->set_merged_input(H_t_2, hidden_memory_2_constant, H_o_2); @@ -376,70 +384,17 @@ void MultipleLSTMCellTest::CreatePureTensorIteratorModel() { out_hidden_2.get_tensor().set_element_type(ngPrc); out_cell_2.get_tensor().set_element_type(ngPrc); tensor_iterator_2->validate_and_infer_types(); - auto final_reshape_pattern = std::make_shared(ngraph::element::i64, - ngraph::Shape{4}, std::vector({1, 1, 1, hiddenSize})); - auto final_reshape = std::make_shared(out_unsqueeze_2, final_reshape_pattern, false); + auto final_reshape_pattern = std::make_shared(element::i64, + Shape{4}, std::vector({1, 1, 1, hiddenSize})); + auto final_reshape = std::make_shared(out_unsqueeze_2, final_reshape_pattern, false); - function = std::make_shared(final_reshape, input_parameter, "PureTI"); + function = std::make_shared(final_reshape, input_parameter, "PureTI"); } -void MultipleLSTMCellTest::Run() { - SKIP_IF_CURRENT_TEST_IS_DISABLED() +void MultipleLSTMCellTest::InitMemory() { InferenceEngine::TensorDesc state_description(InferenceEngine::Precision::FP32, InferenceEngine::SizeVector({1, hiddenSize}), InferenceEngine::Layout::NC); - LoadNetwork(); - IE_SUPPRESS_DEPRECATED_START - auto states = executableNetwork.QueryState(); - for (auto& state : states) { - auto name = state.GetName(); - if (name == "cell_memory") { - auto blob = FuncTestUtils::createAndFillBlobWithFloatArray(state_description, - cell_memory_init.data(), cell_memory_init.size()); - state.SetState(blob); - } else if (name == "hidden_memory") { - auto blob = FuncTestUtils::createAndFillBlobWithFloatArray(state_description, - hidden_memory_init.data(), 
hidden_memory_init.size()); - state.SetState(blob); - } else if (name == "cell_memory_2") { - auto blob = FuncTestUtils::createAndFillBlobWithFloatArray(state_description, - cell_memory_init.data(), cell_memory_init.size()); - state.SetState(blob); - } else if (name == "hidden_memory_2") { - auto blob = FuncTestUtils::createAndFillBlobWithFloatArray(state_description, - hidden_memory_init.data(), hidden_memory_init.size()); - state.SetState(blob); - } else { - GTEST_FAIL() << "unknown memory state"; - } - } - IE_SUPPRESS_DEPRECATED_END - GenerateInputs(); - Infer(); - switchToNgraphFriendlyModel(); - Validate(); -} - -void MultipleLSTMCellTest::RunLowLatency(bool regular_api) { - SKIP_IF_CURRENT_TEST_IS_DISABLED() - InferenceEngine::TensorDesc state_description(InferenceEngine::Precision::FP32, - InferenceEngine::SizeVector({1, hiddenSize}), - InferenceEngine::Layout::NC); - // Calculate values after LowLatency transformation - CreatePureTensorIteratorModel(); - if (regular_api) { - cnnNetwork = InferenceEngine::CNNNetwork{function}; - InferenceEngine::LowLatency(cnnNetwork); - ConfigureNetwork(); - executableNetwork = core->LoadNetwork(cnnNetwork, targetDevice, configuration); - } else { - function->validate_nodes_and_infer_types(); - // Apply LowLatency (insert Assigns/ReadValues) and UnrollTensorIterator - ngraph::pass::Manager manager; - manager.register_pass(); // LowLatency enables UnrollTI - manager.run_passes(function); - LoadNetwork(); - } IE_SUPPRESS_DEPRECATED_START auto states = executableNetwork.QueryState(); for (auto& state : states) { @@ -465,14 +420,73 @@ void MultipleLSTMCellTest::RunLowLatency(bool regular_api) { } } IE_SUPPRESS_DEPRECATED_END +} + +void MultipleLSTMCellTest::ApplyLowLatency() { + // Calculate values after LowLatency transformation + CreatePureTensorIteratorModel(); + if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY) { + function->validate_nodes_and_infer_types(); + // Apply LowLatency (insert 
Assigns/ReadValues) and UnrollTensorIterator + pass::Manager manager; + NGRAPH_SUPPRESS_DEPRECATED_START + manager.register_pass(); + NGRAPH_SUPPRESS_DEPRECATED_END // LowLatency enables UnrollTI + manager.run_passes(function); + bool ti_found = helpers::is_tensor_iterator_exist(function); + EXPECT_EQ(ti_found, true); + LoadNetwork(); + } else if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2) { + function->validate_nodes_and_infer_types(); + // Apply LowLatency (insert Assigns/ReadValues) and UnrollTensorIterator + + pass::Manager manager; + manager.register_pass(); + manager.run_passes(function); + bool ti_found = helpers::is_tensor_iterator_exist(function); + EXPECT_EQ(ti_found, false); + LoadNetwork(); + } else if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY_REGULAR_API) { + cnnNetwork = InferenceEngine::CNNNetwork{function}; + IE_SUPPRESS_DEPRECATED_START + InferenceEngine::LowLatency(cnnNetwork); + IE_SUPPRESS_DEPRECATED_END + + bool ti_found = helpers::is_tensor_iterator_exist(cnnNetwork.getFunction()); + EXPECT_EQ(ti_found, true); + + ConfigureNetwork(); + executableNetwork = core->LoadNetwork(cnnNetwork, targetDevice, configuration); + } else if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API) { + cnnNetwork = InferenceEngine::CNNNetwork{function}; + InferenceEngine::lowLatency2(cnnNetwork); + + bool ti_found = helpers::is_tensor_iterator_exist(cnnNetwork.getFunction()); + EXPECT_EQ(ti_found, false); + + ConfigureNetwork(); + executableNetwork = core->LoadNetwork(cnnNetwork, targetDevice, configuration); + } +} + +void MultipleLSTMCellTest::Run() { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + if (transformation != ngraph::helpers::MemoryTransformation::NONE) { + ApplyLowLatency(); + } else { + LoadNetwork(); + } + + InitMemory(); GenerateInputs(); Infer(); - // Calculate ref values for Unrolled TI - CreatePureTensorIteratorModel(); - ngraph::pass::Manager manager_2; - 
manager_2.register_pass(); - manager_2.run_passes(function); + // Calculate ref values + if (transformation == ngraph::helpers::MemoryTransformation::NONE) { + switchToNgraphFriendlyModel(); + } else { + CreatePureTensorIteratorModel(); + } Validate(); } } // namespace SubgraphTestsDefinitions diff --git a/inference-engine/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/utils/ngraph_helpers.hpp b/inference-engine/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/utils/ngraph_helpers.hpp index de015677b4a..de2dbab0612 100644 --- a/inference-engine/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/utils/ngraph_helpers.hpp +++ b/inference-engine/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/utils/ngraph_helpers.hpp @@ -214,6 +214,15 @@ enum class SequenceTestsMode { CONVERT_TO_TI_RAND_SEQ_LEN_PARAM, }; +enum class MemoryTransformation { + NONE, + LOW_LATENCY, + LOW_LATENCY_REGULAR_API, + LOW_LATENCY_V2, + LOW_LATENCY_V2_REGULAR_API, + LOW_LATENCY_V2_ORIGINAL_INIT +}; + std::ostream &operator<<(std::ostream &os, const ReductionType &m); std::ostream &operator<<(std::ostream &os, const PadMode &m); @@ -297,5 +306,7 @@ std::ostream& operator<<(std::ostream & os, TensorIteratorBody type); std::ostream& operator<<(std::ostream & os, SequenceTestsMode type); +std::ostream& operator<<(std::ostream & os, MemoryTransformation type); + } // namespace helpers } // namespace ngraph diff --git a/inference-engine/tests/ngraph_helpers/ngraph_functions/src/utils/ngraph_helpers.cpp b/inference-engine/tests/ngraph_helpers/ngraph_functions/src/utils/ngraph_helpers.cpp index 5de50203ba2..2c5a07540b0 100644 --- a/inference-engine/tests/ngraph_helpers/ngraph_functions/src/utils/ngraph_helpers.cpp +++ b/inference-engine/tests/ngraph_helpers/ngraph_functions/src/utils/ngraph_helpers.cpp @@ -817,5 +817,32 @@ std::ostream& operator<<(std::ostream & os, SequenceTestsMode type) { } return os; } + +std::ostream& operator<<(std::ostream & 
os, MemoryTransformation type) { + switch (type) { + case MemoryTransformation::NONE: + os << "NONE"; + break; + case MemoryTransformation::LOW_LATENCY_V2: + os << "LOW_LATENCY_V2"; + break; + case MemoryTransformation::LOW_LATENCY: + os << "LOW_LATENCY"; + break; + case MemoryTransformation::LOW_LATENCY_V2_REGULAR_API: + os << "LOW_LATENCY_V2_REGULAR_API"; + break; + case MemoryTransformation::LOW_LATENCY_REGULAR_API: + os << "LOW_LATENCY_REGULAR_API"; + break; + case MemoryTransformation::LOW_LATENCY_V2_ORIGINAL_INIT: + os << "LOW_LATENCY_V2_ORIGINAL_INIT"; + break; + default: + throw std::runtime_error("NOT_SUPPORTED_TYPE"); + } + return os; +} + } // namespace helpers } // namespace ngraph diff --git a/model-optimizer/mo/back/offline_transformations.py b/model-optimizer/mo/back/offline_transformations.py index a363a1ca250..1df5f6cb7a0 100644 --- a/model-optimizer/mo/back/offline_transformations.py +++ b/model-optimizer/mo/back/offline_transformations.py @@ -11,7 +11,7 @@ def get_available_transformations(): try: from openvino.offline_transformations import ApplyLowLatencyTransformation # pylint: disable=import-error,no-name-in-module return { - 'LowLatency': ApplyLowLatencyTransformation, + 'LowLatency2': ApplyLowLatencyTransformation, } except Exception as e: return {} diff --git a/model-optimizer/mo/utils/cli_parser.py b/model-optimizer/mo/utils/cli_parser.py index 21d2873b54e..e6b2f2d2517 100644 --- a/model-optimizer/mo/utils/cli_parser.py +++ b/model-optimizer/mo/utils/cli_parser.py @@ -8,6 +8,7 @@ import os import re from collections import OrderedDict from itertools import zip_longest +from distutils.util import strtobool import numpy as np @@ -257,9 +258,9 @@ def get_common_cli_parser(parser: argparse.ArgumentParser = None): help='Apply additional transformations. ' + 'Usage: "--transform transformation_name1[args],transformation_name2..." ' + 'where [args] is key=value pairs separated by semicolon. 
' + - 'Examples: "--transform LowLatency" or ' + - ' "--transform LowLatency[num_iterations=2]" ' + - 'Available transformations: "LowLatency"', + 'Examples: "--transform LowLatency2" or ' + + ' "--transform LowLatency2[use_const_initializer=False]" ' + + 'Available transformations: "LowLatency2"', default="") common_group.add_argument('--disable_fusing', help='Turn off fusing of linear operations to Convolution', @@ -1151,6 +1152,14 @@ def isfloat(value): return False +def isbool(value): + try: + strtobool(value) + return True + except ValueError: + return False + + def convert_string_to_real_type(value: str): values = value.split(',') for i in range(len(values)): @@ -1159,6 +1168,8 @@ def convert_string_to_real_type(value: str): values[i] = int(value) elif isfloat(value): values[i] = float(value) + elif isbool(value): + values[i] = strtobool(value) return values[0] if len(values) == 1 else values diff --git a/model-optimizer/unit_tests/mo/utils/cli_parser_test.py b/model-optimizer/unit_tests/mo/utils/cli_parser_test.py index 68a9994d206..c6c1c96e46a 100644 --- a/model-optimizer/unit_tests/mo/utils/cli_parser_test.py +++ b/model-optimizer/unit_tests/mo/utils/cli_parser_test.py @@ -905,64 +905,65 @@ class TransformChecker(unittest.TestCase): self.assertEqual(parse_transform(""), []) def test_single_pass(self): - self.assertEqual(parse_transform("LowLatency"), [("LowLatency", {})]) + self.assertEqual(parse_transform("LowLatency2"), [("LowLatency2", {})]) def test_single_pass_with_args(self): - self.assertEqual(parse_transform("LowLatency[num_iterations=2]"), - [("LowLatency", {"num_iterations": 2})]) + self.assertEqual(parse_transform("LowLatency2[use_const_initializer=True]"), + [("LowLatency2", {"use_const_initializer": True})]) def test_single_pass_with_multiple_args(self): - self.assertEqual(parse_transform("LowLatency[num_iterations=2;dummy_attr=3.14]"), - [("LowLatency", {"num_iterations": 2, "dummy_attr": 3.14})]) + 
self.assertEqual(parse_transform("LowLatency2[use_const_initializer=True;dummy_attr=3.14]"), + [("LowLatency2", {"use_const_initializer": True, "dummy_attr": 3.14})]) def test_multiple_passes_with_args(self): - self.assertEqual(parse_transform("LowLatency[num_iterations=2],DummyPass[type=ReLU]"), - [("LowLatency", {"num_iterations": 2}), + self.assertEqual(parse_transform("LowLatency2[use_const_initializer=True],DummyPass[type=ReLU]"), + [("LowLatency2", {"use_const_initializer": True}), ("DummyPass", {"type": "ReLU"})]) def test_multiple_passes_with_args2(self): - self.assertEqual(parse_transform("LowLatency[num_iterations=2,3,4.15],DummyPass1,DummyPass2[types=ReLU,PReLU;values=1,2,3]"), - [("LowLatency", {"num_iterations": [2,3,4.15]}), + self.assertEqual(parse_transform("LowLatency2[use_const_initializer=True,False],DummyPass1," + "DummyPass2[types=ReLU,PReLU;values=1,2,3]"), + [("LowLatency2", {"use_const_initializer": [True, False]}), ("DummyPass1", {}), ("DummyPass2", {"types": ["ReLU", "PReLU"], "values": [1,2,3]})]) def test_multiple_passes_no_args(self): - self.assertEqual(parse_transform("DummyPass,LowLatency2"), - [("DummyPass", {}), ("LowLatency2", {})]) + self.assertEqual(parse_transform("DummyPass,LowLatency22"), + [("DummyPass", {}), ("LowLatency22", {})]) def test_single_pass_neg(self): - self.assertRaises(Error, parse_transform, "LowLatency!") + self.assertRaises(Error, parse_transform, "LowLatency2!") def test_multiple_passes_neg(self): - self.assertRaises(Error, parse_transform, "LowLatency;DummyPass") + self.assertRaises(Error, parse_transform, "LowLatency2;DummyPass") def test_single_pass_with_args_neg1(self): - self.assertRaises(Error, parse_transform, "LowLatency[=2]") + self.assertRaises(Error, parse_transform, "LowLatency2[=2]") def test_single_pass_with_args_neg2(self): - self.assertRaises(Error, parse_transform, "LowLatency[key=]") + self.assertRaises(Error, parse_transform, "LowLatency2[key=]") def test_single_pass_with_args_neg3(self): 
- self.assertRaises(Error, parse_transform, "LowLatency[]") + self.assertRaises(Error, parse_transform, "LowLatency2[]") def test_single_pass_with_args_neg4(self): - self.assertRaises(Error, parse_transform, "LowLatency[key=value;]") + self.assertRaises(Error, parse_transform, "LowLatency2[key=value;]") def test_single_pass_with_args_neg5(self): - self.assertRaises(Error, parse_transform, "LowLatency[value]") + self.assertRaises(Error, parse_transform, "LowLatency2[value]") def test_single_pass_with_args_neg6(self): - self.assertRaises(Error, parse_transform, "LowLatency[key=value") + self.assertRaises(Error, parse_transform, "LowLatency2[key=value") @patch("mo.back.offline_transformations.get_available_transformations") def test_check_low_latency_is_available(self, available_transformations): - available_transformations.return_value = {"LowLatency": None} + available_transformations.return_value = {"LowLatency2": None} try: - check_available_transforms([("LowLatency" ,"")], True) + check_available_transforms([("LowLatency2", "")], True) except Error as e: self.assertTrue(False, "Exception \"{}\" is unexpected".format(e)) @patch("mo.back.offline_transformations.get_available_transformations") def test_check_dummy_pass_is_available(self, available_transformations): - available_transformations.return_value = {"LowLatency": None} + available_transformations.return_value = {"LowLatency2": None} self.assertRaises(Error, check_available_transforms, [("DummyPass", "")], True) diff --git a/ngraph/core/include/ngraph/pass/low_latency.hpp b/ngraph/core/include/ngraph/pass/low_latency.hpp index 86757edb800..507ffe3a21b 100644 --- a/ngraph/core/include/ngraph/pass/low_latency.hpp +++ b/ngraph/core/include/ngraph/pass/low_latency.hpp @@ -8,13 +8,14 @@ #include #include +#include namespace ngraph { namespace pass { /** - * @brief The transformation finds all TensorIterator layers in the network, + * @brief The transformation finds all TensorIterator/Loop layers in the network, * 
processes all back edges that describe a connection between Result and Parameter * of the TensorIterator body,and inserts ReadValue layer between Parameter * and the next layers after this Parameter, and Assign layer after the layers @@ -42,11 +43,50 @@ namespace ngraph * by step, the states will store between inferences. */ - class NGRAPH_API LowLatency : public ngraph::pass::MatcherPass + class NGRAPH_DEPRECATED("Use LowLatency2 instead.") NGRAPH_API LowLatency + : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; LowLatency(); }; + + /** + * @brief The transformation finds all TensorIterator/Loop layers in the network, + * processes all back edges that describe a connection between Result and Parameter + * of the TensorIterator/Loop bodies, and inserts ReadValue and Assign layers at the + * input and output corresponding to this back edge. + * Supported platforms: CPU, GNA. + * + * The example below describes the changes made by the transformation + * [] - TensorIterator body + * () - new layer + * BE - back-edge + * + * before applying the transformation: + * -> input1[BE_1 -> Parameter -> Layers ... -> Result -> BE_1 ]output1-> + * + * after applying the transformation: + * ->(ReadValue)-> input1[BE_1 ->Parameter->Layers ...->Result->BE_1]output1 ->(Assign) + * \ + * ->... + * After applying the transformation, the resulting network can be inferred + * step by step, the states will be stored between inferences.
+ */ + class NGRAPH_API LowLatency2 : public ngraph::pass::FunctionPass + { + public: + NGRAPH_RTTI_DECLARATION; + + explicit LowLatency2(bool use_const_initializer = true) + : m_use_const_initializer(use_const_initializer) + { + } + + bool run_on_function(std::shared_ptr f) override; + + private: + bool m_use_const_initializer; + }; } // namespace pass } // namespace ngraph diff --git a/ngraph/core/src/op/tensor_iterator.cpp b/ngraph/core/src/op/tensor_iterator.cpp index 5252d2124f2..35162ef3ceb 100644 --- a/ngraph/core/src/op/tensor_iterator.cpp +++ b/ngraph/core/src/op/tensor_iterator.cpp @@ -129,7 +129,6 @@ void op::v0::TensorIterator::validate_and_infer_types() m_body->get_results().at(merged_input_description->m_body_value_index)->input(0); ends.push_back(body_value.get_node()->shared_from_this()); - auto body_value_partial_shape = body_value.get_partial_shape(); auto body_parameter = m_body->get_parameters().at(merged_input_description->m_body_parameter_index); diff --git a/ngraph/core/src/pass/low_latency.cpp b/ngraph/core/src/pass/low_latency.cpp index ea5effce4ab..d290eb14b7e 100644 --- a/ngraph/core/src/pass/low_latency.cpp +++ b/ngraph/core/src/pass/low_latency.cpp @@ -6,12 +6,29 @@ #include +#include #include +#include #include +#include #include +NGRAPH_RTTI_DEFINITION(ngraph::pass::LowLatency2, "LowLatency2", 0); + +NGRAPH_SUPPRESS_DEPRECATED_START NGRAPH_RTTI_DEFINITION(ngraph::pass::LowLatency, "LowLatency", 0); +using namespace std; +using namespace ngraph; + +namespace +{ + string generate_variable_name(const string& op_name, const string& param_name, int variable_idx) + { + return op_name + "/" + param_name + "/" + "variable_" + to_string(variable_idx); + } + +} // namespace ngraph::pass::LowLatency::LowLatency() { auto tensor_iterator = ngraph::pattern::wrap_type(); @@ -58,11 +75,12 @@ ngraph::pass::LowLatency::LowLatency() const auto& inputs_to = func->get_parameters() .at(merged_in->m_body_parameter_index) ->get_output_target_inputs(0); - 
const std::string variable_name(sub_graph_op->get_friendly_name() + "/" +
-                                                    func->get_parameters()
-                                                        .at(merged_in->m_body_parameter_index)
-                                                        ->get_friendly_name() +
-                                                    "/variable_" + std::to_string(variable_id));
+                    const std::string variable_name(
+                        generate_variable_name(sub_graph_op->get_friendly_name(),
+                                               func->get_parameters()
+                                                   .at(merged_in->m_body_parameter_index)
+                                                   ->get_friendly_name(),
+                                               variable_id));
                     auto variable = std::make_shared(
                         VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name});
                     auto read_value = std::make_shared(
@@ -90,3 +108,203 @@ ngraph::pass::LowLatency::LowLatency()
     auto m = std::make_shared(tensor_iterator, "LowLatency");
     register_matcher(m, callback);
 }
+NGRAPH_SUPPRESS_DEPRECATED_END
+
+// Inlines the body of `sub_graph_op` into `outer_f`; run_on_function calls this only when
+// the operation reports exactly one iteration.
+// Outputs of the body Parameters are replaced by the operation's input sources, consumers
+// of the operation's outputs are re-attached to the producers of the matching body Results
+// (through an identity pair that carries the IE output name), and the sinks of the body are
+// added to `outer_f`.
+void UnrollSingleIteration(const shared_ptr& sub_graph_op,
+                           const shared_ptr& outer_f)
+{
+    using namespace opset7;
+
+    const auto& params = sub_graph_op->get_function()->get_parameters();
+    const auto& results = sub_graph_op->get_function()->get_results();
+
+    // before: Layer1 -> TI [input -> bodyParameter -> Layer2 -> ...]
+    // after:  Layer1 -> Layer2 ->...
+    for (const auto& in : sub_graph_op->get_input_descriptions())
+    {
+        const auto& connect_to = sub_graph_op->get_input_source_output(in->m_input_index);
+        for (auto& output : params.at(in->m_body_parameter_index)->outputs())
+        {
+            output.replace(connect_to);
+        }
+    }
+
+    // before: TI [...-> Layer1 -> Result -> output] -> Layer2 -> ...
+    // after:  ...-> Layer1 -> Layer2 -> ...
+    NodeVector new_ops;
+    for (const auto& out : sub_graph_op->get_output_descriptions())
+    {
+        const auto& connect_to = results.at(out->m_body_value_index)->get_input_source_output(0);
+        for (auto& input_to : sub_graph_op->output(out->m_output_index).get_target_inputs())
+        {
+            // create IE output name
+            std::string out_name = sub_graph_op->get_friendly_name();
+            if (sub_graph_op->get_output_size() != 1)
+                out_name += "." + std::to_string(out->m_output_index);
+
+            // IECompatibility: insert identity (Unsqueeze + Squeeze) to store the TensorIterator
+            // output names
+            auto axis_1 = Constant::create(ngraph::element::i64, ngraph::Shape{1}, {1});
+            auto identity_1 = std::make_shared(connect_to, axis_1);
+            auto identity_2 = std::make_shared(identity_1, axis_1);
+            identity_2->set_friendly_name(out_name);
+            new_ops.push_back(identity_1);
+            new_ops.push_back(identity_2);
+
+            input_to.replace_source_output(identity_2);
+        }
+    }
+    outer_f->add_sinks(sub_graph_op->get_function()->get_sinks());
+    ngraph::copy_runtime_info(sub_graph_op, sub_graph_op->get_function()->get_ops());
+    ngraph::copy_runtime_info(sub_graph_op, new_ops);
+}
+
+// Builds the initial-value sub-graph that feeds ReadValue when m_use_const_initializer is set:
+// a single-element zero constant of in_node's element type plus the `shape_of` / `broadcast`
+// nodes created from in_node (the names suggest the zero is broadcast to in_node's shape).
+// Runtime info of `sub_graph_op` is copied to the new nodes; output 0 of `broadcast` is returned.
+Output create_init_subgraph(const shared_ptr& sub_graph_op,
+                            const Output& in_node)
+{
+    using namespace opset7;
+
+    auto const_zero = make_shared(in_node.get_element_type(), Shape{1}, 0);
+    auto shape_of = make_shared(in_node);
+    auto broadcast = make_shared(const_zero, shape_of);
+    copy_runtime_info(sub_graph_op, {const_zero, shape_of, broadcast});
+    return broadcast->output(0);
+}
+
+// For every sub-graph operation of `f`, puts a ReadValue in front of each merged (back-edge)
+// input and an Assign behind the matching output, then unrolls single-iteration operations.
+// When a ReadValue is already present the pass stops early: the Assign nodes created up to
+// that point are still registered as sinks of `f`, and the result tells whether `f` changed.
+// Returns true once the whole function has been processed.
+bool pass::LowLatency2::run_on_function(shared_ptr f)
+{
+    using namespace opset7;
+
+    SinkVector assigns;
+    // Becomes true as soon as `f` has been rewritten; reported on the early-exit path.
+    bool is_modified = false;
+    // Early-exit helper: add_sinks is the only place the collected Assign nodes get attached
+    // to `f`, so it must also run when the transformation stops half-way.
+    const auto register_sinks_and_stop = [&]() {
+        f->add_sinks(assigns);
+        return is_modified;
+    };
+    for (const auto& op : f->get_ordered_ops())
+    {
+        if (const auto& sub_graph_op = dynamic_pointer_cast(op))
+        {
+            int64_t variable_id = 0;
+            const auto& func = sub_graph_op->get_function();
+            const auto& params = func->get_parameters();
+            for (const auto& in : sub_graph_op->get_input_descriptions())
+            {
+                // Process all back edges
+                if (const auto& merged_in =
+                        dynamic_pointer_cast(in))
+                {
+                    // create new Variable
+                    const string& param_name =
+                        params.at(merged_in->m_body_parameter_index)->get_friendly_name();
+                    const string& var_name = generate_variable_name(
+                        sub_graph_op->get_friendly_name(), param_name, variable_id);
+
+                    const auto& input = sub_graph_op->input(merged_in->m_input_index);
+                    if (std::dynamic_pointer_cast(
+                            input.get_source_output().get_node_shared_ptr()) != nullptr)
+                    {
+                        NGRAPH_DEBUG
+                            << "LowLatency2 transformation cannot be applied because the "
+                            << "ReadValue node is already an input to the TensorIterator. "
+                            << "LowLatency2 transformation may have already been applied, please "
+                            << "do not call it more than once.";
+                        return register_sinks_and_stop();
+                    }
+
+                    const auto& param = sub_graph_op->get_function()->get_parameters().at(
+                        merged_in->m_body_parameter_index);
+                    for (const auto& in_to : param->output(0).get_target_inputs())
+                    {
+                        if (dynamic_cast(in_to.get_node()) != nullptr)
+                        {
+                            NGRAPH_DEBUG
+                                << "LowLatency2 transformation cannot be applied because the "
+                                << "ReadValue node is already inside the TensorIterator. "
+                                << "LowLatency transformation may have been applied, please do "
+                                << "not call LowLatency2 after LowLatency.";
+                            return register_sinks_and_stop();
+                        }
+                    }
+
+                    VariableInfo var_info{PartialShape::dynamic(), element::dynamic, var_name};
+                    auto variable = make_shared(var_info);
+
+                    // insert ReadValue
+                    // Layers -> [new op: ReadValue] -> Subgraph operation
+                    Output read_value_in = input.get_source_output();
+                    if (m_use_const_initializer)
+                    {
+                        read_value_in = create_init_subgraph(sub_graph_op, read_value_in);
+                    }
+                    auto read_value = make_shared(read_value_in, variable);
+                    input.replace_source_output(read_value->output(0));
+                    is_modified = true;
+                    read_value->set_friendly_name(var_name);
+                    ngraph::copy_runtime_info(sub_graph_op, read_value);
+
+                    /* insert Assign
+                    // Subgraph operation -> [new op: Assign]
+                    //                    \
+                    //                     ---> Layers -> ...
+                    */
+                    const auto& out_desc = sub_graph_op->get_output_descriptions();
+                    bool is_output_exist = std::any_of(
+                        out_desc.begin(),
+                        out_desc.end(),
+                        [&merged_in](
+                            const std::shared_ptr& out) {
+                            return out->m_body_value_index == merged_in->m_body_value_index;
+                        });
+                    // Create new output if it doesn't exist.
+                    if (!is_output_exist)
+                    {
+                        sub_graph_op->get_iter_value(
+                            func->get_results().at(merged_in->m_body_value_index));
+                    }
+                    for (const auto& out : sub_graph_op->get_output_descriptions())
+                    {
+                        if (out->m_body_value_index == merged_in->m_body_value_index)
+                        {
+                            auto assign = make_shared(
+                                sub_graph_op->output(out->m_output_index), variable);
+                            ngraph::copy_runtime_info(sub_graph_op, assign);
+                            // control dependency so that ReadValue is processed before Assign
+                            assign->add_control_dependency(read_value);
+                            assigns.emplace_back(assign);
+                            break;
+                        }
+                    }
+                }
+
+                variable_id++;
+            }
+
+            if (sub_graph_op->get_num_iterations() == 1)
+            {
+                UnrollSingleIteration(sub_graph_op, f);
+                is_modified = true;
+            }
+        }
+    }
+    f->add_sinks(assigns);
+    return true;
+}