diff --git a/inference-engine/src/transformations/src/transformations/control_flow/unroll_tensor_iterator.cpp b/inference-engine/src/transformations/src/transformations/control_flow/unroll_tensor_iterator.cpp
index c2db73ea2ce..c418f629e22 100644
--- a/inference-engine/src/transformations/src/transformations/control_flow/unroll_tensor_iterator.cpp
+++ b/inference-engine/src/transformations/src/transformations/control_flow/unroll_tensor_iterator.cpp
@@ -10,7 +10,7 @@
 #include <memory>
 #include <vector>
 
-#include <ngraph/opsets/opset4.hpp>
+#include <ngraph/opsets/opset6.hpp>
 #include <ngraph/graph_util.hpp>
 #include <ngraph/rt_info.hpp>
 
@@ -19,13 +19,13 @@ NGRAPH_RTTI_DEFINITION(ngraph::pass::UnrollTensorIterator, "UnrollTensorIterator
 bool ngraph::pass::UnrollTensorIterator::run_on_function(std::shared_ptr<ngraph::Function> f) {
     RUN_ON_FUNCTION_SCOPE(UnrollTensorIterator);
     for (const auto& op : f->get_ops()) {
-        auto ti = std::dynamic_pointer_cast<ngraph::op::TensorIterator>(op);
-        if (!ti || transformation_callback(ti)) {
+        auto sub_graph_op = std::dynamic_pointer_cast<op::util::SubGraphOp>(op);
+        if (!sub_graph_op || transformation_callback(sub_graph_op)) {
             continue;
         }
 
-        const auto& function = ti->get_body();
-        const auto num_iter = ti->get_num_iterations();
+        const auto &function = sub_graph_op->get_function();
+        int64_t num_iter = sub_graph_op->get_num_iterations();
 
         // negative value means inconsistent TI
         if (num_iter <= -1) {
@@ -38,24 +38,26 @@ bool ngraph::pass::UnrollTensorIterator::run_on_function(std::shared_ptr<ngraph::Function> f) {
         // Create copies of the body, the number of copies is equal to the number of iterations.
         // Assign names to the created layers.
         std::vector<std::shared_ptr<ngraph::Function>> body_functions(num_iter);
         for (int64_t idx = 0; idx < num_iter; ++idx) {
             body_functions[idx] = clone_function(*function);
             for (auto &node : body_functions[idx]->get_ops()) {
-                node->set_friendly_name(ti->get_friendly_name() + "/" + std::to_string(idx + 1) + "/" + node->get_friendly_name());
-                copy_runtime_info(ti, node);
+                node->set_friendly_name(sub_graph_op->get_friendly_name() + "/" + std::to_string(idx + 1) + "/" +
+                                        node->get_friendly_name());
+                copy_runtime_info(sub_graph_op, node);
             }
         }
 
         // Port map : inputs and back edges
-        for (const auto& desc : ti->get_input_descriptions()) {
-            if (const auto& input_desc = std::dynamic_pointer_cast<ngraph::op::TensorIterator::SliceInputDescription>(desc)) {
+        for (const auto &desc : sub_graph_op->get_input_descriptions()) {
+            if (const auto &input_desc = std::dynamic_pointer_cast<op::util::SubGraphOp::SliceInputDescription>(
+                    desc)) {
                 // Connect the sliced input (layer before the input) to the Split layer and connect
                 // the corresponding Split output to the corresponding copy of the body.
                 // If the number of iterations is 1, then the Split is not needed.
-                auto in_data = ti->input_values()[input_desc->m_input_index];
-                const auto const_axis = opset4::Constant::create(element::i64, Shape{}, {input_desc->m_axis});
+                auto in_data = sub_graph_op->input_values()[input_desc->m_input_index];
+                const auto const_axis = opset6::Constant::create(element::i64, Shape{}, {input_desc->m_axis});
                 if (num_iter > 1) {
-                    auto split = std::make_shared<opset4::Split>(in_data, const_axis, num_iter);
-                    copy_runtime_info(ti, split);
+                    auto split = std::make_shared<opset6::Split>(in_data, const_axis, num_iter);
+                    copy_runtime_info(sub_graph_op, split);
                     auto stride = input_desc->m_stride;
                     // connect to the body
                     for (int64_t j = 0; j < num_iter; j++) {
@@ -72,9 +74,10 @@ bool ngraph::pass::UnrollTensorIterator::run_on_function(std::shared_ptr<ngraph::Function> f) {
                 for (auto &output : param->outputs()) {
                     output.replace(in_data);
                 }
             }
-        } else if (const auto& merged_desc = std::dynamic_pointer_cast<ngraph::op::TensorIterator::MergedInputDescription>(desc)) {
+        } else if (const auto &merged_desc = std::dynamic_pointer_cast<op::util::SubGraphOp::MergedInputDescription>(
+                desc)) {
             // Connect the input to the corresponding copy of the body.
-            auto in_data = ti->input_values()[merged_desc->m_input_index];
+            auto in_data = sub_graph_op->input_values()[merged_desc->m_input_index];
             const auto& param = body_functions[0]->get_parameters()[merged_desc->m_body_parameter_index];
             for (auto &output : param->outputs()) {
                 output.replace(in_data);
@@ -88,9 +91,10 @@ bool ngraph::pass::UnrollTensorIterator::run_on_function(std::shared_ptr<ngraph::Function> f) {
                 for (auto &output : cur_param->outputs()) {
                     output.replace(prev_val->get_input_source_output(0));
                 }
             }
-        } else if (const auto& invariant_desc = std::dynamic_pointer_cast<ngraph::op::TensorIterator::InvariantInputDescription>(desc)) {
+        } else if (const auto &invariant_desc = std::dynamic_pointer_cast<op::util::SubGraphOp::InvariantInputDescription>(
+                desc)) {
             // Connect the input to the corresponding copy of the body.
-            auto in_data = ti->input_values()[invariant_desc->m_input_index];
+            auto in_data = sub_graph_op->input_values()[invariant_desc->m_input_index];
             for (int64_t j = 0; j < num_iter; j++) {
                 auto param = body_functions[j]->get_parameters()[invariant_desc->m_body_parameter_index];
                 for (auto &output : param->outputs()) {
@@ -104,8 +108,9 @@ bool ngraph::pass::UnrollTensorIterator::run_on_function(std::shared_ptr<ngraph::Function> f) {
         }
 
         // Port map: outputs
-        for (const auto& desc : ti->get_output_descriptions()) {
-            if (const auto& concat_desc = std::dynamic_pointer_cast<ngraph::op::TensorIterator::ConcatOutputDescription>(desc)) {
+        for (const auto &desc : sub_graph_op->get_output_descriptions()) {
+            if (const auto &concat_desc = std::dynamic_pointer_cast<op::util::SubGraphOp::ConcatOutputDescription>(
+                    desc)) {
                 if (!concat_desc) {
                     return false;
                 }
@@ -121,46 +126,50 @@ bool ngraph::pass::UnrollTensorIterator::run_on_function(std::shared_ptr<ngraph::Function> f) {
                 for (int64_t j = 0; j < num_iter; j++) {
                     auto idx = stride > 0 ? j : num_iter - j - 1;
-                    std::shared_ptr<opset4::Result> result = body_functions[idx]->get_results()[concat_desc->m_body_value_index];
+                    std::shared_ptr<opset6::Result> result = body_functions[idx]->get_results()[concat_desc->m_body_value_index];
                     auto input_to_res = result->get_input_source_output(0);
                     to_concat[j] = input_to_res;
                 }
-                auto concat = std::make_shared<opset4::Concat>(to_concat, concat_desc->m_axis);
-                copy_runtime_info(ti, concat);
+                auto concat = std::make_shared<opset6::Concat>(to_concat, concat_desc->m_axis);
+                copy_runtime_info(sub_graph_op, concat);
 
                 // set output name to Tensor to store it for ngraph to cnn conversion
                 NGRAPH_SUPPRESS_DEPRECATED_START
                 concat->output(0).get_tensor().set_name(
-                    op::util::create_ie_output_name(ti->output(concat_desc->m_output_index)));
+                    op::util::create_ie_output_name(sub_graph_op->output(concat_desc->m_output_index)));
                 NGRAPH_SUPPRESS_DEPRECATED_END
 
                 // connect the Concat layer to the corresponding TI outputs
-                for (auto &input : ti->output(concat_desc->m_output_index).get_target_inputs()) {
+                for (auto &input : sub_graph_op->output(concat_desc->m_output_index).get_target_inputs()) {
                     input.replace_source_output(concat);
                 }
             } else {
                 // Connect outputs of the bodies to the corresponding TI outputs
-                std::shared_ptr<opset4::Result> result = body_functions[0]->get_results().at(concat_desc->m_body_value_index);
+                std::shared_ptr<opset6::Result> result = body_functions[0]->get_results().at(
+                    concat_desc->m_body_value_index);
                 const auto& input_to_res = result->get_input_source_output(0);
                 // set output name to Tensor to store it for ngraph to cnn conversion
                 NGRAPH_SUPPRESS_DEPRECATED_START
-                input_to_res.get_tensor().set_name(op::util::create_ie_output_name(ti->output(concat_desc->m_output_index)));
+                input_to_res.get_tensor().set_name(
+                    op::util::create_ie_output_name(sub_graph_op->output(concat_desc->m_output_index)));
                 NGRAPH_SUPPRESS_DEPRECATED_END
-                for (auto &input : ti->output(concat_desc->m_output_index).get_target_inputs()) {
+                for (auto &input : sub_graph_op->output(concat_desc->m_output_index).get_target_inputs()) {
                     input.replace_source_output(input_to_res);
                 }
             }
-        } else if (const auto& output_desc = std::dynamic_pointer_cast<ngraph::op::TensorIterator::BodyOutputDescription>(desc)) {
+        } else if (const auto &output_desc = std::dynamic_pointer_cast<op::util::SubGraphOp::BodyOutputDescription>(
+                desc)) {
            // Connect outputs of the bodies to the corresponding TI outputs
             auto iter = output_desc->m_iteration;
             iter = iter >= 0? iter: num_iter - 1;
-            std::shared_ptr<opset4::Result> result = body_functions[iter]->get_results()[output_desc->m_body_value_index];
+            std::shared_ptr<opset6::Result> result = body_functions[iter]->get_results()[output_desc->m_body_value_index];
             const auto& in_value = result->input_value(0);
 
             // set output name to Tensor to store it for ngraph to cnn conversion
             NGRAPH_SUPPRESS_DEPRECATED_START
-            in_value.get_tensor().set_name(op::util::create_ie_output_name(ti->output(output_desc->m_output_index)));
+            in_value.get_tensor().set_name(
+                op::util::create_ie_output_name(sub_graph_op->output(output_desc->m_output_index)));
             NGRAPH_SUPPRESS_DEPRECATED_END
-            for (const auto &input : ti->output(output_desc->m_output_index).get_target_inputs()) {
+            for (const auto &input : sub_graph_op->output(output_desc->m_output_index).get_target_inputs()) {
                 input.replace_source_output(result->get_input_source_output(0));
             }
         } else {
@@ -172,6 +181,33 @@ bool ngraph::pass::UnrollTensorIterator::run_on_function(std::shared_ptr<ngraph::Function> f) {
         for (const auto& body_func : body_functions) {
             f->add_sinks(body_func->get_sinks());
         }
+
+        // the current iteration Parameter in Loop body can be disconnected,
+        // we are replacing it with a Constant (value = current iteration idx)
+        const auto &loop = std::dynamic_pointer_cast<opset6::Loop>(sub_graph_op);
+        if (loop) {
+            // 1. Check that the CurrentIteration Parameter is not connected to the outer network
+            bool need_to_remove_iteration_param = false;
+            const auto cur_iter_idx = loop->get_special_body_ports().current_iteration_input_idx;
+            if (cur_iter_idx >= 0) {
+                const auto &in_descs = loop->get_input_descriptions();
+                need_to_remove_iteration_param = std::all_of(in_descs.begin(), in_descs.end(),
+                    [cur_iter_idx](const std::shared_ptr<op::util::SubGraphOp::InputDescription> &in_desc) {
+                        return in_desc->m_body_parameter_index != static_cast<uint64_t>(cur_iter_idx);
+                    });
+            }
+
+            // 2. Replace the CurrentIteration Parameter with a Constant for each copy of the body
+            if (need_to_remove_iteration_param) {
+                for (int64_t idx = 0; idx < num_iter; ++idx) {
+                    const auto iter_idx = loop->get_special_body_ports().current_iteration_input_idx;
+                    const auto &param_to_delete = body_functions[idx]->get_parameters()[iter_idx];
+                    auto cur_iter_const = std::make_shared<opset6::Constant>(ngraph::element::i64, Shape{}, idx);
+                    replace_node(param_to_delete, cur_iter_const);
+                    body_functions[idx]->remove_parameter(param_to_delete);
+                }
+            }
+        }
     }
     return true;
 }
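A note on the new Loop-specific branch at the end of run_on_function(): once the body has been copied per iteration, the iteration counter is a compile-time constant inside each copy, so a current-iteration Parameter that is not fed from the outer graph can be folded away. The following standalone sketch restates that substitution outside the pass; the function name and the parameter index are illustrative, not part of the patch:

    #include <ngraph/ngraph.hpp>
    #include <ngraph/opsets/opset6.hpp>

    // Toy restatement: bind the iteration-number Parameter of one unrolled
    // body copy (assumed to sit at index `param_idx`) to the concrete
    // iteration index `idx`.
    void bind_current_iteration(const std::shared_ptr<ngraph::Function>& body,
                                size_t param_idx, int64_t idx) {
        const auto& param = body->get_parameters()[param_idx];
        auto iter_const = std::make_shared<ngraph::opset6::Constant>(
            ngraph::element::i64, ngraph::Shape{}, idx);
        ngraph::replace_node(param, iter_const); // rewires every consumer of the Parameter
        body->remove_parameter(param);           // the Parameter is now dangling; drop it
    }

The pass only performs this substitution when no input description maps an outer input onto that Parameter, which is exactly what the std::all_of check above verifies.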
diff --git a/inference-engine/tests/functional/inference_engine/transformations/low_latency_test.cpp b/inference-engine/tests/functional/inference_engine/transformations/low_latency_test.cpp
index 17b5916c924..7d85e9ac3b7 100644
--- a/inference-engine/tests/functional/inference_engine/transformations/low_latency_test.cpp
+++ b/inference-engine/tests/functional/inference_engine/transformations/low_latency_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2020 Intel Corporation
+// Copyright (C) 2021 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -357,3 +357,99 @@ TEST(TransformationTests, LowLatencyLSTMReshape) {
     auto res = compare_functions(f, f_ref);
     ASSERT_TRUE(res.first) << res.second;
 }
+
+TEST(TransformationTests, LowLatencyLSTM_Loop) {
+    std::shared_ptr<ngraph::Function> f(nullptr), f_ref(nullptr);
+    {
+        auto X = std::make_shared<opset6::Parameter>(element::f32, Shape{1, 1, 16});
+        auto H_init = std::make_shared<opset6::Parameter>(element::f32, Shape{1, 128});
+        auto C_init = std::make_shared<opset6::Parameter>(element::f32, Shape{1, 128});
+
+        auto Xi = std::make_shared<opset6::Parameter>(element::f32, Shape{1, 1, 16});
+        auto H_t = std::make_shared<opset6::Parameter>(element::f32, Shape{1, 128});
+        auto C_t = std::make_shared<opset6::Parameter>(element::f32, Shape{1, 128});
+
+        // Body
+        auto axis = ngraph::opset6::Constant::create(ngraph::element::i64, ngraph::Shape{}, {0});
+        auto squeeze = std::make_shared<opset6::Squeeze>(Xi, axis);
+
+        auto w_val = std::vector<float>(512 * 16, 0);
+        auto r_val = std::vector<float>(512 * 128, 0);
+        auto b_val = std::vector<float>(512, 0);
+        auto W = ngraph::opset6::Constant::create(ngraph::element::f32, ngraph::Shape{512, 16}, w_val);
+        auto R = ngraph::opset6::Constant::create(ngraph::element::f32, ngraph::Shape{512, 128}, r_val);
+        auto B = ngraph::opset6::Constant::create(ngraph::element::f32, ngraph::Shape{512}, b_val);
+
+        auto lstm_cell = std::make_shared<opset6::LSTMCell>(squeeze, H_t, C_t, W, R, B, 128);
+        auto res_1 = std::make_shared<opset6::Result>(lstm_cell->output(0));
+        auto unsqueeze = std::make_shared<opset6::Unsqueeze>(lstm_cell->output(0), axis);
+        auto res_2 = std::make_shared<opset6::Result>(unsqueeze);
+        auto res_3 = std::make_shared<opset6::Result>(lstm_cell->output(1));
+        auto body_condition = std::make_shared<ngraph::opset6::Constant>(
+            ngraph::element::boolean, ngraph::Shape{1}, false);
+        auto body = std::make_shared<ngraph::Function>(OutputVector{res_1, res_2, res_3, body_condition},
+                                                       ParameterVector{Xi, H_t, C_t});
+
+        auto trip_count =
+            std::make_shared<ngraph::opset6::Constant>(ngraph::element::i64, ngraph::Shape{}, 10);
+        auto exec_condition =
+            std::make_shared<ngraph::opset6::Constant>(ngraph::element::boolean, ngraph::Shape{}, true);
+        auto loop = std::make_shared<opset6::Loop>(trip_count, exec_condition);
+        loop->set_special_body_ports({-1, 3});
+        loop->set_function(body);
+        loop->set_friendly_name("LSTMLoop");
+
+        loop->set_merged_input(C_t, C_init, res_3);
+        loop->set_sliced_input(Xi, X, 0, 1, 1, -1, 0);
+        loop->set_merged_input(H_t, H_init, res_1);
+
+        auto out0 = loop->get_iter_value(res_1, -1);
+        auto out1 = loop->get_concatenated_slices(res_2, 0, 1, 1, -1, 0);
+
+        auto res_ti_1 = std::make_shared<opset6::Result>(loop->output(1));
+        auto res_ti_2 = std::make_shared<opset6::Result>(loop->output(0));
+        f = std::make_shared<ngraph::Function>(ngraph::NodeVector{res_ti_1, res_ti_2},
+                                               ngraph::ParameterVector{X, H_init, C_init});
+
+        ngraph::pass::Manager manager;
+        manager.register_pass<ngraph::pass::InitNodeInfo>();
+        manager.register_pass<ngraph::pass::LowLatency>();
+        manager.register_pass<ngraph::pass::UnrollTensorIterator>();
+        manager.run_passes(f);
+    }
+    {
+        auto Xi = std::make_shared<opset6::Parameter>(element::f32, Shape{1, 1, 16});
+        auto H_t = std::make_shared<opset6::Parameter>(element::f32, Shape{1, 128});
+        auto C_t = std::make_shared<opset6::Parameter>(element::f32, Shape{1, 128});
+
+        const std::string variable_name_H("LSTMTensorIterator/variable0");
+        const std::string variable_name_C("LSTMTensorIterator/variable1");
+        auto variable_H = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_H});
+        auto variable_C = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_C});
+        auto read_value_H = std::make_shared<opset6::ReadValue>(H_t, variable_H);
+        auto read_value_C = std::make_shared<opset6::ReadValue>(C_t, variable_C);
+        // Body
+        auto axis = ngraph::opset6::Constant::create(ngraph::element::i64, ngraph::Shape{}, {0});
+        auto squeeze = std::make_shared<opset6::Squeeze>(Xi, axis);
+
+        auto w_val = std::vector<float>(512 * 16, 0);
+        auto r_val = std::vector<float>(512 * 128, 0);
+        auto b_val = std::vector<float>(512, 0);
+        auto W = ngraph::opset6::Constant::create(ngraph::element::f32, ngraph::Shape{512, 16}, w_val);
+        auto R = ngraph::opset6::Constant::create(ngraph::element::f32, ngraph::Shape{512, 128}, r_val);
+        auto B = ngraph::opset6::Constant::create(ngraph::element::f32, ngraph::Shape{512}, b_val);
+
+        auto lstm_cell = std::make_shared<opset6::LSTMCell>(squeeze, read_value_H, read_value_C, W, R, B, 128);
+        auto assign_H = std::make_shared<opset6::Assign>(lstm_cell->output(0), variable_H);
+        auto assign_C = std::make_shared<opset6::Assign>(lstm_cell->output(1), variable_C);
+        auto unsqueeze = std::make_shared<opset6::Unsqueeze>(lstm_cell->output(0), axis);
+        auto res_2 = std::make_shared<opset6::Result>(unsqueeze);
+        auto res_1 = std::make_shared<opset6::Result>(lstm_cell->output(0));
+        f_ref = std::make_shared<ngraph::Function>(OutputVector{res_1, res_2}, ParameterVector{Xi, H_t, C_t});
+        f_ref->add_sinks({assign_C, assign_H});
+        assign_H->add_control_dependency(read_value_H);
+        assign_C->add_control_dependency(read_value_C);
+    }
+    auto res = compare_functions(f, f_ref);
+    ASSERT_TRUE(res.first) << res.second;
+}
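One detail of the Loop wiring in the test above is worth spelling out: set_special_body_ports({-1, 3}) fills the SpecialBodyPorts aggregate of opset6::Loop. A hedged restatement, where `loop` is the object built in the test and the field names come from the op definition:

    // SpecialBodyPorts bundles the two "special" body ports of a Loop:
    //   current_iteration_input_idx - body Parameter fed with the iteration
    //                                 number; -1 means the body does not use it;
    //   body_condition_output_idx   - body Result producing the continue-condition.
    ngraph::opset6::Loop::SpecialBodyPorts ports;
    ports.current_iteration_input_idx = -1; // this body has no iteration input
    ports.body_condition_output_idx = 3;    // body_condition is body output #3
    loop->set_special_body_ports(ports);    // same effect as the brace-init form

Because current_iteration_input_idx is -1 here, unrolling this Loop never exercises the Parameter-to-Constant replacement added in the transformation above.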
diff --git a/inference-engine/tests/functional/inference_engine/transformations/unroll_loop_test.cpp b/inference-engine/tests/functional/inference_engine/transformations/unroll_loop_test.cpp
new file mode 100644
index 00000000000..9c6ce38dece
--- /dev/null
+++ b/inference-engine/tests/functional/inference_engine/transformations/unroll_loop_test.cpp
@@ -0,0 +1,546 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+
+#include "common_test_utils/test_common.hpp"
+#include <string>
+#include <sstream>
+#include <memory>
+
+#include <ngraph/function.hpp>
+#include <ngraph/opsets/opset6.hpp>
+#include <ngraph/op/util/sub_graph_base.hpp>
+#include <ngraph/pass/manager.hpp>
+#include <transformations/control_flow/unroll_tensor_iterator.hpp>
+#include <transformations/init_node_info.hpp>
+#include <transformations/utils/utils.hpp>
+
+#include "common_test_utils/ngraph_test_utils.hpp"
+
+using namespace testing;
+using namespace ngraph;
+using namespace opset6;
+
+TEST(TransformationTests, UnrollLoopGRUCell) {
+    std::shared_ptr<ngraph::Function> f(nullptr), f_ref(nullptr);
+    {
+        auto X = std::make_shared<Parameter>(element::f32, Shape{2, 1, 16});
+        auto Y = std::make_shared<Parameter>(element::f32, Shape{1, 128});
+
+        auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
+        auto Yi = std::make_shared<Parameter>(element::f32, Shape{1, 128});
+
+        // Body
+        auto axis = Constant::create(element::i64, Shape{}, {0});
+        auto squeeze = std::make_shared<Squeeze>(Xi, axis);
+
+        auto w_val = std::vector<float>(384*16, 0);
+        auto r_val = std::vector<float>(384*128, 0);
+        auto b_val = std::vector<float>(384, 0);
+        auto W = Constant::create(element::f32, Shape{384, 16}, w_val);
+        auto R = Constant::create(element::f32, Shape{384, 128}, r_val);
+        auto B = Constant::create(element::f32, Shape{384}, b_val);
+
+        auto gru_cell = std::make_shared<GRUCell>(squeeze, Yi, W, R, B, 128);
+        auto res_1 = std::make_shared<Result>(gru_cell);
+        auto unsqueeze = std::make_shared<Unsqueeze>(gru_cell, axis);
+        auto res_2 = std::make_shared<Result>(unsqueeze);
+        auto body_condition = std::make_shared<ngraph::opset6::Constant>(
+                ngraph::element::boolean, ngraph::Shape{1}, true);
+        auto body = std::make_shared<ngraph::Function>(OutputVector{res_1, res_2, body_condition},
+                                                       ParameterVector{Xi, Yi});
+
+        auto trip_count =
+                std::make_shared<ngraph::opset6::Constant>(ngraph::element::i64, ngraph::Shape{}, 2);
+        auto exec_condition =
+                std::make_shared<ngraph::opset6::Constant>(ngraph::element::boolean, ngraph::Shape{}, true);
+        auto loop = std::make_shared<Loop>(trip_count, exec_condition);
+        loop->set_special_body_ports({-1, 2});
+        loop->set_function(body);
+
+        loop->set_sliced_input(Xi, X, 0, 1, 1, -1, 0);
+        loop->set_merged_input(Yi, Y, res_1);
+
+        auto out0 = loop->get_iter_value(res_1, -1);
+        auto out1 = loop->get_concatenated_slices(res_2, 0, 1, 1, -1, 0);
+
+        auto res_ti_1 = std::make_shared<Result>(loop->output(1));
+        //auto res_ti_2 = std::make_shared<Result>(loop->output(0));
+        f = std::make_shared<ngraph::Function>(NodeVector{res_ti_1},
+                                               ParameterVector{X, Y});
+
+        pass::Manager manager;
+        manager.register_pass<pass::InitNodeInfo>();
+        manager.register_pass<pass::UnrollTensorIterator>();
+        manager.run_passes(f);
+
+        ASSERT_NO_THROW(check_rt_info(f));
+    }
+
+    {
+        auto X = std::make_shared<Parameter>(element::f32, Shape{2, 1, 16});
+        auto Y = std::make_shared<Parameter>(element::f32, Shape{1, 128});
+
+        auto axis_split = Constant::create(element::i64, Shape{}, {0});
+        auto split = std::make_shared<Split>(X, axis_split, 2);
+        auto axis = Constant::create(element::i64, Shape{}, {0});
+        auto squeeze_1 = std::make_shared<Squeeze>(split->output(0), axis);
+        auto squeeze_2 = std::make_shared<Squeeze>(split->output(1), axis);
+
+        auto w_val = std::vector<float>(384*16, 0);
+        auto r_val = std::vector<float>(384*128, 0);
+        auto b_val = std::vector<float>(384, 0);
+        auto W = Constant::create(element::f32, Shape{384, 16}, w_val);
+        auto R = Constant::create(element::f32, Shape{384, 128}, r_val);
+        auto B = Constant::create(element::f32, Shape{384}, b_val);
+
+        auto gru_cell_1 = std::make_shared<GRUCell>(squeeze_1, Y, W, R, B, 128);
+        auto gru_cell_2 = std::make_shared<GRUCell>(squeeze_2, gru_cell_1, W, R, B, 128);
+
+        auto unsqueeze_1 = std::make_shared<Unsqueeze>(gru_cell_1, axis);
+        auto unsqueeze_2 = std::make_shared<Unsqueeze>(gru_cell_2, axis);
+        auto concat = std::make_shared<Concat>(OutputVector{unsqueeze_1, unsqueeze_2}, 0);
+
+        auto res_ti_1 = std::make_shared<Result>(concat);
+        //auto res_ti_2 = std::make_shared<Result>(unsqueeze_2);
+        f_ref = std::make_shared<ngraph::Function>(NodeVector{res_ti_1}, ParameterVector{X, Y});
+    }
+
+    auto res = compare_functions(f, f_ref);
+    ASSERT_TRUE(res.first) << res.second;
+}
+
+TEST(TransformationTests, UnrollLoopRNNCell) {
+    std::shared_ptr<ngraph::Function> f(nullptr), f_ref(nullptr);
+    {
+        auto X = std::make_shared<Parameter>(element::f32, Shape{2, 1, 16});
+        auto Y = std::make_shared<Parameter>(element::f32, Shape{1, 128});
+
+        auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
+        auto Yi = std::make_shared<Parameter>(element::f32, Shape{1, 128});
+
+        // Body
+        auto axis = Constant::create(element::i64, Shape{}, {0});
+        auto squeeze = std::make_shared<Squeeze>(Xi, axis);
+
+        auto w_val = std::vector<float>(128*16, 0);
+        auto r_val = std::vector<float>(128*128, 0);
+        auto b_val = std::vector<float>(128, 0);
+        auto W = Constant::create(element::f32, Shape{128, 16}, w_val);
+        auto R = Constant::create(element::f32, Shape{128, 128}, r_val);
+        auto B = Constant::create(element::f32, Shape{128}, b_val);
+
+        auto rnn_cell = std::make_shared<RNNCell>(squeeze, Yi, W, R, B, 128);
+        auto res_1 = std::make_shared<Result>(rnn_cell);
+        auto unsqueeze = std::make_shared<Unsqueeze>(rnn_cell, axis);
+        auto res_2 = std::make_shared<Result>(unsqueeze);
+        auto body_condition = std::make_shared<ngraph::opset6::Constant>(
+                ngraph::element::boolean, ngraph::Shape{1}, true);
+        auto body = std::make_shared<ngraph::Function>(OutputVector{res_1, res_2, body_condition},
+                                                       ParameterVector{Xi, Yi});
+
+        auto trip_count =
+                std::make_shared<ngraph::opset6::Constant>(ngraph::element::i64, ngraph::Shape{}, 2);
+        auto exec_condition =
+                std::make_shared<ngraph::opset6::Constant>(ngraph::element::boolean, ngraph::Shape{}, true);
+        auto loop = std::make_shared<Loop>(trip_count, exec_condition);
+        loop->set_special_body_ports({-1, 2});
+        loop->set_function(body);
+
+        loop->set_sliced_input(Xi, X, 0, 1, 1, -1, 0);
+        loop->set_merged_input(Yi, Y, res_1);
+
+        auto out0 = loop->get_iter_value(res_1, -1);
+        auto out1 = loop->get_concatenated_slices(res_2, 0, 1, 1, -1, 0);
+
+        auto res_ti_1 = std::make_shared<Result>(loop->output(1));
+        //auto res_ti_2 = std::make_shared<Result>(loop->output(0));
+        f = std::make_shared<ngraph::Function>(NodeVector{res_ti_1},
+                                               ParameterVector{X, Y});
+
+        pass::Manager manager;
+        manager.register_pass<pass::InitNodeInfo>();
+        manager.register_pass<pass::UnrollTensorIterator>();
+        manager.run_passes(f);
+
+        ASSERT_NO_THROW(check_rt_info(f));
+    }
+
+    {
+        auto X = std::make_shared<Parameter>(element::f32, Shape{2, 1, 16});
+        auto Y = std::make_shared<Parameter>(element::f32, Shape{1, 128});
+
+        auto axis_split = Constant::create(element::i64, Shape{}, {0});
+        auto split = std::make_shared<Split>(X, axis_split, 2);
+        auto axis = Constant::create(element::i64, Shape{}, {0});
+        auto squeeze_1 = std::make_shared<Squeeze>(split->output(0), axis);
+        auto squeeze_2 = std::make_shared<Squeeze>(split->output(1), axis);
+
+        auto w_val = std::vector<float>(128*16, 0);
+        auto r_val = std::vector<float>(128*128, 0);
+        auto b_val = std::vector<float>(128, 0);
+        auto W = Constant::create(element::f32, Shape{128, 16}, w_val);
+        auto R = Constant::create(element::f32, Shape{128, 128}, r_val);
+        auto B = Constant::create(element::f32, Shape{128}, b_val);
+
+        auto rnn_cell_1 = std::make_shared<RNNCell>(squeeze_1, Y, W, R, B, 128);
+        auto rnn_cell_2 = std::make_shared<RNNCell>(squeeze_2, rnn_cell_1, W, R, B, 128);
+
+        auto unsqueeze_1 = std::make_shared<Unsqueeze>(rnn_cell_1, axis);
+        auto unsqueeze_2 = std::make_shared<Unsqueeze>(rnn_cell_2, axis);
+        auto concat = std::make_shared<Concat>(OutputVector{unsqueeze_1, unsqueeze_2}, 0);
+
+        auto res_ti_1 = std::make_shared<Result>(concat);
+        //auto res_ti_2 = std::make_shared<Result>(unsqueeze_2);
+        f_ref = std::make_shared<ngraph::Function>(NodeVector{res_ti_1}, ParameterVector{X, Y});
+    }
+
+    auto res = compare_functions(f, f_ref);
+    ASSERT_TRUE(res.first) << res.second;
+}
+
+TEST(TransformationTests, UnrollLoopLSTMCell) {
+    std::shared_ptr<ngraph::Function> f(nullptr), f_ref(nullptr);
+    {
+        auto X = std::make_shared<Parameter>(element::f32, Shape{2, 1, 16});
+        auto Y = std::make_shared<Parameter>(element::f32, Shape{1, 128});
+        auto Z = std::make_shared<Parameter>(element::f32, Shape{1, 128});
+
+        auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
+        auto Yi = std::make_shared<Parameter>(element::f32, Shape{1, 128});
+        auto Zi = std::make_shared<Parameter>(element::f32, Shape{1, 128});
+
+        // Body
+        auto axis = Constant::create(element::i64, Shape{}, {0});
+        auto squeeze = std::make_shared<Squeeze>(Xi, axis);
+
+        auto w_val = std::vector<float>(512*16, 0);
+        auto r_val = std::vector<float>(512*128, 0);
+        auto b_val = std::vector<float>(512, 0);
+        auto W = Constant::create(element::f32, Shape{512, 16}, w_val);
+        auto R = Constant::create(element::f32, Shape{512, 128}, r_val);
+        auto B = Constant::create(element::f32, Shape{512}, b_val);
+
+        auto lstm_cell = std::make_shared<LSTMCell>(squeeze, Yi, Zi, W, R, B, 128);
+        auto res_1 = std::make_shared<Result>(lstm_cell);
+        auto unsqueeze = std::make_shared<Unsqueeze>(lstm_cell, axis);
+        auto res_2 = std::make_shared<Result>(unsqueeze);
+        auto body_condition = std::make_shared<ngraph::opset6::Constant>(
+                ngraph::element::boolean, ngraph::Shape{1}, true);
+        auto body = std::make_shared<ngraph::Function>(OutputVector{res_1, res_2, body_condition},
+                                                       ParameterVector{Xi, Yi, Zi});
+
+        auto trip_count =
+                std::make_shared<ngraph::opset6::Constant>(ngraph::element::i64, ngraph::Shape{}, 2);
+        auto exec_condition =
+                std::make_shared<ngraph::opset6::Constant>(ngraph::element::boolean, ngraph::Shape{}, true);
+        auto loop = std::make_shared<Loop>(trip_count, exec_condition);
+        loop->set_special_body_ports({-1, 2});
+        loop->set_function(body);
+
+        loop->set_invariant_input(Zi, Z);
+        loop->set_sliced_input(Xi, X, 0, 1, 1, -1, 0);
+        loop->set_merged_input(Yi, Y, res_1);
+
+        auto out0 = loop->get_iter_value(res_1, -1);
+        auto out1 = loop->get_concatenated_slices(res_2, 0, 1, 1, -1, 0);
+
+        auto res_ti_1 = std::make_shared<Result>(loop->output(1));
+        //auto res_ti_2 = std::make_shared<Result>(loop->output(0));
+        f = std::make_shared<ngraph::Function>(NodeVector{res_ti_1},
+                                               ParameterVector{X, Y, Z});
+
+        pass::Manager manager;
+        manager.register_pass<pass::InitNodeInfo>();
+        manager.register_pass<pass::UnrollTensorIterator>();
+        manager.run_passes(f);
+
+        ASSERT_NO_THROW(check_rt_info(f));
+    }
+
+    {
+        auto X = std::make_shared<Parameter>(element::f32, Shape{2, 1, 16});
+        auto Y = std::make_shared<Parameter>(element::f32, Shape{1, 128});
+        auto Z = std::make_shared<Parameter>(element::f32, Shape{1, 128});
+
+        auto axis_split = Constant::create(element::i64, Shape{}, {0});
+        auto split = std::make_shared<Split>(X, axis_split, 2);
+        auto axis = Constant::create(element::i64, Shape{}, {0});
+        auto squeeze_1 = std::make_shared<Squeeze>(split->output(0), axis);
+        auto squeeze_2 = std::make_shared<Squeeze>(split->output(1), axis);
+
+        auto w_val = std::vector<float>(512*16, 0);
+        auto r_val = std::vector<float>(512*128, 0);
+        auto b_val = std::vector<float>(512, 0);
+        auto W = Constant::create(element::f32, Shape{512, 16}, w_val);
+        auto R = Constant::create(element::f32, Shape{512, 128}, r_val);
+        auto B = Constant::create(element::f32, Shape{512}, b_val);
+
+        auto lstm_cell_1 = std::make_shared<LSTMCell>(squeeze_1, Y, Z, W, R, B, 128);
+        auto lstm_cell_2 = std::make_shared<LSTMCell>(squeeze_2, lstm_cell_1, Z, W, R, B, 128);
+
+        auto unsqueeze_1 = std::make_shared<Unsqueeze>(lstm_cell_1, axis);
+        auto unsqueeze_2 = std::make_shared<Unsqueeze>(lstm_cell_2, axis);
+        auto concat = std::make_shared<Concat>(OutputVector{unsqueeze_1, unsqueeze_2}, 0);
+
+        auto res_ti_1 = std::make_shared<Result>(concat);
+        //auto res_ti_2 = std::make_shared<Result>(unsqueeze_2);
+        f_ref = std::make_shared<ngraph::Function>(NodeVector{res_ti_1}, ParameterVector{X, Y, Z});
+    }
+
+    auto res = compare_functions(f, f_ref);
+    ASSERT_TRUE(res.first) << res.second;
+}
+
+TEST(TransformationTests, UnrollLoopGRUCellSingleIteration) {
+    std::shared_ptr<ngraph::Function> f(nullptr), f_ref(nullptr);
+    {
+        auto X = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
+        auto Y = std::make_shared<Parameter>(element::f32, Shape{1, 128});
+
+        auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
+        auto Yi = std::make_shared<Parameter>(element::f32, Shape{1, 128});
+
+        // Body
+        auto axis = Constant::create(element::i64, Shape{}, {0});
+        auto squeeze = std::make_shared<Squeeze>(Xi, axis);
+
+        auto w_val = std::vector<float>(384*16, 0);
+        auto r_val = std::vector<float>(384*128, 0);
+        auto b_val = std::vector<float>(384, 0);
+        auto W = Constant::create(element::f32, Shape{384, 16}, w_val);
+        auto R = Constant::create(element::f32, Shape{384, 128}, r_val);
+        auto B = Constant::create(element::f32, Shape{384}, b_val);
+
+        auto gru_cell = std::make_shared<GRUCell>(squeeze, Yi, W, R, B, 128);
+        auto res_1 = std::make_shared<Result>(gru_cell);
+        auto unsqueeze = std::make_shared<Unsqueeze>(gru_cell, axis);
+        auto res_2 = std::make_shared<Result>(unsqueeze);
+        auto body_condition = std::make_shared<ngraph::opset6::Constant>(
+                ngraph::element::boolean, ngraph::Shape{1}, true);
+        auto body = std::make_shared<ngraph::Function>(OutputVector{res_1, res_2, body_condition},
+                                                       ParameterVector{Xi, Yi});
+
+        auto trip_count =
+                std::make_shared<ngraph::opset6::Constant>(ngraph::element::i64, ngraph::Shape{}, 1);
+        auto exec_condition =
+                std::make_shared<ngraph::opset6::Constant>(ngraph::element::boolean, ngraph::Shape{}, true);
+        auto loop = std::make_shared<Loop>(trip_count, exec_condition);
+        loop->set_special_body_ports({-1, 2});
+        loop->set_function(body);
+
+        loop->set_sliced_input(Xi, X, 0, 1, 1, -1, 0);
+        loop->set_merged_input(Yi, Y, res_1);
+
+        auto out0 = loop->get_iter_value(res_1, -1);
+        auto out1 = loop->get_concatenated_slices(res_2, 0, 1, 1, -1, 0);
+
+        auto res_ti_1 = std::make_shared<Result>(loop->output(1));
+        //auto res_ti_2 = std::make_shared<Result>(loop->output(0));
+        f = std::make_shared<ngraph::Function>(NodeVector{res_ti_1},
+                                               ParameterVector{X, Y});
+
+        pass::Manager manager;
+        manager.register_pass<pass::InitNodeInfo>();
+        manager.register_pass<pass::UnrollTensorIterator>();
+        manager.run_passes(f);
+
+        ASSERT_NO_THROW(check_rt_info(f));
+    }
+
+    {
+        auto X = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
+        auto Y = std::make_shared<Parameter>(element::f32, Shape{1, 128});
+
+        auto axis = Constant::create(element::i64, Shape{}, {0});
+        auto squeeze_1 = std::make_shared<Squeeze>(X, axis);
+
+        auto w_val = std::vector<float>(384*16, 0);
+        auto r_val = std::vector<float>(384*128, 0);
+        auto b_val = std::vector<float>(384, 0);
+        auto W = Constant::create(element::f32, Shape{384, 16}, w_val);
+        auto R = Constant::create(element::f32, Shape{384, 128}, r_val);
+        auto B = Constant::create(element::f32, Shape{384}, b_val);
+
+        auto gru_cell_1 = std::make_shared<GRUCell>(squeeze_1, Y, W, R, B, 128);
+
+        auto unsqueeze_1 = std::make_shared<Unsqueeze>(gru_cell_1, axis);
+
+        auto res_ti_1 = std::make_shared<Result>(unsqueeze_1);
+        //auto res_ti_2 = std::make_shared<Result>(unsqueeze_2);
+        f_ref = std::make_shared<ngraph::Function>(NodeVector{res_ti_1}, ParameterVector{X, Y});
+    }
+
+    auto res = compare_functions(f, f_ref);
+    ASSERT_TRUE(res.first) << res.second;
+}
+
+TEST(TransformationTests, UnrollLoopRNNCellSingleIteration) {
+    std::shared_ptr<ngraph::Function> f(nullptr), f_ref(nullptr);
+    {
+        auto X = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
+        auto Y = std::make_shared<Parameter>(element::f32, Shape{1, 128});
+
+        auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
+        auto Yi = std::make_shared<Parameter>(element::f32, Shape{1, 128});
+
+        // Body
+        auto axis = Constant::create(element::i64, Shape{}, {0});
+        auto squeeze = std::make_shared<Squeeze>(Xi, axis);
+
+        auto w_val = std::vector<float>(128*16, 0);
+        auto r_val = std::vector<float>(128*128, 0);
+        auto b_val = std::vector<float>(128, 0);
+        auto W = Constant::create(element::f32, Shape{128, 16}, w_val);
+        auto R = Constant::create(element::f32, Shape{128, 128}, r_val);
+        auto B = Constant::create(element::f32, Shape{128}, b_val);
+
+        auto rnn_cell = std::make_shared<RNNCell>(squeeze, Yi, W, R, B, 128);
+        auto res_1 = std::make_shared<Result>(rnn_cell);
+        auto unsqueeze = std::make_shared<Unsqueeze>(rnn_cell, axis);
+        auto res_2 = std::make_shared<Result>(unsqueeze);
+        auto body_condition = std::make_shared<ngraph::opset6::Constant>(
+                ngraph::element::boolean, ngraph::Shape{1}, true);
+        auto body = std::make_shared<ngraph::Function>(OutputVector{res_1, res_2, body_condition},
+                                                       ParameterVector{Xi, Yi});
+
+        auto trip_count =
+                std::make_shared<ngraph::opset6::Constant>(ngraph::element::i64, ngraph::Shape{}, 1);
+        auto exec_condition =
+                std::make_shared<ngraph::opset6::Constant>(ngraph::element::boolean, ngraph::Shape{}, true);
+        auto loop = std::make_shared<Loop>(trip_count, exec_condition);
+        loop->set_special_body_ports({-1, 2});
+        loop->set_function(body);
+
+        loop->set_sliced_input(Xi, X, 0, 1, 1, -1, 0);
+        loop->set_merged_input(Yi, Y, res_1);
+
+        auto out0 = loop->get_iter_value(res_1, -1);
+        auto out1 = loop->get_concatenated_slices(res_2, 0, 1, 1, -1, 0);
+
+        auto res_ti_1 = std::make_shared<Result>(loop->output(1));
+        //auto res_ti_2 = std::make_shared<Result>(loop->output(0));
+        f = std::make_shared<ngraph::Function>(NodeVector{res_ti_1},
+                                               ParameterVector{X, Y});
+
+        pass::Manager manager;
+        manager.register_pass<pass::InitNodeInfo>();
+        manager.register_pass<pass::UnrollTensorIterator>();
+        manager.run_passes(f);
+
+        ASSERT_NO_THROW(check_rt_info(f));
+    }
+
+    {
+        auto X = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
+        auto Y = std::make_shared<Parameter>(element::f32, Shape{1, 128});
+
+        auto axis = Constant::create(element::i64, Shape{}, {0});
+        auto squeeze_1 = std::make_shared<Squeeze>(X, axis);
+
+        auto w_val = std::vector<float>(128*16, 0);
+        auto r_val = std::vector<float>(128*128, 0);
+        auto b_val = std::vector<float>(128, 0);
+        auto W = Constant::create(element::f32, Shape{128, 16}, w_val);
+        auto R = Constant::create(element::f32, Shape{128, 128}, r_val);
+        auto B = Constant::create(element::f32, Shape{128}, b_val);
+
+        auto rnn_cell_1 = std::make_shared<RNNCell>(squeeze_1, Y, W, R, B, 128);
+
+        auto unsqueeze_1 = std::make_shared<Unsqueeze>(rnn_cell_1, axis);
+        auto res_ti_1 = std::make_shared<Result>(unsqueeze_1);
+
+        f_ref = std::make_shared<ngraph::Function>(NodeVector{res_ti_1}, ParameterVector{X, Y});
+    }
+
+    auto res = compare_functions(f, f_ref);
+    ASSERT_TRUE(res.first) << res.second;
+}
+
+TEST(TransformationTests, UnrollLoopLSTMCellSingleIteration) {
+    std::shared_ptr<ngraph::Function> f(nullptr), f_ref(nullptr);
+    {
+        auto X = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
+        auto Y = std::make_shared<Parameter>(element::f32, Shape{1, 128});
+        auto Z = std::make_shared<Parameter>(element::f32, Shape{1, 128});
+
+        auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
+        auto Yi = std::make_shared<Parameter>(element::f32, Shape{1, 128});
+        auto Zi = std::make_shared<Parameter>(element::f32, Shape{1, 128});
+
+        // Body
+        auto axis = Constant::create(element::i64, Shape{}, {0});
+        auto squeeze = std::make_shared<Squeeze>(Xi, axis);
+
+        auto w_val = std::vector<float>(512*16, 0);
+        auto r_val = std::vector<float>(512*128, 0);
+        auto b_val = std::vector<float>(512, 0);
+        auto W = Constant::create(element::f32, Shape{512, 16}, w_val);
+        auto R = Constant::create(element::f32, Shape{512, 128}, r_val);
+        auto B = Constant::create(element::f32, Shape{512}, b_val);
+
+        auto lstm_cell = std::make_shared<LSTMCell>(squeeze, Yi, Zi, W, R, B, 128);
+        auto res_1 = std::make_shared<Result>(lstm_cell);
+        auto unsqueeze = std::make_shared<Unsqueeze>(lstm_cell, axis);
+        auto res_2 = std::make_shared<Result>(unsqueeze);
+        auto body_condition = std::make_shared<ngraph::opset6::Constant>(
+                ngraph::element::boolean, ngraph::Shape{1}, true);
+        auto body = std::make_shared<ngraph::Function>(OutputVector{res_1, res_2, body_condition},
+                                                       ParameterVector{Xi, Yi, Zi});
+
+        auto trip_count =
+                std::make_shared<ngraph::opset6::Constant>(ngraph::element::i64, ngraph::Shape{}, 1);
+        auto exec_condition =
+                std::make_shared<ngraph::opset6::Constant>(ngraph::element::boolean, ngraph::Shape{}, true);
+        auto loop = std::make_shared<Loop>(trip_count, exec_condition);
+        loop->set_special_body_ports({-1, 2});
+        loop->set_function(body);
+
+        loop->set_invariant_input(Zi, Z);
+        loop->set_sliced_input(Xi, X, 0, 1, 1, -1, 0);
+        loop->set_merged_input(Yi, Y, res_1);
+
+        auto out0 = loop->get_iter_value(res_1, -1);
+        auto out1 = loop->get_concatenated_slices(res_2, 0, 1, 1, -1, 0);
+
+        auto res_ti_1 = std::make_shared<Result>(loop->output(1));
+        //auto res_ti_2 = std::make_shared<Result>(loop->output(0));
+        f = std::make_shared<ngraph::Function>(NodeVector{res_ti_1},
+                                               ParameterVector{X, Y, Z});
+
+        pass::Manager manager;
+        manager.register_pass<pass::InitNodeInfo>();
+        manager.register_pass<pass::UnrollTensorIterator>();
+        manager.run_passes(f);
+
+        ASSERT_NO_THROW(check_rt_info(f));
+    }
+
+    {
+        auto X = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
+        auto Y = std::make_shared<Parameter>(element::f32, Shape{1, 128});
+        auto Z = std::make_shared<Parameter>(element::f32, Shape{1, 128});
+
+        auto axis = Constant::create(element::i64, Shape{}, {0});
+        auto squeeze_1 = std::make_shared<Squeeze>(X, axis);
+
+        auto w_val = std::vector<float>(512*16, 0);
+        auto r_val = std::vector<float>(512*128, 0);
+        auto b_val = std::vector<float>(512, 0);
+        auto W = Constant::create(element::f32, Shape{512, 16}, w_val);
+        auto R = Constant::create(element::f32, Shape{512, 128}, r_val);
+        auto B = Constant::create(element::f32, Shape{512}, b_val);
+
+        auto lstm_cell_1 = std::make_shared<LSTMCell>(squeeze_1, Y, Z, W, R, B, 128);
+
+        auto unsqueeze_1 = std::make_shared<Unsqueeze>(lstm_cell_1, axis);
+        auto res_ti_1 = std::make_shared<Result>(unsqueeze_1);
+        //auto res_ti_2 = std::make_shared<Result>(unsqueeze_2);
+        f_ref = std::make_shared<ngraph::Function>(NodeVector{res_ti_1}, ParameterVector{X, Y, Z});
+    }
+
+    auto res = compare_functions(f, f_ref);
+    ASSERT_TRUE(res.first) << res.second;
+}
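All six tests above follow one recipe, condensed below (identifiers refer to the tests; this is the literal pass pipeline they run):

    // Build a Loop with a constant trip count, then unroll it in place.
    // With trip_count == 2 the Loop becomes Split -> two body copies -> Concat;
    // with trip_count == 1 the body is inlined and no Split/Concat is created.
    pass::Manager manager;
    manager.register_pass<pass::InitNodeInfo>();
    manager.register_pass<pass::UnrollTensorIterator>();
    manager.run_passes(f); // f is the function holding the Loop
    ASSERT_NO_THROW(check_rt_info(f));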
diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/loop.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/loop.cpp
index c4ae2bbaeff..2b861bd1016 100644
--- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/loop.cpp
+++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/loop.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2019 Intel Corporation
+// Copyright (C) 2020-2021 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -50,6 +50,7 @@ namespace {
 
     INSTANTIATE_TEST_CASE_P(smoke_StaticShapeLoop, StaticShapeLoopTest,
                             Combine(
+                                    ValuesIn(std::vector<bool>{true, false}),
                                     Values(true),
                                     ValuesIn(static_loop_types),
                                     Values(7),
diff --git a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/loop.hpp b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/loop.hpp
index c5b7d9f6cbf..d2df5a447c5 100644
--- a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/loop.hpp
+++ b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/loop.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2019 Intel Corporation
+// Copyright (C) 2020-2021 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -39,6 +39,7 @@ protected:
 
 using StaticShapeLoopParams = typename std::tuple<
+        bool,
         bool,
         std::tuple<
             bool,
@@ -64,6 +65,7 @@ public:
     std::vector<std::vector<std::uint8_t>> PredefinedRefs();
 
 private:
+    bool unrolling;             // unroll Loop
    bool static_iter_num;       // trip count provided by constant node
     bool static_continue_cond;  // initial_cond provided by constant node
     int64_t max_iter_num;       // -1 means infinity loop (expected dynamic exit condition in body)
diff --git a/inference-engine/tests/functional/shared_test_classes/src/single_layer/loop.cpp b/inference-engine/tests/functional/shared_test_classes/src/single_layer/loop.cpp
index 1265757c0bf..34587a012cf 100644
--- a/inference-engine/tests/functional/shared_test_classes/src/single_layer/loop.cpp
+++ b/inference-engine/tests/functional/shared_test_classes/src/single_layer/loop.cpp
@@ -1,8 +1,9 @@
-// Copyright (C) 2019 Intel Corporation
+// Copyright (C) 2021 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include "shared_test_classes/single_layer/loop.hpp"
+#include <transformations/control_flow/unroll_tensor_iterator.hpp>
 
 namespace LayerTestsDefinitions {
 
@@ -143,6 +144,7 @@ namespace LayerTestsDefinitions {
         SKIP_IF_CURRENT_TEST_IS_DISABLED()
         auto args_papck = std::tie(static_iter_num, max_iter_num, dynamic_exit, axis);
         std::tie(
+            unrolling,
             static_continue_cond,
             args_papck,
             start_value,
@@ -212,6 +214,11 @@ namespace LayerTestsDefinitions {
         function = std::make_shared<ngraph::Function>(
                 ngraph::OutputVector {loop},
                 params);
+        if (unrolling) {
+            ngraph::pass::Manager manager;
+            manager.register_pass<ngraph::pass::UnrollTensorIterator>();
+            manager.run_passes(function);
+        }
     }
 
     InferenceEngine::Blob::Ptr StaticShapeLoopTest::GenerateInput(const InferenceEngine::InputInfo &info) const {
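The new leading bool in StaticShapeLoopParams multiplies the whole existing parameter grid by two (with and without unrolling). A self-contained toy showing the mechanics, with hypothetical names (DemoLoopTest is not part of the patch):

    #include <gtest/gtest.h>
    #include <tuple>
    #include <vector>

    using DemoParams = std::tuple<bool, int>; // {unrolling, trip_count}

    class DemoLoopTest : public ::testing::TestWithParam<DemoParams> {};

    TEST_P(DemoLoopTest, GridIsDoubled) {
        bool unrolling;
        int trip_count;
        std::tie(unrolling, trip_count) = GetParam();
        // 2 bools x 3 trip counts = 6 generated test cases.
        SUCCEED();
    }

    INSTANTIATE_TEST_CASE_P(Demo, DemoLoopTest,
                            ::testing::Combine(
                                ::testing::ValuesIn(std::vector<bool>{true, false}),
                                ::testing::Values(1, 2, 7)));

As in the patch, the std::tie() order in SetUp() must match the tuple declaration exactly, which is why `unrolling` is read first.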
diff --git a/ngraph/core/include/ngraph/op/loop.hpp b/ngraph/core/include/ngraph/op/loop.hpp
index 1a4fc794ede..36ecbf15e98 100644
--- a/ngraph/core/include/ngraph/op/loop.hpp
+++ b/ngraph/core/include/ngraph/op/loop.hpp
@@ -64,7 +64,6 @@ namespace ngraph
                 /// iteration or not.
                 Loop(const Output<Node>& trip_count, const Output<Node>& execution_condition);
 
-                int64_t get_num_iterations() const { return m_num_iterations; }
                 Output<Node> get_concatenated_slices(const Output<Node>& value,
                                                      int64_t start,
                                                      int64_t stride,
@@ -93,7 +92,6 @@ namespace ngraph
                 void clone_to(Loop& dst, const OutputVector& new_args) const;
 
                 SpecialBodyPorts m_special_body_ports;
-                int64_t m_num_iterations = -1; // -1 means infinity
             };
         }
     }
diff --git a/ngraph/core/include/ngraph/op/tensor_iterator.hpp b/ngraph/core/include/ngraph/op/tensor_iterator.hpp
index 7004fd4669e..ce4b31f5234 100644
--- a/ngraph/core/include/ngraph/op/tensor_iterator.hpp
+++ b/ngraph/core/include/ngraph/op/tensor_iterator.hpp
@@ -50,11 +50,8 @@ namespace ngraph
             /// \return the body of the iteration
             std::shared_ptr<Function> get_function() override;
 
-            int64_t get_num_iterations() const { return m_num_iterations; }
         private:
             void try_to_set_num_iterations_if_no_slice_inputs();
-
-            int64_t m_num_iterations = -1;
         };
     }
     using v0::TensorIterator;
diff --git a/ngraph/core/include/ngraph/op/util/sub_graph_base.hpp b/ngraph/core/include/ngraph/op/util/sub_graph_base.hpp
index 50aaa2f86ef..0ae39c58096 100644
--- a/ngraph/core/include/ngraph/op/util/sub_graph_base.hpp
+++ b/ngraph/core/include/ngraph/op/util/sub_graph_base.hpp
@@ -327,7 +327,11 @@ namespace ngraph
             SubGraphOp& operator=(const SubGraphOp&) = delete;
             SubGraphOp& operator=(SubGraphOp&&) = default;
 
+            int64_t get_num_iterations() const { return m_num_iterations; }
         protected:
+            int64_t m_num_iterations =
+                -1; // -1 means infinity for Loop op, inconsistent for TensorIterator
+
             // Find an input corresponding to value, adding one if necessary.
             Input<Node> input_for_value(const Output<Node>& value);
diff --git a/ngraph/core/src/pass/low_latency.cpp b/ngraph/core/src/pass/low_latency.cpp
index cad0a051836..ed350abd218 100644
--- a/ngraph/core/src/pass/low_latency.cpp
+++ b/ngraph/core/src/pass/low_latency.cpp
@@ -23,6 +23,23 @@ ngraph::pass::LowLatency::LowLatency()
             return false;
         }
 
+        if (const auto& loop = std::dynamic_pointer_cast<opset6::Loop>(sub_graph_op))
+        {
+            const auto& trip_count =
+                std::dynamic_pointer_cast<opset6::Constant>(loop->get_input_node_shared_ptr(0));
+            const auto& num_iter = loop->get_num_iterations();
+            if (trip_count && num_iter > 0 && trip_count->get_output_target_inputs(0).size() == 1)
+            {
+                auto single_iter =
+                    std::make_shared<opset6::Constant>(ngraph::element::i64, Shape{}, 1);
+                replace_node(trip_count, single_iter);
+            }
+            else
+            {
+                // count of iterations is dynamic;
+                return false;
+            }
+        }
+
         // Mark the TI layer to be unrolled. Enable unconditional ti unrolling for all plugins.
         auto& rt_info = sub_graph_op->get_rt_info();
         rt_info["UNROLL_TI"] = std::make_shared<ngraph::VariantWrapper<int64_t>>(1);
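Finally, the low_latency.cpp change above encodes one invariant: LowLatency may only force a Loop down to a single iteration when the trip count is a Constant that is consistent and feeds nothing but this Loop. A standalone restatement of that guard (the helper name is illustrative, not from the patch):

    #include <memory>
    #include <ngraph/opsets/opset6.hpp>

    // True when it is safe to replace the Loop's trip count with Constant(1):
    // the trip count must already be a Constant, the iteration count must be
    // consistent (> 0), and no other node may consume that same Constant,
    // otherwise rewriting it would corrupt unrelated consumers.
    bool can_force_single_iteration(const std::shared_ptr<ngraph::opset6::Loop>& loop) {
        const auto trip_count = std::dynamic_pointer_cast<ngraph::opset6::Constant>(
            loop->get_input_node_shared_ptr(0)); // Loop input 0 is the trip count
        return trip_count && loop->get_num_iterations() > 0 &&
               trip_count->get_output_target_inputs(0).size() == 1;
    }

After this rewrite, UnrollTensorIterator produces exactly one body copy, and the state that used to travel along the Loop's back edges lives in the ReadValue/Assign pairs inserted by LowLatency, as exercised by the LowLatencyLSTM_Loop test above.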