LowLatency v2 ngraph transformation (#5160)

* LowLatency 2.0: transformation and unit tests

* low latency 2.0: unit tests

* documentation and ngraph codestyle

* update CNN Interface of LowLatency transformation

* fix build on Windows

* fix build on Windows

* investigation of a failed build on Win OS

* ngraph codestyle

* fix build (werrors)

* New unit tests, refactoring

* update functional tests for Memory

* update LowLatency functional tests

* extend Memory tests to cover LowLatency v2 transformation

* clean up, code style

* fix unit tests

* update and fix unit tests, add feature to apply LLTv2 after LLTv1

* update docs, refactoring

* add several gna tests to skip config

* fix python api tests

* update python api, rename LowLatency_v2 to LowLatency2

* deprecate LowLatency v1

* Deprecate LowLatency v1 in IE

* fix wrong merge, codestyle

* resolve review comments

* fix python test

* update skip config

* apply online review notes, fix unit tests

* clean up, code style

* fix docs

* Use debug_messages instead of exceptions in llt v2

* fix unit tests

* Resolve review remarks
This commit is contained in:
Ivan Tikhonov 2021-06-07 15:13:41 +03:00 committed by GitHub
parent f9b27c3714
commit c1608628d4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
33 changed files with 1811 additions and 432 deletions

View File

@ -17,8 +17,8 @@ def ApplyPOTTransformations(IENetwork network, string device):
C.ApplyPOTTransformations(network.impl, device)
def ApplyLowLatencyTransformation(IENetwork network, int64_t num_iterations=1):
C.ApplyLowLatencyTransformation(network.impl, num_iterations)
def ApplyLowLatencyTransformation(IENetwork network, bool use_const_initializer = True):
C.ApplyLowLatencyTransformation(network.impl, use_const_initializer)
def ApplyPruningTransformation(IENetwork network):

View File

@ -26,16 +26,9 @@ void InferenceEnginePython::ApplyPOTTransformations(InferenceEnginePython::IENet
manager.run_passes(network.actual->getFunction());
}
void InferenceEnginePython::ApplyLowLatencyTransformation(InferenceEnginePython::IENetwork network, int64_t num_iterations) {
void InferenceEnginePython::ApplyLowLatencyTransformation(InferenceEnginePython::IENetwork network, bool use_const_initializer) {
ngraph::pass::Manager manager;
// TODO: pass num_iterations to LowLatency
manager.register_pass<ngraph::pass::LowLatency>();
manager.register_pass<ngraph::pass::UnrollTensorIterator>();
auto pass_config = manager.get_pass_config();
pass_config->set_callback<ngraph::pass::UnrollTensorIterator>([](const std::shared_ptr<const ngraph::Node>& node) -> bool {
return node->get_rt_info().count("UNROLL_TI") == 0;
});
manager.register_pass<ngraph::pass::LowLatency2>(use_const_initializer);
manager.run_passes(network.actual->getFunction());
}

View File

@ -15,7 +15,7 @@ void ApplyMOCTransformations(InferenceEnginePython::IENetwork network, bool cf);
void ApplyPOTTransformations(InferenceEnginePython::IENetwork network, std::string device);
void ApplyLowLatencyTransformation(InferenceEnginePython::IENetwork network, int64_t num_iterations);
void ApplyLowLatencyTransformation(InferenceEnginePython::IENetwork network, bool use_const_initializer = true);
void ApplyPruningTransformation(InferenceEnginePython::IENetwork network);

View File

@ -3,7 +3,6 @@
from libcpp cimport bool
from libcpp.string cimport string
from libc.stdint cimport int64_t
from ..inference_engine.ie_api_impl_defs cimport IENetwork
@ -12,7 +11,7 @@ cdef extern from "offline_transformations_api_impl.hpp" namespace "InferenceEngi
cdef void ApplyPOTTransformations(IENetwork network, string device)
cdef void ApplyLowLatencyTransformation(IENetwork network, int64_t num_iterations)
cdef void ApplyLowLatencyTransformation(IENetwork network, bool use_const_initializer)
cdef void ApplyPruningTransformation(IENetwork network)

View File

@ -52,5 +52,41 @@ namespace InferenceEngine {
* @param network A network to apply LowLatency transformation
* *
*/
INFERENCE_ENGINE_DEPRECATED("This transformation will be removed in 2023.1. "
"Use InferenceEngine::lowLatency2 instead.")
INFERENCE_ENGINE_API_CPP(void) LowLatency(InferenceEngine::CNNNetwork& network);
/**
* @brief The transformation finds all TensorIterator/Loop layers in the network,
* processes all back edges that describe a connection between Result and Parameter
* of the TensorIterator/Loop bodies, and inserts ReadValue and Assign layers at the
* input and output corresponding to this back edge.
* Supported platforms: CPU, GNA.
*
* The example below describes the changes made by the transformation
* [] - TensorIterator body
* () - new layer
* BE - back-edge
*
* before applying the transformation:
* -> input1[BE_1 -> Parameter -> Layers ... -> Result -> BE_1 ]output1->
*
* after applying the transformation:
* ->(ReadValue)-> input1[BE_1 ->Parameter->Layers ...->Result->BE_1]output1 ->(Assign)
* \
* ->...
* After applying the transformation, the resulting network can be inferred
* step by step; the states will be stored between inferences.
* @param network A network to apply LowLatency transformation
* @param use_const_initializer Changes the type of the initializing subgraph for ReadValue operations.
If "true", then the transformation inserts Constant before ReadValue operation.
If "false", then the transformation leaves the existing initializing subgraph for ReadValue operation.
* Loop operation by a given number. Does not affect TensorIterators.
* *
*/
INFERENCE_ENGINE_API_CPP(void) lowLatency2(InferenceEngine::CNNNetwork& network,
bool use_const_initializer = true);
} // namespace InferenceEngine

View File

@ -11,6 +11,16 @@ using namespace InferenceEngine;
// Applies the (deprecated) ngraph LowLatency pass to the function held by `network`.
void InferenceEngine::LowLatency(InferenceEngine::CNNNetwork &network) {
    ngraph::pass::Manager pass_manager;
    // The v1 pass is deprecated; silence the warning only around its registration.
    NGRAPH_SUPPRESS_DEPRECATED_START
    pass_manager.register_pass<ngraph::pass::LowLatency>();
    NGRAPH_SUPPRESS_DEPRECATED_END
    pass_manager.run_passes(network.getFunction());
}
// Applies the ngraph LowLatency2 pass to the function held by `network`,
// forwarding `use_const_initializer` to the pass constructor.
void InferenceEngine::lowLatency2(InferenceEngine::CNNNetwork &network,
                                  bool use_const_initializer) {
    ngraph::pass::Manager pass_manager;
    pass_manager.register_pass<ngraph::pass::LowLatency2>(use_const_initializer);
    pass_manager.run_passes(network.getFunction());
}

View File

@ -68,7 +68,9 @@ TEST(TransformationTests, LowLatencyLSTM) {
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::InitNodeInfo>();
NGRAPH_SUPPRESS_DEPRECATED_START
manager.register_pass<ngraph::pass::LowLatency>();
NGRAPH_SUPPRESS_DEPRECATED_END
manager.register_pass<ngraph::pass::UnrollTensorIterator>();
manager.run_passes(f);
}
@ -149,7 +151,9 @@ TEST(TransformationTests, LowLatencyGRU) {
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::InitNodeInfo>();
NGRAPH_SUPPRESS_DEPRECATED_START
manager.register_pass<ngraph::pass::LowLatency>();
NGRAPH_SUPPRESS_DEPRECATED_END
manager.register_pass<ngraph::pass::UnrollTensorIterator>();
manager.run_passes(f);
@ -227,7 +231,9 @@ TEST(TransformationTests, LowLatencyRNN) {
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::InitNodeInfo>();
NGRAPH_SUPPRESS_DEPRECATED_START
manager.register_pass<ngraph::pass::LowLatency>();
NGRAPH_SUPPRESS_DEPRECATED_END
manager.register_pass<ngraph::pass::UnrollTensorIterator>();
manager.run_passes(f);
@ -317,7 +323,9 @@ TEST(TransformationTests, LowLatencyLSTMReshape) {
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::InitNodeInfo>();
NGRAPH_SUPPRESS_DEPRECATED_START
manager.register_pass<ngraph::pass::LowLatency>();
NGRAPH_SUPPRESS_DEPRECATED_END
manager.register_pass<ngraph::pass::UnrollTensorIterator>();
manager.run_passes(f);
}
@ -413,7 +421,9 @@ TEST(TransformationTests, LowLatencyLSTM_Loop) {
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::InitNodeInfo>();
NGRAPH_SUPPRESS_DEPRECATED_START
manager.register_pass<ngraph::pass::LowLatency>();
NGRAPH_SUPPRESS_DEPRECATED_END
manager.register_pass<ngraph::pass::UnrollTensorIterator>();
manager.run_passes(f);
}

View File

@ -0,0 +1,829 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include <string>
#include <memory>
#include <queue>
#include <ngraph/function.hpp>
#include <ngraph/opsets/opset7.hpp>
#include <ngraph/pass/manager.hpp>
#include <transformations/control_flow/unroll_tensor_iterator.hpp>
#include <transformations/init_node_info.hpp>
#include <transformations/common_optimizations/low_latency.hpp>
#include <transformations/serialize.hpp>
#include "common_test_utils/ngraph_test_utils.hpp"
using namespace testing;
using namespace ngraph;
using namespace opset7;
using namespace std;
// Builds the initializer subgraph used by the reference graphs: a zero Constant of shape {1}
// with the element type of `in_node`, broadcast to ShapeOf(in_node).
// Returns output 0 of the Broadcast node.
Output<Node> create_init_subgraph(const Output<Node>& in_node) {
    const auto zero = make_shared<Constant>(in_node.get_element_type(), Shape{1}, 0);
    const auto target_shape = make_shared<ShapeOf>(in_node);
    return make_shared<Broadcast>(zero, target_shape)->output(0);
}
// Wraps `in_node` in an Unsqueeze followed by a Squeeze over the same axis constant ({1}),
// i.e. a pair of ops that cancel each other out.
Output<Node> insert_identity(const Output<Node>& in_node) {
    const auto unit_axis = Constant::create(element::i64, Shape{1}, {1});
    const auto expanded = std::make_shared<Unsqueeze>(in_node, unit_axis);
    return std::make_shared<Squeeze>(expanded, unit_axis);
}
std::shared_ptr<Function> createLSTMBody(const std::shared_ptr<Parameter>& Xi,
const std::shared_ptr<Parameter>& H_t,
const std::shared_ptr<Parameter>& C_t,
bool is_loop = false) {
// Body
auto axis = Constant::create(element::i64, Shape{}, {0});
auto squeeze = std::make_shared<Squeeze>(Xi, axis);
auto w_val = std::vector<float>(512 * 16, 0);
auto r_val = std::vector<float>(512 * 128, 0);
auto b_val = std::vector<float>(512, 0);
auto W = Constant::create(element::f32, Shape{512, 16}, w_val);
auto R = Constant::create(element::f32, Shape{512, 128}, r_val);
auto B = Constant::create(element::f32, Shape{512}, b_val);
auto lstm_cell = std::make_shared<LSTMCell>(squeeze, H_t, C_t, W, R, B, 128);
auto res_1 = std::make_shared<Result>(lstm_cell->output(0));
auto unsqueeze = std::make_shared<Unsqueeze>(lstm_cell->output(0), axis);
auto res_2 = std::make_shared<Result>(unsqueeze);
auto res_3 = std::make_shared<Result>(lstm_cell->output(1));
auto func = std::make_shared<Function>(OutputVector{res_1, res_2, res_3},
ParameterVector{Xi, H_t, C_t});
if (is_loop) {
auto body_condition = std::make_shared<Constant>(
element::boolean, Shape{1}, true);
auto cond_res = std::make_shared<Result>(body_condition);
func->add_results({cond_res});
}
return func;
}
// Checks LowLatency2 on a TensorIterator whose body is the LSTM cell built by createLSTMBody.
// `f` is run through InitNodeInfo + LowLatency2 and then compared with the hand-built `f_ref`,
// in which the LSTMCell state comes from ReadValue nodes and is written back via Assign sinks.
TEST(TransformationTests, LowLatency2_LSTM) {
std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
{
// Inputs of the outer function.
auto X = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
auto H_init = std::make_shared<Parameter>(element::f32, Shape{1, 128});
auto C_init = std::make_shared<Parameter>(element::f32, Shape{1, 128});
// Parameters of the TensorIterator body.
auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
auto C_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
// Body
auto body = createLSTMBody(Xi, H_t, C_t);
auto results = body->get_results();
auto tensor_iterator = std::make_shared<TensorIterator>();
tensor_iterator->set_body(body);
tensor_iterator->set_friendly_name("LSTMTensorIterator");
// Merged inputs: results[2] is fed back into C_t and results[0] into H_t.
tensor_iterator->set_merged_input(C_t, C_init, results[2]);
tensor_iterator->set_sliced_input(Xi, X, 0, 1, 1, -1, 0);
tensor_iterator->set_merged_input(H_t, H_init, results[0]);
// The returned handles are not stored; the outputs are accessed by index below.
tensor_iterator->get_iter_value(results[0], -1);
tensor_iterator->get_concatenated_slices(results[1], 0, 1, 1, -1, 0);
auto res_ti_1 = std::make_shared<Result>(tensor_iterator->output(1));
auto res_ti_2 = std::make_shared<Result>(tensor_iterator->output(0));
f = std::make_shared<Function>(NodeVector{res_ti_1, res_ti_2},
ParameterVector{X, H_init, C_init});
pass::Manager manager;
manager.register_pass<pass::InitNodeInfo>();
manager.register_pass<pass::LowLatency2>();
manager.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
{
// Reference graph: the LSTM cell without a TensorIterator around it.
auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
auto C_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
const std::string variable_name_H("LSTMTensorIterator/variable0");
const std::string variable_name_C("LSTMTensorIterator/variable1");
auto variable_H = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_H});
auto variable_C = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_C});
// Each ReadValue is initialized by the zero Broadcast subgraph from create_init_subgraph.
auto read_value_H = std::make_shared<ReadValue>(create_init_subgraph(H_t), variable_H);
auto read_value_C = std::make_shared<ReadValue>(create_init_subgraph(C_t), variable_C);
// Body
auto axis = Constant::create(element::i64, Shape{}, {0});
auto squeeze = std::make_shared<Squeeze>(Xi, axis);
auto w_val = std::vector<float>(512 * 16, 0);
auto r_val = std::vector<float>(512 * 128, 0);
auto b_val = std::vector<float>(512, 0);
auto W = Constant::create(element::f32, Shape{512, 16}, w_val);
auto R = Constant::create(element::f32, Shape{512, 128}, r_val);
auto B = Constant::create(element::f32, Shape{512}, b_val);
auto lstm_cell = std::make_shared<LSTMCell>(squeeze, read_value_H, read_value_C, W, R, B, 128);
// The cell outputs are stored back into the same variables the ReadValue nodes read from.
auto assign_H = std::make_shared<Assign>(lstm_cell->output(0), variable_H);
auto assign_C = std::make_shared<Assign>(lstm_cell->output(1), variable_C);
auto unsqueeze = std::make_shared<Unsqueeze>(lstm_cell->output(0), axis);
// Both results go through the Unsqueeze/Squeeze pair built by insert_identity.
auto res_2 = std::make_shared<Result>(insert_identity(unsqueeze));
auto res_1 = std::make_shared<Result>(insert_identity(lstm_cell->output(0)));
f_ref = std::make_shared<Function>(OutputVector{res_1, res_2}, ParameterVector{Xi, H_t, C_t});
f_ref->add_sinks({assign_C, assign_H});
assign_H->add_control_dependency(read_value_H);
assign_C->add_control_dependency(read_value_C);
}
auto res = compare_functions(f, f_ref);
ASSERT_TRUE(res.first) << res.second;
}
// Checks LowLatency2 on a TensorIterator with a GRUCell body.
// `f` is run through InitNodeInfo + LowLatency2 and then compared with the hand-built `f_ref`,
// in which the GRUCell state comes from a ReadValue node and is written back via an Assign sink.
TEST(TransformationTests, LowLatency2_GRU) {
    std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
    {
        // Inputs of the outer function.
        auto X = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        auto Y = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        // Parameters of the TensorIterator body.
        auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        auto Yi = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        // Body
        auto axis = Constant::create(element::i64, Shape{}, {0});
        auto squeeze = std::make_shared<Squeeze>(Xi, axis);

        auto w_val = std::vector<float>(384 * 16, 0);
        auto r_val = std::vector<float>(384 * 128, 0);
        auto b_val = std::vector<float>(384, 0);
        auto W = Constant::create(element::f32, Shape{384, 16}, w_val);
        auto R = Constant::create(element::f32, Shape{384, 128}, r_val);
        auto B = Constant::create(element::f32, Shape{384}, b_val);

        auto gru_cell = std::make_shared<GRUCell>(squeeze, Yi, W, R, B, 128);
        auto res_1 = std::make_shared<Result>(gru_cell);
        auto unsqueeze = std::make_shared<Unsqueeze>(gru_cell, axis);
        auto res_2 = std::make_shared<Result>(unsqueeze);
        auto body = std::make_shared<Function>(OutputVector{res_1, res_2}, ParameterVector{Xi, Yi});

        auto tensor_iterator = std::make_shared<TensorIterator>();
        tensor_iterator->set_body(body);

        tensor_iterator->set_sliced_input(Xi, X, 0, 1, 1, -1, 0);
        // Merged input: res_1 is fed back into Yi.
        tensor_iterator->set_merged_input(Yi, Y, res_1);

        // Register both TensorIterator outputs; the returned handles are not needed because
        // the consumed output is accessed by index below.
        tensor_iterator->get_iter_value(res_1, -1);
        tensor_iterator->get_concatenated_slices(res_2, 0, 1, 1, -1, 0);

        auto res_ti_1 = std::make_shared<Result>(tensor_iterator->output(1));
        f = std::make_shared<Function>(NodeVector{res_ti_1}, ParameterVector{X, Y});

        pass::Manager manager;
        manager.register_pass<pass::InitNodeInfo>();
        manager.register_pass<pass::LowLatency2>();
        manager.run_passes(f);
        ASSERT_NO_THROW(check_rt_info(f));
    }
    {
        // Reference graph: the GRU cell without a TensorIterator around it.
        auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        const std::string variable_name_H("GRUTensorIterator/variable0");
        auto variable_H = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_H});
        // The ReadValue is initialized by the zero Broadcast subgraph from create_init_subgraph.
        auto read_value_H = std::make_shared<ReadValue>(create_init_subgraph(H_t), variable_H);

        // Body
        auto axis = Constant::create(element::i64, Shape{}, {0});
        auto squeeze = std::make_shared<Squeeze>(Xi, axis);

        auto w_val = std::vector<float>(384 * 16, 0);
        auto r_val = std::vector<float>(384 * 128, 0);
        auto b_val = std::vector<float>(384, 0);
        auto W = Constant::create(element::f32, Shape{384, 16}, w_val);
        auto R = Constant::create(element::f32, Shape{384, 128}, r_val);
        auto B = Constant::create(element::f32, Shape{384}, b_val);

        auto gru_cell = std::make_shared<GRUCell>(squeeze, read_value_H, W, R, B, 128);
        // The cell output is stored back into the variable the ReadValue reads from.
        auto assign_H = std::make_shared<Assign>(gru_cell->output(0), variable_H);
        auto unsqueeze = std::make_shared<Unsqueeze>(gru_cell->output(0), axis);
        // The only function result goes through the Unsqueeze/Squeeze pair from insert_identity.
        auto res_2 = std::make_shared<Result>(insert_identity(unsqueeze));
        f_ref = std::make_shared<Function>(ResultVector{res_2}, ParameterVector{Xi, H_t});
        f_ref->add_sinks({assign_H});
        assign_H->add_control_dependency(read_value_H);
    }
    auto res = compare_functions(f, f_ref);
    ASSERT_TRUE(res.first) << res.second;
}
// Checks LowLatency2 on a TensorIterator with an RNNCell body.
// `f` is run through InitNodeInfo + LowLatency2 and then compared with the hand-built `f_ref`,
// in which the RNNCell state comes from a ReadValue node and is written back via an Assign sink.
TEST(TransformationTests, LowLatency2_RNN) {
    std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
    {
        // Inputs of the outer function.
        auto X = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        auto Y = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        // Parameters of the TensorIterator body.
        auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        auto Yi = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        // Body
        auto axis = Constant::create(element::i64, Shape{}, {0});
        auto squeeze = std::make_shared<Squeeze>(Xi, axis);

        auto w_val = std::vector<float>(128 * 16, 0);
        auto r_val = std::vector<float>(128 * 128, 0);
        auto b_val = std::vector<float>(128, 0);
        auto W = Constant::create(element::f32, Shape{128, 16}, w_val);
        auto R = Constant::create(element::f32, Shape{128, 128}, r_val);
        auto B = Constant::create(element::f32, Shape{128}, b_val);

        auto rnn_cell = std::make_shared<RNNCell>(squeeze, Yi, W, R, B, 128);
        auto res_1 = std::make_shared<Result>(rnn_cell);
        auto unsqueeze = std::make_shared<Unsqueeze>(rnn_cell, axis);
        auto res_2 = std::make_shared<Result>(unsqueeze);
        auto body = std::make_shared<Function>(OutputVector{res_1, res_2}, ParameterVector{Xi,
                                                                                             Yi});

        auto tensor_iterator = std::make_shared<TensorIterator>();
        tensor_iterator->set_body(body);

        tensor_iterator->set_sliced_input(Xi, X, 0, 1, 1, -1, 0);
        // Merged input: res_1 is fed back into Yi.
        tensor_iterator->set_merged_input(Yi, Y, res_1);

        // Register both TensorIterator outputs; the returned handles are not needed because
        // the consumed output is accessed by index below.
        tensor_iterator->get_iter_value(res_1, -1);
        tensor_iterator->get_concatenated_slices(res_2, 0, 1, 1, -1, 0);

        auto res_ti_1 = std::make_shared<Result>(tensor_iterator->output(1));
        f = std::make_shared<Function>(NodeVector{res_ti_1}, ParameterVector{X, Y});

        pass::Manager manager;
        manager.register_pass<pass::InitNodeInfo>();
        manager.register_pass<pass::LowLatency2>();
        manager.run_passes(f);
        ASSERT_NO_THROW(check_rt_info(f));
    }
    {
        // Reference graph: the RNN cell without a TensorIterator around it.
        auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        const std::string variable_name_H("RNNTensorIterator/variable0");
        auto variable_H = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_H});
        // The ReadValue is initialized by the zero Broadcast subgraph from create_init_subgraph.
        auto read_value_H = std::make_shared<ReadValue>(create_init_subgraph(H_t), variable_H);

        // Body
        auto axis = Constant::create(element::i64, Shape{}, {0});
        auto squeeze = std::make_shared<Squeeze>(Xi, axis);

        auto w_val = std::vector<float>(128 * 16, 0);
        auto r_val = std::vector<float>(128 * 128, 0);
        auto b_val = std::vector<float>(128, 0);
        auto W = Constant::create(element::f32, Shape{128, 16}, w_val);
        auto R = Constant::create(element::f32, Shape{128, 128}, r_val);
        auto B = Constant::create(element::f32, Shape{128}, b_val);

        auto rnn_cell = std::make_shared<RNNCell>(squeeze, read_value_H, W, R, B, 128);
        // The cell output is stored back into the variable the ReadValue reads from.
        auto assign_H = std::make_shared<Assign>(rnn_cell->output(0), variable_H);
        auto unsqueeze = std::make_shared<Unsqueeze>(rnn_cell->output(0), axis);
        // The only function result goes through the Unsqueeze/Squeeze pair from insert_identity.
        auto res_2 = std::make_shared<Result>(insert_identity(unsqueeze));
        f_ref = std::make_shared<Function>(ResultVector{res_2}, ParameterVector{Xi, H_t});
        f_ref->add_sinks({assign_H});
        assign_H->add_control_dependency(read_value_H);
    }
    auto res = compare_functions(f, f_ref);
    ASSERT_TRUE(res.first) << res.second;
}
// Checks LowLatency2 after the function has been reshaped: the TensorIterator is first built
// for an X of shape {2, 1, 16}, then parameter 0 is replaced with one of shape {1, 1, 16}
// before the passes run. The expected graph is the same TensorIterator-free reference as in
// the plain LSTM case.
TEST(TransformationTests, LowLatency2_LSTMReshape) {
std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
{
// Inputs of the outer function (X initially has two slices along axis 0).
auto X = std::make_shared<Parameter>(element::f32, Shape{2, 1, 16});
auto H = std::make_shared<Parameter>(element::f32, Shape{1, 128});
auto C = std::make_shared<Parameter>(element::f32, Shape{1, 128});
// Parameters of the TensorIterator body.
auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
auto C_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
// Body
auto body = createLSTMBody(Xi, H_t, C_t);
auto results = body->get_results();
auto tensor_iterator = std::make_shared<TensorIterator>();
tensor_iterator->set_body(body);
// Merged inputs: results[2] is fed back into C_t and results[0] into H_t.
tensor_iterator->set_merged_input(C_t, C, results[2]);
tensor_iterator->set_sliced_input(Xi, X, 0, 1, 1, -1, 0);
tensor_iterator->set_merged_input(H_t, H, results[0]);
// NOTE(review): out0/out1 are not referenced again; the outputs are accessed by index below.
auto out0 = tensor_iterator->get_iter_value(results[0], -1);
auto out1 = tensor_iterator->get_concatenated_slices(results[1], 0, 1, 1, -1, 0);
auto res_ti_1 = std::make_shared<Result>(tensor_iterator->output(1));
auto res_ti_2 = std::make_shared<Result>(tensor_iterator->output(0));
f = std::make_shared<Function>(NodeVector{res_ti_1, res_ti_2}, ParameterVector{X, H,
C});
// Reshape
// change the number of iteration of TI. 2 -> 1
auto new_X = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
f->replace_parameter(0, new_X);
f->validate_nodes_and_infer_types();
pass::Manager manager;
manager.register_pass<pass::InitNodeInfo>();
manager.register_pass<pass::LowLatency2>();
manager.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
{
// Reference graph: the LSTM cell without a TensorIterator around it.
auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
auto C_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
const std::string variable_name_H("LSTMTensorIterator/variable0");
const std::string variable_name_C("LSTMTensorIterator/variable1");
auto variable_H = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_H});
auto variable_C = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_C});
// Each ReadValue is initialized by the zero Broadcast subgraph from create_init_subgraph.
auto read_value_H = std::make_shared<ReadValue>(create_init_subgraph(H_t), variable_H);
auto read_value_C = std::make_shared<ReadValue>(create_init_subgraph(C_t), variable_C);
// Body
auto axis = Constant::create(element::i64, Shape{}, {0});
auto squeeze = std::make_shared<Squeeze>(Xi, axis);
auto w_val = std::vector<float>(512 * 16, 0);
auto r_val = std::vector<float>(512 * 128, 0);
auto b_val = std::vector<float>(512, 0);
auto W = Constant::create(element::f32, Shape{512, 16}, w_val);
auto R = Constant::create(element::f32, Shape{512, 128}, r_val);
auto B = Constant::create(element::f32, Shape{512}, b_val);
auto lstm_cell = std::make_shared<LSTMCell>(squeeze, read_value_H, read_value_C, W, R, B, 128);
// The cell outputs are stored back into the same variables the ReadValue nodes read from.
auto assign_H = std::make_shared<Assign>(lstm_cell->output(0), variable_H);
auto assign_C = std::make_shared<Assign>(lstm_cell->output(1), variable_C);
auto unsqueeze = std::make_shared<Unsqueeze>(lstm_cell->output(0), axis);
// Both results go through the Unsqueeze/Squeeze pair built by insert_identity.
auto res_2 = std::make_shared<Result>(insert_identity(unsqueeze));
auto res_1 = std::make_shared<Result>(insert_identity(lstm_cell->output(0)));
f_ref = std::make_shared<Function>(OutputVector{res_1, res_2}, ParameterVector{Xi, H_t, C_t});
f_ref->add_sinks({assign_C, assign_H});
assign_H->add_control_dependency(read_value_H);
assign_C->add_control_dependency(read_value_C);
}
auto res = compare_functions(f, f_ref);
ASSERT_TRUE(res.first) << res.second;
}
// Checks LowLatency2 on a Loop (constant trip count 1, constant `true` execution condition)
// whose body is the LSTM cell built by createLSTMBody with the extra condition result.
// `f` is run through InitNodeInfo + LowLatency2 and then compared with the hand-built `f_ref`,
// in which the LSTMCell state comes from ReadValue nodes and is written back via Assign sinks.
TEST(TransformationTests, LowLatency2_LSTM_Loop) {
    std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
    {
        // Inputs of the outer function.
        auto X = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        auto H_init = std::make_shared<Parameter>(element::f32, Shape{1, 128});
        auto C_init = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        // Parameters of the Loop body.
        auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
        auto C_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        // Body
        auto body = createLSTMBody(Xi, H_t, C_t, true);
        auto results = body->get_results();

        auto trip_count =
                std::make_shared<Constant>(element::i64, Shape{}, 1);
        auto exec_condition =
                std::make_shared<Constant>(element::boolean, Shape{}, true);
        auto loop = std::make_shared<Loop>(trip_count, exec_condition);
        // Index 3 is the condition result that createLSTMBody appends when is_loop is true.
        loop->set_special_body_ports({-1, 3});
        loop->set_function(body);
        loop->set_friendly_name("LSTMLoop");

        // Merged inputs: results[2] is fed back into C_t and results[0] into H_t.
        loop->set_merged_input(C_t, C_init, results[2]);
        loop->set_sliced_input(Xi, X, 0, 1, 1, -1, 0);
        loop->set_merged_input(H_t, H_init, results[0]);

        // Register both Loop outputs; the returned handles are not needed because the
        // outputs are accessed by index below.
        loop->get_iter_value(results[0], -1);
        loop->get_concatenated_slices(results[1], 0, 1, 1, -1, 0);

        auto res_ti_1 = std::make_shared<Result>(loop->output(1));
        auto res_ti_2 = std::make_shared<Result>(loop->output(0));
        f = std::make_shared<Function>(NodeVector{res_ti_1, res_ti_2},
                                       ParameterVector{X, H_init, C_init});

        pass::Manager manager;
        manager.register_pass<pass::InitNodeInfo>();
        manager.register_pass<pass::LowLatency2>();
        manager.run_passes(f);
        ASSERT_NO_THROW(check_rt_info(f));
    }
    {
        // Reference graph: the LSTM cell without a Loop around it.
        auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
        auto C_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        // NOTE(review): these names use the "LSTMTensorIterator" prefix although the Loop above
        // is named "LSTMLoop"; presumably variable names are not part of the comparison - confirm.
        const std::string variable_name_H("LSTMTensorIterator/variable0");
        const std::string variable_name_C("LSTMTensorIterator/variable1");
        auto variable_H = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_H});
        auto variable_C = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_C});
        // Each ReadValue is initialized by the zero Broadcast subgraph from create_init_subgraph.
        auto read_value_H = std::make_shared<ReadValue>(create_init_subgraph(H_t), variable_H);
        auto read_value_C = std::make_shared<ReadValue>(create_init_subgraph(C_t), variable_C);

        // Body
        auto axis = Constant::create(element::i64, Shape{}, {0});
        auto squeeze = std::make_shared<Squeeze>(Xi, axis);

        auto w_val = std::vector<float>(512 * 16, 0);
        auto r_val = std::vector<float>(512 * 128, 0);
        auto b_val = std::vector<float>(512, 0);
        auto W = Constant::create(element::f32, Shape{512, 16}, w_val);
        auto R = Constant::create(element::f32, Shape{512, 128}, r_val);
        auto B = Constant::create(element::f32, Shape{512}, b_val);

        auto lstm_cell = std::make_shared<LSTMCell>(squeeze, read_value_H, read_value_C, W, R, B, 128);
        // The cell outputs are stored back into the same variables the ReadValue nodes read from.
        auto assign_H = std::make_shared<Assign>(lstm_cell->output(0), variable_H);
        auto assign_C = std::make_shared<Assign>(lstm_cell->output(1), variable_C);
        auto unsqueeze = std::make_shared<Unsqueeze>(lstm_cell->output(0), axis);
        // Both results go through the Unsqueeze/Squeeze pair built by insert_identity.
        auto res_2 = std::make_shared<Result>(insert_identity(unsqueeze));
        auto res_1 = std::make_shared<Result>(insert_identity(lstm_cell->output(0)));
        f_ref = std::make_shared<Function>(OutputVector{res_1, res_2}, ParameterVector{Xi, H_t, C_t});
        f_ref->add_sinks({assign_C, assign_H});
        assign_H->add_control_dependency(read_value_H);
        assign_C->add_control_dependency(read_value_C);
    }
    auto res = compare_functions(f, f_ref);
    ASSERT_TRUE(res.first) << res.second;
}
// Checks LowLatency2 on a TensorIterator whose sliced input X has ITER_CNT slices along axis 0.
// The reference graph keeps the TensorIterator: its merged inputs are fed from ReadValue nodes
// and its iter-value outputs are written back via Assign sinks.
TEST(TransformationTests, LowLatency2_LSTM_several_iterations) {
constexpr int ITER_CNT = 5;
std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
{
// Inputs of the outer function.
auto X = std::make_shared<Parameter>(element::f32, Shape{ITER_CNT, 1, 16});
auto H = std::make_shared<Parameter>(element::f32, Shape{1, 128});
auto C = std::make_shared<Parameter>(element::f32, Shape{1, 128});
// Parameters of the TensorIterator body.
auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
auto C_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
// Body
auto body = createLSTMBody(Xi, H_t, C_t);
auto results = body->get_results();
auto tensor_iterator = std::make_shared<TensorIterator>();
tensor_iterator->set_body(body);
// Merged inputs: results[2] is fed back into C_t and results[0] into H_t.
tensor_iterator->set_merged_input(C_t, C, results[2]);
tensor_iterator->set_sliced_input(Xi, X, 0, 1, 1, -1, 0);
tensor_iterator->set_merged_input(H_t, H, results[0]);
// NOTE(review): out0/out1 are not referenced again; the outputs are accessed by index below.
auto out0 = tensor_iterator->get_iter_value(results[0], -1);
auto out1 = tensor_iterator->get_concatenated_slices(results[1], 0, 1, 1, -1, 0);
auto res_ti_1 = std::make_shared<Result>(tensor_iterator->output(1));
auto res_ti_2 = std::make_shared<Result>(tensor_iterator->output(0));
f = std::make_shared<Function>(NodeVector{res_ti_1, res_ti_2}, ParameterVector{X, H,
C});
pass::Manager manager;
manager.register_pass<pass::InitNodeInfo>();
manager.register_pass<pass::LowLatency2>();
manager.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
// TensorIterator not unrolled.
{
// Inputs of the outer reference function.
auto X = std::make_shared<Parameter>(element::f32, Shape{ITER_CNT, 1, 16});
auto H = std::make_shared<Parameter>(element::f32, Shape{1, 128});
auto C = std::make_shared<Parameter>(element::f32, Shape{1, 128});
const std::string variable_name_H("LSTMTensorIterator/variable0");
const std::string variable_name_C("LSTMTensorIterator/variable1");
auto variable_H = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_H});
auto variable_C = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_C});
// Each ReadValue is initialized by the zero Broadcast subgraph from create_init_subgraph.
auto read_value_H = std::make_shared<ReadValue>(create_init_subgraph(H), variable_H);
auto read_value_C = std::make_shared<ReadValue>(create_init_subgraph(C), variable_C);
// Body parameters
auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
auto C_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
// Body
auto axis = Constant::create(element::i64, Shape{}, {0});
auto squeeze = std::make_shared<Squeeze>(Xi, axis);
auto w_val = std::vector<float>(512 * 16, 0);
auto r_val = std::vector<float>(512 * 128, 0);
auto b_val = std::vector<float>(512, 0);
auto W = Constant::create(element::f32, Shape{512, 16}, w_val);
auto R = Constant::create(element::f32, Shape{512, 128}, r_val);
auto B = Constant::create(element::f32, Shape{512}, b_val);
auto lstm_cell = std::make_shared<LSTMCell>(squeeze, H_t, C_t, W, R, B, 128);
auto res_1 = std::make_shared<Result>(lstm_cell->output(0));
// NOTE(review): lstm_cell is passed as a node here rather than lstm_cell->output(0) as in
// createLSTMBody; presumably this resolves to output 0 - confirm.
auto unsqueeze = std::make_shared<Unsqueeze>(lstm_cell, axis);
auto res_2 = std::make_shared<Result>(unsqueeze);
auto res_3 = std::make_shared<Result>(lstm_cell->output(1));
auto body = std::make_shared<Function>(OutputVector{res_1, res_2, res_3},
ParameterVector{Xi, H_t, C_t});
auto tensor_iterator = std::make_shared<TensorIterator>();
tensor_iterator->set_body(body);
// The merged inputs take their initial values from the ReadValue nodes.
tensor_iterator->set_merged_input(C_t, read_value_C, res_3);
tensor_iterator->set_sliced_input(Xi, X, 0, 1, 1, -1, 0);
tensor_iterator->set_merged_input(H_t, read_value_H, res_1);
auto out0 = tensor_iterator->get_iter_value(res_1, -1);
auto out1 = tensor_iterator->get_concatenated_slices(res_2, 0, 1, 1, -1, 0);
// Extra iter-value output for res_3 so that it can be stored by assign_C.
auto out2 = tensor_iterator->get_iter_value(res_3, -1);
// The iter-value outputs are stored back into the variables the ReadValue nodes read from.
auto assign_H = std::make_shared<Assign>(out0, variable_H);
auto assign_C = std::make_shared<Assign>(out2, variable_C);
auto outer_res_2 = std::make_shared<Result>(out1);
auto outer_res_1 = std::make_shared<Result>(out0);
f_ref = std::make_shared<Function>(OutputVector{outer_res_1, outer_res_2}, ParameterVector{X, H, C});
f_ref->add_sinks({assign_C, assign_H});
assign_H->add_control_dependency(read_value_H);
assign_C->add_control_dependency(read_value_C);
}
auto res = compare_functions(f, f_ref);
ASSERT_TRUE(res.first) << res.second;
}
// Checks LowLatency2 on an LSTM Loop whose trip count is computed from the input shape
// (ShapeOf(X) -> Gather). After X is replaced by a parameter whose leading dimension is 1,
// the transformed function is compared with a reference that contains no Loop at all:
// the LSTMCell reads its states from ReadValue ops and writes them back through Assign sinks.
TEST(TransformationTests, LowLatency2_LSTM_Loop_Reshape) {
std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
// --- Function under test ---
{
// Outer inputs: the sequence X and the initial hidden/cell states.
auto X = std::make_shared<Parameter>(element::f32, Shape{10, 1, 16});
auto H_init = std::make_shared<Parameter>(element::f32, Shape{1, 128});
auto C_init = std::make_shared<Parameter>(element::f32, Shape{1, 128});
// Parameters of the Loop body.
auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
auto C_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
// Body
// NOTE(review): createLSTMBody is defined outside this block; results[0]/[1]/[2] are used below
// as the H, sliced-output and C results respectively - confirm against the helper.
auto body = createLSTMBody(Xi, H_t, C_t, true);
auto results = body->get_results();
// The trip count is not a constant here: it is gathered from ShapeOf(X).
auto shape_of = std::make_shared<ShapeOf>(X);
const auto trip_count = std::make_shared<Gather>(shape_of, Constant::create(ngraph::element::i64, {1}, {0}),
Constant::create(ngraph::element::i64, {1}, {0}));
auto exec_condition =
std::make_shared<Constant>(element::boolean, Shape{}, true);
auto loop = std::make_shared<Loop>(trip_count, exec_condition);
// presumably {-1, 3} = no current-iteration input, body result #3 is the continue condition
// - TODO confirm against Loop::SpecialBodyPorts.
loop->set_special_body_ports({-1, 3});
loop->set_function(body);
loop->set_friendly_name("LSTMLoop");
// Back edges for the cell and hidden states, plus X sliced along axis 0.
loop->set_merged_input(C_t, C_init, results[2]);
loop->set_sliced_input(Xi, X, 0, 1, 1, -1, 0);
loop->set_merged_input(H_t, H_init, results[0]);
// out0/out1 are not referenced again; the Results below use loop->output(N) directly.
// NOTE(review): presumably these calls are what create the Loop outputs - confirm before removing.
auto out0 = loop->get_iter_value(results[0], -1);
auto out1 = loop->get_concatenated_slices(results[1], 0, 1, 1, -1, 0);
auto res_ti_1 = std::make_shared<Result>(loop->output(1));
auto res_ti_2 = std::make_shared<Result>(loop->output(0));
f = std::make_shared<Function>(NodeVector{res_ti_1, res_ti_2},
ParameterVector{X, H_init, C_init});
// Reshape
// change the number of iteration of Loop. 10 -> 1
auto new_X = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
f->replace_parameter(0, new_X);
f->validate_nodes_and_infer_types();
// LowLatency2 is registered with its default arguments.
pass::Manager manager;
manager.register_pass<pass::InitNodeInfo>();
manager.register_pass<pass::LowLatency2>();
manager.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
// --- Reference function: no Loop, states kept in variables ---
{
auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
auto C_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
// NOTE(review): the variable ids use the "LSTMTensorIterator" prefix although the Loop above is
// named "LSTMLoop" - presumably compare_functions does not compare variable ids; confirm.
const std::string variable_name_H("LSTMTensorIterator/variable0");
const std::string variable_name_C("LSTMTensorIterator/variable1");
auto variable_H = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_H});
auto variable_C = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_C});
// create_init_subgraph is defined outside this block; its output initializes each ReadValue.
auto read_value_H = std::make_shared<ReadValue>(create_init_subgraph(H_t), variable_H);
auto read_value_C = std::make_shared<ReadValue>(create_init_subgraph(C_t), variable_C);
// Body
auto axis = Constant::create(element::i64, Shape{}, {0});
auto squeeze = std::make_shared<Squeeze>(Xi, axis);
// Zero-filled LSTM weights: W is {512, 16}, R is {512, 128}, B is {512}; hidden size 128.
auto w_val = std::vector<float>(512 * 16, 0);
auto r_val = std::vector<float>(512 * 128, 0);
auto b_val = std::vector<float>(512, 0);
auto W = Constant::create(element::f32, Shape{512, 16}, w_val);
auto R = Constant::create(element::f32, Shape{512, 128}, r_val);
auto B = Constant::create(element::f32, Shape{512}, b_val);
// The cell consumes the ReadValue outputs directly and its states are written back via Assign.
auto lstm_cell = std::make_shared<LSTMCell>(squeeze, read_value_H, read_value_C, W, R, B, 128);
auto assign_H = std::make_shared<Assign>(lstm_cell->output(0), variable_H);
auto assign_C = std::make_shared<Assign>(lstm_cell->output(1), variable_C);
auto unsqueeze = std::make_shared<Unsqueeze>(lstm_cell->output(0), axis);
// insert_identity is defined outside this block; it wraps each value before the Result.
auto res_2 = std::make_shared<Result>(insert_identity(unsqueeze));
auto res_1 = std::make_shared<Result>(insert_identity(lstm_cell->output(0)));
f_ref = std::make_shared<Function>(OutputVector{res_1, res_2}, ParameterVector{Xi, H_t, C_t});
f_ref->add_sinks({assign_C, assign_H});
assign_H->add_control_dependency(read_value_H);
assign_C->add_control_dependency(read_value_C);
}
// The test fails with the comparator's message if the two functions differ.
auto res = compare_functions(f, f_ref);
ASSERT_TRUE(res.first) << res.second;
}
// Checks LowLatency2 on an LSTM Loop with a constant trip count of 10.
// The reference function still contains the Loop (same trip count); the difference is that its
// initial states come from ReadValue ops and the final states are stored through Assign sinks.
TEST(TransformationTests, LowLatency2_LSTM_Loop_several_iterations) {
std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
// --- Function under test ---
{
// Outer inputs: the sequence X and the initial hidden/cell states.
auto X = std::make_shared<Parameter>(element::f32, Shape{10, 1, 16});
auto H_init = std::make_shared<Parameter>(element::f32, Shape{1, 128});
auto C_init = std::make_shared<Parameter>(element::f32, Shape{1, 128});
// Parameters of the Loop body.
auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
auto C_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
// Body
// NOTE(review): createLSTMBody is defined outside this block; results[0]/[1]/[2] are used below
// as the H, sliced-output and C results respectively - confirm against the helper.
auto body = createLSTMBody(Xi, H_t, C_t, true);
auto results = body->get_results();
auto trip_count =
std::make_shared<Constant>(element::i64, Shape{}, 10);
auto exec_condition =
std::make_shared<Constant>(element::boolean, Shape{}, true);
auto loop = std::make_shared<Loop>(trip_count, exec_condition);
// presumably {-1, 3} = no current-iteration input, body result #3 is the continue condition
// - TODO confirm against Loop::SpecialBodyPorts.
loop->set_special_body_ports({-1, 3});
loop->set_function(body);
loop->set_friendly_name("LSTMLoop");
// Back edges for the cell and hidden states, plus X sliced along axis 0.
loop->set_merged_input(C_t, C_init, results[2]);
loop->set_sliced_input(Xi, X, 0, 1, 1, -1, 0);
loop->set_merged_input(H_t, H_init, results[0]);
// out0/out1 are not referenced again; the Results below use loop->output(N) directly.
// NOTE(review): presumably these calls are what create the Loop outputs - confirm before removing.
auto out0 = loop->get_iter_value(results[0], -1);
auto out1 = loop->get_concatenated_slices(results[1], 0, 1, 1, -1, 0);
auto res_ti_1 = std::make_shared<Result>(loop->output(1));
auto res_ti_2 = std::make_shared<Result>(loop->output(0));
f = std::make_shared<Function>(NodeVector{res_ti_1, res_ti_2},
ParameterVector{X, H_init, C_init});
// LowLatency2 is registered with an explicit `true` constructor argument.
pass::Manager manager;
manager.register_pass<pass::InitNodeInfo>();
manager.register_pass<pass::LowLatency2>(true);
manager.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
// --- Reference function: Loop kept, states routed through variables ---
{
auto X = std::make_shared<Parameter>(element::f32, Shape{10, 1, 16});
auto H = std::make_shared<Parameter>(element::f32, Shape{1, 128});
auto C = std::make_shared<Parameter>(element::f32, Shape{1, 128});
// NOTE(review): the variable ids use the "LSTMTensorIterator" prefix although the Loop is named
// "LSTMLoop" - presumably compare_functions does not compare variable ids; confirm.
const std::string variable_name_H("LSTMTensorIterator/variable0");
const std::string variable_name_C("LSTMTensorIterator/variable1");
auto variable_H = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_H});
auto variable_C = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_C});
// create_init_subgraph is defined outside this block; its output initializes each ReadValue.
auto read_value_H = std::make_shared<ReadValue>(create_init_subgraph(H), variable_H);
auto read_value_C = std::make_shared<ReadValue>(create_init_subgraph(C), variable_C);
// Body parameters
auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
auto C_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
// Body
auto axis = Constant::create(element::i64, Shape{}, {0});
auto squeeze = std::make_shared<Squeeze>(Xi, axis);
// Zero-filled LSTM weights: W is {512, 16}, R is {512, 128}, B is {512}; hidden size 128.
auto w_val = std::vector<float>(512 * 16, 0);
auto r_val = std::vector<float>(512 * 128, 0);
auto b_val = std::vector<float>(512, 0);
auto W = Constant::create(element::f32, Shape{512, 16}, w_val);
auto R = Constant::create(element::f32, Shape{512, 128}, r_val);
auto B = Constant::create(element::f32, Shape{512}, b_val);
auto lstm_cell = std::make_shared<LSTMCell>(squeeze, H_t, C_t, W, R, B, 128);
auto res_1 = std::make_shared<Result>(lstm_cell->output(0));
auto unsqueeze = std::make_shared<Unsqueeze>(lstm_cell->output(0), axis);
auto res_2 = std::make_shared<Result>(unsqueeze);
auto res_3 = std::make_shared<Result>(lstm_cell->output(1));
// The body condition is the 4th body output (index 3), matching set_special_body_ports below.
auto body_condition = std::make_shared<Constant>(
element::boolean, Shape{1}, true);
auto body = std::make_shared<Function>(OutputVector{res_1, res_2, res_3, body_condition},
ParameterVector{Xi, H_t, C_t});
auto trip_count =
std::make_shared<Constant>(element::i64, Shape{}, 10);
auto exec_condition =
std::make_shared<Constant>(element::boolean, Shape{}, true);
auto loop = std::make_shared<Loop>(trip_count, exec_condition);
loop->set_special_body_ports({-1, 3});
loop->set_function(body);
loop->set_friendly_name("LSTMLoop");
// Same port wiring order as in the function under test, but the initial states are ReadValue ops.
loop->set_merged_input(C_t, read_value_C, res_3);
loop->set_sliced_input(Xi, X, 0, 1, 1, -1, 0);
loop->set_merged_input(H_t, read_value_H, res_1);
auto out0 = loop->get_iter_value(res_1, -1);
auto out1 = loop->get_concatenated_slices(res_2, 0, 1, 1, -1, 0);
// The final cell state is an extra Loop output that only feeds the Assign below.
auto out3 = loop->get_iter_value(res_3, -1);
auto assign_H = std::make_shared<Assign>(out0, variable_H);
auto assign_C = std::make_shared<Assign>(out3, variable_C);
auto outer_res_2 = std::make_shared<Result>(out1);
auto outer_res_1 = std::make_shared<Result>(out0);
f_ref = std::make_shared<Function>(OutputVector{outer_res_1, outer_res_2}, ParameterVector{X, H, C});
f_ref->add_sinks({assign_C, assign_H});
assign_H->add_control_dependency(read_value_H);
assign_C->add_control_dependency(read_value_C);
}
// The test fails with the comparator's message if the two functions differ.
auto res = compare_functions(f, f_ref);
ASSERT_TRUE(res.first) << res.second;
}
// Smoke test: running the deprecated LowLatency (v1) pass, alone and followed by LowLatency2,
// on a single-iteration LSTM TensorIterator must not throw.
// No reference function is built; only the absence of exceptions is checked.
TEST(TransformationTests, LowLatencyLSTM_LLTv1_LLTv2) {
    // Outer inputs: a one-step sequence X and the initial hidden/cell states.
    auto X = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
    auto H_init = std::make_shared<Parameter>(element::f32, Shape{1, 128});
    auto C_init = std::make_shared<Parameter>(element::f32, Shape{1, 128});

    // Parameters of the TensorIterator body.
    auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
    auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
    auto C_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});

    // Body: Squeeze -> LSTMCell (zero-filled weights, hidden size 128) -> Unsqueeze.
    auto axis = Constant::create(element::i64, Shape{}, {0});
    auto squeeze = std::make_shared<Squeeze>(Xi, axis);

    auto w_val = std::vector<float>(512 * 16, 0);
    auto r_val = std::vector<float>(512 * 128, 0);
    auto b_val = std::vector<float>(512, 0);
    auto W = Constant::create(element::f32, Shape{512, 16}, w_val);
    auto R = Constant::create(element::f32, Shape{512, 128}, r_val);
    auto B = Constant::create(element::f32, Shape{512}, b_val);

    auto lstm_cell = std::make_shared<LSTMCell>(squeeze, H_t, C_t, W, R, B, 128);
    auto res_1 = std::make_shared<Result>(lstm_cell->output(0));
    auto unsqueeze = std::make_shared<Unsqueeze>(lstm_cell->output(0), axis);
    auto res_2 = std::make_shared<Result>(unsqueeze);
    auto res_3 = std::make_shared<Result>(lstm_cell->output(1));
    auto body = std::make_shared<Function>(OutputVector{res_1, res_2, res_3}, ParameterVector{Xi, H_t, C_t});

    auto tensor_iterator = std::make_shared<TensorIterator>();
    tensor_iterator->set_body(body);
    tensor_iterator->set_friendly_name("LSTMTensorIterator");

    // The order of the set_*/get_* calls is kept as-is: it fixes the TI port numbering.
    tensor_iterator->set_merged_input(C_t, C_init, res_3);
    tensor_iterator->set_sliced_input(Xi, X, 0, 1, 1, -1, 0);
    tensor_iterator->set_merged_input(H_t, H_init, res_1);

    // The returned outputs are not referenced again; the Results below use output(N) directly.
    auto out0 = tensor_iterator->get_iter_value(res_1, -1);
    auto out1 = tensor_iterator->get_concatenated_slices(res_2, 0, 1, 1, -1, 0);

    auto res_ti_1 = std::make_shared<Result>(tensor_iterator->output(1));
    auto res_ti_2 = std::make_shared<Result>(tensor_iterator->output(0));
    std::shared_ptr<Function> f = std::make_shared<Function>(NodeVector{res_ti_1, res_ti_2},
                                                             ParameterVector{X, H_init, C_init});

    // 1) LowLatency v1 alone, applied to a clone so that `f` stays untouched for step 2.
    auto f_2 = ngraph::clone_function(*f);
    pass::Manager manager_2;
    manager_2.register_pass<pass::InitNodeInfo>();
    NGRAPH_SUPPRESS_DEPRECATED_START
    manager_2.register_pass<ngraph::pass::LowLatency>();
    NGRAPH_SUPPRESS_DEPRECATED_END
    EXPECT_NO_THROW(manager_2.run_passes(f_2));

    // 2) LowLatency v1 followed by LowLatency2 on the original function.
    pass::Manager manager;
    manager.register_pass<pass::InitNodeInfo>();
    NGRAPH_SUPPRESS_DEPRECATED_START
    manager.register_pass<ngraph::pass::LowLatency>();
    NGRAPH_SUPPRESS_DEPRECATED_END
    // LLT v2 doesn't insert Assign/ReadValue ops, they are already inserted
    // but unrolls TI/Loop
    manager.register_pass<pass::LowLatency2>();
    EXPECT_NO_THROW(manager.run_passes(f));
}

View File

@ -10,6 +10,13 @@ using namespace LayerTestsDefinitions;
namespace {
// Memory transformation modes the MemoryTest suite is instantiated with
// (fed to ::testing::ValuesIn in the INSTANTIATE_TEST_CASE_P below):
// no transformation plus three LowLatency v2 variants.
std::vector<ngraph::helpers::MemoryTransformation> transformation {
ngraph::helpers::MemoryTransformation::NONE,
ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2,
ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API,
ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_ORIGINAL_INIT,
};
const std::vector<InferenceEngine::SizeVector> inShapes = {
{3},
{100, 100},
@ -27,6 +34,7 @@ const std::vector<int64_t> iterationCount {
INSTANTIATE_TEST_CASE_P(smoke_MemoryTest, MemoryTest,
::testing::Combine(
::testing::ValuesIn(transformation),
::testing::ValuesIn(iterationCount),
::testing::ValuesIn(inShapes),
::testing::ValuesIn(inputPrecisions),

View File

@ -0,0 +1,45 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <subgraph_tests/memory_LSTMCell.hpp>
#include "common_test_utils/test_constants.hpp"
namespace SubgraphTestsDefinitions {
std::vector<ngraph::helpers::MemoryTransformation> transformation {
ngraph::helpers::MemoryTransformation::NONE,
ngraph::helpers::MemoryTransformation::LOW_LATENCY,
ngraph::helpers::MemoryTransformation::LOW_LATENCY_REGULAR_API,
ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2,
ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API
};
std::vector<size_t> input_sizes = {
80,
32,
64,
100,
25
};
std::vector<size_t> hidden_sizes = {
128,
200,
300,
24,
32,
};
std::map<std::string, std::string> additional_config = {
};
INSTANTIATE_TEST_CASE_P(smoke_MemoryLSTMCellTest, MemoryLSTMCellTest,
::testing::Combine(
::testing::ValuesIn(transformation),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(InferenceEngine::Precision::FP32),
::testing::ValuesIn(input_sizes),
::testing::ValuesIn(hidden_sizes),
::testing::Values(additional_config)),
MemoryLSTMCellTest::getTestCaseName);
} // namespace SubgraphTestsDefinitions

View File

@ -7,6 +7,15 @@
namespace SubgraphTestsDefinitions {
namespace {
// Memory transformation modes the MultipleLSTMCellTest suite is instantiated with
// (fed to ::testing::ValuesIn in the INSTANTIATE_TEST_CASE_P below).
std::vector<ngraph::helpers::MemoryTransformation> transformation {
ngraph::helpers::MemoryTransformation::NONE,
ngraph::helpers::MemoryTransformation::LOW_LATENCY,
ngraph::helpers::MemoryTransformation::LOW_LATENCY_REGULAR_API,
ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2,
ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API
};
std::vector<size_t> input_sizes = {
80,
32,
@ -28,6 +37,7 @@ std::map<std::string, std::string> additional_config = {
INSTANTIATE_TEST_CASE_P(MultipleLSTMCellTest, MultipleLSTMCellTest,
::testing::Combine(
::testing::ValuesIn(transformation),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(InferenceEngine::Precision::FP32),
::testing::ValuesIn(input_sizes),

View File

@ -10,9 +10,17 @@ using namespace LayerTestsDefinitions;
namespace {
// Memory transformation modes the MemoryTest suite is instantiated with
// (fed to ::testing::ValuesIn in the INSTANTIATE_TEST_CASE_P below):
// no transformation plus three LowLatency v2 variants.
std::vector<ngraph::helpers::MemoryTransformation> transformation {
ngraph::helpers::MemoryTransformation::NONE,
ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2,
ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API,
ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_ORIGINAL_INIT
};
const std::vector<InferenceEngine::SizeVector> inShapes = {
{1, 1},
{1, 2}
{1, 2},
{1, 10}
};
const std::vector<InferenceEngine::Precision> inputPrecisions = {
@ -22,11 +30,13 @@ const std::vector<InferenceEngine::Precision> inputPrecisions = {
const std::vector<int64_t> iterationCount {
1,
3,
4,
10
};
INSTANTIATE_TEST_CASE_P(smoke_MemoryTest, MemoryTest,
::testing::Combine(
::testing::ValuesIn(transformation),
::testing::ValuesIn(iterationCount),
::testing::ValuesIn(inShapes),
::testing::ValuesIn(inputPrecisions),

View File

@ -64,5 +64,13 @@ std::vector<std::string> disabledTestPatterns() {
R"(.*CachingSupport.*_batch2_.*)",
// TODO: Issue 51525
R"(.*CachingSupport.*KSOFunction.*)",
// TODO: Issue 57363 (Param -> Result subgraphs)
R"(.*smoke_MemoryTest.*LOW_LATENCY.*iteration_count=1_.*)",
// TODO: Issue 57368 (accuracy)
R"(.*smoke_MemoryTest.*LOW_LATENCY.*IS=\(1.10\).*)",
R"(.*smoke_MemoryTest.*iteration_count=3.*IS=\(1.10\).*)",
R"(.*smoke_MemoryTest.*iteration_count=4.*IS=\(1.10\).*)",
R"(.*smoke_MemoryTest.*iteration_count=10.*IS=\(1.10\).*)",
R"(.*smoke_MemoryTest.*LOW_LATENCY.*iteration_count=10.*IS=\(1.2\).*)",
};
}

View File

@ -6,6 +6,14 @@
#include "common_test_utils/test_constants.hpp"
namespace SubgraphTestsDefinitions {
// Memory transformation modes the MemoryLSTMCellTest suite is instantiated with
// (fed to ::testing::ValuesIn in the INSTANTIATE_TEST_CASE_P below).
std::vector<ngraph::helpers::MemoryTransformation> transformation {
ngraph::helpers::MemoryTransformation::NONE,
ngraph::helpers::MemoryTransformation::LOW_LATENCY,
ngraph::helpers::MemoryTransformation::LOW_LATENCY_REGULAR_API,
ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2,
ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API
};
std::vector<size_t> input_sizes = {
80,
32,
@ -30,6 +38,7 @@ namespace SubgraphTestsDefinitions {
INSTANTIATE_TEST_CASE_P(smoke_MemoryLSTMCellTest, MemoryLSTMCellTest,
::testing::Combine(
::testing::ValuesIn(transformation),
::testing::Values(CommonTestUtils::DEVICE_GNA),
::testing::Values(InferenceEngine::Precision::FP32),
::testing::ValuesIn(input_sizes),

View File

@ -7,6 +7,15 @@
namespace SubgraphTestsDefinitions {
namespace {
// Memory transformation modes the MultipleLSTMCellTest suite is instantiated with
// (fed to ::testing::ValuesIn in the INSTANTIATE_TEST_CASE_P below).
std::vector<ngraph::helpers::MemoryTransformation> transformation {
ngraph::helpers::MemoryTransformation::NONE,
ngraph::helpers::MemoryTransformation::LOW_LATENCY,
ngraph::helpers::MemoryTransformation::LOW_LATENCY_REGULAR_API,
ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2,
ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API
};
std::vector<size_t> input_sizes = {
80,
32,
@ -31,6 +40,7 @@ std::map<std::string, std::string> additional_config = {
INSTANTIATE_TEST_CASE_P(MultipleLSTMCellTest, MultipleLSTMCellTest,
::testing::Combine(
::testing::ValuesIn(transformation),
::testing::Values(CommonTestUtils::DEVICE_GNA),
::testing::Values(InferenceEngine::Precision::FP32),
::testing::ValuesIn(input_sizes),

View File

@ -39,7 +39,7 @@ TEST_P(Basic_LSTM_S, CompareWithRefImpl_LowLatencyTransformation) {
// Apply LowLatency and UnrollTensorIterator transformations
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::LowLatency>(); // LowLatency enables UnrollTI
manager.register_pass<ngraph::pass::LowLatency2>(); // LowLatency enables UnrollTI
manager.run_passes(function);
LoadNetwork();
IE_SUPPRESS_DEPRECATED_START

View File

@ -12,12 +12,4 @@ TEST_P(MemoryLSTMCellTest, CompareWithRefs) {
Run();
};
TEST_P(MemoryLSTMCellTest, CompareWithRefs_LowLatencyTransformation) {
RunLowLatency();
};
TEST_P(MemoryLSTMCellTest, CompareWithRefs_LowLatencyRegularAPITransformation) {
RunLowLatency(true);
};
} // namespace SubgraphTestsDefinitions

View File

@ -12,12 +12,4 @@ TEST_P(MultipleLSTMCellTest, CompareWithRefs) {
Run();
};
TEST_P(MultipleLSTMCellTest, CompareWithRefs_LowLatencyTransformation) {
RunLowLatency();
};
TEST_P(MultipleLSTMCellTest, CompareWithRefs_LowLatencyRegularAPITransformation) {
RunLowLatency(true);
};
} // namespace SubgraphTestsDefinitions

View File

@ -14,6 +14,7 @@
namespace LayerTestsDefinitions {
using MemoryTestParams = std::tuple<
ngraph::helpers::MemoryTransformation, // Apply Memory transformation
int64_t, // iterationCount
InferenceEngine::SizeVector, // inputShape
InferenceEngine::Precision, // netPrecision
@ -28,9 +29,17 @@ protected:
std::vector<std::pair<ngraph::element::Type, std::vector<std::uint8_t>>> CalculateRefs() override;
void SetUp() override;
private:
void CreateTIFunc();
void CreateCommonFunc();
void ApplyLowLatency();
InferenceEngine::Precision netPrecision;
ngraph::EvaluationContext eval_context;
ngraph::helpers::MemoryTransformation transformation;
int64_t iteration_count;
ngraph::element::Type ngPrc;
InferenceEngine::SizeVector inputShape;
};
} // namespace LayerTestsDefinitions

View File

@ -10,6 +10,7 @@
namespace SubgraphTestsDefinitions {
typedef std::tuple<
ngraph::helpers::MemoryTransformation, // Apply Memory transformation
std::string, // Target device name
InferenceEngine::Precision, // Network precision
size_t, // Input size
@ -21,9 +22,13 @@ class MemoryLSTMCellTest : public LayerTestsUtils::LayerTestsCommon,
public testing::WithParamInterface<memoryLSTMCellParams> {
private:
// you have to Unroll TI manually and remove memory until ngraph supports it
// since we switching models we need to generate and save weights biases and inputs in SetUp
void switchToNgraphFriendlyModel();
void CreatePureTensorIteratorModel();
// since we switching models we need to generate and save weights biases and inputs in SetUp
void InitMemory();
void ApplyLowLatency();
ngraph::helpers::MemoryTransformation transformation;
std::vector<float> input_bias;
std::vector<float> input_weights;
std::vector<float> hidden_memory_init;
@ -34,7 +39,6 @@ private:
protected:
void SetUp() override;
void Run() override;
void RunLowLatency(bool regular_api = false);
public:
static std::string getTestCaseName(const testing::TestParamInfo<memoryLSTMCellParams> &obj);
};

View File

@ -10,6 +10,7 @@
namespace SubgraphTestsDefinitions {
typedef std::tuple<
ngraph::helpers::MemoryTransformation, // Apply Memory transformation
std::string, // Target device name
InferenceEngine::Precision, // Network precision
size_t, // Input size
@ -21,9 +22,12 @@ class MultipleLSTMCellTest : public LayerTestsUtils::LayerTestsCommon,
public testing::WithParamInterface<multipleLSTMCellParams> {
private:
// you have to Unroll TI manually and remove memory until ngraph supports it
// since we switching models we need to generate and save weights biases and inputs in SetUp
void switchToNgraphFriendlyModel();
void CreatePureTensorIteratorModel();
// since we switching models we need to generate and save weights biases and inputs in SetUp
void InitMemory();
void ApplyLowLatency();
size_t hiddenSize;
std::vector<float> input_bias;
std::vector<float> input_weights;
@ -33,10 +37,10 @@ private:
std::vector<float> weights_2_vals;
std::vector<float> reccurrenceWeights_vals;
std::vector<float> bias_vals;
ngraph::helpers::MemoryTransformation transformation;
protected:
void SetUp() override;
void Run() override;
void RunLowLatency(bool regular_api = false);
public:
static std::string getTestCaseName(const testing::TestParamInfo<multipleLSTMCellParams> &obj);
};

View File

@ -3,10 +3,18 @@
//
#include <signal.h>
#include <ie_transformations.hpp>
#include <transformations/control_flow/unroll_tensor_iterator.hpp>
#include <transformations/serialize.hpp>
#include <functional_test_utils/core_config.hpp>
#include "ngraph/opsets/opset7.hpp"
#include "ngraph_functions/builders.hpp"
#include "ngraph/pass/low_latency.hpp"
#include "shared_test_classes/single_layer/memory.hpp"
using namespace ngraph;
using namespace opset7;
namespace LayerTestsDefinitions {
std::string MemoryTest::getTestCaseName(const testing::TestParamInfo<MemoryTestParams> &obj) {
@ -14,9 +22,11 @@ namespace LayerTestsDefinitions {
InferenceEngine::Precision netPrecision;
InferenceEngine::SizeVector inputShape;
std::string targetDevice;
std::tie(iteration_count, inputShape, netPrecision, targetDevice) = obj.param;
ngraph::helpers::MemoryTransformation transformation;
std::tie(transformation, iteration_count, inputShape, netPrecision, targetDevice) = obj.param;
std::ostringstream result;
result << "transformation=" << transformation << "_";
result << "iteration_count=" << iteration_count << "_";
result << "IS=" << CommonTestUtils::vec2str(inputShape) << "_";
result << "netPRC=" << netPrecision.name() << "_";
@ -26,20 +36,17 @@ namespace LayerTestsDefinitions {
}
void MemoryTest::SetUp() {
using namespace ngraph;
InferenceEngine::SizeVector inputShape;
std::tie(iteration_count, inputShape, netPrecision, targetDevice) = this->GetParam();
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
std::tie(transformation, iteration_count, inputShape, netPrecision, targetDevice) = this->GetParam();
ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
auto param = ngraph::builder::makeParams(ngPrc, {inputShape});
auto variable = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, "v0"});
auto read_value = std::make_shared<opset7::ReadValue>(param.at(0), variable);
auto add = std::make_shared<opset7::Add>(read_value, param.at(0));
auto assign = std::make_shared<opset7::Assign>(add, variable);
auto res = std::make_shared<opset7::Result>(add);
function = std::make_shared<Function>(ResultVector{res}, SinkVector{assign}, param, "TestMemory");
if (transformation == ngraph::helpers::MemoryTransformation::NONE) {
CreateCommonFunc();
} else {
CreateTIFunc();
ApplyLowLatency();
}
auto hostTensor = std::make_shared<ngraph::HostTensor>(ngPrc, inputShape);
auto hostTensor = std::make_shared<HostTensor>(ngPrc, inputShape);
auto variable_context = std::make_shared<VariantWrapper<VariableContext>>(VariableContext());
auto variable_value = std::make_shared<VariableValue>(hostTensor);
variable_context->get().set_variable_value(function->get_variable_by_id("v0"), variable_value);
@ -48,6 +55,7 @@ namespace LayerTestsDefinitions {
void MemoryTest::Run() {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
using namespace LayerTestsUtils;
auto crashHandler = [](int errCode) {
auto &s = Summary::getInstance();
@ -68,7 +76,13 @@ namespace LayerTestsDefinitions {
}
try {
LoadNetwork();
if (transformation != ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API) {
LoadNetwork();
} else {
CoreConfiguration(this);
ConfigureNetwork();
executableNetwork = core->LoadNetwork(cnnNetwork, targetDevice, configuration);
}
GenerateInputs();
for (int64_t i = 0; i < iteration_count; ++i) {
Infer();
@ -88,12 +102,12 @@ namespace LayerTestsDefinitions {
}
}
std::vector<std::pair<ngraph::element::Type, std::vector<std::uint8_t>>> MemoryTest::CalculateRefs() {
std::vector<std::pair<element::Type, std::vector<std::uint8_t>>> MemoryTest::CalculateRefs() {
using namespace ngraph;
function->validate_nodes_and_infer_types();
auto referenceInputs = std::vector<std::vector<uint8_t>>(inputs.size());
auto refInputsTypes = std::vector<ngraph::element::Type>(inputs.size());
auto refInputsTypes = std::vector<element::Type>(inputs.size());
HostTensorVector inputTensors;
for (auto & input : inputs) {
const auto &dataSize = input->byteSize();
@ -104,17 +118,25 @@ namespace LayerTestsDefinitions {
const auto lockedMemory = memory->wmap();
const auto buffer = lockedMemory.as<const std::uint8_t *>();
auto hostTensor = std::make_shared<ngraph::HostTensor>(FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(tensorDesc.getPrecision()),
auto hostTensor = std::make_shared<HostTensor>(FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(tensorDesc.getPrecision()),
tensorDesc.getDims());
hostTensor->write(buffer, dataSize);
inputTensors.push_back(hostTensor);
}
// evaluate method is not implemented for TI op.
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::UnrollTensorIterator>();
manager.run_passes(function);
const auto &outInfo = executableNetwork.GetOutputsInfo();
HostTensorVector outputTensors(outInfo.size(), std::make_shared<ngraph::HostTensor>());
HostTensorVector outputTensors(outInfo.size());
for (auto& outTensor : outputTensors) {
outTensor = std::make_shared<HostTensor>();
}
function->evaluate(outputTensors, inputTensors, eval_context);
std::vector<std::pair<ngraph::element::Type, std::vector<std::uint8_t>>> outputs(outInfo.size());
std::vector<std::pair<element::Type, std::vector<std::uint8_t>>> outputs(outInfo.size());
for (size_t idx = 0; idx < outInfo.size(); ++idx) {
outputs[idx].first = outputTensors[idx]->get_element_type();
outputs[idx].second.resize(outputTensors[idx]->get_size_in_bytes());
@ -123,5 +145,61 @@ namespace LayerTestsDefinitions {
return outputs;
}
void MemoryTest::CreateTIFunc() {
auto param = builder::makeParams(ngPrc, {inputShape}).at(0);
std::vector<std::vector<size_t>> shape = {{static_cast<size_t>(iteration_count), 1}};
auto iter_count = builder::makeParams(ngPrc, shape).at(0);
// Body
auto X = builder::makeParams(ngPrc, {inputShape}).at(0);
auto Y = builder::makeParams(ngPrc, {inputShape}).at(0);
auto Iter = builder::makeParams(ngPrc, {Shape{1, 1}}).at(0);
auto add = std::make_shared<Add>(X, Y);
auto res = std::make_shared<Result>(add);
auto Iter_res = std::make_shared<Result>(Iter);
auto body = std::make_shared<Function>(OutputVector{res, Iter_res}, ParameterVector {X, Y, Iter});
// TI construction
auto tensor_iterator = std::make_shared<TensorIterator>();
tensor_iterator->set_body(body);
tensor_iterator->set_merged_input(X, param, res);
tensor_iterator->set_invariant_input(Y, param);
tensor_iterator->set_sliced_input(Iter, iter_count, 0, 1, 1, -1, 0);
auto output = tensor_iterator->get_iter_value(res, -1);
auto output_iter = tensor_iterator->get_concatenated_slices(Iter_res, 0, 1, 1, -1, 0);
function = std::make_shared<Function>(OutputVector{output, output_iter},
ParameterVector{param, iter_count},
"PureTI");
}
void MemoryTest::CreateCommonFunc() {
auto param = builder::makeParams(ngPrc, {inputShape});
auto variable = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, "v0"});
auto read_value = std::make_shared<ReadValue>(param.at(0), variable);
auto add = std::make_shared<Add>(read_value, param.at(0));
auto assign = std::make_shared<Assign>(add, variable);
auto res = std::make_shared<Result>(add);
function = std::make_shared<Function>(ResultVector{res}, SinkVector{assign}, param, "TestMemory");
}
// Applies the LowLatency v2 flavour selected by `transformation` to `function`:
//  - LOW_LATENCY_V2:               ngraph pass LowLatency2 with its default arguments;
//  - LOW_LATENCY_V2_ORIGINAL_INIT: ngraph pass LowLatency2 constructed with `false`;
//  - LOW_LATENCY_V2_REGULAR_API:   wraps `function` into `cnnNetwork` and calls
//                                  InferenceEngine::lowLatency2 on it.
// Any other value leaves `function` untouched.
void MemoryTest::ApplyLowLatency() {
    if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2) {
        function->validate_nodes_and_infer_types();
        pass::Manager manager;
        manager.register_pass<pass::LowLatency2>();
        manager.run_passes(function);
    } else if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_ORIGINAL_INIT) {
        function->validate_nodes_and_infer_types();
        pass::Manager manager;
        manager.register_pass<pass::LowLatency2>(false);
        manager.run_passes(function);
    } else if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API) {
        cnnNetwork = InferenceEngine::CNNNetwork{function};
        // The second argument of lowLatency2 is the boolean `use_const_initializer` flag, not an
        // iteration count. Passing `iteration_count` here only worked through an implicit
        // int64_t -> bool conversion (always `true` for the instantiated counts), so the default
        // is used instead, matching the LOW_LATENCY_V2 branch above.
        InferenceEngine::lowLatency2(cnnNetwork);
    }
}
} // namespace LayerTestsDefinitions

View File

@ -9,6 +9,9 @@
#include "ngraph_functions/builders.hpp"
#include "shared_test_classes/subgraph/memory_LSTMCell.hpp"
using namespace ngraph;
using namespace opset7;
namespace SubgraphTestsDefinitions {
std::string MemoryLSTMCellTest::getTestCaseName(const testing::TestParamInfo<memoryLSTMCellParams> &obj) {
@ -17,9 +20,11 @@ namespace SubgraphTestsDefinitions {
size_t inputSize;
size_t hiddenSize;
std::map<std::string, std::string> config;
std::tie(targetDevice, netPrecision, inputSize, hiddenSize, config) = obj.param;
ngraph::helpers::MemoryTransformation transformation;
std::tie(transformation, targetDevice, netPrecision, inputSize, hiddenSize, config) = obj.param;
std::ostringstream result;
result << "transformation=" << transformation << "_";
result << "netPrecision=" << netPrecision.name() << "_";
result << "IS=" << inputSize << "_";
result << "HS=" << hiddenSize << "_";
@ -34,7 +39,7 @@ namespace SubgraphTestsDefinitions {
InferenceEngine::Precision netPrecision;
std::map<std::string, std::string> config;
size_t inputSize;
std::tie(targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam();
std::tie(transformation, targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam();
configuration.insert(config.begin(), config.end());
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
@ -51,49 +56,53 @@ namespace SubgraphTestsDefinitions {
reccurrenceWeights_vals = CommonTestUtils::generate_float_numbers(4 * hiddenSize * hiddenSize, -0.1f, 0.1f);
bias_vals = CommonTestUtils::generate_float_numbers(4 * hiddenSize, -0.2f, 0.1f);
auto input_parameter = ngraph::builder::makeParams(ngPrc, {input_dims});
auto input_parameter = builder::makeParams(ngPrc, {input_dims});
auto input_add_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_bias);
auto add = ngraph::builder::makeEltwise(input_parameter[0], input_add_const, ngraph::helpers::EltwiseTypes::ADD);
auto input_add_const = builder::makeConstant(ngPrc, input_dims, input_bias);
auto add = builder::makeEltwise(input_parameter[0], input_add_const, helpers::EltwiseTypes::ADD);
auto input_mul_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_weights);
auto mul = ngraph::builder::makeEltwise(add, input_mul_const, ngraph::helpers::EltwiseTypes::MULTIPLY);
auto input_mul_const = builder::makeConstant(ngPrc, input_dims, input_weights);
auto mul = builder::makeEltwise(add, input_mul_const, helpers::EltwiseTypes::MULTIPLY);
auto unsqueeze_input_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
auto unsqueeze_input = std::make_shared<ngraph::opset5::Unsqueeze>(mul, unsqueeze_input_const);
auto unsqueeze_input_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
auto unsqueeze_input = std::make_shared<Unsqueeze>(mul, unsqueeze_input_const);
auto permute_in_params = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{3}, ngraph::Shape{{1, 0, 2}});
auto permute_in = std::make_shared<ngraph::opset5::Transpose>(unsqueeze_input, permute_in_params);
auto permute_in_params = std::make_shared<Constant>(element::i64, Shape{3}, Shape{{1, 0, 2}});
auto permute_in = std::make_shared<Transpose>(unsqueeze_input, permute_in_params);
auto cell_memory_constant = ngraph::builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
auto cell_memory_read = std::make_shared<ngraph::opset5::ReadValue>(cell_memory_constant, "cell_memory");
auto cell_memory_constant = builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
auto var_cell =
std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, "cell_state_1"});
auto var_hidden =
std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, "hidden_state_1"});
auto cell_memory_read = std::make_shared<ReadValue>(cell_memory_constant, var_cell);
auto hidden_memory_constant = ngraph::builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
auto hidden_memory_read = std::make_shared<ngraph::opset5::ReadValue>(hidden_memory_constant, "hidden_memory");
auto hidden_memory_constant = builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
auto hidden_memory_read = std::make_shared<ReadValue>(hidden_memory_constant, var_hidden);
// Body - inputs
auto X = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, 1, inputSize});
auto H_t = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, hiddenSize});
auto C_t = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, hiddenSize});
auto X = std::make_shared<Parameter>(ngPrc, Shape{1, 1, inputSize});
auto H_t = std::make_shared<Parameter>(ngPrc, Shape{1, hiddenSize});
auto C_t = std::make_shared<Parameter>(ngPrc, Shape{1, hiddenSize});
// Body - layers
auto squeeze_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
auto squeeze = std::make_shared<ngraph::opset5::Squeeze>(X, squeeze_const);
auto squeeze_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
auto squeeze = std::make_shared<Squeeze>(X, squeeze_const);
auto weightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals);
auto reccurrenceWeightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
auto biasNode = ngraph::builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
auto lstm = std::make_shared<ngraph::opset5::LSTMCell>(squeeze, H_t, C_t, weightsNode, reccurrenceWeightsNode, biasNode, hiddenSize);
auto weightsNode = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals);
auto reccurrenceWeightsNode = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
auto biasNode = builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
auto lstm = std::make_shared<LSTMCell>(squeeze, H_t, C_t, weightsNode, reccurrenceWeightsNode, biasNode, hiddenSize);
auto unsqueeze_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
auto unsqueeze = std::make_shared<ngraph::opset5::Unsqueeze>(lstm->output(0), unsqueeze_const);
auto unsqueeze_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
auto unsqueeze = std::make_shared<Unsqueeze>(lstm->output(0), unsqueeze_const);
// body - outputs
auto H_o = lstm->output(0);
auto C_o = lstm->output(1);
auto unsqueeze_o = unsqueeze->output(0);
auto body = std::make_shared<ngraph::Function>(ngraph::OutputVector{unsqueeze_o, H_o, C_o}, ngraph::ParameterVector {X, H_t, C_t});
auto body = std::make_shared<Function>(OutputVector{unsqueeze_o, H_o, C_o}, ParameterVector {X, H_t, C_t});
// TI construction
auto tensor_iterator = std::make_shared<ngraph::opset5::TensorIterator>();
auto tensor_iterator = std::make_shared<TensorIterator>();
tensor_iterator->set_body(body);
tensor_iterator->set_invariant_input(X, permute_in);
tensor_iterator->set_merged_input(H_t, hidden_memory_read, H_o);
@ -107,27 +116,27 @@ namespace SubgraphTestsDefinitions {
out_hidden.get_tensor().set_element_type(ngPrc);
out_cell.get_tensor().set_element_type(ngPrc);
auto cell_memory_write = std::make_shared<ngraph::opset5::Assign>(out_cell, "cell_memory");
auto hidden_memory_write = std::make_shared<ngraph::opset5::Assign>(out_hidden, "hidden_memory");
auto cell_memory_write = std::make_shared<Assign>(out_cell, var_cell);
auto hidden_memory_write = std::make_shared<Assign>(out_hidden, var_hidden);
auto final_reshape_pattern = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{4},
auto final_reshape_pattern = std::make_shared<Constant>(element::i64, Shape{4},
std::vector<size_t>({1, 1, 1, hiddenSize}));
auto final_reshape = std::make_shared<ngraph::opset5::Reshape>(out_unsqueeze, final_reshape_pattern, false);
auto final_reshape = std::make_shared<Reshape>(out_unsqueeze, final_reshape_pattern, false);
cell_memory_write->add_control_dependency(cell_memory_read);
final_reshape->add_control_dependency(cell_memory_write);
hidden_memory_write->add_control_dependency(hidden_memory_read);
final_reshape->add_control_dependency(hidden_memory_write);
function = std::make_shared<ngraph::Function>(final_reshape, input_parameter, "TI_with_memory");
function = std::make_shared<Function>(OutputVector{final_reshape},
SinkVector{cell_memory_write, hidden_memory_write},
input_parameter,
"TI_with_memory");
}
// Replaces `function` with a graph that contains no TensorIterator and no
// ReadValue/Assign nodes: Add -> Multiply -> Unsqueeze -> Squeeze -> LSTMCell ->
// Unsqueeze -> Reshape, stored under the name "TI_unrolled_without_memory".
// NOTE(review): this listing looks like a diff rendered without +/- markers. Most
// statements appear twice (an `ngraph::`-qualified form directly followed by an
// unqualified form), and the `@ ... @` line below is a hunk header, not C++.
// Confirm which variant of each pair belongs to the revision you care about.
void MemoryLSTMCellTest::switchToNgraphFriendlyModel() {
InferenceEngine::Precision netPrecision;
std::map<std::string, std::string> config;
size_t inputSize;
// Unpack the test parameters; the second form additionally reads a leading
// `transformation` element from the tuple.
std::tie(targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam();
std::tie(transformation, targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam();
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
std::vector<size_t> input_dims { 1, inputSize };
@ -135,46 +144,46 @@ namespace SubgraphTestsDefinitions {
std::vector<size_t> hidden_memory_dims {1, hiddenSize};
std::vector<size_t> cell_memory_dims {1, hiddenSize};
auto input_parameter = ngraph::builder::makeParams(ngPrc, {input_dims});
auto input_parameter = builder::makeParams(ngPrc, {input_dims});
// Input pre-processing: (input + input_bias) * input_weights.
auto input_add_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_bias);
auto add = ngraph::builder::makeEltwise(input_parameter[0], input_add_const, ngraph::helpers::EltwiseTypes::ADD);
auto input_add_const = builder::makeConstant(ngPrc, input_dims, input_bias);
auto add = builder::makeEltwise(input_parameter[0], input_add_const, helpers::EltwiseTypes::ADD);
auto input_mul_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_weights);
auto mul = ngraph::builder::makeEltwise(add, input_mul_const, ngraph::helpers::EltwiseTypes::MULTIPLY);
auto input_mul_const = builder::makeConstant(ngPrc, input_dims, input_weights);
auto mul = builder::makeEltwise(add, input_mul_const, helpers::EltwiseTypes::MULTIPLY);
auto unsqueeze_input_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
auto unsqueeze_input = std::make_shared<ngraph::opset5::Unsqueeze>(mul, unsqueeze_input_const);
auto unsqueeze_input_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
auto unsqueeze_input = std::make_shared<Unsqueeze>(mul, unsqueeze_input_const);
// Initial cell/hidden states are plain constants here; they are passed straight
// to the LSTMCell below instead of going through ReadValue nodes.
auto cell_memory_constant = ngraph::builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
auto cell_memory_constant = builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
auto hidden_memory_constant = ngraph::builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
auto hidden_memory_constant = builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
// Body - layers
auto squeeze_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
auto squeeze = std::make_shared<ngraph::opset5::Squeeze>(unsqueeze_input, squeeze_const);
auto squeeze_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
auto squeeze = std::make_shared<Squeeze>(unsqueeze_input, squeeze_const);
auto weightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals);
auto reccurrenceWeightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
auto biasNode = ngraph::builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
auto lstm = std::make_shared<ngraph::opset5::LSTMCell>(squeeze, hidden_memory_constant, cell_memory_constant, weightsNode,
auto weightsNode = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals);
auto reccurrenceWeightsNode = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
auto biasNode = builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
auto lstm = std::make_shared<LSTMCell>(squeeze, hidden_memory_constant, cell_memory_constant, weightsNode,
reccurrenceWeightsNode, biasNode, hiddenSize);
auto unsqueeze_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
auto unsqueeze = std::make_shared<ngraph::opset5::Unsqueeze>(lstm->output(0), unsqueeze_const);
auto unsqueeze_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
auto unsqueeze = std::make_shared<Unsqueeze>(lstm->output(0), unsqueeze_const);
// The first LSTMCell output is reshaped to {1, 1, 1, hiddenSize} and becomes the
// only result of the function.
auto final_reshape_pattern = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64,
ngraph::Shape{4}, std::vector<size_t>({1, 1, 1, hiddenSize}));
auto final_reshape = std::make_shared<ngraph::opset5::Reshape>(unsqueeze, final_reshape_pattern, false);
auto final_reshape_pattern = std::make_shared<Constant>(element::i64,
Shape{4}, std::vector<size_t>({1, 1, 1, hiddenSize}));
auto final_reshape = std::make_shared<Reshape>(unsqueeze, final_reshape_pattern, false);
function = std::make_shared<ngraph::Function>(final_reshape, input_parameter, "TI_unrolled_without_memory");
function = std::make_shared<Function>(final_reshape, input_parameter, "TI_unrolled_without_memory");
}
// Replaces `function` with a graph named "PureTI": the pre-processed input is
// transposed and fed into a TensorIterator whose body is Squeeze -> LSTMCell ->
// Unsqueeze. The hidden state is merged from a constant (no ReadValue node is
// created in the visible lines).
// NOTE(review): this listing looks like a diff rendered without +/- markers. Most
// statements appear twice (an `ngraph::`-qualified form directly followed by an
// unqualified form), and the `@ ... @` lines are hunk headers, not C++. Lines
// between hunks are not shown, e.g. where out_unsqueeze/out_hidden/out_cell are
// defined.
void MemoryLSTMCellTest::CreatePureTensorIteratorModel() {
InferenceEngine::Precision netPrecision;
std::map<std::string, std::string> config;
size_t inputSize;
// Unpack the test parameters; the second form additionally reads a leading
// `transformation` element from the tuple.
std::tie(targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam();
std::tie(transformation, targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam();
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
std::vector<size_t> input_dims { 1, inputSize };
@ -182,49 +191,49 @@ namespace SubgraphTestsDefinitions {
std::vector<size_t> hidden_memory_dims {1, hiddenSize};
std::vector<size_t> cell_memory_dims {1, hiddenSize};
auto input_parameter = ngraph::builder::makeParams(ngPrc, {input_dims});
auto input_parameter = builder::makeParams(ngPrc, {input_dims});
// Input pre-processing: (input + input_bias) * input_weights.
auto input_add_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_bias);
auto add = ngraph::builder::makeEltwise(input_parameter[0], input_add_const, ngraph::helpers::EltwiseTypes::ADD);
auto input_add_const = builder::makeConstant(ngPrc, input_dims, input_bias);
auto add = builder::makeEltwise(input_parameter[0], input_add_const, helpers::EltwiseTypes::ADD);
auto input_mul_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_weights);
auto mul = ngraph::builder::makeEltwise(add, input_mul_const, ngraph::helpers::EltwiseTypes::MULTIPLY);
auto input_mul_const = builder::makeConstant(ngPrc, input_dims, input_weights);
auto mul = builder::makeEltwise(add, input_mul_const, helpers::EltwiseTypes::MULTIPLY);
auto unsqueeze_input_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
auto unsqueeze_input = std::make_shared<ngraph::opset5::Unsqueeze>(mul, unsqueeze_input_const);
auto unsqueeze_input_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
auto unsqueeze_input = std::make_shared<Unsqueeze>(mul, unsqueeze_input_const);
// Transpose with order {1, 0, 2} before the data enters the TensorIterator.
auto permute_in_params = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{3}, ngraph::Shape{{1, 0, 2}});
auto permute_in = std::make_shared<ngraph::opset5::Transpose>(unsqueeze_input, permute_in_params);
auto permute_in_params = std::make_shared<Constant>(element::i64, Shape{3}, Shape{{1, 0, 2}});
auto permute_in = std::make_shared<Transpose>(unsqueeze_input, permute_in_params);
auto cell_memory_constant = ngraph::builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
auto cell_memory_constant = builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
auto hidden_memory_constant = ngraph::builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
auto hidden_memory_constant = builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
// Body - inputs
auto X = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, 1, inputSize});
auto H_t = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, hiddenSize});
auto C_t = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, hiddenSize});
auto X = std::make_shared<Parameter>(ngPrc, Shape{1, 1, inputSize});
auto H_t = std::make_shared<Parameter>(ngPrc, Shape{1, hiddenSize});
auto C_t = std::make_shared<Parameter>(ngPrc, Shape{1, hiddenSize});
// The state parameters get explicit friendly names.
// NOTE(review): presumably these names are what the low-latency transformation
// uses to name the variables it creates - confirm against the pass.
H_t->set_friendly_name("hidden_state_1");
C_t->set_friendly_name("cell_state_1");
// Body - layers
auto squeeze_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
auto squeeze = std::make_shared<ngraph::opset5::Squeeze>(X, squeeze_const);
auto squeeze_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
auto squeeze = std::make_shared<Squeeze>(X, squeeze_const);
auto weightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals);
auto reccurrenceWeightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
auto biasNode = ngraph::builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
auto lstm = std::make_shared<ngraph::opset5::LSTMCell>(squeeze, H_t, C_t, weightsNode, reccurrenceWeightsNode, biasNode, hiddenSize);
auto weightsNode = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals);
auto reccurrenceWeightsNode = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
auto biasNode = builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
auto lstm = std::make_shared<LSTMCell>(squeeze, H_t, C_t, weightsNode, reccurrenceWeightsNode, biasNode, hiddenSize);
auto unsqueeze_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
auto unsqueeze = std::make_shared<ngraph::opset5::Unsqueeze>(lstm->output(0), unsqueeze_const);
auto unsqueeze_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
auto unsqueeze = std::make_shared<Unsqueeze>(lstm->output(0), unsqueeze_const);
// body - outputs
auto H_o = lstm->output(0);
auto C_o = lstm->output(1);
auto unsqueeze_o = unsqueeze->output(0);
auto body = std::make_shared<ngraph::Function>(ngraph::OutputVector{unsqueeze_o, H_o, C_o}, ngraph::ParameterVector {X, H_t, C_t});
auto body = std::make_shared<Function>(OutputVector{unsqueeze_o, H_o, C_o}, ParameterVector {X, H_t, C_t});
// TI construction
auto tensor_iterator = std::make_shared<ngraph::opset5::TensorIterator>();
auto tensor_iterator = std::make_shared<TensorIterator>();
tensor_iterator->set_body(body);
// X is a sliced input taken from permute_in; H_t starts from the hidden-state
// constant and is fed back from H_o on later iterations.
tensor_iterator->set_sliced_input(X, permute_in, 0, 1, 1, -1, 0);
tensor_iterator->set_merged_input(H_t, hidden_memory_constant, H_o);
@ -237,56 +246,35 @@ namespace SubgraphTestsDefinitions {
out_hidden.get_tensor().set_element_type(ngPrc);
out_cell.get_tensor().set_element_type(ngPrc);
// out_unsqueeze is reshaped to {1, 1, 1, hiddenSize} and becomes the only result.
auto final_reshape_pattern = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{4},
auto final_reshape_pattern = std::make_shared<Constant>(element::i64, Shape{4},
std::vector<size_t>({1, 1, 1, hiddenSize}));
auto final_reshape = std::make_shared<ngraph::opset5::Reshape>(out_unsqueeze, final_reshape_pattern, false);
auto final_reshape = std::make_shared<Reshape>(out_unsqueeze, final_reshape_pattern, false);
function = std::make_shared<ngraph::Function>(final_reshape, input_parameter, "PureTI");
function = std::make_shared<Function>(final_reshape, input_parameter, "PureTI");
}
// Test entry point: prepares the executable network and its states, runs
// inference, then swaps `function` for a reference model and calls Validate().
// NOTE(review): two variants of this body appear interleaved, consistent with a
// diff rendered without +/- markers. One loads the network and fills the states
// inline; the other delegates to ApplyLowLatency()/LoadNetwork() and InitMemory().
// As written the braces do not balance (the `for` loop over states is never
// closed) - confirm against the real file before relying on this text.
void MemoryLSTMCellTest::Run() {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
IE_SUPPRESS_DEPRECATED_START
LoadNetwork();
// Inline state initialisation: states named "cell_memory" / "hidden_memory" are
// filled from cell_memory_init / hidden_memory_init; any other name fails the test.
auto states = executableNetwork.QueryState();
for (auto& state : states) {
auto name = state.GetName();
if (name == "cell_memory") {
auto blob = FuncTestUtils::createAndFillBlobWithFloatArray(state.GetLastState()->getTensorDesc(),
cell_memory_init.data(), cell_memory_init.size());
state.SetState(blob);
} else if (name == "hidden_memory") {
auto blob = FuncTestUtils::createAndFillBlobWithFloatArray(state.GetLastState()->getTensorDesc(),
hidden_memory_init.data(), hidden_memory_init.size());
state.SetState(blob);
} else {
GTEST_FAIL() << "unknown memory state";
}
// Any transformation other than NONE goes through ApplyLowLatency(); otherwise
// the network is loaded directly.
if (transformation != ngraph::helpers::MemoryTransformation::NONE) {
ApplyLowLatency();
} else {
LoadNetwork();
}
IE_SUPPRESS_DEPRECATED_END
InitMemory();
GenerateInputs();
Infer();
switchToNgraphFriendlyModel();
// Calculate ref values
// NONE is validated against the TensorIterator-free model; every other
// transformation is validated against the pure TensorIterator model.
if (transformation == ngraph::helpers::MemoryTransformation::NONE) {
switchToNgraphFriendlyModel();
} else {
CreatePureTensorIteratorModel();
}
Validate();
}
// NOTE(review): two definitions are interleaved in this span. The header of
// RunLowLatency(bool) is followed by the header of InitMemory() with no closing
// brace in between, and the `@ ... @` line is a diff hunk header, not C++. This is
// consistent with a diff rendered without +/- markers in which one function was
// replaced by the other - confirm against the real file.
//
// RunLowLatency(regular_api): builds the pure TensorIterator model, then applies
// LowLatency either through the InferenceEngine CNNNetwork call (regular_api ==
// true) or through an ngraph pass manager, and loads the network.
void MemoryLSTMCellTest::RunLowLatency(bool regular_api) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
CreatePureTensorIteratorModel();
if (regular_api) {
// Transform the CNNNetwork wrapper and load it explicitly through `core`.
cnnNetwork = InferenceEngine::CNNNetwork{function};
InferenceEngine::LowLatency(cnnNetwork);
ConfigureNetwork();
executableNetwork = core->LoadNetwork(static_cast<const InferenceEngine::CNNNetwork>(cnnNetwork), targetDevice, configuration);
} else {
// Apply LowLatency (insert Assigns/ReadValues) and UnrollTensorIterator
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::LowLatency>(); // LowLatency enables UnrollTI
manager.run_passes(function);
LoadNetwork();
}
// InitMemory(): iterates over the states reported by executableNetwork.QueryState().
// The loop body falls in lines that are not shown here (hunk boundary below).
void MemoryLSTMCellTest::InitMemory() {
IE_SUPPRESS_DEPRECATED_START
auto states = executableNetwork.QueryState();
for (auto& state : states) {
@ -304,13 +292,52 @@ namespace SubgraphTestsDefinitions {
}
}
IE_SUPPRESS_DEPRECATED_END
GenerateInputs();
Infer();
}
// Builds the pure TensorIterator model, applies the low-latency variant selected
// by `transformation`, checks whether a TensorIterator is still present, and
// loads the resulting network.
//
// Branches:
//  - LOW_LATENCY:                ngraph::pass::LowLatency via pass::Manager; a TensorIterator is expected to remain.
//  - LOW_LATENCY_V2:             pass::LowLatency2 via pass::Manager; no TensorIterator is expected to remain.
//  - LOW_LATENCY_REGULAR_API:    InferenceEngine::LowLatency on a CNNNetwork; a TensorIterator is expected to remain.
//  - LOW_LATENCY_V2_REGULAR_API: InferenceEngine::lowLatency2 on a CNNNetwork; no TensorIterator is expected to remain.
// Any other value of `transformation` leaves the freshly built model untouched and
// loads nothing.
void MemoryLSTMCellTest::ApplyLowLatency() {
// Calculate values after LowLatency transformation
CreatePureTensorIteratorModel();
// NOTE(review): the next four statements (manager_2 / UnrollTensorIterator /
// Validate) look like removed-side lines of a diff rendered without +/- markers.
// Unrolling here would remove the TensorIterator that the branches below expect
// to find - confirm against the real file.
ngraph::pass::Manager manager_2;
manager_2.register_pass<ngraph::pass::UnrollTensorIterator>();
manager_2.run_passes(function);
Validate();
if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY) {
function->validate_nodes_and_infer_types();
// Apply LowLatency (insert Assigns/ReadValues) and UnrollTensorIterator
pass::Manager manager;
// The v1 pass is registered between deprecation-suppression macros.
NGRAPH_SUPPRESS_DEPRECATED_START
manager.register_pass<ngraph::pass::LowLatency>();
NGRAPH_SUPPRESS_DEPRECATED_END // LowLatency enables UnrollTI
manager.run_passes(function);
bool ti_found = helpers::is_tensor_iterator_exist(function);
EXPECT_EQ(ti_found, true);
LoadNetwork();
} else if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2) {
function->validate_nodes_and_infer_types();
// Apply LowLatency (insert Assigns/ReadValues) and UnrollTensorIterator
pass::Manager manager;
manager.register_pass<pass::LowLatency2>();
manager.run_passes(function);
bool ti_found = helpers::is_tensor_iterator_exist(function);
EXPECT_EQ(ti_found, false);
LoadNetwork();
} else if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY_REGULAR_API) {
// Same check as LOW_LATENCY, but through the InferenceEngine entry point on a
// CNNNetwork; the network is then loaded explicitly through `core`.
cnnNetwork = InferenceEngine::CNNNetwork{function};
IE_SUPPRESS_DEPRECATED_START
InferenceEngine::LowLatency(cnnNetwork);
IE_SUPPRESS_DEPRECATED_END
bool ti_found = helpers::is_tensor_iterator_exist(cnnNetwork.getFunction());
EXPECT_EQ(ti_found, true);
ConfigureNetwork();
executableNetwork = core->LoadNetwork(cnnNetwork, targetDevice, configuration);
} else if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API) {
// Same check as LOW_LATENCY_V2, but through the InferenceEngine entry point on a
// CNNNetwork.
cnnNetwork = InferenceEngine::CNNNetwork{function};
InferenceEngine::lowLatency2(cnnNetwork);
bool ti_found = helpers::is_tensor_iterator_exist(cnnNetwork.getFunction());
EXPECT_EQ(ti_found, false);
ConfigureNetwork();
executableNetwork = core->LoadNetwork(cnnNetwork, targetDevice, configuration);
}
}
} // namespace SubgraphTestsDefinitions

View File

@ -2,16 +2,19 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "ngraph/opsets/opset5.hpp"
#include "ie_transformations.hpp"
#include "ngraph/opsets/opset7.hpp"
#include "ngraph/op/util/variable_context.hpp"
#include "ngraph/pass/low_latency.hpp"
#include "ie_transformations.hpp"
#include "transformations/control_flow/unroll_tensor_iterator.hpp"
#include "ngraph_functions/builders.hpp"
#include "ngraph_functions/utils/ngraph_helpers.hpp"
#include "shared_test_classes/subgraph/multiple_LSTMCell.hpp"
using namespace ngraph;
using namespace opset7;
namespace SubgraphTestsDefinitions {
std::string MultipleLSTMCellTest::getTestCaseName(const testing::TestParamInfo<multipleLSTMCellParams> &obj) {
std::string targetDevice;
@ -19,9 +22,11 @@ std::string MultipleLSTMCellTest::getTestCaseName(const testing::TestParamInfo<m
size_t inputSize;
size_t hiddenSize;
std::map<std::string, std::string> config;
std::tie(targetDevice, netPrecision, inputSize, hiddenSize, config) = obj.param;
ngraph::helpers::MemoryTransformation transformation;
std::tie(transformation, targetDevice, netPrecision, inputSize, hiddenSize, config) = obj.param;
std::ostringstream result;
result << "transformation=" << transformation << "_";
result << "netPrecision=" << netPrecision.name() << "_";
result << "IS=" << inputSize << "_";
result << "HS=" << hiddenSize << "_";
@ -33,7 +38,7 @@ void MultipleLSTMCellTest::SetUp() {
InferenceEngine::Precision netPrecision;
std::map<std::string, std::string> config;
size_t inputSize;
std::tie(targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam();
std::tie(transformation, targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam();
configuration.insert(config.begin(), config.end());
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
@ -51,51 +56,55 @@ void MultipleLSTMCellTest::SetUp() {
reccurrenceWeights_vals = CommonTestUtils::generate_float_numbers(4 * hiddenSize * hiddenSize, -0.1f, 0.1f);
bias_vals = CommonTestUtils::generate_float_numbers(4 * hiddenSize, -0.25f, 0.15f);
auto input_parameter = ngraph::builder::makeParams(ngPrc, {input_dims});
auto input_parameter = builder::makeParams(ngPrc, {input_dims});
auto input_add_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_bias);
auto add = ngraph::builder::makeEltwise(input_parameter[0], input_add_const, ngraph::helpers::EltwiseTypes::ADD);
auto input_add_const = builder::makeConstant(ngPrc, input_dims, input_bias);
auto add = builder::makeEltwise(input_parameter[0], input_add_const, helpers::EltwiseTypes::ADD);
auto input_mul_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_weights);
auto mul = ngraph::builder::makeEltwise(add, input_mul_const, ngraph::helpers::EltwiseTypes::MULTIPLY);
auto input_mul_const = builder::makeConstant(ngPrc, input_dims, input_weights);
auto mul = builder::makeEltwise(add, input_mul_const, helpers::EltwiseTypes::MULTIPLY);
auto unsqueeze_input_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
auto unsqueeze_input = std::make_shared<ngraph::opset5::Unsqueeze>(mul, unsqueeze_input_const);
auto unsqueeze_input_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
auto unsqueeze_input = std::make_shared<Unsqueeze>(mul, unsqueeze_input_const);
auto permute_in_params = std::make_shared<ngraph::opset1::Constant>(ngraph::element::i64, ngraph::Shape{3}, ngraph::Shape{{1, 0, 2}});
auto permute_in = std::make_shared<ngraph::opset1::Transpose>(unsqueeze_input, permute_in_params);
auto permute_in_params = std::make_shared<Constant>(element::i64, Shape{3}, Shape{{1, 0, 2}});
auto permute_in = std::make_shared<Transpose>(unsqueeze_input, permute_in_params);
auto cell_memory_constant = ngraph::builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
auto cell_memory_read = std::make_shared<ngraph::opset5::ReadValue>(cell_memory_constant, "cell_memory");
auto cell_memory_constant = builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
auto var_cell =
std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, "cell_state_1"});
auto var_hidden =
std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, "hidden_state_1"});
auto cell_memory_read = std::make_shared<ReadValue>(cell_memory_constant, var_cell);
cell_memory_read->set_friendly_name("cell_memory");
auto hidden_memory_constant = ngraph::builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
auto hidden_memory_read = std::make_shared<ngraph::opset5::ReadValue>(hidden_memory_constant, "hidden_memory");
auto hidden_memory_constant = builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
auto hidden_memory_read = std::make_shared<ReadValue>(hidden_memory_constant, var_hidden);
hidden_memory_read->set_friendly_name("hidden_memory");
// Body - inputs
auto X = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, 1, inputSize});
auto H_t = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, hiddenSize});
auto C_t = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, hiddenSize});
auto X = std::make_shared<Parameter>(ngPrc, Shape{1, 1, inputSize});
auto H_t = std::make_shared<Parameter>(ngPrc, Shape{1, hiddenSize});
auto C_t = std::make_shared<Parameter>(ngPrc, Shape{1, hiddenSize});
// Body - layers
auto squeeze_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
auto squeeze = std::make_shared<ngraph::opset5::Squeeze>(X, squeeze_const);
auto squeeze_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
auto squeeze = std::make_shared<Squeeze>(X, squeeze_const);
auto weightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals);
auto reccurrenceWeightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
auto biasNode = ngraph::builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
auto lstm = std::make_shared<ngraph::opset4::LSTMCell>(squeeze, H_t, C_t, weightsNode, reccurrenceWeightsNode, biasNode, hiddenSize);
auto weightsNode = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals);
auto reccurrenceWeightsNode = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
auto biasNode = builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
auto lstm = std::make_shared<LSTMCell>(squeeze, H_t, C_t, weightsNode, reccurrenceWeightsNode, biasNode, hiddenSize);
auto unsqueeze_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
auto unsqueeze = std::make_shared<ngraph::opset5::Unsqueeze>(lstm->output(0), unsqueeze_const);
auto unsqueeze_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
auto unsqueeze = std::make_shared<Unsqueeze>(lstm->output(0), unsqueeze_const);
// body - outputs
auto H_o = lstm->output(0);
auto C_o = lstm->output(1);
auto unsqueeze_o = unsqueeze->output(0);
auto body = std::make_shared<ngraph::Function>(ngraph::OutputVector{unsqueeze_o, H_o, C_o}, ngraph::ParameterVector {X, H_t, C_t});
auto body = std::make_shared<Function>(OutputVector{unsqueeze_o, H_o, C_o}, ParameterVector {X, H_t, C_t});
// TI construction
auto tensor_iterator = std::make_shared<ngraph::opset5::TensorIterator>();
auto tensor_iterator = std::make_shared<TensorIterator>();
tensor_iterator->set_body(body);
tensor_iterator->set_invariant_input(X, permute_in);
tensor_iterator->set_merged_input(H_t, hidden_memory_read, H_o);
@ -108,49 +117,53 @@ void MultipleLSTMCellTest::SetUp() {
out_hidden.get_tensor().set_element_type(ngPrc);
out_cell.get_tensor().set_element_type(ngPrc);
auto cell_memory_write = std::make_shared<ngraph::opset5::Assign>(out_cell, "cell_memory");
auto hidden_memory_write = std::make_shared<ngraph::opset5::Assign>(out_hidden, "hidden_memory");
auto cell_memory_write = std::make_shared<Assign>(out_cell, var_cell);
auto hidden_memory_write = std::make_shared<Assign>(out_hidden, var_hidden);
auto first_reshape_pattern = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64,
ngraph::Shape{4}, std::vector<size_t>({1, 1, 1, hiddenSize}));
auto first_reshape = std::make_shared<ngraph::opset5::Reshape>(out_unsqueeze, first_reshape_pattern, false);
auto first_reshape_pattern = std::make_shared<Constant>(element::i64,
Shape{4}, std::vector<size_t>({1, 1, 1, hiddenSize}));
auto first_reshape = std::make_shared<Reshape>(out_unsqueeze, first_reshape_pattern, false);
// End of TI 1
auto inbetween_squeeze_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
auto inbetween_squeeze = std::make_shared<ngraph::opset5::Squeeze>(first_reshape, inbetween_squeeze_const);
auto inbetween_squeeze_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
auto inbetween_squeeze = std::make_shared<Squeeze>(first_reshape, inbetween_squeeze_const);
// Second TI
auto cell_memory_2_constant = ngraph::builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
auto cell_memory_2_read = std::make_shared<ngraph::opset5::ReadValue>(cell_memory_2_constant, "cell_memory_2");
auto var_cell_2 =
std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, "cell_state_2"});
auto var_hidden_2 =
std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, "hidden_state_2"});
auto cell_memory_2_constant = builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
auto cell_memory_2_read = std::make_shared<ReadValue>(cell_memory_2_constant, var_cell_2);
cell_memory_2_read->set_friendly_name("cell_memory_2");
auto hidden_memory_2_constant = ngraph::builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
auto hidden_memory_2_read = std::make_shared<ngraph::opset5::ReadValue>(hidden_memory_2_constant, "hidden_memory_2");
auto hidden_memory_2_constant = builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
auto hidden_memory_2_read = std::make_shared<ReadValue>(hidden_memory_2_constant, var_hidden_2);
hidden_memory_2_read->set_friendly_name("hidden_memory_2");
// Body - inputs
auto X_2 = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, 1, hiddenSize});
auto H_t_2 = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, hiddenSize});
auto C_t_2 = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, hiddenSize});
auto X_2 = std::make_shared<Parameter>(ngPrc, Shape{1, 1, hiddenSize});
auto H_t_2 = std::make_shared<Parameter>(ngPrc, Shape{1, hiddenSize});
auto C_t_2 = std::make_shared<Parameter>(ngPrc, Shape{1, hiddenSize});
// Body - layers
auto squeeze_2_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
auto squeeze_2 = std::make_shared<ngraph::opset5::Squeeze>(X_2, squeeze_2_const);
auto squeeze_2_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
auto squeeze_2 = std::make_shared<Squeeze>(X_2, squeeze_2_const);
auto weightsNode_2 = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, weights_2_vals);
auto reccurrenceWeightsNode_2 = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
auto biasNode_2 = ngraph::builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
auto lstm_2 = std::make_shared<ngraph::opset4::LSTMCell>(squeeze_2, H_t_2, C_t_2, weightsNode_2, reccurrenceWeightsNode_2, biasNode_2, hiddenSize);
auto weightsNode_2 = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, weights_2_vals);
auto reccurrenceWeightsNode_2 = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
auto biasNode_2 = builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
auto lstm_2 = std::make_shared<LSTMCell>(squeeze_2, H_t_2, C_t_2, weightsNode_2, reccurrenceWeightsNode_2, biasNode_2, hiddenSize);
auto unsqueeze_2_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
auto unsqueeze_2 = std::make_shared<ngraph::opset5::Unsqueeze>(lstm_2->output(0), unsqueeze_2_const);
auto unsqueeze_2_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
auto unsqueeze_2 = std::make_shared<Unsqueeze>(lstm_2->output(0), unsqueeze_2_const);
// body - outputs
auto H_o_2 = lstm_2->output(0);
auto C_o_2 = lstm_2->output(1);
auto unsqueeze_o_2 = unsqueeze_2->output(0);
auto body_2 = std::make_shared<ngraph::Function>(ngraph::OutputVector{unsqueeze_o_2, H_o_2, C_o_2}, ngraph::ParameterVector {X_2, H_t_2, C_t_2});
auto body_2 = std::make_shared<Function>(OutputVector{unsqueeze_o_2, H_o_2, C_o_2}, ParameterVector {X_2, H_t_2, C_t_2});
// TI construction
auto tensor_iterator_2 = std::make_shared<ngraph::opset5::TensorIterator>();
auto tensor_iterator_2 = std::make_shared<TensorIterator>();
tensor_iterator_2->set_body(body_2);
tensor_iterator_2->set_invariant_input(X_2, inbetween_squeeze);
tensor_iterator_2->set_merged_input(H_t_2, hidden_memory_2_read, H_o_2);
@ -163,33 +176,28 @@ void MultipleLSTMCellTest::SetUp() {
out_hidden_2.get_tensor().set_element_type(ngPrc);
out_cell_2.get_tensor().set_element_type(ngPrc);
auto cell_memory_2_write = std::make_shared<ngraph::opset5::Assign>(out_cell_2, "cell_memory_2");
auto hidden_memory_2_write = std::make_shared<ngraph::opset5::Assign>(out_hidden_2, "hidden_memory_2");
auto cell_memory_2_write = std::make_shared<Assign>(out_cell_2, var_cell_2);
auto hidden_memory_2_write = std::make_shared<Assign>(out_hidden_2, var_hidden_2);
auto final_reshape_pattern = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64,
ngraph::Shape{4}, std::vector<size_t>({1, 1, 1, hiddenSize}));
auto final_reshape = std::make_shared<ngraph::opset5::Reshape>(out_unsqueeze_2, final_reshape_pattern, false);
auto final_reshape_pattern = std::make_shared<Constant>(element::i64, Shape{4}, std::vector<size_t>({1, 1, 1, hiddenSize}));
auto final_reshape = std::make_shared<Reshape>(out_unsqueeze_2, final_reshape_pattern, false);
cell_memory_write->add_control_dependency(cell_memory_read);
final_reshape->add_control_dependency(cell_memory_write);
hidden_memory_write->add_control_dependency(hidden_memory_read);
final_reshape->add_control_dependency(hidden_memory_write);
cell_memory_2_write->add_control_dependency(cell_memory_2_read);
final_reshape->add_control_dependency(cell_memory_2_write);
hidden_memory_2_write->add_control_dependency(hidden_memory_2_read);
final_reshape->add_control_dependency(hidden_memory_2_write);
function = std::make_shared<ngraph::Function>(final_reshape, input_parameter, "TI_with_memory");
function = std::make_shared<Function>(OutputVector {final_reshape},
SinkVector{cell_memory_write, hidden_memory_write, cell_memory_2_write, hidden_memory_2_write},
input_parameter,
"TI_with_memory");
}
void MultipleLSTMCellTest::switchToNgraphFriendlyModel() {
InferenceEngine::Precision netPrecision;
std::map<std::string, std::string> config;
size_t inputSize;
std::tie(targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam();
std::tie(transformation, targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam();
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
std::vector<size_t> input_dims { 1, inputSize };
@ -197,72 +205,72 @@ void MultipleLSTMCellTest::switchToNgraphFriendlyModel() {
std::vector<size_t> hidden_memory_dims {1, hiddenSize};
std::vector<size_t> cell_memory_dims {1, hiddenSize};
auto input_parameter = ngraph::builder::makeParams(ngPrc, {input_dims});
auto input_parameter = builder::makeParams(ngPrc, {input_dims});
auto input_add_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_bias);
auto add = ngraph::builder::makeEltwise(input_parameter[0], input_add_const, ngraph::helpers::EltwiseTypes::ADD);
auto input_add_const = builder::makeConstant(ngPrc, input_dims, input_bias);
auto add = builder::makeEltwise(input_parameter[0], input_add_const, helpers::EltwiseTypes::ADD);
auto input_mul_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_weights);
auto mul = ngraph::builder::makeEltwise(add, input_mul_const, ngraph::helpers::EltwiseTypes::MULTIPLY);
auto input_mul_const = builder::makeConstant(ngPrc, input_dims, input_weights);
auto mul = builder::makeEltwise(add, input_mul_const, helpers::EltwiseTypes::MULTIPLY);
auto unsqueeze_input_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
auto unsqueeze_input = std::make_shared<ngraph::opset5::Unsqueeze>(mul, unsqueeze_input_const);
auto unsqueeze_input_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
auto unsqueeze_input = std::make_shared<Unsqueeze>(mul, unsqueeze_input_const);
// Body 1 - layers
auto cell_memory_constant = ngraph::builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
auto cell_memory_constant = builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
auto hidden_memory_constant = ngraph::builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
auto hidden_memory_constant = builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
auto squeeze_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
auto squeeze = std::make_shared<ngraph::opset5::Squeeze>(unsqueeze_input, squeeze_const);
auto squeeze_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
auto squeeze = std::make_shared<Squeeze>(unsqueeze_input, squeeze_const);
auto weightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals);
auto reccurrenceWeightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
auto biasNode = ngraph::builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
auto lstm = std::make_shared<ngraph::opset4::LSTMCell>(squeeze, hidden_memory_constant, cell_memory_constant, weightsNode,
auto weightsNode = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals);
auto reccurrenceWeightsNode = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
auto biasNode = builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
auto lstm = std::make_shared<LSTMCell>(squeeze, hidden_memory_constant, cell_memory_constant, weightsNode,
reccurrenceWeightsNode, biasNode, hiddenSize);
auto unsqueeze_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
auto unsqueeze = std::make_shared<ngraph::opset5::Unsqueeze>(lstm->output(0), unsqueeze_const);
auto unsqueeze_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
auto unsqueeze = std::make_shared<Unsqueeze>(lstm->output(0), unsqueeze_const);
auto first_reshape_pattern = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64,
ngraph::Shape{4}, std::vector<size_t>({1, 1, 1, hiddenSize}));
auto first_reshape = std::make_shared<ngraph::opset5::Reshape>(unsqueeze, first_reshape_pattern, false);
auto first_reshape_pattern = std::make_shared<Constant>(element::i64,
Shape{4}, std::vector<size_t>({1, 1, 1, hiddenSize}));
auto first_reshape = std::make_shared<Reshape>(unsqueeze, first_reshape_pattern, false);
// Body 1 - end
auto inbetween_squeeze_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
auto inbetween_squeeze = std::make_shared<ngraph::opset5::Squeeze>(first_reshape, inbetween_squeeze_const);
auto inbetween_squeeze_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
auto inbetween_squeeze = std::make_shared<Squeeze>(first_reshape, inbetween_squeeze_const);
// Body 2 - layers
auto cell_memory_2_constant = ngraph::builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
auto cell_memory_2_constant = builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
auto hidden_memory_2_constant = ngraph::builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
auto hidden_memory_2_constant = builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
auto squeeze_2_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
auto squeeze_2 = std::make_shared<ngraph::opset5::Squeeze>(inbetween_squeeze, squeeze_2_const);
auto squeeze_2_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
auto squeeze_2 = std::make_shared<Squeeze>(inbetween_squeeze, squeeze_2_const);
auto weightsNode_2 = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, weights_2_vals);
auto reccurrenceWeightsNode_2 = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
auto biasNode_2 = ngraph::builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
auto lstm_2 = std::make_shared<ngraph::opset4::LSTMCell>(squeeze_2, hidden_memory_2_constant, cell_memory_2_constant, weightsNode_2,
auto weightsNode_2 = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, weights_2_vals);
auto reccurrenceWeightsNode_2 = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
auto biasNode_2 = builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
auto lstm_2 = std::make_shared<LSTMCell>(squeeze_2, hidden_memory_2_constant, cell_memory_2_constant, weightsNode_2,
reccurrenceWeightsNode_2, biasNode_2, hiddenSize);
auto unsqueeze_2_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
auto unsqueeze_2 = std::make_shared<ngraph::opset5::Unsqueeze>(lstm_2->output(0), unsqueeze_2_const);
auto unsqueeze_2_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
auto unsqueeze_2 = std::make_shared<Unsqueeze>(lstm_2->output(0), unsqueeze_2_const);
auto final_reshape_pattern = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64,
ngraph::Shape{4}, std::vector<size_t>({1, 1, 1, hiddenSize}));
auto final_reshape = std::make_shared<ngraph::opset5::Reshape>(unsqueeze_2, final_reshape_pattern, false);
auto final_reshape_pattern = std::make_shared<Constant>(element::i64,
Shape{4}, std::vector<size_t>({1, 1, 1, hiddenSize}));
auto final_reshape = std::make_shared<Reshape>(unsqueeze_2, final_reshape_pattern, false);
// Body 2 - end
function = std::make_shared<ngraph::Function>(final_reshape, input_parameter, "TI_unrolled_without_memory");
function = std::make_shared<Function>(final_reshape, input_parameter, "TI_unrolled_without_memory");
}
void MultipleLSTMCellTest::CreatePureTensorIteratorModel() {
InferenceEngine::Precision netPrecision;
std::map<std::string, std::string> config;
size_t inputSize;
std::tie(targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam();
std::tie(transformation, targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam();
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
std::vector<size_t> input_dims { 1, inputSize };
@ -270,49 +278,49 @@ void MultipleLSTMCellTest::CreatePureTensorIteratorModel() {
std::vector<size_t> hidden_memory_dims {1, hiddenSize};
std::vector<size_t> cell_memory_dims {1, hiddenSize};
auto input_parameter = ngraph::builder::makeParams(ngPrc, {input_dims});
auto input_parameter = builder::makeParams(ngPrc, {input_dims});
auto input_add_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_bias);
auto add = ngraph::builder::makeEltwise(input_parameter[0], input_add_const, ngraph::helpers::EltwiseTypes::ADD);
auto input_add_const = builder::makeConstant(ngPrc, input_dims, input_bias);
auto add = builder::makeEltwise(input_parameter[0], input_add_const, helpers::EltwiseTypes::ADD);
auto input_mul_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_weights);
auto mul = ngraph::builder::makeEltwise(add, input_mul_const, ngraph::helpers::EltwiseTypes::MULTIPLY);
auto input_mul_const = builder::makeConstant(ngPrc, input_dims, input_weights);
auto mul = builder::makeEltwise(add, input_mul_const, helpers::EltwiseTypes::MULTIPLY);
auto unsqueeze_input_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
auto unsqueeze_input = std::make_shared<ngraph::opset5::Unsqueeze>(mul, unsqueeze_input_const);
auto unsqueeze_input_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
auto unsqueeze_input = std::make_shared<Unsqueeze>(mul, unsqueeze_input_const);
auto permute_in_params = std::make_shared<ngraph::opset1::Constant>(ngraph::element::i64, ngraph::Shape{3}, ngraph::Shape{{1, 0, 2}});
auto permute_in = std::make_shared<ngraph::opset1::Transpose>(unsqueeze_input, permute_in_params);
auto permute_in_params = std::make_shared<Constant>(element::i64, Shape{3}, Shape{{1, 0, 2}});
auto permute_in = std::make_shared<Transpose>(unsqueeze_input, permute_in_params);
auto cell_memory_constant = ngraph::builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
auto cell_memory_constant = builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
auto hidden_memory_constant = ngraph::builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
auto hidden_memory_constant = builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
// Body - inputs
auto X = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, 1, inputSize});
auto H_t = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, hiddenSize});
auto C_t = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, hiddenSize});
auto X = std::make_shared<Parameter>(ngPrc, Shape{1, 1, inputSize});
auto H_t = std::make_shared<Parameter>(ngPrc, Shape{1, hiddenSize});
auto C_t = std::make_shared<Parameter>(ngPrc, Shape{1, hiddenSize});
H_t->set_friendly_name("hidden_state_1");
C_t->set_friendly_name("cell_state_1");
// Body - layers
auto squeeze_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
auto squeeze = std::make_shared<ngraph::opset5::Squeeze>(X, squeeze_const);
auto squeeze_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
auto squeeze = std::make_shared<Squeeze>(X, squeeze_const);
auto weightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals);
auto reccurrenceWeightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
auto biasNode = ngraph::builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
auto lstm = std::make_shared<ngraph::opset4::LSTMCell>(squeeze, H_t, C_t, weightsNode, reccurrenceWeightsNode, biasNode, hiddenSize);
auto weightsNode = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals);
auto reccurrenceWeightsNode = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
auto biasNode = builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
auto lstm = std::make_shared<LSTMCell>(squeeze, H_t, C_t, weightsNode, reccurrenceWeightsNode, biasNode, hiddenSize);
auto unsqueeze_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
auto unsqueeze = std::make_shared<ngraph::opset5::Unsqueeze>(lstm->output(0), unsqueeze_const);
auto unsqueeze_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
auto unsqueeze = std::make_shared<Unsqueeze>(lstm->output(0), unsqueeze_const);
// body - outputs
auto H_o = lstm->output(0);
auto C_o = lstm->output(1);
auto unsqueeze_o = unsqueeze->output(0);
auto body = std::make_shared<ngraph::Function>(ngraph::OutputVector{unsqueeze_o, H_o, C_o}, ngraph::ParameterVector {X, H_t, C_t});
auto body = std::make_shared<Function>(OutputVector{unsqueeze_o, H_o, C_o}, ParameterVector {X, H_t, C_t});
// TI construction
auto tensor_iterator = std::make_shared<ngraph::opset5::TensorIterator>();
auto tensor_iterator = std::make_shared<TensorIterator>();
tensor_iterator->set_body(body);
tensor_iterator->set_sliced_input(X, permute_in, 0, 1, 1, -1, 0);
tensor_iterator->set_merged_input(H_t, hidden_memory_constant, H_o);
@ -326,44 +334,44 @@ void MultipleLSTMCellTest::CreatePureTensorIteratorModel() {
out_cell.get_tensor().set_element_type(ngPrc);
tensor_iterator->validate_and_infer_types();
auto first_reshape_pattern = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64,
ngraph::Shape{4}, std::vector<size_t>({1, 1, 1, hiddenSize}));
auto first_reshape = std::make_shared<ngraph::opset5::Reshape>(out_unsqueeze, first_reshape_pattern, false);
auto first_reshape_pattern = std::make_shared<Constant>(element::i64,
Shape{4}, std::vector<size_t>({1, 1, 1, hiddenSize}));
auto first_reshape = std::make_shared<Reshape>(out_unsqueeze, first_reshape_pattern, false);
// End of TI 1
auto inbetween_squeeze_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
auto inbetween_squeeze = std::make_shared<ngraph::opset5::Squeeze>(first_reshape, inbetween_squeeze_const);
auto inbetween_squeeze_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
auto inbetween_squeeze = std::make_shared<Squeeze>(first_reshape, inbetween_squeeze_const);
// Second TI
auto cell_memory_2_constant = ngraph::builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
auto cell_memory_2_constant = builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
auto hidden_memory_2_constant = ngraph::builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
auto hidden_memory_2_constant = builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
// Body - inputs
auto X_2 = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, 1, hiddenSize});
auto H_t_2 = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, hiddenSize});
auto C_t_2 = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, hiddenSize});
auto X_2 = std::make_shared<Parameter>(ngPrc, Shape{1, 1, hiddenSize});
auto H_t_2 = std::make_shared<Parameter>(ngPrc, Shape{1, hiddenSize});
auto C_t_2 = std::make_shared<Parameter>(ngPrc, Shape{1, hiddenSize});
H_t_2->set_friendly_name("hidden_state_2");
C_t_2->set_friendly_name("cell_state_2");
// Body - layers
auto squeeze_2_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
auto squeeze_2 = std::make_shared<ngraph::opset5::Squeeze>(X_2, squeeze_2_const);
auto squeeze_2_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
auto squeeze_2 = std::make_shared<Squeeze>(X_2, squeeze_2_const);
auto weightsNode_2 = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, weights_2_vals);
auto reccurrenceWeightsNode_2 = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
auto biasNode_2 = ngraph::builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
auto lstm_2 = std::make_shared<ngraph::opset4::LSTMCell>(squeeze_2, H_t_2, C_t_2, weightsNode_2, reccurrenceWeightsNode_2, biasNode_2, hiddenSize);
auto weightsNode_2 = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, weights_2_vals);
auto reccurrenceWeightsNode_2 = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
auto biasNode_2 = builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
auto lstm_2 = std::make_shared<LSTMCell>(squeeze_2, H_t_2, C_t_2, weightsNode_2, reccurrenceWeightsNode_2, biasNode_2, hiddenSize);
auto unsqueeze_2_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
auto unsqueeze_2 = std::make_shared<ngraph::opset5::Unsqueeze>(lstm_2->output(0), unsqueeze_2_const);
auto unsqueeze_2_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
auto unsqueeze_2 = std::make_shared<Unsqueeze>(lstm_2->output(0), unsqueeze_2_const);
// body - outputs
auto H_o_2 = lstm_2->output(0);
auto C_o_2 = lstm_2->output(1);
auto unsqueeze_o_2 = unsqueeze_2->output(0);
auto body_2 = std::make_shared<ngraph::Function>(ngraph::OutputVector{unsqueeze_o_2, H_o_2, C_o_2}, ngraph::ParameterVector {X_2, H_t_2, C_t_2});
auto body_2 = std::make_shared<Function>(OutputVector{unsqueeze_o_2, H_o_2, C_o_2}, ParameterVector {X_2, H_t_2, C_t_2});
// TI construction
auto tensor_iterator_2 = std::make_shared<ngraph::opset5::TensorIterator>();
auto tensor_iterator_2 = std::make_shared<TensorIterator>();
tensor_iterator_2->set_body(body_2);
tensor_iterator_2->set_sliced_input(X_2, inbetween_squeeze, 0, 1, 1, -1, 0);
tensor_iterator_2->set_merged_input(H_t_2, hidden_memory_2_constant, H_o_2);
@ -376,70 +384,17 @@ void MultipleLSTMCellTest::CreatePureTensorIteratorModel() {
out_hidden_2.get_tensor().set_element_type(ngPrc);
out_cell_2.get_tensor().set_element_type(ngPrc);
tensor_iterator_2->validate_and_infer_types();
auto final_reshape_pattern = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64,
ngraph::Shape{4}, std::vector<size_t>({1, 1, 1, hiddenSize}));
auto final_reshape = std::make_shared<ngraph::opset5::Reshape>(out_unsqueeze_2, final_reshape_pattern, false);
auto final_reshape_pattern = std::make_shared<Constant>(element::i64,
Shape{4}, std::vector<size_t>({1, 1, 1, hiddenSize}));
auto final_reshape = std::make_shared<Reshape>(out_unsqueeze_2, final_reshape_pattern, false);
function = std::make_shared<ngraph::Function>(final_reshape, input_parameter, "PureTI");
function = std::make_shared<Function>(final_reshape, input_parameter, "PureTI");
}
void MultipleLSTMCellTest::Run() {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
void MultipleLSTMCellTest::InitMemory() {
InferenceEngine::TensorDesc state_description(InferenceEngine::Precision::FP32,
InferenceEngine::SizeVector({1, hiddenSize}),
InferenceEngine::Layout::NC);
LoadNetwork();
IE_SUPPRESS_DEPRECATED_START
auto states = executableNetwork.QueryState();
for (auto& state : states) {
auto name = state.GetName();
if (name == "cell_memory") {
auto blob = FuncTestUtils::createAndFillBlobWithFloatArray(state_description,
cell_memory_init.data(), cell_memory_init.size());
state.SetState(blob);
} else if (name == "hidden_memory") {
auto blob = FuncTestUtils::createAndFillBlobWithFloatArray(state_description,
hidden_memory_init.data(), hidden_memory_init.size());
state.SetState(blob);
} else if (name == "cell_memory_2") {
auto blob = FuncTestUtils::createAndFillBlobWithFloatArray(state_description,
cell_memory_init.data(), cell_memory_init.size());
state.SetState(blob);
} else if (name == "hidden_memory_2") {
auto blob = FuncTestUtils::createAndFillBlobWithFloatArray(state_description,
hidden_memory_init.data(), hidden_memory_init.size());
state.SetState(blob);
} else {
GTEST_FAIL() << "unknown memory state";
}
}
IE_SUPPRESS_DEPRECATED_END
GenerateInputs();
Infer();
switchToNgraphFriendlyModel();
Validate();
}
void MultipleLSTMCellTest::RunLowLatency(bool regular_api) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
InferenceEngine::TensorDesc state_description(InferenceEngine::Precision::FP32,
InferenceEngine::SizeVector({1, hiddenSize}),
InferenceEngine::Layout::NC);
// Calculate values after LowLatency transformation
CreatePureTensorIteratorModel();
if (regular_api) {
cnnNetwork = InferenceEngine::CNNNetwork{function};
InferenceEngine::LowLatency(cnnNetwork);
ConfigureNetwork();
executableNetwork = core->LoadNetwork(cnnNetwork, targetDevice, configuration);
} else {
function->validate_nodes_and_infer_types();
// Apply LowLatency (insert Assigns/ReadValues) and UnrollTensorIterator
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::LowLatency>(); // LowLatency enables UnrollTI
manager.run_passes(function);
LoadNetwork();
}
IE_SUPPRESS_DEPRECATED_START
auto states = executableNetwork.QueryState();
for (auto& state : states) {
@ -465,14 +420,73 @@ void MultipleLSTMCellTest::RunLowLatency(bool regular_api) {
}
}
IE_SUPPRESS_DEPRECATED_END
}
void MultipleLSTMCellTest::ApplyLowLatency() {
// Calculate values after LowLatency transformation
CreatePureTensorIteratorModel();
if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY) {
function->validate_nodes_and_infer_types();
// Apply LowLatency (insert Assigns/ReadValues) and UnrollTensorIterator
pass::Manager manager;
NGRAPH_SUPPRESS_DEPRECATED_START
manager.register_pass<ngraph::pass::LowLatency>();
NGRAPH_SUPPRESS_DEPRECATED_END // LowLatency enables UnrollTI
manager.run_passes(function);
bool ti_found = helpers::is_tensor_iterator_exist(function);
EXPECT_EQ(ti_found, true);
LoadNetwork();
} else if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2) {
function->validate_nodes_and_infer_types();
// Apply LowLatency (insert Assigns/ReadValues) and UnrollTensorIterator
pass::Manager manager;
manager.register_pass<pass::LowLatency2>();
manager.run_passes(function);
bool ti_found = helpers::is_tensor_iterator_exist(function);
EXPECT_EQ(ti_found, false);
LoadNetwork();
} else if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY_REGULAR_API) {
cnnNetwork = InferenceEngine::CNNNetwork{function};
IE_SUPPRESS_DEPRECATED_START
InferenceEngine::LowLatency(cnnNetwork);
IE_SUPPRESS_DEPRECATED_END
bool ti_found = helpers::is_tensor_iterator_exist(cnnNetwork.getFunction());
EXPECT_EQ(ti_found, true);
ConfigureNetwork();
executableNetwork = core->LoadNetwork(cnnNetwork, targetDevice, configuration);
} else if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API) {
cnnNetwork = InferenceEngine::CNNNetwork{function};
InferenceEngine::lowLatency2(cnnNetwork);
bool ti_found = helpers::is_tensor_iterator_exist(cnnNetwork.getFunction());
EXPECT_EQ(ti_found, false);
ConfigureNetwork();
executableNetwork = core->LoadNetwork(cnnNetwork, targetDevice, configuration);
}
}
/**
 * @brief Test entry point: loads the network (optionally after a LowLatency
 *        transformation), initializes the memory states, runs inference and validates
 *        the outputs against a reference model.
 *
 * With MemoryTransformation::NONE the network built so far is loaded as is and the
 * reference is produced by switchToNgraphFriendlyModel(); for every other mode
 * ApplyLowLatency() prepares and loads the network and the reference is the pure
 * TensorIterator model.
 */
void MultipleLSTMCellTest::Run() {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    if (transformation != ngraph::helpers::MemoryTransformation::NONE) {
        ApplyLowLatency();
    } else {
        LoadNetwork();
    }

    InitMemory();
    GenerateInputs();
    Infer();

    // Pick the reference model. Both helpers assign `function` from scratch, so no
    // model needs to be built (or unrolled) beforehand: anything prepared here would
    // be discarded before Validate() could use it.
    if (transformation == ngraph::helpers::MemoryTransformation::NONE) {
        switchToNgraphFriendlyModel();
    } else {
        CreatePureTensorIteratorModel();
    }
    Validate();
}
} // namespace SubgraphTestsDefinitions

View File

@ -214,6 +214,15 @@ enum class SequenceTestsMode {
CONVERT_TO_TI_RAND_SEQ_LEN_PARAM,
};
/// Selects which memory (state) transformation a test applies before the network
/// is loaded.
enum class MemoryTransformation {
    /// No transformation is applied.
    NONE,
    /// LowLatency (v1).
    LOW_LATENCY,
    /// LowLatency (v1), regular-API variant.
    LOW_LATENCY_REGULAR_API,
    /// LowLatency v2.
    LOW_LATENCY_V2,
    /// LowLatency v2, regular-API variant.
    LOW_LATENCY_V2_REGULAR_API,
    /// LowLatency v2, "original init" variant.
    /// NOTE(review): presumably keeps the original state initializers; confirm against the tests that use it.
    LOW_LATENCY_V2_ORIGINAL_INIT,
};
std::ostream &operator<<(std::ostream &os, const ReductionType &m);
std::ostream &operator<<(std::ostream &os, const PadMode &m);
@ -297,5 +306,7 @@ std::ostream& operator<<(std::ostream & os, TensorIteratorBody type);
std::ostream& operator<<(std::ostream & os, SequenceTestsMode type);
std::ostream& operator<<(std::ostream & os, MemoryTransformation type);
} // namespace helpers
} // namespace ngraph

View File

@ -817,5 +817,32 @@ std::ostream& operator<<(std::ostream & os, SequenceTestsMode type) {
}
return os;
}
/// Writes the enumerator name of @p type to @p os and returns @p os.
/// Throws std::runtime_error("NOT_SUPPORTED_TYPE") for a value that matches no
/// enumerator; nothing is written to the stream in that case.
std::ostream& operator<<(std::ostream & os, MemoryTransformation type) {
    switch (type) {
    case MemoryTransformation::NONE:
        return os << "NONE";
    case MemoryTransformation::LOW_LATENCY:
        return os << "LOW_LATENCY";
    case MemoryTransformation::LOW_LATENCY_REGULAR_API:
        return os << "LOW_LATENCY_REGULAR_API";
    case MemoryTransformation::LOW_LATENCY_V2:
        return os << "LOW_LATENCY_V2";
    case MemoryTransformation::LOW_LATENCY_V2_REGULAR_API:
        return os << "LOW_LATENCY_V2_REGULAR_API";
    case MemoryTransformation::LOW_LATENCY_V2_ORIGINAL_INIT:
        return os << "LOW_LATENCY_V2_ORIGINAL_INIT";
    }
    // Reached only when `type` holds a value outside the enumerators above.
    throw std::runtime_error("NOT_SUPPORTED_TYPE");
}
} // namespace helpers
} // namespace ngraph

View File

@ -11,7 +11,7 @@ def get_available_transformations():
try:
from openvino.offline_transformations import ApplyLowLatencyTransformation # pylint: disable=import-error,no-name-in-module
return {
'LowLatency': ApplyLowLatencyTransformation,
'LowLatency2': ApplyLowLatencyTransformation,
}
except Exception as e:
return {}

View File

@ -8,6 +8,7 @@ import os
import re
from collections import OrderedDict
from itertools import zip_longest
from distutils.util import strtobool
import numpy as np
@ -257,9 +258,9 @@ def get_common_cli_parser(parser: argparse.ArgumentParser = None):
help='Apply additional transformations. ' +
'Usage: "--transform transformation_name1[args],transformation_name2..." ' +
'where [args] is key=value pairs separated by semicolon. ' +
'Examples: "--transform LowLatency" or ' +
' "--transform LowLatency[num_iterations=2]" ' +
'Available transformations: "LowLatency"',
'Examples: "--transform LowLatency2" or ' +
' "--transform LowLatency2[use_const_initializer=False]" ' +
'Available transformations: "LowLatency2"',
default="")
common_group.add_argument('--disable_fusing',
help='Turn off fusing of linear operations to Convolution',
@ -1151,6 +1152,14 @@ def isfloat(value):
return False
def isbool(value):
    """Return True if the string ``value`` spells a boolean, False otherwise.

    The accepted spellings (case-insensitive) are exactly the ones that
    ``distutils.util.strtobool`` accepts: y/yes/t/true/on/1 and
    n/no/f/false/off/0. The check is done locally rather than by calling
    ``strtobool`` and catching ``ValueError``, so this helper does not rely
    on ``distutils``, which is deprecated (PEP 632) and removed in Python 3.12.

    :param value: string to test
    :return: True when ``value`` is one of the accepted boolean spellings
    """
    truthy = ('y', 'yes', 't', 'true', 'on', '1')
    falsy = ('n', 'no', 'f', 'false', 'off', '0')
    return value.lower() in truthy + falsy
def convert_string_to_real_type(value: str):
values = value.split(',')
for i in range(len(values)):
@ -1159,6 +1168,8 @@ def convert_string_to_real_type(value: str):
values[i] = int(value)
elif isfloat(value):
values[i] = float(value)
elif isbool(value):
values[i] = strtobool(value)
return values[0] if len(values) == 1 else values

View File

@ -905,64 +905,65 @@ class TransformChecker(unittest.TestCase):
self.assertEqual(parse_transform(""), [])
def test_single_pass(self):
self.assertEqual(parse_transform("LowLatency"), [("LowLatency", {})])
self.assertEqual(parse_transform("LowLatency2"), [("LowLatency2", {})])
def test_single_pass_with_args(self):
self.assertEqual(parse_transform("LowLatency[num_iterations=2]"),
[("LowLatency", {"num_iterations": 2})])
self.assertEqual(parse_transform("LowLatency2[use_const_initializer=True]"),
[("LowLatency2", {"use_const_initializer": True})])
def test_single_pass_with_multiple_args(self):
self.assertEqual(parse_transform("LowLatency[num_iterations=2;dummy_attr=3.14]"),
[("LowLatency", {"num_iterations": 2, "dummy_attr": 3.14})])
self.assertEqual(parse_transform("LowLatency2[use_const_initializer=True;dummy_attr=3.14]"),
[("LowLatency2", {"use_const_initializer": True, "dummy_attr": 3.14})])
def test_multiple_passes_with_args(self):
self.assertEqual(parse_transform("LowLatency[num_iterations=2],DummyPass[type=ReLU]"),
[("LowLatency", {"num_iterations": 2}),
self.assertEqual(parse_transform("LowLatency2[use_const_initializer=True],DummyPass[type=ReLU]"),
[("LowLatency2", {"use_const_initializer": True}),
("DummyPass", {"type": "ReLU"})])
def test_multiple_passes_with_args2(self):
self.assertEqual(parse_transform("LowLatency[num_iterations=2,3,4.15],DummyPass1,DummyPass2[types=ReLU,PReLU;values=1,2,3]"),
[("LowLatency", {"num_iterations": [2,3,4.15]}),
self.assertEqual(parse_transform("LowLatency2[use_const_initializer=True,False],DummyPass1,"
"DummyPass2[types=ReLU,PReLU;values=1,2,3]"),
[("LowLatency2", {"use_const_initializer": [True, False]}),
("DummyPass1", {}),
("DummyPass2", {"types": ["ReLU", "PReLU"], "values": [1,2,3]})])
def test_multiple_passes_no_args(self):
self.assertEqual(parse_transform("DummyPass,LowLatency2"),
[("DummyPass", {}), ("LowLatency2", {})])
self.assertEqual(parse_transform("DummyPass,LowLatency22"),
[("DummyPass", {}), ("LowLatency22", {})])
def test_single_pass_neg(self):
self.assertRaises(Error, parse_transform, "LowLatency!")
self.assertRaises(Error, parse_transform, "LowLatency2!")
def test_multiple_passes_neg(self):
self.assertRaises(Error, parse_transform, "LowLatency;DummyPass")
self.assertRaises(Error, parse_transform, "LowLatency2;DummyPass")
def test_single_pass_with_args_neg1(self):
self.assertRaises(Error, parse_transform, "LowLatency[=2]")
self.assertRaises(Error, parse_transform, "LowLatency2[=2]")
def test_single_pass_with_args_neg2(self):
self.assertRaises(Error, parse_transform, "LowLatency[key=]")
self.assertRaises(Error, parse_transform, "LowLatency2[key=]")
def test_single_pass_with_args_neg3(self):
self.assertRaises(Error, parse_transform, "LowLatency[]")
self.assertRaises(Error, parse_transform, "LowLatency2[]")
def test_single_pass_with_args_neg4(self):
self.assertRaises(Error, parse_transform, "LowLatency[key=value;]")
self.assertRaises(Error, parse_transform, "LowLatency2[key=value;]")
def test_single_pass_with_args_neg5(self):
self.assertRaises(Error, parse_transform, "LowLatency[value]")
self.assertRaises(Error, parse_transform, "LowLatency2[value]")
def test_single_pass_with_args_neg6(self):
self.assertRaises(Error, parse_transform, "LowLatency[key=value")
self.assertRaises(Error, parse_transform, "LowLatency2[key=value")
@patch("mo.back.offline_transformations.get_available_transformations")
def test_check_low_latency_is_available(self, available_transformations):
available_transformations.return_value = {"LowLatency": None}
available_transformations.return_value = {"LowLatency2": None}
try:
check_available_transforms([("LowLatency" ,"")], True)
check_available_transforms([("LowLatency2", "")], True)
except Error as e:
self.assertTrue(False, "Exception \"{}\" is unexpected".format(e))
@patch("mo.back.offline_transformations.get_available_transformations")
def test_check_dummy_pass_is_available(self, available_transformations):
available_transformations.return_value = {"LowLatency": None}
available_transformations.return_value = {"LowLatency2": None}
self.assertRaises(Error, check_available_transforms, [("DummyPass", "")], True)

View File

@ -8,13 +8,14 @@
#include <vector>
#include <ngraph/pass/graph_rewrite.hpp>
#include <ngraph/pass/pass.hpp>
namespace ngraph
{
namespace pass
{
/**
* @brief The transformation finds all TensorIterator layers in the network,
* @brief The transformation finds all TensorIterator/Loop layers in the network,
* processes all back edges that describe a connection between Result and Parameter
* of the TensorIterator body,and inserts ReadValue layer between Parameter
* and the next layers after this Parameter, and Assign layer after the layers
@ -42,11 +43,50 @@ namespace ngraph
* by step, the states will store between inferences.
*/
class NGRAPH_API LowLatency : public ngraph::pass::MatcherPass
class NGRAPH_DEPRECATED("Use LowLatency2 instead.") NGRAPH_API LowLatency
: public ngraph::pass::MatcherPass
{
public:
NGRAPH_RTTI_DECLARATION;
LowLatency();
};
/**
 * @brief The transformation finds all TensorIterator/Loop layers in the network,
 * processes all back edges that describe a connection between Result and Parameter
 * of the TensorIterator/Loop bodies, and inserts ReadValue and Assign layers at the
 * input and output corresponding to each back edge.
 * Supported platforms: CPU, GNA.
 *
 * The example below describes the changes made by the transformation
 * [] - TensorIterator body
 * () - new layer
 * BE - back-edge
 *
 * before applying the transformation:
 * -> input1[BE_1 -> Parameter -> Layers ... -> Result -> BE_1 ]output1->
 *
 * after applying the transformation:
 * ->(ReadValue)-> input1[BE_1 ->Parameter->Layers ...->Result->BE_1]output1 ->(Assign)
 * \
 * ->...
 * After applying the transformation, the resulting network can be inferred
 * step by step; the states are stored between inferences.
 */
class NGRAPH_API LowLatency2 : public ngraph::pass::FunctionPass
{
public:
NGRAPH_RTTI_DECLARATION;
/**
 * @param use_const_initializer When true (the default), the initial value given to
 * each inserted ReadValue is a zero Constant broadcast to the shape of the original
 * input; when false, the original input is connected to ReadValue directly.
 */
explicit LowLatency2(bool use_const_initializer = true)
: m_use_const_initializer(use_const_initializer)
{
}
/**
 * @brief Applies the transformation to every sub-graph operation of the function.
 * @param f function to transform
 * @return false if the pass stopped because a ReadValue was already found at a
 * back-edge input or right after the corresponding body Parameter; true otherwise.
 */
bool run_on_function(std::shared_ptr<ngraph::Function> f) override;
private:
// Selects the zero-constant initializer for the inserted ReadValue nodes.
bool m_use_const_initializer;
};
} // namespace pass
} // namespace ngraph

View File

@ -129,7 +129,6 @@ void op::v0::TensorIterator::validate_and_infer_types()
m_body->get_results().at(merged_input_description->m_body_value_index)->input(0);
ends.push_back(body_value.get_node()->shared_from_this());
auto body_value_partial_shape = body_value.get_partial_shape();
auto body_parameter =
m_body->get_parameters().at(merged_input_description->m_body_parameter_index);

View File

@ -6,12 +6,29 @@
#include <memory>
#include <ngraph/log.hpp>
#include <ngraph/opsets/opset6.hpp>
#include <ngraph/opsets/opset7.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include <ngraph/rt_info.hpp>
#include <ngraph/variant.hpp>
NGRAPH_RTTI_DEFINITION(ngraph::pass::LowLatency2, "LowLatency2", 0);
NGRAPH_SUPPRESS_DEPRECATED_START
NGRAPH_RTTI_DEFINITION(ngraph::pass::LowLatency, "LowLatency", 0);
using namespace std;
using namespace ngraph;
namespace
{
    /// \brief Builds the name of the Variable created for a back edge.
    ///
    /// \param op_name      friendly name of the sub-graph operation
    /// \param param_name   friendly name of the body Parameter of the back edge
    /// \param variable_idx index of the variable within the sub-graph operation;
    ///                     int64_t matches the counter type used by the callers, so
    ///                     no implicit narrowing happens at the call sites
    /// \return "<op_name>/<param_name>/variable_<variable_idx>"
    std::string generate_variable_name(const std::string& op_name,
                                       const std::string& param_name,
                                       int64_t variable_idx)
    {
        return op_name + "/" + param_name + "/" + "variable_" + std::to_string(variable_idx);
    }
} // namespace
ngraph::pass::LowLatency::LowLatency()
{
auto tensor_iterator = ngraph::pattern::wrap_type<opset6::TensorIterator, opset6::Loop>();
@ -58,11 +75,12 @@ ngraph::pass::LowLatency::LowLatency()
const auto& inputs_to = func->get_parameters()
.at(merged_in->m_body_parameter_index)
->get_output_target_inputs(0);
const std::string variable_name(sub_graph_op->get_friendly_name() + "/" +
func->get_parameters()
.at(merged_in->m_body_parameter_index)
->get_friendly_name() +
"/variable_" + std::to_string(variable_id));
const std::string variable_name(
generate_variable_name(sub_graph_op->get_friendly_name(),
func->get_parameters()
.at(merged_in->m_body_parameter_index)
->get_friendly_name(),
variable_id));
auto variable = std::make_shared<Variable>(
VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name});
auto read_value = std::make_shared<opset6::ReadValue>(
@ -90,3 +108,178 @@ ngraph::pass::LowLatency::LowLatency()
auto m = std::make_shared<ngraph::pattern::Matcher>(tensor_iterator, "LowLatency");
register_matcher(m, callback);
}
NGRAPH_SUPPRESS_DEPRECATED_END
void UnrollSingleIteration(const shared_ptr<op::util::SubGraphOp>& sub_graph_op,
const shared_ptr<Function>& outer_f)
{
using namespace opset7;
const auto& params = sub_graph_op->get_function()->get_parameters();
const auto& results = sub_graph_op->get_function()->get_results();
// before: Layer1 -> TI [input -> bodyParameter -> Layer2 -> ...]
// after: Layer1 -> Layer2 ->...
for (const auto& in : sub_graph_op->get_input_descriptions())
{
const auto& connect_to = sub_graph_op->get_input_source_output(in->m_input_index);
for (auto& output : params.at(in->m_body_parameter_index)->outputs())
{
output.replace(connect_to);
}
}
// before: TI [...-> Layer1 -> Result -> output] -> Layer2 -> ...
// after: ...-> Layer1 -> Layer2 -> ...
NodeVector new_ops;
for (const auto& out : sub_graph_op->get_output_descriptions())
{
const auto& connect_to = results.at(out->m_body_value_index)->get_input_source_output(0);
for (auto& input_to : sub_graph_op->output(out->m_output_index).get_target_inputs())
{
// create IE output name
std::string out_name = sub_graph_op->get_friendly_name();
if (sub_graph_op->get_output_size() != 1)
out_name += "." + std::to_string(out->m_output_index);
// IECompatibility: insert identity (Unsqueeze + Squeeze) to store the TensorIterator
// output names
auto axis_1 = Constant::create(ngraph::element::i64, ngraph::Shape{1}, {1});
auto identity_1 = std::make_shared<Unsqueeze>(connect_to, axis_1);
auto identity_2 = std::make_shared<Squeeze>(identity_1, axis_1);
identity_2->set_friendly_name(out_name);
new_ops.push_back(identity_1);
new_ops.push_back(identity_2);
input_to.replace_source_output(identity_2);
}
}
outer_f->add_sinks(sub_graph_op->get_function()->get_sinks());
ngraph::copy_runtime_info(sub_graph_op, sub_graph_op->get_function()->get_ops());
ngraph::copy_runtime_info(sub_graph_op, new_ops);
}
/// \brief Builds the initializing sub-graph: a zero Constant broadcast to the shape
///        of the given output.
///
/// \param sub_graph_op operation whose runtime info is copied to the new nodes
/// \param in_node      output that provides the element type and the target shape
/// \return output of the created Broadcast
Output<Node> create_init_subgraph(const shared_ptr<op::util::SubGraphOp>& sub_graph_op,
                                  const Output<Node>& in_node)
{
    using namespace opset7;

    const auto& element_type = in_node.get_element_type();
    auto zero = make_shared<Constant>(element_type, Shape{1}, 0);
    auto target_shape = make_shared<ShapeOf>(in_node);
    auto zeros = make_shared<Broadcast>(zero, target_shape);

    copy_runtime_info(sub_graph_op, {zero, target_shape, zeros});
    return zeros->output(0);
}
/// \brief Inserts a ReadValue/Assign pair for every back edge of every sub-graph
///        operation in the function, and unrolls operations with a single iteration.
///
/// The pass stops at the first back edge that already has a ReadValue at its input
/// or right after its body Parameter. Back edges handled before that point stay
/// transformed: their Assign nodes are still registered as sinks so that no
/// ReadValue is left without its paired Assign.
///
/// \param f function to transform
/// \return true when the whole function was processed; when the pass stops early,
///         true only if the function had already been modified, false otherwise
bool pass::LowLatency2::run_on_function(shared_ptr<Function> f)
{
    using namespace opset7;

    SinkVector assigns;
    // Set as soon as the function is actually altered (edge rewired or op unrolled).
    bool is_modified = false;

    // Early-exit path: keep the already created Assign nodes alive by registering
    // them as sinks, and report whether the function was changed before the stop.
    const auto abort_pass = [&f, &assigns, &is_modified]() {
        f->add_sinks(assigns);
        return is_modified;
    };

    for (const auto& op : f->get_ordered_ops())
    {
        const auto sub_graph_op = dynamic_pointer_cast<op::util::SubGraphOp>(op);
        if (!sub_graph_op)
        {
            continue;
        }

        int64_t variable_id = 0;
        const auto& func = sub_graph_op->get_function();
        const auto& params = func->get_parameters();
        for (const auto& in : sub_graph_op->get_input_descriptions())
        {
            // Process all back edges
            if (const auto& merged_in =
                    dynamic_pointer_cast<op::util::SubGraphOp::MergedInputDescription>(in))
            {
                // create new Variable
                const auto& param = params.at(merged_in->m_body_parameter_index);
                const string& param_name = param->get_friendly_name();
                const string& var_name = generate_variable_name(
                    sub_graph_op->get_friendly_name(), param_name, variable_id);

                const auto& input = sub_graph_op->input(merged_in->m_input_index);
                if (std::dynamic_pointer_cast<op::ReadValueBase>(
                        input.get_source_output().get_node_shared_ptr()) != nullptr)
                {
                    NGRAPH_DEBUG
                        << "LowLatency2 transformation cannot be applied because the "
                        << "ReadValue node is already an input to the TensorIterator. "
                        << "LowLatency2 transformation may have already been applied, please "
                        << "do not call it more than once.";
                    return abort_pass();
                }

                for (const auto& in_to : param->output(0).get_target_inputs())
                {
                    if (dynamic_cast<op::ReadValueBase*>(in_to.get_node()) != nullptr)
                    {
                        NGRAPH_DEBUG
                            << "LowLatency2 transformation cannot be applied because the "
                            << "ReadValue node is already inside the TensorIterator. "
                            << "LowLatency transformation may have been applied, please do "
                            << "not call LowLatency2 after LowLatency.";
                        return abort_pass();
                    }
                }

                VariableInfo var_info{PartialShape::dynamic(), element::dynamic, var_name};
                auto variable = make_shared<Variable>(var_info);

                // insert ReadValue
                // Layers -> [new op: ReadValue] -> Subgraph operation
                Output<Node> read_value_in = input.get_source_output();
                if (m_use_const_initializer)
                {
                    read_value_in = create_init_subgraph(sub_graph_op, read_value_in);
                }
                auto read_value = make_shared<ReadValue>(read_value_in, variable);
                input.replace_source_output(read_value->output(0));
                read_value->set_friendly_name(var_name);
                ngraph::copy_runtime_info(sub_graph_op, read_value);
                is_modified = true;

                /* insert Assign
                // Subgraph operation -> [new op: Assign]
                //                    \
                //                      ---> Layers -> ...
                */
                const auto& out_desc = sub_graph_op->get_output_descriptions();
                bool is_output_exist = std::any_of(
                    out_desc.begin(),
                    out_desc.end(),
                    [&merged_in](
                        const std::shared_ptr<op::util::SubGraphOp::OutputDescription>& out) {
                        return out->m_body_value_index == merged_in->m_body_value_index;
                    });
                // Create new output if it doesn't exist.
                if (!is_output_exist)
                {
                    sub_graph_op->get_iter_value(
                        func->get_results().at(merged_in->m_body_value_index));
                }
                for (const auto& out : sub_graph_op->get_output_descriptions())
                {
                    if (out->m_body_value_index == merged_in->m_body_value_index)
                    {
                        auto assign = make_shared<Assign>(
                            sub_graph_op->output(out->m_output_index), variable);
                        ngraph::copy_runtime_info(sub_graph_op, assign);
                        // control dependency so that ReadValue is processed before Assign
                        assign->add_control_dependency(read_value);
                        assigns.emplace_back(assign);
                        break;
                    }
                }
            }
            // The index advances for every input description, merged or not.
            variable_id++;
        }

        if (sub_graph_op->get_num_iterations() == 1)
        {
            UnrollSingleIteration(sub_graph_op, f);
            is_modified = true;
        }
    }
    f->add_sinks(assigns);
    return true;
}