LowLatency v2 ngraph transformation (#5160)
* LowLatency 2.0: transformation and unit tests * low latency 2.0: unit tests * documentation and ngraph codestyle * update CNN Interface of LowLatency transformation * fix build on Windows * fix build on Windows * investigation of a failed build on Win OS * ngraph codestyle * fix build (werrors) * New unit tests, refactoring * update functional tests for Memory * update LowLatency functional tests * extend Memory tests to cover LowLatency v2 transformation * clean up, code style * fix unit tests * update and fix unit tests, add feature to apply LLTv2 after LLTv1 * update docs, refactoring * add several gna tests to skip config * fix python api tests * update python api, rename LowLatency_v2 to LowLatency2 * deprecate LowLatency v1 * Deprecate LowLatency v1 in IE * fix wrong merge, codestyle * resolve review comments * fix python test * update skip config * apply online review notes, fix unit tests * clean up, code style * fix docs * Use debug_messages instead of exceptions in llt v2 * fix unit tests * Resolve review remarks
This commit is contained in:
parent
f9b27c3714
commit
c1608628d4
@ -17,8 +17,8 @@ def ApplyPOTTransformations(IENetwork network, string device):
|
||||
C.ApplyPOTTransformations(network.impl, device)
|
||||
|
||||
|
||||
def ApplyLowLatencyTransformation(IENetwork network, int64_t num_iterations=1):
|
||||
C.ApplyLowLatencyTransformation(network.impl, num_iterations)
|
||||
def ApplyLowLatencyTransformation(IENetwork network, bool use_const_initializer = True):
|
||||
C.ApplyLowLatencyTransformation(network.impl, use_const_initializer)
|
||||
|
||||
|
||||
def ApplyPruningTransformation(IENetwork network):
|
||||
|
@ -26,16 +26,9 @@ void InferenceEnginePython::ApplyPOTTransformations(InferenceEnginePython::IENet
|
||||
manager.run_passes(network.actual->getFunction());
|
||||
}
|
||||
|
||||
void InferenceEnginePython::ApplyLowLatencyTransformation(InferenceEnginePython::IENetwork network, int64_t num_iterations) {
|
||||
void InferenceEnginePython::ApplyLowLatencyTransformation(InferenceEnginePython::IENetwork network, bool use_const_initializer) {
|
||||
ngraph::pass::Manager manager;
|
||||
// TODO: pass num_iterations to LowLatency
|
||||
manager.register_pass<ngraph::pass::LowLatency>();
|
||||
manager.register_pass<ngraph::pass::UnrollTensorIterator>();
|
||||
|
||||
auto pass_config = manager.get_pass_config();
|
||||
pass_config->set_callback<ngraph::pass::UnrollTensorIterator>([](const std::shared_ptr<const ngraph::Node>& node) -> bool {
|
||||
return node->get_rt_info().count("UNROLL_TI") == 0;
|
||||
});
|
||||
manager.register_pass<ngraph::pass::LowLatency2>(use_const_initializer);
|
||||
manager.run_passes(network.actual->getFunction());
|
||||
}
|
||||
|
||||
|
@ -15,7 +15,7 @@ void ApplyMOCTransformations(InferenceEnginePython::IENetwork network, bool cf);
|
||||
|
||||
void ApplyPOTTransformations(InferenceEnginePython::IENetwork network, std::string device);
|
||||
|
||||
void ApplyLowLatencyTransformation(InferenceEnginePython::IENetwork network, int64_t num_iterations);
|
||||
void ApplyLowLatencyTransformation(InferenceEnginePython::IENetwork network, bool use_const_initializer = true);
|
||||
|
||||
void ApplyPruningTransformation(InferenceEnginePython::IENetwork network);
|
||||
|
||||
|
@ -3,7 +3,6 @@
|
||||
|
||||
from libcpp cimport bool
|
||||
from libcpp.string cimport string
|
||||
from libc.stdint cimport int64_t
|
||||
|
||||
from ..inference_engine.ie_api_impl_defs cimport IENetwork
|
||||
|
||||
@ -12,10 +11,10 @@ cdef extern from "offline_transformations_api_impl.hpp" namespace "InferenceEngi
|
||||
|
||||
cdef void ApplyPOTTransformations(IENetwork network, string device)
|
||||
|
||||
cdef void ApplyLowLatencyTransformation(IENetwork network, int64_t num_iterations)
|
||||
cdef void ApplyLowLatencyTransformation(IENetwork network, bool use_const_initializer)
|
||||
|
||||
cdef void ApplyPruningTransformation(IENetwork network)
|
||||
|
||||
cdef void GenerateMappingFile(IENetwork network, string path, bool extract_names)
|
||||
|
||||
cdef void CheckAPI()
|
||||
cdef void CheckAPI()
|
||||
|
@ -49,4 +49,4 @@ def test_pruning_transformations():
|
||||
|
||||
f = ng.function_from_cnn(net)
|
||||
assert f != None
|
||||
assert len(f.get_ops()) == 3
|
||||
assert len(f.get_ops()) == 3
|
||||
|
@ -52,5 +52,41 @@ namespace InferenceEngine {
|
||||
* @param network A network to apply LowLatency transformation
|
||||
* *
|
||||
*/
|
||||
|
||||
INFERENCE_ENGINE_DEPRECATED("This transformation will be removed in 2023.1. "
|
||||
"Use InferenceEngine::lowLatency2 instead.")
|
||||
INFERENCE_ENGINE_API_CPP(void) LowLatency(InferenceEngine::CNNNetwork& network);
|
||||
|
||||
|
||||
/**
|
||||
* @brief The transformation finds all TensorIterator/Loop layers in the network,
|
||||
* processes all back edges that describe a connection between Result and Parameter
|
||||
* of the TensorIterator/Loop bodies, and inserts ReadValue and Assign layers at the
|
||||
* input and output corresponding to this back edge.
|
||||
* Supported platforms: CPU, GNA.
|
||||
*
|
||||
* The example below describes the changes made by the transformation
|
||||
* [] - TensorIterator body
|
||||
* () - new layer
|
||||
* BE - back-edge
|
||||
*
|
||||
* before applying the transformation:
|
||||
* -> input1[BE_1 -> Parameter -> Layers ... -> Result -> BE_1 ]output1->
|
||||
*
|
||||
* after applying the transformation:
|
||||
* ->(ReadValue)-> input1[BE_1 ->Parameter->Layers ...->Result->BE_1]output1 ->(Assign)
|
||||
* \
|
||||
* ->...
|
||||
* After applying the transformation, the resulting network can be inferred
|
||||
* step by step; the states are stored between inferences.
|
||||
* @param network A network to apply LowLatency transformation
|
||||
* @param use_const_initializer Changes the type of the initializing subgraph for ReadValue operations.
|
||||
If "true", then the transformation inserts Constant before ReadValue operation.
|
||||
If "false", then the transformation leaves the existing initializing subgraph for ReadValue operation.
|
||||
* Loop operation by a given number. Does not affect TensorIterators.
|
||||
* *
|
||||
*/
|
||||
INFERENCE_ENGINE_API_CPP(void) lowLatency2(InferenceEngine::CNNNetwork& network,
|
||||
bool use_const_initializer = true);
|
||||
|
||||
} // namespace InferenceEngine
|
||||
|
@ -11,6 +11,16 @@ using namespace InferenceEngine;
|
||||
/// Runs the ngraph::pass::LowLatency pass on the nGraph function of @p network.
void InferenceEngine::LowLatency(InferenceEngine::CNNNetwork &network) {
    auto ng_function = network.getFunction();

    ngraph::pass::Manager pass_manager;
    // The registration is wrapped in the suppress macros so that referencing
    // the LowLatency pass here does not raise a deprecation diagnostic.
    NGRAPH_SUPPRESS_DEPRECATED_START
    pass_manager.register_pass<ngraph::pass::LowLatency>();
    NGRAPH_SUPPRESS_DEPRECATED_END

    pass_manager.run_passes(ng_function);
}
|
||||
|
||||
/// Runs the ngraph::pass::LowLatency2 pass on the nGraph function of @p network.
/// @param network               network whose function is transformed in place
/// @param use_const_initializer forwarded unchanged to the LowLatency2 pass constructor
void InferenceEngine::lowLatency2(InferenceEngine::CNNNetwork &network,
                                  bool use_const_initializer) {
    auto ng_function = network.getFunction();

    ngraph::pass::Manager pass_manager;
    pass_manager.register_pass<ngraph::pass::LowLatency2>(use_const_initializer);
    pass_manager.run_passes(ng_function);
}
|
||||
|
@ -68,7 +68,9 @@ TEST(TransformationTests, LowLatencyLSTM) {
|
||||
|
||||
ngraph::pass::Manager manager;
|
||||
manager.register_pass<ngraph::pass::InitNodeInfo>();
|
||||
NGRAPH_SUPPRESS_DEPRECATED_START
|
||||
manager.register_pass<ngraph::pass::LowLatency>();
|
||||
NGRAPH_SUPPRESS_DEPRECATED_END
|
||||
manager.register_pass<ngraph::pass::UnrollTensorIterator>();
|
||||
manager.run_passes(f);
|
||||
}
|
||||
@ -149,7 +151,9 @@ TEST(TransformationTests, LowLatencyGRU) {
|
||||
|
||||
ngraph::pass::Manager manager;
|
||||
manager.register_pass<ngraph::pass::InitNodeInfo>();
|
||||
NGRAPH_SUPPRESS_DEPRECATED_START
|
||||
manager.register_pass<ngraph::pass::LowLatency>();
|
||||
NGRAPH_SUPPRESS_DEPRECATED_END
|
||||
manager.register_pass<ngraph::pass::UnrollTensorIterator>();
|
||||
manager.run_passes(f);
|
||||
|
||||
@ -227,7 +231,9 @@ TEST(TransformationTests, LowLatencyRNN) {
|
||||
|
||||
ngraph::pass::Manager manager;
|
||||
manager.register_pass<ngraph::pass::InitNodeInfo>();
|
||||
NGRAPH_SUPPRESS_DEPRECATED_START
|
||||
manager.register_pass<ngraph::pass::LowLatency>();
|
||||
NGRAPH_SUPPRESS_DEPRECATED_END
|
||||
manager.register_pass<ngraph::pass::UnrollTensorIterator>();
|
||||
manager.run_passes(f);
|
||||
|
||||
@ -317,7 +323,9 @@ TEST(TransformationTests, LowLatencyLSTMReshape) {
|
||||
|
||||
ngraph::pass::Manager manager;
|
||||
manager.register_pass<ngraph::pass::InitNodeInfo>();
|
||||
NGRAPH_SUPPRESS_DEPRECATED_START
|
||||
manager.register_pass<ngraph::pass::LowLatency>();
|
||||
NGRAPH_SUPPRESS_DEPRECATED_END
|
||||
manager.register_pass<ngraph::pass::UnrollTensorIterator>();
|
||||
manager.run_passes(f);
|
||||
}
|
||||
@ -413,7 +421,9 @@ TEST(TransformationTests, LowLatencyLSTM_Loop) {
|
||||
|
||||
ngraph::pass::Manager manager;
|
||||
manager.register_pass<ngraph::pass::InitNodeInfo>();
|
||||
NGRAPH_SUPPRESS_DEPRECATED_START
|
||||
manager.register_pass<ngraph::pass::LowLatency>();
|
||||
NGRAPH_SUPPRESS_DEPRECATED_END
|
||||
manager.register_pass<ngraph::pass::UnrollTensorIterator>();
|
||||
manager.run_passes(f);
|
||||
}
|
||||
|
@ -0,0 +1,829 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <queue>
|
||||
|
||||
#include <ngraph/function.hpp>
|
||||
#include <ngraph/opsets/opset7.hpp>
|
||||
#include <ngraph/pass/manager.hpp>
|
||||
|
||||
#include <transformations/control_flow/unroll_tensor_iterator.hpp>
|
||||
#include <transformations/init_node_info.hpp>
|
||||
#include <transformations/common_optimizations/low_latency.hpp>
|
||||
#include <transformations/serialize.hpp>
|
||||
|
||||
#include "common_test_utils/ngraph_test_utils.hpp"
|
||||
|
||||
using namespace testing;
|
||||
using namespace ngraph;
|
||||
using namespace opset7;
|
||||
using namespace std;
|
||||
|
||||
// Builds the initializing sub-graph used by the reference functions below:
// a single-element zero Constant (same element type as `in_node`) broadcast
// to the shape reported by ShapeOf(in_node). Returns the Broadcast output.
Output<Node> create_init_subgraph(const Output<Node>& in_node) {
    auto zero = make_shared<Constant>(in_node.get_element_type(), Shape{1}, 0);
    auto target_shape = make_shared<ShapeOf>(in_node);
    return make_shared<Broadcast>(zero, target_shape)->output(0);
}
|
||||
|
||||
// Appends an Unsqueeze followed by a Squeeze over the same axis (1) to
// `in_node` and returns the Squeeze, i.e. a pair of ops that restores the
// original shape.
Output<Node> insert_identity(const Output<Node>& in_node) {
    auto axis = Constant::create(element::i64, Shape{1}, {1});
    auto expanded = std::make_shared<Unsqueeze>(in_node, axis);
    auto restored = std::make_shared<Squeeze>(expanded, axis);
    return restored;
}
|
||||
|
||||
std::shared_ptr<Function> createLSTMBody(const std::shared_ptr<Parameter>& Xi,
|
||||
const std::shared_ptr<Parameter>& H_t,
|
||||
const std::shared_ptr<Parameter>& C_t,
|
||||
bool is_loop = false) {
|
||||
// Body
|
||||
auto axis = Constant::create(element::i64, Shape{}, {0});
|
||||
auto squeeze = std::make_shared<Squeeze>(Xi, axis);
|
||||
|
||||
auto w_val = std::vector<float>(512 * 16, 0);
|
||||
auto r_val = std::vector<float>(512 * 128, 0);
|
||||
auto b_val = std::vector<float>(512, 0);
|
||||
auto W = Constant::create(element::f32, Shape{512, 16}, w_val);
|
||||
auto R = Constant::create(element::f32, Shape{512, 128}, r_val);
|
||||
auto B = Constant::create(element::f32, Shape{512}, b_val);
|
||||
|
||||
auto lstm_cell = std::make_shared<LSTMCell>(squeeze, H_t, C_t, W, R, B, 128);
|
||||
auto res_1 = std::make_shared<Result>(lstm_cell->output(0));
|
||||
auto unsqueeze = std::make_shared<Unsqueeze>(lstm_cell->output(0), axis);
|
||||
auto res_2 = std::make_shared<Result>(unsqueeze);
|
||||
auto res_3 = std::make_shared<Result>(lstm_cell->output(1));
|
||||
|
||||
auto func = std::make_shared<Function>(OutputVector{res_1, res_2, res_3},
|
||||
ParameterVector{Xi, H_t, C_t});
|
||||
if (is_loop) {
|
||||
auto body_condition = std::make_shared<Constant>(
|
||||
element::boolean, Shape{1}, true);
|
||||
auto cond_res = std::make_shared<Result>(body_condition);
|
||||
func->add_results({cond_res});
|
||||
}
|
||||
return func;
|
||||
}
|
||||
|
||||
// LowLatency2 applied to a TensorIterator that wraps the shared LSTM body.
// The transformed function is compared with a hand-built reference that uses
// ReadValue/Assign pairs for the H and C states.
TEST(TransformationTests, LowLatency2_LSTM) {
    std::shared_ptr<Function> func, func_ref;
    {
        // Outer inputs of the TensorIterator.
        auto X = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        auto H_init = std::make_shared<Parameter>(element::f32, Shape{1, 128});
        auto C_init = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        // Body parameters.
        auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
        auto C_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        auto body = createLSTMBody(Xi, H_t, C_t);
        auto body_results = body->get_results();

        auto ti = std::make_shared<TensorIterator>();
        ti->set_body(body);
        ti->set_friendly_name("LSTMTensorIterator");

        // Back edges: results[2] -> C_t and results[0] -> H_t; X is sliced.
        ti->set_merged_input(C_t, C_init, body_results[2]);
        ti->set_sliced_input(Xi, X, 0, 1, 1, -1, 0);
        ti->set_merged_input(H_t, H_init, body_results[0]);

        // Registration order defines the TensorIterator output indices.
        ti->get_iter_value(body_results[0], -1);
        ti->get_concatenated_slices(body_results[1], 0, 1, 1, -1, 0);

        auto res_ti_1 = std::make_shared<Result>(ti->output(1));
        auto res_ti_2 = std::make_shared<Result>(ti->output(0));
        func = std::make_shared<Function>(NodeVector{res_ti_1, res_ti_2},
                                          ParameterVector{X, H_init, C_init});

        pass::Manager pass_manager;
        pass_manager.register_pass<pass::InitNodeInfo>();
        pass_manager.register_pass<pass::LowLatency2>();
        pass_manager.run_passes(func);
        ASSERT_NO_THROW(check_rt_info(func));
    }
    {
        auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
        auto C_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        const std::string var_name_H("LSTMTensorIterator/variable0");
        const std::string var_name_C("LSTMTensorIterator/variable1");
        auto var_H = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, var_name_H});
        auto var_C = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, var_name_C});
        auto read_H = std::make_shared<ReadValue>(create_init_subgraph(H_t), var_H);
        auto read_C = std::make_shared<ReadValue>(create_init_subgraph(C_t), var_C);

        // Former TensorIterator body, now fed by the ReadValue nodes.
        auto axis = Constant::create(element::i64, Shape{}, {0});
        auto squeezed_input = std::make_shared<Squeeze>(Xi, axis);

        auto W = Constant::create(element::f32, Shape{512, 16}, std::vector<float>(512 * 16, 0));
        auto R = Constant::create(element::f32, Shape{512, 128}, std::vector<float>(512 * 128, 0));
        auto B = Constant::create(element::f32, Shape{512}, std::vector<float>(512, 0));

        auto cell = std::make_shared<LSTMCell>(squeezed_input, read_H, read_C, W, R, B, 128);
        auto assign_H = std::make_shared<Assign>(cell->output(0), var_H);
        auto assign_C = std::make_shared<Assign>(cell->output(1), var_C);
        auto hidden_unsqueezed = std::make_shared<Unsqueeze>(cell->output(0), axis);
        auto res_2 = std::make_shared<Result>(insert_identity(hidden_unsqueezed));
        auto res_1 = std::make_shared<Result>(insert_identity(cell->output(0)));
        func_ref = std::make_shared<Function>(OutputVector{res_1, res_2}, ParameterVector{Xi, H_t, C_t});
        func_ref->add_sinks({assign_C, assign_H});
        assign_H->add_control_dependency(read_H);
        assign_C->add_control_dependency(read_C);
    }
    auto cmp = compare_functions(func, func_ref);
    ASSERT_TRUE(cmp.first) << cmp.second;
}
|
||||
|
||||
// LowLatency2 applied to a TensorIterator whose body is a single GRUCell.
// Only the concatenated-slices output (TensorIterator output 1) is exposed.
TEST(TransformationTests, LowLatency2_GRU) {
    std::shared_ptr<Function> func, func_ref;
    {
        // Outer inputs of the TensorIterator.
        auto X = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        auto Y = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        // Body parameters.
        auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        auto Yi = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        // Body
        auto axis = Constant::create(element::i64, Shape{}, {0});
        auto squeezed_input = std::make_shared<Squeeze>(Xi, axis);

        auto W = Constant::create(element::f32, Shape{384, 16}, std::vector<float>(384 * 16, 0));
        auto R = Constant::create(element::f32, Shape{384, 128}, std::vector<float>(384 * 128, 0));
        auto B = Constant::create(element::f32, Shape{384}, std::vector<float>(384, 0));

        auto gru_cell = std::make_shared<GRUCell>(squeezed_input, Yi, W, R, B, 128);
        auto res_1 = std::make_shared<Result>(gru_cell);
        auto cell_unsqueezed = std::make_shared<Unsqueeze>(gru_cell, axis);
        auto res_2 = std::make_shared<Result>(cell_unsqueezed);
        auto body = std::make_shared<Function>(OutputVector{res_1, res_2}, ParameterVector{Xi, Yi});

        auto ti = std::make_shared<TensorIterator>();
        ti->set_body(body);

        ti->set_sliced_input(Xi, X, 0, 1, 1, -1, 0);
        ti->set_merged_input(Yi, Y, res_1);

        // Registration order defines the TensorIterator output indices.
        ti->get_iter_value(res_1, -1);
        ti->get_concatenated_slices(res_2, 0, 1, 1, -1, 0);

        auto res_ti_1 = std::make_shared<Result>(ti->output(1));
        func = std::make_shared<Function>(NodeVector{res_ti_1}, ParameterVector{X, Y});

        pass::Manager pass_manager;
        pass_manager.register_pass<pass::InitNodeInfo>();
        pass_manager.register_pass<pass::LowLatency2>();

        pass_manager.run_passes(func);

        ASSERT_NO_THROW(check_rt_info(func));
    }
    {
        auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        const std::string var_name_H("GRUTensorIterator/variable0");
        auto var_H = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, var_name_H});
        auto read_H = std::make_shared<ReadValue>(create_init_subgraph(H_t), var_H);

        // Former TensorIterator body, now fed by the ReadValue node.
        auto axis = Constant::create(element::i64, Shape{}, {0});
        auto squeezed_input = std::make_shared<Squeeze>(Xi, axis);

        auto W = Constant::create(element::f32, Shape{384, 16}, std::vector<float>(384 * 16, 0));
        auto R = Constant::create(element::f32, Shape{384, 128}, std::vector<float>(384 * 128, 0));
        auto B = Constant::create(element::f32, Shape{384}, std::vector<float>(384, 0));

        auto gru_cell = std::make_shared<GRUCell>(squeezed_input, read_H, W, R, B, 128);
        auto assign_H = std::make_shared<Assign>(gru_cell->output(0), var_H);
        // This Result is created but not passed to the reference function.
        auto res_1 = std::make_shared<Result>(assign_H);
        auto cell_unsqueezed = std::make_shared<Unsqueeze>(gru_cell->output(0), axis);
        auto res_2 = std::make_shared<Result>(insert_identity(cell_unsqueezed));
        func_ref = std::make_shared<Function>(ResultVector{res_2}, ParameterVector{Xi, H_t});
        func_ref->add_sinks({assign_H});
        assign_H->add_control_dependency(read_H);
    }
    auto cmp = compare_functions(func, func_ref);
    ASSERT_TRUE(cmp.first) << cmp.second;
}
|
||||
|
||||
// LowLatency2 applied to a TensorIterator whose body is a single RNNCell.
// Only the concatenated-slices output (TensorIterator output 1) is exposed.
TEST(TransformationTests, LowLatency2_RNN) {
    std::shared_ptr<Function> func, func_ref;
    {
        // Outer inputs of the TensorIterator.
        auto X = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        auto Y = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        // Body parameters.
        auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        auto Yi = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        // Body
        auto axis = Constant::create(element::i64, Shape{}, {0});
        auto squeezed_input = std::make_shared<Squeeze>(Xi, axis);

        auto W = Constant::create(element::f32, Shape{128, 16}, std::vector<float>(128 * 16, 0));
        auto R = Constant::create(element::f32, Shape{128, 128}, std::vector<float>(128 * 128, 0));
        auto B = Constant::create(element::f32, Shape{128}, std::vector<float>(128, 0));

        auto rnn_cell = std::make_shared<RNNCell>(squeezed_input, Yi, W, R, B, 128);
        auto res_1 = std::make_shared<Result>(rnn_cell);
        auto cell_unsqueezed = std::make_shared<Unsqueeze>(rnn_cell, axis);
        auto res_2 = std::make_shared<Result>(cell_unsqueezed);
        auto body = std::make_shared<Function>(OutputVector{res_1, res_2}, ParameterVector{Xi, Yi});

        auto ti = std::make_shared<TensorIterator>();
        ti->set_body(body);

        ti->set_sliced_input(Xi, X, 0, 1, 1, -1, 0);
        ti->set_merged_input(Yi, Y, res_1);

        // Registration order defines the TensorIterator output indices.
        ti->get_iter_value(res_1, -1);
        ti->get_concatenated_slices(res_2, 0, 1, 1, -1, 0);

        auto res_ti_1 = std::make_shared<Result>(ti->output(1));
        func = std::make_shared<Function>(NodeVector{res_ti_1}, ParameterVector{X, Y});

        pass::Manager pass_manager;
        pass_manager.register_pass<pass::InitNodeInfo>();
        pass_manager.register_pass<pass::LowLatency2>();

        pass_manager.run_passes(func);

        ASSERT_NO_THROW(check_rt_info(func));
    }
    {
        auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        const std::string var_name_H("RNNTensorIterator/variable0");
        auto var_H = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, var_name_H});
        auto read_H = std::make_shared<ReadValue>(create_init_subgraph(H_t), var_H);

        // Former TensorIterator body, now fed by the ReadValue node.
        auto axis = Constant::create(element::i64, Shape{}, {0});
        auto squeezed_input = std::make_shared<Squeeze>(Xi, axis);

        auto W = Constant::create(element::f32, Shape{128, 16}, std::vector<float>(128 * 16, 0));
        auto R = Constant::create(element::f32, Shape{128, 128}, std::vector<float>(128 * 128, 0));
        auto B = Constant::create(element::f32, Shape{128}, std::vector<float>(128, 0));

        auto rnn_cell = std::make_shared<RNNCell>(squeezed_input, read_H, W, R, B, 128);
        auto assign_H = std::make_shared<Assign>(rnn_cell->output(0), var_H);
        // This Result is created but not passed to the reference function.
        auto res_1 = std::make_shared<Result>(assign_H);
        auto cell_unsqueezed = std::make_shared<Unsqueeze>(rnn_cell->output(0), axis);
        auto res_2 = std::make_shared<Result>(insert_identity(cell_unsqueezed));
        func_ref = std::make_shared<Function>(ResultVector{res_2}, ParameterVector{Xi, H_t});
        func_ref->add_sinks({assign_H});
        assign_H->add_control_dependency(read_H);
    }
    auto cmp = compare_functions(func, func_ref);
    ASSERT_TRUE(cmp.first) << cmp.second;
}
|
||||
|
||||
// Same LSTM TensorIterator as LowLatency2_LSTM, but the function is first
// built with X of shape {2, 1, 16} and parameter 0 is then replaced by one of
// shape {1, 1, 16} before LowLatency2 is run.
TEST(TransformationTests, LowLatency2_LSTMReshape) {
    std::shared_ptr<Function> func, func_ref;
    {
        // Outer inputs of the TensorIterator.
        auto X = std::make_shared<Parameter>(element::f32, Shape{2, 1, 16});
        auto H = std::make_shared<Parameter>(element::f32, Shape{1, 128});
        auto C = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        // Body parameters.
        auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
        auto C_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        auto body = createLSTMBody(Xi, H_t, C_t);
        auto body_results = body->get_results();

        auto ti = std::make_shared<TensorIterator>();
        ti->set_body(body);

        ti->set_merged_input(C_t, C, body_results[2]);
        ti->set_sliced_input(Xi, X, 0, 1, 1, -1, 0);
        ti->set_merged_input(H_t, H, body_results[0]);

        // Registration order defines the TensorIterator output indices.
        ti->get_iter_value(body_results[0], -1);
        ti->get_concatenated_slices(body_results[1], 0, 1, 1, -1, 0);

        auto res_ti_1 = std::make_shared<Result>(ti->output(1));
        auto res_ti_2 = std::make_shared<Result>(ti->output(0));
        func = std::make_shared<Function>(NodeVector{res_ti_1, res_ti_2}, ParameterVector{X, H, C});

        // Reshape: change the number of TensorIterator iterations, 2 -> 1.
        auto reshaped_X = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        func->replace_parameter(0, reshaped_X);
        func->validate_nodes_and_infer_types();

        pass::Manager pass_manager;
        pass_manager.register_pass<pass::InitNodeInfo>();
        pass_manager.register_pass<pass::LowLatency2>();

        pass_manager.run_passes(func);
        ASSERT_NO_THROW(check_rt_info(func));
    }
    {
        auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
        auto C_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        const std::string var_name_H("LSTMTensorIterator/variable0");
        const std::string var_name_C("LSTMTensorIterator/variable1");
        auto var_H = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, var_name_H});
        auto var_C = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, var_name_C});
        auto read_H = std::make_shared<ReadValue>(create_init_subgraph(H_t), var_H);
        auto read_C = std::make_shared<ReadValue>(create_init_subgraph(C_t), var_C);

        // Former TensorIterator body, now fed by the ReadValue nodes.
        auto axis = Constant::create(element::i64, Shape{}, {0});
        auto squeezed_input = std::make_shared<Squeeze>(Xi, axis);

        auto W = Constant::create(element::f32, Shape{512, 16}, std::vector<float>(512 * 16, 0));
        auto R = Constant::create(element::f32, Shape{512, 128}, std::vector<float>(512 * 128, 0));
        auto B = Constant::create(element::f32, Shape{512}, std::vector<float>(512, 0));

        auto cell = std::make_shared<LSTMCell>(squeezed_input, read_H, read_C, W, R, B, 128);
        auto assign_H = std::make_shared<Assign>(cell->output(0), var_H);
        auto assign_C = std::make_shared<Assign>(cell->output(1), var_C);
        auto hidden_unsqueezed = std::make_shared<Unsqueeze>(cell->output(0), axis);
        auto res_2 = std::make_shared<Result>(insert_identity(hidden_unsqueezed));
        auto res_1 = std::make_shared<Result>(insert_identity(cell->output(0)));
        func_ref = std::make_shared<Function>(OutputVector{res_1, res_2}, ParameterVector{Xi, H_t, C_t});
        func_ref->add_sinks({assign_C, assign_H});
        assign_H->add_control_dependency(read_H);
        assign_C->add_control_dependency(read_C);
    }
    auto cmp = compare_functions(func, func_ref);
    ASSERT_TRUE(cmp.first) << cmp.second;
}
|
||||
|
||||
// LowLatency2 applied to a Loop (trip count 1, constant-true conditions) that
// wraps the shared LSTM body. The transformed function is compared with a
// hand-built reference that uses ReadValue/Assign pairs for the H and C states.
TEST(TransformationTests, LowLatency2_LSTM_Loop) {
    std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
    {
        // Outer inputs of the Loop.
        auto X = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        auto H_init = std::make_shared<Parameter>(element::f32, Shape{1, 128});
        auto C_init = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        // Body parameters.
        auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
        auto C_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        // Body. createLSTMBody builds the whole body itself, so no Squeeze is
        // created here; with is_loop == true it appends the body-condition
        // Result as results[3].
        auto body = createLSTMBody(Xi, H_t, C_t, true);
        auto results = body->get_results();

        auto trip_count =
                std::make_shared<Constant>(element::i64, Shape{}, 1);
        auto exec_condition =
                std::make_shared<Constant>(element::boolean, Shape{}, true);
        auto loop = std::make_shared<Loop>(trip_count, exec_condition);
        // No current-iteration input (-1); body condition is body result #3.
        loop->set_special_body_ports({-1, 3});
        loop->set_function(body);
        loop->set_friendly_name("LSTMLoop");

        // Back edges: results[2] -> C_t and results[0] -> H_t; X is sliced.
        loop->set_merged_input(C_t, C_init, results[2]);
        loop->set_sliced_input(Xi, X, 0, 1, 1, -1, 0);
        loop->set_merged_input(H_t, H_init, results[0]);

        // Registration order defines the Loop output indices.
        loop->get_iter_value(results[0], -1);
        loop->get_concatenated_slices(results[1], 0, 1, 1, -1, 0);

        auto res_ti_1 = std::make_shared<Result>(loop->output(1));
        auto res_ti_2 = std::make_shared<Result>(loop->output(0));
        f = std::make_shared<Function>(NodeVector{res_ti_1, res_ti_2},
                                       ParameterVector{X, H_init, C_init});

        pass::Manager manager;
        manager.register_pass<pass::InitNodeInfo>();
        manager.register_pass<pass::LowLatency2>();

        manager.run_passes(f);
        ASSERT_NO_THROW(check_rt_info(f));
    }
    {
        auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
        auto C_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        const std::string variable_name_H("LSTMTensorIterator/variable0");
        const std::string variable_name_C("LSTMTensorIterator/variable1");
        auto variable_H = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_H});
        auto variable_C = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_C});
        auto read_value_H = std::make_shared<ReadValue>(create_init_subgraph(H_t), variable_H);
        auto read_value_C = std::make_shared<ReadValue>(create_init_subgraph(C_t), variable_C);

        // Former Loop body, now fed by the ReadValue nodes.
        auto axis = Constant::create(element::i64, Shape{}, {0});
        auto squeeze = std::make_shared<Squeeze>(Xi, axis);

        auto w_val = std::vector<float>(512 * 16, 0);
        auto r_val = std::vector<float>(512 * 128, 0);
        auto b_val = std::vector<float>(512, 0);
        auto W = Constant::create(element::f32, Shape{512, 16}, w_val);
        auto R = Constant::create(element::f32, Shape{512, 128}, r_val);
        auto B = Constant::create(element::f32, Shape{512}, b_val);

        auto lstm_cell = std::make_shared<LSTMCell>(squeeze, read_value_H, read_value_C, W, R, B, 128);
        auto assign_H = std::make_shared<Assign>(lstm_cell->output(0), variable_H);
        auto assign_C = std::make_shared<Assign>(lstm_cell->output(1), variable_C);
        auto unsqueeze = std::make_shared<Unsqueeze>(lstm_cell->output(0), axis);
        auto res_2 = std::make_shared<Result>(insert_identity(unsqueeze));
        auto res_1 = std::make_shared<Result>(insert_identity(lstm_cell->output(0)));
        f_ref = std::make_shared<Function>(OutputVector{res_1, res_2}, ParameterVector{Xi, H_t, C_t});
        f_ref->add_sinks({assign_C, assign_H});
        assign_H->add_control_dependency(read_value_H);
        assign_C->add_control_dependency(read_value_C);
    }
    auto res = compare_functions(f, f_ref);
    ASSERT_TRUE(res.first) << res.second;
}
|
||||
|
||||
// Applies LowLatency2 to a TensorIterator that runs ITER_CNT (> 1) iterations.
// The reference graph keeps the TensorIterator (it is not unrolled) and connects
// its merged H/C state inputs and outputs to ReadValue/Assign pairs.
TEST(TransformationTests, LowLatency2_LSTM_several_iterations) {
    constexpr int ITER_CNT = 5;
    std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
    {
        auto X = std::make_shared<Parameter>(element::f32, Shape{ITER_CNT, 1, 16});
        auto H = std::make_shared<Parameter>(element::f32, Shape{1, 128});
        auto C = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
        auto C_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        // Body
        auto body = createLSTMBody(Xi, H_t, C_t);
        auto results = body->get_results();

        auto tensor_iterator = std::make_shared<TensorIterator>();
        tensor_iterator->set_body(body);

        tensor_iterator->set_merged_input(C_t, C, results[2]);
        tensor_iterator->set_sliced_input(Xi, X, 0, 1, 1, -1, 0);
        tensor_iterator->set_merged_input(H_t, H, results[0]);

        // out0/out1 are not referenced below; the Results are attached through
        // tensor_iterator->output(i) instead.
        auto out0 = tensor_iterator->get_iter_value(results[0], -1);
        auto out1 = tensor_iterator->get_concatenated_slices(results[1], 0, 1, 1, -1, 0);

        auto res_ti_1 = std::make_shared<Result>(tensor_iterator->output(1));
        auto res_ti_2 = std::make_shared<Result>(tensor_iterator->output(0));
        f = std::make_shared<Function>(NodeVector{res_ti_1, res_ti_2}, ParameterVector{X, H, C});

        pass::Manager manager;
        manager.register_pass<pass::InitNodeInfo>();
        manager.register_pass<pass::LowLatency2>();

        manager.run_passes(f);
        ASSERT_NO_THROW(check_rt_info(f));
    }

    // TensorIterator not unrolled.
    {
        auto X = std::make_shared<Parameter>(element::f32, Shape{ITER_CNT, 1, 16});
        auto H = std::make_shared<Parameter>(element::f32, Shape{1, 128});
        auto C = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        const std::string variable_name_H("LSTMTensorIterator/variable0");
        const std::string variable_name_C("LSTMTensorIterator/variable1");
        auto variable_H = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_H});
        auto variable_C = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_C});
        auto read_value_H = std::make_shared<ReadValue>(create_init_subgraph(H), variable_H);
        auto read_value_C = std::make_shared<ReadValue>(create_init_subgraph(C), variable_C);

        // Body
        auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
        auto C_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        auto axis = Constant::create(element::i64, Shape{}, {0});
        auto squeeze = std::make_shared<Squeeze>(Xi, axis);

        auto w_val = std::vector<float>(512 * 16, 0);
        auto r_val = std::vector<float>(512 * 128, 0);
        auto b_val = std::vector<float>(512, 0);
        auto W = Constant::create(element::f32, Shape{512, 16}, w_val);
        auto R = Constant::create(element::f32, Shape{512, 128}, r_val);
        auto B = Constant::create(element::f32, Shape{512}, b_val);

        auto lstm_cell = std::make_shared<LSTMCell>(squeeze, H_t, C_t, W, R, B, 128);
        auto res_1 = std::make_shared<Result>(lstm_cell->output(0));
        // Name the LSTMCell output port explicitly, as the other reference bodies do,
        // instead of relying on the implicit default output of a multi-output node.
        auto unsqueeze = std::make_shared<Unsqueeze>(lstm_cell->output(0), axis);
        auto res_2 = std::make_shared<Result>(unsqueeze);
        auto res_3 = std::make_shared<Result>(lstm_cell->output(1));
        auto body = std::make_shared<Function>(OutputVector{res_1, res_2, res_3},
                                               ParameterVector{Xi, H_t, C_t});

        auto tensor_iterator = std::make_shared<TensorIterator>();
        tensor_iterator->set_body(body);

        // The merged state inputs are fed from ReadValue rather than from the Parameters.
        tensor_iterator->set_merged_input(C_t, read_value_C, res_3);
        tensor_iterator->set_sliced_input(Xi, X, 0, 1, 1, -1, 0);
        tensor_iterator->set_merged_input(H_t, read_value_H, res_1);

        auto out0 = tensor_iterator->get_iter_value(res_1, -1);
        auto out1 = tensor_iterator->get_concatenated_slices(res_2, 0, 1, 1, -1, 0);
        auto out2 = tensor_iterator->get_iter_value(res_3, -1);

        // The last-iteration H and C values are stored back into the variables.
        auto assign_H = std::make_shared<Assign>(out0, variable_H);
        auto assign_C = std::make_shared<Assign>(out2, variable_C);
        auto outer_res_2 = std::make_shared<Result>(out1);
        auto outer_res_1 = std::make_shared<Result>(out0);
        f_ref = std::make_shared<Function>(OutputVector{outer_res_1, outer_res_2}, ParameterVector{X, H, C});
        f_ref->add_sinks({assign_C, assign_H});
        assign_H->add_control_dependency(read_value_H);
        assign_C->add_control_dependency(read_value_C);
    }
    auto res = compare_functions(f, f_ref);
    ASSERT_TRUE(res.first) << res.second;
}
|
||||
|
||||
// Builds an LSTM Loop whose trip count is taken from the sequence input shape,
// replaces the 10-step sequence input with a 1-step one, runs LowLatency2 and
// compares the result with a reference graph that contains a single LSTMCell
// with ReadValue/Assign state handling and no Loop.
TEST(TransformationTests, LowLatency2_LSTM_Loop_Reshape) {
    const size_t input_size = 16;
    const size_t hidden_size = 128;
    const Shape state_shape{1, hidden_size};
    const Shape step_shape{1, 1, input_size};

    std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
    {
        // Outer inputs: the sequence and the initial hidden/cell states.
        auto sequence = std::make_shared<Parameter>(element::f32, Shape{10, 1, input_size});
        auto hidden_init = std::make_shared<Parameter>(element::f32, state_shape);
        auto cell_init = std::make_shared<Parameter>(element::f32, state_shape);

        // Loop body parameters.
        auto step_in = std::make_shared<Parameter>(element::f32, step_shape);
        auto hidden_in = std::make_shared<Parameter>(element::f32, state_shape);
        auto cell_in = std::make_shared<Parameter>(element::f32, state_shape);

        auto loop_body = createLSTMBody(step_in, hidden_in, cell_in, true);
        auto body_results = loop_body->get_results();

        // Trip count is gathered from the ShapeOf the sequence input.
        auto sequence_shape = std::make_shared<ShapeOf>(sequence);
        const auto trip_count = std::make_shared<Gather>(sequence_shape,
                                                         Constant::create(ngraph::element::i64, {1}, {0}),
                                                         Constant::create(ngraph::element::i64, {1}, {0}));
        auto run_condition = std::make_shared<Constant>(element::boolean, Shape{}, true);

        auto lstm_loop = std::make_shared<Loop>(trip_count, run_condition);
        lstm_loop->set_special_body_ports({-1, 3});
        lstm_loop->set_function(loop_body);
        lstm_loop->set_friendly_name("LSTMLoop");

        // Input registration order is kept: C state, sliced X, H state.
        lstm_loop->set_merged_input(cell_in, cell_init, body_results[2]);
        lstm_loop->set_sliced_input(step_in, sequence, 0, 1, 1, -1, 0);
        lstm_loop->set_merged_input(hidden_in, hidden_init, body_results[0]);

        // Output registration order is kept: last H value first, concatenated slices second.
        lstm_loop->get_iter_value(body_results[0], -1);
        lstm_loop->get_concatenated_slices(body_results[1], 0, 1, 1, -1, 0);

        auto concat_result = std::make_shared<Result>(lstm_loop->output(1));
        auto last_h_result = std::make_shared<Result>(lstm_loop->output(0));
        f = std::make_shared<Function>(NodeVector{concat_result, last_h_result},
                                       ParameterVector{sequence, hidden_init, cell_init});

        // Reshape: change the number of iterations of the Loop, 10 -> 1.
        auto single_step_sequence = std::make_shared<Parameter>(element::f32, step_shape);
        f->replace_parameter(0, single_step_sequence);
        f->validate_nodes_and_infer_types();

        pass::Manager pass_manager;
        pass_manager.register_pass<pass::InitNodeInfo>();
        pass_manager.register_pass<pass::LowLatency2>();
        pass_manager.run_passes(f);

        ASSERT_NO_THROW(check_rt_info(f));
    }
    {
        auto step_in = std::make_shared<Parameter>(element::f32, step_shape);
        auto hidden_in = std::make_shared<Parameter>(element::f32, state_shape);
        auto cell_in = std::make_shared<Parameter>(element::f32, state_shape);

        const std::string hidden_var_id("LSTMTensorIterator/variable0");
        const std::string cell_var_id("LSTMTensorIterator/variable1");
        auto hidden_var =
            std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, hidden_var_id});
        auto cell_var =
            std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, cell_var_id});
        auto hidden_read = std::make_shared<ReadValue>(create_init_subgraph(hidden_in), hidden_var);
        auto cell_read = std::make_shared<ReadValue>(create_init_subgraph(cell_in), cell_var);

        // Former loop body, now part of the main graph.
        auto zero_axis = Constant::create(element::i64, Shape{}, {0});
        auto squeezed_step = std::make_shared<Squeeze>(step_in, zero_axis);

        // Zero-filled weights, recurrence weights and biases.
        const auto weights_data = std::vector<float>(4 * hidden_size * input_size, 0);
        const auto recurrence_data = std::vector<float>(4 * hidden_size * hidden_size, 0);
        const auto bias_data = std::vector<float>(4 * hidden_size, 0);
        auto weights = Constant::create(element::f32, Shape{4 * hidden_size, input_size}, weights_data);
        auto recurrence = Constant::create(element::f32, Shape{4 * hidden_size, hidden_size}, recurrence_data);
        auto bias = Constant::create(element::f32, Shape{4 * hidden_size}, bias_data);

        auto cell = std::make_shared<LSTMCell>(squeezed_step, hidden_read, cell_read,
                                               weights, recurrence, bias, hidden_size);
        auto hidden_assign = std::make_shared<Assign>(cell->output(0), hidden_var);
        auto cell_assign = std::make_shared<Assign>(cell->output(1), cell_var);
        auto unsqueezed_h = std::make_shared<Unsqueeze>(cell->output(0), zero_axis);
        auto concat_result = std::make_shared<Result>(insert_identity(unsqueezed_h));
        auto last_h_result = std::make_shared<Result>(insert_identity(cell->output(0)));

        f_ref = std::make_shared<Function>(OutputVector{last_h_result, concat_result},
                                           ParameterVector{step_in, hidden_in, cell_in});
        f_ref->add_sinks({cell_assign, hidden_assign});
        hidden_assign->add_control_dependency(hidden_read);
        cell_assign->add_control_dependency(cell_read);
    }

    const auto comparison = compare_functions(f, f_ref);
    ASSERT_TRUE(comparison.first) << comparison.second;
}
|
||||
|
||||
|
||||
// Runs LowLatency2 on an LSTM Loop with a constant trip count of 10.
// The reference graph still contains the Loop; its merged H/C states are read
// from ReadValue nodes and the last-iteration values are written back via Assign.
TEST(TransformationTests, LowLatency2_LSTM_Loop_several_iterations) {
    const size_t input_size = 16;
    const size_t hidden_size = 128;
    const Shape sequence_shape{10, 1, input_size};
    const Shape state_shape{1, hidden_size};
    const Shape step_shape{1, 1, input_size};

    std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
    {
        // Outer inputs: the sequence and the initial hidden/cell states.
        auto sequence = std::make_shared<Parameter>(element::f32, sequence_shape);
        auto hidden_init = std::make_shared<Parameter>(element::f32, state_shape);
        auto cell_init = std::make_shared<Parameter>(element::f32, state_shape);

        // Loop body parameters.
        auto step_in = std::make_shared<Parameter>(element::f32, step_shape);
        auto hidden_in = std::make_shared<Parameter>(element::f32, state_shape);
        auto cell_in = std::make_shared<Parameter>(element::f32, state_shape);

        auto loop_body = createLSTMBody(step_in, hidden_in, cell_in, true);
        auto body_results = loop_body->get_results();

        auto trip_count = std::make_shared<Constant>(element::i64, Shape{}, 10);
        auto run_condition = std::make_shared<Constant>(element::boolean, Shape{}, true);

        auto lstm_loop = std::make_shared<Loop>(trip_count, run_condition);
        lstm_loop->set_special_body_ports({-1, 3});
        lstm_loop->set_function(loop_body);
        lstm_loop->set_friendly_name("LSTMLoop");

        // Input registration order is kept: C state, sliced X, H state.
        lstm_loop->set_merged_input(cell_in, cell_init, body_results[2]);
        lstm_loop->set_sliced_input(step_in, sequence, 0, 1, 1, -1, 0);
        lstm_loop->set_merged_input(hidden_in, hidden_init, body_results[0]);

        // Output registration order is kept: last H value first, concatenated slices second.
        lstm_loop->get_iter_value(body_results[0], -1);
        lstm_loop->get_concatenated_slices(body_results[1], 0, 1, 1, -1, 0);

        auto concat_result = std::make_shared<Result>(lstm_loop->output(1));
        auto last_h_result = std::make_shared<Result>(lstm_loop->output(0));
        f = std::make_shared<Function>(NodeVector{concat_result, last_h_result},
                                       ParameterVector{sequence, hidden_init, cell_init});

        pass::Manager pass_manager;
        pass_manager.register_pass<pass::InitNodeInfo>();
        // NOTE(review): the `true` argument is presumably use_const_initializer - confirm against the pass declaration.
        pass_manager.register_pass<pass::LowLatency2>(true);
        pass_manager.run_passes(f);

        ASSERT_NO_THROW(check_rt_info(f));
    }
    {
        auto sequence = std::make_shared<Parameter>(element::f32, sequence_shape);
        auto hidden_init = std::make_shared<Parameter>(element::f32, state_shape);
        auto cell_init = std::make_shared<Parameter>(element::f32, state_shape);

        const std::string hidden_var_id("LSTMTensorIterator/variable0");
        const std::string cell_var_id("LSTMTensorIterator/variable1");
        auto hidden_var =
            std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, hidden_var_id});
        auto cell_var =
            std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, cell_var_id});
        auto hidden_read = std::make_shared<ReadValue>(create_init_subgraph(hidden_init), hidden_var);
        auto cell_read = std::make_shared<ReadValue>(create_init_subgraph(cell_init), cell_var);

        // Loop body parameters.
        auto step_in = std::make_shared<Parameter>(element::f32, step_shape);
        auto hidden_in = std::make_shared<Parameter>(element::f32, state_shape);
        auto cell_in = std::make_shared<Parameter>(element::f32, state_shape);

        // Loop body operations.
        auto zero_axis = Constant::create(element::i64, Shape{}, {0});
        auto squeezed_step = std::make_shared<Squeeze>(step_in, zero_axis);

        // Zero-filled weights, recurrence weights and biases.
        const auto weights_data = std::vector<float>(4 * hidden_size * input_size, 0);
        const auto recurrence_data = std::vector<float>(4 * hidden_size * hidden_size, 0);
        const auto bias_data = std::vector<float>(4 * hidden_size, 0);
        auto weights = Constant::create(element::f32, Shape{4 * hidden_size, input_size}, weights_data);
        auto recurrence = Constant::create(element::f32, Shape{4 * hidden_size, hidden_size}, recurrence_data);
        auto bias = Constant::create(element::f32, Shape{4 * hidden_size}, bias_data);

        auto cell = std::make_shared<LSTMCell>(squeezed_step, hidden_in, cell_in,
                                               weights, recurrence, bias, hidden_size);
        auto body_h = std::make_shared<Result>(cell->output(0));
        auto unsqueezed_h = std::make_shared<Unsqueeze>(cell->output(0), zero_axis);
        auto body_h_slice = std::make_shared<Result>(unsqueezed_h);
        auto body_c = std::make_shared<Result>(cell->output(1));
        auto body_condition = std::make_shared<Constant>(element::boolean, Shape{1}, true);
        auto loop_body = std::make_shared<Function>(OutputVector{body_h, body_h_slice, body_c, body_condition},
                                                    ParameterVector{step_in, hidden_in, cell_in});

        auto trip_count = std::make_shared<Constant>(element::i64, Shape{}, 10);
        auto run_condition = std::make_shared<Constant>(element::boolean, Shape{}, true);

        auto lstm_loop = std::make_shared<Loop>(trip_count, run_condition);
        lstm_loop->set_special_body_ports({-1, 3});
        lstm_loop->set_function(loop_body);
        lstm_loop->set_friendly_name("LSTMLoop");

        // The merged state inputs are fed from ReadValue rather than from the Parameters.
        lstm_loop->set_merged_input(cell_in, cell_read, body_c);
        lstm_loop->set_sliced_input(step_in, sequence, 0, 1, 1, -1, 0);
        lstm_loop->set_merged_input(hidden_in, hidden_read, body_h);

        auto last_h = lstm_loop->get_iter_value(body_h, -1);
        auto h_slices = lstm_loop->get_concatenated_slices(body_h_slice, 0, 1, 1, -1, 0);
        auto last_c = lstm_loop->get_iter_value(body_c, -1);

        // The last-iteration H and C values are stored back into the variables.
        auto hidden_assign = std::make_shared<Assign>(last_h, hidden_var);
        auto cell_assign = std::make_shared<Assign>(last_c, cell_var);
        auto concat_result = std::make_shared<Result>(h_slices);
        auto last_h_result = std::make_shared<Result>(last_h);

        f_ref = std::make_shared<Function>(OutputVector{last_h_result, concat_result},
                                           ParameterVector{sequence, hidden_init, cell_init});
        f_ref->add_sinks({cell_assign, hidden_assign});
        hidden_assign->add_control_dependency(hidden_read);
        cell_assign->add_control_dependency(cell_read);
    }

    const auto comparison = compare_functions(f, f_ref);
    ASSERT_TRUE(comparison.first) << comparison.second;
}
|
||||
|
||||
// Checks that LowLatency2 can be applied after the deprecated LowLatency (v1) pass.
// The test runs v1 alone on a clone of the function, then v1 followed by v2 on the
// original function. It only asserts that neither pipeline throws; the resulting
// graphs are not compared against a reference.
TEST(TransformationTests, LowLatencyLSTM_LLTv1_LLTv2) {
    std::shared_ptr<Function> f(nullptr);
    {
        auto X = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        auto H_init = std::make_shared<Parameter>(element::f32, Shape{1, 128});
        auto C_init = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
        auto C_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        // Body
        auto axis = Constant::create(element::i64, Shape{}, {0});
        auto squeeze = std::make_shared<Squeeze>(Xi, axis);

        auto w_val = std::vector<float>(512 * 16, 0);
        auto r_val = std::vector<float>(512 * 128, 0);
        auto b_val = std::vector<float>(512, 0);
        auto W = Constant::create(element::f32, Shape{512, 16}, w_val);
        auto R = Constant::create(element::f32, Shape{512, 128}, r_val);
        auto B = Constant::create(element::f32, Shape{512}, b_val);

        auto lstm_cell = std::make_shared<LSTMCell>(squeeze, H_t, C_t, W, R, B, 128);
        auto res_1 = std::make_shared<Result>(lstm_cell->output(0));
        auto unsqueeze = std::make_shared<Unsqueeze>(lstm_cell->output(0), axis);
        auto res_2 = std::make_shared<Result>(unsqueeze);
        auto res_3 = std::make_shared<Result>(lstm_cell->output(1));
        auto body = std::make_shared<Function>(OutputVector{res_1, res_2, res_3}, ParameterVector{Xi, H_t, C_t});

        auto tensor_iterator = std::make_shared<TensorIterator>();
        tensor_iterator->set_body(body);
        tensor_iterator->set_friendly_name("LSTMTensorIterator");

        tensor_iterator->set_merged_input(C_t, C_init, res_3);
        tensor_iterator->set_sliced_input(Xi, X, 0, 1, 1, -1, 0);
        tensor_iterator->set_merged_input(H_t, H_init, res_1);

        // out0/out1 are not referenced below; the Results are attached through
        // tensor_iterator->output(i) instead.
        auto out0 = tensor_iterator->get_iter_value(res_1, -1);
        auto out1 = tensor_iterator->get_concatenated_slices(res_2, 0, 1, 1, -1, 0);

        auto res_ti_1 = std::make_shared<Result>(tensor_iterator->output(1));
        auto res_ti_2 = std::make_shared<Result>(tensor_iterator->output(0));
        f = std::make_shared<Function>(NodeVector{res_ti_1, res_ti_2},
                                       ParameterVector{X, H_init, C_init});

        // Pipeline 1: LLT v1 only, applied to a clone so that `f` stays untouched.
        auto f_2 = ngraph::clone_function(*f);
        pass::Manager manager_2;
        manager_2.register_pass<pass::InitNodeInfo>();
        NGRAPH_SUPPRESS_DEPRECATED_START
        manager_2.register_pass<ngraph::pass::LowLatency>();
        NGRAPH_SUPPRESS_DEPRECATED_END
        EXPECT_NO_THROW(manager_2.run_passes(f_2));

        // Pipeline 2: LLT v1 followed by LLT v2 on the original function.
        pass::Manager manager;
        manager.register_pass<pass::InitNodeInfo>();
        NGRAPH_SUPPRESS_DEPRECATED_START
        manager.register_pass<ngraph::pass::LowLatency>();
        NGRAPH_SUPPRESS_DEPRECATED_END
        // LLT v2 doesn't insert Assign/ReadValue ops, they are already inserted
        // but unrolls TI/Loop
        manager.register_pass<pass::LowLatency2>();

        EXPECT_NO_THROW(manager.run_passes(f));
    }
}
|
@ -10,6 +10,13 @@ using namespace LayerTestsDefinitions;
|
||||
|
||||
namespace {
|
||||
|
||||
// Memory transformations the MemoryTest instantiation below is combined with.
// Declared const like the other parameter tables in this file.
const std::vector<ngraph::helpers::MemoryTransformation> transformation {
    ngraph::helpers::MemoryTransformation::NONE,
    ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2,
    ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API,
    ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_ORIGINAL_INIT,
};
|
||||
|
||||
const std::vector<InferenceEngine::SizeVector> inShapes = {
|
||||
{3},
|
||||
{100, 100},
|
||||
@ -27,6 +34,7 @@ const std::vector<int64_t> iterationCount {
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(smoke_MemoryTest, MemoryTest,
|
||||
::testing::Combine(
|
||||
::testing::ValuesIn(transformation),
|
||||
::testing::ValuesIn(iterationCount),
|
||||
::testing::ValuesIn(inShapes),
|
||||
::testing::ValuesIn(inputPrecisions),
|
||||
|
@ -0,0 +1,45 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <subgraph_tests/memory_LSTMCell.hpp>
|
||||
#include "common_test_utils/test_constants.hpp"
|
||||
|
||||
namespace SubgraphTestsDefinitions {
|
||||
std::vector<ngraph::helpers::MemoryTransformation> transformation {
|
||||
ngraph::helpers::MemoryTransformation::NONE,
|
||||
ngraph::helpers::MemoryTransformation::LOW_LATENCY,
|
||||
ngraph::helpers::MemoryTransformation::LOW_LATENCY_REGULAR_API,
|
||||
ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2,
|
||||
ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API
|
||||
};
|
||||
|
||||
std::vector<size_t> input_sizes = {
|
||||
80,
|
||||
32,
|
||||
64,
|
||||
100,
|
||||
25
|
||||
};
|
||||
|
||||
std::vector<size_t> hidden_sizes = {
|
||||
128,
|
||||
200,
|
||||
300,
|
||||
24,
|
||||
32,
|
||||
};
|
||||
|
||||
std::map<std::string, std::string> additional_config = {
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(smoke_MemoryLSTMCellTest, MemoryLSTMCellTest,
|
||||
::testing::Combine(
|
||||
::testing::ValuesIn(transformation),
|
||||
::testing::Values(CommonTestUtils::DEVICE_CPU),
|
||||
::testing::Values(InferenceEngine::Precision::FP32),
|
||||
::testing::ValuesIn(input_sizes),
|
||||
::testing::ValuesIn(hidden_sizes),
|
||||
::testing::Values(additional_config)),
|
||||
MemoryLSTMCellTest::getTestCaseName);
|
||||
} // namespace SubgraphTestsDefinitions
|
@ -7,6 +7,15 @@
|
||||
|
||||
namespace SubgraphTestsDefinitions {
|
||||
namespace {
|
||||
|
||||
// Memory transformations the MultipleLSTMCellTest instantiation is combined with.
// The table is never modified, so it is declared const.
const std::vector<ngraph::helpers::MemoryTransformation> transformation {
    ngraph::helpers::MemoryTransformation::NONE,
    ngraph::helpers::MemoryTransformation::LOW_LATENCY,
    ngraph::helpers::MemoryTransformation::LOW_LATENCY_REGULAR_API,
    ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2,
    ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API
};
|
||||
|
||||
std::vector<size_t> input_sizes = {
|
||||
80,
|
||||
32,
|
||||
@ -28,6 +37,7 @@ std::map<std::string, std::string> additional_config = {
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(MultipleLSTMCellTest, MultipleLSTMCellTest,
|
||||
::testing::Combine(
|
||||
::testing::ValuesIn(transformation),
|
||||
::testing::Values(CommonTestUtils::DEVICE_CPU),
|
||||
::testing::Values(InferenceEngine::Precision::FP32),
|
||||
::testing::ValuesIn(input_sizes),
|
||||
|
@ -10,9 +10,17 @@ using namespace LayerTestsDefinitions;
|
||||
|
||||
namespace {
|
||||
|
||||
// Memory transformations the MemoryTest instantiation below is combined with.
// Declared const like the other parameter tables in this file.
const std::vector<ngraph::helpers::MemoryTransformation> transformation {
    ngraph::helpers::MemoryTransformation::NONE,
    ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2,
    ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API,
    ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_ORIGINAL_INIT
};
|
||||
|
||||
const std::vector<InferenceEngine::SizeVector> inShapes = {
|
||||
{1, 1},
|
||||
{1, 2}
|
||||
{1, 2},
|
||||
{1, 10}
|
||||
};
|
||||
|
||||
const std::vector<InferenceEngine::Precision> inputPrecisions = {
|
||||
@ -22,11 +30,13 @@ const std::vector<InferenceEngine::Precision> inputPrecisions = {
|
||||
const std::vector<int64_t> iterationCount {
|
||||
1,
|
||||
3,
|
||||
4,
|
||||
10
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(smoke_MemoryTest, MemoryTest,
|
||||
::testing::Combine(
|
||||
::testing::ValuesIn(transformation),
|
||||
::testing::ValuesIn(iterationCount),
|
||||
::testing::ValuesIn(inShapes),
|
||||
::testing::ValuesIn(inputPrecisions),
|
||||
|
@ -64,5 +64,13 @@ std::vector<std::string> disabledTestPatterns() {
|
||||
R"(.*CachingSupport.*_batch2_.*)",
|
||||
// TODO: Issue 51525
|
||||
R"(.*CachingSupport.*KSOFunction.*)",
|
||||
// TODO: Issue 57363 (Param -> Result subgraphs)
|
||||
R"(.*smoke_MemoryTest.*LOW_LATENCY.*iteration_count=1_.*)",
|
||||
// TODO: Issue 57368 (accuracy)
|
||||
R"(.*smoke_MemoryTest.*LOW_LATENCY.*IS=\(1.10\).*)",
|
||||
R"(.*smoke_MemoryTest.*iteration_count=3.*IS=\(1.10\).*)",
|
||||
R"(.*smoke_MemoryTest.*iteration_count=4.*IS=\(1.10\).*)",
|
||||
R"(.*smoke_MemoryTest.*iteration_count=10.*IS=\(1.10\).*)",
|
||||
R"(.*smoke_MemoryTest.*LOW_LATENCY.*iteration_count=10.*IS=\(1.2\).*)",
|
||||
};
|
||||
}
|
||||
|
@ -6,6 +6,14 @@
|
||||
#include "common_test_utils/test_constants.hpp"
|
||||
|
||||
namespace SubgraphTestsDefinitions {
|
||||
// Memory transformations the MemoryLSTMCellTest instantiation is combined with.
// Declared const: the table is never modified, and a const namespace-scope
// object has internal linkage, so it cannot clash with same-named tables in
// other test sources.
const std::vector<ngraph::helpers::MemoryTransformation> transformation {
    ngraph::helpers::MemoryTransformation::NONE,
    ngraph::helpers::MemoryTransformation::LOW_LATENCY,
    ngraph::helpers::MemoryTransformation::LOW_LATENCY_REGULAR_API,
    ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2,
    ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API
};
|
||||
|
||||
std::vector<size_t> input_sizes = {
|
||||
80,
|
||||
32,
|
||||
@ -30,6 +38,7 @@ namespace SubgraphTestsDefinitions {
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(smoke_MemoryLSTMCellTest, MemoryLSTMCellTest,
|
||||
::testing::Combine(
|
||||
::testing::ValuesIn(transformation),
|
||||
::testing::Values(CommonTestUtils::DEVICE_GNA),
|
||||
::testing::Values(InferenceEngine::Precision::FP32),
|
||||
::testing::ValuesIn(input_sizes),
|
||||
|
@ -7,6 +7,15 @@
|
||||
|
||||
namespace SubgraphTestsDefinitions {
|
||||
namespace {
|
||||
|
||||
// Memory transformations the MultipleLSTMCellTest instantiation is combined with.
// The table is never modified, so it is declared const.
const std::vector<ngraph::helpers::MemoryTransformation> transformation {
    ngraph::helpers::MemoryTransformation::NONE,
    ngraph::helpers::MemoryTransformation::LOW_LATENCY,
    ngraph::helpers::MemoryTransformation::LOW_LATENCY_REGULAR_API,
    ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2,
    ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API
};
|
||||
|
||||
std::vector<size_t> input_sizes = {
|
||||
80,
|
||||
32,
|
||||
@ -31,6 +40,7 @@ std::map<std::string, std::string> additional_config = {
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(MultipleLSTMCellTest, MultipleLSTMCellTest,
|
||||
::testing::Combine(
|
||||
::testing::ValuesIn(transformation),
|
||||
::testing::Values(CommonTestUtils::DEVICE_GNA),
|
||||
::testing::Values(InferenceEngine::Precision::FP32),
|
||||
::testing::ValuesIn(input_sizes),
|
||||
|
@ -39,7 +39,7 @@ TEST_P(Basic_LSTM_S, CompareWithRefImpl_LowLatencyTransformation) {
|
||||
|
||||
// Apply LowLatency and UnrollTensorIterator transformations
|
||||
ngraph::pass::Manager manager;
|
||||
manager.register_pass<ngraph::pass::LowLatency>(); // LowLatency enables UnrollTI
|
||||
manager.register_pass<ngraph::pass::LowLatency2>(); // LowLatency enables UnrollTI
|
||||
manager.run_passes(function);
|
||||
LoadNetwork();
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
|
@ -12,12 +12,4 @@ TEST_P(MemoryLSTMCellTest, CompareWithRefs) {
|
||||
Run();
|
||||
};
|
||||
|
||||
TEST_P(MemoryLSTMCellTest, CompareWithRefs_LowLatencyTransformation) {
|
||||
RunLowLatency();
|
||||
};
|
||||
|
||||
TEST_P(MemoryLSTMCellTest, CompareWithRefs_LowLatencyRegularAPITransformation) {
|
||||
RunLowLatency(true);
|
||||
};
|
||||
|
||||
} // namespace SubgraphTestsDefinitions
|
||||
|
@ -12,12 +12,4 @@ TEST_P(MultipleLSTMCellTest, CompareWithRefs) {
|
||||
Run();
|
||||
};
|
||||
|
||||
TEST_P(MultipleLSTMCellTest, CompareWithRefs_LowLatencyTransformation) {
|
||||
RunLowLatency();
|
||||
};
|
||||
|
||||
TEST_P(MultipleLSTMCellTest, CompareWithRefs_LowLatencyRegularAPITransformation) {
|
||||
RunLowLatency(true);
|
||||
};
|
||||
|
||||
} // namespace SubgraphTestsDefinitions
|
||||
|
@ -14,6 +14,7 @@
|
||||
namespace LayerTestsDefinitions {
|
||||
|
||||
using MemoryTestParams = std::tuple<
|
||||
ngraph::helpers::MemoryTransformation, // Apply Memory transformation
|
||||
int64_t, // iterationCount
|
||||
InferenceEngine::SizeVector, // inputShape
|
||||
InferenceEngine::Precision, // netPrecision
|
||||
@ -28,9 +29,17 @@ protected:
|
||||
std::vector<std::pair<ngraph::element::Type, std::vector<std::uint8_t>>> CalculateRefs() override;
|
||||
void SetUp() override;
|
||||
private:
|
||||
void CreateTIFunc();
|
||||
void CreateCommonFunc();
|
||||
void ApplyLowLatency();
|
||||
|
||||
InferenceEngine::Precision netPrecision;
|
||||
ngraph::EvaluationContext eval_context;
|
||||
ngraph::helpers::MemoryTransformation transformation;
|
||||
|
||||
int64_t iteration_count;
|
||||
ngraph::element::Type ngPrc;
|
||||
InferenceEngine::SizeVector inputShape;
|
||||
};
|
||||
|
||||
} // namespace LayerTestsDefinitions
|
||||
|
@ -10,6 +10,7 @@
|
||||
|
||||
namespace SubgraphTestsDefinitions {
|
||||
typedef std::tuple<
|
||||
ngraph::helpers::MemoryTransformation, // Apply Memory transformation
|
||||
std::string, // Target device name
|
||||
InferenceEngine::Precision, // Network precision
|
||||
size_t, // Input size
|
||||
@ -21,9 +22,13 @@ class MemoryLSTMCellTest : public LayerTestsUtils::LayerTestsCommon,
|
||||
public testing::WithParamInterface<memoryLSTMCellParams> {
|
||||
private:
|
||||
// you have to Unroll TI manually and remove memory until ngraph supports it
|
||||
// since we switching models we need to generate and save weights biases and inputs in SetUp
|
||||
void switchToNgraphFriendlyModel();
|
||||
void CreatePureTensorIteratorModel();
|
||||
// since we switching models we need to generate and save weights biases and inputs in SetUp
|
||||
void InitMemory();
|
||||
void ApplyLowLatency();
|
||||
|
||||
ngraph::helpers::MemoryTransformation transformation;
|
||||
std::vector<float> input_bias;
|
||||
std::vector<float> input_weights;
|
||||
std::vector<float> hidden_memory_init;
|
||||
@ -34,7 +39,6 @@ private:
|
||||
protected:
|
||||
void SetUp() override;
|
||||
void Run() override;
|
||||
void RunLowLatency(bool regular_api = false);
|
||||
public:
|
||||
static std::string getTestCaseName(const testing::TestParamInfo<memoryLSTMCellParams> &obj);
|
||||
};
|
||||
|
@ -10,6 +10,7 @@
|
||||
|
||||
namespace SubgraphTestsDefinitions {
|
||||
typedef std::tuple<
|
||||
ngraph::helpers::MemoryTransformation, // Apply Memory transformation
|
||||
std::string, // Target device name
|
||||
InferenceEngine::Precision, // Network precision
|
||||
size_t, // Input size
|
||||
@ -21,9 +22,12 @@ class MultipleLSTMCellTest : public LayerTestsUtils::LayerTestsCommon,
|
||||
public testing::WithParamInterface<multipleLSTMCellParams> {
|
||||
private:
|
||||
// you have to Unroll TI manually and remove memory until ngraph supports it
|
||||
// since we switching models we need to generate and save weights biases and inputs in SetUp
|
||||
void switchToNgraphFriendlyModel();
|
||||
void CreatePureTensorIteratorModel();
|
||||
// since we switching models we need to generate and save weights biases and inputs in SetUp
|
||||
void InitMemory();
|
||||
void ApplyLowLatency();
|
||||
|
||||
size_t hiddenSize;
|
||||
std::vector<float> input_bias;
|
||||
std::vector<float> input_weights;
|
||||
@ -33,10 +37,10 @@ private:
|
||||
std::vector<float> weights_2_vals;
|
||||
std::vector<float> reccurrenceWeights_vals;
|
||||
std::vector<float> bias_vals;
|
||||
ngraph::helpers::MemoryTransformation transformation;
|
||||
protected:
|
||||
void SetUp() override;
|
||||
void Run() override;
|
||||
void RunLowLatency(bool regular_api = false);
|
||||
public:
|
||||
static std::string getTestCaseName(const testing::TestParamInfo<multipleLSTMCellParams> &obj);
|
||||
};
|
||||
|
@ -3,10 +3,18 @@
|
||||
//
|
||||
|
||||
#include <signal.h>
|
||||
#include <ie_transformations.hpp>
|
||||
#include <transformations/control_flow/unroll_tensor_iterator.hpp>
|
||||
#include <transformations/serialize.hpp>
|
||||
#include <functional_test_utils/core_config.hpp>
|
||||
#include "ngraph/opsets/opset7.hpp"
|
||||
#include "ngraph_functions/builders.hpp"
|
||||
#include "ngraph/pass/low_latency.hpp"
|
||||
#include "shared_test_classes/single_layer/memory.hpp"
|
||||
|
||||
using namespace ngraph;
|
||||
using namespace opset7;
|
||||
|
||||
namespace LayerTestsDefinitions {
|
||||
|
||||
std::string MemoryTest::getTestCaseName(const testing::TestParamInfo<MemoryTestParams> &obj) {
|
||||
@ -14,9 +22,11 @@ namespace LayerTestsDefinitions {
|
||||
InferenceEngine::Precision netPrecision;
|
||||
InferenceEngine::SizeVector inputShape;
|
||||
std::string targetDevice;
|
||||
std::tie(iteration_count, inputShape, netPrecision, targetDevice) = obj.param;
|
||||
ngraph::helpers::MemoryTransformation transformation;
|
||||
std::tie(transformation, iteration_count, inputShape, netPrecision, targetDevice) = obj.param;
|
||||
|
||||
std::ostringstream result;
|
||||
result << "transformation=" << transformation << "_";
|
||||
result << "iteration_count=" << iteration_count << "_";
|
||||
result << "IS=" << CommonTestUtils::vec2str(inputShape) << "_";
|
||||
result << "netPRC=" << netPrecision.name() << "_";
|
||||
@ -26,20 +36,17 @@ namespace LayerTestsDefinitions {
|
||||
}
|
||||
|
||||
void MemoryTest::SetUp() {
|
||||
using namespace ngraph;
|
||||
InferenceEngine::SizeVector inputShape;
|
||||
std::tie(iteration_count, inputShape, netPrecision, targetDevice) = this->GetParam();
|
||||
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
|
||||
std::tie(transformation, iteration_count, inputShape, netPrecision, targetDevice) = this->GetParam();
|
||||
ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
|
||||
|
||||
auto param = ngraph::builder::makeParams(ngPrc, {inputShape});
|
||||
auto variable = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, "v0"});
|
||||
auto read_value = std::make_shared<opset7::ReadValue>(param.at(0), variable);
|
||||
auto add = std::make_shared<opset7::Add>(read_value, param.at(0));
|
||||
auto assign = std::make_shared<opset7::Assign>(add, variable);
|
||||
auto res = std::make_shared<opset7::Result>(add);
|
||||
function = std::make_shared<Function>(ResultVector{res}, SinkVector{assign}, param, "TestMemory");
|
||||
if (transformation == ngraph::helpers::MemoryTransformation::NONE) {
|
||||
CreateCommonFunc();
|
||||
} else {
|
||||
CreateTIFunc();
|
||||
ApplyLowLatency();
|
||||
}
|
||||
|
||||
auto hostTensor = std::make_shared<ngraph::HostTensor>(ngPrc, inputShape);
|
||||
auto hostTensor = std::make_shared<HostTensor>(ngPrc, inputShape);
|
||||
auto variable_context = std::make_shared<VariantWrapper<VariableContext>>(VariableContext());
|
||||
auto variable_value = std::make_shared<VariableValue>(hostTensor);
|
||||
variable_context->get().set_variable_value(function->get_variable_by_id("v0"), variable_value);
|
||||
@ -48,6 +55,7 @@ namespace LayerTestsDefinitions {
|
||||
|
||||
|
||||
void MemoryTest::Run() {
|
||||
SKIP_IF_CURRENT_TEST_IS_DISABLED()
|
||||
using namespace LayerTestsUtils;
|
||||
auto crashHandler = [](int errCode) {
|
||||
auto &s = Summary::getInstance();
|
||||
@ -68,7 +76,13 @@ namespace LayerTestsDefinitions {
|
||||
}
|
||||
|
||||
try {
|
||||
LoadNetwork();
|
||||
if (transformation != ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API) {
|
||||
LoadNetwork();
|
||||
} else {
|
||||
CoreConfiguration(this);
|
||||
ConfigureNetwork();
|
||||
executableNetwork = core->LoadNetwork(cnnNetwork, targetDevice, configuration);
|
||||
}
|
||||
GenerateInputs();
|
||||
for (int64_t i = 0; i < iteration_count; ++i) {
|
||||
Infer();
|
||||
@ -88,12 +102,12 @@ namespace LayerTestsDefinitions {
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<std::pair<ngraph::element::Type, std::vector<std::uint8_t>>> MemoryTest::CalculateRefs() {
|
||||
std::vector<std::pair<element::Type, std::vector<std::uint8_t>>> MemoryTest::CalculateRefs() {
|
||||
using namespace ngraph;
|
||||
function->validate_nodes_and_infer_types();
|
||||
|
||||
auto referenceInputs = std::vector<std::vector<uint8_t>>(inputs.size());
|
||||
auto refInputsTypes = std::vector<ngraph::element::Type>(inputs.size());
|
||||
auto refInputsTypes = std::vector<element::Type>(inputs.size());
|
||||
HostTensorVector inputTensors;
|
||||
for (auto & input : inputs) {
|
||||
const auto &dataSize = input->byteSize();
|
||||
@ -104,17 +118,25 @@ namespace LayerTestsDefinitions {
|
||||
const auto lockedMemory = memory->wmap();
|
||||
const auto buffer = lockedMemory.as<const std::uint8_t *>();
|
||||
|
||||
auto hostTensor = std::make_shared<ngraph::HostTensor>(FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(tensorDesc.getPrecision()),
|
||||
auto hostTensor = std::make_shared<HostTensor>(FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(tensorDesc.getPrecision()),
|
||||
tensorDesc.getDims());
|
||||
hostTensor->write(buffer, dataSize);
|
||||
inputTensors.push_back(hostTensor);
|
||||
}
|
||||
|
||||
// evaluate method is not implemented for TI op.
|
||||
ngraph::pass::Manager manager;
|
||||
manager.register_pass<ngraph::pass::UnrollTensorIterator>();
|
||||
manager.run_passes(function);
|
||||
|
||||
const auto &outInfo = executableNetwork.GetOutputsInfo();
|
||||
HostTensorVector outputTensors(outInfo.size(), std::make_shared<ngraph::HostTensor>());
|
||||
HostTensorVector outputTensors(outInfo.size());
|
||||
for (auto& outTensor : outputTensors) {
|
||||
outTensor = std::make_shared<HostTensor>();
|
||||
}
|
||||
function->evaluate(outputTensors, inputTensors, eval_context);
|
||||
|
||||
std::vector<std::pair<ngraph::element::Type, std::vector<std::uint8_t>>> outputs(outInfo.size());
|
||||
std::vector<std::pair<element::Type, std::vector<std::uint8_t>>> outputs(outInfo.size());
|
||||
for (size_t idx = 0; idx < outInfo.size(); ++idx) {
|
||||
outputs[idx].first = outputTensors[idx]->get_element_type();
|
||||
outputs[idx].second.resize(outputTensors[idx]->get_size_in_bytes());
|
||||
@ -123,5 +145,61 @@ namespace LayerTestsDefinitions {
|
||||
return outputs;
|
||||
}
|
||||
|
||||
void MemoryTest::CreateTIFunc() {
|
||||
auto param = builder::makeParams(ngPrc, {inputShape}).at(0);
|
||||
std::vector<std::vector<size_t>> shape = {{static_cast<size_t>(iteration_count), 1}};
|
||||
auto iter_count = builder::makeParams(ngPrc, shape).at(0);
|
||||
|
||||
// Body
|
||||
auto X = builder::makeParams(ngPrc, {inputShape}).at(0);
|
||||
auto Y = builder::makeParams(ngPrc, {inputShape}).at(0);
|
||||
auto Iter = builder::makeParams(ngPrc, {Shape{1, 1}}).at(0);
|
||||
auto add = std::make_shared<Add>(X, Y);
|
||||
auto res = std::make_shared<Result>(add);
|
||||
auto Iter_res = std::make_shared<Result>(Iter);
|
||||
auto body = std::make_shared<Function>(OutputVector{res, Iter_res}, ParameterVector {X, Y, Iter});
|
||||
|
||||
// TI construction
|
||||
auto tensor_iterator = std::make_shared<TensorIterator>();
|
||||
tensor_iterator->set_body(body);
|
||||
|
||||
tensor_iterator->set_merged_input(X, param, res);
|
||||
tensor_iterator->set_invariant_input(Y, param);
|
||||
tensor_iterator->set_sliced_input(Iter, iter_count, 0, 1, 1, -1, 0);
|
||||
|
||||
auto output = tensor_iterator->get_iter_value(res, -1);
|
||||
auto output_iter = tensor_iterator->get_concatenated_slices(Iter_res, 0, 1, 1, -1, 0);
|
||||
function = std::make_shared<Function>(OutputVector{output, output_iter},
|
||||
ParameterVector{param, iter_count},
|
||||
"PureTI");
|
||||
}
|
||||
|
||||
void MemoryTest::CreateCommonFunc() {
|
||||
auto param = builder::makeParams(ngPrc, {inputShape});
|
||||
auto variable = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, "v0"});
|
||||
auto read_value = std::make_shared<ReadValue>(param.at(0), variable);
|
||||
auto add = std::make_shared<Add>(read_value, param.at(0));
|
||||
auto assign = std::make_shared<Assign>(add, variable);
|
||||
auto res = std::make_shared<Result>(add);
|
||||
function = std::make_shared<Function>(ResultVector{res}, SinkVector{assign}, param, "TestMemory");
|
||||
}
|
||||
|
||||
void MemoryTest::ApplyLowLatency() {
|
||||
if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2) {
|
||||
function->validate_nodes_and_infer_types();
|
||||
pass::Manager manager;
|
||||
manager.register_pass<pass::LowLatency2>();
|
||||
manager.run_passes(function);
|
||||
} else if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_ORIGINAL_INIT) {
|
||||
function->validate_nodes_and_infer_types();
|
||||
pass::Manager manager;
|
||||
manager.register_pass<pass::LowLatency2>(false);
|
||||
manager.run_passes(function);
|
||||
} else if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API) {
|
||||
cnnNetwork = InferenceEngine::CNNNetwork{function};
|
||||
InferenceEngine::lowLatency2(cnnNetwork, iteration_count);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace LayerTestsDefinitions
|
||||
|
||||
|
@ -9,6 +9,9 @@
|
||||
#include "ngraph_functions/builders.hpp"
|
||||
#include "shared_test_classes/subgraph/memory_LSTMCell.hpp"
|
||||
|
||||
using namespace ngraph;
|
||||
using namespace opset7;
|
||||
|
||||
namespace SubgraphTestsDefinitions {
|
||||
|
||||
std::string MemoryLSTMCellTest::getTestCaseName(const testing::TestParamInfo<memoryLSTMCellParams> &obj) {
|
||||
@ -17,9 +20,11 @@ namespace SubgraphTestsDefinitions {
|
||||
size_t inputSize;
|
||||
size_t hiddenSize;
|
||||
std::map<std::string, std::string> config;
|
||||
std::tie(targetDevice, netPrecision, inputSize, hiddenSize, config) = obj.param;
|
||||
ngraph::helpers::MemoryTransformation transformation;
|
||||
std::tie(transformation, targetDevice, netPrecision, inputSize, hiddenSize, config) = obj.param;
|
||||
std::ostringstream result;
|
||||
|
||||
result << "transformation=" << transformation << "_";
|
||||
result << "netPrecision=" << netPrecision.name() << "_";
|
||||
result << "IS=" << inputSize << "_";
|
||||
result << "HS=" << hiddenSize << "_";
|
||||
@ -34,7 +39,7 @@ namespace SubgraphTestsDefinitions {
|
||||
InferenceEngine::Precision netPrecision;
|
||||
std::map<std::string, std::string> config;
|
||||
size_t inputSize;
|
||||
std::tie(targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam();
|
||||
std::tie(transformation, targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam();
|
||||
configuration.insert(config.begin(), config.end());
|
||||
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
|
||||
|
||||
@ -51,49 +56,53 @@ namespace SubgraphTestsDefinitions {
|
||||
reccurrenceWeights_vals = CommonTestUtils::generate_float_numbers(4 * hiddenSize * hiddenSize, -0.1f, 0.1f);
|
||||
bias_vals = CommonTestUtils::generate_float_numbers(4 * hiddenSize, -0.2f, 0.1f);
|
||||
|
||||
auto input_parameter = ngraph::builder::makeParams(ngPrc, {input_dims});
|
||||
auto input_parameter = builder::makeParams(ngPrc, {input_dims});
|
||||
|
||||
auto input_add_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_bias);
|
||||
auto add = ngraph::builder::makeEltwise(input_parameter[0], input_add_const, ngraph::helpers::EltwiseTypes::ADD);
|
||||
auto input_add_const = builder::makeConstant(ngPrc, input_dims, input_bias);
|
||||
auto add = builder::makeEltwise(input_parameter[0], input_add_const, helpers::EltwiseTypes::ADD);
|
||||
|
||||
auto input_mul_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_weights);
|
||||
auto mul = ngraph::builder::makeEltwise(add, input_mul_const, ngraph::helpers::EltwiseTypes::MULTIPLY);
|
||||
auto input_mul_const = builder::makeConstant(ngPrc, input_dims, input_weights);
|
||||
auto mul = builder::makeEltwise(add, input_mul_const, helpers::EltwiseTypes::MULTIPLY);
|
||||
|
||||
auto unsqueeze_input_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
||||
auto unsqueeze_input = std::make_shared<ngraph::opset5::Unsqueeze>(mul, unsqueeze_input_const);
|
||||
auto unsqueeze_input_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||
auto unsqueeze_input = std::make_shared<Unsqueeze>(mul, unsqueeze_input_const);
|
||||
|
||||
auto permute_in_params = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{3}, ngraph::Shape{{1, 0, 2}});
|
||||
auto permute_in = std::make_shared<ngraph::opset5::Transpose>(unsqueeze_input, permute_in_params);
|
||||
auto permute_in_params = std::make_shared<Constant>(element::i64, Shape{3}, Shape{{1, 0, 2}});
|
||||
auto permute_in = std::make_shared<Transpose>(unsqueeze_input, permute_in_params);
|
||||
|
||||
auto cell_memory_constant = ngraph::builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
|
||||
auto cell_memory_read = std::make_shared<ngraph::opset5::ReadValue>(cell_memory_constant, "cell_memory");
|
||||
auto cell_memory_constant = builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
|
||||
auto var_cell =
|
||||
std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, "cell_state_1"});
|
||||
auto var_hidden =
|
||||
std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, "hidden_state_1"});
|
||||
auto cell_memory_read = std::make_shared<ReadValue>(cell_memory_constant, var_cell);
|
||||
|
||||
auto hidden_memory_constant = ngraph::builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
|
||||
auto hidden_memory_read = std::make_shared<ngraph::opset5::ReadValue>(hidden_memory_constant, "hidden_memory");
|
||||
auto hidden_memory_constant = builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
|
||||
auto hidden_memory_read = std::make_shared<ReadValue>(hidden_memory_constant, var_hidden);
|
||||
|
||||
// Body - inputs
|
||||
auto X = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, 1, inputSize});
|
||||
auto H_t = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, hiddenSize});
|
||||
auto C_t = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, hiddenSize});
|
||||
auto X = std::make_shared<Parameter>(ngPrc, Shape{1, 1, inputSize});
|
||||
auto H_t = std::make_shared<Parameter>(ngPrc, Shape{1, hiddenSize});
|
||||
auto C_t = std::make_shared<Parameter>(ngPrc, Shape{1, hiddenSize});
|
||||
// Body - layers
|
||||
auto squeeze_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
||||
auto squeeze = std::make_shared<ngraph::opset5::Squeeze>(X, squeeze_const);
|
||||
auto squeeze_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||
auto squeeze = std::make_shared<Squeeze>(X, squeeze_const);
|
||||
|
||||
auto weightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals);
|
||||
auto reccurrenceWeightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
|
||||
auto biasNode = ngraph::builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
|
||||
auto lstm = std::make_shared<ngraph::opset5::LSTMCell>(squeeze, H_t, C_t, weightsNode, reccurrenceWeightsNode, biasNode, hiddenSize);
|
||||
auto weightsNode = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals);
|
||||
auto reccurrenceWeightsNode = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
|
||||
auto biasNode = builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
|
||||
auto lstm = std::make_shared<LSTMCell>(squeeze, H_t, C_t, weightsNode, reccurrenceWeightsNode, biasNode, hiddenSize);
|
||||
|
||||
auto unsqueeze_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
||||
auto unsqueeze = std::make_shared<ngraph::opset5::Unsqueeze>(lstm->output(0), unsqueeze_const);
|
||||
auto unsqueeze_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||
auto unsqueeze = std::make_shared<Unsqueeze>(lstm->output(0), unsqueeze_const);
|
||||
// body - outputs
|
||||
auto H_o = lstm->output(0);
|
||||
auto C_o = lstm->output(1);
|
||||
auto unsqueeze_o = unsqueeze->output(0);
|
||||
|
||||
auto body = std::make_shared<ngraph::Function>(ngraph::OutputVector{unsqueeze_o, H_o, C_o}, ngraph::ParameterVector {X, H_t, C_t});
|
||||
auto body = std::make_shared<Function>(OutputVector{unsqueeze_o, H_o, C_o}, ParameterVector {X, H_t, C_t});
|
||||
// TI construction
|
||||
auto tensor_iterator = std::make_shared<ngraph::opset5::TensorIterator>();
|
||||
auto tensor_iterator = std::make_shared<TensorIterator>();
|
||||
tensor_iterator->set_body(body);
|
||||
tensor_iterator->set_invariant_input(X, permute_in);
|
||||
tensor_iterator->set_merged_input(H_t, hidden_memory_read, H_o);
|
||||
@ -107,27 +116,27 @@ namespace SubgraphTestsDefinitions {
|
||||
out_hidden.get_tensor().set_element_type(ngPrc);
|
||||
out_cell.get_tensor().set_element_type(ngPrc);
|
||||
|
||||
auto cell_memory_write = std::make_shared<ngraph::opset5::Assign>(out_cell, "cell_memory");
|
||||
auto hidden_memory_write = std::make_shared<ngraph::opset5::Assign>(out_hidden, "hidden_memory");
|
||||
auto cell_memory_write = std::make_shared<Assign>(out_cell, var_cell);
|
||||
auto hidden_memory_write = std::make_shared<Assign>(out_hidden, var_hidden);
|
||||
|
||||
auto final_reshape_pattern = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{4},
|
||||
auto final_reshape_pattern = std::make_shared<Constant>(element::i64, Shape{4},
|
||||
std::vector<size_t>({1, 1, 1, hiddenSize}));
|
||||
auto final_reshape = std::make_shared<ngraph::opset5::Reshape>(out_unsqueeze, final_reshape_pattern, false);
|
||||
auto final_reshape = std::make_shared<Reshape>(out_unsqueeze, final_reshape_pattern, false);
|
||||
|
||||
cell_memory_write->add_control_dependency(cell_memory_read);
|
||||
final_reshape->add_control_dependency(cell_memory_write);
|
||||
|
||||
hidden_memory_write->add_control_dependency(hidden_memory_read);
|
||||
final_reshape->add_control_dependency(hidden_memory_write);
|
||||
|
||||
function = std::make_shared<ngraph::Function>(final_reshape, input_parameter, "TI_with_memory");
|
||||
function = std::make_shared<Function>(OutputVector{final_reshape},
|
||||
SinkVector{cell_memory_write, hidden_memory_write},
|
||||
input_parameter,
|
||||
"TI_with_memory");
|
||||
}
|
||||
|
||||
void MemoryLSTMCellTest::switchToNgraphFriendlyModel() {
|
||||
InferenceEngine::Precision netPrecision;
|
||||
std::map<std::string, std::string> config;
|
||||
size_t inputSize;
|
||||
std::tie(targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam();
|
||||
std::tie(transformation, targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam();
|
||||
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
|
||||
|
||||
std::vector<size_t> input_dims { 1, inputSize };
|
||||
@ -135,46 +144,46 @@ namespace SubgraphTestsDefinitions {
|
||||
std::vector<size_t> hidden_memory_dims {1, hiddenSize};
|
||||
std::vector<size_t> cell_memory_dims {1, hiddenSize};
|
||||
|
||||
auto input_parameter = ngraph::builder::makeParams(ngPrc, {input_dims});
|
||||
auto input_parameter = builder::makeParams(ngPrc, {input_dims});
|
||||
|
||||
auto input_add_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_bias);
|
||||
auto add = ngraph::builder::makeEltwise(input_parameter[0], input_add_const, ngraph::helpers::EltwiseTypes::ADD);
|
||||
auto input_add_const = builder::makeConstant(ngPrc, input_dims, input_bias);
|
||||
auto add = builder::makeEltwise(input_parameter[0], input_add_const, helpers::EltwiseTypes::ADD);
|
||||
|
||||
auto input_mul_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_weights);
|
||||
auto mul = ngraph::builder::makeEltwise(add, input_mul_const, ngraph::helpers::EltwiseTypes::MULTIPLY);
|
||||
auto input_mul_const = builder::makeConstant(ngPrc, input_dims, input_weights);
|
||||
auto mul = builder::makeEltwise(add, input_mul_const, helpers::EltwiseTypes::MULTIPLY);
|
||||
|
||||
auto unsqueeze_input_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
||||
auto unsqueeze_input = std::make_shared<ngraph::opset5::Unsqueeze>(mul, unsqueeze_input_const);
|
||||
auto unsqueeze_input_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||
auto unsqueeze_input = std::make_shared<Unsqueeze>(mul, unsqueeze_input_const);
|
||||
|
||||
auto cell_memory_constant = ngraph::builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
|
||||
auto cell_memory_constant = builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
|
||||
|
||||
auto hidden_memory_constant = ngraph::builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
|
||||
auto hidden_memory_constant = builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
|
||||
|
||||
// Body - layers
|
||||
auto squeeze_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
||||
auto squeeze = std::make_shared<ngraph::opset5::Squeeze>(unsqueeze_input, squeeze_const);
|
||||
auto squeeze_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||
auto squeeze = std::make_shared<Squeeze>(unsqueeze_input, squeeze_const);
|
||||
|
||||
auto weightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals);
|
||||
auto reccurrenceWeightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
|
||||
auto biasNode = ngraph::builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
|
||||
auto lstm = std::make_shared<ngraph::opset5::LSTMCell>(squeeze, hidden_memory_constant, cell_memory_constant, weightsNode,
|
||||
auto weightsNode = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals);
|
||||
auto reccurrenceWeightsNode = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
|
||||
auto biasNode = builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
|
||||
auto lstm = std::make_shared<LSTMCell>(squeeze, hidden_memory_constant, cell_memory_constant, weightsNode,
|
||||
reccurrenceWeightsNode, biasNode, hiddenSize);
|
||||
|
||||
auto unsqueeze_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
||||
auto unsqueeze = std::make_shared<ngraph::opset5::Unsqueeze>(lstm->output(0), unsqueeze_const);
|
||||
auto unsqueeze_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||
auto unsqueeze = std::make_shared<Unsqueeze>(lstm->output(0), unsqueeze_const);
|
||||
|
||||
auto final_reshape_pattern = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64,
|
||||
ngraph::Shape{4}, std::vector<size_t>({1, 1, 1, hiddenSize}));
|
||||
auto final_reshape = std::make_shared<ngraph::opset5::Reshape>(unsqueeze, final_reshape_pattern, false);
|
||||
auto final_reshape_pattern = std::make_shared<Constant>(element::i64,
|
||||
Shape{4}, std::vector<size_t>({1, 1, 1, hiddenSize}));
|
||||
auto final_reshape = std::make_shared<Reshape>(unsqueeze, final_reshape_pattern, false);
|
||||
|
||||
function = std::make_shared<ngraph::Function>(final_reshape, input_parameter, "TI_unrolled_without_memory");
|
||||
function = std::make_shared<Function>(final_reshape, input_parameter, "TI_unrolled_without_memory");
|
||||
}
|
||||
|
||||
void MemoryLSTMCellTest::CreatePureTensorIteratorModel() {
|
||||
InferenceEngine::Precision netPrecision;
|
||||
std::map<std::string, std::string> config;
|
||||
size_t inputSize;
|
||||
std::tie(targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam();
|
||||
std::tie(transformation, targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam();
|
||||
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
|
||||
|
||||
std::vector<size_t> input_dims { 1, inputSize };
|
||||
@ -182,49 +191,49 @@ namespace SubgraphTestsDefinitions {
|
||||
std::vector<size_t> hidden_memory_dims {1, hiddenSize};
|
||||
std::vector<size_t> cell_memory_dims {1, hiddenSize};
|
||||
|
||||
auto input_parameter = ngraph::builder::makeParams(ngPrc, {input_dims});
|
||||
auto input_parameter = builder::makeParams(ngPrc, {input_dims});
|
||||
|
||||
auto input_add_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_bias);
|
||||
auto add = ngraph::builder::makeEltwise(input_parameter[0], input_add_const, ngraph::helpers::EltwiseTypes::ADD);
|
||||
auto input_add_const = builder::makeConstant(ngPrc, input_dims, input_bias);
|
||||
auto add = builder::makeEltwise(input_parameter[0], input_add_const, helpers::EltwiseTypes::ADD);
|
||||
|
||||
auto input_mul_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_weights);
|
||||
auto mul = ngraph::builder::makeEltwise(add, input_mul_const, ngraph::helpers::EltwiseTypes::MULTIPLY);
|
||||
auto input_mul_const = builder::makeConstant(ngPrc, input_dims, input_weights);
|
||||
auto mul = builder::makeEltwise(add, input_mul_const, helpers::EltwiseTypes::MULTIPLY);
|
||||
|
||||
auto unsqueeze_input_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
||||
auto unsqueeze_input = std::make_shared<ngraph::opset5::Unsqueeze>(mul, unsqueeze_input_const);
|
||||
auto unsqueeze_input_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||
auto unsqueeze_input = std::make_shared<Unsqueeze>(mul, unsqueeze_input_const);
|
||||
|
||||
auto permute_in_params = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{3}, ngraph::Shape{{1, 0, 2}});
|
||||
auto permute_in = std::make_shared<ngraph::opset5::Transpose>(unsqueeze_input, permute_in_params);
|
||||
auto permute_in_params = std::make_shared<Constant>(element::i64, Shape{3}, Shape{{1, 0, 2}});
|
||||
auto permute_in = std::make_shared<Transpose>(unsqueeze_input, permute_in_params);
|
||||
|
||||
auto cell_memory_constant = ngraph::builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
|
||||
auto cell_memory_constant = builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
|
||||
|
||||
auto hidden_memory_constant = ngraph::builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
|
||||
auto hidden_memory_constant = builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
|
||||
|
||||
// Body - inputs
|
||||
auto X = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, 1, inputSize});
|
||||
auto H_t = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, hiddenSize});
|
||||
auto C_t = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, hiddenSize});
|
||||
auto X = std::make_shared<Parameter>(ngPrc, Shape{1, 1, inputSize});
|
||||
auto H_t = std::make_shared<Parameter>(ngPrc, Shape{1, hiddenSize});
|
||||
auto C_t = std::make_shared<Parameter>(ngPrc, Shape{1, hiddenSize});
|
||||
H_t->set_friendly_name("hidden_state_1");
|
||||
C_t->set_friendly_name("cell_state_1");
|
||||
// Body - layers
|
||||
auto squeeze_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
||||
auto squeeze = std::make_shared<ngraph::opset5::Squeeze>(X, squeeze_const);
|
||||
auto squeeze_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||
auto squeeze = std::make_shared<Squeeze>(X, squeeze_const);
|
||||
|
||||
auto weightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals);
|
||||
auto reccurrenceWeightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
|
||||
auto biasNode = ngraph::builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
|
||||
auto lstm = std::make_shared<ngraph::opset5::LSTMCell>(squeeze, H_t, C_t, weightsNode, reccurrenceWeightsNode, biasNode, hiddenSize);
|
||||
auto weightsNode = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals);
|
||||
auto reccurrenceWeightsNode = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
|
||||
auto biasNode = builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
|
||||
auto lstm = std::make_shared<LSTMCell>(squeeze, H_t, C_t, weightsNode, reccurrenceWeightsNode, biasNode, hiddenSize);
|
||||
|
||||
auto unsqueeze_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
||||
auto unsqueeze = std::make_shared<ngraph::opset5::Unsqueeze>(lstm->output(0), unsqueeze_const);
|
||||
auto unsqueeze_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||
auto unsqueeze = std::make_shared<Unsqueeze>(lstm->output(0), unsqueeze_const);
|
||||
// body - outputs
|
||||
auto H_o = lstm->output(0);
|
||||
auto C_o = lstm->output(1);
|
||||
auto unsqueeze_o = unsqueeze->output(0);
|
||||
|
||||
auto body = std::make_shared<ngraph::Function>(ngraph::OutputVector{unsqueeze_o, H_o, C_o}, ngraph::ParameterVector {X, H_t, C_t});
|
||||
auto body = std::make_shared<Function>(OutputVector{unsqueeze_o, H_o, C_o}, ParameterVector {X, H_t, C_t});
|
||||
// TI construction
|
||||
auto tensor_iterator = std::make_shared<ngraph::opset5::TensorIterator>();
|
||||
auto tensor_iterator = std::make_shared<TensorIterator>();
|
||||
tensor_iterator->set_body(body);
|
||||
tensor_iterator->set_sliced_input(X, permute_in, 0, 1, 1, -1, 0);
|
||||
tensor_iterator->set_merged_input(H_t, hidden_memory_constant, H_o);
|
||||
@ -237,56 +246,35 @@ namespace SubgraphTestsDefinitions {
|
||||
out_hidden.get_tensor().set_element_type(ngPrc);
|
||||
out_cell.get_tensor().set_element_type(ngPrc);
|
||||
|
||||
auto final_reshape_pattern = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{4},
|
||||
auto final_reshape_pattern = std::make_shared<Constant>(element::i64, Shape{4},
|
||||
std::vector<size_t>({1, 1, 1, hiddenSize}));
|
||||
auto final_reshape = std::make_shared<ngraph::opset5::Reshape>(out_unsqueeze, final_reshape_pattern, false);
|
||||
auto final_reshape = std::make_shared<Reshape>(out_unsqueeze, final_reshape_pattern, false);
|
||||
|
||||
function = std::make_shared<ngraph::Function>(final_reshape, input_parameter, "PureTI");
|
||||
function = std::make_shared<Function>(final_reshape, input_parameter, "PureTI");
|
||||
}
|
||||
|
||||
void MemoryLSTMCellTest::Run() {
|
||||
SKIP_IF_CURRENT_TEST_IS_DISABLED()
|
||||
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
LoadNetwork();
|
||||
auto states = executableNetwork.QueryState();
|
||||
for (auto& state : states) {
|
||||
auto name = state.GetName();
|
||||
if (name == "cell_memory") {
|
||||
auto blob = FuncTestUtils::createAndFillBlobWithFloatArray(state.GetLastState()->getTensorDesc(),
|
||||
cell_memory_init.data(), cell_memory_init.size());
|
||||
state.SetState(blob);
|
||||
} else if (name == "hidden_memory") {
|
||||
auto blob = FuncTestUtils::createAndFillBlobWithFloatArray(state.GetLastState()->getTensorDesc(),
|
||||
hidden_memory_init.data(), hidden_memory_init.size());
|
||||
state.SetState(blob);
|
||||
} else {
|
||||
GTEST_FAIL() << "unknown memory state";
|
||||
}
|
||||
if (transformation != ngraph::helpers::MemoryTransformation::NONE) {
|
||||
ApplyLowLatency();
|
||||
} else {
|
||||
LoadNetwork();
|
||||
}
|
||||
IE_SUPPRESS_DEPRECATED_END
|
||||
|
||||
InitMemory();
|
||||
GenerateInputs();
|
||||
Infer();
|
||||
switchToNgraphFriendlyModel();
|
||||
|
||||
// Calculate ref values
|
||||
if (transformation == ngraph::helpers::MemoryTransformation::NONE) {
|
||||
switchToNgraphFriendlyModel();
|
||||
} else {
|
||||
CreatePureTensorIteratorModel();
|
||||
}
|
||||
Validate();
|
||||
}
|
||||
|
||||
void MemoryLSTMCellTest::RunLowLatency(bool regular_api) {
|
||||
SKIP_IF_CURRENT_TEST_IS_DISABLED()
|
||||
|
||||
CreatePureTensorIteratorModel();
|
||||
if (regular_api) {
|
||||
cnnNetwork = InferenceEngine::CNNNetwork{function};
|
||||
InferenceEngine::LowLatency(cnnNetwork);
|
||||
ConfigureNetwork();
|
||||
executableNetwork = core->LoadNetwork(static_cast<const InferenceEngine::CNNNetwork>(cnnNetwork), targetDevice, configuration);
|
||||
} else {
|
||||
// Apply LowLatency (insert Assigns/ReadValues) and UnrollTensorIterator
|
||||
ngraph::pass::Manager manager;
|
||||
manager.register_pass<ngraph::pass::LowLatency>(); // LowLatency enables UnrollTI
|
||||
manager.run_passes(function);
|
||||
LoadNetwork();
|
||||
}
|
||||
void MemoryLSTMCellTest::InitMemory() {
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
auto states = executableNetwork.QueryState();
|
||||
for (auto& state : states) {
|
||||
@ -304,13 +292,52 @@ namespace SubgraphTestsDefinitions {
|
||||
}
|
||||
}
|
||||
IE_SUPPRESS_DEPRECATED_END
|
||||
GenerateInputs();
|
||||
Infer();
|
||||
}
|
||||
|
||||
void MemoryLSTMCellTest::ApplyLowLatency() {
|
||||
// Calculate values after LowLatency transformation
|
||||
CreatePureTensorIteratorModel();
|
||||
ngraph::pass::Manager manager_2;
|
||||
manager_2.register_pass<ngraph::pass::UnrollTensorIterator>();
|
||||
manager_2.run_passes(function);
|
||||
Validate();
|
||||
if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY) {
|
||||
function->validate_nodes_and_infer_types();
|
||||
// Apply LowLatency (insert Assigns/ReadValues) and UnrollTensorIterator
|
||||
pass::Manager manager;
|
||||
NGRAPH_SUPPRESS_DEPRECATED_START
|
||||
manager.register_pass<ngraph::pass::LowLatency>();
|
||||
NGRAPH_SUPPRESS_DEPRECATED_END // LowLatency enables UnrollTI
|
||||
manager.run_passes(function);
|
||||
bool ti_found = helpers::is_tensor_iterator_exist(function);
|
||||
EXPECT_EQ(ti_found, true);
|
||||
LoadNetwork();
|
||||
} else if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2) {
|
||||
function->validate_nodes_and_infer_types();
|
||||
// Apply LowLatency (insert Assigns/ReadValues) and UnrollTensorIterator
|
||||
|
||||
pass::Manager manager;
|
||||
manager.register_pass<pass::LowLatency2>();
|
||||
manager.run_passes(function);
|
||||
bool ti_found = helpers::is_tensor_iterator_exist(function);
|
||||
EXPECT_EQ(ti_found, false);
|
||||
LoadNetwork();
|
||||
} else if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY_REGULAR_API) {
|
||||
cnnNetwork = InferenceEngine::CNNNetwork{function};
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
InferenceEngine::LowLatency(cnnNetwork);
|
||||
IE_SUPPRESS_DEPRECATED_END
|
||||
|
||||
bool ti_found = helpers::is_tensor_iterator_exist(cnnNetwork.getFunction());
|
||||
EXPECT_EQ(ti_found, true);
|
||||
|
||||
ConfigureNetwork();
|
||||
executableNetwork = core->LoadNetwork(cnnNetwork, targetDevice, configuration);
|
||||
} else if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API) {
|
||||
cnnNetwork = InferenceEngine::CNNNetwork{function};
|
||||
InferenceEngine::lowLatency2(cnnNetwork);
|
||||
|
||||
bool ti_found = helpers::is_tensor_iterator_exist(cnnNetwork.getFunction());
|
||||
EXPECT_EQ(ti_found, false);
|
||||
|
||||
ConfigureNetwork();
|
||||
executableNetwork = core->LoadNetwork(cnnNetwork, targetDevice, configuration);
|
||||
}
|
||||
}
|
||||
} // namespace SubgraphTestsDefinitions
|
||||
|
@ -2,16 +2,19 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "ngraph/opsets/opset5.hpp"
|
||||
#include "ie_transformations.hpp"
|
||||
#include "ngraph/opsets/opset7.hpp"
|
||||
#include "ngraph/op/util/variable_context.hpp"
|
||||
#include "ngraph/pass/low_latency.hpp"
|
||||
|
||||
#include "ie_transformations.hpp"
|
||||
#include "transformations/control_flow/unroll_tensor_iterator.hpp"
|
||||
|
||||
#include "ngraph_functions/builders.hpp"
|
||||
#include "ngraph_functions/utils/ngraph_helpers.hpp"
|
||||
|
||||
#include "shared_test_classes/subgraph/multiple_LSTMCell.hpp"
|
||||
|
||||
using namespace ngraph;
|
||||
using namespace opset7;
|
||||
|
||||
namespace SubgraphTestsDefinitions {
|
||||
std::string MultipleLSTMCellTest::getTestCaseName(const testing::TestParamInfo<multipleLSTMCellParams> &obj) {
|
||||
std::string targetDevice;
|
||||
@ -19,9 +22,11 @@ std::string MultipleLSTMCellTest::getTestCaseName(const testing::TestParamInfo<m
|
||||
size_t inputSize;
|
||||
size_t hiddenSize;
|
||||
std::map<std::string, std::string> config;
|
||||
std::tie(targetDevice, netPrecision, inputSize, hiddenSize, config) = obj.param;
|
||||
ngraph::helpers::MemoryTransformation transformation;
|
||||
std::tie(transformation, targetDevice, netPrecision, inputSize, hiddenSize, config) = obj.param;
|
||||
std::ostringstream result;
|
||||
|
||||
result << "transformation=" << transformation << "_";
|
||||
result << "netPrecision=" << netPrecision.name() << "_";
|
||||
result << "IS=" << inputSize << "_";
|
||||
result << "HS=" << hiddenSize << "_";
|
||||
@ -33,7 +38,7 @@ void MultipleLSTMCellTest::SetUp() {
|
||||
InferenceEngine::Precision netPrecision;
|
||||
std::map<std::string, std::string> config;
|
||||
size_t inputSize;
|
||||
std::tie(targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam();
|
||||
std::tie(transformation, targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam();
|
||||
configuration.insert(config.begin(), config.end());
|
||||
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
|
||||
|
||||
@ -51,51 +56,55 @@ void MultipleLSTMCellTest::SetUp() {
|
||||
reccurrenceWeights_vals = CommonTestUtils::generate_float_numbers(4 * hiddenSize * hiddenSize, -0.1f, 0.1f);
|
||||
bias_vals = CommonTestUtils::generate_float_numbers(4 * hiddenSize, -0.25f, 0.15f);
|
||||
|
||||
auto input_parameter = ngraph::builder::makeParams(ngPrc, {input_dims});
|
||||
auto input_parameter = builder::makeParams(ngPrc, {input_dims});
|
||||
|
||||
auto input_add_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_bias);
|
||||
auto add = ngraph::builder::makeEltwise(input_parameter[0], input_add_const, ngraph::helpers::EltwiseTypes::ADD);
|
||||
auto input_add_const = builder::makeConstant(ngPrc, input_dims, input_bias);
|
||||
auto add = builder::makeEltwise(input_parameter[0], input_add_const, helpers::EltwiseTypes::ADD);
|
||||
|
||||
auto input_mul_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_weights);
|
||||
auto mul = ngraph::builder::makeEltwise(add, input_mul_const, ngraph::helpers::EltwiseTypes::MULTIPLY);
|
||||
auto input_mul_const = builder::makeConstant(ngPrc, input_dims, input_weights);
|
||||
auto mul = builder::makeEltwise(add, input_mul_const, helpers::EltwiseTypes::MULTIPLY);
|
||||
|
||||
auto unsqueeze_input_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
||||
auto unsqueeze_input = std::make_shared<ngraph::opset5::Unsqueeze>(mul, unsqueeze_input_const);
|
||||
auto unsqueeze_input_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||
auto unsqueeze_input = std::make_shared<Unsqueeze>(mul, unsqueeze_input_const);
|
||||
|
||||
auto permute_in_params = std::make_shared<ngraph::opset1::Constant>(ngraph::element::i64, ngraph::Shape{3}, ngraph::Shape{{1, 0, 2}});
|
||||
auto permute_in = std::make_shared<ngraph::opset1::Transpose>(unsqueeze_input, permute_in_params);
|
||||
auto permute_in_params = std::make_shared<Constant>(element::i64, Shape{3}, Shape{{1, 0, 2}});
|
||||
auto permute_in = std::make_shared<Transpose>(unsqueeze_input, permute_in_params);
|
||||
|
||||
auto cell_memory_constant = ngraph::builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
|
||||
auto cell_memory_read = std::make_shared<ngraph::opset5::ReadValue>(cell_memory_constant, "cell_memory");
|
||||
auto cell_memory_constant = builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
|
||||
auto var_cell =
|
||||
std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, "cell_state_1"});
|
||||
auto var_hidden =
|
||||
std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, "hidden_state_1"});
|
||||
auto cell_memory_read = std::make_shared<ReadValue>(cell_memory_constant, var_cell);
|
||||
cell_memory_read->set_friendly_name("cell_memory");
|
||||
|
||||
auto hidden_memory_constant = ngraph::builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
|
||||
auto hidden_memory_read = std::make_shared<ngraph::opset5::ReadValue>(hidden_memory_constant, "hidden_memory");
|
||||
auto hidden_memory_constant = builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
|
||||
auto hidden_memory_read = std::make_shared<ReadValue>(hidden_memory_constant, var_hidden);
|
||||
hidden_memory_read->set_friendly_name("hidden_memory");
|
||||
|
||||
// Body - inputs
|
||||
auto X = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, 1, inputSize});
|
||||
auto H_t = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, hiddenSize});
|
||||
auto C_t = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, hiddenSize});
|
||||
auto X = std::make_shared<Parameter>(ngPrc, Shape{1, 1, inputSize});
|
||||
auto H_t = std::make_shared<Parameter>(ngPrc, Shape{1, hiddenSize});
|
||||
auto C_t = std::make_shared<Parameter>(ngPrc, Shape{1, hiddenSize});
|
||||
// Body - layers
|
||||
auto squeeze_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
||||
auto squeeze = std::make_shared<ngraph::opset5::Squeeze>(X, squeeze_const);
|
||||
auto squeeze_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||
auto squeeze = std::make_shared<Squeeze>(X, squeeze_const);
|
||||
|
||||
auto weightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals);
|
||||
auto reccurrenceWeightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
|
||||
auto biasNode = ngraph::builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
|
||||
auto lstm = std::make_shared<ngraph::opset4::LSTMCell>(squeeze, H_t, C_t, weightsNode, reccurrenceWeightsNode, biasNode, hiddenSize);
|
||||
auto weightsNode = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals);
|
||||
auto reccurrenceWeightsNode = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
|
||||
auto biasNode = builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
|
||||
auto lstm = std::make_shared<LSTMCell>(squeeze, H_t, C_t, weightsNode, reccurrenceWeightsNode, biasNode, hiddenSize);
|
||||
|
||||
auto unsqueeze_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
||||
auto unsqueeze = std::make_shared<ngraph::opset5::Unsqueeze>(lstm->output(0), unsqueeze_const);
|
||||
auto unsqueeze_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||
auto unsqueeze = std::make_shared<Unsqueeze>(lstm->output(0), unsqueeze_const);
|
||||
// body - outputs
|
||||
auto H_o = lstm->output(0);
|
||||
auto C_o = lstm->output(1);
|
||||
auto unsqueeze_o = unsqueeze->output(0);
|
||||
|
||||
auto body = std::make_shared<ngraph::Function>(ngraph::OutputVector{unsqueeze_o, H_o, C_o}, ngraph::ParameterVector {X, H_t, C_t});
|
||||
auto body = std::make_shared<Function>(OutputVector{unsqueeze_o, H_o, C_o}, ParameterVector {X, H_t, C_t});
|
||||
// TI construction
|
||||
auto tensor_iterator = std::make_shared<ngraph::opset5::TensorIterator>();
|
||||
auto tensor_iterator = std::make_shared<TensorIterator>();
|
||||
tensor_iterator->set_body(body);
|
||||
tensor_iterator->set_invariant_input(X, permute_in);
|
||||
tensor_iterator->set_merged_input(H_t, hidden_memory_read, H_o);
|
||||
@ -108,49 +117,53 @@ void MultipleLSTMCellTest::SetUp() {
|
||||
out_hidden.get_tensor().set_element_type(ngPrc);
|
||||
out_cell.get_tensor().set_element_type(ngPrc);
|
||||
|
||||
auto cell_memory_write = std::make_shared<ngraph::opset5::Assign>(out_cell, "cell_memory");
|
||||
auto hidden_memory_write = std::make_shared<ngraph::opset5::Assign>(out_hidden, "hidden_memory");
|
||||
auto cell_memory_write = std::make_shared<Assign>(out_cell, var_cell);
|
||||
auto hidden_memory_write = std::make_shared<Assign>(out_hidden, var_hidden);
|
||||
|
||||
auto first_reshape_pattern = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64,
|
||||
ngraph::Shape{4}, std::vector<size_t>({1, 1, 1, hiddenSize}));
|
||||
auto first_reshape = std::make_shared<ngraph::opset5::Reshape>(out_unsqueeze, first_reshape_pattern, false);
|
||||
auto first_reshape_pattern = std::make_shared<Constant>(element::i64,
|
||||
Shape{4}, std::vector<size_t>({1, 1, 1, hiddenSize}));
|
||||
auto first_reshape = std::make_shared<Reshape>(out_unsqueeze, first_reshape_pattern, false);
|
||||
// End of TI 1
|
||||
|
||||
auto inbetween_squeeze_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
||||
auto inbetween_squeeze = std::make_shared<ngraph::opset5::Squeeze>(first_reshape, inbetween_squeeze_const);
|
||||
auto inbetween_squeeze_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||
auto inbetween_squeeze = std::make_shared<Squeeze>(first_reshape, inbetween_squeeze_const);
|
||||
|
||||
// Second TI
|
||||
auto cell_memory_2_constant = ngraph::builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
|
||||
auto cell_memory_2_read = std::make_shared<ngraph::opset5::ReadValue>(cell_memory_2_constant, "cell_memory_2");
|
||||
auto var_cell_2 =
|
||||
std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, "cell_state_2"});
|
||||
auto var_hidden_2 =
|
||||
std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, "hidden_state_2"});
|
||||
auto cell_memory_2_constant = builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
|
||||
auto cell_memory_2_read = std::make_shared<ReadValue>(cell_memory_2_constant, var_cell_2);
|
||||
cell_memory_2_read->set_friendly_name("cell_memory_2");
|
||||
|
||||
auto hidden_memory_2_constant = ngraph::builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
|
||||
auto hidden_memory_2_read = std::make_shared<ngraph::opset5::ReadValue>(hidden_memory_2_constant, "hidden_memory_2");
|
||||
auto hidden_memory_2_constant = builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
|
||||
auto hidden_memory_2_read = std::make_shared<ReadValue>(hidden_memory_2_constant, var_hidden_2);
|
||||
hidden_memory_2_read->set_friendly_name("hidden_memory_2");
|
||||
|
||||
// Body - inputs
|
||||
auto X_2 = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, 1, hiddenSize});
|
||||
auto H_t_2 = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, hiddenSize});
|
||||
auto C_t_2 = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, hiddenSize});
|
||||
auto X_2 = std::make_shared<Parameter>(ngPrc, Shape{1, 1, hiddenSize});
|
||||
auto H_t_2 = std::make_shared<Parameter>(ngPrc, Shape{1, hiddenSize});
|
||||
auto C_t_2 = std::make_shared<Parameter>(ngPrc, Shape{1, hiddenSize});
|
||||
// Body - layers
|
||||
auto squeeze_2_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
||||
auto squeeze_2 = std::make_shared<ngraph::opset5::Squeeze>(X_2, squeeze_2_const);
|
||||
auto squeeze_2_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||
auto squeeze_2 = std::make_shared<Squeeze>(X_2, squeeze_2_const);
|
||||
|
||||
auto weightsNode_2 = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, weights_2_vals);
|
||||
auto reccurrenceWeightsNode_2 = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
|
||||
auto biasNode_2 = ngraph::builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
|
||||
auto lstm_2 = std::make_shared<ngraph::opset4::LSTMCell>(squeeze_2, H_t_2, C_t_2, weightsNode_2, reccurrenceWeightsNode_2, biasNode_2, hiddenSize);
|
||||
auto weightsNode_2 = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, weights_2_vals);
|
||||
auto reccurrenceWeightsNode_2 = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
|
||||
auto biasNode_2 = builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
|
||||
auto lstm_2 = std::make_shared<LSTMCell>(squeeze_2, H_t_2, C_t_2, weightsNode_2, reccurrenceWeightsNode_2, biasNode_2, hiddenSize);
|
||||
|
||||
auto unsqueeze_2_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
||||
auto unsqueeze_2 = std::make_shared<ngraph::opset5::Unsqueeze>(lstm_2->output(0), unsqueeze_2_const);
|
||||
auto unsqueeze_2_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||
auto unsqueeze_2 = std::make_shared<Unsqueeze>(lstm_2->output(0), unsqueeze_2_const);
|
||||
// body - outputs
|
||||
auto H_o_2 = lstm_2->output(0);
|
||||
auto C_o_2 = lstm_2->output(1);
|
||||
auto unsqueeze_o_2 = unsqueeze_2->output(0);
|
||||
|
||||
auto body_2 = std::make_shared<ngraph::Function>(ngraph::OutputVector{unsqueeze_o_2, H_o_2, C_o_2}, ngraph::ParameterVector {X_2, H_t_2, C_t_2});
|
||||
auto body_2 = std::make_shared<Function>(OutputVector{unsqueeze_o_2, H_o_2, C_o_2}, ParameterVector {X_2, H_t_2, C_t_2});
|
||||
// TI construction
|
||||
auto tensor_iterator_2 = std::make_shared<ngraph::opset5::TensorIterator>();
|
||||
auto tensor_iterator_2 = std::make_shared<TensorIterator>();
|
||||
tensor_iterator_2->set_body(body_2);
|
||||
tensor_iterator_2->set_invariant_input(X_2, inbetween_squeeze);
|
||||
tensor_iterator_2->set_merged_input(H_t_2, hidden_memory_2_read, H_o_2);
|
||||
@ -163,33 +176,28 @@ void MultipleLSTMCellTest::SetUp() {
|
||||
out_hidden_2.get_tensor().set_element_type(ngPrc);
|
||||
out_cell_2.get_tensor().set_element_type(ngPrc);
|
||||
|
||||
auto cell_memory_2_write = std::make_shared<ngraph::opset5::Assign>(out_cell_2, "cell_memory_2");
|
||||
auto hidden_memory_2_write = std::make_shared<ngraph::opset5::Assign>(out_hidden_2, "hidden_memory_2");
|
||||
auto cell_memory_2_write = std::make_shared<Assign>(out_cell_2, var_cell_2);
|
||||
auto hidden_memory_2_write = std::make_shared<Assign>(out_hidden_2, var_hidden_2);
|
||||
|
||||
auto final_reshape_pattern = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64,
|
||||
ngraph::Shape{4}, std::vector<size_t>({1, 1, 1, hiddenSize}));
|
||||
auto final_reshape = std::make_shared<ngraph::opset5::Reshape>(out_unsqueeze_2, final_reshape_pattern, false);
|
||||
auto final_reshape_pattern = std::make_shared<Constant>(element::i64, Shape{4}, std::vector<size_t>({1, 1, 1, hiddenSize}));
|
||||
auto final_reshape = std::make_shared<Reshape>(out_unsqueeze_2, final_reshape_pattern, false);
|
||||
|
||||
cell_memory_write->add_control_dependency(cell_memory_read);
|
||||
final_reshape->add_control_dependency(cell_memory_write);
|
||||
|
||||
hidden_memory_write->add_control_dependency(hidden_memory_read);
|
||||
final_reshape->add_control_dependency(hidden_memory_write);
|
||||
|
||||
cell_memory_2_write->add_control_dependency(cell_memory_2_read);
|
||||
final_reshape->add_control_dependency(cell_memory_2_write);
|
||||
|
||||
hidden_memory_2_write->add_control_dependency(hidden_memory_2_read);
|
||||
final_reshape->add_control_dependency(hidden_memory_2_write);
|
||||
|
||||
function = std::make_shared<ngraph::Function>(final_reshape, input_parameter, "TI_with_memory");
|
||||
function = std::make_shared<Function>(OutputVector {final_reshape},
|
||||
SinkVector{cell_memory_write, hidden_memory_write, cell_memory_2_write, hidden_memory_2_write},
|
||||
input_parameter,
|
||||
"TI_with_memory");
|
||||
}
|
||||
|
||||
void MultipleLSTMCellTest::switchToNgraphFriendlyModel() {
|
||||
InferenceEngine::Precision netPrecision;
|
||||
std::map<std::string, std::string> config;
|
||||
size_t inputSize;
|
||||
std::tie(targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam();
|
||||
std::tie(transformation, targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam();
|
||||
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
|
||||
|
||||
std::vector<size_t> input_dims { 1, inputSize };
|
||||
@ -197,72 +205,72 @@ void MultipleLSTMCellTest::switchToNgraphFriendlyModel() {
|
||||
std::vector<size_t> hidden_memory_dims {1, hiddenSize};
|
||||
std::vector<size_t> cell_memory_dims {1, hiddenSize};
|
||||
|
||||
auto input_parameter = ngraph::builder::makeParams(ngPrc, {input_dims});
|
||||
auto input_parameter = builder::makeParams(ngPrc, {input_dims});
|
||||
|
||||
auto input_add_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_bias);
|
||||
auto add = ngraph::builder::makeEltwise(input_parameter[0], input_add_const, ngraph::helpers::EltwiseTypes::ADD);
|
||||
auto input_add_const = builder::makeConstant(ngPrc, input_dims, input_bias);
|
||||
auto add = builder::makeEltwise(input_parameter[0], input_add_const, helpers::EltwiseTypes::ADD);
|
||||
|
||||
auto input_mul_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_weights);
|
||||
auto mul = ngraph::builder::makeEltwise(add, input_mul_const, ngraph::helpers::EltwiseTypes::MULTIPLY);
|
||||
auto input_mul_const = builder::makeConstant(ngPrc, input_dims, input_weights);
|
||||
auto mul = builder::makeEltwise(add, input_mul_const, helpers::EltwiseTypes::MULTIPLY);
|
||||
|
||||
auto unsqueeze_input_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
||||
auto unsqueeze_input = std::make_shared<ngraph::opset5::Unsqueeze>(mul, unsqueeze_input_const);
|
||||
auto unsqueeze_input_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||
auto unsqueeze_input = std::make_shared<Unsqueeze>(mul, unsqueeze_input_const);
|
||||
|
||||
// Body 1 - layers
|
||||
auto cell_memory_constant = ngraph::builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
|
||||
auto cell_memory_constant = builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
|
||||
|
||||
auto hidden_memory_constant = ngraph::builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
|
||||
auto hidden_memory_constant = builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
|
||||
|
||||
auto squeeze_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
||||
auto squeeze = std::make_shared<ngraph::opset5::Squeeze>(unsqueeze_input, squeeze_const);
|
||||
auto squeeze_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||
auto squeeze = std::make_shared<Squeeze>(unsqueeze_input, squeeze_const);
|
||||
|
||||
auto weightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals);
|
||||
auto reccurrenceWeightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
|
||||
auto biasNode = ngraph::builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
|
||||
auto lstm = std::make_shared<ngraph::opset4::LSTMCell>(squeeze, hidden_memory_constant, cell_memory_constant, weightsNode,
|
||||
auto weightsNode = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals);
|
||||
auto reccurrenceWeightsNode = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
|
||||
auto biasNode = builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
|
||||
auto lstm = std::make_shared<LSTMCell>(squeeze, hidden_memory_constant, cell_memory_constant, weightsNode,
|
||||
reccurrenceWeightsNode, biasNode, hiddenSize);
|
||||
|
||||
auto unsqueeze_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
||||
auto unsqueeze = std::make_shared<ngraph::opset5::Unsqueeze>(lstm->output(0), unsqueeze_const);
|
||||
auto unsqueeze_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||
auto unsqueeze = std::make_shared<Unsqueeze>(lstm->output(0), unsqueeze_const);
|
||||
|
||||
auto first_reshape_pattern = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64,
|
||||
ngraph::Shape{4}, std::vector<size_t>({1, 1, 1, hiddenSize}));
|
||||
auto first_reshape = std::make_shared<ngraph::opset5::Reshape>(unsqueeze, first_reshape_pattern, false);
|
||||
auto first_reshape_pattern = std::make_shared<Constant>(element::i64,
|
||||
Shape{4}, std::vector<size_t>({1, 1, 1, hiddenSize}));
|
||||
auto first_reshape = std::make_shared<Reshape>(unsqueeze, first_reshape_pattern, false);
|
||||
// Body 1 - end
|
||||
|
||||
auto inbetween_squeeze_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
||||
auto inbetween_squeeze = std::make_shared<ngraph::opset5::Squeeze>(first_reshape, inbetween_squeeze_const);
|
||||
auto inbetween_squeeze_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||
auto inbetween_squeeze = std::make_shared<Squeeze>(first_reshape, inbetween_squeeze_const);
|
||||
|
||||
// Body 2 - layers
|
||||
auto cell_memory_2_constant = ngraph::builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
|
||||
auto cell_memory_2_constant = builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
|
||||
|
||||
auto hidden_memory_2_constant = ngraph::builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
|
||||
auto hidden_memory_2_constant = builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
|
||||
|
||||
auto squeeze_2_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
||||
auto squeeze_2 = std::make_shared<ngraph::opset5::Squeeze>(inbetween_squeeze, squeeze_2_const);
|
||||
auto squeeze_2_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||
auto squeeze_2 = std::make_shared<Squeeze>(inbetween_squeeze, squeeze_2_const);
|
||||
|
||||
auto weightsNode_2 = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, weights_2_vals);
|
||||
auto reccurrenceWeightsNode_2 = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
|
||||
auto biasNode_2 = ngraph::builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
|
||||
auto lstm_2 = std::make_shared<ngraph::opset4::LSTMCell>(squeeze_2, hidden_memory_2_constant, cell_memory_2_constant, weightsNode_2,
|
||||
auto weightsNode_2 = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, weights_2_vals);
|
||||
auto reccurrenceWeightsNode_2 = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
|
||||
auto biasNode_2 = builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
|
||||
auto lstm_2 = std::make_shared<LSTMCell>(squeeze_2, hidden_memory_2_constant, cell_memory_2_constant, weightsNode_2,
|
||||
reccurrenceWeightsNode_2, biasNode_2, hiddenSize);
|
||||
|
||||
auto unsqueeze_2_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
||||
auto unsqueeze_2 = std::make_shared<ngraph::opset5::Unsqueeze>(lstm_2->output(0), unsqueeze_2_const);
|
||||
auto unsqueeze_2_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||
auto unsqueeze_2 = std::make_shared<Unsqueeze>(lstm_2->output(0), unsqueeze_2_const);
|
||||
|
||||
auto final_reshape_pattern = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64,
|
||||
ngraph::Shape{4}, std::vector<size_t>({1, 1, 1, hiddenSize}));
|
||||
auto final_reshape = std::make_shared<ngraph::opset5::Reshape>(unsqueeze_2, final_reshape_pattern, false);
|
||||
auto final_reshape_pattern = std::make_shared<Constant>(element::i64,
|
||||
Shape{4}, std::vector<size_t>({1, 1, 1, hiddenSize}));
|
||||
auto final_reshape = std::make_shared<Reshape>(unsqueeze_2, final_reshape_pattern, false);
|
||||
// Body 2 - end
|
||||
|
||||
function = std::make_shared<ngraph::Function>(final_reshape, input_parameter, "TI_unrolled_without_memory");
|
||||
function = std::make_shared<Function>(final_reshape, input_parameter, "TI_unrolled_without_memory");
|
||||
}
|
||||
|
||||
void MultipleLSTMCellTest::CreatePureTensorIteratorModel() {
|
||||
InferenceEngine::Precision netPrecision;
|
||||
std::map<std::string, std::string> config;
|
||||
size_t inputSize;
|
||||
std::tie(targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam();
|
||||
std::tie(transformation, targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam();
|
||||
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
|
||||
|
||||
std::vector<size_t> input_dims { 1, inputSize };
|
||||
@ -270,49 +278,49 @@ void MultipleLSTMCellTest::CreatePureTensorIteratorModel() {
|
||||
std::vector<size_t> hidden_memory_dims {1, hiddenSize};
|
||||
std::vector<size_t> cell_memory_dims {1, hiddenSize};
|
||||
|
||||
auto input_parameter = ngraph::builder::makeParams(ngPrc, {input_dims});
|
||||
auto input_parameter = builder::makeParams(ngPrc, {input_dims});
|
||||
|
||||
auto input_add_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_bias);
|
||||
auto add = ngraph::builder::makeEltwise(input_parameter[0], input_add_const, ngraph::helpers::EltwiseTypes::ADD);
|
||||
auto input_add_const = builder::makeConstant(ngPrc, input_dims, input_bias);
|
||||
auto add = builder::makeEltwise(input_parameter[0], input_add_const, helpers::EltwiseTypes::ADD);
|
||||
|
||||
auto input_mul_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_weights);
|
||||
auto mul = ngraph::builder::makeEltwise(add, input_mul_const, ngraph::helpers::EltwiseTypes::MULTIPLY);
|
||||
auto input_mul_const = builder::makeConstant(ngPrc, input_dims, input_weights);
|
||||
auto mul = builder::makeEltwise(add, input_mul_const, helpers::EltwiseTypes::MULTIPLY);
|
||||
|
||||
auto unsqueeze_input_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
||||
auto unsqueeze_input = std::make_shared<ngraph::opset5::Unsqueeze>(mul, unsqueeze_input_const);
|
||||
auto unsqueeze_input_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||
auto unsqueeze_input = std::make_shared<Unsqueeze>(mul, unsqueeze_input_const);
|
||||
|
||||
auto permute_in_params = std::make_shared<ngraph::opset1::Constant>(ngraph::element::i64, ngraph::Shape{3}, ngraph::Shape{{1, 0, 2}});
|
||||
auto permute_in = std::make_shared<ngraph::opset1::Transpose>(unsqueeze_input, permute_in_params);
|
||||
auto permute_in_params = std::make_shared<Constant>(element::i64, Shape{3}, Shape{{1, 0, 2}});
|
||||
auto permute_in = std::make_shared<Transpose>(unsqueeze_input, permute_in_params);
|
||||
|
||||
auto cell_memory_constant = ngraph::builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
|
||||
auto cell_memory_constant = builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
|
||||
|
||||
auto hidden_memory_constant = ngraph::builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
|
||||
auto hidden_memory_constant = builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
|
||||
|
||||
// Body - inputs
|
||||
auto X = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, 1, inputSize});
|
||||
auto H_t = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, hiddenSize});
|
||||
auto C_t = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, hiddenSize});
|
||||
auto X = std::make_shared<Parameter>(ngPrc, Shape{1, 1, inputSize});
|
||||
auto H_t = std::make_shared<Parameter>(ngPrc, Shape{1, hiddenSize});
|
||||
auto C_t = std::make_shared<Parameter>(ngPrc, Shape{1, hiddenSize});
|
||||
H_t->set_friendly_name("hidden_state_1");
|
||||
C_t->set_friendly_name("cell_state_1");
|
||||
// Body - layers
|
||||
auto squeeze_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
||||
auto squeeze = std::make_shared<ngraph::opset5::Squeeze>(X, squeeze_const);
|
||||
auto squeeze_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||
auto squeeze = std::make_shared<Squeeze>(X, squeeze_const);
|
||||
|
||||
auto weightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals);
|
||||
auto reccurrenceWeightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
|
||||
auto biasNode = ngraph::builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
|
||||
auto lstm = std::make_shared<ngraph::opset4::LSTMCell>(squeeze, H_t, C_t, weightsNode, reccurrenceWeightsNode, biasNode, hiddenSize);
|
||||
auto weightsNode = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals);
|
||||
auto reccurrenceWeightsNode = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
|
||||
auto biasNode = builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
|
||||
auto lstm = std::make_shared<LSTMCell>(squeeze, H_t, C_t, weightsNode, reccurrenceWeightsNode, biasNode, hiddenSize);
|
||||
|
||||
auto unsqueeze_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
||||
auto unsqueeze = std::make_shared<ngraph::opset5::Unsqueeze>(lstm->output(0), unsqueeze_const);
|
||||
auto unsqueeze_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||
auto unsqueeze = std::make_shared<Unsqueeze>(lstm->output(0), unsqueeze_const);
|
||||
// body - outputs
|
||||
auto H_o = lstm->output(0);
|
||||
auto C_o = lstm->output(1);
|
||||
auto unsqueeze_o = unsqueeze->output(0);
|
||||
|
||||
auto body = std::make_shared<ngraph::Function>(ngraph::OutputVector{unsqueeze_o, H_o, C_o}, ngraph::ParameterVector {X, H_t, C_t});
|
||||
auto body = std::make_shared<Function>(OutputVector{unsqueeze_o, H_o, C_o}, ParameterVector {X, H_t, C_t});
|
||||
// TI construction
|
||||
auto tensor_iterator = std::make_shared<ngraph::opset5::TensorIterator>();
|
||||
auto tensor_iterator = std::make_shared<TensorIterator>();
|
||||
tensor_iterator->set_body(body);
|
||||
tensor_iterator->set_sliced_input(X, permute_in, 0, 1, 1, -1, 0);
|
||||
tensor_iterator->set_merged_input(H_t, hidden_memory_constant, H_o);
|
||||
@ -326,44 +334,44 @@ void MultipleLSTMCellTest::CreatePureTensorIteratorModel() {
|
||||
out_cell.get_tensor().set_element_type(ngPrc);
|
||||
tensor_iterator->validate_and_infer_types();
|
||||
|
||||
auto first_reshape_pattern = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64,
|
||||
ngraph::Shape{4}, std::vector<size_t>({1, 1, 1, hiddenSize}));
|
||||
auto first_reshape = std::make_shared<ngraph::opset5::Reshape>(out_unsqueeze, first_reshape_pattern, false);
|
||||
auto first_reshape_pattern = std::make_shared<Constant>(element::i64,
|
||||
Shape{4}, std::vector<size_t>({1, 1, 1, hiddenSize}));
|
||||
auto first_reshape = std::make_shared<Reshape>(out_unsqueeze, first_reshape_pattern, false);
|
||||
// End of TI 1
|
||||
|
||||
auto inbetween_squeeze_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
||||
auto inbetween_squeeze = std::make_shared<ngraph::opset5::Squeeze>(first_reshape, inbetween_squeeze_const);
|
||||
auto inbetween_squeeze_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||
auto inbetween_squeeze = std::make_shared<Squeeze>(first_reshape, inbetween_squeeze_const);
|
||||
|
||||
// Second TI
|
||||
auto cell_memory_2_constant = ngraph::builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
|
||||
auto cell_memory_2_constant = builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
|
||||
|
||||
auto hidden_memory_2_constant = ngraph::builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
|
||||
auto hidden_memory_2_constant = builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
|
||||
|
||||
// Body - inputs
|
||||
auto X_2 = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, 1, hiddenSize});
|
||||
auto H_t_2 = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, hiddenSize});
|
||||
auto C_t_2 = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, hiddenSize});
|
||||
auto X_2 = std::make_shared<Parameter>(ngPrc, Shape{1, 1, hiddenSize});
|
||||
auto H_t_2 = std::make_shared<Parameter>(ngPrc, Shape{1, hiddenSize});
|
||||
auto C_t_2 = std::make_shared<Parameter>(ngPrc, Shape{1, hiddenSize});
|
||||
H_t_2->set_friendly_name("hidden_state_2");
|
||||
C_t_2->set_friendly_name("cell_state_2");
|
||||
// Body - layers
|
||||
auto squeeze_2_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
||||
auto squeeze_2 = std::make_shared<ngraph::opset5::Squeeze>(X_2, squeeze_2_const);
|
||||
auto squeeze_2_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||
auto squeeze_2 = std::make_shared<Squeeze>(X_2, squeeze_2_const);
|
||||
|
||||
auto weightsNode_2 = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, weights_2_vals);
|
||||
auto reccurrenceWeightsNode_2 = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
|
||||
auto biasNode_2 = ngraph::builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
|
||||
auto lstm_2 = std::make_shared<ngraph::opset4::LSTMCell>(squeeze_2, H_t_2, C_t_2, weightsNode_2, reccurrenceWeightsNode_2, biasNode_2, hiddenSize);
|
||||
auto weightsNode_2 = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, weights_2_vals);
|
||||
auto reccurrenceWeightsNode_2 = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
|
||||
auto biasNode_2 = builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
|
||||
auto lstm_2 = std::make_shared<LSTMCell>(squeeze_2, H_t_2, C_t_2, weightsNode_2, reccurrenceWeightsNode_2, biasNode_2, hiddenSize);
|
||||
|
||||
auto unsqueeze_2_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
||||
auto unsqueeze_2 = std::make_shared<ngraph::opset5::Unsqueeze>(lstm_2->output(0), unsqueeze_2_const);
|
||||
auto unsqueeze_2_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||
auto unsqueeze_2 = std::make_shared<Unsqueeze>(lstm_2->output(0), unsqueeze_2_const);
|
||||
// body - outputs
|
||||
auto H_o_2 = lstm_2->output(0);
|
||||
auto C_o_2 = lstm_2->output(1);
|
||||
auto unsqueeze_o_2 = unsqueeze_2->output(0);
|
||||
|
||||
auto body_2 = std::make_shared<ngraph::Function>(ngraph::OutputVector{unsqueeze_o_2, H_o_2, C_o_2}, ngraph::ParameterVector {X_2, H_t_2, C_t_2});
|
||||
auto body_2 = std::make_shared<Function>(OutputVector{unsqueeze_o_2, H_o_2, C_o_2}, ParameterVector {X_2, H_t_2, C_t_2});
|
||||
// TI construction
|
||||
auto tensor_iterator_2 = std::make_shared<ngraph::opset5::TensorIterator>();
|
||||
auto tensor_iterator_2 = std::make_shared<TensorIterator>();
|
||||
tensor_iterator_2->set_body(body_2);
|
||||
tensor_iterator_2->set_sliced_input(X_2, inbetween_squeeze, 0, 1, 1, -1, 0);
|
||||
tensor_iterator_2->set_merged_input(H_t_2, hidden_memory_2_constant, H_o_2);
|
||||
@ -376,70 +384,17 @@ void MultipleLSTMCellTest::CreatePureTensorIteratorModel() {
|
||||
out_hidden_2.get_tensor().set_element_type(ngPrc);
|
||||
out_cell_2.get_tensor().set_element_type(ngPrc);
|
||||
tensor_iterator_2->validate_and_infer_types();
|
||||
auto final_reshape_pattern = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64,
|
||||
ngraph::Shape{4}, std::vector<size_t>({1, 1, 1, hiddenSize}));
|
||||
auto final_reshape = std::make_shared<ngraph::opset5::Reshape>(out_unsqueeze_2, final_reshape_pattern, false);
|
||||
auto final_reshape_pattern = std::make_shared<Constant>(element::i64,
|
||||
Shape{4}, std::vector<size_t>({1, 1, 1, hiddenSize}));
|
||||
auto final_reshape = std::make_shared<Reshape>(out_unsqueeze_2, final_reshape_pattern, false);
|
||||
|
||||
function = std::make_shared<ngraph::Function>(final_reshape, input_parameter, "PureTI");
|
||||
function = std::make_shared<Function>(final_reshape, input_parameter, "PureTI");
|
||||
}
|
||||
|
||||
void MultipleLSTMCellTest::Run() {
|
||||
SKIP_IF_CURRENT_TEST_IS_DISABLED()
|
||||
void MultipleLSTMCellTest::InitMemory() {
|
||||
InferenceEngine::TensorDesc state_description(InferenceEngine::Precision::FP32,
|
||||
InferenceEngine::SizeVector({1, hiddenSize}),
|
||||
InferenceEngine::Layout::NC);
|
||||
LoadNetwork();
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
auto states = executableNetwork.QueryState();
|
||||
for (auto& state : states) {
|
||||
auto name = state.GetName();
|
||||
if (name == "cell_memory") {
|
||||
auto blob = FuncTestUtils::createAndFillBlobWithFloatArray(state_description,
|
||||
cell_memory_init.data(), cell_memory_init.size());
|
||||
state.SetState(blob);
|
||||
} else if (name == "hidden_memory") {
|
||||
auto blob = FuncTestUtils::createAndFillBlobWithFloatArray(state_description,
|
||||
hidden_memory_init.data(), hidden_memory_init.size());
|
||||
state.SetState(blob);
|
||||
} else if (name == "cell_memory_2") {
|
||||
auto blob = FuncTestUtils::createAndFillBlobWithFloatArray(state_description,
|
||||
cell_memory_init.data(), cell_memory_init.size());
|
||||
state.SetState(blob);
|
||||
} else if (name == "hidden_memory_2") {
|
||||
auto blob = FuncTestUtils::createAndFillBlobWithFloatArray(state_description,
|
||||
hidden_memory_init.data(), hidden_memory_init.size());
|
||||
state.SetState(blob);
|
||||
} else {
|
||||
GTEST_FAIL() << "unknown memory state";
|
||||
}
|
||||
}
|
||||
IE_SUPPRESS_DEPRECATED_END
|
||||
GenerateInputs();
|
||||
Infer();
|
||||
switchToNgraphFriendlyModel();
|
||||
Validate();
|
||||
}
|
||||
|
||||
void MultipleLSTMCellTest::RunLowLatency(bool regular_api) {
|
||||
SKIP_IF_CURRENT_TEST_IS_DISABLED()
|
||||
InferenceEngine::TensorDesc state_description(InferenceEngine::Precision::FP32,
|
||||
InferenceEngine::SizeVector({1, hiddenSize}),
|
||||
InferenceEngine::Layout::NC);
|
||||
// Calculate values after LowLatency transformation
|
||||
CreatePureTensorIteratorModel();
|
||||
if (regular_api) {
|
||||
cnnNetwork = InferenceEngine::CNNNetwork{function};
|
||||
InferenceEngine::LowLatency(cnnNetwork);
|
||||
ConfigureNetwork();
|
||||
executableNetwork = core->LoadNetwork(cnnNetwork, targetDevice, configuration);
|
||||
} else {
|
||||
function->validate_nodes_and_infer_types();
|
||||
// Apply LowLatency (insert Assigns/ReadValues) and UnrollTensorIterator
|
||||
ngraph::pass::Manager manager;
|
||||
manager.register_pass<ngraph::pass::LowLatency>(); // LowLatency enables UnrollTI
|
||||
manager.run_passes(function);
|
||||
LoadNetwork();
|
||||
}
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
auto states = executableNetwork.QueryState();
|
||||
for (auto& state : states) {
|
||||
@ -465,14 +420,73 @@ void MultipleLSTMCellTest::RunLowLatency(bool regular_api) {
|
||||
}
|
||||
}
|
||||
IE_SUPPRESS_DEPRECATED_END
|
||||
}
|
||||
|
||||
void MultipleLSTMCellTest::ApplyLowLatency() {
|
||||
// Calculate values after LowLatency transformation
|
||||
CreatePureTensorIteratorModel();
|
||||
if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY) {
|
||||
function->validate_nodes_and_infer_types();
|
||||
// Apply LowLatency (insert Assigns/ReadValues) and UnrollTensorIterator
|
||||
pass::Manager manager;
|
||||
NGRAPH_SUPPRESS_DEPRECATED_START
|
||||
manager.register_pass<ngraph::pass::LowLatency>();
|
||||
NGRAPH_SUPPRESS_DEPRECATED_END // LowLatency enables UnrollTI
|
||||
manager.run_passes(function);
|
||||
bool ti_found = helpers::is_tensor_iterator_exist(function);
|
||||
EXPECT_EQ(ti_found, true);
|
||||
LoadNetwork();
|
||||
} else if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2) {
|
||||
function->validate_nodes_and_infer_types();
|
||||
// Apply LowLatency (insert Assigns/ReadValues) and UnrollTensorIterator
|
||||
|
||||
pass::Manager manager;
|
||||
manager.register_pass<pass::LowLatency2>();
|
||||
manager.run_passes(function);
|
||||
bool ti_found = helpers::is_tensor_iterator_exist(function);
|
||||
EXPECT_EQ(ti_found, false);
|
||||
LoadNetwork();
|
||||
} else if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY_REGULAR_API) {
|
||||
cnnNetwork = InferenceEngine::CNNNetwork{function};
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
InferenceEngine::LowLatency(cnnNetwork);
|
||||
IE_SUPPRESS_DEPRECATED_END
|
||||
|
||||
bool ti_found = helpers::is_tensor_iterator_exist(cnnNetwork.getFunction());
|
||||
EXPECT_EQ(ti_found, true);
|
||||
|
||||
ConfigureNetwork();
|
||||
executableNetwork = core->LoadNetwork(cnnNetwork, targetDevice, configuration);
|
||||
} else if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API) {
|
||||
cnnNetwork = InferenceEngine::CNNNetwork{function};
|
||||
InferenceEngine::lowLatency2(cnnNetwork);
|
||||
|
||||
bool ti_found = helpers::is_tensor_iterator_exist(cnnNetwork.getFunction());
|
||||
EXPECT_EQ(ti_found, false);
|
||||
|
||||
ConfigureNetwork();
|
||||
executableNetwork = core->LoadNetwork(cnnNetwork, targetDevice, configuration);
|
||||
}
|
||||
}
|
||||
|
||||
void MultipleLSTMCellTest::Run() {
|
||||
SKIP_IF_CURRENT_TEST_IS_DISABLED()
|
||||
if (transformation != ngraph::helpers::MemoryTransformation::NONE) {
|
||||
ApplyLowLatency();
|
||||
} else {
|
||||
LoadNetwork();
|
||||
}
|
||||
|
||||
InitMemory();
|
||||
GenerateInputs();
|
||||
Infer();
|
||||
|
||||
// Calculate ref values for Unrolled TI
|
||||
CreatePureTensorIteratorModel();
|
||||
ngraph::pass::Manager manager_2;
|
||||
manager_2.register_pass<ngraph::pass::UnrollTensorIterator>();
|
||||
manager_2.run_passes(function);
|
||||
// Calculate ref values
|
||||
if (transformation == ngraph::helpers::MemoryTransformation::NONE) {
|
||||
switchToNgraphFriendlyModel();
|
||||
} else {
|
||||
CreatePureTensorIteratorModel();
|
||||
}
|
||||
Validate();
|
||||
}
|
||||
} // namespace SubgraphTestsDefinitions
|
||||
|
@ -214,6 +214,15 @@ enum class SequenceTestsMode {
|
||||
CONVERT_TO_TI_RAND_SEQ_LEN_PARAM,
|
||||
};
|
||||
|
||||
/// Selects which low-latency (state/memory) transformation a test applies to
/// its model before inference. Enumerator order is part of the interface.
enum class MemoryTransformation {
    /// No transformation is applied.
    NONE,
    /// ngraph::pass::LowLatency (v1) run through an nGraph pass manager.
    LOW_LATENCY,
    /// InferenceEngine::LowLatency (v1) applied to a CNNNetwork.
    LOW_LATENCY_REGULAR_API,
    /// ngraph::pass::LowLatency2 run through an nGraph pass manager.
    LOW_LATENCY_V2,
    /// InferenceEngine::lowLatency2 applied to a CNNNetwork.
    LOW_LATENCY_V2_REGULAR_API,
    /// NOTE(review): presumably LowLatency2 keeping the original initializer
    /// (use_const_initializer = false) - confirm against the tests using it.
    LOW_LATENCY_V2_ORIGINAL_INIT
};
|
||||
|
||||
std::ostream &operator<<(std::ostream &os, const ReductionType &m);
|
||||
std::ostream &operator<<(std::ostream &os, const PadMode &m);
|
||||
|
||||
@ -297,5 +306,7 @@ std::ostream& operator<<(std::ostream & os, TensorIteratorBody type);
|
||||
|
||||
std::ostream& operator<<(std::ostream & os, SequenceTestsMode type);
|
||||
|
||||
std::ostream& operator<<(std::ostream & os, MemoryTransformation type);
|
||||
|
||||
} // namespace helpers
|
||||
} // namespace ngraph
|
||||
|
@ -817,5 +817,32 @@ std::ostream& operator<<(std::ostream & os, SequenceTestsMode type) {
|
||||
}
|
||||
return os;
|
||||
}
|
||||
|
||||
/// Streams the enumerator name of a MemoryTransformation value.
///
/// @param os   destination stream
/// @param type value to print
/// @return `os`, to allow chaining
/// @throws std::runtime_error ("NOT_SUPPORTED_TYPE") for a value that matches
///         none of the known enumerators
std::ostream& operator<<(std::ostream & os, MemoryTransformation type) {
    switch (type) {
        case MemoryTransformation::NONE:
            os << "NONE";
            break;
        case MemoryTransformation::LOW_LATENCY_V2:
            os << "LOW_LATENCY_V2";
            break;
        case MemoryTransformation::LOW_LATENCY:
            os << "LOW_LATENCY";
            break;
        case MemoryTransformation::LOW_LATENCY_V2_REGULAR_API:
            os << "LOW_LATENCY_V2_REGULAR_API";
            break;
        case MemoryTransformation::LOW_LATENCY_REGULAR_API:
            os << "LOW_LATENCY_REGULAR_API";
            break;
        case MemoryTransformation::LOW_LATENCY_V2_ORIGINAL_INIT:
            os << "LOW_LATENCY_V2_ORIGINAL_INIT";
            break;
        default:
            throw std::runtime_error("NOT_SUPPORTED_TYPE");
    }
    return os;
}
|
||||
|
||||
} // namespace helpers
|
||||
} // namespace ngraph
|
||||
|
@ -11,7 +11,7 @@ def get_available_transformations():
|
||||
try:
|
||||
from openvino.offline_transformations import ApplyLowLatencyTransformation # pylint: disable=import-error,no-name-in-module
|
||||
return {
|
||||
'LowLatency': ApplyLowLatencyTransformation,
|
||||
'LowLatency2': ApplyLowLatencyTransformation,
|
||||
}
|
||||
except Exception as e:
|
||||
return {}
|
||||
|
@ -8,6 +8,7 @@ import os
|
||||
import re
|
||||
from collections import OrderedDict
|
||||
from itertools import zip_longest
|
||||
from distutils.util import strtobool
|
||||
|
||||
import numpy as np
|
||||
|
||||
@ -257,9 +258,9 @@ def get_common_cli_parser(parser: argparse.ArgumentParser = None):
|
||||
help='Apply additional transformations. ' +
|
||||
'Usage: "--transform transformation_name1[args],transformation_name2..." ' +
|
||||
'where [args] is key=value pairs separated by semicolon. ' +
|
||||
'Examples: "--transform LowLatency" or ' +
|
||||
' "--transform LowLatency[num_iterations=2]" ' +
|
||||
'Available transformations: "LowLatency"',
|
||||
'Examples: "--transform LowLatency2" or ' +
|
||||
' "--transform LowLatency2[use_const_initializer=False]" ' +
|
||||
'Available transformations: "LowLatency2"',
|
||||
default="")
|
||||
common_group.add_argument('--disable_fusing',
|
||||
help='Turn off fusing of linear operations to Convolution',
|
||||
@ -1151,6 +1152,14 @@ def isfloat(value):
|
||||
return False
|
||||
|
||||
|
||||
def isbool(value):
    """
    Check whether a string is a textual boolean.

    The accepted spellings are exactly those recognized by
    ``distutils.util.strtobool`` (case-insensitive), so a value for which this
    returns True can be passed to ``strtobool`` without raising. The check is
    done locally so that it does not depend on the deprecated ``distutils``
    package.

    :param value: string to examine
    :return: True if the string spells a boolean, False otherwise
    """
    truthy = ('y', 'yes', 't', 'true', 'on', '1')
    falsy = ('n', 'no', 'f', 'false', 'off', '0')
    return value.lower() in truthy + falsy
|
||||
|
||||
|
||||
def convert_string_to_real_type(value: str):
|
||||
values = value.split(',')
|
||||
for i in range(len(values)):
|
||||
@ -1159,6 +1168,8 @@ def convert_string_to_real_type(value: str):
|
||||
values[i] = int(value)
|
||||
elif isfloat(value):
|
||||
values[i] = float(value)
|
||||
elif isbool(value):
|
||||
values[i] = strtobool(value)
|
||||
|
||||
return values[0] if len(values) == 1 else values
|
||||
|
||||
|
@ -905,64 +905,65 @@ class TransformChecker(unittest.TestCase):
|
||||
self.assertEqual(parse_transform(""), [])
|
||||
|
||||
def test_single_pass(self):
|
||||
self.assertEqual(parse_transform("LowLatency"), [("LowLatency", {})])
|
||||
self.assertEqual(parse_transform("LowLatency2"), [("LowLatency2", {})])
|
||||
|
||||
def test_single_pass_with_args(self):
|
||||
self.assertEqual(parse_transform("LowLatency[num_iterations=2]"),
|
||||
[("LowLatency", {"num_iterations": 2})])
|
||||
self.assertEqual(parse_transform("LowLatency2[use_const_initializer=True]"),
|
||||
[("LowLatency2", {"use_const_initializer": True})])
|
||||
|
||||
def test_single_pass_with_multiple_args(self):
|
||||
self.assertEqual(parse_transform("LowLatency[num_iterations=2;dummy_attr=3.14]"),
|
||||
[("LowLatency", {"num_iterations": 2, "dummy_attr": 3.14})])
|
||||
self.assertEqual(parse_transform("LowLatency2[use_const_initializer=True;dummy_attr=3.14]"),
|
||||
[("LowLatency2", {"use_const_initializer": True, "dummy_attr": 3.14})])
|
||||
|
||||
def test_multiple_passes_with_args(self):
|
||||
self.assertEqual(parse_transform("LowLatency[num_iterations=2],DummyPass[type=ReLU]"),
|
||||
[("LowLatency", {"num_iterations": 2}),
|
||||
self.assertEqual(parse_transform("LowLatency2[use_const_initializer=True],DummyPass[type=ReLU]"),
|
||||
[("LowLatency2", {"use_const_initializer": True}),
|
||||
("DummyPass", {"type": "ReLU"})])
|
||||
|
||||
def test_multiple_passes_with_args2(self):
|
||||
self.assertEqual(parse_transform("LowLatency[num_iterations=2,3,4.15],DummyPass1,DummyPass2[types=ReLU,PReLU;values=1,2,3]"),
|
||||
[("LowLatency", {"num_iterations": [2,3,4.15]}),
|
||||
self.assertEqual(parse_transform("LowLatency2[use_const_initializer=True,False],DummyPass1,"
|
||||
"DummyPass2[types=ReLU,PReLU;values=1,2,3]"),
|
||||
[("LowLatency2", {"use_const_initializer": [True, False]}),
|
||||
("DummyPass1", {}),
|
||||
("DummyPass2", {"types": ["ReLU", "PReLU"], "values": [1,2,3]})])
|
||||
|
||||
def test_multiple_passes_no_args(self):
|
||||
self.assertEqual(parse_transform("DummyPass,LowLatency2"),
|
||||
[("DummyPass", {}), ("LowLatency2", {})])
|
||||
self.assertEqual(parse_transform("DummyPass,LowLatency22"),
|
||||
[("DummyPass", {}), ("LowLatency22", {})])
|
||||
|
||||
def test_single_pass_neg(self):
|
||||
self.assertRaises(Error, parse_transform, "LowLatency!")
|
||||
self.assertRaises(Error, parse_transform, "LowLatency2!")
|
||||
|
||||
def test_multiple_passes_neg(self):
|
||||
self.assertRaises(Error, parse_transform, "LowLatency;DummyPass")
|
||||
self.assertRaises(Error, parse_transform, "LowLatency2;DummyPass")
|
||||
|
||||
def test_single_pass_with_args_neg1(self):
|
||||
self.assertRaises(Error, parse_transform, "LowLatency[=2]")
|
||||
self.assertRaises(Error, parse_transform, "LowLatency2[=2]")
|
||||
|
||||
def test_single_pass_with_args_neg2(self):
|
||||
self.assertRaises(Error, parse_transform, "LowLatency[key=]")
|
||||
self.assertRaises(Error, parse_transform, "LowLatency2[key=]")
|
||||
|
||||
def test_single_pass_with_args_neg3(self):
|
||||
self.assertRaises(Error, parse_transform, "LowLatency[]")
|
||||
self.assertRaises(Error, parse_transform, "LowLatency2[]")
|
||||
|
||||
def test_single_pass_with_args_neg4(self):
|
||||
self.assertRaises(Error, parse_transform, "LowLatency[key=value;]")
|
||||
self.assertRaises(Error, parse_transform, "LowLatency2[key=value;]")
|
||||
|
||||
def test_single_pass_with_args_neg5(self):
|
||||
self.assertRaises(Error, parse_transform, "LowLatency[value]")
|
||||
self.assertRaises(Error, parse_transform, "LowLatency2[value]")
|
||||
|
||||
def test_single_pass_with_args_neg6(self):
|
||||
self.assertRaises(Error, parse_transform, "LowLatency[key=value")
|
||||
self.assertRaises(Error, parse_transform, "LowLatency2[key=value")
|
||||
|
||||
@patch("mo.back.offline_transformations.get_available_transformations")
|
||||
def test_check_low_latency_is_available(self, available_transformations):
|
||||
available_transformations.return_value = {"LowLatency": None}
|
||||
available_transformations.return_value = {"LowLatency2": None}
|
||||
try:
|
||||
check_available_transforms([("LowLatency" ,"")], True)
|
||||
check_available_transforms([("LowLatency2", "")], True)
|
||||
except Error as e:
|
||||
self.assertTrue(False, "Exception \"{}\" is unexpected".format(e))
|
||||
|
||||
@patch("mo.back.offline_transformations.get_available_transformations")
|
||||
def test_check_dummy_pass_is_available(self, available_transformations):
|
||||
available_transformations.return_value = {"LowLatency": None}
|
||||
available_transformations.return_value = {"LowLatency2": None}
|
||||
self.assertRaises(Error, check_available_transforms, [("DummyPass", "")], True)
|
||||
|
@ -8,13 +8,14 @@
|
||||
#include <vector>
|
||||
|
||||
#include <ngraph/pass/graph_rewrite.hpp>
|
||||
#include <ngraph/pass/pass.hpp>
|
||||
|
||||
namespace ngraph
|
||||
{
|
||||
namespace pass
|
||||
{
|
||||
/**
|
||||
* @brief The transformation finds all TensorIterator layers in the network,
|
||||
* @brief The transformation finds all TensorIterator/Loop layers in the network,
|
||||
* processes all back edges that describe a connection between Result and Parameter
|
||||
* of the TensorIterator body,and inserts ReadValue layer between Parameter
|
||||
* and the next layers after this Parameter, and Assign layer after the layers
|
||||
@ -42,11 +43,50 @@ namespace ngraph
|
||||
* by step, the states will store between inferences.
|
||||
*/
|
||||
|
||||
class NGRAPH_API LowLatency : public ngraph::pass::MatcherPass
|
||||
class NGRAPH_DEPRECATED("Use LowLatency2 instead.") NGRAPH_API LowLatency
|
||||
: public ngraph::pass::MatcherPass
|
||||
{
|
||||
public:
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
LowLatency();
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief The transformation finds all TensorIterator/Loop layers in the network,
|
||||
* processes all back edges that describe a connection between Result and Parameter
|
||||
* of the TensorIterator/Loop bodies,and inserts ReadValue and Assign layers at the
|
||||
* input and output corresponding to this back edge.
|
||||
* Supported platforms: CPU, GNA.
|
||||
*
|
||||
* The example below describes the changes made by the transformation
|
||||
* [] - TensorIterator body
|
||||
* () - new layer
|
||||
* BE - back-edge
|
||||
*
|
||||
* before applying the transformation:
|
||||
* -> input1[BE_1 -> Parameter -> Layers ... -> Result -> BE_1 ]output1->
|
||||
*
|
||||
* after applying the transformation:
|
||||
* ->(ReadValue)-> input1[BE_1 ->Parameter->Layers ...->Result->BE_1]output1 ->(Assign)
|
||||
* \
|
||||
* ->...
|
||||
* After applying the transformation, the resulting network can be inferred
|
||||
* step by step, the states will store between inferences.
|
||||
*/
|
||||
class NGRAPH_API LowLatency2 : public ngraph::pass::FunctionPass
|
||||
{
|
||||
public:
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
|
||||
explicit LowLatency2(bool use_const_initializer = true)
|
||||
: m_use_const_initializer(use_const_initializer)
|
||||
{
|
||||
}
|
||||
|
||||
bool run_on_function(std::shared_ptr<ngraph::Function> f) override;
|
||||
|
||||
private:
|
||||
bool m_use_const_initializer;
|
||||
};
|
||||
} // namespace pass
|
||||
} // namespace ngraph
|
||||
|
@ -129,7 +129,6 @@ void op::v0::TensorIterator::validate_and_infer_types()
|
||||
m_body->get_results().at(merged_input_description->m_body_value_index)->input(0);
|
||||
ends.push_back(body_value.get_node()->shared_from_this());
|
||||
|
||||
auto body_value_partial_shape = body_value.get_partial_shape();
|
||||
auto body_parameter =
|
||||
m_body->get_parameters().at(merged_input_description->m_body_parameter_index);
|
||||
|
||||
|
@ -6,12 +6,29 @@
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include <ngraph/log.hpp>
|
||||
#include <ngraph/opsets/opset6.hpp>
|
||||
#include <ngraph/opsets/opset7.hpp>
|
||||
#include <ngraph/pattern/op/wrap_type.hpp>
|
||||
#include <ngraph/rt_info.hpp>
|
||||
#include <ngraph/variant.hpp>
|
||||
|
||||
NGRAPH_RTTI_DEFINITION(ngraph::pass::LowLatency2, "LowLatency2", 0);
|
||||
|
||||
NGRAPH_SUPPRESS_DEPRECATED_START
|
||||
NGRAPH_RTTI_DEFINITION(ngraph::pass::LowLatency, "LowLatency", 0);
|
||||
|
||||
using namespace std;
|
||||
using namespace ngraph;
|
||||
|
||||
namespace
{
    /// Builds the name of the Variable created for one back edge:
    /// "<op_name>/<param_name>/variable_<variable_idx>".
    ///
    /// @param op_name      friendly name of the TensorIterator/Loop operation
    /// @param param_name   friendly name of the body Parameter on the back edge
    /// @param variable_idx index of the variable; 64-bit so that callers
    ///                     holding an int64_t counter pass it without narrowing
    std::string generate_variable_name(const std::string& op_name,
                                       const std::string& param_name,
                                       int64_t variable_idx)
    {
        return op_name + "/" + param_name + "/" + "variable_" + std::to_string(variable_idx);
    }

} // namespace
|
||||
ngraph::pass::LowLatency::LowLatency()
|
||||
{
|
||||
auto tensor_iterator = ngraph::pattern::wrap_type<opset6::TensorIterator, opset6::Loop>();
|
||||
@ -58,11 +75,12 @@ ngraph::pass::LowLatency::LowLatency()
|
||||
const auto& inputs_to = func->get_parameters()
|
||||
.at(merged_in->m_body_parameter_index)
|
||||
->get_output_target_inputs(0);
|
||||
const std::string variable_name(sub_graph_op->get_friendly_name() + "/" +
|
||||
func->get_parameters()
|
||||
.at(merged_in->m_body_parameter_index)
|
||||
->get_friendly_name() +
|
||||
"/variable_" + std::to_string(variable_id));
|
||||
const std::string variable_name(
|
||||
generate_variable_name(sub_graph_op->get_friendly_name(),
|
||||
func->get_parameters()
|
||||
.at(merged_in->m_body_parameter_index)
|
||||
->get_friendly_name(),
|
||||
variable_id));
|
||||
auto variable = std::make_shared<Variable>(
|
||||
VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name});
|
||||
auto read_value = std::make_shared<opset6::ReadValue>(
|
||||
@ -90,3 +108,178 @@ ngraph::pass::LowLatency::LowLatency()
|
||||
auto m = std::make_shared<ngraph::pattern::Matcher>(tensor_iterator, "LowLatency");
|
||||
register_matcher(m, callback);
|
||||
}
|
||||
NGRAPH_SUPPRESS_DEPRECATED_END
|
||||
|
||||
void UnrollSingleIteration(const shared_ptr<op::util::SubGraphOp>& sub_graph_op,
|
||||
const shared_ptr<Function>& outer_f)
|
||||
{
|
||||
using namespace opset7;
|
||||
|
||||
const auto& params = sub_graph_op->get_function()->get_parameters();
|
||||
const auto& results = sub_graph_op->get_function()->get_results();
|
||||
|
||||
// before: Layer1 -> TI [input -> bodyParameter -> Layer2 -> ...]
|
||||
// after: Layer1 -> Layer2 ->...
|
||||
for (const auto& in : sub_graph_op->get_input_descriptions())
|
||||
{
|
||||
const auto& connect_to = sub_graph_op->get_input_source_output(in->m_input_index);
|
||||
for (auto& output : params.at(in->m_body_parameter_index)->outputs())
|
||||
{
|
||||
output.replace(connect_to);
|
||||
}
|
||||
}
|
||||
|
||||
// before: TI [...-> Layer1 -> Result -> output] -> Layer2 -> ...
|
||||
// after: ...-> Layer1 -> Layer2 -> ...
|
||||
NodeVector new_ops;
|
||||
for (const auto& out : sub_graph_op->get_output_descriptions())
|
||||
{
|
||||
const auto& connect_to = results.at(out->m_body_value_index)->get_input_source_output(0);
|
||||
for (auto& input_to : sub_graph_op->output(out->m_output_index).get_target_inputs())
|
||||
{
|
||||
// create IE output name
|
||||
std::string out_name = sub_graph_op->get_friendly_name();
|
||||
if (sub_graph_op->get_output_size() != 1)
|
||||
out_name += "." + std::to_string(out->m_output_index);
|
||||
|
||||
// IECompatibility: insert identity (Unsqueeze + Squeeze) to store the TensorIterator
|
||||
// output names
|
||||
auto axis_1 = Constant::create(ngraph::element::i64, ngraph::Shape{1}, {1});
|
||||
auto identity_1 = std::make_shared<Unsqueeze>(connect_to, axis_1);
|
||||
auto identity_2 = std::make_shared<Squeeze>(identity_1, axis_1);
|
||||
identity_2->set_friendly_name(out_name);
|
||||
new_ops.push_back(identity_1);
|
||||
new_ops.push_back(identity_2);
|
||||
|
||||
input_to.replace_source_output(identity_2);
|
||||
}
|
||||
}
|
||||
outer_f->add_sinks(sub_graph_op->get_function()->get_sinks());
|
||||
ngraph::copy_runtime_info(sub_graph_op, sub_graph_op->get_function()->get_ops());
|
||||
ngraph::copy_runtime_info(sub_graph_op, new_ops);
|
||||
}
|
||||
|
||||
/**
 * Builds the sub-graph that produces an initial state value: a single zero
 * of the element type of `in_node`, broadcast to the shape of `in_node`
 * (taken at run time through ShapeOf).
 *
 * Runtime info of `sub_graph_op` is copied to the three created nodes.
 *
 * @param sub_graph_op operation whose runtime info the new nodes inherit
 * @param in_node      output that defines element type and shape of the result
 * @return output 0 of the Broadcast node
 */
Output<Node> create_init_subgraph(const shared_ptr<op::util::SubGraphOp>& sub_graph_op,
                                  const Output<Node>& in_node)
{
    using namespace opset7;

    auto const_zero = make_shared<Constant>(in_node.get_element_type(), Shape{1}, 0);
    auto shape_of = make_shared<ShapeOf>(in_node);
    auto broadcast = make_shared<Broadcast>(const_zero, shape_of);
    copy_runtime_info(sub_graph_op, {const_zero, shape_of, broadcast});
    return broadcast->output(0);
}
|
||||
|
||||
/// Applies the LowLatency2 rewrite to every SubGraphOp found in `f`.
///
/// For each back edge (MergedInputDescription) of a sub-graph operation:
///   * a Variable named by generate_variable_name() is created;
///   * a ReadValue bound to that Variable is inserted in front of the merged input
///     (its initial value is the original source output or, when
///     m_use_const_initializer is set, the result of create_init_subgraph());
///   * an Assign bound to the same Variable is attached to the sub-graph output that
///     corresponds to the back edge (the output is created if it does not exist yet).
/// After its back edges are processed, an operation whose get_num_iterations() is 1 is
/// passed to UnrollSingleIteration().
///
/// The rewrite stops at the first back edge that already has a ReadValue either as its
/// outer source or as a consumer of its body parameter. Assign nodes created before
/// that point are still registered as sinks of `f`.
///
/// \param f  Function to transform.
/// \return true if `f` was modified, false if the pass stopped before changing anything.
bool pass::LowLatency2::run_on_function(shared_ptr<Function> f)
{
    using namespace opset7;

    SinkVector assigns;
    // Becomes true once anything in `f` has been rewritten.
    bool graph_changed = false;

    // Early-exit path: keep the Assign nodes that were already created (otherwise they
    // would be destroyed together with `assigns`, leaving their ReadValue counterparts
    // without a matching Assign) and report whether the function was modified.
    const auto stop_transformation = [&f, &assigns, &graph_changed]() -> bool {
        f->add_sinks(assigns);
        return graph_changed;
    };

    for (const auto& node : f->get_ordered_ops())
    {
        if (const auto& sub_graph_op = dynamic_pointer_cast<op::util::SubGraphOp>(node))
        {
            // Counts every input description (merged or not) of this operation.
            int64_t variable_id = 0;
            const auto& func = sub_graph_op->get_function();
            const auto& params = func->get_parameters();
            for (const auto& in : sub_graph_op->get_input_descriptions())
            {
                // Process all back edges
                if (const auto& merged_in =
                        dynamic_pointer_cast<op::util::SubGraphOp::MergedInputDescription>(in))
                {
                    // create new Variable
                    const auto& body_param = params.at(merged_in->m_body_parameter_index);
                    const string& param_name = body_param->get_friendly_name();
                    const string& var_name = generate_variable_name(
                        sub_graph_op->get_friendly_name(), param_name, variable_id);

                    const auto& input = sub_graph_op->input(merged_in->m_input_index);
                    if (std::dynamic_pointer_cast<op::ReadValueBase>(
                            input.get_source_output().get_node_shared_ptr()) != nullptr)
                    {
                        NGRAPH_DEBUG
                            << "LowLatency2 transformation cannot be applied because the "
                            << "ReadValue node is already an input to the TensorIterator. "
                            << "LowLatency2 transformation may have already been applied, please "
                            << "do not call it more than once.";
                        return stop_transformation();
                    }

                    // A ReadValue consuming the body parameter means the state was already
                    // moved inside the body.
                    for (const auto& in_to : body_param->output(0).get_target_inputs())
                    {
                        if (dynamic_cast<op::ReadValueBase*>(in_to.get_node()) != nullptr)
                        {
                            NGRAPH_DEBUG
                                << "LowLatency2 transformation cannot be applied because the "
                                << "ReadValue node is already inside the TensorIterator. "
                                << "LowLatency transformation may have been applied, please do "
                                << "not call LowLatency2 after LowLatency.";
                            return stop_transformation();
                        }
                    }

                    VariableInfo var_info{PartialShape::dynamic(), element::dynamic, var_name};
                    auto variable = make_shared<Variable>(var_info);

                    // insert ReadValue
                    // Layers -> [new op: ReadValue] -> Subgraph operation
                    Output<Node> read_value_in = input.get_source_output();
                    if (m_use_const_initializer)
                    {
                        read_value_in = create_init_subgraph(sub_graph_op, read_value_in);
                    }
                    auto read_value = make_shared<ReadValue>(read_value_in, variable);
                    input.replace_source_output(read_value->output(0));
                    graph_changed = true;
                    read_value->set_friendly_name(var_name);
                    ngraph::copy_runtime_info(sub_graph_op, read_value);

                    /* insert Assign
                    // Subgraph operation -> [new op: Assign]
                    //                    \
                    //                      ---> Layers -> ...
                    */
                    const auto& out_desc = sub_graph_op->get_output_descriptions();
                    bool is_output_exist = std::any_of(
                        out_desc.begin(),
                        out_desc.end(),
                        [&merged_in](
                            const std::shared_ptr<op::util::SubGraphOp::OutputDescription>& out) {
                            return out->m_body_value_index == merged_in->m_body_value_index;
                        });
                    // Create new output if it doesn't exist.
                    if (!is_output_exist)
                    {
                        sub_graph_op->get_iter_value(
                            func->get_results().at(merged_in->m_body_value_index));
                    }
                    // Output descriptions are re-read here because one may have just been added.
                    for (const auto& out : sub_graph_op->get_output_descriptions())
                    {
                        if (out->m_body_value_index == merged_in->m_body_value_index)
                        {
                            auto assign = make_shared<Assign>(
                                sub_graph_op->output(out->m_output_index), variable);
                            ngraph::copy_runtime_info(sub_graph_op, assign);
                            // control dependency so that ReadValue is processed before Assign
                            assign->add_control_dependency(read_value);
                            assigns.emplace_back(assign);
                            break;
                        }
                    }
                }

                variable_id++;
            }

            if (sub_graph_op->get_num_iterations() == 1)
            {
                UnrollSingleIteration(sub_graph_op, f);
                graph_changed = true;
            }
        }
    }
    f->add_sinks(assigns);
    return true;
}
|
||||
|
Loading…
Reference in New Issue
Block a user