LowLatency v2 ngraph transformation (#5160)
* LowLatency 2.0: transformation and unit tests * low latency 2.0: unit tests * documentation and ngraph codestyle * update CNN Interface of LowLatency transformation * fix build on Windows * fix build on Windows * investigation of a failed build on Win OS * ngraph codestyle * fix build (werrors) * New unit tests, refactoring * update functional tests for Memory * update LowLatency functional tests * extend Memory tests to cover LowLatency v2 transformation * clean up, code style * fix unit tests * update and fix unit tests, add feature to apply LLTv2 after LLTv1 * update docs, refactoring * add several gna tests to skip config * fix python api tests * update python api, rename LowLatency_v2 to LowLatency2 * deprecate LowLatency v1 * Deprecate LowLatency v1 in IE * fix wrong merge, codestyle * resolve review comments * fix python test * update skip config * apply online review notes, fix unit tests * clean up, code style * fix docs * Use debug_messages instead of exceptions in llt v2 * fix unit tests * Resolve review remarks
This commit is contained in:
parent
f9b27c3714
commit
c1608628d4
@ -17,8 +17,8 @@ def ApplyPOTTransformations(IENetwork network, string device):
|
|||||||
C.ApplyPOTTransformations(network.impl, device)
|
C.ApplyPOTTransformations(network.impl, device)
|
||||||
|
|
||||||
|
|
||||||
def ApplyLowLatencyTransformation(IENetwork network, int64_t num_iterations=1):
|
def ApplyLowLatencyTransformation(IENetwork network, bool use_const_initializer = True):
|
||||||
C.ApplyLowLatencyTransformation(network.impl, num_iterations)
|
C.ApplyLowLatencyTransformation(network.impl, use_const_initializer)
|
||||||
|
|
||||||
|
|
||||||
def ApplyPruningTransformation(IENetwork network):
|
def ApplyPruningTransformation(IENetwork network):
|
||||||
|
@ -26,16 +26,9 @@ void InferenceEnginePython::ApplyPOTTransformations(InferenceEnginePython::IENet
|
|||||||
manager.run_passes(network.actual->getFunction());
|
manager.run_passes(network.actual->getFunction());
|
||||||
}
|
}
|
||||||
|
|
||||||
void InferenceEnginePython::ApplyLowLatencyTransformation(InferenceEnginePython::IENetwork network, int64_t num_iterations) {
|
void InferenceEnginePython::ApplyLowLatencyTransformation(InferenceEnginePython::IENetwork network, bool use_const_initializer) {
|
||||||
ngraph::pass::Manager manager;
|
ngraph::pass::Manager manager;
|
||||||
// TODO: pass num_iterations to LowLatency
|
manager.register_pass<ngraph::pass::LowLatency2>(use_const_initializer);
|
||||||
manager.register_pass<ngraph::pass::LowLatency>();
|
|
||||||
manager.register_pass<ngraph::pass::UnrollTensorIterator>();
|
|
||||||
|
|
||||||
auto pass_config = manager.get_pass_config();
|
|
||||||
pass_config->set_callback<ngraph::pass::UnrollTensorIterator>([](const std::shared_ptr<const ngraph::Node>& node) -> bool {
|
|
||||||
return node->get_rt_info().count("UNROLL_TI") == 0;
|
|
||||||
});
|
|
||||||
manager.run_passes(network.actual->getFunction());
|
manager.run_passes(network.actual->getFunction());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -15,7 +15,7 @@ void ApplyMOCTransformations(InferenceEnginePython::IENetwork network, bool cf);
|
|||||||
|
|
||||||
void ApplyPOTTransformations(InferenceEnginePython::IENetwork network, std::string device);
|
void ApplyPOTTransformations(InferenceEnginePython::IENetwork network, std::string device);
|
||||||
|
|
||||||
void ApplyLowLatencyTransformation(InferenceEnginePython::IENetwork network, int64_t num_iterations);
|
void ApplyLowLatencyTransformation(InferenceEnginePython::IENetwork network, bool use_const_initializer = true);
|
||||||
|
|
||||||
void ApplyPruningTransformation(InferenceEnginePython::IENetwork network);
|
void ApplyPruningTransformation(InferenceEnginePython::IENetwork network);
|
||||||
|
|
||||||
|
@ -3,7 +3,6 @@
|
|||||||
|
|
||||||
from libcpp cimport bool
|
from libcpp cimport bool
|
||||||
from libcpp.string cimport string
|
from libcpp.string cimport string
|
||||||
from libc.stdint cimport int64_t
|
|
||||||
|
|
||||||
from ..inference_engine.ie_api_impl_defs cimport IENetwork
|
from ..inference_engine.ie_api_impl_defs cimport IENetwork
|
||||||
|
|
||||||
@ -12,10 +11,10 @@ cdef extern from "offline_transformations_api_impl.hpp" namespace "InferenceEngi
|
|||||||
|
|
||||||
cdef void ApplyPOTTransformations(IENetwork network, string device)
|
cdef void ApplyPOTTransformations(IENetwork network, string device)
|
||||||
|
|
||||||
cdef void ApplyLowLatencyTransformation(IENetwork network, int64_t num_iterations)
|
cdef void ApplyLowLatencyTransformation(IENetwork network, bool use_const_initializer)
|
||||||
|
|
||||||
cdef void ApplyPruningTransformation(IENetwork network)
|
cdef void ApplyPruningTransformation(IENetwork network)
|
||||||
|
|
||||||
cdef void GenerateMappingFile(IENetwork network, string path, bool extract_names)
|
cdef void GenerateMappingFile(IENetwork network, string path, bool extract_names)
|
||||||
|
|
||||||
cdef void CheckAPI()
|
cdef void CheckAPI()
|
||||||
|
@ -49,4 +49,4 @@ def test_pruning_transformations():
|
|||||||
|
|
||||||
f = ng.function_from_cnn(net)
|
f = ng.function_from_cnn(net)
|
||||||
assert f != None
|
assert f != None
|
||||||
assert len(f.get_ops()) == 3
|
assert len(f.get_ops()) == 3
|
||||||
|
@ -52,5 +52,41 @@ namespace InferenceEngine {
|
|||||||
* @param network A network to apply LowLatency transformation
|
* @param network A network to apply LowLatency transformation
|
||||||
* *
|
* *
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
INFERENCE_ENGINE_DEPRECATED("This transformation will be removed in 2023.1. "
|
||||||
|
"Use InferenceEngine::lowLatency2 instead.")
|
||||||
INFERENCE_ENGINE_API_CPP(void) LowLatency(InferenceEngine::CNNNetwork& network);
|
INFERENCE_ENGINE_API_CPP(void) LowLatency(InferenceEngine::CNNNetwork& network);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief The transformation finds all TensorIterator/Loop layers in the network,
|
||||||
|
* processes all back edges that describe a connection between Result and Parameter
|
||||||
|
* of the TensorIterator/Loop bodies, and inserts ReadValue and Assign layers at the
|
||||||
|
* input and output corresponding to this back edge.
|
||||||
|
* Supported platforms: CPU, GNA.
|
||||||
|
*
|
||||||
|
* The example below describes the changes made by the transformation
|
||||||
|
* [] - TensorIterator body
|
||||||
|
* () - new layer
|
||||||
|
* BE - back-edge
|
||||||
|
*
|
||||||
|
* before applying the transformation:
|
||||||
|
* -> input1[BE_1 -> Parameter -> Layers ... -> Result -> BE_1 ]output1->
|
||||||
|
*
|
||||||
|
* after applying the transformation:
|
||||||
|
* ->(ReadValue)-> input1[BE_1 ->Parameter->Layers ...->Result->BE_1]output1 ->(Assign)
|
||||||
|
* \
|
||||||
|
* ->...
|
||||||
|
* After applying the transformation, the resulting network can be inferred
|
||||||
|
* step by step; the states are stored between inferences.
|
||||||
|
* @param network A network to apply LowLatency transformation
|
||||||
|
* @param use_const_initializer Changes the type of the initializing subgraph for ReadValue operations.
|
||||||
|
If "true", then the transformation inserts Constant before ReadValue operation.
|
||||||
|
If "false", then the transformation leaves the existing initializing subgraph for ReadValue operation.
|
||||||
|
*
|
||||||
|
* *
|
||||||
|
*/
|
||||||
|
INFERENCE_ENGINE_API_CPP(void) lowLatency2(InferenceEngine::CNNNetwork& network,
|
||||||
|
bool use_const_initializer = true);
|
||||||
|
|
||||||
} // namespace InferenceEngine
|
} // namespace InferenceEngine
|
||||||
|
@ -11,6 +11,16 @@ using namespace InferenceEngine;
|
|||||||
void InferenceEngine::LowLatency(InferenceEngine::CNNNetwork &network) {
|
void InferenceEngine::LowLatency(InferenceEngine::CNNNetwork &network) {
|
||||||
auto function = network.getFunction();
|
auto function = network.getFunction();
|
||||||
ngraph::pass::Manager manager;
|
ngraph::pass::Manager manager;
|
||||||
|
NGRAPH_SUPPRESS_DEPRECATED_START
|
||||||
manager.register_pass<ngraph::pass::LowLatency>();
|
manager.register_pass<ngraph::pass::LowLatency>();
|
||||||
|
NGRAPH_SUPPRESS_DEPRECATED_END
|
||||||
|
manager.run_passes(function);
|
||||||
|
}
|
||||||
|
|
||||||
|
void InferenceEngine::lowLatency2(InferenceEngine::CNNNetwork &network,
|
||||||
|
bool use_const_initializer) {
|
||||||
|
auto function = network.getFunction();
|
||||||
|
ngraph::pass::Manager manager;
|
||||||
|
manager.register_pass<ngraph::pass::LowLatency2>(use_const_initializer);
|
||||||
manager.run_passes(function);
|
manager.run_passes(function);
|
||||||
}
|
}
|
||||||
|
@ -68,7 +68,9 @@ TEST(TransformationTests, LowLatencyLSTM) {
|
|||||||
|
|
||||||
ngraph::pass::Manager manager;
|
ngraph::pass::Manager manager;
|
||||||
manager.register_pass<ngraph::pass::InitNodeInfo>();
|
manager.register_pass<ngraph::pass::InitNodeInfo>();
|
||||||
|
NGRAPH_SUPPRESS_DEPRECATED_START
|
||||||
manager.register_pass<ngraph::pass::LowLatency>();
|
manager.register_pass<ngraph::pass::LowLatency>();
|
||||||
|
NGRAPH_SUPPRESS_DEPRECATED_END
|
||||||
manager.register_pass<ngraph::pass::UnrollTensorIterator>();
|
manager.register_pass<ngraph::pass::UnrollTensorIterator>();
|
||||||
manager.run_passes(f);
|
manager.run_passes(f);
|
||||||
}
|
}
|
||||||
@ -149,7 +151,9 @@ TEST(TransformationTests, LowLatencyGRU) {
|
|||||||
|
|
||||||
ngraph::pass::Manager manager;
|
ngraph::pass::Manager manager;
|
||||||
manager.register_pass<ngraph::pass::InitNodeInfo>();
|
manager.register_pass<ngraph::pass::InitNodeInfo>();
|
||||||
|
NGRAPH_SUPPRESS_DEPRECATED_START
|
||||||
manager.register_pass<ngraph::pass::LowLatency>();
|
manager.register_pass<ngraph::pass::LowLatency>();
|
||||||
|
NGRAPH_SUPPRESS_DEPRECATED_END
|
||||||
manager.register_pass<ngraph::pass::UnrollTensorIterator>();
|
manager.register_pass<ngraph::pass::UnrollTensorIterator>();
|
||||||
manager.run_passes(f);
|
manager.run_passes(f);
|
||||||
|
|
||||||
@ -227,7 +231,9 @@ TEST(TransformationTests, LowLatencyRNN) {
|
|||||||
|
|
||||||
ngraph::pass::Manager manager;
|
ngraph::pass::Manager manager;
|
||||||
manager.register_pass<ngraph::pass::InitNodeInfo>();
|
manager.register_pass<ngraph::pass::InitNodeInfo>();
|
||||||
|
NGRAPH_SUPPRESS_DEPRECATED_START
|
||||||
manager.register_pass<ngraph::pass::LowLatency>();
|
manager.register_pass<ngraph::pass::LowLatency>();
|
||||||
|
NGRAPH_SUPPRESS_DEPRECATED_END
|
||||||
manager.register_pass<ngraph::pass::UnrollTensorIterator>();
|
manager.register_pass<ngraph::pass::UnrollTensorIterator>();
|
||||||
manager.run_passes(f);
|
manager.run_passes(f);
|
||||||
|
|
||||||
@ -317,7 +323,9 @@ TEST(TransformationTests, LowLatencyLSTMReshape) {
|
|||||||
|
|
||||||
ngraph::pass::Manager manager;
|
ngraph::pass::Manager manager;
|
||||||
manager.register_pass<ngraph::pass::InitNodeInfo>();
|
manager.register_pass<ngraph::pass::InitNodeInfo>();
|
||||||
|
NGRAPH_SUPPRESS_DEPRECATED_START
|
||||||
manager.register_pass<ngraph::pass::LowLatency>();
|
manager.register_pass<ngraph::pass::LowLatency>();
|
||||||
|
NGRAPH_SUPPRESS_DEPRECATED_END
|
||||||
manager.register_pass<ngraph::pass::UnrollTensorIterator>();
|
manager.register_pass<ngraph::pass::UnrollTensorIterator>();
|
||||||
manager.run_passes(f);
|
manager.run_passes(f);
|
||||||
}
|
}
|
||||||
@ -413,7 +421,9 @@ TEST(TransformationTests, LowLatencyLSTM_Loop) {
|
|||||||
|
|
||||||
ngraph::pass::Manager manager;
|
ngraph::pass::Manager manager;
|
||||||
manager.register_pass<ngraph::pass::InitNodeInfo>();
|
manager.register_pass<ngraph::pass::InitNodeInfo>();
|
||||||
|
NGRAPH_SUPPRESS_DEPRECATED_START
|
||||||
manager.register_pass<ngraph::pass::LowLatency>();
|
manager.register_pass<ngraph::pass::LowLatency>();
|
||||||
|
NGRAPH_SUPPRESS_DEPRECATED_END
|
||||||
manager.register_pass<ngraph::pass::UnrollTensorIterator>();
|
manager.register_pass<ngraph::pass::UnrollTensorIterator>();
|
||||||
manager.run_passes(f);
|
manager.run_passes(f);
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,829 @@
|
|||||||
|
// Copyright (C) 2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <memory>
|
||||||
|
#include <queue>
|
||||||
|
|
||||||
|
#include <ngraph/function.hpp>
|
||||||
|
#include <ngraph/opsets/opset7.hpp>
|
||||||
|
#include <ngraph/pass/manager.hpp>
|
||||||
|
|
||||||
|
#include <transformations/control_flow/unroll_tensor_iterator.hpp>
|
||||||
|
#include <transformations/init_node_info.hpp>
|
||||||
|
#include <transformations/common_optimizations/low_latency.hpp>
|
||||||
|
#include <transformations/serialize.hpp>
|
||||||
|
|
||||||
|
#include "common_test_utils/ngraph_test_utils.hpp"
|
||||||
|
|
||||||
|
using namespace testing;
|
||||||
|
using namespace ngraph;
|
||||||
|
using namespace opset7;
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
/// Builds the initializing subgraph used in front of ReadValue in the reference graphs:
/// a one-element zero Constant (element type taken from `in_node`) broadcast to the
/// shape reported by ShapeOf(in_node).
///
/// @param in_node output whose element type and shape drive the subgraph
/// @return output 0 of the created Broadcast node
Output<Node> create_init_subgraph(const Output<Node>& in_node) {
    const auto zero = std::make_shared<Constant>(in_node.get_element_type(), Shape{1}, 0);
    const auto target_shape = std::make_shared<ShapeOf>(in_node);
    return std::make_shared<Broadcast>(zero, target_shape)->output(0);
}
|
||||||
|
|
||||||
|
/// Appends an Unsqueeze followed by a Squeeze (both over axis 1) to `in_node`.
/// The reference graphs in this file place this pair in front of their Result nodes.
///
/// @param in_node output to wrap
/// @return output 0 of the trailing Squeeze node
Output<Node> insert_identity(const Output<Node>& in_node) {
    const auto axes = Constant::create(element::i64, Shape{1}, {1});
    const auto expanded = std::make_shared<Unsqueeze>(in_node, axes);
    const auto restored = std::make_shared<Squeeze>(expanded, axes);
    return restored->output(0);
}
|
||||||
|
|
||||||
|
std::shared_ptr<Function> createLSTMBody(const std::shared_ptr<Parameter>& Xi,
|
||||||
|
const std::shared_ptr<Parameter>& H_t,
|
||||||
|
const std::shared_ptr<Parameter>& C_t,
|
||||||
|
bool is_loop = false) {
|
||||||
|
// Body
|
||||||
|
auto axis = Constant::create(element::i64, Shape{}, {0});
|
||||||
|
auto squeeze = std::make_shared<Squeeze>(Xi, axis);
|
||||||
|
|
||||||
|
auto w_val = std::vector<float>(512 * 16, 0);
|
||||||
|
auto r_val = std::vector<float>(512 * 128, 0);
|
||||||
|
auto b_val = std::vector<float>(512, 0);
|
||||||
|
auto W = Constant::create(element::f32, Shape{512, 16}, w_val);
|
||||||
|
auto R = Constant::create(element::f32, Shape{512, 128}, r_val);
|
||||||
|
auto B = Constant::create(element::f32, Shape{512}, b_val);
|
||||||
|
|
||||||
|
auto lstm_cell = std::make_shared<LSTMCell>(squeeze, H_t, C_t, W, R, B, 128);
|
||||||
|
auto res_1 = std::make_shared<Result>(lstm_cell->output(0));
|
||||||
|
auto unsqueeze = std::make_shared<Unsqueeze>(lstm_cell->output(0), axis);
|
||||||
|
auto res_2 = std::make_shared<Result>(unsqueeze);
|
||||||
|
auto res_3 = std::make_shared<Result>(lstm_cell->output(1));
|
||||||
|
|
||||||
|
auto func = std::make_shared<Function>(OutputVector{res_1, res_2, res_3},
|
||||||
|
ParameterVector{Xi, H_t, C_t});
|
||||||
|
if (is_loop) {
|
||||||
|
auto body_condition = std::make_shared<Constant>(
|
||||||
|
element::boolean, Shape{1}, true);
|
||||||
|
auto cond_res = std::make_shared<Result>(body_condition);
|
||||||
|
func->add_results({cond_res});
|
||||||
|
}
|
||||||
|
return func;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Runs LowLatency2 on a TensorIterator whose body is an LSTMCell (H and C are merged
// inputs, X is sliced) and compares the result with a hand-built reference in which the
// LSTMCell reads its states from ReadValue nodes and writes them back through Assign sinks.
TEST(TransformationTests, LowLatency2_LSTM) {
    std::shared_ptr<Function> f;
    std::shared_ptr<Function> f_ref;

    // Function under test.
    {
        const auto X = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        const auto H_init = std::make_shared<Parameter>(element::f32, Shape{1, 128});
        const auto C_init = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        const auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        const auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
        const auto C_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        const auto body = createLSTMBody(Xi, H_t, C_t);
        const auto body_results = body->get_results();

        const auto ti = std::make_shared<TensorIterator>();
        ti->set_body(body);
        ti->set_friendly_name("LSTMTensorIterator");

        // The order of these calls defines the iterator's input ordering; keep it.
        ti->set_merged_input(C_t, C_init, body_results[2]);
        ti->set_sliced_input(Xi, X, 0, 1, 1, -1, 0);
        ti->set_merged_input(H_t, H_init, body_results[0]);

        // Register the two iterator outputs (return values are not needed here).
        ti->get_iter_value(body_results[0], -1);
        ti->get_concatenated_slices(body_results[1], 0, 1, 1, -1, 0);

        const auto result_of_out1 = std::make_shared<Result>(ti->output(1));
        const auto result_of_out0 = std::make_shared<Result>(ti->output(0));
        f = std::make_shared<Function>(NodeVector{result_of_out1, result_of_out0},
                                       ParameterVector{X, H_init, C_init});

        pass::Manager manager;
        manager.register_pass<pass::InitNodeInfo>();
        manager.register_pass<pass::LowLatency2>();
        manager.run_passes(f);
        ASSERT_NO_THROW(check_rt_info(f));
    }

    // Reference function.
    {
        const auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        const auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
        const auto C_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        const std::string h_variable_id("LSTMTensorIterator/variable0");
        const std::string c_variable_id("LSTMTensorIterator/variable1");
        const auto h_variable = std::make_shared<Variable>(
            VariableInfo{PartialShape::dynamic(), element::dynamic, h_variable_id});
        const auto c_variable = std::make_shared<Variable>(
            VariableInfo{PartialShape::dynamic(), element::dynamic, c_variable_id});
        const auto h_read = std::make_shared<ReadValue>(create_init_subgraph(H_t), h_variable);
        const auto c_read = std::make_shared<ReadValue>(create_init_subgraph(C_t), c_variable);

        // Same cell as in createLSTMBody, but driven by the ReadValue nodes.
        const auto axis = Constant::create(element::i64, Shape{}, {0});
        const auto squeezed_input = std::make_shared<Squeeze>(Xi, axis);

        const std::vector<float> w_data(512 * 16, 0);
        const std::vector<float> r_data(512 * 128, 0);
        const std::vector<float> b_data(512, 0);
        const auto W = Constant::create(element::f32, Shape{512, 16}, w_data);
        const auto R = Constant::create(element::f32, Shape{512, 128}, r_data);
        const auto B = Constant::create(element::f32, Shape{512}, b_data);

        const auto cell = std::make_shared<LSTMCell>(squeezed_input, h_read, c_read, W, R, B, 128);
        const auto h_assign = std::make_shared<Assign>(cell->output(0), h_variable);
        const auto c_assign = std::make_shared<Assign>(cell->output(1), c_variable);
        const auto hidden_unsqueezed = std::make_shared<Unsqueeze>(cell->output(0), axis);
        const auto concat_like_result = std::make_shared<Result>(insert_identity(hidden_unsqueezed));
        const auto hidden_result = std::make_shared<Result>(insert_identity(cell->output(0)));

        f_ref = std::make_shared<Function>(OutputVector{hidden_result, concat_like_result},
                                           ParameterVector{Xi, H_t, C_t});
        f_ref->add_sinks({c_assign, h_assign});
        h_assign->add_control_dependency(h_read);
        c_assign->add_control_dependency(c_read);
    }

    const auto comparison = compare_functions(f, f_ref);
    ASSERT_TRUE(comparison.first) << comparison.second;
}
|
||||||
|
|
||||||
|
// Runs LowLatency2 on a TensorIterator whose body is a GRUCell (Y is a merged input,
// X is sliced) and compares the result with a hand-built reference in which the GRUCell
// reads its state from a ReadValue node and writes it back through an Assign sink.
TEST(TransformationTests, LowLatency2_GRU) {
    std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
    {
        auto X = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        auto Y = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        auto Yi = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        // Body
        auto axis = Constant::create(element::i64, Shape{}, {0});
        auto squeeze = std::make_shared<Squeeze>(Xi, axis);

        auto w_val = std::vector<float>(384 * 16, 0);
        auto r_val = std::vector<float>(384 * 128, 0);
        auto b_val = std::vector<float>(384, 0);
        auto W = Constant::create(element::f32, Shape{384, 16}, w_val);
        auto R = Constant::create(element::f32, Shape{384, 128}, r_val);
        auto B = Constant::create(element::f32, Shape{384}, b_val);

        auto gru_cell = std::make_shared<GRUCell>(squeeze, Yi, W, R, B, 128);
        auto res_1 = std::make_shared<Result>(gru_cell);
        auto unsqueeze = std::make_shared<Unsqueeze>(gru_cell, axis);
        auto res_2 = std::make_shared<Result>(unsqueeze);
        auto body = std::make_shared<Function>(OutputVector{res_1, res_2}, ParameterVector{Xi, Yi});

        // NOTE(review): no friendly name is set on this iterator, while the reference below
        // uses the variable id "GRUTensorIterator/variable0"; presumably compare_functions
        // does not compare variable ids - confirm.
        auto tensor_iterator = std::make_shared<TensorIterator>();
        tensor_iterator->set_body(body);

        tensor_iterator->set_sliced_input(Xi, X, 0, 1, 1, -1, 0);
        tensor_iterator->set_merged_input(Yi, Y, res_1);

        // Register the two iterator outputs; the returned handles are not needed
        // because the outputs are accessed by index below.
        tensor_iterator->get_iter_value(res_1, -1);
        tensor_iterator->get_concatenated_slices(res_2, 0, 1, 1, -1, 0);

        auto res_ti_1 = std::make_shared<Result>(tensor_iterator->output(1));
        f = std::make_shared<Function>(NodeVector{res_ti_1}, ParameterVector{X, Y});

        pass::Manager manager;
        manager.register_pass<pass::InitNodeInfo>();
        manager.register_pass<pass::LowLatency2>();

        manager.run_passes(f);

        ASSERT_NO_THROW(check_rt_info(f));
    }
    {
        auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        const std::string variable_name_H("GRUTensorIterator/variable0");
        auto variable_H = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_H});
        auto read_value_H = std::make_shared<ReadValue>(create_init_subgraph(H_t), variable_H);
        // Body
        auto axis = Constant::create(element::i64, Shape{}, {0});
        auto squeeze = std::make_shared<Squeeze>(Xi, axis);

        auto w_val = std::vector<float>(384 * 16, 0);
        auto r_val = std::vector<float>(384 * 128, 0);
        auto b_val = std::vector<float>(384, 0);
        auto W = Constant::create(element::f32, Shape{384, 16}, w_val);
        auto R = Constant::create(element::f32, Shape{384, 128}, r_val);
        auto B = Constant::create(element::f32, Shape{384}, b_val);

        auto gru_cell = std::make_shared<GRUCell>(squeeze, read_value_H, W, R, B, 128);
        // The Assign is registered as a sink below; it must not be wrapped in a Result.
        auto assign_H = std::make_shared<Assign>(gru_cell->output(0), variable_H);
        auto unsqueeze = std::make_shared<Unsqueeze>(gru_cell->output(0), axis);
        auto res_2 = std::make_shared<Result>(insert_identity(unsqueeze));
        f_ref = std::make_shared<Function>(ResultVector {res_2}, ParameterVector{Xi, H_t});
        f_ref->add_sinks({assign_H});
        assign_H->add_control_dependency(read_value_H);
    }
    auto res = compare_functions(f, f_ref);
    ASSERT_TRUE(res.first) << res.second;
}
|
||||||
|
|
||||||
|
// Runs LowLatency2 on a TensorIterator whose body is an RNNCell (Y is a merged input,
// X is sliced) and compares the result with a hand-built reference in which the RNNCell
// reads its state from a ReadValue node and writes it back through an Assign sink.
TEST(TransformationTests, LowLatency2_RNN) {
    std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
    {
        auto X = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        auto Y = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        auto Yi = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        // Body
        auto axis = Constant::create(element::i64, Shape{}, {0});
        auto squeeze = std::make_shared<Squeeze>(Xi, axis);

        auto w_val = std::vector<float>(128 * 16, 0);
        auto r_val = std::vector<float>(128 * 128, 0);
        auto b_val = std::vector<float>(128, 0);
        auto W = Constant::create(element::f32, Shape{128, 16}, w_val);
        auto R = Constant::create(element::f32, Shape{128, 128}, r_val);
        auto B = Constant::create(element::f32, Shape{128}, b_val);

        auto rnn_cell = std::make_shared<RNNCell>(squeeze, Yi, W, R, B, 128);
        auto res_1 = std::make_shared<Result>(rnn_cell);
        auto unsqueeze = std::make_shared<Unsqueeze>(rnn_cell, axis);
        auto res_2 = std::make_shared<Result>(unsqueeze);
        auto body = std::make_shared<Function>(OutputVector{res_1, res_2}, ParameterVector{Xi,
                                                                                           Yi});

        // NOTE(review): no friendly name is set on this iterator, while the reference below
        // uses the variable id "RNNTensorIterator/variable0"; presumably compare_functions
        // does not compare variable ids - confirm.
        auto tensor_iterator = std::make_shared<TensorIterator>();
        tensor_iterator->set_body(body);

        tensor_iterator->set_sliced_input(Xi, X, 0, 1, 1, -1, 0);
        tensor_iterator->set_merged_input(Yi, Y, res_1);

        // Register the two iterator outputs; the returned handles are not needed
        // because the outputs are accessed by index below.
        tensor_iterator->get_iter_value(res_1, -1);
        tensor_iterator->get_concatenated_slices(res_2, 0, 1, 1, -1, 0);

        auto res_ti_1 = std::make_shared<Result>(tensor_iterator->output(1));
        f = std::make_shared<Function>(NodeVector{res_ti_1}, ParameterVector{X, Y});

        pass::Manager manager;
        manager.register_pass<pass::InitNodeInfo>();
        manager.register_pass<pass::LowLatency2>();

        manager.run_passes(f);

        ASSERT_NO_THROW(check_rt_info(f));
    }
    {
        auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        const std::string variable_name_H("RNNTensorIterator/variable0");
        auto variable_H = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_H});
        auto read_value_H = std::make_shared<ReadValue>(create_init_subgraph(H_t), variable_H);
        // Body
        auto axis = Constant::create(element::i64, Shape{}, {0});
        auto squeeze = std::make_shared<Squeeze>(Xi, axis);

        auto w_val = std::vector<float>(128 * 16, 0);
        auto r_val = std::vector<float>(128 * 128, 0);
        auto b_val = std::vector<float>(128, 0);
        auto W = Constant::create(element::f32, Shape{128, 16}, w_val);
        auto R = Constant::create(element::f32, Shape{128, 128}, r_val);
        auto B = Constant::create(element::f32, Shape{128}, b_val);

        auto rnn_cell = std::make_shared<RNNCell>(squeeze, read_value_H, W, R, B, 128);
        // The Assign is registered as a sink below; it must not be wrapped in a Result.
        auto assign_H = std::make_shared<Assign>(rnn_cell->output(0), variable_H);
        auto unsqueeze = std::make_shared<Unsqueeze>(rnn_cell->output(0), axis);
        auto res_2 = std::make_shared<Result>(insert_identity(unsqueeze));
        f_ref = std::make_shared<Function>(ResultVector{res_2}, ParameterVector{Xi, H_t});
        f_ref->add_sinks({assign_H});
        assign_H->add_control_dependency(read_value_H);
    }
    auto res = compare_functions(f, f_ref);
    ASSERT_TRUE(res.first) << res.second;
}
|
||||||
|
|
||||||
|
// Same scenario as LowLatency2_LSTM, except that the function is first built with an
// X input of shape {2, 1, 16} and then reshaped to {1, 1, 16} (by replacing parameter 0
// and re-validating) before LowLatency2 is applied.
TEST(TransformationTests, LowLatency2_LSTMReshape) {
    std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
    {
        auto X = std::make_shared<Parameter>(element::f32, Shape{2, 1, 16});
        auto H = std::make_shared<Parameter>(element::f32, Shape{1, 128});
        auto C = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
        auto C_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        // Body
        auto body = createLSTMBody(Xi, H_t, C_t);
        auto results = body->get_results();

        // NOTE(review): no friendly name is set on this iterator, while the reference below
        // uses variable ids prefixed with "LSTMTensorIterator/"; presumably compare_functions
        // does not compare variable ids - confirm.
        auto tensor_iterator = std::make_shared<TensorIterator>();
        tensor_iterator->set_body(body);

        tensor_iterator->set_merged_input(C_t, C, results[2]);
        tensor_iterator->set_sliced_input(Xi, X, 0, 1, 1, -1, 0);
        tensor_iterator->set_merged_input(H_t, H, results[0]);

        // Register the two iterator outputs; the returned handles are not needed
        // because the outputs are accessed by index below.
        tensor_iterator->get_iter_value(results[0], -1);
        tensor_iterator->get_concatenated_slices(results[1], 0, 1, 1, -1, 0);

        auto res_ti_1 = std::make_shared<Result>(tensor_iterator->output(1));
        auto res_ti_2 = std::make_shared<Result>(tensor_iterator->output(0));
        f = std::make_shared<Function>(NodeVector{res_ti_1, res_ti_2}, ParameterVector{X, H,
                                                                                       C});

        // Reshape
        // change the number of iteration of TI. 2 -> 1
        auto new_X = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        f->replace_parameter(0, new_X);
        f->validate_nodes_and_infer_types();

        pass::Manager manager;
        manager.register_pass<pass::InitNodeInfo>();
        manager.register_pass<pass::LowLatency2>();

        manager.run_passes(f);
        ASSERT_NO_THROW(check_rt_info(f));
    }
    {
        auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
        auto C_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        const std::string variable_name_H("LSTMTensorIterator/variable0");
        const std::string variable_name_C("LSTMTensorIterator/variable1");
        auto variable_H = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_H});
        auto variable_C = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_C});
        auto read_value_H = std::make_shared<ReadValue>(create_init_subgraph(H_t), variable_H);
        auto read_value_C = std::make_shared<ReadValue>(create_init_subgraph(C_t), variable_C);
        // Body
        auto axis = Constant::create(element::i64, Shape{}, {0});
        auto squeeze = std::make_shared<Squeeze>(Xi, axis);

        auto w_val = std::vector<float>(512 * 16, 0);
        auto r_val = std::vector<float>(512 * 128, 0);
        auto b_val = std::vector<float>(512, 0);
        auto W = Constant::create(element::f32, Shape{512, 16}, w_val);
        auto R = Constant::create(element::f32, Shape{512, 128}, r_val);
        auto B = Constant::create(element::f32, Shape{512}, b_val);

        auto lstm_cell = std::make_shared<LSTMCell>(squeeze, read_value_H, read_value_C, W, R, B, 128);
        auto assign_H = std::make_shared<Assign>(lstm_cell->output(0), variable_H);
        auto assign_C = std::make_shared<Assign>(lstm_cell->output(1), variable_C);
        auto unsqueeze = std::make_shared<Unsqueeze>(lstm_cell->output(0), axis);
        auto res_2 = std::make_shared<Result>(insert_identity(unsqueeze));
        auto res_1 = std::make_shared<Result>(insert_identity(lstm_cell->output(0)));
        f_ref = std::make_shared<Function>(OutputVector{res_1, res_2}, ParameterVector{Xi, H_t, C_t});
        f_ref->add_sinks({assign_C, assign_H});
        assign_H->add_control_dependency(read_value_H);
        assign_C->add_control_dependency(read_value_C);
    }
    auto res = compare_functions(f, f_ref);
    ASSERT_TRUE(res.first) << res.second;
}
|
||||||
|
|
||||||
|
// Runs LowLatency2 on a Loop (constant trip count 1) whose body is an LSTM cell.
// The expected graph (f_ref) contains no Loop: the cell takes its H/C state from
// ReadValue ops and stores the new state through Assign sinks.
TEST(TransformationTests, LowLatency2_LSTM_Loop) {
    std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
    {
        auto X = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        auto H_init = std::make_shared<Parameter>(element::f32, Shape{1, 128});
        auto C_init = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
        auto C_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        // Body. The graph is produced entirely by createLSTMBody; no extra nodes are
        // attached to the body parameters here.
        // NOTE(review): the `true` flag presumably selects the Loop flavour of the body
        // (extra condition output used as port 3 below) - confirm against createLSTMBody.
        auto body = createLSTMBody(Xi, H_t, C_t, true);
        auto results = body->get_results();

        auto trip_count =
            std::make_shared<Constant>(element::i64, Shape{}, 1);
        auto exec_condition =
            std::make_shared<Constant>(element::boolean, Shape{}, true);
        auto loop = std::make_shared<Loop>(trip_count, exec_condition);
        loop->set_special_body_ports({-1, 3});
        loop->set_function(body);
        loop->set_friendly_name("LSTMLoop");

        loop->set_merged_input(C_t, C_init, results[2]);
        loop->set_sliced_input(Xi, X, 0, 1, 1, -1, 0);
        loop->set_merged_input(H_t, H_init, results[0]);

        // The returned handles are not used directly; loop->output(0) / loop->output(1)
        // are consumed below instead.
        auto out0 = loop->get_iter_value(results[0], -1);
        auto out1 = loop->get_concatenated_slices(results[1], 0, 1, 1, -1, 0);

        auto res_ti_1 = std::make_shared<Result>(loop->output(1));
        auto res_ti_2 = std::make_shared<Result>(loop->output(0));
        f = std::make_shared<Function>(NodeVector{res_ti_1, res_ti_2},
                                       ParameterVector{X, H_init, C_init});

        pass::Manager manager;
        manager.register_pass<pass::InitNodeInfo>();
        manager.register_pass<pass::LowLatency2>();

        manager.run_passes(f);
        ASSERT_NO_THROW(check_rt_info(f));
    }
    {
        auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
        auto C_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        const std::string variable_name_H("LSTMTensorIterator/variable0");
        const std::string variable_name_C("LSTMTensorIterator/variable1");
        auto variable_H = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_H});
        auto variable_C = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_C});
        auto read_value_H = std::make_shared<ReadValue>(create_init_subgraph(H_t), variable_H);
        auto read_value_C = std::make_shared<ReadValue>(create_init_subgraph(C_t), variable_C);

        // Body
        auto axis = Constant::create(element::i64, Shape{}, {0});
        auto squeeze = std::make_shared<Squeeze>(Xi, axis);

        auto w_val = std::vector<float>(512 * 16, 0);
        auto r_val = std::vector<float>(512 * 128, 0);
        auto b_val = std::vector<float>(512, 0);
        auto W = Constant::create(element::f32, Shape{512, 16}, w_val);
        auto R = Constant::create(element::f32, Shape{512, 128}, r_val);
        auto B = Constant::create(element::f32, Shape{512}, b_val);

        auto lstm_cell = std::make_shared<LSTMCell>(squeeze, read_value_H, read_value_C, W, R, B, 128);
        auto assign_H = std::make_shared<Assign>(lstm_cell->output(0), variable_H);
        auto assign_C = std::make_shared<Assign>(lstm_cell->output(1), variable_C);
        auto unsqueeze = std::make_shared<Unsqueeze>(lstm_cell->output(0), axis);
        auto res_2 = std::make_shared<Result>(insert_identity(unsqueeze));
        auto res_1 = std::make_shared<Result>(insert_identity(lstm_cell->output(0)));
        f_ref = std::make_shared<Function>(OutputVector{res_1, res_2}, ParameterVector{Xi, H_t, C_t});
        f_ref->add_sinks({assign_C, assign_H});
        assign_H->add_control_dependency(read_value_H);
        assign_C->add_control_dependency(read_value_C);
    }
    auto res = compare_functions(f, f_ref);
    ASSERT_TRUE(res.first) << res.second;
}
|
||||||
|
|
||||||
|
// Runs LowLatency2 on a TensorIterator that iterates ITER_CNT times over an LSTM cell.
// The expected graph (f_ref) still contains the TensorIterator: its merged H/C inputs are
// fed by ReadValue ops and the last-iteration H/C outputs are stored through Assign sinks.
TEST(TransformationTests, LowLatency2_LSTM_several_iterations) {
    constexpr int ITER_CNT = 5;
    std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
    {
        auto X = std::make_shared<Parameter>(element::f32, Shape{ITER_CNT, 1, 16});
        auto H = std::make_shared<Parameter>(element::f32, Shape{1, 128});
        auto C = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
        auto C_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        // Body
        auto body = createLSTMBody(Xi, H_t, C_t);
        auto results = body->get_results();

        auto tensor_iterator = std::make_shared<TensorIterator>();
        tensor_iterator->set_body(body);

        tensor_iterator->set_merged_input(C_t, C, results[2]);
        tensor_iterator->set_sliced_input(Xi, X, 0, 1, 1, -1, 0);
        tensor_iterator->set_merged_input(H_t, H, results[0]);

        // The returned handles are not used directly; tensor_iterator->output(0/1)
        // are consumed below instead.
        auto out0 = tensor_iterator->get_iter_value(results[0], -1);
        auto out1 = tensor_iterator->get_concatenated_slices(results[1], 0, 1, 1, -1, 0);

        auto res_ti_1 = std::make_shared<Result>(tensor_iterator->output(1));
        auto res_ti_2 = std::make_shared<Result>(tensor_iterator->output(0));
        f = std::make_shared<Function>(NodeVector{res_ti_1, res_ti_2}, ParameterVector{X, H,
                                                                                      C});

        pass::Manager manager;
        manager.register_pass<pass::InitNodeInfo>();
        manager.register_pass<pass::LowLatency2>();

        manager.run_passes(f);
        ASSERT_NO_THROW(check_rt_info(f));
    }

    // TensorIterator not unrolled.
    {
        auto X = std::make_shared<Parameter>(element::f32, Shape{ITER_CNT, 1, 16});
        auto H = std::make_shared<Parameter>(element::f32, Shape{1, 128});
        auto C = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        const std::string variable_name_H("LSTMTensorIterator/variable0");
        const std::string variable_name_C("LSTMTensorIterator/variable1");
        auto variable_H = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_H});
        auto variable_C = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name_C});
        auto read_value_H = std::make_shared<ReadValue>(create_init_subgraph(H), variable_H);
        auto read_value_C = std::make_shared<ReadValue>(create_init_subgraph(C), variable_C);

        // Body parameters
        auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
        auto C_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        // Body
        auto axis = Constant::create(element::i64, Shape{}, {0});
        auto squeeze = std::make_shared<Squeeze>(Xi, axis);

        auto w_val = std::vector<float>(512 * 16, 0);
        auto r_val = std::vector<float>(512 * 128, 0);
        auto b_val = std::vector<float>(512, 0);
        auto W = Constant::create(element::f32, Shape{512, 16}, w_val);
        auto R = Constant::create(element::f32, Shape{512, 128}, r_val);
        auto B = Constant::create(element::f32, Shape{512}, b_val);

        auto lstm_cell = std::make_shared<LSTMCell>(squeeze, H_t, C_t, W, R, B, 128);
        auto res_1 = std::make_shared<Result>(lstm_cell->output(0));
        // Select the hidden-state output explicitly (same as the other reference bodies)
        // instead of relying on the node's implicit default output.
        auto unsqueeze = std::make_shared<Unsqueeze>(lstm_cell->output(0), axis);
        auto res_2 = std::make_shared<Result>(unsqueeze);
        auto res_3 = std::make_shared<Result>(lstm_cell->output(1));
        auto body = std::make_shared<Function>(OutputVector{res_1, res_2, res_3},
                                               ParameterVector{Xi, H_t, C_t});

        auto tensor_iterator = std::make_shared<TensorIterator>();
        tensor_iterator->set_body(body);

        tensor_iterator->set_merged_input(C_t, read_value_C, res_3);
        tensor_iterator->set_sliced_input(Xi, X, 0, 1, 1, -1, 0);
        tensor_iterator->set_merged_input(H_t, read_value_H, res_1);

        auto out0 = tensor_iterator->get_iter_value(res_1, -1);
        auto out1 = tensor_iterator->get_concatenated_slices(res_2, 0, 1, 1, -1, 0);
        auto out2 = tensor_iterator->get_iter_value(res_3, -1);

        auto assign_H = std::make_shared<Assign>(out0, variable_H);
        auto assign_C = std::make_shared<Assign>(out2, variable_C);
        auto outer_res_2 = std::make_shared<Result>(out1);
        auto outer_res_1 = std::make_shared<Result>(out0);
        f_ref = std::make_shared<Function>(OutputVector{outer_res_1, outer_res_2}, ParameterVector{X, H, C});
        f_ref->add_sinks({assign_C, assign_H});
        assign_H->add_control_dependency(read_value_H);
        assign_C->add_control_dependency(read_value_C);
    }
    auto res = compare_functions(f, f_ref);
    ASSERT_TRUE(res.first) << res.second;
}
|
||||||
|
|
||||||
|
// Builds a Loop whose trip count is derived from the shape of X (10 steps), swaps X for a
// single-step parameter, and then runs LowLatency2. The expected graph (f_ref) has no Loop:
// the LSTM cell is wired to ReadValue/Assign state ops.
TEST(TransformationTests, LowLatency2_LSTM_Loop_Reshape) {
    std::shared_ptr<Function> f;
    std::shared_ptr<Function> f_ref;
    {
        auto X = std::make_shared<Parameter>(element::f32, Shape{10, 1, 16});
        auto H_init = std::make_shared<Parameter>(element::f32, Shape{1, 128});
        auto C_init = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
        auto C_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        // Loop body
        auto lstm_body = createLSTMBody(Xi, H_t, C_t, true);
        auto body_results = lstm_body->get_results();

        // Trip count = Gather(ShapeOf(X), indices, axis), both constants holding 0.
        auto x_shape = std::make_shared<ShapeOf>(X);
        const auto iterations = std::make_shared<Gather>(x_shape,
                                                         Constant::create(ngraph::element::i64, {1}, {0}),
                                                         Constant::create(ngraph::element::i64, {1}, {0}));
        auto run_condition = std::make_shared<Constant>(element::boolean, Shape{}, true);
        auto lstm_loop = std::make_shared<Loop>(iterations, run_condition);
        lstm_loop->set_special_body_ports({-1, 3});
        lstm_loop->set_function(lstm_body);
        lstm_loop->set_friendly_name("LSTMLoop");

        lstm_loop->set_merged_input(C_t, C_init, body_results[2]);
        lstm_loop->set_sliced_input(Xi, X, 0, 1, 1, -1, 0);
        lstm_loop->set_merged_input(H_t, H_init, body_results[0]);

        auto last_h = lstm_loop->get_iter_value(body_results[0], -1);
        auto all_h = lstm_loop->get_concatenated_slices(body_results[1], 0, 1, 1, -1, 0);

        auto loop_result_1 = std::make_shared<Result>(lstm_loop->output(1));
        auto loop_result_2 = std::make_shared<Result>(lstm_loop->output(0));
        f = std::make_shared<Function>(NodeVector{loop_result_1, loop_result_2},
                                       ParameterVector{X, H_init, C_init});

        // Reshape: change the number of Loop iterations, 10 -> 1.
        auto single_step_X = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        f->replace_parameter(0, single_step_X);
        f->validate_nodes_and_infer_types();

        pass::Manager pass_manager;
        pass_manager.register_pass<pass::InitNodeInfo>();
        pass_manager.register_pass<pass::LowLatency2>();

        pass_manager.run_passes(f);
        ASSERT_NO_THROW(check_rt_info(f));
    }
    {
        auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
        auto C_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        // State variables and the ReadValue ops that feed the cell.
        auto state_H = std::make_shared<Variable>(
            VariableInfo{PartialShape::dynamic(), element::dynamic, "LSTMTensorIterator/variable0"});
        auto state_C = std::make_shared<Variable>(
            VariableInfo{PartialShape::dynamic(), element::dynamic, "LSTMTensorIterator/variable1"});
        auto read_H = std::make_shared<ReadValue>(create_init_subgraph(H_t), state_H);
        auto read_C = std::make_shared<ReadValue>(create_init_subgraph(C_t), state_C);

        // Cell inputs
        auto axis = Constant::create(element::i64, Shape{}, {0});
        auto squeezed_X = std::make_shared<Squeeze>(Xi, axis);

        // Zero-filled weights, recurrence weights and biases.
        auto W = Constant::create(element::f32, Shape{512, 16}, std::vector<float>(512 * 16, 0));
        auto R = Constant::create(element::f32, Shape{512, 128}, std::vector<float>(512 * 128, 0));
        auto B = Constant::create(element::f32, Shape{512}, std::vector<float>(512, 0));

        auto cell = std::make_shared<LSTMCell>(squeezed_X, read_H, read_C, W, R, B, 128);
        auto write_H = std::make_shared<Assign>(cell->output(0), state_H);
        auto write_C = std::make_shared<Assign>(cell->output(1), state_C);
        auto unsqueezed_H = std::make_shared<Unsqueeze>(cell->output(0), axis);
        auto res_2 = std::make_shared<Result>(insert_identity(unsqueezed_H));
        auto res_1 = std::make_shared<Result>(insert_identity(cell->output(0)));
        f_ref = std::make_shared<Function>(OutputVector{res_1, res_2}, ParameterVector{Xi, H_t, C_t});
        f_ref->add_sinks({write_C, write_H});
        write_H->add_control_dependency(read_H);
        write_C->add_control_dependency(read_C);
    }
    const auto comparison = compare_functions(f, f_ref);
    ASSERT_TRUE(comparison.first) << comparison.second;
}
|
||||||
|
|
||||||
|
|
||||||
|
// Runs LowLatency2 (constructed with `true`) on a Loop with a constant trip count of 10.
// The expected graph (f_ref) keeps the Loop: its merged H/C inputs come from ReadValue ops
// and the last-iteration H/C outputs are stored through Assign sinks.
TEST(TransformationTests, LowLatency2_LSTM_Loop_several_iterations) {
    std::shared_ptr<Function> f;
    std::shared_ptr<Function> f_ref;
    {
        auto X = std::make_shared<Parameter>(element::f32, Shape{10, 1, 16});
        auto H_init = std::make_shared<Parameter>(element::f32, Shape{1, 128});
        auto C_init = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
        auto C_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        // Loop body
        auto lstm_body = createLSTMBody(Xi, H_t, C_t, true);
        auto body_results = lstm_body->get_results();

        auto iterations = std::make_shared<Constant>(element::i64, Shape{}, 10);
        auto run_condition = std::make_shared<Constant>(element::boolean, Shape{}, true);
        auto lstm_loop = std::make_shared<Loop>(iterations, run_condition);
        lstm_loop->set_special_body_ports({-1, 3});
        lstm_loop->set_function(lstm_body);
        lstm_loop->set_friendly_name("LSTMLoop");

        lstm_loop->set_merged_input(C_t, C_init, body_results[2]);
        lstm_loop->set_sliced_input(Xi, X, 0, 1, 1, -1, 0);
        lstm_loop->set_merged_input(H_t, H_init, body_results[0]);

        auto last_h = lstm_loop->get_iter_value(body_results[0], -1);
        auto all_h = lstm_loop->get_concatenated_slices(body_results[1], 0, 1, 1, -1, 0);

        auto loop_result_1 = std::make_shared<Result>(lstm_loop->output(1));
        auto loop_result_2 = std::make_shared<Result>(lstm_loop->output(0));
        f = std::make_shared<Function>(NodeVector{loop_result_1, loop_result_2},
                                       ParameterVector{X, H_init, C_init});

        pass::Manager pass_manager;
        pass_manager.register_pass<pass::InitNodeInfo>();
        pass_manager.register_pass<pass::LowLatency2>(true);

        pass_manager.run_passes(f);
        ASSERT_NO_THROW(check_rt_info(f));
    }
    {
        auto X = std::make_shared<Parameter>(element::f32, Shape{10, 1, 16});
        auto H = std::make_shared<Parameter>(element::f32, Shape{1, 128});
        auto C = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        // State variables and the ReadValue ops that feed the Loop.
        auto state_H = std::make_shared<Variable>(
            VariableInfo{PartialShape::dynamic(), element::dynamic, "LSTMTensorIterator/variable0"});
        auto state_C = std::make_shared<Variable>(
            VariableInfo{PartialShape::dynamic(), element::dynamic, "LSTMTensorIterator/variable1"});
        auto read_H = std::make_shared<ReadValue>(create_init_subgraph(H), state_H);
        auto read_C = std::make_shared<ReadValue>(create_init_subgraph(C), state_C);

        // Body parameters
        auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
        auto C_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        // Body graph
        auto axis = Constant::create(element::i64, Shape{}, {0});
        auto squeezed_X = std::make_shared<Squeeze>(Xi, axis);

        // Zero-filled weights, recurrence weights and biases.
        auto W = Constant::create(element::f32, Shape{512, 16}, std::vector<float>(512 * 16, 0));
        auto R = Constant::create(element::f32, Shape{512, 128}, std::vector<float>(512 * 128, 0));
        auto B = Constant::create(element::f32, Shape{512}, std::vector<float>(512, 0));

        auto cell = std::make_shared<LSTMCell>(squeezed_X, H_t, C_t, W, R, B, 128);
        auto res_1 = std::make_shared<Result>(cell->output(0));
        auto unsqueezed_H = std::make_shared<Unsqueeze>(cell->output(0), axis);
        auto res_2 = std::make_shared<Result>(unsqueezed_H);
        auto res_3 = std::make_shared<Result>(cell->output(1));
        // Fourth body output; referenced as port 3 in set_special_body_ports below.
        auto body_condition = std::make_shared<Constant>(element::boolean, Shape{1}, true);
        auto ref_body = std::make_shared<Function>(OutputVector{res_1, res_2, res_3, body_condition},
                                                   ParameterVector{Xi, H_t, C_t});

        auto iterations = std::make_shared<Constant>(element::i64, Shape{}, 10);
        auto run_condition = std::make_shared<Constant>(element::boolean, Shape{}, true);
        auto lstm_loop = std::make_shared<Loop>(iterations, run_condition);
        lstm_loop->set_special_body_ports({-1, 3});
        lstm_loop->set_function(ref_body);
        lstm_loop->set_friendly_name("LSTMLoop");

        lstm_loop->set_merged_input(C_t, read_C, res_3);
        lstm_loop->set_sliced_input(Xi, X, 0, 1, 1, -1, 0);
        lstm_loop->set_merged_input(H_t, read_H, res_1);

        auto last_h = lstm_loop->get_iter_value(res_1, -1);
        auto all_h = lstm_loop->get_concatenated_slices(res_2, 0, 1, 1, -1, 0);
        auto last_c = lstm_loop->get_iter_value(res_3, -1);

        auto write_H = std::make_shared<Assign>(last_h, state_H);
        auto write_C = std::make_shared<Assign>(last_c, state_C);
        auto outer_res_2 = std::make_shared<Result>(all_h);
        auto outer_res_1 = std::make_shared<Result>(last_h);
        f_ref = std::make_shared<Function>(OutputVector{outer_res_1, outer_res_2}, ParameterVector{X, H, C});
        f_ref->add_sinks({write_C, write_H});
        write_H->add_control_dependency(read_H);
        write_C->add_control_dependency(read_C);
    }
    const auto comparison = compare_functions(f, f_ref);
    ASSERT_TRUE(comparison.first) << comparison.second;
}
|
||||||
|
|
||||||
|
// Smoke test: applying the deprecated LowLatency (v1) pass alone, and applying v1 followed
// by LowLatency2 on the same TensorIterator graph, must not throw. No reference graph is
// compared here.
TEST(TransformationTests, LowLatencyLSTM_LLTv1_LLTv2) {
    std::shared_ptr<Function> f(nullptr);
    {
        auto X = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        auto H_init = std::make_shared<Parameter>(element::f32, Shape{1, 128});
        auto C_init = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        auto Xi = std::make_shared<Parameter>(element::f32, Shape{1, 1, 16});
        auto H_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});
        auto C_t = std::make_shared<Parameter>(element::f32, Shape{1, 128});

        // Body
        auto axis = Constant::create(element::i64, Shape{}, {0});
        auto squeeze = std::make_shared<Squeeze>(Xi, axis);

        auto w_val = std::vector<float>(512 * 16, 0);
        auto r_val = std::vector<float>(512 * 128, 0);
        auto b_val = std::vector<float>(512, 0);
        auto W = Constant::create(element::f32, Shape{512, 16}, w_val);
        auto R = Constant::create(element::f32, Shape{512, 128}, r_val);
        auto B = Constant::create(element::f32, Shape{512}, b_val);

        auto lstm_cell = std::make_shared<LSTMCell>(squeeze, H_t, C_t, W, R, B, 128);
        auto res_1 = std::make_shared<Result>(lstm_cell->output(0));
        auto unsqueeze = std::make_shared<Unsqueeze>(lstm_cell->output(0), axis);
        auto res_2 = std::make_shared<Result>(unsqueeze);
        auto res_3 = std::make_shared<Result>(lstm_cell->output(1));
        auto body = std::make_shared<Function>(OutputVector{res_1, res_2, res_3}, ParameterVector{Xi, H_t, C_t});

        auto tensor_iterator = std::make_shared<TensorIterator>();
        tensor_iterator->set_body(body);
        tensor_iterator->set_friendly_name("LSTMTensorIterator");

        tensor_iterator->set_merged_input(C_t, C_init, res_3);
        tensor_iterator->set_sliced_input(Xi, X, 0, 1, 1, -1, 0);
        tensor_iterator->set_merged_input(H_t, H_init, res_1);

        // The returned handles are not used directly; tensor_iterator->output(0/1)
        // are consumed below instead.
        auto out0 = tensor_iterator->get_iter_value(res_1, -1);
        auto out1 = tensor_iterator->get_concatenated_slices(res_2, 0, 1, 1, -1, 0);

        auto res_ti_1 = std::make_shared<Result>(tensor_iterator->output(1));
        auto res_ti_2 = std::make_shared<Result>(tensor_iterator->output(0));
        f = std::make_shared<Function>(NodeVector{res_ti_1, res_ti_2},
                                       ParameterVector{X, H_init, C_init});

        // First scenario: LLT v1 only, applied to a clone so that `f` stays untouched.
        auto f_2 = ngraph::clone_function(*f);
        pass::Manager manager_2;
        manager_2.register_pass<pass::InitNodeInfo>();
        NGRAPH_SUPPRESS_DEPRECATED_START
        manager_2.register_pass<ngraph::pass::LowLatency>();
        NGRAPH_SUPPRESS_DEPRECATED_END
        EXPECT_NO_THROW(manager_2.run_passes(f_2));

        // Second scenario: LLT v1 followed by LLT v2 on the original function.
        pass::Manager manager;
        manager.register_pass<pass::InitNodeInfo>();
        NGRAPH_SUPPRESS_DEPRECATED_START
        manager.register_pass<ngraph::pass::LowLatency>();
        NGRAPH_SUPPRESS_DEPRECATED_END
        // LLT v2 doesn't insert Assign/ReadValue ops, they are already inserted
        // but unrolls TI/Loop
        manager.register_pass<pass::LowLatency2>();

        EXPECT_NO_THROW(manager.run_passes(f));
    }
}
|
@ -10,6 +10,13 @@ using namespace LayerTestsDefinitions;
|
|||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
|
std::vector<ngraph::helpers::MemoryTransformation> transformation {
|
||||||
|
ngraph::helpers::MemoryTransformation::NONE,
|
||||||
|
ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2,
|
||||||
|
ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API,
|
||||||
|
ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_ORIGINAL_INIT,
|
||||||
|
};
|
||||||
|
|
||||||
const std::vector<InferenceEngine::SizeVector> inShapes = {
|
const std::vector<InferenceEngine::SizeVector> inShapes = {
|
||||||
{3},
|
{3},
|
||||||
{100, 100},
|
{100, 100},
|
||||||
@ -27,6 +34,7 @@ const std::vector<int64_t> iterationCount {
|
|||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(smoke_MemoryTest, MemoryTest,
|
INSTANTIATE_TEST_CASE_P(smoke_MemoryTest, MemoryTest,
|
||||||
::testing::Combine(
|
::testing::Combine(
|
||||||
|
::testing::ValuesIn(transformation),
|
||||||
::testing::ValuesIn(iterationCount),
|
::testing::ValuesIn(iterationCount),
|
||||||
::testing::ValuesIn(inShapes),
|
::testing::ValuesIn(inShapes),
|
||||||
::testing::ValuesIn(inputPrecisions),
|
::testing::ValuesIn(inputPrecisions),
|
||||||
|
@ -0,0 +1,45 @@
|
|||||||
|
// Copyright (C) 2018-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
#include <subgraph_tests/memory_LSTMCell.hpp>
|
||||||
|
#include "common_test_utils/test_constants.hpp"
|
||||||
|
|
||||||
|
namespace SubgraphTestsDefinitions {
|
||||||
|
std::vector<ngraph::helpers::MemoryTransformation> transformation {
|
||||||
|
ngraph::helpers::MemoryTransformation::NONE,
|
||||||
|
ngraph::helpers::MemoryTransformation::LOW_LATENCY,
|
||||||
|
ngraph::helpers::MemoryTransformation::LOW_LATENCY_REGULAR_API,
|
||||||
|
ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2,
|
||||||
|
ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API
|
||||||
|
};
|
||||||
|
|
||||||
|
std::vector<size_t> input_sizes = {
|
||||||
|
80,
|
||||||
|
32,
|
||||||
|
64,
|
||||||
|
100,
|
||||||
|
25
|
||||||
|
};
|
||||||
|
|
||||||
|
std::vector<size_t> hidden_sizes = {
|
||||||
|
128,
|
||||||
|
200,
|
||||||
|
300,
|
||||||
|
24,
|
||||||
|
32,
|
||||||
|
};
|
||||||
|
|
||||||
|
std::map<std::string, std::string> additional_config = {
|
||||||
|
};
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(smoke_MemoryLSTMCellTest, MemoryLSTMCellTest,
|
||||||
|
::testing::Combine(
|
||||||
|
::testing::ValuesIn(transformation),
|
||||||
|
::testing::Values(CommonTestUtils::DEVICE_CPU),
|
||||||
|
::testing::Values(InferenceEngine::Precision::FP32),
|
||||||
|
::testing::ValuesIn(input_sizes),
|
||||||
|
::testing::ValuesIn(hidden_sizes),
|
||||||
|
::testing::Values(additional_config)),
|
||||||
|
MemoryLSTMCellTest::getTestCaseName);
|
||||||
|
} // namespace SubgraphTestsDefinitions
|
@ -7,6 +7,15 @@
|
|||||||
|
|
||||||
namespace SubgraphTestsDefinitions {
|
namespace SubgraphTestsDefinitions {
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
|
std::vector<ngraph::helpers::MemoryTransformation> transformation {
|
||||||
|
ngraph::helpers::MemoryTransformation::NONE,
|
||||||
|
ngraph::helpers::MemoryTransformation::LOW_LATENCY,
|
||||||
|
ngraph::helpers::MemoryTransformation::LOW_LATENCY_REGULAR_API,
|
||||||
|
ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2,
|
||||||
|
ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API
|
||||||
|
};
|
||||||
|
|
||||||
std::vector<size_t> input_sizes = {
|
std::vector<size_t> input_sizes = {
|
||||||
80,
|
80,
|
||||||
32,
|
32,
|
||||||
@ -28,6 +37,7 @@ std::map<std::string, std::string> additional_config = {
|
|||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(MultipleLSTMCellTest, MultipleLSTMCellTest,
|
INSTANTIATE_TEST_CASE_P(MultipleLSTMCellTest, MultipleLSTMCellTest,
|
||||||
::testing::Combine(
|
::testing::Combine(
|
||||||
|
::testing::ValuesIn(transformation),
|
||||||
::testing::Values(CommonTestUtils::DEVICE_CPU),
|
::testing::Values(CommonTestUtils::DEVICE_CPU),
|
||||||
::testing::Values(InferenceEngine::Precision::FP32),
|
::testing::Values(InferenceEngine::Precision::FP32),
|
||||||
::testing::ValuesIn(input_sizes),
|
::testing::ValuesIn(input_sizes),
|
||||||
|
@ -10,9 +10,17 @@ using namespace LayerTestsDefinitions;
|
|||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
|
std::vector<ngraph::helpers::MemoryTransformation> transformation {
|
||||||
|
ngraph::helpers::MemoryTransformation::NONE,
|
||||||
|
ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2,
|
||||||
|
ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API,
|
||||||
|
ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_ORIGINAL_INIT
|
||||||
|
};
|
||||||
|
|
||||||
const std::vector<InferenceEngine::SizeVector> inShapes = {
|
const std::vector<InferenceEngine::SizeVector> inShapes = {
|
||||||
{1, 1},
|
{1, 1},
|
||||||
{1, 2}
|
{1, 2},
|
||||||
|
{1, 10}
|
||||||
};
|
};
|
||||||
|
|
||||||
const std::vector<InferenceEngine::Precision> inputPrecisions = {
|
const std::vector<InferenceEngine::Precision> inputPrecisions = {
|
||||||
@ -22,11 +30,13 @@ const std::vector<InferenceEngine::Precision> inputPrecisions = {
|
|||||||
const std::vector<int64_t> iterationCount {
|
const std::vector<int64_t> iterationCount {
|
||||||
1,
|
1,
|
||||||
3,
|
3,
|
||||||
|
4,
|
||||||
10
|
10
|
||||||
};
|
};
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(smoke_MemoryTest, MemoryTest,
|
INSTANTIATE_TEST_CASE_P(smoke_MemoryTest, MemoryTest,
|
||||||
::testing::Combine(
|
::testing::Combine(
|
||||||
|
::testing::ValuesIn(transformation),
|
||||||
::testing::ValuesIn(iterationCount),
|
::testing::ValuesIn(iterationCount),
|
||||||
::testing::ValuesIn(inShapes),
|
::testing::ValuesIn(inShapes),
|
||||||
::testing::ValuesIn(inputPrecisions),
|
::testing::ValuesIn(inputPrecisions),
|
||||||
|
@ -64,5 +64,13 @@ std::vector<std::string> disabledTestPatterns() {
|
|||||||
R"(.*CachingSupport.*_batch2_.*)",
|
R"(.*CachingSupport.*_batch2_.*)",
|
||||||
// TODO: Issue 51525
|
// TODO: Issue 51525
|
||||||
R"(.*CachingSupport.*KSOFunction.*)",
|
R"(.*CachingSupport.*KSOFunction.*)",
|
||||||
|
// TODO: Issue 57363 (Param -> Result subgraphs)
|
||||||
|
R"(.*smoke_MemoryTest.*LOW_LATENCY.*iteration_count=1_.*)",
|
||||||
|
// TODO: Issue 57368 (accuracy)
|
||||||
|
R"(.*smoke_MemoryTest.*LOW_LATENCY.*IS=\(1.10\).*)",
|
||||||
|
R"(.*smoke_MemoryTest.*iteration_count=3.*IS=\(1.10\).*)",
|
||||||
|
R"(.*smoke_MemoryTest.*iteration_count=4.*IS=\(1.10\).*)",
|
||||||
|
R"(.*smoke_MemoryTest.*iteration_count=10.*IS=\(1.10\).*)",
|
||||||
|
R"(.*smoke_MemoryTest.*LOW_LATENCY.*iteration_count=10.*IS=\(1.2\).*)",
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@ -6,6 +6,14 @@
|
|||||||
#include "common_test_utils/test_constants.hpp"
|
#include "common_test_utils/test_constants.hpp"
|
||||||
|
|
||||||
namespace SubgraphTestsDefinitions {
|
namespace SubgraphTestsDefinitions {
|
||||||
|
std::vector<ngraph::helpers::MemoryTransformation> transformation {
|
||||||
|
ngraph::helpers::MemoryTransformation::NONE,
|
||||||
|
ngraph::helpers::MemoryTransformation::LOW_LATENCY,
|
||||||
|
ngraph::helpers::MemoryTransformation::LOW_LATENCY_REGULAR_API,
|
||||||
|
ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2,
|
||||||
|
ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API
|
||||||
|
};
|
||||||
|
|
||||||
std::vector<size_t> input_sizes = {
|
std::vector<size_t> input_sizes = {
|
||||||
80,
|
80,
|
||||||
32,
|
32,
|
||||||
@ -30,6 +38,7 @@ namespace SubgraphTestsDefinitions {
|
|||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(smoke_MemoryLSTMCellTest, MemoryLSTMCellTest,
|
INSTANTIATE_TEST_CASE_P(smoke_MemoryLSTMCellTest, MemoryLSTMCellTest,
|
||||||
::testing::Combine(
|
::testing::Combine(
|
||||||
|
::testing::ValuesIn(transformation),
|
||||||
::testing::Values(CommonTestUtils::DEVICE_GNA),
|
::testing::Values(CommonTestUtils::DEVICE_GNA),
|
||||||
::testing::Values(InferenceEngine::Precision::FP32),
|
::testing::Values(InferenceEngine::Precision::FP32),
|
||||||
::testing::ValuesIn(input_sizes),
|
::testing::ValuesIn(input_sizes),
|
||||||
|
@ -7,6 +7,15 @@
|
|||||||
|
|
||||||
namespace SubgraphTestsDefinitions {
|
namespace SubgraphTestsDefinitions {
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
|
std::vector<ngraph::helpers::MemoryTransformation> transformation {
|
||||||
|
ngraph::helpers::MemoryTransformation::NONE,
|
||||||
|
ngraph::helpers::MemoryTransformation::LOW_LATENCY,
|
||||||
|
ngraph::helpers::MemoryTransformation::LOW_LATENCY_REGULAR_API,
|
||||||
|
ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2,
|
||||||
|
ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API
|
||||||
|
};
|
||||||
|
|
||||||
std::vector<size_t> input_sizes = {
|
std::vector<size_t> input_sizes = {
|
||||||
80,
|
80,
|
||||||
32,
|
32,
|
||||||
@ -31,6 +40,7 @@ std::map<std::string, std::string> additional_config = {
|
|||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(MultipleLSTMCellTest, MultipleLSTMCellTest,
|
INSTANTIATE_TEST_CASE_P(MultipleLSTMCellTest, MultipleLSTMCellTest,
|
||||||
::testing::Combine(
|
::testing::Combine(
|
||||||
|
::testing::ValuesIn(transformation),
|
||||||
::testing::Values(CommonTestUtils::DEVICE_GNA),
|
::testing::Values(CommonTestUtils::DEVICE_GNA),
|
||||||
::testing::Values(InferenceEngine::Precision::FP32),
|
::testing::Values(InferenceEngine::Precision::FP32),
|
||||||
::testing::ValuesIn(input_sizes),
|
::testing::ValuesIn(input_sizes),
|
||||||
|
@ -39,7 +39,7 @@ TEST_P(Basic_LSTM_S, CompareWithRefImpl_LowLatencyTransformation) {
|
|||||||
|
|
||||||
// Apply LowLatency and UnrollTensorIterator transformations
|
// Apply LowLatency and UnrollTensorIterator transformations
|
||||||
ngraph::pass::Manager manager;
|
ngraph::pass::Manager manager;
|
||||||
manager.register_pass<ngraph::pass::LowLatency>(); // LowLatency enables UnrollTI
|
manager.register_pass<ngraph::pass::LowLatency2>(); // LowLatency enables UnrollTI
|
||||||
manager.run_passes(function);
|
manager.run_passes(function);
|
||||||
LoadNetwork();
|
LoadNetwork();
|
||||||
IE_SUPPRESS_DEPRECATED_START
|
IE_SUPPRESS_DEPRECATED_START
|
||||||
|
@ -12,12 +12,4 @@ TEST_P(MemoryLSTMCellTest, CompareWithRefs) {
|
|||||||
Run();
|
Run();
|
||||||
};
|
};
|
||||||
|
|
||||||
TEST_P(MemoryLSTMCellTest, CompareWithRefs_LowLatencyTransformation) {
|
|
||||||
RunLowLatency();
|
|
||||||
};
|
|
||||||
|
|
||||||
TEST_P(MemoryLSTMCellTest, CompareWithRefs_LowLatencyRegularAPITransformation) {
|
|
||||||
RunLowLatency(true);
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace SubgraphTestsDefinitions
|
} // namespace SubgraphTestsDefinitions
|
||||||
|
@ -12,12 +12,4 @@ TEST_P(MultipleLSTMCellTest, CompareWithRefs) {
|
|||||||
Run();
|
Run();
|
||||||
};
|
};
|
||||||
|
|
||||||
TEST_P(MultipleLSTMCellTest, CompareWithRefs_LowLatencyTransformation) {
|
|
||||||
RunLowLatency();
|
|
||||||
};
|
|
||||||
|
|
||||||
TEST_P(MultipleLSTMCellTest, CompareWithRefs_LowLatencyRegularAPITransformation) {
|
|
||||||
RunLowLatency(true);
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace SubgraphTestsDefinitions
|
} // namespace SubgraphTestsDefinitions
|
||||||
|
@ -14,6 +14,7 @@
|
|||||||
namespace LayerTestsDefinitions {
|
namespace LayerTestsDefinitions {
|
||||||
|
|
||||||
using MemoryTestParams = std::tuple<
|
using MemoryTestParams = std::tuple<
|
||||||
|
ngraph::helpers::MemoryTransformation, // Apply Memory transformation
|
||||||
int64_t, // iterationCount
|
int64_t, // iterationCount
|
||||||
InferenceEngine::SizeVector, // inputShape
|
InferenceEngine::SizeVector, // inputShape
|
||||||
InferenceEngine::Precision, // netPrecision
|
InferenceEngine::Precision, // netPrecision
|
||||||
@ -28,9 +29,17 @@ protected:
|
|||||||
std::vector<std::pair<ngraph::element::Type, std::vector<std::uint8_t>>> CalculateRefs() override;
|
std::vector<std::pair<ngraph::element::Type, std::vector<std::uint8_t>>> CalculateRefs() override;
|
||||||
void SetUp() override;
|
void SetUp() override;
|
||||||
private:
|
private:
|
||||||
|
void CreateTIFunc();
|
||||||
|
void CreateCommonFunc();
|
||||||
|
void ApplyLowLatency();
|
||||||
|
|
||||||
InferenceEngine::Precision netPrecision;
|
InferenceEngine::Precision netPrecision;
|
||||||
ngraph::EvaluationContext eval_context;
|
ngraph::EvaluationContext eval_context;
|
||||||
|
ngraph::helpers::MemoryTransformation transformation;
|
||||||
|
|
||||||
int64_t iteration_count;
|
int64_t iteration_count;
|
||||||
|
ngraph::element::Type ngPrc;
|
||||||
|
InferenceEngine::SizeVector inputShape;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace LayerTestsDefinitions
|
} // namespace LayerTestsDefinitions
|
||||||
|
@ -10,6 +10,7 @@
|
|||||||
|
|
||||||
namespace SubgraphTestsDefinitions {
|
namespace SubgraphTestsDefinitions {
|
||||||
typedef std::tuple<
|
typedef std::tuple<
|
||||||
|
ngraph::helpers::MemoryTransformation, // Apply Memory transformation
|
||||||
std::string, // Target device name
|
std::string, // Target device name
|
||||||
InferenceEngine::Precision, // Network precision
|
InferenceEngine::Precision, // Network precision
|
||||||
size_t, // Input size
|
size_t, // Input size
|
||||||
@ -21,9 +22,13 @@ class MemoryLSTMCellTest : public LayerTestsUtils::LayerTestsCommon,
|
|||||||
public testing::WithParamInterface<memoryLSTMCellParams> {
|
public testing::WithParamInterface<memoryLSTMCellParams> {
|
||||||
private:
|
private:
|
||||||
// you have to Unroll TI manually and remove memory untill ngraph supports it
|
// you have to Unroll TI manually and remove memory untill ngraph supports it
|
||||||
|
// since we switching models we need to generate and save weights biases and inputs in SetUp
|
||||||
void switchToNgraphFriendlyModel();
|
void switchToNgraphFriendlyModel();
|
||||||
void CreatePureTensorIteratorModel();
|
void CreatePureTensorIteratorModel();
|
||||||
// since we switching models we need to generate and save weights biases and inputs in SetUp
|
void InitMemory();
|
||||||
|
void ApplyLowLatency();
|
||||||
|
|
||||||
|
ngraph::helpers::MemoryTransformation transformation;
|
||||||
std::vector<float> input_bias;
|
std::vector<float> input_bias;
|
||||||
std::vector<float> input_weights;
|
std::vector<float> input_weights;
|
||||||
std::vector<float> hidden_memory_init;
|
std::vector<float> hidden_memory_init;
|
||||||
@ -34,7 +39,6 @@ private:
|
|||||||
protected:
|
protected:
|
||||||
void SetUp() override;
|
void SetUp() override;
|
||||||
void Run() override;
|
void Run() override;
|
||||||
void RunLowLatency(bool regular_api = false);
|
|
||||||
public:
|
public:
|
||||||
static std::string getTestCaseName(const testing::TestParamInfo<memoryLSTMCellParams> &obj);
|
static std::string getTestCaseName(const testing::TestParamInfo<memoryLSTMCellParams> &obj);
|
||||||
};
|
};
|
||||||
|
@ -10,6 +10,7 @@
|
|||||||
|
|
||||||
namespace SubgraphTestsDefinitions {
|
namespace SubgraphTestsDefinitions {
|
||||||
typedef std::tuple<
|
typedef std::tuple<
|
||||||
|
ngraph::helpers::MemoryTransformation, // Apply Memory transformation
|
||||||
std::string, // Target device name
|
std::string, // Target device name
|
||||||
InferenceEngine::Precision, // Network precision
|
InferenceEngine::Precision, // Network precision
|
||||||
size_t, // Input size
|
size_t, // Input size
|
||||||
@ -21,9 +22,12 @@ class MultipleLSTMCellTest : public LayerTestsUtils::LayerTestsCommon,
|
|||||||
public testing::WithParamInterface<multipleLSTMCellParams> {
|
public testing::WithParamInterface<multipleLSTMCellParams> {
|
||||||
private:
|
private:
|
||||||
// you have to Unroll TI manually and remove memory untill ngraph supports it
|
// you have to Unroll TI manually and remove memory untill ngraph supports it
|
||||||
|
// since we switching models we need to generate and save weights biases and inputs in SetUp
|
||||||
void switchToNgraphFriendlyModel();
|
void switchToNgraphFriendlyModel();
|
||||||
void CreatePureTensorIteratorModel();
|
void CreatePureTensorIteratorModel();
|
||||||
// since we switching models we need to generate and save weights biases and inputs in SetUp
|
void InitMemory();
|
||||||
|
void ApplyLowLatency();
|
||||||
|
|
||||||
size_t hiddenSize;
|
size_t hiddenSize;
|
||||||
std::vector<float> input_bias;
|
std::vector<float> input_bias;
|
||||||
std::vector<float> input_weights;
|
std::vector<float> input_weights;
|
||||||
@ -33,10 +37,10 @@ private:
|
|||||||
std::vector<float> weights_2_vals;
|
std::vector<float> weights_2_vals;
|
||||||
std::vector<float> reccurrenceWeights_vals;
|
std::vector<float> reccurrenceWeights_vals;
|
||||||
std::vector<float> bias_vals;
|
std::vector<float> bias_vals;
|
||||||
|
ngraph::helpers::MemoryTransformation transformation;
|
||||||
protected:
|
protected:
|
||||||
void SetUp() override;
|
void SetUp() override;
|
||||||
void Run() override;
|
void Run() override;
|
||||||
void RunLowLatency(bool regular_api = false);
|
|
||||||
public:
|
public:
|
||||||
static std::string getTestCaseName(const testing::TestParamInfo<multipleLSTMCellParams> &obj);
|
static std::string getTestCaseName(const testing::TestParamInfo<multipleLSTMCellParams> &obj);
|
||||||
};
|
};
|
||||||
|
@ -3,10 +3,18 @@
|
|||||||
//
|
//
|
||||||
|
|
||||||
#include <signal.h>
|
#include <signal.h>
|
||||||
|
#include <ie_transformations.hpp>
|
||||||
|
#include <transformations/control_flow/unroll_tensor_iterator.hpp>
|
||||||
|
#include <transformations/serialize.hpp>
|
||||||
|
#include <functional_test_utils/core_config.hpp>
|
||||||
#include "ngraph/opsets/opset7.hpp"
|
#include "ngraph/opsets/opset7.hpp"
|
||||||
#include "ngraph_functions/builders.hpp"
|
#include "ngraph_functions/builders.hpp"
|
||||||
|
#include "ngraph/pass/low_latency.hpp"
|
||||||
#include "shared_test_classes/single_layer/memory.hpp"
|
#include "shared_test_classes/single_layer/memory.hpp"
|
||||||
|
|
||||||
|
using namespace ngraph;
|
||||||
|
using namespace opset7;
|
||||||
|
|
||||||
namespace LayerTestsDefinitions {
|
namespace LayerTestsDefinitions {
|
||||||
|
|
||||||
std::string MemoryTest::getTestCaseName(const testing::TestParamInfo<MemoryTestParams> &obj) {
|
std::string MemoryTest::getTestCaseName(const testing::TestParamInfo<MemoryTestParams> &obj) {
|
||||||
@ -14,9 +22,11 @@ namespace LayerTestsDefinitions {
|
|||||||
InferenceEngine::Precision netPrecision;
|
InferenceEngine::Precision netPrecision;
|
||||||
InferenceEngine::SizeVector inputShape;
|
InferenceEngine::SizeVector inputShape;
|
||||||
std::string targetDevice;
|
std::string targetDevice;
|
||||||
std::tie(iteration_count, inputShape, netPrecision, targetDevice) = obj.param;
|
ngraph::helpers::MemoryTransformation transformation;
|
||||||
|
std::tie(transformation, iteration_count, inputShape, netPrecision, targetDevice) = obj.param;
|
||||||
|
|
||||||
std::ostringstream result;
|
std::ostringstream result;
|
||||||
|
result << "transformation=" << transformation << "_";
|
||||||
result << "iteration_count=" << iteration_count << "_";
|
result << "iteration_count=" << iteration_count << "_";
|
||||||
result << "IS=" << CommonTestUtils::vec2str(inputShape) << "_";
|
result << "IS=" << CommonTestUtils::vec2str(inputShape) << "_";
|
||||||
result << "netPRC=" << netPrecision.name() << "_";
|
result << "netPRC=" << netPrecision.name() << "_";
|
||||||
@ -26,20 +36,17 @@ namespace LayerTestsDefinitions {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void MemoryTest::SetUp() {
|
void MemoryTest::SetUp() {
|
||||||
using namespace ngraph;
|
std::tie(transformation, iteration_count, inputShape, netPrecision, targetDevice) = this->GetParam();
|
||||||
InferenceEngine::SizeVector inputShape;
|
ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
|
||||||
std::tie(iteration_count, inputShape, netPrecision, targetDevice) = this->GetParam();
|
|
||||||
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
|
|
||||||
|
|
||||||
auto param = ngraph::builder::makeParams(ngPrc, {inputShape});
|
if (transformation == ngraph::helpers::MemoryTransformation::NONE) {
|
||||||
auto variable = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, "v0"});
|
CreateCommonFunc();
|
||||||
auto read_value = std::make_shared<opset7::ReadValue>(param.at(0), variable);
|
} else {
|
||||||
auto add = std::make_shared<opset7::Add>(read_value, param.at(0));
|
CreateTIFunc();
|
||||||
auto assign = std::make_shared<opset7::Assign>(add, variable);
|
ApplyLowLatency();
|
||||||
auto res = std::make_shared<opset7::Result>(add);
|
}
|
||||||
function = std::make_shared<Function>(ResultVector{res}, SinkVector{assign}, param, "TestMemory");
|
|
||||||
|
|
||||||
auto hostTensor = std::make_shared<ngraph::HostTensor>(ngPrc, inputShape);
|
auto hostTensor = std::make_shared<HostTensor>(ngPrc, inputShape);
|
||||||
auto variable_context = std::make_shared<VariantWrapper<VariableContext>>(VariableContext());
|
auto variable_context = std::make_shared<VariantWrapper<VariableContext>>(VariableContext());
|
||||||
auto variable_value = std::make_shared<VariableValue>(hostTensor);
|
auto variable_value = std::make_shared<VariableValue>(hostTensor);
|
||||||
variable_context->get().set_variable_value(function->get_variable_by_id("v0"), variable_value);
|
variable_context->get().set_variable_value(function->get_variable_by_id("v0"), variable_value);
|
||||||
@ -48,6 +55,7 @@ namespace LayerTestsDefinitions {
|
|||||||
|
|
||||||
|
|
||||||
void MemoryTest::Run() {
|
void MemoryTest::Run() {
|
||||||
|
SKIP_IF_CURRENT_TEST_IS_DISABLED()
|
||||||
using namespace LayerTestsUtils;
|
using namespace LayerTestsUtils;
|
||||||
auto crashHandler = [](int errCode) {
|
auto crashHandler = [](int errCode) {
|
||||||
auto &s = Summary::getInstance();
|
auto &s = Summary::getInstance();
|
||||||
@ -68,7 +76,13 @@ namespace LayerTestsDefinitions {
|
|||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
LoadNetwork();
|
if (transformation != ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API) {
|
||||||
|
LoadNetwork();
|
||||||
|
} else {
|
||||||
|
CoreConfiguration(this);
|
||||||
|
ConfigureNetwork();
|
||||||
|
executableNetwork = core->LoadNetwork(cnnNetwork, targetDevice, configuration);
|
||||||
|
}
|
||||||
GenerateInputs();
|
GenerateInputs();
|
||||||
for (int64_t i = 0; i < iteration_count; ++i) {
|
for (int64_t i = 0; i < iteration_count; ++i) {
|
||||||
Infer();
|
Infer();
|
||||||
@ -88,12 +102,12 @@ namespace LayerTestsDefinitions {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<std::pair<ngraph::element::Type, std::vector<std::uint8_t>>> MemoryTest::CalculateRefs() {
|
std::vector<std::pair<element::Type, std::vector<std::uint8_t>>> MemoryTest::CalculateRefs() {
|
||||||
using namespace ngraph;
|
using namespace ngraph;
|
||||||
function->validate_nodes_and_infer_types();
|
function->validate_nodes_and_infer_types();
|
||||||
|
|
||||||
auto referenceInputs = std::vector<std::vector<uint8_t>>(inputs.size());
|
auto referenceInputs = std::vector<std::vector<uint8_t>>(inputs.size());
|
||||||
auto refInputsTypes = std::vector<ngraph::element::Type>(inputs.size());
|
auto refInputsTypes = std::vector<element::Type>(inputs.size());
|
||||||
HostTensorVector inputTensors;
|
HostTensorVector inputTensors;
|
||||||
for (auto & input : inputs) {
|
for (auto & input : inputs) {
|
||||||
const auto &dataSize = input->byteSize();
|
const auto &dataSize = input->byteSize();
|
||||||
@ -104,17 +118,25 @@ namespace LayerTestsDefinitions {
|
|||||||
const auto lockedMemory = memory->wmap();
|
const auto lockedMemory = memory->wmap();
|
||||||
const auto buffer = lockedMemory.as<const std::uint8_t *>();
|
const auto buffer = lockedMemory.as<const std::uint8_t *>();
|
||||||
|
|
||||||
auto hostTensor = std::make_shared<ngraph::HostTensor>(FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(tensorDesc.getPrecision()),
|
auto hostTensor = std::make_shared<HostTensor>(FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(tensorDesc.getPrecision()),
|
||||||
tensorDesc.getDims());
|
tensorDesc.getDims());
|
||||||
hostTensor->write(buffer, dataSize);
|
hostTensor->write(buffer, dataSize);
|
||||||
inputTensors.push_back(hostTensor);
|
inputTensors.push_back(hostTensor);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// evaluate method is not implemented for TI op.
|
||||||
|
ngraph::pass::Manager manager;
|
||||||
|
manager.register_pass<ngraph::pass::UnrollTensorIterator>();
|
||||||
|
manager.run_passes(function);
|
||||||
|
|
||||||
const auto &outInfo = executableNetwork.GetOutputsInfo();
|
const auto &outInfo = executableNetwork.GetOutputsInfo();
|
||||||
HostTensorVector outputTensors(outInfo.size(), std::make_shared<ngraph::HostTensor>());
|
HostTensorVector outputTensors(outInfo.size());
|
||||||
|
for (auto& outTensor : outputTensors) {
|
||||||
|
outTensor = std::make_shared<HostTensor>();
|
||||||
|
}
|
||||||
function->evaluate(outputTensors, inputTensors, eval_context);
|
function->evaluate(outputTensors, inputTensors, eval_context);
|
||||||
|
|
||||||
std::vector<std::pair<ngraph::element::Type, std::vector<std::uint8_t>>> outputs(outInfo.size());
|
std::vector<std::pair<element::Type, std::vector<std::uint8_t>>> outputs(outInfo.size());
|
||||||
for (size_t idx = 0; idx < outInfo.size(); ++idx) {
|
for (size_t idx = 0; idx < outInfo.size(); ++idx) {
|
||||||
outputs[idx].first = outputTensors[idx]->get_element_type();
|
outputs[idx].first = outputTensors[idx]->get_element_type();
|
||||||
outputs[idx].second.resize(outputTensors[idx]->get_size_in_bytes());
|
outputs[idx].second.resize(outputTensors[idx]->get_size_in_bytes());
|
||||||
@ -123,5 +145,61 @@ namespace LayerTestsDefinitions {
|
|||||||
return outputs;
|
return outputs;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void MemoryTest::CreateTIFunc() {
|
||||||
|
auto param = builder::makeParams(ngPrc, {inputShape}).at(0);
|
||||||
|
std::vector<std::vector<size_t>> shape = {{static_cast<size_t>(iteration_count), 1}};
|
||||||
|
auto iter_count = builder::makeParams(ngPrc, shape).at(0);
|
||||||
|
|
||||||
|
// Body
|
||||||
|
auto X = builder::makeParams(ngPrc, {inputShape}).at(0);
|
||||||
|
auto Y = builder::makeParams(ngPrc, {inputShape}).at(0);
|
||||||
|
auto Iter = builder::makeParams(ngPrc, {Shape{1, 1}}).at(0);
|
||||||
|
auto add = std::make_shared<Add>(X, Y);
|
||||||
|
auto res = std::make_shared<Result>(add);
|
||||||
|
auto Iter_res = std::make_shared<Result>(Iter);
|
||||||
|
auto body = std::make_shared<Function>(OutputVector{res, Iter_res}, ParameterVector {X, Y, Iter});
|
||||||
|
|
||||||
|
// TI construction
|
||||||
|
auto tensor_iterator = std::make_shared<TensorIterator>();
|
||||||
|
tensor_iterator->set_body(body);
|
||||||
|
|
||||||
|
tensor_iterator->set_merged_input(X, param, res);
|
||||||
|
tensor_iterator->set_invariant_input(Y, param);
|
||||||
|
tensor_iterator->set_sliced_input(Iter, iter_count, 0, 1, 1, -1, 0);
|
||||||
|
|
||||||
|
auto output = tensor_iterator->get_iter_value(res, -1);
|
||||||
|
auto output_iter = tensor_iterator->get_concatenated_slices(Iter_res, 0, 1, 1, -1, 0);
|
||||||
|
function = std::make_shared<Function>(OutputVector{output, output_iter},
|
||||||
|
ParameterVector{param, iter_count},
|
||||||
|
"PureTI");
|
||||||
|
}
|
||||||
|
|
||||||
|
void MemoryTest::CreateCommonFunc() {
|
||||||
|
auto param = builder::makeParams(ngPrc, {inputShape});
|
||||||
|
auto variable = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, "v0"});
|
||||||
|
auto read_value = std::make_shared<ReadValue>(param.at(0), variable);
|
||||||
|
auto add = std::make_shared<Add>(read_value, param.at(0));
|
||||||
|
auto assign = std::make_shared<Assign>(add, variable);
|
||||||
|
auto res = std::make_shared<Result>(add);
|
||||||
|
function = std::make_shared<Function>(ResultVector{res}, SinkVector{assign}, param, "TestMemory");
|
||||||
|
}
|
||||||
|
|
||||||
|
void MemoryTest::ApplyLowLatency() {
|
||||||
|
if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2) {
|
||||||
|
function->validate_nodes_and_infer_types();
|
||||||
|
pass::Manager manager;
|
||||||
|
manager.register_pass<pass::LowLatency2>();
|
||||||
|
manager.run_passes(function);
|
||||||
|
} else if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_ORIGINAL_INIT) {
|
||||||
|
function->validate_nodes_and_infer_types();
|
||||||
|
pass::Manager manager;
|
||||||
|
manager.register_pass<pass::LowLatency2>(false);
|
||||||
|
manager.run_passes(function);
|
||||||
|
} else if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API) {
|
||||||
|
cnnNetwork = InferenceEngine::CNNNetwork{function};
|
||||||
|
InferenceEngine::lowLatency2(cnnNetwork, iteration_count);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace LayerTestsDefinitions
|
} // namespace LayerTestsDefinitions
|
||||||
|
|
||||||
|
@ -9,6 +9,9 @@
|
|||||||
#include "ngraph_functions/builders.hpp"
|
#include "ngraph_functions/builders.hpp"
|
||||||
#include "shared_test_classes/subgraph/memory_LSTMCell.hpp"
|
#include "shared_test_classes/subgraph/memory_LSTMCell.hpp"
|
||||||
|
|
||||||
|
using namespace ngraph;
|
||||||
|
using namespace opset7;
|
||||||
|
|
||||||
namespace SubgraphTestsDefinitions {
|
namespace SubgraphTestsDefinitions {
|
||||||
|
|
||||||
std::string MemoryLSTMCellTest::getTestCaseName(const testing::TestParamInfo<memoryLSTMCellParams> &obj) {
|
std::string MemoryLSTMCellTest::getTestCaseName(const testing::TestParamInfo<memoryLSTMCellParams> &obj) {
|
||||||
@ -17,9 +20,11 @@ namespace SubgraphTestsDefinitions {
|
|||||||
size_t inputSize;
|
size_t inputSize;
|
||||||
size_t hiddenSize;
|
size_t hiddenSize;
|
||||||
std::map<std::string, std::string> config;
|
std::map<std::string, std::string> config;
|
||||||
std::tie(targetDevice, netPrecision, inputSize, hiddenSize, config) = obj.param;
|
ngraph::helpers::MemoryTransformation transformation;
|
||||||
|
std::tie(transformation, targetDevice, netPrecision, inputSize, hiddenSize, config) = obj.param;
|
||||||
std::ostringstream result;
|
std::ostringstream result;
|
||||||
|
|
||||||
|
result << "transformation=" << transformation << "_";
|
||||||
result << "netPrecision=" << netPrecision.name() << "_";
|
result << "netPrecision=" << netPrecision.name() << "_";
|
||||||
result << "IS=" << inputSize << "_";
|
result << "IS=" << inputSize << "_";
|
||||||
result << "HS=" << hiddenSize << "_";
|
result << "HS=" << hiddenSize << "_";
|
||||||
@ -34,7 +39,7 @@ namespace SubgraphTestsDefinitions {
|
|||||||
InferenceEngine::Precision netPrecision;
|
InferenceEngine::Precision netPrecision;
|
||||||
std::map<std::string, std::string> config;
|
std::map<std::string, std::string> config;
|
||||||
size_t inputSize;
|
size_t inputSize;
|
||||||
std::tie(targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam();
|
std::tie(transformation, targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam();
|
||||||
configuration.insert(config.begin(), config.end());
|
configuration.insert(config.begin(), config.end());
|
||||||
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
|
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
|
||||||
|
|
||||||
@ -51,49 +56,53 @@ namespace SubgraphTestsDefinitions {
|
|||||||
reccurrenceWeights_vals = CommonTestUtils::generate_float_numbers(4 * hiddenSize * hiddenSize, -0.1f, 0.1f);
|
reccurrenceWeights_vals = CommonTestUtils::generate_float_numbers(4 * hiddenSize * hiddenSize, -0.1f, 0.1f);
|
||||||
bias_vals = CommonTestUtils::generate_float_numbers(4 * hiddenSize, -0.2f, 0.1f);
|
bias_vals = CommonTestUtils::generate_float_numbers(4 * hiddenSize, -0.2f, 0.1f);
|
||||||
|
|
||||||
auto input_parameter = ngraph::builder::makeParams(ngPrc, {input_dims});
|
auto input_parameter = builder::makeParams(ngPrc, {input_dims});
|
||||||
|
|
||||||
auto input_add_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_bias);
|
auto input_add_const = builder::makeConstant(ngPrc, input_dims, input_bias);
|
||||||
auto add = ngraph::builder::makeEltwise(input_parameter[0], input_add_const, ngraph::helpers::EltwiseTypes::ADD);
|
auto add = builder::makeEltwise(input_parameter[0], input_add_const, helpers::EltwiseTypes::ADD);
|
||||||
|
|
||||||
auto input_mul_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_weights);
|
auto input_mul_const = builder::makeConstant(ngPrc, input_dims, input_weights);
|
||||||
auto mul = ngraph::builder::makeEltwise(add, input_mul_const, ngraph::helpers::EltwiseTypes::MULTIPLY);
|
auto mul = builder::makeEltwise(add, input_mul_const, helpers::EltwiseTypes::MULTIPLY);
|
||||||
|
|
||||||
auto unsqueeze_input_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
auto unsqueeze_input_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||||
auto unsqueeze_input = std::make_shared<ngraph::opset5::Unsqueeze>(mul, unsqueeze_input_const);
|
auto unsqueeze_input = std::make_shared<Unsqueeze>(mul, unsqueeze_input_const);
|
||||||
|
|
||||||
auto permute_in_params = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{3}, ngraph::Shape{{1, 0, 2}});
|
auto permute_in_params = std::make_shared<Constant>(element::i64, Shape{3}, Shape{{1, 0, 2}});
|
||||||
auto permute_in = std::make_shared<ngraph::opset5::Transpose>(unsqueeze_input, permute_in_params);
|
auto permute_in = std::make_shared<Transpose>(unsqueeze_input, permute_in_params);
|
||||||
|
|
||||||
auto cell_memory_constant = ngraph::builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
|
auto cell_memory_constant = builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
|
||||||
auto cell_memory_read = std::make_shared<ngraph::opset5::ReadValue>(cell_memory_constant, "cell_memory");
|
auto var_cell =
|
||||||
|
std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, "cell_state_1"});
|
||||||
|
auto var_hidden =
|
||||||
|
std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, "hidden_state_1"});
|
||||||
|
auto cell_memory_read = std::make_shared<ReadValue>(cell_memory_constant, var_cell);
|
||||||
|
|
||||||
auto hidden_memory_constant = ngraph::builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
|
auto hidden_memory_constant = builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
|
||||||
auto hidden_memory_read = std::make_shared<ngraph::opset5::ReadValue>(hidden_memory_constant, "hidden_memory");
|
auto hidden_memory_read = std::make_shared<ReadValue>(hidden_memory_constant, var_hidden);
|
||||||
|
|
||||||
// Body - inputs
|
// Body - inputs
|
||||||
auto X = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, 1, inputSize});
|
auto X = std::make_shared<Parameter>(ngPrc, Shape{1, 1, inputSize});
|
||||||
auto H_t = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, hiddenSize});
|
auto H_t = std::make_shared<Parameter>(ngPrc, Shape{1, hiddenSize});
|
||||||
auto C_t = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, hiddenSize});
|
auto C_t = std::make_shared<Parameter>(ngPrc, Shape{1, hiddenSize});
|
||||||
// Body - layers
|
// Body - layers
|
||||||
auto squeeze_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
auto squeeze_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||||
auto squeeze = std::make_shared<ngraph::opset5::Squeeze>(X, squeeze_const);
|
auto squeeze = std::make_shared<Squeeze>(X, squeeze_const);
|
||||||
|
|
||||||
auto weightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals);
|
auto weightsNode = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals);
|
||||||
auto reccurrenceWeightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
|
auto reccurrenceWeightsNode = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
|
||||||
auto biasNode = ngraph::builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
|
auto biasNode = builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
|
||||||
auto lstm = std::make_shared<ngraph::opset5::LSTMCell>(squeeze, H_t, C_t, weightsNode, reccurrenceWeightsNode, biasNode, hiddenSize);
|
auto lstm = std::make_shared<LSTMCell>(squeeze, H_t, C_t, weightsNode, reccurrenceWeightsNode, biasNode, hiddenSize);
|
||||||
|
|
||||||
auto unsqueeze_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
auto unsqueeze_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||||
auto unsqueeze = std::make_shared<ngraph::opset5::Unsqueeze>(lstm->output(0), unsqueeze_const);
|
auto unsqueeze = std::make_shared<Unsqueeze>(lstm->output(0), unsqueeze_const);
|
||||||
// body - outputs
|
// body - outputs
|
||||||
auto H_o = lstm->output(0);
|
auto H_o = lstm->output(0);
|
||||||
auto C_o = lstm->output(1);
|
auto C_o = lstm->output(1);
|
||||||
auto unsqueeze_o = unsqueeze->output(0);
|
auto unsqueeze_o = unsqueeze->output(0);
|
||||||
|
|
||||||
auto body = std::make_shared<ngraph::Function>(ngraph::OutputVector{unsqueeze_o, H_o, C_o}, ngraph::ParameterVector {X, H_t, C_t});
|
auto body = std::make_shared<Function>(OutputVector{unsqueeze_o, H_o, C_o}, ParameterVector {X, H_t, C_t});
|
||||||
// TI construction
|
// TI construction
|
||||||
auto tensor_iterator = std::make_shared<ngraph::opset5::TensorIterator>();
|
auto tensor_iterator = std::make_shared<TensorIterator>();
|
||||||
tensor_iterator->set_body(body);
|
tensor_iterator->set_body(body);
|
||||||
tensor_iterator->set_invariant_input(X, permute_in);
|
tensor_iterator->set_invariant_input(X, permute_in);
|
||||||
tensor_iterator->set_merged_input(H_t, hidden_memory_read, H_o);
|
tensor_iterator->set_merged_input(H_t, hidden_memory_read, H_o);
|
||||||
@ -107,27 +116,27 @@ namespace SubgraphTestsDefinitions {
|
|||||||
out_hidden.get_tensor().set_element_type(ngPrc);
|
out_hidden.get_tensor().set_element_type(ngPrc);
|
||||||
out_cell.get_tensor().set_element_type(ngPrc);
|
out_cell.get_tensor().set_element_type(ngPrc);
|
||||||
|
|
||||||
auto cell_memory_write = std::make_shared<ngraph::opset5::Assign>(out_cell, "cell_memory");
|
auto cell_memory_write = std::make_shared<Assign>(out_cell, var_cell);
|
||||||
auto hidden_memory_write = std::make_shared<ngraph::opset5::Assign>(out_hidden, "hidden_memory");
|
auto hidden_memory_write = std::make_shared<Assign>(out_hidden, var_hidden);
|
||||||
|
|
||||||
auto final_reshape_pattern = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{4},
|
auto final_reshape_pattern = std::make_shared<Constant>(element::i64, Shape{4},
|
||||||
std::vector<size_t>({1, 1, 1, hiddenSize}));
|
std::vector<size_t>({1, 1, 1, hiddenSize}));
|
||||||
auto final_reshape = std::make_shared<ngraph::opset5::Reshape>(out_unsqueeze, final_reshape_pattern, false);
|
auto final_reshape = std::make_shared<Reshape>(out_unsqueeze, final_reshape_pattern, false);
|
||||||
|
|
||||||
cell_memory_write->add_control_dependency(cell_memory_read);
|
cell_memory_write->add_control_dependency(cell_memory_read);
|
||||||
final_reshape->add_control_dependency(cell_memory_write);
|
|
||||||
|
|
||||||
hidden_memory_write->add_control_dependency(hidden_memory_read);
|
hidden_memory_write->add_control_dependency(hidden_memory_read);
|
||||||
final_reshape->add_control_dependency(hidden_memory_write);
|
|
||||||
|
|
||||||
function = std::make_shared<ngraph::Function>(final_reshape, input_parameter, "TI_with_memory");
|
function = std::make_shared<Function>(OutputVector{final_reshape},
|
||||||
|
SinkVector{cell_memory_write, hidden_memory_write},
|
||||||
|
input_parameter,
|
||||||
|
"TI_with_memory");
|
||||||
}
|
}
|
||||||
|
|
||||||
void MemoryLSTMCellTest::switchToNgraphFriendlyModel() {
|
void MemoryLSTMCellTest::switchToNgraphFriendlyModel() {
|
||||||
InferenceEngine::Precision netPrecision;
|
InferenceEngine::Precision netPrecision;
|
||||||
std::map<std::string, std::string> config;
|
std::map<std::string, std::string> config;
|
||||||
size_t inputSize;
|
size_t inputSize;
|
||||||
std::tie(targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam();
|
std::tie(transformation, targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam();
|
||||||
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
|
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
|
||||||
|
|
||||||
std::vector<size_t> input_dims { 1, inputSize };
|
std::vector<size_t> input_dims { 1, inputSize };
|
||||||
@ -135,46 +144,46 @@ namespace SubgraphTestsDefinitions {
|
|||||||
std::vector<size_t> hidden_memory_dims {1, hiddenSize};
|
std::vector<size_t> hidden_memory_dims {1, hiddenSize};
|
||||||
std::vector<size_t> cell_memory_dims {1, hiddenSize};
|
std::vector<size_t> cell_memory_dims {1, hiddenSize};
|
||||||
|
|
||||||
auto input_parameter = ngraph::builder::makeParams(ngPrc, {input_dims});
|
auto input_parameter = builder::makeParams(ngPrc, {input_dims});
|
||||||
|
|
||||||
auto input_add_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_bias);
|
auto input_add_const = builder::makeConstant(ngPrc, input_dims, input_bias);
|
||||||
auto add = ngraph::builder::makeEltwise(input_parameter[0], input_add_const, ngraph::helpers::EltwiseTypes::ADD);
|
auto add = builder::makeEltwise(input_parameter[0], input_add_const, helpers::EltwiseTypes::ADD);
|
||||||
|
|
||||||
auto input_mul_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_weights);
|
auto input_mul_const = builder::makeConstant(ngPrc, input_dims, input_weights);
|
||||||
auto mul = ngraph::builder::makeEltwise(add, input_mul_const, ngraph::helpers::EltwiseTypes::MULTIPLY);
|
auto mul = builder::makeEltwise(add, input_mul_const, helpers::EltwiseTypes::MULTIPLY);
|
||||||
|
|
||||||
auto unsqueeze_input_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
auto unsqueeze_input_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||||
auto unsqueeze_input = std::make_shared<ngraph::opset5::Unsqueeze>(mul, unsqueeze_input_const);
|
auto unsqueeze_input = std::make_shared<Unsqueeze>(mul, unsqueeze_input_const);
|
||||||
|
|
||||||
auto cell_memory_constant = ngraph::builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
|
auto cell_memory_constant = builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
|
||||||
|
|
||||||
auto hidden_memory_constant = ngraph::builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
|
auto hidden_memory_constant = builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
|
||||||
|
|
||||||
// Body - layers
|
// Body - layers
|
||||||
auto squeeze_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
auto squeeze_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||||
auto squeeze = std::make_shared<ngraph::opset5::Squeeze>(unsqueeze_input, squeeze_const);
|
auto squeeze = std::make_shared<Squeeze>(unsqueeze_input, squeeze_const);
|
||||||
|
|
||||||
auto weightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals);
|
auto weightsNode = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals);
|
||||||
auto reccurrenceWeightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
|
auto reccurrenceWeightsNode = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
|
||||||
auto biasNode = ngraph::builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
|
auto biasNode = builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
|
||||||
auto lstm = std::make_shared<ngraph::opset5::LSTMCell>(squeeze, hidden_memory_constant, cell_memory_constant, weightsNode,
|
auto lstm = std::make_shared<LSTMCell>(squeeze, hidden_memory_constant, cell_memory_constant, weightsNode,
|
||||||
reccurrenceWeightsNode, biasNode, hiddenSize);
|
reccurrenceWeightsNode, biasNode, hiddenSize);
|
||||||
|
|
||||||
auto unsqueeze_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
auto unsqueeze_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||||
auto unsqueeze = std::make_shared<ngraph::opset5::Unsqueeze>(lstm->output(0), unsqueeze_const);
|
auto unsqueeze = std::make_shared<Unsqueeze>(lstm->output(0), unsqueeze_const);
|
||||||
|
|
||||||
auto final_reshape_pattern = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64,
|
auto final_reshape_pattern = std::make_shared<Constant>(element::i64,
|
||||||
ngraph::Shape{4}, std::vector<size_t>({1, 1, 1, hiddenSize}));
|
Shape{4}, std::vector<size_t>({1, 1, 1, hiddenSize}));
|
||||||
auto final_reshape = std::make_shared<ngraph::opset5::Reshape>(unsqueeze, final_reshape_pattern, false);
|
auto final_reshape = std::make_shared<Reshape>(unsqueeze, final_reshape_pattern, false);
|
||||||
|
|
||||||
function = std::make_shared<ngraph::Function>(final_reshape, input_parameter, "TI_unrolled_without_memory");
|
function = std::make_shared<Function>(final_reshape, input_parameter, "TI_unrolled_without_memory");
|
||||||
}
|
}
|
||||||
|
|
||||||
void MemoryLSTMCellTest::CreatePureTensorIteratorModel() {
|
void MemoryLSTMCellTest::CreatePureTensorIteratorModel() {
|
||||||
InferenceEngine::Precision netPrecision;
|
InferenceEngine::Precision netPrecision;
|
||||||
std::map<std::string, std::string> config;
|
std::map<std::string, std::string> config;
|
||||||
size_t inputSize;
|
size_t inputSize;
|
||||||
std::tie(targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam();
|
std::tie(transformation, targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam();
|
||||||
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
|
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
|
||||||
|
|
||||||
std::vector<size_t> input_dims { 1, inputSize };
|
std::vector<size_t> input_dims { 1, inputSize };
|
||||||
@ -182,49 +191,49 @@ namespace SubgraphTestsDefinitions {
|
|||||||
std::vector<size_t> hidden_memory_dims {1, hiddenSize};
|
std::vector<size_t> hidden_memory_dims {1, hiddenSize};
|
||||||
std::vector<size_t> cell_memory_dims {1, hiddenSize};
|
std::vector<size_t> cell_memory_dims {1, hiddenSize};
|
||||||
|
|
||||||
auto input_parameter = ngraph::builder::makeParams(ngPrc, {input_dims});
|
auto input_parameter = builder::makeParams(ngPrc, {input_dims});
|
||||||
|
|
||||||
auto input_add_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_bias);
|
auto input_add_const = builder::makeConstant(ngPrc, input_dims, input_bias);
|
||||||
auto add = ngraph::builder::makeEltwise(input_parameter[0], input_add_const, ngraph::helpers::EltwiseTypes::ADD);
|
auto add = builder::makeEltwise(input_parameter[0], input_add_const, helpers::EltwiseTypes::ADD);
|
||||||
|
|
||||||
auto input_mul_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_weights);
|
auto input_mul_const = builder::makeConstant(ngPrc, input_dims, input_weights);
|
||||||
auto mul = ngraph::builder::makeEltwise(add, input_mul_const, ngraph::helpers::EltwiseTypes::MULTIPLY);
|
auto mul = builder::makeEltwise(add, input_mul_const, helpers::EltwiseTypes::MULTIPLY);
|
||||||
|
|
||||||
auto unsqueeze_input_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
auto unsqueeze_input_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||||
auto unsqueeze_input = std::make_shared<ngraph::opset5::Unsqueeze>(mul, unsqueeze_input_const);
|
auto unsqueeze_input = std::make_shared<Unsqueeze>(mul, unsqueeze_input_const);
|
||||||
|
|
||||||
auto permute_in_params = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{3}, ngraph::Shape{{1, 0, 2}});
|
auto permute_in_params = std::make_shared<Constant>(element::i64, Shape{3}, Shape{{1, 0, 2}});
|
||||||
auto permute_in = std::make_shared<ngraph::opset5::Transpose>(unsqueeze_input, permute_in_params);
|
auto permute_in = std::make_shared<Transpose>(unsqueeze_input, permute_in_params);
|
||||||
|
|
||||||
auto cell_memory_constant = ngraph::builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
|
auto cell_memory_constant = builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
|
||||||
|
|
||||||
auto hidden_memory_constant = ngraph::builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
|
auto hidden_memory_constant = builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
|
||||||
|
|
||||||
// Body - inputs
|
// Body - inputs
|
||||||
auto X = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, 1, inputSize});
|
auto X = std::make_shared<Parameter>(ngPrc, Shape{1, 1, inputSize});
|
||||||
auto H_t = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, hiddenSize});
|
auto H_t = std::make_shared<Parameter>(ngPrc, Shape{1, hiddenSize});
|
||||||
auto C_t = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, hiddenSize});
|
auto C_t = std::make_shared<Parameter>(ngPrc, Shape{1, hiddenSize});
|
||||||
H_t->set_friendly_name("hidden_state_1");
|
H_t->set_friendly_name("hidden_state_1");
|
||||||
C_t->set_friendly_name("cell_state_1");
|
C_t->set_friendly_name("cell_state_1");
|
||||||
// Body - layers
|
// Body - layers
|
||||||
auto squeeze_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
auto squeeze_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||||
auto squeeze = std::make_shared<ngraph::opset5::Squeeze>(X, squeeze_const);
|
auto squeeze = std::make_shared<Squeeze>(X, squeeze_const);
|
||||||
|
|
||||||
auto weightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals);
|
auto weightsNode = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals);
|
||||||
auto reccurrenceWeightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
|
auto reccurrenceWeightsNode = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
|
||||||
auto biasNode = ngraph::builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
|
auto biasNode = builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
|
||||||
auto lstm = std::make_shared<ngraph::opset5::LSTMCell>(squeeze, H_t, C_t, weightsNode, reccurrenceWeightsNode, biasNode, hiddenSize);
|
auto lstm = std::make_shared<LSTMCell>(squeeze, H_t, C_t, weightsNode, reccurrenceWeightsNode, biasNode, hiddenSize);
|
||||||
|
|
||||||
auto unsqueeze_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
auto unsqueeze_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||||
auto unsqueeze = std::make_shared<ngraph::opset5::Unsqueeze>(lstm->output(0), unsqueeze_const);
|
auto unsqueeze = std::make_shared<Unsqueeze>(lstm->output(0), unsqueeze_const);
|
||||||
// body - outputs
|
// body - outputs
|
||||||
auto H_o = lstm->output(0);
|
auto H_o = lstm->output(0);
|
||||||
auto C_o = lstm->output(1);
|
auto C_o = lstm->output(1);
|
||||||
auto unsqueeze_o = unsqueeze->output(0);
|
auto unsqueeze_o = unsqueeze->output(0);
|
||||||
|
|
||||||
auto body = std::make_shared<ngraph::Function>(ngraph::OutputVector{unsqueeze_o, H_o, C_o}, ngraph::ParameterVector {X, H_t, C_t});
|
auto body = std::make_shared<Function>(OutputVector{unsqueeze_o, H_o, C_o}, ParameterVector {X, H_t, C_t});
|
||||||
// TI construction
|
// TI construction
|
||||||
auto tensor_iterator = std::make_shared<ngraph::opset5::TensorIterator>();
|
auto tensor_iterator = std::make_shared<TensorIterator>();
|
||||||
tensor_iterator->set_body(body);
|
tensor_iterator->set_body(body);
|
||||||
tensor_iterator->set_sliced_input(X, permute_in, 0, 1, 1, -1, 0);
|
tensor_iterator->set_sliced_input(X, permute_in, 0, 1, 1, -1, 0);
|
||||||
tensor_iterator->set_merged_input(H_t, hidden_memory_constant, H_o);
|
tensor_iterator->set_merged_input(H_t, hidden_memory_constant, H_o);
|
||||||
@ -237,56 +246,35 @@ namespace SubgraphTestsDefinitions {
|
|||||||
out_hidden.get_tensor().set_element_type(ngPrc);
|
out_hidden.get_tensor().set_element_type(ngPrc);
|
||||||
out_cell.get_tensor().set_element_type(ngPrc);
|
out_cell.get_tensor().set_element_type(ngPrc);
|
||||||
|
|
||||||
auto final_reshape_pattern = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{4},
|
auto final_reshape_pattern = std::make_shared<Constant>(element::i64, Shape{4},
|
||||||
std::vector<size_t>({1, 1, 1, hiddenSize}));
|
std::vector<size_t>({1, 1, 1, hiddenSize}));
|
||||||
auto final_reshape = std::make_shared<ngraph::opset5::Reshape>(out_unsqueeze, final_reshape_pattern, false);
|
auto final_reshape = std::make_shared<Reshape>(out_unsqueeze, final_reshape_pattern, false);
|
||||||
|
|
||||||
function = std::make_shared<ngraph::Function>(final_reshape, input_parameter, "PureTI");
|
function = std::make_shared<Function>(final_reshape, input_parameter, "PureTI");
|
||||||
}
|
}
|
||||||
|
|
||||||
void MemoryLSTMCellTest::Run() {
|
void MemoryLSTMCellTest::Run() {
|
||||||
SKIP_IF_CURRENT_TEST_IS_DISABLED()
|
SKIP_IF_CURRENT_TEST_IS_DISABLED()
|
||||||
|
if (transformation != ngraph::helpers::MemoryTransformation::NONE) {
|
||||||
IE_SUPPRESS_DEPRECATED_START
|
ApplyLowLatency();
|
||||||
LoadNetwork();
|
} else {
|
||||||
auto states = executableNetwork.QueryState();
|
LoadNetwork();
|
||||||
for (auto& state : states) {
|
|
||||||
auto name = state.GetName();
|
|
||||||
if (name == "cell_memory") {
|
|
||||||
auto blob = FuncTestUtils::createAndFillBlobWithFloatArray(state.GetLastState()->getTensorDesc(),
|
|
||||||
cell_memory_init.data(), cell_memory_init.size());
|
|
||||||
state.SetState(blob);
|
|
||||||
} else if (name == "hidden_memory") {
|
|
||||||
auto blob = FuncTestUtils::createAndFillBlobWithFloatArray(state.GetLastState()->getTensorDesc(),
|
|
||||||
hidden_memory_init.data(), hidden_memory_init.size());
|
|
||||||
state.SetState(blob);
|
|
||||||
} else {
|
|
||||||
GTEST_FAIL() << "unknown memory state";
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
IE_SUPPRESS_DEPRECATED_END
|
|
||||||
|
InitMemory();
|
||||||
GenerateInputs();
|
GenerateInputs();
|
||||||
Infer();
|
Infer();
|
||||||
switchToNgraphFriendlyModel();
|
|
||||||
|
// Calculate ref values
|
||||||
|
if (transformation == ngraph::helpers::MemoryTransformation::NONE) {
|
||||||
|
switchToNgraphFriendlyModel();
|
||||||
|
} else {
|
||||||
|
CreatePureTensorIteratorModel();
|
||||||
|
}
|
||||||
Validate();
|
Validate();
|
||||||
}
|
}
|
||||||
|
|
||||||
void MemoryLSTMCellTest::RunLowLatency(bool regular_api) {
|
void MemoryLSTMCellTest::InitMemory() {
|
||||||
SKIP_IF_CURRENT_TEST_IS_DISABLED()
|
|
||||||
|
|
||||||
CreatePureTensorIteratorModel();
|
|
||||||
if (regular_api) {
|
|
||||||
cnnNetwork = InferenceEngine::CNNNetwork{function};
|
|
||||||
InferenceEngine::LowLatency(cnnNetwork);
|
|
||||||
ConfigureNetwork();
|
|
||||||
executableNetwork = core->LoadNetwork(static_cast<const InferenceEngine::CNNNetwork>(cnnNetwork), targetDevice, configuration);
|
|
||||||
} else {
|
|
||||||
// Apply LowLatency (insert Assigns/ReadValues) and UnrollTensorIterator
|
|
||||||
ngraph::pass::Manager manager;
|
|
||||||
manager.register_pass<ngraph::pass::LowLatency>(); // LowLatency enables UnrollTI
|
|
||||||
manager.run_passes(function);
|
|
||||||
LoadNetwork();
|
|
||||||
}
|
|
||||||
IE_SUPPRESS_DEPRECATED_START
|
IE_SUPPRESS_DEPRECATED_START
|
||||||
auto states = executableNetwork.QueryState();
|
auto states = executableNetwork.QueryState();
|
||||||
for (auto& state : states) {
|
for (auto& state : states) {
|
||||||
@ -304,13 +292,52 @@ namespace SubgraphTestsDefinitions {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
IE_SUPPRESS_DEPRECATED_END
|
IE_SUPPRESS_DEPRECATED_END
|
||||||
GenerateInputs();
|
}
|
||||||
Infer();
|
|
||||||
|
|
||||||
|
void MemoryLSTMCellTest::ApplyLowLatency() {
|
||||||
|
// Calculate values after LowLatency transformation
|
||||||
CreatePureTensorIteratorModel();
|
CreatePureTensorIteratorModel();
|
||||||
ngraph::pass::Manager manager_2;
|
if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY) {
|
||||||
manager_2.register_pass<ngraph::pass::UnrollTensorIterator>();
|
function->validate_nodes_and_infer_types();
|
||||||
manager_2.run_passes(function);
|
// Apply LowLatency (insert Assigns/ReadValues) and UnrollTensorIterator
|
||||||
Validate();
|
pass::Manager manager;
|
||||||
|
NGRAPH_SUPPRESS_DEPRECATED_START
|
||||||
|
manager.register_pass<ngraph::pass::LowLatency>();
|
||||||
|
NGRAPH_SUPPRESS_DEPRECATED_END // LowLatency enables UnrollTI
|
||||||
|
manager.run_passes(function);
|
||||||
|
bool ti_found = helpers::is_tensor_iterator_exist(function);
|
||||||
|
EXPECT_EQ(ti_found, true);
|
||||||
|
LoadNetwork();
|
||||||
|
} else if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2) {
|
||||||
|
function->validate_nodes_and_infer_types();
|
||||||
|
// Apply LowLatency (insert Assigns/ReadValues) and UnrollTensorIterator
|
||||||
|
|
||||||
|
pass::Manager manager;
|
||||||
|
manager.register_pass<pass::LowLatency2>();
|
||||||
|
manager.run_passes(function);
|
||||||
|
bool ti_found = helpers::is_tensor_iterator_exist(function);
|
||||||
|
EXPECT_EQ(ti_found, false);
|
||||||
|
LoadNetwork();
|
||||||
|
} else if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY_REGULAR_API) {
|
||||||
|
cnnNetwork = InferenceEngine::CNNNetwork{function};
|
||||||
|
IE_SUPPRESS_DEPRECATED_START
|
||||||
|
InferenceEngine::LowLatency(cnnNetwork);
|
||||||
|
IE_SUPPRESS_DEPRECATED_END
|
||||||
|
|
||||||
|
bool ti_found = helpers::is_tensor_iterator_exist(cnnNetwork.getFunction());
|
||||||
|
EXPECT_EQ(ti_found, true);
|
||||||
|
|
||||||
|
ConfigureNetwork();
|
||||||
|
executableNetwork = core->LoadNetwork(cnnNetwork, targetDevice, configuration);
|
||||||
|
} else if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API) {
|
||||||
|
cnnNetwork = InferenceEngine::CNNNetwork{function};
|
||||||
|
InferenceEngine::lowLatency2(cnnNetwork);
|
||||||
|
|
||||||
|
bool ti_found = helpers::is_tensor_iterator_exist(cnnNetwork.getFunction());
|
||||||
|
EXPECT_EQ(ti_found, false);
|
||||||
|
|
||||||
|
ConfigureNetwork();
|
||||||
|
executableNetwork = core->LoadNetwork(cnnNetwork, targetDevice, configuration);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} // namespace SubgraphTestsDefinitions
|
} // namespace SubgraphTestsDefinitions
|
||||||
|
@ -2,16 +2,19 @@
|
|||||||
// SPDX-License-Identifier: Apache-2.0
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
//
|
//
|
||||||
|
|
||||||
#include "ngraph/opsets/opset5.hpp"
|
#include "ie_transformations.hpp"
|
||||||
|
#include "ngraph/opsets/opset7.hpp"
|
||||||
|
#include "ngraph/op/util/variable_context.hpp"
|
||||||
#include "ngraph/pass/low_latency.hpp"
|
#include "ngraph/pass/low_latency.hpp"
|
||||||
|
|
||||||
#include "ie_transformations.hpp"
|
|
||||||
#include "transformations/control_flow/unroll_tensor_iterator.hpp"
|
|
||||||
|
|
||||||
#include "ngraph_functions/builders.hpp"
|
#include "ngraph_functions/builders.hpp"
|
||||||
|
#include "ngraph_functions/utils/ngraph_helpers.hpp"
|
||||||
|
|
||||||
#include "shared_test_classes/subgraph/multiple_LSTMCell.hpp"
|
#include "shared_test_classes/subgraph/multiple_LSTMCell.hpp"
|
||||||
|
|
||||||
|
using namespace ngraph;
|
||||||
|
using namespace opset7;
|
||||||
|
|
||||||
namespace SubgraphTestsDefinitions {
|
namespace SubgraphTestsDefinitions {
|
||||||
std::string MultipleLSTMCellTest::getTestCaseName(const testing::TestParamInfo<multipleLSTMCellParams> &obj) {
|
std::string MultipleLSTMCellTest::getTestCaseName(const testing::TestParamInfo<multipleLSTMCellParams> &obj) {
|
||||||
std::string targetDevice;
|
std::string targetDevice;
|
||||||
@ -19,9 +22,11 @@ std::string MultipleLSTMCellTest::getTestCaseName(const testing::TestParamInfo<m
|
|||||||
size_t inputSize;
|
size_t inputSize;
|
||||||
size_t hiddenSize;
|
size_t hiddenSize;
|
||||||
std::map<std::string, std::string> config;
|
std::map<std::string, std::string> config;
|
||||||
std::tie(targetDevice, netPrecision, inputSize, hiddenSize, config) = obj.param;
|
ngraph::helpers::MemoryTransformation transformation;
|
||||||
|
std::tie(transformation, targetDevice, netPrecision, inputSize, hiddenSize, config) = obj.param;
|
||||||
std::ostringstream result;
|
std::ostringstream result;
|
||||||
|
|
||||||
|
result << "transformation=" << transformation << "_";
|
||||||
result << "netPrecision=" << netPrecision.name() << "_";
|
result << "netPrecision=" << netPrecision.name() << "_";
|
||||||
result << "IS=" << inputSize << "_";
|
result << "IS=" << inputSize << "_";
|
||||||
result << "HS=" << hiddenSize << "_";
|
result << "HS=" << hiddenSize << "_";
|
||||||
@ -33,7 +38,7 @@ void MultipleLSTMCellTest::SetUp() {
|
|||||||
InferenceEngine::Precision netPrecision;
|
InferenceEngine::Precision netPrecision;
|
||||||
std::map<std::string, std::string> config;
|
std::map<std::string, std::string> config;
|
||||||
size_t inputSize;
|
size_t inputSize;
|
||||||
std::tie(targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam();
|
std::tie(transformation, targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam();
|
||||||
configuration.insert(config.begin(), config.end());
|
configuration.insert(config.begin(), config.end());
|
||||||
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
|
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
|
||||||
|
|
||||||
@ -51,51 +56,55 @@ void MultipleLSTMCellTest::SetUp() {
|
|||||||
reccurrenceWeights_vals = CommonTestUtils::generate_float_numbers(4 * hiddenSize * hiddenSize, -0.1f, 0.1f);
|
reccurrenceWeights_vals = CommonTestUtils::generate_float_numbers(4 * hiddenSize * hiddenSize, -0.1f, 0.1f);
|
||||||
bias_vals = CommonTestUtils::generate_float_numbers(4 * hiddenSize, -0.25f, 0.15f);
|
bias_vals = CommonTestUtils::generate_float_numbers(4 * hiddenSize, -0.25f, 0.15f);
|
||||||
|
|
||||||
auto input_parameter = ngraph::builder::makeParams(ngPrc, {input_dims});
|
auto input_parameter = builder::makeParams(ngPrc, {input_dims});
|
||||||
|
|
||||||
auto input_add_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_bias);
|
auto input_add_const = builder::makeConstant(ngPrc, input_dims, input_bias);
|
||||||
auto add = ngraph::builder::makeEltwise(input_parameter[0], input_add_const, ngraph::helpers::EltwiseTypes::ADD);
|
auto add = builder::makeEltwise(input_parameter[0], input_add_const, helpers::EltwiseTypes::ADD);
|
||||||
|
|
||||||
auto input_mul_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_weights);
|
auto input_mul_const = builder::makeConstant(ngPrc, input_dims, input_weights);
|
||||||
auto mul = ngraph::builder::makeEltwise(add, input_mul_const, ngraph::helpers::EltwiseTypes::MULTIPLY);
|
auto mul = builder::makeEltwise(add, input_mul_const, helpers::EltwiseTypes::MULTIPLY);
|
||||||
|
|
||||||
auto unsqueeze_input_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
auto unsqueeze_input_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||||
auto unsqueeze_input = std::make_shared<ngraph::opset5::Unsqueeze>(mul, unsqueeze_input_const);
|
auto unsqueeze_input = std::make_shared<Unsqueeze>(mul, unsqueeze_input_const);
|
||||||
|
|
||||||
auto permute_in_params = std::make_shared<ngraph::opset1::Constant>(ngraph::element::i64, ngraph::Shape{3}, ngraph::Shape{{1, 0, 2}});
|
auto permute_in_params = std::make_shared<Constant>(element::i64, Shape{3}, Shape{{1, 0, 2}});
|
||||||
auto permute_in = std::make_shared<ngraph::opset1::Transpose>(unsqueeze_input, permute_in_params);
|
auto permute_in = std::make_shared<Transpose>(unsqueeze_input, permute_in_params);
|
||||||
|
|
||||||
auto cell_memory_constant = ngraph::builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
|
auto cell_memory_constant = builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
|
||||||
auto cell_memory_read = std::make_shared<ngraph::opset5::ReadValue>(cell_memory_constant, "cell_memory");
|
auto var_cell =
|
||||||
|
std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, "cell_state_1"});
|
||||||
|
auto var_hidden =
|
||||||
|
std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, "hidden_state_1"});
|
||||||
|
auto cell_memory_read = std::make_shared<ReadValue>(cell_memory_constant, var_cell);
|
||||||
cell_memory_read->set_friendly_name("cell_memory");
|
cell_memory_read->set_friendly_name("cell_memory");
|
||||||
|
|
||||||
auto hidden_memory_constant = ngraph::builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
|
auto hidden_memory_constant = builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
|
||||||
auto hidden_memory_read = std::make_shared<ngraph::opset5::ReadValue>(hidden_memory_constant, "hidden_memory");
|
auto hidden_memory_read = std::make_shared<ReadValue>(hidden_memory_constant, var_hidden);
|
||||||
hidden_memory_read->set_friendly_name("hidden_memory");
|
hidden_memory_read->set_friendly_name("hidden_memory");
|
||||||
|
|
||||||
// Body - inputs
|
// Body - inputs
|
||||||
auto X = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, 1, inputSize});
|
auto X = std::make_shared<Parameter>(ngPrc, Shape{1, 1, inputSize});
|
||||||
auto H_t = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, hiddenSize});
|
auto H_t = std::make_shared<Parameter>(ngPrc, Shape{1, hiddenSize});
|
||||||
auto C_t = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, hiddenSize});
|
auto C_t = std::make_shared<Parameter>(ngPrc, Shape{1, hiddenSize});
|
||||||
// Body - layers
|
// Body - layers
|
||||||
auto squeeze_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
auto squeeze_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||||
auto squeeze = std::make_shared<ngraph::opset5::Squeeze>(X, squeeze_const);
|
auto squeeze = std::make_shared<Squeeze>(X, squeeze_const);
|
||||||
|
|
||||||
auto weightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals);
|
auto weightsNode = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals);
|
||||||
auto reccurrenceWeightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
|
auto reccurrenceWeightsNode = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
|
||||||
auto biasNode = ngraph::builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
|
auto biasNode = builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
|
||||||
auto lstm = std::make_shared<ngraph::opset4::LSTMCell>(squeeze, H_t, C_t, weightsNode, reccurrenceWeightsNode, biasNode, hiddenSize);
|
auto lstm = std::make_shared<LSTMCell>(squeeze, H_t, C_t, weightsNode, reccurrenceWeightsNode, biasNode, hiddenSize);
|
||||||
|
|
||||||
auto unsqueeze_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
auto unsqueeze_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||||
auto unsqueeze = std::make_shared<ngraph::opset5::Unsqueeze>(lstm->output(0), unsqueeze_const);
|
auto unsqueeze = std::make_shared<Unsqueeze>(lstm->output(0), unsqueeze_const);
|
||||||
// body - outputs
|
// body - outputs
|
||||||
auto H_o = lstm->output(0);
|
auto H_o = lstm->output(0);
|
||||||
auto C_o = lstm->output(1);
|
auto C_o = lstm->output(1);
|
||||||
auto unsqueeze_o = unsqueeze->output(0);
|
auto unsqueeze_o = unsqueeze->output(0);
|
||||||
|
|
||||||
auto body = std::make_shared<ngraph::Function>(ngraph::OutputVector{unsqueeze_o, H_o, C_o}, ngraph::ParameterVector {X, H_t, C_t});
|
auto body = std::make_shared<Function>(OutputVector{unsqueeze_o, H_o, C_o}, ParameterVector {X, H_t, C_t});
|
||||||
// TI construction
|
// TI construction
|
||||||
auto tensor_iterator = std::make_shared<ngraph::opset5::TensorIterator>();
|
auto tensor_iterator = std::make_shared<TensorIterator>();
|
||||||
tensor_iterator->set_body(body);
|
tensor_iterator->set_body(body);
|
||||||
tensor_iterator->set_invariant_input(X, permute_in);
|
tensor_iterator->set_invariant_input(X, permute_in);
|
||||||
tensor_iterator->set_merged_input(H_t, hidden_memory_read, H_o);
|
tensor_iterator->set_merged_input(H_t, hidden_memory_read, H_o);
|
||||||
@ -108,49 +117,53 @@ void MultipleLSTMCellTest::SetUp() {
|
|||||||
out_hidden.get_tensor().set_element_type(ngPrc);
|
out_hidden.get_tensor().set_element_type(ngPrc);
|
||||||
out_cell.get_tensor().set_element_type(ngPrc);
|
out_cell.get_tensor().set_element_type(ngPrc);
|
||||||
|
|
||||||
auto cell_memory_write = std::make_shared<ngraph::opset5::Assign>(out_cell, "cell_memory");
|
auto cell_memory_write = std::make_shared<Assign>(out_cell, var_cell);
|
||||||
auto hidden_memory_write = std::make_shared<ngraph::opset5::Assign>(out_hidden, "hidden_memory");
|
auto hidden_memory_write = std::make_shared<Assign>(out_hidden, var_hidden);
|
||||||
|
|
||||||
auto first_reshape_pattern = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64,
|
auto first_reshape_pattern = std::make_shared<Constant>(element::i64,
|
||||||
ngraph::Shape{4}, std::vector<size_t>({1, 1, 1, hiddenSize}));
|
Shape{4}, std::vector<size_t>({1, 1, 1, hiddenSize}));
|
||||||
auto first_reshape = std::make_shared<ngraph::opset5::Reshape>(out_unsqueeze, first_reshape_pattern, false);
|
auto first_reshape = std::make_shared<Reshape>(out_unsqueeze, first_reshape_pattern, false);
|
||||||
// End of TI 1
|
// End of TI 1
|
||||||
|
|
||||||
auto inbetween_squeeze_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
auto inbetween_squeeze_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||||
auto inbetween_squeeze = std::make_shared<ngraph::opset5::Squeeze>(first_reshape, inbetween_squeeze_const);
|
auto inbetween_squeeze = std::make_shared<Squeeze>(first_reshape, inbetween_squeeze_const);
|
||||||
|
|
||||||
// Second TI
|
// Second TI
|
||||||
auto cell_memory_2_constant = ngraph::builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
|
auto var_cell_2 =
|
||||||
auto cell_memory_2_read = std::make_shared<ngraph::opset5::ReadValue>(cell_memory_2_constant, "cell_memory_2");
|
std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, "cell_state_2"});
|
||||||
|
auto var_hidden_2 =
|
||||||
|
std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, "hidden_state_2"});
|
||||||
|
auto cell_memory_2_constant = builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
|
||||||
|
auto cell_memory_2_read = std::make_shared<ReadValue>(cell_memory_2_constant, var_cell_2);
|
||||||
cell_memory_2_read->set_friendly_name("cell_memory_2");
|
cell_memory_2_read->set_friendly_name("cell_memory_2");
|
||||||
|
|
||||||
auto hidden_memory_2_constant = ngraph::builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
|
auto hidden_memory_2_constant = builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
|
||||||
auto hidden_memory_2_read = std::make_shared<ngraph::opset5::ReadValue>(hidden_memory_2_constant, "hidden_memory_2");
|
auto hidden_memory_2_read = std::make_shared<ReadValue>(hidden_memory_2_constant, var_hidden_2);
|
||||||
hidden_memory_2_read->set_friendly_name("hidden_memory_2");
|
hidden_memory_2_read->set_friendly_name("hidden_memory_2");
|
||||||
|
|
||||||
// Body - inputs
|
// Body - inputs
|
||||||
auto X_2 = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, 1, hiddenSize});
|
auto X_2 = std::make_shared<Parameter>(ngPrc, Shape{1, 1, hiddenSize});
|
||||||
auto H_t_2 = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, hiddenSize});
|
auto H_t_2 = std::make_shared<Parameter>(ngPrc, Shape{1, hiddenSize});
|
||||||
auto C_t_2 = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, hiddenSize});
|
auto C_t_2 = std::make_shared<Parameter>(ngPrc, Shape{1, hiddenSize});
|
||||||
// Body - layers
|
// Body - layers
|
||||||
auto squeeze_2_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
auto squeeze_2_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||||
auto squeeze_2 = std::make_shared<ngraph::opset5::Squeeze>(X_2, squeeze_2_const);
|
auto squeeze_2 = std::make_shared<Squeeze>(X_2, squeeze_2_const);
|
||||||
|
|
||||||
auto weightsNode_2 = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, weights_2_vals);
|
auto weightsNode_2 = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, weights_2_vals);
|
||||||
auto reccurrenceWeightsNode_2 = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
|
auto reccurrenceWeightsNode_2 = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
|
||||||
auto biasNode_2 = ngraph::builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
|
auto biasNode_2 = builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
|
||||||
auto lstm_2 = std::make_shared<ngraph::opset4::LSTMCell>(squeeze_2, H_t_2, C_t_2, weightsNode_2, reccurrenceWeightsNode_2, biasNode_2, hiddenSize);
|
auto lstm_2 = std::make_shared<LSTMCell>(squeeze_2, H_t_2, C_t_2, weightsNode_2, reccurrenceWeightsNode_2, biasNode_2, hiddenSize);
|
||||||
|
|
||||||
auto unsqueeze_2_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
auto unsqueeze_2_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||||
auto unsqueeze_2 = std::make_shared<ngraph::opset5::Unsqueeze>(lstm_2->output(0), unsqueeze_2_const);
|
auto unsqueeze_2 = std::make_shared<Unsqueeze>(lstm_2->output(0), unsqueeze_2_const);
|
||||||
// body - outputs
|
// body - outputs
|
||||||
auto H_o_2 = lstm_2->output(0);
|
auto H_o_2 = lstm_2->output(0);
|
||||||
auto C_o_2 = lstm_2->output(1);
|
auto C_o_2 = lstm_2->output(1);
|
||||||
auto unsqueeze_o_2 = unsqueeze_2->output(0);
|
auto unsqueeze_o_2 = unsqueeze_2->output(0);
|
||||||
|
|
||||||
auto body_2 = std::make_shared<ngraph::Function>(ngraph::OutputVector{unsqueeze_o_2, H_o_2, C_o_2}, ngraph::ParameterVector {X_2, H_t_2, C_t_2});
|
auto body_2 = std::make_shared<Function>(OutputVector{unsqueeze_o_2, H_o_2, C_o_2}, ParameterVector {X_2, H_t_2, C_t_2});
|
||||||
// TI construction
|
// TI construction
|
||||||
auto tensor_iterator_2 = std::make_shared<ngraph::opset5::TensorIterator>();
|
auto tensor_iterator_2 = std::make_shared<TensorIterator>();
|
||||||
tensor_iterator_2->set_body(body_2);
|
tensor_iterator_2->set_body(body_2);
|
||||||
tensor_iterator_2->set_invariant_input(X_2, inbetween_squeeze);
|
tensor_iterator_2->set_invariant_input(X_2, inbetween_squeeze);
|
||||||
tensor_iterator_2->set_merged_input(H_t_2, hidden_memory_2_read, H_o_2);
|
tensor_iterator_2->set_merged_input(H_t_2, hidden_memory_2_read, H_o_2);
|
||||||
@ -163,33 +176,28 @@ void MultipleLSTMCellTest::SetUp() {
|
|||||||
out_hidden_2.get_tensor().set_element_type(ngPrc);
|
out_hidden_2.get_tensor().set_element_type(ngPrc);
|
||||||
out_cell_2.get_tensor().set_element_type(ngPrc);
|
out_cell_2.get_tensor().set_element_type(ngPrc);
|
||||||
|
|
||||||
auto cell_memory_2_write = std::make_shared<ngraph::opset5::Assign>(out_cell_2, "cell_memory_2");
|
auto cell_memory_2_write = std::make_shared<Assign>(out_cell_2, var_cell_2);
|
||||||
auto hidden_memory_2_write = std::make_shared<ngraph::opset5::Assign>(out_hidden_2, "hidden_memory_2");
|
auto hidden_memory_2_write = std::make_shared<Assign>(out_hidden_2, var_hidden_2);
|
||||||
|
|
||||||
auto final_reshape_pattern = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64,
|
auto final_reshape_pattern = std::make_shared<Constant>(element::i64, Shape{4}, std::vector<size_t>({1, 1, 1, hiddenSize}));
|
||||||
ngraph::Shape{4}, std::vector<size_t>({1, 1, 1, hiddenSize}));
|
auto final_reshape = std::make_shared<Reshape>(out_unsqueeze_2, final_reshape_pattern, false);
|
||||||
auto final_reshape = std::make_shared<ngraph::opset5::Reshape>(out_unsqueeze_2, final_reshape_pattern, false);
|
|
||||||
|
|
||||||
cell_memory_write->add_control_dependency(cell_memory_read);
|
cell_memory_write->add_control_dependency(cell_memory_read);
|
||||||
final_reshape->add_control_dependency(cell_memory_write);
|
|
||||||
|
|
||||||
hidden_memory_write->add_control_dependency(hidden_memory_read);
|
hidden_memory_write->add_control_dependency(hidden_memory_read);
|
||||||
final_reshape->add_control_dependency(hidden_memory_write);
|
|
||||||
|
|
||||||
cell_memory_2_write->add_control_dependency(cell_memory_2_read);
|
cell_memory_2_write->add_control_dependency(cell_memory_2_read);
|
||||||
final_reshape->add_control_dependency(cell_memory_2_write);
|
|
||||||
|
|
||||||
hidden_memory_2_write->add_control_dependency(hidden_memory_2_read);
|
hidden_memory_2_write->add_control_dependency(hidden_memory_2_read);
|
||||||
final_reshape->add_control_dependency(hidden_memory_2_write);
|
|
||||||
|
|
||||||
function = std::make_shared<ngraph::Function>(final_reshape, input_parameter, "TI_with_memory");
|
function = std::make_shared<Function>(OutputVector {final_reshape},
|
||||||
|
SinkVector{cell_memory_write, hidden_memory_write, cell_memory_2_write, hidden_memory_2_write},
|
||||||
|
input_parameter,
|
||||||
|
"TI_with_memory");
|
||||||
}
|
}
|
||||||
|
|
||||||
void MultipleLSTMCellTest::switchToNgraphFriendlyModel() {
|
void MultipleLSTMCellTest::switchToNgraphFriendlyModel() {
|
||||||
InferenceEngine::Precision netPrecision;
|
InferenceEngine::Precision netPrecision;
|
||||||
std::map<std::string, std::string> config;
|
std::map<std::string, std::string> config;
|
||||||
size_t inputSize;
|
size_t inputSize;
|
||||||
std::tie(targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam();
|
std::tie(transformation, targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam();
|
||||||
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
|
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
|
||||||
|
|
||||||
std::vector<size_t> input_dims { 1, inputSize };
|
std::vector<size_t> input_dims { 1, inputSize };
|
||||||
@ -197,72 +205,72 @@ void MultipleLSTMCellTest::switchToNgraphFriendlyModel() {
|
|||||||
std::vector<size_t> hidden_memory_dims {1, hiddenSize};
|
std::vector<size_t> hidden_memory_dims {1, hiddenSize};
|
||||||
std::vector<size_t> cell_memory_dims {1, hiddenSize};
|
std::vector<size_t> cell_memory_dims {1, hiddenSize};
|
||||||
|
|
||||||
auto input_parameter = ngraph::builder::makeParams(ngPrc, {input_dims});
|
auto input_parameter = builder::makeParams(ngPrc, {input_dims});
|
||||||
|
|
||||||
auto input_add_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_bias);
|
auto input_add_const = builder::makeConstant(ngPrc, input_dims, input_bias);
|
||||||
auto add = ngraph::builder::makeEltwise(input_parameter[0], input_add_const, ngraph::helpers::EltwiseTypes::ADD);
|
auto add = builder::makeEltwise(input_parameter[0], input_add_const, helpers::EltwiseTypes::ADD);
|
||||||
|
|
||||||
auto input_mul_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_weights);
|
auto input_mul_const = builder::makeConstant(ngPrc, input_dims, input_weights);
|
||||||
auto mul = ngraph::builder::makeEltwise(add, input_mul_const, ngraph::helpers::EltwiseTypes::MULTIPLY);
|
auto mul = builder::makeEltwise(add, input_mul_const, helpers::EltwiseTypes::MULTIPLY);
|
||||||
|
|
||||||
auto unsqueeze_input_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
auto unsqueeze_input_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||||
auto unsqueeze_input = std::make_shared<ngraph::opset5::Unsqueeze>(mul, unsqueeze_input_const);
|
auto unsqueeze_input = std::make_shared<Unsqueeze>(mul, unsqueeze_input_const);
|
||||||
|
|
||||||
// Body 1 - layers
|
// Body 1 - layers
|
||||||
auto cell_memory_constant = ngraph::builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
|
auto cell_memory_constant = builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
|
||||||
|
|
||||||
auto hidden_memory_constant = ngraph::builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
|
auto hidden_memory_constant = builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
|
||||||
|
|
||||||
auto squeeze_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
auto squeeze_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||||
auto squeeze = std::make_shared<ngraph::opset5::Squeeze>(unsqueeze_input, squeeze_const);
|
auto squeeze = std::make_shared<Squeeze>(unsqueeze_input, squeeze_const);
|
||||||
|
|
||||||
auto weightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals);
|
auto weightsNode = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals);
|
||||||
auto reccurrenceWeightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
|
auto reccurrenceWeightsNode = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
|
||||||
auto biasNode = ngraph::builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
|
auto biasNode = builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
|
||||||
auto lstm = std::make_shared<ngraph::opset4::LSTMCell>(squeeze, hidden_memory_constant, cell_memory_constant, weightsNode,
|
auto lstm = std::make_shared<LSTMCell>(squeeze, hidden_memory_constant, cell_memory_constant, weightsNode,
|
||||||
reccurrenceWeightsNode, biasNode, hiddenSize);
|
reccurrenceWeightsNode, biasNode, hiddenSize);
|
||||||
|
|
||||||
auto unsqueeze_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
auto unsqueeze_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||||
auto unsqueeze = std::make_shared<ngraph::opset5::Unsqueeze>(lstm->output(0), unsqueeze_const);
|
auto unsqueeze = std::make_shared<Unsqueeze>(lstm->output(0), unsqueeze_const);
|
||||||
|
|
||||||
auto first_reshape_pattern = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64,
|
auto first_reshape_pattern = std::make_shared<Constant>(element::i64,
|
||||||
ngraph::Shape{4}, std::vector<size_t>({1, 1, 1, hiddenSize}));
|
Shape{4}, std::vector<size_t>({1, 1, 1, hiddenSize}));
|
||||||
auto first_reshape = std::make_shared<ngraph::opset5::Reshape>(unsqueeze, first_reshape_pattern, false);
|
auto first_reshape = std::make_shared<Reshape>(unsqueeze, first_reshape_pattern, false);
|
||||||
// Body 1 - end
|
// Body 1 - end
|
||||||
|
|
||||||
auto inbetween_squeeze_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
auto inbetween_squeeze_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||||
auto inbetween_squeeze = std::make_shared<ngraph::opset5::Squeeze>(first_reshape, inbetween_squeeze_const);
|
auto inbetween_squeeze = std::make_shared<Squeeze>(first_reshape, inbetween_squeeze_const);
|
||||||
|
|
||||||
// Body 2 - layers
|
// Body 2 - layers
|
||||||
auto cell_memory_2_constant = ngraph::builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
|
auto cell_memory_2_constant = builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
|
||||||
|
|
||||||
auto hidden_memory_2_constant = ngraph::builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
|
auto hidden_memory_2_constant = builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
|
||||||
|
|
||||||
auto squeeze_2_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
auto squeeze_2_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||||
auto squeeze_2 = std::make_shared<ngraph::opset5::Squeeze>(inbetween_squeeze, squeeze_2_const);
|
auto squeeze_2 = std::make_shared<Squeeze>(inbetween_squeeze, squeeze_2_const);
|
||||||
|
|
||||||
auto weightsNode_2 = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, weights_2_vals);
|
auto weightsNode_2 = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, weights_2_vals);
|
||||||
auto reccurrenceWeightsNode_2 = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
|
auto reccurrenceWeightsNode_2 = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
|
||||||
auto biasNode_2 = ngraph::builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
|
auto biasNode_2 = builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
|
||||||
auto lstm_2 = std::make_shared<ngraph::opset4::LSTMCell>(squeeze_2, hidden_memory_2_constant, cell_memory_2_constant, weightsNode_2,
|
auto lstm_2 = std::make_shared<LSTMCell>(squeeze_2, hidden_memory_2_constant, cell_memory_2_constant, weightsNode_2,
|
||||||
reccurrenceWeightsNode_2, biasNode_2, hiddenSize);
|
reccurrenceWeightsNode_2, biasNode_2, hiddenSize);
|
||||||
|
|
||||||
auto unsqueeze_2_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
auto unsqueeze_2_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||||
auto unsqueeze_2 = std::make_shared<ngraph::opset5::Unsqueeze>(lstm_2->output(0), unsqueeze_2_const);
|
auto unsqueeze_2 = std::make_shared<Unsqueeze>(lstm_2->output(0), unsqueeze_2_const);
|
||||||
|
|
||||||
auto final_reshape_pattern = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64,
|
auto final_reshape_pattern = std::make_shared<Constant>(element::i64,
|
||||||
ngraph::Shape{4}, std::vector<size_t>({1, 1, 1, hiddenSize}));
|
Shape{4}, std::vector<size_t>({1, 1, 1, hiddenSize}));
|
||||||
auto final_reshape = std::make_shared<ngraph::opset5::Reshape>(unsqueeze_2, final_reshape_pattern, false);
|
auto final_reshape = std::make_shared<Reshape>(unsqueeze_2, final_reshape_pattern, false);
|
||||||
// Body 2 - end
|
// Body 2 - end
|
||||||
|
|
||||||
function = std::make_shared<ngraph::Function>(final_reshape, input_parameter, "TI_unrolled_without_memory");
|
function = std::make_shared<Function>(final_reshape, input_parameter, "TI_unrolled_without_memory");
|
||||||
}
|
}
|
||||||
|
|
||||||
void MultipleLSTMCellTest::CreatePureTensorIteratorModel() {
|
void MultipleLSTMCellTest::CreatePureTensorIteratorModel() {
|
||||||
InferenceEngine::Precision netPrecision;
|
InferenceEngine::Precision netPrecision;
|
||||||
std::map<std::string, std::string> config;
|
std::map<std::string, std::string> config;
|
||||||
size_t inputSize;
|
size_t inputSize;
|
||||||
std::tie(targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam();
|
std::tie(transformation, targetDevice, netPrecision, inputSize, hiddenSize, config) = this->GetParam();
|
||||||
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
|
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
|
||||||
|
|
||||||
std::vector<size_t> input_dims { 1, inputSize };
|
std::vector<size_t> input_dims { 1, inputSize };
|
||||||
@ -270,49 +278,49 @@ void MultipleLSTMCellTest::CreatePureTensorIteratorModel() {
|
|||||||
std::vector<size_t> hidden_memory_dims {1, hiddenSize};
|
std::vector<size_t> hidden_memory_dims {1, hiddenSize};
|
||||||
std::vector<size_t> cell_memory_dims {1, hiddenSize};
|
std::vector<size_t> cell_memory_dims {1, hiddenSize};
|
||||||
|
|
||||||
auto input_parameter = ngraph::builder::makeParams(ngPrc, {input_dims});
|
auto input_parameter = builder::makeParams(ngPrc, {input_dims});
|
||||||
|
|
||||||
auto input_add_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_bias);
|
auto input_add_const = builder::makeConstant(ngPrc, input_dims, input_bias);
|
||||||
auto add = ngraph::builder::makeEltwise(input_parameter[0], input_add_const, ngraph::helpers::EltwiseTypes::ADD);
|
auto add = builder::makeEltwise(input_parameter[0], input_add_const, helpers::EltwiseTypes::ADD);
|
||||||
|
|
||||||
auto input_mul_const = ngraph::builder::makeConstant(ngPrc, input_dims, input_weights);
|
auto input_mul_const = builder::makeConstant(ngPrc, input_dims, input_weights);
|
||||||
auto mul = ngraph::builder::makeEltwise(add, input_mul_const, ngraph::helpers::EltwiseTypes::MULTIPLY);
|
auto mul = builder::makeEltwise(add, input_mul_const, helpers::EltwiseTypes::MULTIPLY);
|
||||||
|
|
||||||
auto unsqueeze_input_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
auto unsqueeze_input_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||||
auto unsqueeze_input = std::make_shared<ngraph::opset5::Unsqueeze>(mul, unsqueeze_input_const);
|
auto unsqueeze_input = std::make_shared<Unsqueeze>(mul, unsqueeze_input_const);
|
||||||
|
|
||||||
auto permute_in_params = std::make_shared<ngraph::opset1::Constant>(ngraph::element::i64, ngraph::Shape{3}, ngraph::Shape{{1, 0, 2}});
|
auto permute_in_params = std::make_shared<Constant>(element::i64, Shape{3}, Shape{{1, 0, 2}});
|
||||||
auto permute_in = std::make_shared<ngraph::opset1::Transpose>(unsqueeze_input, permute_in_params);
|
auto permute_in = std::make_shared<Transpose>(unsqueeze_input, permute_in_params);
|
||||||
|
|
||||||
auto cell_memory_constant = ngraph::builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
|
auto cell_memory_constant = builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
|
||||||
|
|
||||||
auto hidden_memory_constant = ngraph::builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
|
auto hidden_memory_constant = builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
|
||||||
|
|
||||||
// Body - inputs
|
// Body - inputs
|
||||||
auto X = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, 1, inputSize});
|
auto X = std::make_shared<Parameter>(ngPrc, Shape{1, 1, inputSize});
|
||||||
auto H_t = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, hiddenSize});
|
auto H_t = std::make_shared<Parameter>(ngPrc, Shape{1, hiddenSize});
|
||||||
auto C_t = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, hiddenSize});
|
auto C_t = std::make_shared<Parameter>(ngPrc, Shape{1, hiddenSize});
|
||||||
H_t->set_friendly_name("hidden_state_1");
|
H_t->set_friendly_name("hidden_state_1");
|
||||||
C_t->set_friendly_name("cell_state_1");
|
C_t->set_friendly_name("cell_state_1");
|
||||||
// Body - layers
|
// Body - layers
|
||||||
auto squeeze_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
auto squeeze_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||||
auto squeeze = std::make_shared<ngraph::opset5::Squeeze>(X, squeeze_const);
|
auto squeeze = std::make_shared<Squeeze>(X, squeeze_const);
|
||||||
|
|
||||||
auto weightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals);
|
auto weightsNode = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, inputSize }, weights_vals);
|
||||||
auto reccurrenceWeightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
|
auto reccurrenceWeightsNode = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
|
||||||
auto biasNode = ngraph::builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
|
auto biasNode = builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
|
||||||
auto lstm = std::make_shared<ngraph::opset4::LSTMCell>(squeeze, H_t, C_t, weightsNode, reccurrenceWeightsNode, biasNode, hiddenSize);
|
auto lstm = std::make_shared<LSTMCell>(squeeze, H_t, C_t, weightsNode, reccurrenceWeightsNode, biasNode, hiddenSize);
|
||||||
|
|
||||||
auto unsqueeze_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
auto unsqueeze_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||||
auto unsqueeze = std::make_shared<ngraph::opset5::Unsqueeze>(lstm->output(0), unsqueeze_const);
|
auto unsqueeze = std::make_shared<Unsqueeze>(lstm->output(0), unsqueeze_const);
|
||||||
// body - outputs
|
// body - outputs
|
||||||
auto H_o = lstm->output(0);
|
auto H_o = lstm->output(0);
|
||||||
auto C_o = lstm->output(1);
|
auto C_o = lstm->output(1);
|
||||||
auto unsqueeze_o = unsqueeze->output(0);
|
auto unsqueeze_o = unsqueeze->output(0);
|
||||||
|
|
||||||
auto body = std::make_shared<ngraph::Function>(ngraph::OutputVector{unsqueeze_o, H_o, C_o}, ngraph::ParameterVector {X, H_t, C_t});
|
auto body = std::make_shared<Function>(OutputVector{unsqueeze_o, H_o, C_o}, ParameterVector {X, H_t, C_t});
|
||||||
// TI construction
|
// TI construction
|
||||||
auto tensor_iterator = std::make_shared<ngraph::opset5::TensorIterator>();
|
auto tensor_iterator = std::make_shared<TensorIterator>();
|
||||||
tensor_iterator->set_body(body);
|
tensor_iterator->set_body(body);
|
||||||
tensor_iterator->set_sliced_input(X, permute_in, 0, 1, 1, -1, 0);
|
tensor_iterator->set_sliced_input(X, permute_in, 0, 1, 1, -1, 0);
|
||||||
tensor_iterator->set_merged_input(H_t, hidden_memory_constant, H_o);
|
tensor_iterator->set_merged_input(H_t, hidden_memory_constant, H_o);
|
||||||
@ -326,44 +334,44 @@ void MultipleLSTMCellTest::CreatePureTensorIteratorModel() {
|
|||||||
out_cell.get_tensor().set_element_type(ngPrc);
|
out_cell.get_tensor().set_element_type(ngPrc);
|
||||||
tensor_iterator->validate_and_infer_types();
|
tensor_iterator->validate_and_infer_types();
|
||||||
|
|
||||||
auto first_reshape_pattern = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64,
|
auto first_reshape_pattern = std::make_shared<Constant>(element::i64,
|
||||||
ngraph::Shape{4}, std::vector<size_t>({1, 1, 1, hiddenSize}));
|
Shape{4}, std::vector<size_t>({1, 1, 1, hiddenSize}));
|
||||||
auto first_reshape = std::make_shared<ngraph::opset5::Reshape>(out_unsqueeze, first_reshape_pattern, false);
|
auto first_reshape = std::make_shared<Reshape>(out_unsqueeze, first_reshape_pattern, false);
|
||||||
// End of TI 1
|
// End of TI 1
|
||||||
|
|
||||||
auto inbetween_squeeze_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
auto inbetween_squeeze_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||||
auto inbetween_squeeze = std::make_shared<ngraph::opset5::Squeeze>(first_reshape, inbetween_squeeze_const);
|
auto inbetween_squeeze = std::make_shared<Squeeze>(first_reshape, inbetween_squeeze_const);
|
||||||
|
|
||||||
// Second TI
|
// Second TI
|
||||||
auto cell_memory_2_constant = ngraph::builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
|
auto cell_memory_2_constant = builder::makeConstant<float>(ngPrc, cell_memory_dims, cell_memory_init);
|
||||||
|
|
||||||
auto hidden_memory_2_constant = ngraph::builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
|
auto hidden_memory_2_constant = builder::makeConstant<float>(ngPrc, hidden_memory_dims, hidden_memory_init);
|
||||||
|
|
||||||
// Body - inputs
|
// Body - inputs
|
||||||
auto X_2 = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, 1, hiddenSize});
|
auto X_2 = std::make_shared<Parameter>(ngPrc, Shape{1, 1, hiddenSize});
|
||||||
auto H_t_2 = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, hiddenSize});
|
auto H_t_2 = std::make_shared<Parameter>(ngPrc, Shape{1, hiddenSize});
|
||||||
auto C_t_2 = std::make_shared<ngraph::opset5::Parameter>(ngPrc, ngraph::Shape{1, hiddenSize});
|
auto C_t_2 = std::make_shared<Parameter>(ngPrc, Shape{1, hiddenSize});
|
||||||
H_t_2->set_friendly_name("hidden_state_2");
|
H_t_2->set_friendly_name("hidden_state_2");
|
||||||
C_t_2->set_friendly_name("cell_state_2");
|
C_t_2->set_friendly_name("cell_state_2");
|
||||||
// Body - layers
|
// Body - layers
|
||||||
auto squeeze_2_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
auto squeeze_2_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||||
auto squeeze_2 = std::make_shared<ngraph::opset5::Squeeze>(X_2, squeeze_2_const);
|
auto squeeze_2 = std::make_shared<Squeeze>(X_2, squeeze_2_const);
|
||||||
|
|
||||||
auto weightsNode_2 = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, weights_2_vals);
|
auto weightsNode_2 = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, weights_2_vals);
|
||||||
auto reccurrenceWeightsNode_2 = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
|
auto reccurrenceWeightsNode_2 = builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, reccurrenceWeights_vals);
|
||||||
auto biasNode_2 = ngraph::builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
|
auto biasNode_2 = builder::makeConstant<float>(ngPrc, {4 * hiddenSize}, bias_vals);
|
||||||
auto lstm_2 = std::make_shared<ngraph::opset4::LSTMCell>(squeeze_2, H_t_2, C_t_2, weightsNode_2, reccurrenceWeightsNode_2, biasNode_2, hiddenSize);
|
auto lstm_2 = std::make_shared<LSTMCell>(squeeze_2, H_t_2, C_t_2, weightsNode_2, reccurrenceWeightsNode_2, biasNode_2, hiddenSize);
|
||||||
|
|
||||||
auto unsqueeze_2_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, ngraph::Shape{1}, squeeze_axes);
|
auto unsqueeze_2_const = std::make_shared<Constant>(element::i64, Shape{1}, squeeze_axes);
|
||||||
auto unsqueeze_2 = std::make_shared<ngraph::opset5::Unsqueeze>(lstm_2->output(0), unsqueeze_2_const);
|
auto unsqueeze_2 = std::make_shared<Unsqueeze>(lstm_2->output(0), unsqueeze_2_const);
|
||||||
// body - outputs
|
// body - outputs
|
||||||
auto H_o_2 = lstm_2->output(0);
|
auto H_o_2 = lstm_2->output(0);
|
||||||
auto C_o_2 = lstm_2->output(1);
|
auto C_o_2 = lstm_2->output(1);
|
||||||
auto unsqueeze_o_2 = unsqueeze_2->output(0);
|
auto unsqueeze_o_2 = unsqueeze_2->output(0);
|
||||||
|
|
||||||
auto body_2 = std::make_shared<ngraph::Function>(ngraph::OutputVector{unsqueeze_o_2, H_o_2, C_o_2}, ngraph::ParameterVector {X_2, H_t_2, C_t_2});
|
auto body_2 = std::make_shared<Function>(OutputVector{unsqueeze_o_2, H_o_2, C_o_2}, ParameterVector {X_2, H_t_2, C_t_2});
|
||||||
// TI construction
|
// TI construction
|
||||||
auto tensor_iterator_2 = std::make_shared<ngraph::opset5::TensorIterator>();
|
auto tensor_iterator_2 = std::make_shared<TensorIterator>();
|
||||||
tensor_iterator_2->set_body(body_2);
|
tensor_iterator_2->set_body(body_2);
|
||||||
tensor_iterator_2->set_sliced_input(X_2, inbetween_squeeze, 0, 1, 1, -1, 0);
|
tensor_iterator_2->set_sliced_input(X_2, inbetween_squeeze, 0, 1, 1, -1, 0);
|
||||||
tensor_iterator_2->set_merged_input(H_t_2, hidden_memory_2_constant, H_o_2);
|
tensor_iterator_2->set_merged_input(H_t_2, hidden_memory_2_constant, H_o_2);
|
||||||
@ -376,70 +384,17 @@ void MultipleLSTMCellTest::CreatePureTensorIteratorModel() {
|
|||||||
out_hidden_2.get_tensor().set_element_type(ngPrc);
|
out_hidden_2.get_tensor().set_element_type(ngPrc);
|
||||||
out_cell_2.get_tensor().set_element_type(ngPrc);
|
out_cell_2.get_tensor().set_element_type(ngPrc);
|
||||||
tensor_iterator_2->validate_and_infer_types();
|
tensor_iterator_2->validate_and_infer_types();
|
||||||
auto final_reshape_pattern = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64,
|
auto final_reshape_pattern = std::make_shared<Constant>(element::i64,
|
||||||
ngraph::Shape{4}, std::vector<size_t>({1, 1, 1, hiddenSize}));
|
Shape{4}, std::vector<size_t>({1, 1, 1, hiddenSize}));
|
||||||
auto final_reshape = std::make_shared<ngraph::opset5::Reshape>(out_unsqueeze_2, final_reshape_pattern, false);
|
auto final_reshape = std::make_shared<Reshape>(out_unsqueeze_2, final_reshape_pattern, false);
|
||||||
|
|
||||||
function = std::make_shared<ngraph::Function>(final_reshape, input_parameter, "PureTI");
|
function = std::make_shared<Function>(final_reshape, input_parameter, "PureTI");
|
||||||
}
|
}
|
||||||
|
|
||||||
void MultipleLSTMCellTest::Run() {
|
void MultipleLSTMCellTest::InitMemory() {
|
||||||
SKIP_IF_CURRENT_TEST_IS_DISABLED()
|
|
||||||
InferenceEngine::TensorDesc state_description(InferenceEngine::Precision::FP32,
|
InferenceEngine::TensorDesc state_description(InferenceEngine::Precision::FP32,
|
||||||
InferenceEngine::SizeVector({1, hiddenSize}),
|
InferenceEngine::SizeVector({1, hiddenSize}),
|
||||||
InferenceEngine::Layout::NC);
|
InferenceEngine::Layout::NC);
|
||||||
LoadNetwork();
|
|
||||||
IE_SUPPRESS_DEPRECATED_START
|
|
||||||
auto states = executableNetwork.QueryState();
|
|
||||||
for (auto& state : states) {
|
|
||||||
auto name = state.GetName();
|
|
||||||
if (name == "cell_memory") {
|
|
||||||
auto blob = FuncTestUtils::createAndFillBlobWithFloatArray(state_description,
|
|
||||||
cell_memory_init.data(), cell_memory_init.size());
|
|
||||||
state.SetState(blob);
|
|
||||||
} else if (name == "hidden_memory") {
|
|
||||||
auto blob = FuncTestUtils::createAndFillBlobWithFloatArray(state_description,
|
|
||||||
hidden_memory_init.data(), hidden_memory_init.size());
|
|
||||||
state.SetState(blob);
|
|
||||||
} else if (name == "cell_memory_2") {
|
|
||||||
auto blob = FuncTestUtils::createAndFillBlobWithFloatArray(state_description,
|
|
||||||
cell_memory_init.data(), cell_memory_init.size());
|
|
||||||
state.SetState(blob);
|
|
||||||
} else if (name == "hidden_memory_2") {
|
|
||||||
auto blob = FuncTestUtils::createAndFillBlobWithFloatArray(state_description,
|
|
||||||
hidden_memory_init.data(), hidden_memory_init.size());
|
|
||||||
state.SetState(blob);
|
|
||||||
} else {
|
|
||||||
GTEST_FAIL() << "unknown memory state";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
IE_SUPPRESS_DEPRECATED_END
|
|
||||||
GenerateInputs();
|
|
||||||
Infer();
|
|
||||||
switchToNgraphFriendlyModel();
|
|
||||||
Validate();
|
|
||||||
}
|
|
||||||
|
|
||||||
void MultipleLSTMCellTest::RunLowLatency(bool regular_api) {
|
|
||||||
SKIP_IF_CURRENT_TEST_IS_DISABLED()
|
|
||||||
InferenceEngine::TensorDesc state_description(InferenceEngine::Precision::FP32,
|
|
||||||
InferenceEngine::SizeVector({1, hiddenSize}),
|
|
||||||
InferenceEngine::Layout::NC);
|
|
||||||
// Calculate values after LowLatency transformation
|
|
||||||
CreatePureTensorIteratorModel();
|
|
||||||
if (regular_api) {
|
|
||||||
cnnNetwork = InferenceEngine::CNNNetwork{function};
|
|
||||||
InferenceEngine::LowLatency(cnnNetwork);
|
|
||||||
ConfigureNetwork();
|
|
||||||
executableNetwork = core->LoadNetwork(cnnNetwork, targetDevice, configuration);
|
|
||||||
} else {
|
|
||||||
function->validate_nodes_and_infer_types();
|
|
||||||
// Apply LowLatency (insert Assigns/ReadValues) and UnrollTensorIterator
|
|
||||||
ngraph::pass::Manager manager;
|
|
||||||
manager.register_pass<ngraph::pass::LowLatency>(); // LowLatency enables UnrollTI
|
|
||||||
manager.run_passes(function);
|
|
||||||
LoadNetwork();
|
|
||||||
}
|
|
||||||
IE_SUPPRESS_DEPRECATED_START
|
IE_SUPPRESS_DEPRECATED_START
|
||||||
auto states = executableNetwork.QueryState();
|
auto states = executableNetwork.QueryState();
|
||||||
for (auto& state : states) {
|
for (auto& state : states) {
|
||||||
@ -465,14 +420,73 @@ void MultipleLSTMCellTest::RunLowLatency(bool regular_api) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
IE_SUPPRESS_DEPRECATED_END
|
IE_SUPPRESS_DEPRECATED_END
|
||||||
|
}
|
||||||
|
|
||||||
|
void MultipleLSTMCellTest::ApplyLowLatency() {
|
||||||
|
// Calculate values after LowLatency transformation
|
||||||
|
CreatePureTensorIteratorModel();
|
||||||
|
if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY) {
|
||||||
|
function->validate_nodes_and_infer_types();
|
||||||
|
// Apply LowLatency (insert Assigns/ReadValues) and UnrollTensorIterator
|
||||||
|
pass::Manager manager;
|
||||||
|
NGRAPH_SUPPRESS_DEPRECATED_START
|
||||||
|
manager.register_pass<ngraph::pass::LowLatency>();
|
||||||
|
NGRAPH_SUPPRESS_DEPRECATED_END // LowLatency enables UnrollTI
|
||||||
|
manager.run_passes(function);
|
||||||
|
bool ti_found = helpers::is_tensor_iterator_exist(function);
|
||||||
|
EXPECT_EQ(ti_found, true);
|
||||||
|
LoadNetwork();
|
||||||
|
} else if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2) {
|
||||||
|
function->validate_nodes_and_infer_types();
|
||||||
|
// Apply LowLatency (insert Assigns/ReadValues) and UnrollTensorIterator
|
||||||
|
|
||||||
|
pass::Manager manager;
|
||||||
|
manager.register_pass<pass::LowLatency2>();
|
||||||
|
manager.run_passes(function);
|
||||||
|
bool ti_found = helpers::is_tensor_iterator_exist(function);
|
||||||
|
EXPECT_EQ(ti_found, false);
|
||||||
|
LoadNetwork();
|
||||||
|
} else if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY_REGULAR_API) {
|
||||||
|
cnnNetwork = InferenceEngine::CNNNetwork{function};
|
||||||
|
IE_SUPPRESS_DEPRECATED_START
|
||||||
|
InferenceEngine::LowLatency(cnnNetwork);
|
||||||
|
IE_SUPPRESS_DEPRECATED_END
|
||||||
|
|
||||||
|
bool ti_found = helpers::is_tensor_iterator_exist(cnnNetwork.getFunction());
|
||||||
|
EXPECT_EQ(ti_found, true);
|
||||||
|
|
||||||
|
ConfigureNetwork();
|
||||||
|
executableNetwork = core->LoadNetwork(cnnNetwork, targetDevice, configuration);
|
||||||
|
} else if (transformation == ngraph::helpers::MemoryTransformation::LOW_LATENCY_V2_REGULAR_API) {
|
||||||
|
cnnNetwork = InferenceEngine::CNNNetwork{function};
|
||||||
|
InferenceEngine::lowLatency2(cnnNetwork);
|
||||||
|
|
||||||
|
bool ti_found = helpers::is_tensor_iterator_exist(cnnNetwork.getFunction());
|
||||||
|
EXPECT_EQ(ti_found, false);
|
||||||
|
|
||||||
|
ConfigureNetwork();
|
||||||
|
executableNetwork = core->LoadNetwork(cnnNetwork, targetDevice, configuration);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void MultipleLSTMCellTest::Run() {
|
||||||
|
SKIP_IF_CURRENT_TEST_IS_DISABLED()
|
||||||
|
if (transformation != ngraph::helpers::MemoryTransformation::NONE) {
|
||||||
|
ApplyLowLatency();
|
||||||
|
} else {
|
||||||
|
LoadNetwork();
|
||||||
|
}
|
||||||
|
|
||||||
|
InitMemory();
|
||||||
GenerateInputs();
|
GenerateInputs();
|
||||||
Infer();
|
Infer();
|
||||||
|
|
||||||
// Calculate ref values for Unrolled TI
|
// Calculate ref values
|
||||||
CreatePureTensorIteratorModel();
|
if (transformation == ngraph::helpers::MemoryTransformation::NONE) {
|
||||||
ngraph::pass::Manager manager_2;
|
switchToNgraphFriendlyModel();
|
||||||
manager_2.register_pass<ngraph::pass::UnrollTensorIterator>();
|
} else {
|
||||||
manager_2.run_passes(function);
|
CreatePureTensorIteratorModel();
|
||||||
|
}
|
||||||
Validate();
|
Validate();
|
||||||
}
|
}
|
||||||
} // namespace SubgraphTestsDefinitions
|
} // namespace SubgraphTestsDefinitions
|
||||||
|
@ -214,6 +214,15 @@ enum class SequenceTestsMode {
|
|||||||
CONVERT_TO_TI_RAND_SEQ_LEN_PARAM,
|
CONVERT_TO_TI_RAND_SEQ_LEN_PARAM,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
enum class MemoryTransformation {
|
||||||
|
NONE,
|
||||||
|
LOW_LATENCY,
|
||||||
|
LOW_LATENCY_REGULAR_API,
|
||||||
|
LOW_LATENCY_V2,
|
||||||
|
LOW_LATENCY_V2_REGULAR_API,
|
||||||
|
LOW_LATENCY_V2_ORIGINAL_INIT
|
||||||
|
};
|
||||||
|
|
||||||
std::ostream &operator<<(std::ostream &os, const ReductionType &m);
|
std::ostream &operator<<(std::ostream &os, const ReductionType &m);
|
||||||
std::ostream &operator<<(std::ostream &os, const PadMode &m);
|
std::ostream &operator<<(std::ostream &os, const PadMode &m);
|
||||||
|
|
||||||
@ -297,5 +306,7 @@ std::ostream& operator<<(std::ostream & os, TensorIteratorBody type);
|
|||||||
|
|
||||||
std::ostream& operator<<(std::ostream & os, SequenceTestsMode type);
|
std::ostream& operator<<(std::ostream & os, SequenceTestsMode type);
|
||||||
|
|
||||||
|
std::ostream& operator<<(std::ostream & os, MemoryTransformation type);
|
||||||
|
|
||||||
} // namespace helpers
|
} // namespace helpers
|
||||||
} // namespace ngraph
|
} // namespace ngraph
|
||||||
|
@ -817,5 +817,32 @@ std::ostream& operator<<(std::ostream & os, SequenceTestsMode type) {
|
|||||||
}
|
}
|
||||||
return os;
|
return os;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::ostream& operator<<(std::ostream & os, MemoryTransformation type) {
|
||||||
|
switch (type) {
|
||||||
|
case MemoryTransformation::NONE:
|
||||||
|
os << "NONE";
|
||||||
|
break;
|
||||||
|
case MemoryTransformation::LOW_LATENCY_V2:
|
||||||
|
os << "LOW_LATENCY_V2";
|
||||||
|
break;
|
||||||
|
case MemoryTransformation::LOW_LATENCY:
|
||||||
|
os << "LOW_LATENCY";
|
||||||
|
break;
|
||||||
|
case MemoryTransformation::LOW_LATENCY_V2_REGULAR_API:
|
||||||
|
os << "LOW_LATENCY_V2_REGULAR_API";
|
||||||
|
break;
|
||||||
|
case MemoryTransformation::LOW_LATENCY_REGULAR_API:
|
||||||
|
os << "LOW_LATENCY_REGULAR_API";
|
||||||
|
break;
|
||||||
|
case MemoryTransformation::LOW_LATENCY_V2_ORIGINAL_INIT:
|
||||||
|
os << "LOW_LATENCY_V2_ORIGINAL_INIT";
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw std::runtime_error("NOT_SUPPORTED_TYPE");
|
||||||
|
}
|
||||||
|
return os;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace helpers
|
} // namespace helpers
|
||||||
} // namespace ngraph
|
} // namespace ngraph
|
||||||
|
@ -11,7 +11,7 @@ def get_available_transformations():
|
|||||||
try:
|
try:
|
||||||
from openvino.offline_transformations import ApplyLowLatencyTransformation # pylint: disable=import-error,no-name-in-module
|
from openvino.offline_transformations import ApplyLowLatencyTransformation # pylint: disable=import-error,no-name-in-module
|
||||||
return {
|
return {
|
||||||
'LowLatency': ApplyLowLatencyTransformation,
|
'LowLatency2': ApplyLowLatencyTransformation,
|
||||||
}
|
}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return {}
|
return {}
|
||||||
|
@ -8,6 +8,7 @@ import os
|
|||||||
import re
|
import re
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
from itertools import zip_longest
|
from itertools import zip_longest
|
||||||
|
from distutils.util import strtobool
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
@ -257,9 +258,9 @@ def get_common_cli_parser(parser: argparse.ArgumentParser = None):
|
|||||||
help='Apply additional transformations. ' +
|
help='Apply additional transformations. ' +
|
||||||
'Usage: "--transform transformation_name1[args],transformation_name2..." ' +
|
'Usage: "--transform transformation_name1[args],transformation_name2..." ' +
|
||||||
'where [args] is key=value pairs separated by semicolon. ' +
|
'where [args] is key=value pairs separated by semicolon. ' +
|
||||||
'Examples: "--transform LowLatency" or ' +
|
'Examples: "--transform LowLatency2" or ' +
|
||||||
' "--transform LowLatency[num_iterations=2]" ' +
|
' "--transform LowLatency2[use_const_initializer=False]" ' +
|
||||||
'Available transformations: "LowLatency"',
|
'Available transformations: "LowLatency2"',
|
||||||
default="")
|
default="")
|
||||||
common_group.add_argument('--disable_fusing',
|
common_group.add_argument('--disable_fusing',
|
||||||
help='Turn off fusing of linear operations to Convolution',
|
help='Turn off fusing of linear operations to Convolution',
|
||||||
@ -1151,6 +1152,14 @@ def isfloat(value):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def isbool(value):
|
||||||
|
try:
|
||||||
|
strtobool(value)
|
||||||
|
return True
|
||||||
|
except ValueError:
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
def convert_string_to_real_type(value: str):
|
def convert_string_to_real_type(value: str):
|
||||||
values = value.split(',')
|
values = value.split(',')
|
||||||
for i in range(len(values)):
|
for i in range(len(values)):
|
||||||
@ -1159,6 +1168,8 @@ def convert_string_to_real_type(value: str):
|
|||||||
values[i] = int(value)
|
values[i] = int(value)
|
||||||
elif isfloat(value):
|
elif isfloat(value):
|
||||||
values[i] = float(value)
|
values[i] = float(value)
|
||||||
|
elif isbool(value):
|
||||||
|
values[i] = strtobool(value)
|
||||||
|
|
||||||
return values[0] if len(values) == 1 else values
|
return values[0] if len(values) == 1 else values
|
||||||
|
|
||||||
|
@ -905,64 +905,65 @@ class TransformChecker(unittest.TestCase):
|
|||||||
self.assertEqual(parse_transform(""), [])
|
self.assertEqual(parse_transform(""), [])
|
||||||
|
|
||||||
def test_single_pass(self):
|
def test_single_pass(self):
|
||||||
self.assertEqual(parse_transform("LowLatency"), [("LowLatency", {})])
|
self.assertEqual(parse_transform("LowLatency2"), [("LowLatency2", {})])
|
||||||
|
|
||||||
def test_single_pass_with_args(self):
|
def test_single_pass_with_args(self):
|
||||||
self.assertEqual(parse_transform("LowLatency[num_iterations=2]"),
|
self.assertEqual(parse_transform("LowLatency2[use_const_initializer=True]"),
|
||||||
[("LowLatency", {"num_iterations": 2})])
|
[("LowLatency2", {"use_const_initializer": True})])
|
||||||
|
|
||||||
def test_single_pass_with_multiple_args(self):
|
def test_single_pass_with_multiple_args(self):
|
||||||
self.assertEqual(parse_transform("LowLatency[num_iterations=2;dummy_attr=3.14]"),
|
self.assertEqual(parse_transform("LowLatency2[use_const_initializer=True;dummy_attr=3.14]"),
|
||||||
[("LowLatency", {"num_iterations": 2, "dummy_attr": 3.14})])
|
[("LowLatency2", {"use_const_initializer": True, "dummy_attr": 3.14})])
|
||||||
|
|
||||||
def test_multiple_passes_with_args(self):
|
def test_multiple_passes_with_args(self):
|
||||||
self.assertEqual(parse_transform("LowLatency[num_iterations=2],DummyPass[type=ReLU]"),
|
self.assertEqual(parse_transform("LowLatency2[use_const_initializer=True],DummyPass[type=ReLU]"),
|
||||||
[("LowLatency", {"num_iterations": 2}),
|
[("LowLatency2", {"use_const_initializer": True}),
|
||||||
("DummyPass", {"type": "ReLU"})])
|
("DummyPass", {"type": "ReLU"})])
|
||||||
|
|
||||||
def test_multiple_passes_with_args2(self):
|
def test_multiple_passes_with_args2(self):
|
||||||
self.assertEqual(parse_transform("LowLatency[num_iterations=2,3,4.15],DummyPass1,DummyPass2[types=ReLU,PReLU;values=1,2,3]"),
|
self.assertEqual(parse_transform("LowLatency2[use_const_initializer=True,False],DummyPass1,"
|
||||||
[("LowLatency", {"num_iterations": [2,3,4.15]}),
|
"DummyPass2[types=ReLU,PReLU;values=1,2,3]"),
|
||||||
|
[("LowLatency2", {"use_const_initializer": [True, False]}),
|
||||||
("DummyPass1", {}),
|
("DummyPass1", {}),
|
||||||
("DummyPass2", {"types": ["ReLU", "PReLU"], "values": [1,2,3]})])
|
("DummyPass2", {"types": ["ReLU", "PReLU"], "values": [1,2,3]})])
|
||||||
|
|
||||||
def test_multiple_passes_no_args(self):
|
def test_multiple_passes_no_args(self):
|
||||||
self.assertEqual(parse_transform("DummyPass,LowLatency2"),
|
self.assertEqual(parse_transform("DummyPass,LowLatency22"),
|
||||||
[("DummyPass", {}), ("LowLatency2", {})])
|
[("DummyPass", {}), ("LowLatency22", {})])
|
||||||
|
|
||||||
def test_single_pass_neg(self):
|
def test_single_pass_neg(self):
|
||||||
self.assertRaises(Error, parse_transform, "LowLatency!")
|
self.assertRaises(Error, parse_transform, "LowLatency2!")
|
||||||
|
|
||||||
def test_multiple_passes_neg(self):
|
def test_multiple_passes_neg(self):
|
||||||
self.assertRaises(Error, parse_transform, "LowLatency;DummyPass")
|
self.assertRaises(Error, parse_transform, "LowLatency2;DummyPass")
|
||||||
|
|
||||||
def test_single_pass_with_args_neg1(self):
|
def test_single_pass_with_args_neg1(self):
|
||||||
self.assertRaises(Error, parse_transform, "LowLatency[=2]")
|
self.assertRaises(Error, parse_transform, "LowLatency2[=2]")
|
||||||
|
|
||||||
def test_single_pass_with_args_neg2(self):
|
def test_single_pass_with_args_neg2(self):
|
||||||
self.assertRaises(Error, parse_transform, "LowLatency[key=]")
|
self.assertRaises(Error, parse_transform, "LowLatency2[key=]")
|
||||||
|
|
||||||
def test_single_pass_with_args_neg3(self):
|
def test_single_pass_with_args_neg3(self):
|
||||||
self.assertRaises(Error, parse_transform, "LowLatency[]")
|
self.assertRaises(Error, parse_transform, "LowLatency2[]")
|
||||||
|
|
||||||
def test_single_pass_with_args_neg4(self):
|
def test_single_pass_with_args_neg4(self):
|
||||||
self.assertRaises(Error, parse_transform, "LowLatency[key=value;]")
|
self.assertRaises(Error, parse_transform, "LowLatency2[key=value;]")
|
||||||
|
|
||||||
def test_single_pass_with_args_neg5(self):
|
def test_single_pass_with_args_neg5(self):
|
||||||
self.assertRaises(Error, parse_transform, "LowLatency[value]")
|
self.assertRaises(Error, parse_transform, "LowLatency2[value]")
|
||||||
|
|
||||||
def test_single_pass_with_args_neg6(self):
|
def test_single_pass_with_args_neg6(self):
|
||||||
self.assertRaises(Error, parse_transform, "LowLatency[key=value")
|
self.assertRaises(Error, parse_transform, "LowLatency2[key=value")
|
||||||
|
|
||||||
@patch("mo.back.offline_transformations.get_available_transformations")
|
@patch("mo.back.offline_transformations.get_available_transformations")
|
||||||
def test_check_low_latency_is_available(self, available_transformations):
|
def test_check_low_latency_is_available(self, available_transformations):
|
||||||
available_transformations.return_value = {"LowLatency": None}
|
available_transformations.return_value = {"LowLatency2": None}
|
||||||
try:
|
try:
|
||||||
check_available_transforms([("LowLatency" ,"")], True)
|
check_available_transforms([("LowLatency2", "")], True)
|
||||||
except Error as e:
|
except Error as e:
|
||||||
self.assertTrue(False, "Exception \"{}\" is unexpected".format(e))
|
self.assertTrue(False, "Exception \"{}\" is unexpected".format(e))
|
||||||
|
|
||||||
@patch("mo.back.offline_transformations.get_available_transformations")
|
@patch("mo.back.offline_transformations.get_available_transformations")
|
||||||
def test_check_dummy_pass_is_available(self, available_transformations):
|
def test_check_dummy_pass_is_available(self, available_transformations):
|
||||||
available_transformations.return_value = {"LowLatency": None}
|
available_transformations.return_value = {"LowLatency2": None}
|
||||||
self.assertRaises(Error, check_available_transforms, [("DummyPass", "")], True)
|
self.assertRaises(Error, check_available_transforms, [("DummyPass", "")], True)
|
||||||
|
@ -8,13 +8,14 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include <ngraph/pass/graph_rewrite.hpp>
|
#include <ngraph/pass/graph_rewrite.hpp>
|
||||||
|
#include <ngraph/pass/pass.hpp>
|
||||||
|
|
||||||
namespace ngraph
|
namespace ngraph
|
||||||
{
|
{
|
||||||
namespace pass
|
namespace pass
|
||||||
{
|
{
|
||||||
/**
|
/**
|
||||||
* @brief The transformation finds all TensorIterator layers in the network,
|
* @brief The transformation finds all TensorIterator/Loop layers in the network,
|
||||||
* processes all back edges that describe a connection between Result and Parameter
|
* processes all back edges that describe a connection between Result and Parameter
|
||||||
* of the TensorIterator body,and inserts ReadValue layer between Parameter
|
* of the TensorIterator body,and inserts ReadValue layer between Parameter
|
||||||
* and the next layers after this Parameter, and Assign layer after the layers
|
* and the next layers after this Parameter, and Assign layer after the layers
|
||||||
@ -42,11 +43,50 @@ namespace ngraph
|
|||||||
* by step, the states will store between inferences.
|
* by step, the states will store between inferences.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
class NGRAPH_API LowLatency : public ngraph::pass::MatcherPass
|
class NGRAPH_DEPRECATED("Use LowLatency2 instead.") NGRAPH_API LowLatency
|
||||||
|
: public ngraph::pass::MatcherPass
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
NGRAPH_RTTI_DECLARATION;
|
NGRAPH_RTTI_DECLARATION;
|
||||||
LowLatency();
|
LowLatency();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief The transformation finds all TensorIterator/Loop layers in the network,
|
||||||
|
* processes all back edges that describe a connection between Result and Parameter
|
||||||
|
* of the TensorIterator/Loop bodies,and inserts ReadValue and Assign layers at the
|
||||||
|
* input and output corresponding to this back edge.
|
||||||
|
* Supported platforms: CPU, GNA.
|
||||||
|
*
|
||||||
|
* The example below describes the changes made by the transformation
|
||||||
|
* [] - TensorIterator body
|
||||||
|
* () - new layer
|
||||||
|
* BE - back-edge
|
||||||
|
*
|
||||||
|
* before applying the transformation:
|
||||||
|
* -> input1[BE_1 -> Parameter -> Layers ... -> Result -> BE_1 ]output1->
|
||||||
|
*
|
||||||
|
* after applying the transformation:
|
||||||
|
* ->(ReadValue)-> input1[BE_1 ->Parameter->Layers ...->Result->BE_1]output1 ->(Assign)
|
||||||
|
* \
|
||||||
|
* ->...
|
||||||
|
* After applying the transformation, the resulting network can be inferred
|
||||||
|
* step by step, the states will store between inferences.
|
||||||
|
*/
|
||||||
|
class NGRAPH_API LowLatency2 : public ngraph::pass::FunctionPass
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
NGRAPH_RTTI_DECLARATION;
|
||||||
|
|
||||||
|
explicit LowLatency2(bool use_const_initializer = true)
|
||||||
|
: m_use_const_initializer(use_const_initializer)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
bool run_on_function(std::shared_ptr<ngraph::Function> f) override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
bool m_use_const_initializer;
|
||||||
|
};
|
||||||
} // namespace pass
|
} // namespace pass
|
||||||
} // namespace ngraph
|
} // namespace ngraph
|
||||||
|
@ -129,7 +129,6 @@ void op::v0::TensorIterator::validate_and_infer_types()
|
|||||||
m_body->get_results().at(merged_input_description->m_body_value_index)->input(0);
|
m_body->get_results().at(merged_input_description->m_body_value_index)->input(0);
|
||||||
ends.push_back(body_value.get_node()->shared_from_this());
|
ends.push_back(body_value.get_node()->shared_from_this());
|
||||||
|
|
||||||
auto body_value_partial_shape = body_value.get_partial_shape();
|
|
||||||
auto body_parameter =
|
auto body_parameter =
|
||||||
m_body->get_parameters().at(merged_input_description->m_body_parameter_index);
|
m_body->get_parameters().at(merged_input_description->m_body_parameter_index);
|
||||||
|
|
||||||
|
@ -6,12 +6,29 @@
|
|||||||
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
|
||||||
|
#include <ngraph/log.hpp>
|
||||||
#include <ngraph/opsets/opset6.hpp>
|
#include <ngraph/opsets/opset6.hpp>
|
||||||
|
#include <ngraph/opsets/opset7.hpp>
|
||||||
#include <ngraph/pattern/op/wrap_type.hpp>
|
#include <ngraph/pattern/op/wrap_type.hpp>
|
||||||
|
#include <ngraph/rt_info.hpp>
|
||||||
#include <ngraph/variant.hpp>
|
#include <ngraph/variant.hpp>
|
||||||
|
|
||||||
|
NGRAPH_RTTI_DEFINITION(ngraph::pass::LowLatency2, "LowLatency2", 0);
|
||||||
|
|
||||||
|
NGRAPH_SUPPRESS_DEPRECATED_START
|
||||||
NGRAPH_RTTI_DEFINITION(ngraph::pass::LowLatency, "LowLatency", 0);
|
NGRAPH_RTTI_DEFINITION(ngraph::pass::LowLatency, "LowLatency", 0);
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace ngraph;
|
||||||
|
|
||||||
|
namespace
|
||||||
|
{
|
||||||
|
string generate_variable_name(const string& op_name, const string& param_name, int variable_idx)
|
||||||
|
{
|
||||||
|
return op_name + "/" + param_name + "/" + "variable_" + to_string(variable_idx);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
ngraph::pass::LowLatency::LowLatency()
|
ngraph::pass::LowLatency::LowLatency()
|
||||||
{
|
{
|
||||||
auto tensor_iterator = ngraph::pattern::wrap_type<opset6::TensorIterator, opset6::Loop>();
|
auto tensor_iterator = ngraph::pattern::wrap_type<opset6::TensorIterator, opset6::Loop>();
|
||||||
@ -58,11 +75,12 @@ ngraph::pass::LowLatency::LowLatency()
|
|||||||
const auto& inputs_to = func->get_parameters()
|
const auto& inputs_to = func->get_parameters()
|
||||||
.at(merged_in->m_body_parameter_index)
|
.at(merged_in->m_body_parameter_index)
|
||||||
->get_output_target_inputs(0);
|
->get_output_target_inputs(0);
|
||||||
const std::string variable_name(sub_graph_op->get_friendly_name() + "/" +
|
const std::string variable_name(
|
||||||
func->get_parameters()
|
generate_variable_name(sub_graph_op->get_friendly_name(),
|
||||||
.at(merged_in->m_body_parameter_index)
|
func->get_parameters()
|
||||||
->get_friendly_name() +
|
.at(merged_in->m_body_parameter_index)
|
||||||
"/variable_" + std::to_string(variable_id));
|
->get_friendly_name(),
|
||||||
|
variable_id));
|
||||||
auto variable = std::make_shared<Variable>(
|
auto variable = std::make_shared<Variable>(
|
||||||
VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name});
|
VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name});
|
||||||
auto read_value = std::make_shared<opset6::ReadValue>(
|
auto read_value = std::make_shared<opset6::ReadValue>(
|
||||||
@ -90,3 +108,178 @@ ngraph::pass::LowLatency::LowLatency()
|
|||||||
auto m = std::make_shared<ngraph::pattern::Matcher>(tensor_iterator, "LowLatency");
|
auto m = std::make_shared<ngraph::pattern::Matcher>(tensor_iterator, "LowLatency");
|
||||||
register_matcher(m, callback);
|
register_matcher(m, callback);
|
||||||
}
|
}
|
||||||
|
NGRAPH_SUPPRESS_DEPRECATED_END
|
||||||
|
|
||||||
|
void UnrollSingleIteration(const shared_ptr<op::util::SubGraphOp>& sub_graph_op,
|
||||||
|
const shared_ptr<Function>& outer_f)
|
||||||
|
{
|
||||||
|
using namespace opset7;
|
||||||
|
|
||||||
|
const auto& params = sub_graph_op->get_function()->get_parameters();
|
||||||
|
const auto& results = sub_graph_op->get_function()->get_results();
|
||||||
|
|
||||||
|
// before: Layer1 -> TI [input -> bodyParameter -> Layer2 -> ...]
|
||||||
|
// after: Layer1 -> Layer2 ->...
|
||||||
|
for (const auto& in : sub_graph_op->get_input_descriptions())
|
||||||
|
{
|
||||||
|
const auto& connect_to = sub_graph_op->get_input_source_output(in->m_input_index);
|
||||||
|
for (auto& output : params.at(in->m_body_parameter_index)->outputs())
|
||||||
|
{
|
||||||
|
output.replace(connect_to);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// before: TI [...-> Layer1 -> Result -> output] -> Layer2 -> ...
|
||||||
|
// after: ...-> Layer1 -> Layer2 -> ...
|
||||||
|
NodeVector new_ops;
|
||||||
|
for (const auto& out : sub_graph_op->get_output_descriptions())
|
||||||
|
{
|
||||||
|
const auto& connect_to = results.at(out->m_body_value_index)->get_input_source_output(0);
|
||||||
|
for (auto& input_to : sub_graph_op->output(out->m_output_index).get_target_inputs())
|
||||||
|
{
|
||||||
|
// create IE output name
|
||||||
|
std::string out_name = sub_graph_op->get_friendly_name();
|
||||||
|
if (sub_graph_op->get_output_size() != 1)
|
||||||
|
out_name += "." + std::to_string(out->m_output_index);
|
||||||
|
|
||||||
|
// IECompatibility: insert identity (Unsqueeze + Squeeze) to store the TensorIterator
|
||||||
|
// output names
|
||||||
|
auto axis_1 = Constant::create(ngraph::element::i64, ngraph::Shape{1}, {1});
|
||||||
|
auto identity_1 = std::make_shared<Unsqueeze>(connect_to, axis_1);
|
||||||
|
auto identity_2 = std::make_shared<Squeeze>(identity_1, axis_1);
|
||||||
|
identity_2->set_friendly_name(out_name);
|
||||||
|
new_ops.push_back(identity_1);
|
||||||
|
new_ops.push_back(identity_2);
|
||||||
|
|
||||||
|
input_to.replace_source_output(identity_2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
outer_f->add_sinks(sub_graph_op->get_function()->get_sinks());
|
||||||
|
ngraph::copy_runtime_info(sub_graph_op, sub_graph_op->get_function()->get_ops());
|
||||||
|
ngraph::copy_runtime_info(sub_graph_op, new_ops);
|
||||||
|
}
|
||||||
|
|
||||||
|
Output<Node> create_init_subgraph(const shared_ptr<op::util::SubGraphOp>& sub_graph_op,
|
||||||
|
const Output<Node>& in_node)
|
||||||
|
{
|
||||||
|
using namespace opset7;
|
||||||
|
|
||||||
|
auto const_zero = make_shared<Constant>(in_node.get_element_type(), Shape{1}, 0);
|
||||||
|
auto shape_of = make_shared<ShapeOf>(in_node);
|
||||||
|
auto broadcast = make_shared<Broadcast>(const_zero, shape_of);
|
||||||
|
copy_runtime_info(sub_graph_op, {const_zero, shape_of, broadcast});
|
||||||
|
return broadcast->output(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool pass::LowLatency2::run_on_function(shared_ptr<Function> f)
|
||||||
|
{
|
||||||
|
using namespace opset7;
|
||||||
|
|
||||||
|
SinkVector assigns;
|
||||||
|
for (const auto& op : f->get_ordered_ops())
|
||||||
|
{
|
||||||
|
if (const auto& sub_graph_op = dynamic_pointer_cast<op::util::SubGraphOp>(op))
|
||||||
|
{
|
||||||
|
int64_t variable_id = 0;
|
||||||
|
const auto& func = sub_graph_op->get_function();
|
||||||
|
const auto& params = func->get_parameters();
|
||||||
|
for (const auto& in : sub_graph_op->get_input_descriptions())
|
||||||
|
{
|
||||||
|
// Process all back edges
|
||||||
|
if (const auto& merged_in =
|
||||||
|
dynamic_pointer_cast<op::util::SubGraphOp::MergedInputDescription>(in))
|
||||||
|
{
|
||||||
|
// create new Variable
|
||||||
|
const string& param_name =
|
||||||
|
params.at(merged_in->m_body_parameter_index)->get_friendly_name();
|
||||||
|
const string& var_name = generate_variable_name(
|
||||||
|
sub_graph_op->get_friendly_name(), param_name, variable_id);
|
||||||
|
|
||||||
|
const auto& input = sub_graph_op->input(merged_in->m_input_index);
|
||||||
|
if (std::dynamic_pointer_cast<op::ReadValueBase>(
|
||||||
|
input.get_source_output().get_node_shared_ptr()) != nullptr)
|
||||||
|
{
|
||||||
|
NGRAPH_DEBUG
|
||||||
|
<< "LowLatency2 transformation cannot be applied because the "
|
||||||
|
<< "ReadValue node is already an input to the TensorIterator."
|
||||||
|
<< "LowLatency2 transformation may have already been applied, please "
|
||||||
|
<< "do not call it more then once.";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
const auto& param = sub_graph_op->get_function()->get_parameters().at(
|
||||||
|
merged_in->m_body_parameter_index);
|
||||||
|
for (const auto& in_to : param->output(0).get_target_inputs())
|
||||||
|
{
|
||||||
|
if (dynamic_cast<op::ReadValueBase*>(in_to.get_node()) != nullptr)
|
||||||
|
{
|
||||||
|
NGRAPH_DEBUG
|
||||||
|
<< "LowLatency2 transformation cannot be applied because the "
|
||||||
|
<< "ReadValue node is already inside the TensorIterator. "
|
||||||
|
<< "LowLatency transformation may have been applied, please do "
|
||||||
|
<< "not call LowLatency2 after LowLatency.";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
VariableInfo var_info{PartialShape::dynamic(), element::dynamic, var_name};
|
||||||
|
auto variable = make_shared<Variable>(var_info);
|
||||||
|
|
||||||
|
// insert ReadValue
|
||||||
|
// Layers -> [new op: ReadValue] -> Subgraph operation
|
||||||
|
Output<Node> read_value_in = input.get_source_output();
|
||||||
|
if (m_use_const_initializer)
|
||||||
|
{
|
||||||
|
read_value_in = create_init_subgraph(sub_graph_op, read_value_in);
|
||||||
|
}
|
||||||
|
auto read_value = make_shared<ReadValue>(read_value_in, variable);
|
||||||
|
input.replace_source_output(read_value->output(0));
|
||||||
|
read_value->set_friendly_name(var_name);
|
||||||
|
ngraph::copy_runtime_info(sub_graph_op, read_value);
|
||||||
|
|
||||||
|
/* insert Assign
|
||||||
|
// Subgraph operation -> [new op: Assign]
|
||||||
|
// \
|
||||||
|
// ---> Layers -> ...
|
||||||
|
*/
|
||||||
|
const auto& out_desc = sub_graph_op->get_output_descriptions();
|
||||||
|
bool is_output_exist = std::any_of(
|
||||||
|
out_desc.begin(),
|
||||||
|
out_desc.end(),
|
||||||
|
[&merged_in](
|
||||||
|
const std::shared_ptr<op::util::SubGraphOp::OutputDescription>& out) {
|
||||||
|
return out->m_body_value_index == merged_in->m_body_value_index;
|
||||||
|
});
|
||||||
|
// Create new output if it doesn't exist.
|
||||||
|
if (!is_output_exist)
|
||||||
|
{
|
||||||
|
sub_graph_op->get_iter_value(
|
||||||
|
func->get_results().at(merged_in->m_body_value_index));
|
||||||
|
}
|
||||||
|
for (const auto& out : sub_graph_op->get_output_descriptions())
|
||||||
|
{
|
||||||
|
if (out->m_body_value_index == merged_in->m_body_value_index)
|
||||||
|
{
|
||||||
|
auto assign = make_shared<Assign>(
|
||||||
|
sub_graph_op->output(out->m_output_index), variable);
|
||||||
|
ngraph::copy_runtime_info(sub_graph_op, assign);
|
||||||
|
// control dependency so that ReadValue is processed before Assign
|
||||||
|
assign->add_control_dependency(read_value);
|
||||||
|
assigns.emplace_back(assign);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
variable_id++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sub_graph_op->get_num_iterations() == 1)
|
||||||
|
{
|
||||||
|
UnrollSingleIteration(sub_graph_op, f);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
f->add_sinks(assigns);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user