From 77ba4ab6dd8f51f8f3b80433ff67815fe59af74c Mon Sep 17 00:00:00 2001
From: Ivan Tikhonov
Date: Thu, 22 Sep 2022 14:21:05 +0300
Subject: [PATCH] AUGRUCell fusion transformation (#12844)

* AUGRUCell fusion transformation and tests
* add missed includes
* Apply review comments
* Apply review comments
* fix build
* Enable accuracy tests
* change supported weights format from zrh to rzh
* update submodule version
* resolve review comments
* add additional checks to pattern
* fix conflict with master
---
 .../augru_cell_fusion.hpp                     |  37 +++++
 .../augru_cell_fusion.cpp                     | 102 +++++++++++++
 .../augru_cell_fusion.cpp                     | 135 ++++++++++++++++++
 3 files changed, 274 insertions(+)
 create mode 100644 src/common/transformations/include/transformations/common_optimizations/augru_cell_fusion.hpp
 create mode 100644 src/common/transformations/src/transformations/common_optimizations/augru_cell_fusion.cpp
 create mode 100644 src/tests/functional/inference_engine/transformations/common_optimizations/augru_cell_fusion.cpp

diff --git a/src/common/transformations/include/transformations/common_optimizations/augru_cell_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/augru_cell_fusion.hpp
new file mode 100644
index 00000000000..b1329518008
--- /dev/null
+++ b/src/common/transformations/include/transformations/common_optimizations/augru_cell_fusion.hpp
@@ -0,0 +1,37 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <memory>
+#include <vector>
+
+#include "openvino/pass/graph_rewrite.hpp"
+#include "transformations_visibility.hpp"
+
+namespace ov {
+namespace pass {
+
+class TRANSFORMATIONS_API AUGRUCellFusion;
+
+}  // namespace pass
+}  // namespace ov
+
+/**
+ * @ingroup ie_transformation_common_api
+ * @brief AUGRUCellFusion transformation replaces a sequence of
+ * operations with the AUGRUCell op.
+ *
+ * Supported activations: 1st is Sigmoid, 2nd is Tanh.
+ * The clip attribute is not supported.
+ * The linear_before_reset attribute is not supported.
+ * Supported weights format: 'rzh'.
+ */
+
+class ov::pass::AUGRUCellFusion : public ov::pass::MatcherPass {
+public:
+    OPENVINO_RTTI("AUGRUCellFusion", "0");
+    AUGRUCellFusion();
+};
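A note on usage: like any MatcherPass, the fusion is applied through ov::pass::Manager. A minimal sketch (the helper name `fuse_augru` and the origin of `model` are illustrative, not part of the patch):

```cpp
#include "openvino/pass/manager.hpp"
#include "transformations/common_optimizations/augru_cell_fusion.hpp"

// Runs the fusion on a model containing the decomposed AUGRU subgraph (assumed).
void fuse_augru(const std::shared_ptr<ov::Model>& model) {
    ov::pass::Manager manager;
    manager.register_pass<ov::pass::AUGRUCellFusion>();
    manager.run_passes(model);  // matched subgraphs are replaced by AUGRUCell
}
```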
diff --git a/src/common/transformations/src/transformations/common_optimizations/augru_cell_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/augru_cell_fusion.cpp
new file mode 100644
index 00000000000..1f52ae6d116
--- /dev/null
+++ b/src/common/transformations/src/transformations/common_optimizations/augru_cell_fusion.cpp
@@ -0,0 +1,102 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "transformations/common_optimizations/augru_cell_fusion.hpp"
+
+#include <memory>
+
+#include "itt.hpp"
+#include "ngraph_ops/augru_cell.hpp"
+#include "openvino/core/rt_info.hpp"
+#include "openvino/opsets/opset9.hpp"
+#include "openvino/pass/pattern/op/wrap_type.hpp"
+
+using namespace std;
+using namespace ov::opset9;
+using namespace ov::element;
+using namespace ov::pass::pattern;
+
+ov::pass::AUGRUCellFusion::AUGRUCellFusion() {
+    MATCHER_SCOPE(AUGRUCellFusion);
+
+    // we can't determine hidden_size or input_size in this case
+    const auto is_first_dim_static = [](const Output<Node>& output) -> bool {
+        const auto& p_shape = output.get_partial_shape();
+        return !(p_shape.rank().is_dynamic() || p_shape[1].is_dynamic());
+    };
+
+    auto concat_1 = wrap_type<Concat>({any_input(is_first_dim_static), any_input(is_first_dim_static)});
+    auto matmul_1 = wrap_type<MatMul>({concat_1, any_input(is_first_dim_static)});
+    auto add_1 = wrap_type<Add>({matmul_1, any_input()});
+    // only Sigmoid is supported in the current version of AUGRUCell
+    auto sigmoid = wrap_type<Sigmoid>({add_1});
+    auto split = wrap_type<Split>({sigmoid, any_input()});
+    auto multiply = wrap_type<Multiply>({split, any_input()});
+
+    auto concat_2 = wrap_type<Concat>({any_input(), multiply});
+    auto matmul_2 = wrap_type<MatMul>({concat_2, any_input(is_first_dim_static)});
+    auto add_2 = wrap_type<Add>({matmul_2, any_input()});
+    // only Tanh is supported in the current version of AUGRUCell
+    auto tanh = wrap_type<Tanh>({add_2});
+
+    auto subtract_1 = wrap_type<Subtract>({any_input(), any_input()});
+    auto multiply_2 = wrap_type<Multiply>({subtract_1, split});
+    auto subtract_2 = wrap_type<Subtract>({any_input(), multiply_2});
+    auto multiply_3 = wrap_type<Multiply>({subtract_2, tanh});
+
+    auto multiply_4 = wrap_type<Multiply>({multiply_2, any_input()});
+    auto add_3 = wrap_type<Add>({multiply_4, multiply_3});
+
+    matcher_pass_callback callback = [=](pattern::Matcher& m) {
+        NodeRegistry rg;
+        auto pattern_map = m.get_pattern_map();
+        auto concat = pattern_map.at(concat_1);
+        auto X = concat->input_value(0);
+        auto H = concat->input_value(1);
+
+        auto h_pshape = H.get_partial_shape();
+        auto x_pshape = X.get_partial_shape();
+
+        auto hidden_size = h_pshape[1].get_length();
+        auto input_size = x_pshape[1].get_length();
+
+        auto axis_0 = rg.make<Constant>(i64, Shape{}, 0);
+        auto axis_1 = rg.make<Constant>(i64, Shape{}, 1);
+
+        auto A = pattern_map.at(subtract_1)->input_value(1);
+        // biases are required
+        auto bias_add_1 = pattern_map.at(add_1);
+        auto split_bias_r_z = rg.make<Split>(bias_add_1->input_value(1), axis_1, 2);
+        auto bias_add_2 = pattern_map.at(add_2);
+
+        // reorder biases from 'rzh' to the 'zrh' layout expected by AUGRUCell
+        auto B = rg.make<Concat>(
+            OutputVector{split_bias_r_z->output(1), split_bias_r_z->output(0), bias_add_2->input_value(1)},
+            1);
+
+        auto WRrz = pattern_map.at(matmul_1)->input_value(1);
+        auto WRh = pattern_map.at(matmul_2)->input_value(1);
+
+        auto split_lengths = rg.make<Constant>(i64, Shape{2}, vector<int64_t>{input_size, hidden_size});
+        auto split_WRrz = rg.make<VariadicSplit>(WRrz, axis_1, split_lengths);
+        auto split_W_r_z = rg.make<Split>(split_WRrz->output(0), axis_0, 2);
+        auto split_R_r_z = rg.make<Split>(split_WRrz->output(1), axis_0, 2);
+        auto split_WRh = rg.make<VariadicSplit>(WRh, axis_1, split_lengths);
+        // stack W and R in the 'zrh' order expected by AUGRUCell
+        auto Wzrh =
+            rg.make<Concat>(OutputVector{split_W_r_z->output(1), split_W_r_z->output(0), split_WRh->output(0)}, 0);
+        auto Rzrh =
+            rg.make<Concat>(OutputVector{split_R_r_z->output(1), split_R_r_z->output(0), split_WRh->output(1)}, 0);
+
+        auto squeeze_B = rg.make<Squeeze>(B, axis_0);
+        auto cell = rg.make<ov::op::internal::AUGRUCell>(X,
                                                          H,
                                                          Wzrh,
                                                          Rzrh,
                                                          squeeze_B,
                                                          A,
                                                          H.get_partial_shape()[1].get_length());
+
+        cell->set_friendly_name(m.get_match_root()->get_friendly_name());
+        copy_runtime_info(m.get_matched_nodes(), rg.get());
+        replace_node(m.get_match_root(), cell);
+        return true;
+    };
+
+    auto m = make_shared<pattern::Matcher>(add_3, matcher_name);
+    this->register_matcher(m, callback);
+}
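For reference, the subgraph matched above computes an AUGRU cell, i.e. a GRU with an attention-scaled update gate as introduced in the DIEN paper, written here in OpenVINO's GRU gate convention. Split produces the gates in 'rzh' order (output 0 is r, output 1 is z), and a_t is the attention score consumed by subtract_1:

```latex
\begin{aligned}
r_t &= \sigma(W_r x_t + R_r h_{t-1} + b_r) \\
z_t &= \sigma(W_z x_t + R_z h_{t-1} + b_z) \\
\tilde{h}_t &= \tanh(W_h x_t + R_h (r_t \odot h_{t-1}) + b_h) \\
\hat{z}_t &= (1 - a_t) \odot z_t \\
h_t &= \hat{z}_t \odot h_{t-1} + (1 - \hat{z}_t) \odot \tilde{h}_t
\end{aligned}
```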
diff --git a/src/tests/functional/inference_engine/transformations/common_optimizations/augru_cell_fusion.cpp b/src/tests/functional/inference_engine/transformations/common_optimizations/augru_cell_fusion.cpp
new file mode 100644
index 00000000000..cd29854404b
--- /dev/null
+++ b/src/tests/functional/inference_engine/transformations/common_optimizations/augru_cell_fusion.cpp
@@ -0,0 +1,135 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+
+#include <memory>
+
+#include "common_test_utils/ngraph_test_utils.hpp"
+#include "ngraph_ops/augru_cell.hpp"
+#include "openvino/op/parameter.hpp"
+#include "openvino/opsets/opset9.hpp"
+#include "transformations/common_optimizations/augru_cell_fusion.hpp"
+
+using namespace std;
+using namespace testing;
+
+using namespace ov;
+using namespace opset9;
+using namespace element;
+
+namespace {
+shared_ptr<Model> gen_model(size_t batch, size_t hidden_size, size_t input_size, bool use_dyn_shapes) {
+    auto X = make_shared<Parameter>(f32, Shape{batch, input_size});
+    if (use_dyn_shapes) {
+        X = make_shared<Parameter>(f32, PartialShape{static_cast<int64_t>(batch), Dimension::dynamic()});
+    }
+    auto H = make_shared<Parameter>(f32, Shape{batch, hidden_size});
+    // despite the names, rows are laid out r-first ('rzh'):
+    // split output 0 is the reset gate, output 1 is the update gate
+    auto WRzr = make_shared<Parameter>(f32, Shape{2 * hidden_size, input_size + hidden_size});
+    auto Bzr = make_shared<Parameter>(f32, Shape{1, 2 * hidden_size});
+    auto WRh = make_shared<Parameter>(f32, Shape{hidden_size, input_size + hidden_size});
+    auto Bh = make_shared<Parameter>(f32, Shape{1, hidden_size});
+    auto A = make_shared<Parameter>(f32, Shape{batch, 1});
+    auto concat_1 = make_shared<Concat>(OutputVector{X, H}, 1);
+    auto matmul_1 = make_shared<MatMul>(concat_1, WRzr, false, true);
+    auto in_to_activation_1 = make_shared<Add>(matmul_1, Bzr);
+
+    auto sigmoid = make_shared<Sigmoid>(in_to_activation_1);
+    auto axis_1 = make_shared<Constant>(i64, Shape{}, 1);
+    auto split = make_shared<Split>(sigmoid, axis_1, 2);
+
+    auto multiply_1 = make_shared<Multiply>(split, H);
+    auto concat_2 = make_shared<Concat>(OutputVector{X, multiply_1}, 1);
+    auto matmul_2 = make_shared<MatMul>(concat_2, WRh, false, true);
+    auto in_to_activation_2 = make_shared<Add>(matmul_2, Bh);
+    auto tanh = make_shared<Tanh>(in_to_activation_2);
+
+    auto one = make_shared<Constant>(f32, Shape{1}, 1);
+    auto subtract_1 = make_shared<Subtract>(one, A);
+    auto multiply_2 = make_shared<Multiply>(subtract_1, split->output(1));
+    auto subtract_2 = make_shared<Subtract>(one, multiply_2);
+    auto multiply_3 = make_shared<Multiply>(subtract_2, tanh);
+
+    auto multiply_4 = make_shared<Multiply>(multiply_2, H);
+    auto add = make_shared<Add>(multiply_4, multiply_3);
+    return make_shared<Model>(OutputVector{add}, ParameterVector{X, H, WRzr, WRh, Bzr, Bh, A});
+}
+
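+// gen_reference builds the expected graph after fusion: AUGRUCell consumes W, R
+// and B stacked in 'zrh' order, so the 'rzh' inputs are re-stitched via
+// VariadicSplit/Split/Concat. Shape bookkeeping for hidden_size = 2,
+// input_size = 3 (sizes illustrative):
+//   WRrz {4, 5} --VariadicSplit(axis=1, {3, 2})--> W_rz {4, 3}, R_rz {4, 2}
+//   W_rz {4, 3} --Split(axis=0, 2)--> W_r {2, 3}, W_z {2, 3}
+//   Wzrh = Concat({W_z, W_r, W_h}, axis=0) -> {6, 3}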
+shared_ptr<Model> gen_reference(size_t batch, size_t hidden_size, size_t input_size) {
+    auto X = make_shared<Parameter>(f32, Shape{batch, input_size});
+    auto H = make_shared<Parameter>(f32, Shape{batch, hidden_size});
+    auto WRrz = make_shared<Parameter>(f32, Shape{2 * hidden_size, input_size + hidden_size});
+    auto WRh = make_shared<Parameter>(f32, Shape{hidden_size, input_size + hidden_size});
+    auto Brz = make_shared<Parameter>(f32, Shape{1, 2 * hidden_size});
+    auto Bh = make_shared<Parameter>(f32, Shape{1, hidden_size});
+    auto A = make_shared<Parameter>(f32, Shape{batch, 1});
+    ParameterVector params = {X, H, WRrz, WRh, Brz, Bh, A};
+
+    auto axis_0 = make_shared<Constant>(i64, Shape{}, 0);
+    auto axis_1 = make_shared<Constant>(i64, Shape{}, 1);
+    auto split_lengths = make_shared<Constant>(i64, Shape{2}, vector<size_t>{input_size, hidden_size});
+    auto split_WRrz = make_shared<VariadicSplit>(WRrz, axis_1, split_lengths);
+    auto split_W_r_z = make_shared<Split>(split_WRrz->output(0), axis_0, 2);
+    auto split_R_r_z = make_shared<Split>(split_WRrz->output(1), axis_0, 2);
+    auto split_WRh = make_shared<VariadicSplit>(WRh, axis_1, split_lengths);
+    auto Wzrh =
+        make_shared<Concat>(OutputVector{split_W_r_z->output(1), split_W_r_z->output(0), split_WRh->output(0)}, 0);
+    auto Rzrh =
+        make_shared<Concat>(OutputVector{split_R_r_z->output(1), split_R_r_z->output(0), split_WRh->output(1)}, 0);
+
+    auto split_bias_r_z = make_shared<Split>(Brz, axis_1, 2);
+    auto B = make_shared<Concat>(OutputVector{split_bias_r_z->output(1), split_bias_r_z->output(0), Bh}, 1);
+
+    auto squeeze_B = make_shared<Squeeze>(B, axis_0);
+    auto cell = make_shared<ov::op::internal::AUGRUCell>(X, H, Wzrh, Rzrh, squeeze_B, A, hidden_size);
+    return make_shared<Model>(OutputVector{cell}, params);
+}
+}  // namespace
+
+struct AUGRUFusionParams {
+    size_t batch;
+    size_t hidden_size;
+    size_t input_size;
+};
+
+class AUGRUFusionTest : public WithParamInterface<AUGRUFusionParams>, public TransformationTestsF {};
+
+TEST_P(AUGRUFusionTest, AUGRUCellPattern) {
+    const auto& p = GetParam();
+    {
+        model = gen_model(p.batch, p.hidden_size, p.input_size, false);
+        manager.register_pass<pass::AUGRUCellFusion>();
+    }
+
+    {
+        model_ref = gen_reference(p.batch, p.hidden_size, p.input_size);
+    }
+    comparator.enable(FunctionsComparator::CmpValues::ACCURACY);
+    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
+    comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
+}
+
+class AUGRUFusionTestDyn : public WithParamInterface<AUGRUFusionParams>, public TransformationTestsF {};
+
+TEST_P(AUGRUFusionTestDyn, AUGRUCellPatternDynamicShapes) {
+    const auto& p = GetParam();
+    {
+        model = gen_model(p.batch, p.hidden_size, p.input_size, true);
+        // the transformation won't be applied because we can't determine
+        // hidden_size/input_size: the shapes are dynamic.
+        manager.register_pass<pass::AUGRUCellFusion>();
+    }
+}
+
+static const std::vector<AUGRUFusionParams> params = {
+    AUGRUFusionParams{1, 1, 1},
+    AUGRUFusionParams{2, 128, 32},
+};
+
+INSTANTIATE_TEST_SUITE_P(AUGRUFusionTest, AUGRUFusionTest, ValuesIn(params));
+INSTANTIATE_TEST_SUITE_P(AUGRUFusionTestDyn, AUGRUFusionTestDyn, ValuesIn(params));
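Outside the TransformationTestsF harness, the fusion can also be sanity-checked by running the pass manually and counting fused cells. A minimal sketch reusing gen_model from the test above (an extra include of openvino/pass/manager.hpp is assumed):

```cpp
auto checked_model = gen_model(2, 128, 32, false);

ov::pass::Manager manager;
manager.register_pass<ov::pass::AUGRUCellFusion>();
manager.run_passes(checked_model);

// Count AUGRUCell ops in the rewritten graph; exactly one is expected here.
size_t n_cells = 0;
for (const auto& node : checked_model->get_ops()) {
    if (std::dynamic_pointer_cast<ov::op::internal::AUGRUCell>(node))
        ++n_cells;
}
```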