SharedOpOptimization (#18622)

* SharedOpOptimization

* PR comments addressed

* Misprint

* Removed unnecessary mode_ref

* Fixed memcmp and modified tests accordingly

* Style

* Comment from Sergey resolved

* Optimize operations in groups

* Removed Transpose fusion per agreement with Dmitry

* Style

* Resolved unnecessary looping of the graph
This commit is contained in:
Evgenya Stepyreva 2023-07-25 15:00:44 +04:00 committed by GitHub
parent d3fdc761f6
commit 6be083d37e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 431 additions and 0 deletions

View File

@ -0,0 +1,27 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <openvino/pass/graph_rewrite.hpp>
#include <transformations_visibility.hpp>
namespace ov {
namespace pass {
class TRANSFORMATIONS_API SharedOpOptimization;
} // namespace pass
} // namespace ov
/**
 * @ingroup ie_transformation_common_api
 * @brief SharedOpOptimization optimizes operations which are
 * sourcing from the same Output<Node> and perform the same action on the same data
 * (e.g. several identical Slice ops reading one tensor are collapsed into one).
 */
class ov::pass::SharedOpOptimization : public ov::pass::ModelPass {
public:
    OPENVINO_RTTI("SharedOpOptimization", "0");
    /// \brief Runs the optimization on the model (including nested sub-graphs);
    ///        returns true if the model was modified.
    bool run_on_model(const std::shared_ptr<ov::Model>& m) override;
};

View File

@ -59,6 +59,7 @@
#include <transformations/common_optimizations/ric_fusion.hpp>
#include <transformations/common_optimizations/select_with_one_value_condition.hpp>
#include <transformations/common_optimizations/sequence_fusion.hpp>
#include <transformations/common_optimizations/shared_ops_optimization.hpp>
#include <transformations/common_optimizations/shuffle_channels_fusion.hpp>
#include <transformations/common_optimizations/simplify_shape_of_sub_graph.hpp>
#include <transformations/common_optimizations/softmax_fusion.hpp>
@ -243,6 +244,7 @@ bool ov::pass::MOCTransformations::run_on_model(const std::shared_ptr<ngraph::Fu
fq_fusions->set_name("ov::pass::FakeQuantizeFusions");
REGISTER_PASS(manager, ReverseInputChannelsFusion)
REGISTER_PASS(manager, AlignEltwiseInputRanks)
REGISTER_PASS(manager, SharedOpOptimization)
REGISTER_PASS(manager, ConstantFolding)
manager.run_passes(f);

View File

@ -0,0 +1,142 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <cstring>  // memcmp — previously pulled in only transitively

#include <openvino/core/validation_util.hpp>
#include <openvino/op/concat.hpp>
#include <openvino/op/constant.hpp>
#include <openvino/op/gather_elements.hpp>
#include <openvino/op/reshape.hpp>
#include <openvino/op/slice.hpp>
#include <openvino/op/tile.hpp>
#include <openvino/op/util/sub_graph_base.hpp>
#include <transformations/common_optimizations/shared_ops_optimization.hpp>

#include "itt.hpp"
using namespace std;
using namespace ov;
using namespace ov::op;
// Merges equal consumer nodes that read the same Output<Node>.
// `rules` maps an operation type to a predicate deciding whether two sibling
// nodes of that type are interchangeable.  Recurses into sub-graph ops
// (If/Loop/TensorIterator).  Returns true if the model was modified.
bool shared_node_optimization(const shared_ptr<Model>& model,
                              const unordered_map<Node::type_info_t, bool (*)(const Node*, const Node*)>& rules) {
    bool rewritten = false;
    for (const auto& op : model->get_ordered_ops()) {
        // Recursively apply transformation for sub-graph based operations
        if (auto multi_subgraph_op = dynamic_pointer_cast<op::util::MultiSubGraphOp>(op)) {
            for (size_t i = 0; i < multi_subgraph_op->get_internal_subgraphs_size(); i++) {
                if (auto sub_graph = multi_subgraph_op->get_function(i))
                    rewritten |= shared_node_optimization(sub_graph, rules);
            }
        }
        for (auto& output : op->outputs()) {
            const auto& target_inputs = output.get_target_inputs();
            if (target_inputs.size() <= 1)
                continue;  // nothing to optimize
            // Bucket this output's consumers by operation type; only types
            // registered in the rules table are candidates for merging.
            unordered_map<Node::type_info_t, vector<Node*>> type_to_node;
            for (const auto& input : target_inputs) {
                auto node = input.get_node();
                if (node && rules.count(node->get_type_info()))
                    type_to_node[node->get_type_info()].push_back(node);
            }
            for (auto& item : type_to_node) {
                const auto& shared_nodes = item.second;
                if (shared_nodes.size() < 2)
                    continue;
                const auto& ops_type = item.first;
                const auto& are_equal = rules.at(ops_type);
                // Greedy pairwise merge: the first unvisited node of each
                // equivalence group survives; every later node proven equal is
                // rerouted to it.  replace_output_update_name also preserves
                // the friendly name of the replaced node where appropriate.
                std::vector<bool> visited_nodes(shared_nodes.size(), false);
                for (size_t i = 0; i < visited_nodes.size(); ++i) {
                    if (visited_nodes[i])
                        continue;
                    const auto& root_op = shared_nodes[i];
                    visited_nodes[i] = true;
                    for (size_t j = i + 1; j < visited_nodes.size(); ++j) {
                        if (visited_nodes[j])
                            continue;
                        const auto& child_op = shared_nodes[j];
                        if (are_equal(root_op, child_op)) {
                            rewritten |= replace_output_update_name(child_op->output(0), root_op->output(0));
                            visited_nodes[j] = true;
                        }
                    }
                }
            }
        }
    }
    return rewritten;
}
// Returns true if every input of lhs and rhs either comes from the very same
// producer Output<Node>, or both inputs are small Constants with identical
// element type, shape and data.  Used as the equality rule for attribute-less
// operations and as the common input check for attribute-aware rules.
bool inputs_from_same_source_or_equal_constants(const Node* lhs, const Node* rhs) {
    if (lhs->get_input_size() != rhs->get_input_size())
        return false;
    // Constants above this element count are never compared byte-wise: keeps
    // the pass cheap and avoids scanning big weight tensors.
    constexpr size_t max_constant_elements = 10;
    size_t input_size = lhs->get_input_size();
    for (size_t i = 0; i < input_size; ++i) {
        if (lhs->input_value(i) == rhs->input_value(i))
            continue;  // literally the same producer output
        auto lhs_constant = as_type_ptr<v0::Constant>(lhs->get_input_node_shared_ptr(i));
        auto rhs_constant = as_type_ptr<v0::Constant>(rhs->get_input_node_shared_ptr(i));
        if (!lhs_constant || !rhs_constant)
            return false;
        if (lhs_constant->get_element_type() != rhs_constant->get_element_type())
            return false;
        const auto& lhs_shape = lhs_constant->get_shape();
        if (lhs_shape != rhs_constant->get_shape() || shape_size(lhs_shape) > max_constant_elements)
            return false;
        // Equal element type and shape imply equal byte sizes, so the memcmp
        // bound is safe for both buffers.
        if (memcmp(lhs_constant->get_data_ptr(), rhs_constant->get_data_ptr(), lhs_constant->get_byte_size()) != 0)
            return false;
    }
    return true;
}
// Two Concat nodes are interchangeable when both really are Concats, their
// axes match, and their inputs come from the same sources / equal constants.
bool concats_are_equal(const Node* lhs, const Node* rhs) {
    const auto lhs_concat = as_type<const v0::Concat>(lhs);
    const auto rhs_concat = as_type<const v0::Concat>(rhs);
    if (lhs_concat == nullptr || rhs_concat == nullptr)
        return false;
    if (lhs_concat->get_axis() != rhs_concat->get_axis())
        return false;
    return inputs_from_same_source_or_equal_constants(lhs, rhs);
}
// Two GatherElements nodes are interchangeable when both casts succeed, the
// axes agree, and all inputs are shared or equal small constants.
bool gather_elements_are_equal(const Node* lhs, const Node* rhs) {
    const auto lhs_gather = as_type<const v6::GatherElements>(lhs);
    const auto rhs_gather = as_type<const v6::GatherElements>(rhs);
    if (lhs_gather == nullptr || rhs_gather == nullptr)
        return false;
    if (lhs_gather->get_axis() != rhs_gather->get_axis())
        return false;
    return inputs_from_same_source_or_equal_constants(lhs, rhs);
}
// Two Reshape nodes are interchangeable when both casts succeed, their
// special-zero flags agree, and all inputs are shared or equal small constants.
bool reshapes_are_equal(const Node* lhs, const Node* rhs) {
    const auto lhs_reshape = as_type<const v1::Reshape>(lhs);
    const auto rhs_reshape = as_type<const v1::Reshape>(rhs);
    if (lhs_reshape == nullptr || rhs_reshape == nullptr)
        return false;
    if (lhs_reshape->get_special_zero() != rhs_reshape->get_special_zero())
        return false;
    return inputs_from_same_source_or_equal_constants(lhs, rhs);
}
// Entry point of the pass: builds the per-type equality rules table and
// delegates the actual graph rewriting to shared_node_optimization.
// Returns true if the model (or any nested sub-graph) was modified.
bool pass::SharedOpOptimization::run_on_model(const shared_ptr<Model>& model) {
    RUN_ON_FUNCTION_SCOPE(SharedOpOptimization);
#define RECORD(operation, func) \
    { operation::get_type_info_static(), func }

    const unordered_map<Node::type_info_t, bool (*)(const Node*, const Node*)> rules = {
        // no attributes
        RECORD(v8::Slice, inputs_from_same_source_or_equal_constants),
        RECORD(v0::Tile, inputs_from_same_source_or_equal_constants),

        // with attributes
        RECORD(v0::Concat, concats_are_equal),
        RECORD(v6::GatherElements, gather_elements_are_equal),
        RECORD(v1::Reshape, reshapes_are_equal),
    };  // TODO: use visit_attributes to uniformly perform attributes check in the future and get rid of rules table
#undef RECORD  // keep the helper macro local to this function

    return shared_node_optimization(model, rules);
}

View File

@ -0,0 +1,260 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include <transformations/common_optimizations/shared_ops_optimization.hpp>
#include "common_test_utils/ngraph_test_utils.hpp"
#include "openvino/op/concat.hpp"
#include "openvino/op/parameter.hpp"
#include "openvino/op/reshape.hpp"
#include "openvino/op/slice.hpp"
#include "openvino/op/tile.hpp"
using namespace ov;
using namespace ov::op;
// Test fixture for SharedOpOptimization.  On top of the graph comparison done
// by TransformationTestsF::TearDown, it checks that the transformed model and
// the reference model contain the same number of operations, so no test can
// pass while leaving redundant (or too few) nodes behind.
class SharedTransformationTestsF : public TransformationTestsF {
public:
    void TearDown() override {
        TransformationTestsF::TearDown();
        size_t op_count = model->get_ops().size(), op_count_ref = model_ref->get_ops().size();
        EXPECT_EQ(op_count, op_count_ref) << "Number of operations differ between models: model op count = " << op_count
                                          << " ref_model op count = " << op_count_ref;
    }

    // Builds a v8::Slice with one-element i64 constants for start/stop/step/axis.
    static Output<Node> make_slice(const Output<Node>& out,
                                   const int64_t& start,
                                   const int64_t& stop,
                                   const int64_t& step,
                                   const int64_t& axis) {
        return std::make_shared<v8::Slice>(out,
                                           v0::Constant::create(element::i64, Shape{1}, {start}),
                                           v0::Constant::create(element::i64, Shape{1}, {stop}),
                                           v0::Constant::create(element::i64, Shape{1}, {step}),
                                           v0::Constant::create(element::i64, Shape{1}, {axis}));
    }

    // Builds a v0::Tile with a constant repeats vector.
    static Output<Node> make_tile(const Output<Node>& out, const std::vector<int64_t>& repeats) {
        return std::make_shared<v0::Tile>(out, v0::Constant::create(element::i64, Shape{repeats.size()}, repeats));
    }

    // Builds a v1::Reshape (special_zero = true) with a constant target shape.
    static Output<Node> make_reshape(const Output<Node>& out, const std::vector<int64_t>& target_shape) {
        return std::make_shared<v1::Reshape>(out,
                                             v0::Constant::create(element::i64, Shape{target_shape.size()}, target_shape),
                                             true);
    }
};
// Four identical Slice ops over one source collapse into a single Slice; the
// one with a different `stop` value must survive untouched.
TEST_F(SharedTransformationTestsF, SharedSlice) {
    {
        auto input = std::make_shared<v0::Parameter>(element::f32, PartialShape{-1, -1, -1, -1});
        auto kept_slice = make_slice(input, 1, 2, 3, 3);
        auto duplicate_a = make_slice(input, 1, 2, 3, 3);
        auto unique_slice = make_slice(input, 1, 3, 3, 3);
        auto duplicate_b = make_slice(input, 1, 2, 3, 3);
        auto duplicate_c = make_slice(input, 1, 2, 3, 3);
        OutputVector branches{kept_slice, duplicate_a, unique_slice, duplicate_b, duplicate_c};
        auto concat = std::make_shared<v0::Concat>(branches, 0);
        model = std::make_shared<ov::Model>(OutputVector{concat}, ParameterVector{input});
        manager.register_pass<ov::pass::SharedOpOptimization>();
    }
    {
        auto input = std::make_shared<v0::Parameter>(element::f32, PartialShape{-1, -1, -1, -1});
        auto kept_slice = make_slice(input, 1, 2, 3, 3);
        auto unique_slice = make_slice(input, 1, 3, 3, 3);
        OutputVector branches{kept_slice, kept_slice, unique_slice, kept_slice, kept_slice};
        auto concat = std::make_shared<v0::Concat>(branches, 0);
        model_ref = std::make_shared<ov::Model>(OutputVector{concat}, ParameterVector{input});
    }
}
// Two-level sharing: first the duplicated Slices are merged, then the
// Tile/Reshape consumers hanging off the merged Slices become siblings of one
// output and are merged as well — exercising repeated application within a
// single pass run.  Only slice_2 (different stop) keeps its own consumers.
TEST_F(SharedTransformationTestsF, SharedRecursively) {
    {
        auto data = std::make_shared<v0::Parameter>(element::f32, PartialShape{-1, -1, -1, -1});
        auto slice_0 = make_slice(data, 1, 2, 3, 3);
        auto slice_1 = make_slice(data, 1, 2, 3, 3);  // equal to slice_0
        auto slice_2 = make_slice(data, 1, 3, 3, 3);  // differs in `stop`
        // Three identical Tile/Reshape pairs per slice.
        auto tile_0_0 = make_tile(slice_0, {1, 2, 3, 4});
        auto transpose_0_0 = make_reshape(slice_0, {0, 0, 0, -1});
        auto tile_0_1 = make_tile(slice_0, {1, 2, 3, 4});
        auto transpose_0_1 = make_reshape(slice_0, {0, 0, 0, -1});
        auto tile_0_2 = make_tile(slice_0, {1, 2, 3, 4});
        auto transpose_0_2 = make_reshape(slice_0, {0, 0, 0, -1});
        auto tile_1_0 = make_tile(slice_1, {1, 2, 3, 4});
        auto transpose_1_0 = make_reshape(slice_1, {0, 0, 0, -1});
        auto tile_1_1 = make_tile(slice_1, {1, 2, 3, 4});
        auto transpose_1_1 = make_reshape(slice_1, {0, 0, 0, -1});
        auto tile_1_2 = make_tile(slice_1, {1, 2, 3, 4});
        auto transpose_1_2 = make_reshape(slice_1, {0, 0, 0, -1});
        auto tile_2_0 = make_tile(slice_2, {1, 2, 3, 4});
        auto transpose_2_0 = make_reshape(slice_2, {0, 0, 0, -1});
        auto tile_2_1 = make_tile(slice_2, {1, 2, 3, 4});
        auto transpose_2_1 = make_reshape(slice_2, {0, 0, 0, -1});
        auto tile_2_2 = make_tile(slice_2, {1, 2, 3, 4});
        auto transpose_2_2 = make_reshape(slice_2, {0, 0, 0, -1});
        auto concat = std::make_shared<v0::Concat>(
            OutputVector{// source from slice 0
                         tile_0_0,
                         transpose_0_0,
                         tile_0_1,
                         transpose_0_1,
                         tile_0_2,
                         transpose_0_2,
                         // source from slice 1
                         tile_1_0,
                         transpose_1_0,
                         tile_1_1,
                         transpose_1_1,
                         tile_1_2,
                         transpose_1_2,
                         // source from slice 2
                         tile_2_0,
                         transpose_2_0,
                         tile_2_1,
                         transpose_2_1,
                         tile_2_2,
                         transpose_2_2},
            0);
        model = std::make_shared<ov::Model>(OutputVector{concat}, ParameterVector{data});
        manager.register_pass<ov::pass::SharedOpOptimization>();
    }
    {
        // Reference: one Slice per distinct configuration, one Tile and one
        // Reshape per surviving Slice; Concat arity stays the same.
        auto data = std::make_shared<v0::Parameter>(element::f32, PartialShape{-1, -1, -1, -1});
        auto slice_0 = make_slice(data, 1, 2, 3, 3);
        auto slice_2 = make_slice(data, 1, 3, 3, 3);
        auto tile_0_0 = make_tile(slice_0, {1, 2, 3, 4});
        auto transpose_0_0 = make_reshape(slice_0, {0, 0, 0, -1});
        auto tile_2_0 = make_tile(slice_2, {1, 2, 3, 4});
        auto transpose_2_0 = make_reshape(slice_2, {0, 0, 0, -1});
        auto concat = std::make_shared<v0::Concat>(
            OutputVector{// source from slice 0
                         tile_0_0,
                         transpose_0_0,
                         tile_0_0,
                         transpose_0_0,
                         tile_0_0,
                         transpose_0_0,
                         // source from slice 0
                         tile_0_0,
                         transpose_0_0,
                         tile_0_0,
                         transpose_0_0,
                         tile_0_0,
                         transpose_0_0,
                         // source from slice 2
                         tile_2_0,
                         transpose_2_0,
                         tile_2_1,
                         transpose_2_0,
                         tile_2_0,
                         transpose_2_0},
            0);
        model_ref = std::make_shared<ov::Model>(OutputVector{concat}, ParameterVector{data});
    }
}
// Two Concats reading distinct-but-equal Constants must be merged: the pass
// compares small constants element-wise, not only by identity.
TEST_F(SharedTransformationTestsF, SharedConcat) {
    {
        auto first_prefix = v0::Constant::create(element::f32, Shape{4}, std::vector<float>{3.14f, 42.f, 0.f, 14.f});
        auto second_prefix = v0::Constant::create(element::f32, Shape{4}, std::vector<float>{3.14f, 42.f, 0.f, 14.f});
        auto input = std::make_shared<v0::Parameter>(element::f32, PartialShape{-1});
        auto suffix = v0::Constant::create(element::f32, Shape{1}, std::vector<float>{3.14f});
        auto branch_0 = std::make_shared<v0::Concat>(OutputVector{first_prefix, input, suffix}, 0);
        auto branch_1 = std::make_shared<v0::Concat>(OutputVector{second_prefix, input, suffix}, 0);
        auto concat = std::make_shared<v0::Concat>(OutputVector{branch_0, branch_1}, 0);
        model = std::make_shared<ov::Model>(OutputVector{concat}, ParameterVector{input});
        manager.register_pass<ov::pass::SharedOpOptimization>();
    }
    {
        auto prefix = v0::Constant::create(element::f32, Shape{4}, std::vector<float>{3.14f, 42.f, 0.f, 14.f});
        auto input = std::make_shared<v0::Parameter>(element::f32, PartialShape{-1});
        auto suffix = v0::Constant::create(element::f32, Shape{1}, std::vector<float>{3.14f});
        auto branch = std::make_shared<v0::Concat>(OutputVector{prefix, input, suffix}, 0);
        auto concat = std::make_shared<v0::Concat>(OutputVector{branch, branch}, 0);
        model_ref = std::make_shared<ov::Model>(OutputVector{concat}, ParameterVector{input});
    }
}
// Three distinct Slice configurations, each instantiated three times: exactly
// one representative per configuration must survive, in original order.
TEST_F(SharedTransformationTestsF, SharedSliceInThreeGroups) {
    {
        auto input = std::make_shared<v0::Parameter>(element::f32, PartialShape::dynamic(10));
        auto group_a_0 = make_slice(input, 1, 2, 3, 4);
        auto group_b_0 = make_slice(input, 2, 3, 4, 5);
        auto group_c_0 = make_slice(input, 3, 4, 5, 6);
        auto group_a_1 = make_slice(input, 1, 2, 3, 4);
        auto group_b_1 = make_slice(input, 2, 3, 4, 5);
        auto group_c_1 = make_slice(input, 3, 4, 5, 6);
        auto group_a_2 = make_slice(input, 1, 2, 3, 4);
        auto group_b_2 = make_slice(input, 2, 3, 4, 5);
        auto group_c_2 = make_slice(input, 3, 4, 5, 6);
        OutputVector branches{group_a_0,
                              group_b_0,
                              group_c_0,
                              group_a_1,
                              group_b_1,
                              group_c_1,
                              group_a_2,
                              group_b_2,
                              group_c_2};
        auto concat = std::make_shared<v0::Concat>(branches, 0);
        model = std::make_shared<ov::Model>(OutputVector{concat}, ParameterVector{input});
        manager.register_pass<ov::pass::SharedOpOptimization>();
    }
    {
        auto input = std::make_shared<v0::Parameter>(element::f32, PartialShape::dynamic(10));
        auto group_a = make_slice(input, 1, 2, 3, 4);
        auto group_b = make_slice(input, 2, 3, 4, 5);
        auto group_c = make_slice(input, 3, 4, 5, 6);
        OutputVector branches{group_a,
                              group_b,
                              group_c,
                              group_a,
                              group_b,
                              group_c,
                              group_a,
                              group_b,
                              group_c};
        auto concat = std::make_shared<v0::Concat>(branches, 0);
        model_ref = std::make_shared<ov::Model>(OutputVector{concat}, ParameterVector{input});
    }
}
// Both equal Concats feed model Results directly; per the test name, the pass
// must not replace an op whose output is a Result.  No model_ref is set here,
// so the base fixture's default comparison applies (presumably against an
// untouched clone of the input model — behaviour inherited from
// TransformationTestsF).
TEST_F(SharedTransformationTestsF, SharedConcatCheckOpWithResultIsntReplaced) {
    {
        auto first_prefix = v0::Constant::create(element::f32, Shape{4}, std::vector<float>{3.14f, 42.f, 0.f, 14.f});
        auto second_prefix = v0::Constant::create(element::f32, Shape{4}, std::vector<float>{3.14f, 42.f, 0.f, 14.f});
        auto input = std::make_shared<v0::Parameter>(element::f32, PartialShape{-1});
        auto suffix = v0::Constant::create(element::f32, Shape{1}, std::vector<float>{3.14f});
        auto result_concat_0 = std::make_shared<v0::Concat>(OutputVector{first_prefix, input, suffix}, 0);
        auto result_concat_1 = std::make_shared<v0::Concat>(OutputVector{second_prefix, input, suffix}, 0);
        model = std::make_shared<ov::Model>(OutputVector{result_concat_0, result_concat_1}, ParameterVector{input});
        manager.register_pass<ov::pass::SharedOpOptimization>();
    }
}