SharedOpOptimization (#18622)
* SharedOpOptimization
* PR comments addressed
* Misprint
* Removed unnecessary mode_ref
* Fixed memcmp and modified tests accordingly
* Style
* Comment from Sergey resolved
* Optimize operations in groups
* Removed Transpose fusion per agreement with Dmitry
* Style
* Resolved unnecessary looping of the graph
This commit is contained in:
parent d3fdc761f6
commit 6be083d37e
@@ -0,0 +1,27 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <openvino/pass/graph_rewrite.hpp>
#include <transformations_visibility.hpp>

namespace ov {
namespace pass {

class TRANSFORMATIONS_API SharedOpOptimization;

}  // namespace pass
}  // namespace ov

/**
 * @ingroup ie_transformation_common_api
 * @brief SharedOpOptimization optimizes operations which are
 * sourced from the same Output<Node> and perform the same action on the same data
 */
class ov::pass::SharedOpOptimization : public ov::pass::ModelPass {
public:
    OPENVINO_RTTI("SharedOpOptimization", "0");
    bool run_on_model(const std::shared_ptr<ov::Model>& m) override;
};
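For context, a minimal usage sketch of the new pass (not part of this commit; the model construction below is illustrative only): two consumers of the same output that do identical work are expected to collapse into one after ov::pass::SharedOpOptimization runs.

#include <openvino/core/model.hpp>
#include <openvino/op/concat.hpp>
#include <openvino/op/constant.hpp>
#include <openvino/op/parameter.hpp>
#include <openvino/op/tile.hpp>
#include <openvino/pass/manager.hpp>
#include <transformations/common_optimizations/shared_ops_optimization.hpp>

int main() {
    using namespace ov;
    // Two Tile consumers of the same Parameter with equal (but distinct) "repeats" constants.
    auto data = std::make_shared<op::v0::Parameter>(element::f32, Shape{2, 3});
    auto repeats_a = op::v0::Constant::create(element::i64, Shape{2}, {2, 1});
    auto repeats_b = op::v0::Constant::create(element::i64, Shape{2}, {2, 1});
    auto tile_a = std::make_shared<op::v0::Tile>(data, repeats_a);
    auto tile_b = std::make_shared<op::v0::Tile>(data, repeats_b);
    auto concat = std::make_shared<op::v0::Concat>(OutputVector{tile_a, tile_b}, 0);
    auto model = std::make_shared<Model>(OutputVector{concat}, ParameterVector{data});

    // Run only the new pass: tile_b is expected to be replaced by tile_a,
    // leaving a single shared Tile feeding both Concat inputs.
    pass::Manager manager;
    manager.register_pass<pass::SharedOpOptimization>();
    manager.run_passes(model);
    return 0;
}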
@@ -59,6 +59,7 @@
#include <transformations/common_optimizations/ric_fusion.hpp>
#include <transformations/common_optimizations/select_with_one_value_condition.hpp>
#include <transformations/common_optimizations/sequence_fusion.hpp>
#include <transformations/common_optimizations/shared_ops_optimization.hpp>
#include <transformations/common_optimizations/shuffle_channels_fusion.hpp>
#include <transformations/common_optimizations/simplify_shape_of_sub_graph.hpp>
#include <transformations/common_optimizations/softmax_fusion.hpp>
@@ -243,6 +244,7 @@ bool ov::pass::MOCTransformations::run_on_model(const std::shared_ptr<ngraph::Fu
    fq_fusions->set_name("ov::pass::FakeQuantizeFusions");
    REGISTER_PASS(manager, ReverseInputChannelsFusion)
    REGISTER_PASS(manager, AlignEltwiseInputRanks)
    REGISTER_PASS(manager, SharedOpOptimization)
    REGISTER_PASS(manager, ConstantFolding)
    manager.run_passes(f);
@@ -0,0 +1,142 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <openvino/core/validation_util.hpp>
#include <openvino/op/concat.hpp>
#include <openvino/op/gather_elements.hpp>
#include <openvino/op/reshape.hpp>
#include <openvino/op/slice.hpp>
#include <openvino/op/tile.hpp>
#include <openvino/op/util/sub_graph_base.hpp>
#include <transformations/common_optimizations/shared_ops_optimization.hpp>

#include "itt.hpp"

using namespace std;
using namespace ov;
using namespace ov::op;

bool shared_node_optimization(const shared_ptr<Model>& model,
                              const unordered_map<Node::type_info_t, bool (*)(const Node*, const Node*)>& rules) {
    bool rewritten = false;

    for (const auto& op : model->get_ordered_ops()) {
        // Recursively apply transformation for sub-graph based operations
        if (auto multi_subgraph_op = dynamic_pointer_cast<op::util::MultiSubGraphOp>(op)) {
            for (size_t i = 0; i < multi_subgraph_op->get_internal_subgraphs_size(); i++) {
                if (auto sub_graph = multi_subgraph_op->get_function(i))
                    rewritten |= shared_node_optimization(sub_graph, rules);
            }
        }
        for (auto& output : op->outputs()) {
            const auto& target_inputs = output.get_target_inputs();
            if (target_inputs.size() <= 1)
                continue;  // nothing to optimize
            unordered_map<Node::type_info_t, vector<Node*>> type_to_node;
            for (const auto& input : target_inputs) {
                auto node = input.get_node();
                if (node && rules.count(node->get_type_info()))
                    type_to_node[node->get_type_info()].push_back(node);
            }
            for (auto& item : type_to_node) {
                const auto& shared_nodes = item.second;
                if (shared_nodes.size() < 2)
                    continue;
                const auto& ops_type = item.first;
                const auto& are_equal = rules.at(ops_type);

                std::vector<bool> visited_nodes(shared_nodes.size(), false);
                for (size_t i = 0; i < visited_nodes.size(); ++i) {
                    if (visited_nodes[i])
                        continue;
                    const auto& root_op = shared_nodes[i];
                    visited_nodes[i] = true;
                    for (size_t j = i + 1; j < visited_nodes.size(); ++j) {
                        if (visited_nodes[j])
                            continue;
                        const auto& child_op = shared_nodes[j];
                        if (are_equal(root_op, child_op)) {
                            rewritten |= replace_output_update_name(child_op->output(0), root_op->output(0));
                            visited_nodes[j] = true;
                        }
                    }
                }
            }
        }
    }
    return rewritten;
}

bool inputs_from_same_source_or_equal_constants(const Node* lhs, const Node* rhs) {
    if (lhs->get_input_size() != rhs->get_input_size())
        return false;
    size_t input_size = lhs->get_input_size();
    for (size_t i = 0; i < input_size; ++i) {
        if (lhs->input_value(i) == rhs->input_value(i))
            continue;
        auto lhs_constant = as_type_ptr<v0::Constant>(lhs->get_input_node_shared_ptr(i));
        auto rhs_constant = as_type_ptr<v0::Constant>(rhs->get_input_node_shared_ptr(i));
        if (!lhs_constant || !rhs_constant)
            return false;
        if (lhs_constant->get_element_type() != rhs_constant->get_element_type())
            return false;
        const auto& lhs_shape = lhs_constant->get_shape();
        if (lhs_shape != rhs_constant->get_shape() || shape_size(lhs_shape) > 10)
            return false;
        if (memcmp(lhs_constant->get_data_ptr(), rhs_constant->get_data_ptr(), lhs_constant->get_byte_size()) != 0)
            return false;
    }
    return true;
}

bool concats_are_equal(const Node* lhs, const Node* rhs) {
    const auto lhs_concat = as_type<const v0::Concat>(lhs);
    if (!lhs_concat)
        return false;
    const auto rhs_concat = as_type<const v0::Concat>(rhs);
    if (!rhs_concat)
        return false;
    return lhs_concat->get_axis() == rhs_concat->get_axis() && inputs_from_same_source_or_equal_constants(lhs, rhs);
}

bool gather_elements_are_equal(const Node* lhs, const Node* rhs) {
    const auto lhs_gather_elements = as_type<const v6::GatherElements>(lhs);
    if (!lhs_gather_elements)
        return false;
    const auto rhs_gather_elements = as_type<const v6::GatherElements>(rhs);
    if (!rhs_gather_elements)
        return false;
    return lhs_gather_elements->get_axis() == rhs_gather_elements->get_axis() &&
           inputs_from_same_source_or_equal_constants(lhs, rhs);
}

bool reshapes_are_equal(const Node* lhs, const Node* rhs) {
    const auto lhs_reshape = as_type<const v1::Reshape>(lhs);
    if (!lhs_reshape)
        return false;
    const auto rhs_reshape = as_type<const v1::Reshape>(rhs);
    if (!rhs_reshape)
        return false;
    return lhs_reshape->get_special_zero() == rhs_reshape->get_special_zero() &&
           inputs_from_same_source_or_equal_constants(lhs, rhs);
}

bool pass::SharedOpOptimization::run_on_model(const shared_ptr<Model>& model) {
    RUN_ON_FUNCTION_SCOPE(SharedOpOptimization);
#define RECORD(operation, func) \
    { operation::get_type_info_static(), func }

    const unordered_map<Node::type_info_t, bool (*)(const Node*, const Node*)> rules = {
        // no attributes
        RECORD(v8::Slice, inputs_from_same_source_or_equal_constants),
        RECORD(v0::Tile, inputs_from_same_source_or_equal_constants),

        // with attributes
        RECORD(v0::Concat, concats_are_equal),
        RECORD(v6::GatherElements, gather_elements_are_equal),
        RECORD(v1::Reshape, reshapes_are_equal),

    };  // TODO: use visit_attributes to uniformly perform attributes check in the future and get rid of rules table
    return shared_node_optimization(model, rules);
}
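Illustrative sketch only (not part of this commit): the rules table maps an operation's type_info to an equality predicate, so supporting another operation follows the same pattern as concats_are_equal above. v3::ShapeOf (whose single attribute is the output element type) is used here purely as an example; the helper below is hypothetical.

#include <openvino/op/shape_of.hpp>

bool shape_ofs_are_equal(const ov::Node* lhs, const ov::Node* rhs) {
    const auto lhs_shape_of = ov::as_type<const ov::op::v3::ShapeOf>(lhs);
    if (!lhs_shape_of)
        return false;
    const auto rhs_shape_of = ov::as_type<const ov::op::v3::ShapeOf>(rhs);
    if (!rhs_shape_of)
        return false;
    // Interchangeable only when the attribute matches and the inputs are shared or equal constants.
    return lhs_shape_of->get_output_type() == rhs_shape_of->get_output_type() &&
           inputs_from_same_source_or_equal_constants(lhs, rhs);
}

The new predicate would then be registered in the rules table as RECORD(v3::ShapeOf, shape_ofs_are_equal).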
@@ -0,0 +1,260 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>

#include <transformations/common_optimizations/shared_ops_optimization.hpp>

#include "common_test_utils/ngraph_test_utils.hpp"
#include "openvino/op/concat.hpp"
#include "openvino/op/parameter.hpp"
#include "openvino/op/reshape.hpp"
#include "openvino/op/slice.hpp"
#include "openvino/op/tile.hpp"

using namespace ov;
using namespace ov::op;

class SharedTransformationTestsF : public TransformationTestsF {
public:
    void TearDown() override {
        TransformationTestsF::TearDown();
        size_t op_count = model->get_ops().size(), op_count_ref = model_ref->get_ops().size();
        EXPECT_EQ(op_count, op_count_ref) << "Number of operations differ between models: model op count = " << op_count
                                          << " ref_model op count = " << op_count_ref;
    };

    static Output<Node> make_slice(const Output<Node>& out,
                                   const int64_t& start,
                                   const int64_t& stop,
                                   const int64_t& step,
                                   const int64_t& axis) {
        return std::make_shared<v8::Slice>(out,
                                           v0::Constant::create(element::i64, Shape{1}, {start}),
                                           v0::Constant::create(element::i64, Shape{1}, {stop}),
                                           v0::Constant::create(element::i64, Shape{1}, {step}),
                                           v0::Constant::create(element::i64, Shape{1}, {axis}));
    }

    static Output<Node> make_tile(const Output<Node>& out, const std::vector<int64_t>& repeats) {
        return std::make_shared<v0::Tile>(out, v0::Constant::create(element::i64, Shape{repeats.size()}, repeats));
    }

    static Output<Node> make_reshape(const Output<Node>& out, const std::vector<int64_t>& order) {
        return std::make_shared<v1::Reshape>(out, v0::Constant::create(element::i64, Shape{order.size()}, order), true);
    }
};

TEST_F(SharedTransformationTestsF, SharedSlice) {
    {
        auto data = std::make_shared<v0::Parameter>(element::f32, PartialShape{-1, -1, -1, -1});

        auto slice_0 = make_slice(data, 1, 2, 3, 3);
        auto slice_1 = make_slice(data, 1, 2, 3, 3);
        auto slice_2 = make_slice(data, 1, 3, 3, 3);
        auto slice_3 = make_slice(data, 1, 2, 3, 3);
        auto slice_4 = make_slice(data, 1, 2, 3, 3);

        auto concat = std::make_shared<v0::Concat>(OutputVector{slice_0, slice_1, slice_2, slice_3, slice_4}, 0);
        model = std::make_shared<ov::Model>(OutputVector{concat}, ParameterVector{data});
        manager.register_pass<ov::pass::SharedOpOptimization>();
    }
    {
        auto data = std::make_shared<v0::Parameter>(element::f32, PartialShape{-1, -1, -1, -1});

        auto slice_0 = make_slice(data, 1, 2, 3, 3);
        auto slice_2 = make_slice(data, 1, 3, 3, 3);

        auto concat = std::make_shared<v0::Concat>(OutputVector{slice_0, slice_0, slice_2, slice_0, slice_0}, 0);
        model_ref = std::make_shared<ov::Model>(OutputVector{concat}, ParameterVector{data});
    }
}

TEST_F(SharedTransformationTestsF, SharedRecursively) {
    {
        auto data = std::make_shared<v0::Parameter>(element::f32, PartialShape{-1, -1, -1, -1});

        auto slice_0 = make_slice(data, 1, 2, 3, 3);
        auto slice_1 = make_slice(data, 1, 2, 3, 3);
        auto slice_2 = make_slice(data, 1, 3, 3, 3);

        auto tile_0_0 = make_tile(slice_0, {1, 2, 3, 4});
        auto transpose_0_0 = make_reshape(slice_0, {0, 0, 0, -1});
        auto tile_0_1 = make_tile(slice_0, {1, 2, 3, 4});
        auto transpose_0_1 = make_reshape(slice_0, {0, 0, 0, -1});
        auto tile_0_2 = make_tile(slice_0, {1, 2, 3, 4});
        auto transpose_0_2 = make_reshape(slice_0, {0, 0, 0, -1});

        auto tile_1_0 = make_tile(slice_1, {1, 2, 3, 4});
        auto transpose_1_0 = make_reshape(slice_1, {0, 0, 0, -1});
        auto tile_1_1 = make_tile(slice_1, {1, 2, 3, 4});
        auto transpose_1_1 = make_reshape(slice_1, {0, 0, 0, -1});
        auto tile_1_2 = make_tile(slice_1, {1, 2, 3, 4});
        auto transpose_1_2 = make_reshape(slice_1, {0, 0, 0, -1});

        auto tile_2_0 = make_tile(slice_2, {1, 2, 3, 4});
        auto transpose_2_0 = make_reshape(slice_2, {0, 0, 0, -1});
        auto tile_2_1 = make_tile(slice_2, {1, 2, 3, 4});
        auto transpose_2_1 = make_reshape(slice_2, {0, 0, 0, -1});
        auto tile_2_2 = make_tile(slice_2, {1, 2, 3, 4});
        auto transpose_2_2 = make_reshape(slice_2, {0, 0, 0, -1});

        auto concat = std::make_shared<v0::Concat>(
            OutputVector{// source from slice 0
                         tile_0_0,
                         transpose_0_0,
                         tile_0_1,
                         transpose_0_1,
                         tile_0_2,
                         transpose_0_2,
                         // source from slice 1
                         tile_1_0,
                         transpose_1_0,
                         tile_1_1,
                         transpose_1_1,
                         tile_1_2,
                         transpose_1_2,
                         // source from slice 2
                         tile_2_0,
                         transpose_2_0,
                         tile_2_1,
                         transpose_2_1,
                         tile_2_2,
                         transpose_2_2},
            0);

        model = std::make_shared<ov::Model>(OutputVector{concat}, ParameterVector{data});
        manager.register_pass<ov::pass::SharedOpOptimization>();
    }
    {
        auto data = std::make_shared<v0::Parameter>(element::f32, PartialShape{-1, -1, -1, -1});

        auto slice_0 = make_slice(data, 1, 2, 3, 3);
        auto slice_2 = make_slice(data, 1, 3, 3, 3);

        auto tile_0_0 = make_tile(slice_0, {1, 2, 3, 4});
        auto transpose_0_0 = make_reshape(slice_0, {0, 0, 0, -1});

        auto tile_2_0 = make_tile(slice_2, {1, 2, 3, 4});
        auto transpose_2_0 = make_reshape(slice_2, {0, 0, 0, -1});

        auto concat = std::make_shared<v0::Concat>(
            OutputVector{// source from slice 0
                         tile_0_0,
                         transpose_0_0,
                         tile_0_0,
                         transpose_0_0,
                         tile_0_0,
                         transpose_0_0,
                         // source from slice 0
                         tile_0_0,
                         transpose_0_0,
                         tile_0_0,
                         transpose_0_0,
                         tile_0_0,
                         transpose_0_0,
                         // source from slice 2
                         tile_2_0,
                         transpose_2_0,
                         tile_2_0,
                         transpose_2_0,
                         tile_2_0,
                         transpose_2_0},
            0);

        model_ref = std::make_shared<ov::Model>(OutputVector{concat}, ParameterVector{data});
    }
}

TEST_F(SharedTransformationTestsF, SharedConcat) {
    {
        auto pre_constant_0 = v0::Constant::create(element::f32, Shape{4}, std::vector<float>{3.14f, 42.f, 0.f, 14.f});
        auto pre_constant_1 = v0::Constant::create(element::f32, Shape{4}, std::vector<float>{3.14f, 42.f, 0.f, 14.f});
        auto data = std::make_shared<v0::Parameter>(element::f32, PartialShape{-1});
        auto post_constant = v0::Constant::create(element::f32, Shape{1}, std::vector<float>{3.14f});

        auto concat_0 = std::make_shared<v0::Concat>(OutputVector{pre_constant_0, data, post_constant}, 0);
        auto concat_1 = std::make_shared<v0::Concat>(OutputVector{pre_constant_1, data, post_constant}, 0);

        auto concat = std::make_shared<v0::Concat>(OutputVector{concat_0, concat_1}, 0);
        model = std::make_shared<ov::Model>(OutputVector{concat}, ParameterVector{data});
        manager.register_pass<ov::pass::SharedOpOptimization>();
    }
    {
        auto pre_constant_0 = v0::Constant::create(element::f32, Shape{4}, std::vector<float>{3.14f, 42.f, 0.f, 14.f});
        auto data = std::make_shared<v0::Parameter>(element::f32, PartialShape{-1});
        auto post_constant = v0::Constant::create(element::f32, Shape{1}, std::vector<float>{3.14f});

        auto concat_0 = std::make_shared<v0::Concat>(OutputVector{pre_constant_0, data, post_constant}, 0);

        auto concat = std::make_shared<v0::Concat>(OutputVector{concat_0, concat_0}, 0);
        model_ref = std::make_shared<ov::Model>(OutputVector{concat}, ParameterVector{data});
    }
}

TEST_F(SharedTransformationTestsF, SharedSliceInThreeGroups) {
    {
        auto data = std::make_shared<v0::Parameter>(element::f32, PartialShape::dynamic(10));

        auto slice_0_0 = make_slice(data, 1, 2, 3, 4);
        auto slice_1_0 = make_slice(data, 2, 3, 4, 5);
        auto slice_2_0 = make_slice(data, 3, 4, 5, 6);

        auto slice_0_1 = make_slice(data, 1, 2, 3, 4);
        auto slice_1_1 = make_slice(data, 2, 3, 4, 5);
        auto slice_2_1 = make_slice(data, 3, 4, 5, 6);

        auto slice_0_2 = make_slice(data, 1, 2, 3, 4);
        auto slice_1_2 = make_slice(data, 2, 3, 4, 5);
        auto slice_2_2 = make_slice(data, 3, 4, 5, 6);

        auto concat = std::make_shared<v0::Concat>(OutputVector{slice_0_0,
                                                                slice_1_0,
                                                                slice_2_0,
                                                                slice_0_1,
                                                                slice_1_1,
                                                                slice_2_1,
                                                                slice_0_2,
                                                                slice_1_2,
                                                                slice_2_2},
                                                   0);

        model = std::make_shared<ov::Model>(OutputVector{concat}, ParameterVector{data});
        manager.register_pass<ov::pass::SharedOpOptimization>();
    }
    {
        auto data = std::make_shared<v0::Parameter>(element::f32, PartialShape::dynamic(10));

        auto slice_0_0 = make_slice(data, 1, 2, 3, 4);
        auto slice_1_0 = make_slice(data, 2, 3, 4, 5);
        auto slice_2_0 = make_slice(data, 3, 4, 5, 6);

        auto concat = std::make_shared<v0::Concat>(OutputVector{slice_0_0,
                                                                slice_1_0,
                                                                slice_2_0,
                                                                slice_0_0,
                                                                slice_1_0,
                                                                slice_2_0,
                                                                slice_0_0,
                                                                slice_1_0,
                                                                slice_2_0},
                                                   0);

        model_ref = std::make_shared<ov::Model>(OutputVector{concat}, ParameterVector{data});
    }
}

TEST_F(SharedTransformationTestsF, SharedConcatCheckOpWithResultIsntReplaced) {
    {
        auto pre_constant_0 = v0::Constant::create(element::f32, Shape{4}, std::vector<float>{3.14f, 42.f, 0.f, 14.f});
        auto pre_constant_1 = v0::Constant::create(element::f32, Shape{4}, std::vector<float>{3.14f, 42.f, 0.f, 14.f});
        auto data = std::make_shared<v0::Parameter>(element::f32, PartialShape{-1});
        auto post_constant = v0::Constant::create(element::f32, Shape{1}, std::vector<float>{3.14f});

        auto concat_0 = std::make_shared<v0::Concat>(OutputVector{pre_constant_0, data, post_constant}, 0);
        auto concat_1 = std::make_shared<v0::Concat>(OutputVector{pre_constant_1, data, post_constant}, 0);

        model = std::make_shared<ov::Model>(OutputVector{concat_0, concat_1}, ParameterVector{data});
        manager.register_pass<ov::pass::SharedOpOptimization>();
    }
}