[GPU] Removed some redundant internal passes (#17552)

This commit is contained in:
Vladimir Paramuzov
2023-05-19 13:34:42 +04:00
committed by GitHub
parent 36dbe95d9e
commit b95aa84b45
7 changed files with 0 additions and 378 deletions

View File

@@ -1,33 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "pass_manager.h"
#include "prior_box_inst.h"
#include "program_node.h"
#include "intel_gpu/graph/program.hpp"
#include <memory>
using namespace cldnn;
// Constant-folds non-opset8 prior_box nodes: their output depends only on
// parameters known at build time, so each node is evaluated once and replaced
// by a plain data node that carries the precomputed result buffer.
void calculate_prior_boxes::run(program& p) {
auto itr = p.get_processing_order().begin();
// Manual iterator: it is advanced before the current node is replaced, so the
// graph mutations below do not invalidate the position we continue from.
// NOTE(review): assumes p.replace() keeps the remaining processing-order
// entries (and thus the iterator) valid — confirm against program::replace.
while (itr != p.get_processing_order().end()) {
auto& node = (*itr++);
if (!node->is_type<prior_box>())
continue;
auto& pb_node = node->as<prior_box>();
// opset8 prior boxes are executed at runtime; leave them in the graph.
if (pb_node.get_primitive()->support_opset8) {
continue;
}
// Evaluate the prior_box now and detach it from its input.
pb_node.calc_result();
p.remove_connection(pb_node.input(), pb_node);
auto result = pb_node.get_result_buffer();
// Substitute a constant data node holding the precomputed boxes.
auto& data_node = p.get_or_create(std::make_shared<data>("_cldnn_tmp_" + pb_node.id() + "_result", result));
p.replace(pb_node, data_node);
}
}

View File

@@ -1,79 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "intel_gpu/runtime/tensor.hpp"
#include "pass_manager.h"
#include "convolution_inst.h"
#include "eltwise_inst.h"
#include <memory>
using namespace cldnn;
// Tries to push an eltwise stride request ("tensor") into the convolution
// chain feeding it. A 1x1 convolution cannot absorb the stride itself, so the
// request is forwarded to its convolution producer; the first convolution
// whose filter is large enough takes the extra stride onto its own stride,
// after which the request is reset to 1x1 (i.e. the eltwise stride is removed).
void eltwise_remove_stride::conv_stride_extend(program& p, program_node& node, cldnn::tensor& tensor) {
// make sure we have only 1 user
if (node.get_users().size() > 1)
return;
const auto conv = std::static_pointer_cast<const convolution>(node.get_primitive());
auto weights_node_ptr = p.get_node_ptr(conv->weights);
auto filter_size = weights_node_ptr->get_output_layout().get_tensor();
// make sure this is conv 1x1
if (filter_size.spatial[0] == 1 && filter_size.spatial[1] == 1) {
// Recurse into the first convolution dependency and refresh layouts on the
// way back, since a deeper conv may have changed its stride.
auto deps = node.get_dependencies();
for (const auto& dep : deps) {
if (dep.first->is_type<convolution>()) {
conv_stride_extend(p, *dep.first, tensor);
dep.first->recalc_output_layout(true);
break;
}
}
node.recalc_output_layout(true);
} else {
// The filter must cover the enlarged stride window in both axes.
// NOTE(review): conv->stride[0]/[1] appear to be y/x while
// tensor.spatial[0]/[1] appear to be x/y — confirm against cldnn tensor
// conventions before modifying these expressions.
bool can_shrink_x = (filter_size.spatial[0] >= (static_cast<int64_t>(conv->stride[1]) + (tensor.spatial[0] - 1)));
bool can_shrink_y = (filter_size.spatial[1] >= (static_cast<int64_t>(conv->stride[0]) + (tensor.spatial[1] - 1)));
if (can_shrink_x && can_shrink_y) {
// Mutate the shared primitive in place: enlarge the conv stride and
// clear the requested eltwise stride so the caller drops it.
auto c = const_cast<convolution*>(&(*conv));
c->stride[1] += tensor.spatial[0] - 1;
c->stride[0] += tensor.spatial[1] - 1;
node.recalc_output_layout(true);
tensor.spatial[0] = 1;
tensor.spatial[1] = 1;
}
}
}
// Scans the graph for strided eltwise nodes and, for each convolution
// dependency, tries to push the corresponding stride entry into that
// convolution chain via conv_stride_extend().
void eltwise_remove_stride::run(program& p) {
    for (auto& node : p.get_processing_order()) {
        if (!node->is_type<eltwise>())
            continue;

        auto out_layout = node->get_output_layout();
        // TODO: make fp16 work
        // Supported outputs: i8 or f32 in any format, or f16 restricted to
        // yxfb / b_fs_yx_fsv16.
        const bool int8_or_fp32 = out_layout.data_type == data_types::i8 ||
                                  out_layout.data_type == data_types::f32;
        const bool fp16_allowed = out_layout.data_type == data_types::f16 &&
                                  (out_layout.format == format::yxfb ||
                                   out_layout.format == format::b_fs_yx_fsv16);
        if (!int8_or_fp32 && !fp16_allowed)
            continue;
        if (out_layout.get_spatial_rank() != 2)
            continue;

        const auto eltw = std::static_pointer_cast<const eltwise>(node->get_primitive());
        if (eltw->stride.empty())
            continue;

        auto deps = node->get_dependencies();
        for (size_t idx = 0; idx < deps.size(); ++idx) {
            // TODO: add other primitives beside convolution here
            if (!deps[idx].first->is_type<convolution>())
                continue;
            // The per-input stride entry may be rewritten to 1x1 in place.
            auto mutable_eltw = const_cast<eltwise*>(eltw.get());
            conv_stride_extend(p, *deps[idx].first, mutable_eltw->stride[idx]);
        }
    }
}

View File

@@ -1,111 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "pass_manager.h"
#include "eltwise_inst.h"
#include <vector>
using namespace cldnn;
// Shrinks an eltwise output when every user is a strided 1x1 convolution:
// the common stride is moved from the convolutions onto the eltwise inputs,
// so the eltwise produces (and the convolutions consume) a smaller tensor,
// and the convolutions' own strides are reset to 1.
void eltwise_shrinking::run(program& p) {
std::vector<program_node*> convs_to_shrink;
for (auto& node : p.get_processing_order()) {
if (node->is_type<eltwise>()) {
if (!node->is_in_data_flow())
continue;
// Supported outputs: i8 or f32 in any format, or f16 restricted to
// yxfb / b_fs_yx_fsv16 (everything else is skipped).
if (node->get_output_layout().data_type != data_types::i8 &&
node->get_output_layout().data_type != data_types::f32) {
if (node->get_output_layout().data_type != data_types::f16 ||
(node->get_output_layout().format != format::yxfb &&
node->get_output_layout().format != format::b_fs_yx_fsv16)) {
continue;
}
}
if (node->get_output_layout().format == format::fs_b_yx_fsv32)
continue;
const auto eltw = std::static_pointer_cast<const eltwise>(node->get_primitive());
// TODO: support cases which already have stride!
if (eltw->stride.empty() && !node->get_users().empty()) {
bool can_shrink = true;
size_t stride_x = 0;
size_t stride_y = 0;
convs_to_shrink.clear();
auto users = node->get_users();
for (auto user : users) {
// currently we can shrink only if users are convolutions
if (!user->is_type<convolution>()) {
can_shrink = false;
break;
}
const auto conv = std::static_pointer_cast<const convolution>(user->get_primitive());
// Check that eltwise is not an input of operation fused to convolution
if (user->get_dependency(0).id() != eltw->id) {
can_shrink = false;
break;
}
auto filter_size = user->as<convolution>().weights().get_output_layout().get_tensor();
// make sure this is conv 1x1
if (filter_size.spatial[0] != 1 || filter_size.spatial[1] != 1 || conv->stride.size() != 2) {
can_shrink = false;
break;
}
// make sure convolution can accept shrinked input by modifying stride
// NOTE(review): stride[1] is treated as x and stride[0] as y here —
// confirm against cldnn stride ordering before changing.
if (conv->stride[0] > 1 || conv->stride[1] > 1) {
if (stride_x == 0)
stride_x = conv->stride[1];
if (stride_y == 0)
stride_y = conv->stride[0];
// make sure stride across all eltwise's convolution users is the same
if (conv->stride[1] != stride_x || conv->stride[0] != stride_y) {
can_shrink = false;
break;
}
convs_to_shrink.push_back(user);
} else {
// Unstrided user: nothing to gain from shrinking.
can_shrink = false;
break;
}
}
if (can_shrink) {
// add stride for every eltwise's inputs to have shrinked output
auto e = const_cast<eltwise*>(&(*eltw));
for (size_t dep = 0; dep < e->input_size(); dep++) {
auto dep_stride_x = stride_x;
auto dep_stride_y = stride_y;
// don't shrink if input is broadcasted
if (node->get_dependency(dep).get_output_layout().spatial(0) == 1) {
dep_stride_x = 1;
}
if (node->get_dependency(dep).get_output_layout().spatial(1) == 1) {
dep_stride_y = 1;
}
// Stride tensor ordering: {batch, feature, x, y}.
e->stride.push_back({0, 0, static_cast<tensor::value_type>(dep_stride_x), static_cast<tensor::value_type>(dep_stride_y)});
}
node->recalc_output_layout();
// change stride on every convolution
for (size_t i = 0; i < convs_to_shrink.size(); i++) {
const auto conv =
std::static_pointer_cast<const convolution>(convs_to_shrink[i]->get_primitive());
auto c = const_cast<convolution*>(&(*conv));
c->stride[0] = 1;
c->stride[1] = 1;
convs_to_shrink[i]->recalc_output_layout();
}
}
}
}
}
}

View File

@@ -1,70 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "pass_manager.h"
#include "program_node.h"
#include "layout_optimizer.h"
#include "intel_gpu/graph/program.hpp"
#include "program_helpers.h"
#include "fully_connected_inst.h"
using namespace cldnn;
// Keeps a reference to the shared reorder factory so run() can create
// bias-flattening reorders later.
pre_optimize_bias::pre_optimize_bias(reorder_factory& rf_ref)
    : base_pass("pre_optimize_bias")
    , _rf(rf_ref) {}
// base_pass entry point: forwards to the two-argument overload using the
// reorder factory captured at construction time.
void pre_optimize_bias::run(program& p) {
    run(p, _rf);
}
// function which prepares given primitive for weights optimization
template <typename T>
bool pre_optimize_bias::optimize_bias(T& node, reorder_factory& rf, program& p) {
size_t weights_offset = node.get_primitive()->input.size();
size_t bias_offset = weights_offset + program_helpers::wrap_if_single(node.get_primitive()->weights).size();
bool bias_optimized = false;
for (size_t i = bias_offset; i < node.get_dependencies().size() - node.get_fused_inputs_count(); ++i) {
// find weights primitive with given pimitive_id and add it to weights_optimizer
const program_node& bias = node.get_dependency(i);
auto new_layout = layout(bias.get_output_layout().data_type,
format::bfyx,
{ 1, static_cast<tensor::value_type>(bias.get_output_layout().count()), 1, 1 });
auto reorder = rf.get_reorder(bias.id(),
bias.get_output_layout(),
new_layout);
if (reorder.first) {
p.add_intermediate(reorder.first, node, i, !reorder.second);
bias_optimized = true;
}
}
return bias_optimized;
}
template bool pre_optimize_bias::optimize_bias<convolution_node>(convolution_node& node,
reorder_factory& rf,
program& p);
template bool pre_optimize_bias::optimize_bias<deconvolution_node>(deconvolution_node& node,
reorder_factory& rf,
program& p);
template bool pre_optimize_bias::optimize_bias<fully_connected_node>(fully_connected_node& node,
reorder_factory& rf,
program& p);
// Applies bias optimization to every convolution, deconvolution and
// fully-connected node in processing order. If any bias was reordered,
// all output layouts are forcibly recomputed afterwards.
void pre_optimize_bias::run(program& p, reorder_factory& rf) {
    bool any_changed = false;
    for (auto& prim : p.get_processing_order()) {
        if (prim->type() == convolution::type_id()) {
            any_changed |= optimize_bias(prim->as<convolution>(), rf, p);
        } else if (prim->type() == deconvolution::type_id()) {
            any_changed |= optimize_bias(prim->as<deconvolution>(), rf, p);
        } else if (prim->type() == fully_connected::type_id()) {
            any_changed |= optimize_bias(prim->as<fully_connected>(), rf, p);
        }
    }
    if (!any_changed)
        return;
    // Inserted reorders changed dependency layouts; refresh the whole graph.
    for (auto n : p.get_processing_order()) {
        n->recalc_output_layout(true);
    }
}

View File

@@ -68,14 +68,6 @@ private:
void run(program& p) override;
};
// Pass that precomputes non-opset8 prior_box nodes at graph-build time and
// replaces them with constant data nodes.
// NOTE(review): the pass name string "calculated_prior_boxes" does not match
// the class name "calculate_prior_boxes" — looks like a typo, but the string
// may be referenced by debug/config tooling; confirm before renaming.
class calculate_prior_boxes : public base_pass {
public:
calculate_prior_boxes() : base_pass("calculated_prior_boxes") {}
private:
void run(program& p) override;
};
class compile_graph : public base_pass {
public:
compile_graph() : base_pass("compile_graph") {}
@@ -84,23 +76,6 @@ private:
void run(program& p) override;
};
// Pass that shrinks an eltwise output when all of its users are strided 1x1
// convolutions, moving the stride from the convolutions onto the eltwise.
class eltwise_shrinking : public base_pass {
public:
eltwise_shrinking() : base_pass("eltwise_shrinking") {}
private:
void run(program& p) override;
};
// Pass that removes a stride from eltwise primitives by pushing it into the
// convolution chain feeding each strided input (see conv_stride_extend).
class eltwise_remove_stride : public base_pass {
public:
eltwise_remove_stride() : base_pass("eltwise_remove_stride") {}
private:
void run(program& p) override;
// Recursive helper: absorbs the requested stride "tensor" into the
// convolution chain rooted at "node", resetting it to 1x1 on success.
void conv_stride_extend(program& p, program_node& node, cldnn::tensor& tensor);
};
class graph_initializations : public base_pass {
public:
graph_initializations() : base_pass("init") {}
@@ -206,18 +181,6 @@ private:
layout_optimizer& _lo;
};
// Pass that reorders bias inputs of convolution / deconvolution /
// fully-connected nodes into a flat {1, N, 1, 1} bfyx layout.
class pre_optimize_bias : public base_pass {
public:
explicit pre_optimize_bias(reorder_factory& rf_ref);
private:
void run(program& p) override;
// Overload taking an explicit reorder factory; virtual for test overrides.
virtual void run(program& p, reorder_factory& rf);
// Inserts bias-flattening reorders for one node; returns true if any added.
template <typename T>
bool optimize_bias(T& node, reorder_factory& rf, program& p);
// Shared factory used to create (and deduplicate) reorder primitives.
reorder_factory& _rf;
};
class prepare_padding : public base_pass {
public:
explicit prepare_padding(bool output_size_handling_enabled_switch)

View File

@@ -488,8 +488,6 @@ void program::init_graph() {
OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, "Program::init_graph");
apply_opt_pass<graph_initializations>();
apply_opt_pass<calculate_prior_boxes>();
apply_opt_pass<mark_nodes>();
}
@@ -534,17 +532,6 @@ void program::pre_optimize_graph(bool is_internal) {
// but after format selection to select correct alignment.
// Unfortunately those passes currently happen in reverse order.
apply_opt_pass<concat_input_order>();
// TODO this code should be moved to post compilation after kernel selector will support handling reorder bias
apply_opt_pass<pre_optimize_bias>(rf);
// passes regarding conv + eltwise optimizations
// shrinking eltwise if users are conv 1x1 with stride > 1 optimization
apply_opt_pass<eltwise_shrinking>();
// trying to set stride to 1x1 by shrinking convolutions before eltwise if doable
apply_opt_pass<eltwise_remove_stride>();
}
apply_opt_pass<strided_slice_optimize>();

View File

@@ -2797,24 +2797,6 @@ public:
};
class testing_removal_reorder : public ReorderTest<reorder_test_param> {};
// Verifies that a reorder feeding a 1-D (along-feature) bias into an eltwise
// add is optimized out of the graph: "reorder_bias1" only changes format of a
// flat bias tensor, so the optimizer should fold it away.
TEST_P(testing_removal_reorder, removal_reorder_1d_along_f) {
auto p = GetParam();
create_topologies(input_layout("input", get_input_layout(p)),
reorder("reorder_input", input_info("input"), format::b_fs_yx_fsv16, data_types::f16),
data("weights", get_mem(get_weights_layout(p))),
data("bias1", get_mem(get_bias_layout(p))),
reorder("reorder_bias1", input_info("bias1"), format::b_fs_yx_fsv16, data_types::f16),
convolution("conv_prim", input_info("reorder_input"), "weights", "", 1, p.stride, {1, 1}, p.pad, p.pad, false),
reorder("reorder_conv", input_info("conv_prim"), format::b_fs_yx_fsv16, data_types::f16),
eltwise("add_bias1", { input_info("reorder_conv"), input_info("reorder_bias1") }, eltwise_mode::sum),
reorder("reorder_bfyx", input_info("add_bias1"), p.default_format, data_types::f16)
);
// Run without caching; the bias reorder must be marked optimized-out.
execute(p, false);
ASSERT_EQ(check_optimized_out(p, "reorder_bias1"), true);
}
// Testing bugfix not to remove reorder in front of conv has deep depth input
TEST_P(testing_removal_reorder, only_remove_reorder_shallow_depth_input) {
auto p = GetParam();
@@ -3236,23 +3218,6 @@ TEST(reorder_gpu_optimization, compare_with_ref__bfyx_to_blocked_format_differen
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::i64, data_types::f32, format::bfyx, format::b_fs_yx_fsv16, 3, 32 + 4, 16 + 7, 2, 0, 0, true);
}
// Cached variant of removal_reorder_1d_along_f: the same topology is built
// and executed with model caching enabled (execute(p, true)) to verify the
// bias reorder is still optimized out when loading from cache.
TEST_P(testing_removal_reorder, removal_reorder_1d_along_f_cached) {
auto p = GetParam();
create_topologies(input_layout("input", get_input_layout(p)),
reorder("reorder_input", input_info("input"), format::b_fs_yx_fsv16, data_types::f16),
data("weights", get_mem(get_weights_layout(p))),
data("bias1", get_mem(get_bias_layout(p))),
reorder("reorder_bias1", input_info("bias1"), format::b_fs_yx_fsv16, data_types::f16),
convolution("conv_prim", input_info("reorder_input"), "weights", "", 1, p.stride, {1, 1}, p.pad, p.pad, false),
reorder("reorder_conv", input_info("conv_prim"), format::b_fs_yx_fsv16, data_types::f16),
eltwise("add_bias1", { input_info("reorder_conv"), input_info("reorder_bias1") }, eltwise_mode::sum),
reorder("reorder_bfyx", input_info("add_bias1"), p.default_format, data_types::f16)
);
// Run with caching enabled; the bias reorder must be marked optimized-out.
execute(p, true);
ASSERT_EQ(check_optimized_out(p, "reorder_bias1"), true);
}
#endif
TEST_P(testing_removal_reorder, only_remove_reorder_shallow_depth_input_cached) {