[GPU] Removed some redundant internal passes (#17552)

This commit is contained in:
Vladimir Paramuzov
2023-05-19 13:34:42 +04:00
committed by GitHub
parent 36dbe95d9e
commit b95aa84b45
7 changed files with 0 additions and 378 deletions

View File

@@ -1,33 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "pass_manager.h"
#include "prior_box_inst.h"
#include "program_node.h"
#include "intel_gpu/graph/program.hpp"
#include <memory>
using namespace cldnn;
// Constant-folds non-opset8 prior_box nodes: their output depends only on
// parameters known at build time, so each node is evaluated once and replaced
// by a plain data node that carries the precomputed result buffer.
void calculate_prior_boxes::run(program& p) {
auto itr = p.get_processing_order().begin();
// Manual iterator: it is advanced before the current node is replaced, so the
// graph mutations below do not invalidate the position we continue from.
// NOTE(review): assumes p.replace() keeps the remaining processing-order
// entries (and thus the iterator) valid — confirm against program::replace.
while (itr != p.get_processing_order().end()) {
auto& node = (*itr++);
if (!node->is_type<prior_box>())
continue;
auto& pb_node = node->as<prior_box>();
// opset8 prior boxes are executed at runtime; leave them in the graph.
if (pb_node.get_primitive()->support_opset8) {
continue;
}
// Evaluate the prior_box now and detach it from its input.
pb_node.calc_result();
p.remove_connection(pb_node.input(), pb_node);
auto result = pb_node.get_result_buffer();
// Substitute a constant data node holding the precomputed boxes.
auto& data_node = p.get_or_create(std::make_shared<data>("_cldnn_tmp_" + pb_node.id() + "_result", result));
p.replace(pb_node, data_node);
}
}

View File

@@ -1,79 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "intel_gpu/runtime/tensor.hpp"
#include "pass_manager.h"
#include "convolution_inst.h"
#include "eltwise_inst.h"
#include <memory>
using namespace cldnn;
// Tries to push an eltwise stride request ("tensor") into the convolution
// chain feeding it. A 1x1 convolution cannot absorb the stride itself, so the
// request is forwarded to its convolution producer; the first convolution
// whose filter is large enough takes the extra stride onto its own stride,
// after which the request is reset to 1x1 (i.e. the eltwise stride is removed).
void eltwise_remove_stride::conv_stride_extend(program& p, program_node& node, cldnn::tensor& tensor) {
// make sure we have only 1 user
if (node.get_users().size() > 1)
return;
const auto conv = std::static_pointer_cast<const convolution>(node.get_primitive());
auto weights_node_ptr = p.get_node_ptr(conv->weights);
auto filter_size = weights_node_ptr->get_output_layout().get_tensor();
// make sure this is conv 1x1
if (filter_size.spatial[0] == 1 && filter_size.spatial[1] == 1) {
// Recurse into the first convolution dependency and refresh layouts on the
// way back, since a deeper conv may have changed its stride.
auto deps = node.get_dependencies();
for (const auto& dep : deps) {
if (dep.first->is_type<convolution>()) {
conv_stride_extend(p, *dep.first, tensor);
dep.first->recalc_output_layout(true);
break;
}
}
node.recalc_output_layout(true);
} else {
// The filter must cover the enlarged stride window in both axes.
// NOTE(review): conv->stride[0]/[1] appear to be y/x while
// tensor.spatial[0]/[1] appear to be x/y — confirm against cldnn tensor
// conventions before modifying these expressions.
bool can_shrink_x = (filter_size.spatial[0] >= (static_cast<int64_t>(conv->stride[1]) + (tensor.spatial[0] - 1)));
bool can_shrink_y = (filter_size.spatial[1] >= (static_cast<int64_t>(conv->stride[0]) + (tensor.spatial[1] - 1)));
if (can_shrink_x && can_shrink_y) {
// Mutate the shared primitive in place: enlarge the conv stride and
// clear the requested eltwise stride so the caller drops it.
auto c = const_cast<convolution*>(&(*conv));
c->stride[1] += tensor.spatial[0] - 1;
c->stride[0] += tensor.spatial[1] - 1;
node.recalc_output_layout(true);
tensor.spatial[0] = 1;
tensor.spatial[1] = 1;
}
}
}
// Scans the graph for strided eltwise nodes and, for each convolution
// dependency, tries to push the corresponding stride entry into that
// convolution chain via conv_stride_extend().
void eltwise_remove_stride::run(program& p) {
    for (auto& node : p.get_processing_order()) {
        if (!node->is_type<eltwise>())
            continue;

        auto out_layout = node->get_output_layout();
        // TODO: make fp16 work
        // Supported outputs: i8 or f32 in any format, or f16 restricted to
        // yxfb / b_fs_yx_fsv16.
        const bool int8_or_fp32 = out_layout.data_type == data_types::i8 ||
                                  out_layout.data_type == data_types::f32;
        const bool fp16_allowed = out_layout.data_type == data_types::f16 &&
                                  (out_layout.format == format::yxfb ||
                                   out_layout.format == format::b_fs_yx_fsv16);
        if (!int8_or_fp32 && !fp16_allowed)
            continue;
        if (out_layout.get_spatial_rank() != 2)
            continue;

        const auto eltw = std::static_pointer_cast<const eltwise>(node->get_primitive());
        if (eltw->stride.empty())
            continue;

        auto deps = node->get_dependencies();
        for (size_t idx = 0; idx < deps.size(); ++idx) {
            // TODO: add other primitives beside convolution here
            if (!deps[idx].first->is_type<convolution>())
                continue;
            // The per-input stride entry may be rewritten to 1x1 in place.
            auto mutable_eltw = const_cast<eltwise*>(eltw.get());
            conv_stride_extend(p, *deps[idx].first, mutable_eltw->stride[idx]);
        }
    }
}

View File

@@ -1,111 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "pass_manager.h"
#include "eltwise_inst.h"
#include <vector>
using namespace cldnn;
// Shrinks an eltwise output when every user is a strided 1x1 convolution:
// the common stride is moved from the convolutions onto the eltwise inputs,
// so the eltwise produces (and the convolutions consume) a smaller tensor,
// and the convolutions' own strides are reset to 1.
void eltwise_shrinking::run(program& p) {
std::vector<program_node*> convs_to_shrink;
for (auto& node : p.get_processing_order()) {
if (node->is_type<eltwise>()) {
if (!node->is_in_data_flow())
continue;
// Supported outputs: i8 or f32 in any format, or f16 restricted to
// yxfb / b_fs_yx_fsv16 (everything else is skipped).
if (node->get_output_layout().data_type != data_types::i8 &&
node->get_output_layout().data_type != data_types::f32) {
if (node->get_output_layout().data_type != data_types::f16 ||
(node->get_output_layout().format != format::yxfb &&
node->get_output_layout().format != format::b_fs_yx_fsv16)) {
continue;
}
}
if (node->get_output_layout().format == format::fs_b_yx_fsv32)
continue;
const auto eltw = std::static_pointer_cast<const eltwise>(node->get_primitive());
// TODO: support cases which already have stride!
if (eltw->stride.empty() && !node->get_users().empty()) {
bool can_shrink = true;
size_t stride_x = 0;
size_t stride_y = 0;
convs_to_shrink.clear();
auto users = node->get_users();
for (auto user : users) {
// currently we can shrink only if users are convolutions
if (!user->is_type<convolution>()) {
can_shrink = false;
break;
}
const auto conv = std::static_pointer_cast<const convolution>(user->get_primitive());
// Check that eltwise is not an input of operation fused to convolution
if (user->get_dependency(0).id() != eltw->id) {
can_shrink = false;
break;
}
auto filter_size = user->as<convolution>().weights().get_output_layout().get_tensor();
// make sure this is conv 1x1
if (filter_size.spatial[0] != 1 || filter_size.spatial[1] != 1 || conv->stride.size() != 2) {
can_shrink = false;
break;
}
// make sure convolution can accept shrinked input by modifying stride
// NOTE(review): stride[1] is treated as x and stride[0] as y here —
// confirm against cldnn stride ordering before changing.
if (conv->stride[0] > 1 || conv->stride[1] > 1) {
if (stride_x == 0)
stride_x = conv->stride[1];
if (stride_y == 0)
stride_y = conv->stride[0];
// make sure stride across all eltwise's convolution users is the same
if (conv->stride[1] != stride_x || conv->stride[0] != stride_y) {
can_shrink = false;
break;
}
convs_to_shrink.push_back(user);
} else {
// Unstrided user: nothing to gain from shrinking.
can_shrink = false;
break;
}
}
if (can_shrink) {
// add stride for every eltwise's inputs to have shrinked output
auto e = const_cast<eltwise*>(&(*eltw));
for (size_t dep = 0; dep < e->input_size(); dep++) {
auto dep_stride_x = stride_x;
auto dep_stride_y = stride_y;
// don't shrink if input is broadcasted
if (node->get_dependency(dep).get_output_layout().spatial(0) == 1) {
dep_stride_x = 1;
}
if (node->get_dependency(dep).get_output_layout().spatial(1) == 1) {
dep_stride_y = 1;
}
// Stride tensor ordering: {batch, feature, x, y}.
e->stride.push_back({0, 0, static_cast<tensor::value_type>(dep_stride_x), static_cast<tensor::value_type>(dep_stride_y)});
}
node->recalc_output_layout();
// change stride on every convolution
for (size_t i = 0; i < convs_to_shrink.size(); i++) {
const auto conv =
std::static_pointer_cast<const convolution>(convs_to_shrink[i]->get_primitive());
auto c = const_cast<convolution*>(&(*conv));
c->stride[0] = 1;
c->stride[1] = 1;
convs_to_shrink[i]->recalc_output_layout();
}
}
}
}
}
}

View File

@@ -1,70 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "pass_manager.h"
#include "program_node.h"
#include "layout_optimizer.h"
#include "intel_gpu/graph/program.hpp"
#include "program_helpers.h"
#include "fully_connected_inst.h"
using namespace cldnn;
// Keeps a reference to the shared reorder factory so run() can create
// bias-flattening reorders later.
pre_optimize_bias::pre_optimize_bias(reorder_factory& rf_ref)
    : base_pass("pre_optimize_bias")
    , _rf(rf_ref) {}
// base_pass entry point: forwards to the two-argument overload using the
// reorder factory captured at construction time.
void pre_optimize_bias::run(program& p) {
    run(p, _rf);
}
// function which prepares given primitive for weights optimization
template <typename T>
bool pre_optimize_bias::optimize_bias(T& node, reorder_factory& rf, program& p) {
size_t weights_offset = node.get_primitive()->input.size();
size_t bias_offset = weights_offset + program_helpers::wrap_if_single(node.get_primitive()->weights).size();
bool bias_optimized = false;
for (size_t i = bias_offset; i < node.get_dependencies().size() - node.get_fused_inputs_count(); ++i) {
// find weights primitive with given pimitive_id and add it to weights_optimizer
const program_node& bias = node.get_dependency(i);
auto new_layout = layout(bias.get_output_layout().data_type,
format::bfyx,
{ 1, static_cast<tensor::value_type>(bias.get_output_layout().count()), 1, 1 });
auto reorder = rf.get_reorder(bias.id(),
bias.get_output_layout(),
new_layout);
if (reorder.first) {
p.add_intermediate(reorder.first, node, i, !reorder.second);
bias_optimized = true;
}
}
return bias_optimized;
}
template bool pre_optimize_bias::optimize_bias<convolution_node>(convolution_node& node,
reorder_factory& rf,
program& p);
template bool pre_optimize_bias::optimize_bias<deconvolution_node>(deconvolution_node& node,
reorder_factory& rf,
program& p);
template bool pre_optimize_bias::optimize_bias<fully_connected_node>(fully_connected_node& node,
reorder_factory& rf,
program& p);
// Applies bias optimization to every convolution, deconvolution and
// fully-connected node in processing order. If any bias was reordered,
// all output layouts are forcibly recomputed afterwards.
void pre_optimize_bias::run(program& p, reorder_factory& rf) {
    bool any_changed = false;
    for (auto& prim : p.get_processing_order()) {
        if (prim->type() == convolution::type_id()) {
            any_changed |= optimize_bias(prim->as<convolution>(), rf, p);
        } else if (prim->type() == deconvolution::type_id()) {
            any_changed |= optimize_bias(prim->as<deconvolution>(), rf, p);
        } else if (prim->type() == fully_connected::type_id()) {
            any_changed |= optimize_bias(prim->as<fully_connected>(), rf, p);
        }
    }
    if (!any_changed)
        return;
    // Inserted reorders changed dependency layouts; refresh the whole graph.
    for (auto n : p.get_processing_order()) {
        n->recalc_output_layout(true);
    }
}

View File

@@ -68,14 +68,6 @@ private:
void run(program& p) override;
};
// Pass that precomputes non-opset8 prior_box nodes at graph-build time and
// replaces them with constant data nodes.
// NOTE(review): the pass name string "calculated_prior_boxes" does not match
// the class name "calculate_prior_boxes" — looks like a typo, but the string
// may be referenced by debug/config tooling; confirm before renaming.
class calculate_prior_boxes : public base_pass {
public:
calculate_prior_boxes() : base_pass("calculated_prior_boxes") {}
private:
void run(program& p) override;
};
class compile_graph : public base_pass {
public:
compile_graph() : base_pass("compile_graph") {}
@@ -84,23 +76,6 @@ private:
void run(program& p) override;
};
// Pass that shrinks an eltwise output when all of its users are strided 1x1
// convolutions, moving the stride from the convolutions onto the eltwise.
class eltwise_shrinking : public base_pass {
public:
eltwise_shrinking() : base_pass("eltwise_shrinking") {}
private:
void run(program& p) override;
};
// Pass that removes a stride from eltwise primitives by pushing it into the
// convolution chain feeding each strided input (see conv_stride_extend).
class eltwise_remove_stride : public base_pass {
public:
eltwise_remove_stride() : base_pass("eltwise_remove_stride") {}
private:
void run(program& p) override;
// Recursive helper: absorbs the requested stride "tensor" into the
// convolution chain rooted at "node", resetting it to 1x1 on success.
void conv_stride_extend(program& p, program_node& node, cldnn::tensor& tensor);
};
class graph_initializations : public base_pass {
public:
graph_initializations() : base_pass("init") {}
@@ -206,18 +181,6 @@ private:
layout_optimizer& _lo;
};
// Pass that reorders bias inputs of convolution / deconvolution /
// fully-connected nodes into a flat {1, N, 1, 1} bfyx layout.
class pre_optimize_bias : public base_pass {
public:
explicit pre_optimize_bias(reorder_factory& rf_ref);
private:
void run(program& p) override;
// Overload taking an explicit reorder factory; virtual for test overrides.
virtual void run(program& p, reorder_factory& rf);
// Inserts bias-flattening reorders for one node; returns true if any added.
template <typename T>
bool optimize_bias(T& node, reorder_factory& rf, program& p);
// Shared factory used to create (and deduplicate) reorder primitives.
reorder_factory& _rf;
};
class prepare_padding : public base_pass {
public:
explicit prepare_padding(bool output_size_handling_enabled_switch)

View File

@@ -488,8 +488,6 @@ void program::init_graph() {
OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, "Program::init_graph");
apply_opt_pass<graph_initializations>();
apply_opt_pass<calculate_prior_boxes>();
apply_opt_pass<mark_nodes>();
}
@@ -534,17 +532,6 @@ void program::pre_optimize_graph(bool is_internal) {
// but after format selection to select correct alignment.
// Unfortunately those passes currently happen in reverse order.
apply_opt_pass<concat_input_order>();
// TODO this code should be moved to post compilation after kernel selector will support handling reorder bias
apply_opt_pass<pre_optimize_bias>(rf);
// passes regarding conv + eltwise optimizations
// shrinking eltwise if users are conv 1x1 with stride > 1 optimization
apply_opt_pass<eltwise_shrinking>();
// trying to set stride to 1x1 by shrinking convolutions before eltwise if doable
apply_opt_pass<eltwise_remove_stride>();
}
apply_opt_pass<strided_slice_optimize>();

View File

@@ -2797,24 +2797,6 @@ public:
};
class testing_removal_reorder : public ReorderTest<reorder_test_param> {};
// Verifies that a reorder feeding a 1-D (along-feature) bias into an eltwise
// add is optimized out of the graph: "reorder_bias1" only changes format of a
// flat bias tensor, so the optimizer should fold it away.
TEST_P(testing_removal_reorder, removal_reorder_1d_along_f) {
auto p = GetParam();
create_topologies(input_layout("input", get_input_layout(p)),
reorder("reorder_input", input_info("input"), format::b_fs_yx_fsv16, data_types::f16),
data("weights", get_mem(get_weights_layout(p))),
data("bias1", get_mem(get_bias_layout(p))),
reorder("reorder_bias1", input_info("bias1"), format::b_fs_yx_fsv16, data_types::f16),
convolution("conv_prim", input_info("reorder_input"), "weights", "", 1, p.stride, {1, 1}, p.pad, p.pad, false),
reorder("reorder_conv", input_info("conv_prim"), format::b_fs_yx_fsv16, data_types::f16),
eltwise("add_bias1", { input_info("reorder_conv"), input_info("reorder_bias1") }, eltwise_mode::sum),
reorder("reorder_bfyx", input_info("add_bias1"), p.default_format, data_types::f16)
);
// Run without caching; the bias reorder must be marked optimized-out.
execute(p, false);
ASSERT_EQ(check_optimized_out(p, "reorder_bias1"), true);
}
// Testing bugfix not to remove reorder in front of conv has deep depth input
TEST_P(testing_removal_reorder, only_remove_reorder_shallow_depth_input) {
auto p = GetParam();
@@ -3236,23 +3218,6 @@ TEST(reorder_gpu_optimization, compare_with_ref__bfyx_to_blocked_format_differen
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::i64, data_types::f32, format::bfyx, format::b_fs_yx_fsv16, 3, 32 + 4, 16 + 7, 2, 0, 0, true);
}
// Cached variant of removal_reorder_1d_along_f: the same topology is built
// and executed with model caching enabled (execute(p, true)) to verify the
// bias reorder is still optimized out when loading from cache.
TEST_P(testing_removal_reorder, removal_reorder_1d_along_f_cached) {
auto p = GetParam();
create_topologies(input_layout("input", get_input_layout(p)),
reorder("reorder_input", input_info("input"), format::b_fs_yx_fsv16, data_types::f16),
data("weights", get_mem(get_weights_layout(p))),
data("bias1", get_mem(get_bias_layout(p))),
reorder("reorder_bias1", input_info("bias1"), format::b_fs_yx_fsv16, data_types::f16),
convolution("conv_prim", input_info("reorder_input"), "weights", "", 1, p.stride, {1, 1}, p.pad, p.pad, false),
reorder("reorder_conv", input_info("conv_prim"), format::b_fs_yx_fsv16, data_types::f16),
eltwise("add_bias1", { input_info("reorder_conv"), input_info("reorder_bias1") }, eltwise_mode::sum),
reorder("reorder_bfyx", input_info("add_bias1"), p.default_format, data_types::f16)
);
// Run with caching enabled; the bias reorder must be marked optimized-out.
execute(p, true);
ASSERT_EQ(check_optimized_out(p, "reorder_bias1"), true);
}
#endif
TEST_P(testing_removal_reorder, only_remove_reorder_shallow_depth_input_cached) {