[GPU] Apply fusing/reshape changes into master. (#8002)

* [GPU] Apply fusing/reshape code into master.

Merge the prepare_primitive_fusing and handle_reshape changes into master.

Signed-off-by: hyunback <hyunback.kim@intel.com>

* Apply code-review.

Signed-off-by: hyunback <hyunback.kim@intel.com>

* Apply code-review

Signed-off-by: hyunback <hyunback.kim@intel.com>
This commit is contained in:
hyunback kim
2021-10-20 09:08:39 +09:00
committed by GitHub
parent 03424849fb
commit 6a63cc1936
3 changed files with 41 additions and 6 deletions

View File

@@ -32,7 +32,9 @@ void handle_reshape::run(program& p) {
auto input_lay = input_node.get_output_layout();
auto output_lay = node.get_output_layout();
if (!node.is_in_place())
if (!node.is_in_place() ||
!node.get_fused_activations_funcs().empty() ||
node.has_fused_primitives())
return;
auto are_layouts_identical = program_helpers::are_layouts_identical(input_lay, output_lay);
@@ -43,6 +45,9 @@ void handle_reshape::run(program& p) {
input_node.set_output_layout(output_lay, false);
p.add_optimized_primitive_info(node.id());
p.extract_and_remove(node);
} else if (are_layouts_identical.second) {
p.add_optimized_primitive_info(node.id());
node.can_be_optimized(true);
}
});
}

View File

@@ -55,6 +55,7 @@
void prepare_primitive_fusing::run(program& p) {
fuse_reorders(p);
remove_redundant_reshape(p);
fuse_sigmoid_mul_to_swish(p);
fuse_bias(p);
fuse_simple_primitives(p);
@@ -62,6 +63,25 @@ void prepare_primitive_fusing::run(program& p) {
optimize_fused_ops(p);
}
// Removes reshape nodes that do nothing: every in-place reshape whose input
// and output layouts are reported identical (the first flag returned by
// program_helpers::are_layouts_identical) is recorded as an optimized
// primitive and then extracted from the program.
void prepare_primitive_fusing::remove_redundant_reshape(program &p) {
    for (auto order_it = p.get_processing_order().begin(); order_it != p.get_processing_order().end();) {
        // Step the iterator before visiting: the callback may extract the
        // visited node from the program (presumably this keeps the iterator
        // usable after the removal - confirm against processing_order).
        auto current = *order_it++;

        program_helpers::do_for_types<reshape>(*current, [&p](reshape_node& reshape_nd) {
            auto src_layout = reshape_nd.input().get_output_layout();
            auto dst_layout = reshape_nd.get_output_layout();

            // Only in-place reshapes with matching layouts are candidates.
            const bool removable = reshape_nd.is_in_place() &&
                                   program_helpers::are_layouts_identical(src_layout, dst_layout).first;
            if (!removable)
                return;

            p.add_optimized_primitive_info(reshape_nd.id());
            p.extract_and_remove(reshape_nd);
        });
    }
}
void prepare_primitive_fusing::fuse_sigmoid_mul_to_swish(program &p) {
auto itr = p.get_processing_order().begin();
while (itr != p.get_processing_order().end()) {
@@ -592,12 +612,13 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
};
auto eltwise_supports_fusings = [&](eltwise_node& node) -> bool {
auto out_layout = node.get_output_layout();
if (out_layout.data_type == data_types::f16 && out_layout.size.batch[0] > 1 &&
(_lo.get_optimization_attributes().fs_b_yx_fsv32_network || out_layout.format == format::fs_b_yx_fsv32)) {
return false;
if (_lo.get_optimization_attributes().use_onednn_impls == 0) {
auto out_layout = node.get_output_layout();
if (out_layout.data_type == data_types::f16 && out_layout.size.batch[0] > 1 &&
(_lo.get_optimization_attributes().fs_b_yx_fsv32_network || out_layout.format == format::fs_b_yx_fsv32)) {
return false;
}
}
return true;
};
@@ -976,6 +997,14 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
auto fused_node = parents[fused_idx];
auto peer_node = parents[peer_idx];
if (_lo.get_optimization_attributes().use_onednn_impls) {
auto eltw_in_size = peer_node->get_output_layout().size;
// Temporarily disable mul fusion with a full tensor, as oneDNN doesn't support it
if (fused_node->is_type<convolution>() && prim->mode == eltwise_mode::prod &&
(eltw_in_size.spatial[0] > 1 || eltw_in_size.spatial[1] > 1 || eltw_in_size.batch[0] > 1))
return;
}
if (parent1->is_type<convolution>() && !conv_supports_fusings(parent1->as<convolution>()))
return;

View File

@@ -196,6 +196,7 @@ private:
void fuse_activations(program& p);
void fuse_simple_primitives(program &p);
void optimize_fused_ops(program &p);
void remove_redundant_reshape(program &p);
layout_optimizer& _lo;
};