From 6a63cc1936633d41963596b2b47feb3ce10e8e7f Mon Sep 17 00:00:00 2001 From: hyunback kim Date: Wed, 20 Oct 2021 09:08:39 +0900 Subject: [PATCH] [GPU] Apply fusing/reshape changes into master. (#8002) * [GPU] Apply fusing/reshape code into master. Merge prepare_primitive_fusing, handle_reshape into master. Signed-off-by: hyunback * Apply code-review. Signed-off-by: hyunback * Apply code-review Signed-off-by: hyunback --- .../src/graph_optimizer/handle_reshape.cpp | 7 +++- .../prepare_primitive_fusing.cpp | 39 ++++++++++++++++--- .../clDNN/src/include/pass_manager.h | 1 + 3 files changed, 41 insertions(+), 6 deletions(-) diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/handle_reshape.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/handle_reshape.cpp index 288f52de7a9..64117505da6 100644 --- a/inference-engine/thirdparty/clDNN/src/graph_optimizer/handle_reshape.cpp +++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/handle_reshape.cpp @@ -32,7 +32,9 @@ void handle_reshape::run(program& p) { auto input_lay = input_node.get_output_layout(); auto output_lay = node.get_output_layout(); - if (!node.is_in_place()) + if (!node.is_in_place() || + !node.get_fused_activations_funcs().empty() || + node.has_fused_primitives()) return; auto are_layouts_identical = program_helpers::are_layouts_identical(input_lay, output_lay); @@ -43,6 +45,9 @@ void handle_reshape::run(program& p) { input_node.set_output_layout(output_lay, false); p.add_optimized_primitive_info(node.id()); p.extract_and_remove(node); + } else if (are_layouts_identical.second) { + p.add_optimized_primitive_info(node.id()); + node.can_be_optimized(true); } }); } diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_primitive_fusing.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_primitive_fusing.cpp index dd415e71f1e..aef93029ce1 100644 --- a/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_primitive_fusing.cpp +++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_primitive_fusing.cpp @@ -55,6 +55,7 @@ void prepare_primitive_fusing::run(program& p) { fuse_reorders(p); + remove_redundant_reshape(p); fuse_sigmoid_mul_to_swish(p); fuse_bias(p); fuse_simple_primitives(p); @@ -62,6 +63,25 @@ void prepare_primitive_fusing::run(program& p) { optimize_fused_ops(p); } +void prepare_primitive_fusing::remove_redundant_reshape(program &p) { + auto node_itr = p.get_processing_order().begin(); + while (node_itr != p.get_processing_order().end()) { + auto node = (*node_itr++); + program_helpers::do_for_types(*node, [&p](reshape_node& node) { + auto input_lay = node.input().get_output_layout(); + auto output_lay = node.get_output_layout(); + + if (!node.is_in_place()) + return; + + if (program_helpers::are_layouts_identical(input_lay, output_lay).first) { + p.add_optimized_primitive_info(node.id()); + p.extract_and_remove(node); + } + }); + } +} + void prepare_primitive_fusing::fuse_sigmoid_mul_to_swish(program &p) { auto itr = p.get_processing_order().begin(); while (itr != p.get_processing_order().end()) { @@ -592,12 +612,13 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) { }; auto eltwise_supports_fusings = [&](eltwise_node& node) -> bool { - auto out_layout = node.get_output_layout(); - if (out_layout.data_type == data_types::f16 && out_layout.size.batch[0] > 1 && - (_lo.get_optimization_attributes().fs_b_yx_fsv32_network || out_layout.format == format::fs_b_yx_fsv32)) { - return false; + if (_lo.get_optimization_attributes().use_onednn_impls == 0) { + auto out_layout = node.get_output_layout(); + if (out_layout.data_type == data_types::f16 && out_layout.size.batch[0] > 1 && + (_lo.get_optimization_attributes().fs_b_yx_fsv32_network || out_layout.format == format::fs_b_yx_fsv32)) { + return false; + } } - return true; }; @@ -976,6 +997,14 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) { auto fused_node = parents[fused_idx]; auto peer_node = parents[peer_idx]; + if (_lo.get_optimization_attributes().use_onednn_impls) { + auto eltw_in_size = peer_node->get_output_layout().size; + // Temporary disable mul fusion with full tensor as onednn doesn't support it + if (fused_node->is_type() && prim->mode == eltwise_mode::prod && + (eltw_in_size.spatial[0] > 1 || eltw_in_size.spatial[1] > 1 || eltw_in_size.batch[0] > 1)) + return; + } + if (parent1->is_type() && !conv_supports_fusings(parent1->as())) return; diff --git a/inference-engine/thirdparty/clDNN/src/include/pass_manager.h b/inference-engine/thirdparty/clDNN/src/include/pass_manager.h index 156f21dfa2e..670dac7adc6 100644 --- a/inference-engine/thirdparty/clDNN/src/include/pass_manager.h +++ b/inference-engine/thirdparty/clDNN/src/include/pass_manager.h @@ -196,6 +196,7 @@ private: void fuse_activations(program& p); void fuse_simple_primitives(program &p); void optimize_fused_ops(program &p); + void remove_redundant_reshape(program &p); layout_optimizer& _lo; };