From 3d79bd1ac52554a77fcca2e4a53191a3e350a6f6 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Fri, 16 Jun 2023 10:33:53 +0400 Subject: [PATCH] [GPU] Minor layout optimizer refactoring (#17553) --- .../intel_gpu/src/graph/concatenation.cpp | 6 +- src/plugins/intel_gpu/src/graph/eltwise.cpp | 8 +- src/plugins/intel_gpu/src/graph/gemm.cpp | 2 +- .../graph/graph_optimizer/compile_graph.cpp | 2 +- .../graph/graph_optimizer/handle_reshape.cpp | 2 +- .../graph_optimizer/pre_replace_deconv.cpp | 11 +- .../graph_optimizer/prepare_buffer_fusing.cpp | 4 +- .../prepare_primitive_fusing.cpp | 52 ++-- .../prepare_primitive_fusing_through.cpp | 4 +- .../graph_optimizer/prepare_quantization.cpp | 6 +- .../remove_redundant_reorders.cpp | 12 +- .../src/graph/impls/ocl/quantize.cpp | 2 +- .../src/graph/include/concatenation_inst.h | 1 - .../src/graph/include/condition_inst.h | 2 +- .../src/graph/include/convert_color_inst.h | 1 - .../src/graph/include/cum_sum_inst.h | 1 - .../src/graph/include/eltwise_inst.h | 1 - .../src/graph/include/embedding_bag_inst.h | 1 - .../intel_gpu/src/graph/include/gemm_inst.h | 1 - .../src/graph/include/layout_optimizer.h | 16 +- .../src/graph/include/program_node.h | 25 +- .../src/graph/include/quantize_inst.h | 1 - .../src/graph/include/reorder_inst.h | 1 - .../intel_gpu/src/graph/include/select_inst.h | 1 - .../intel_gpu/src/graph/layout_optimizer.cpp | 289 ++++++++---------- .../intel_gpu/src/graph/primitive_inst.cpp | 4 +- src/plugins/intel_gpu/src/graph/program.cpp | 4 +- .../intel_gpu/src/graph/program_node.cpp | 5 +- src/plugins/intel_gpu/src/graph/select.cpp | 2 +- .../tests/unit/passes/handle_reshape.cpp | 10 +- 30 files changed, 220 insertions(+), 257 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/concatenation.cpp b/src/plugins/intel_gpu/src/graph/concatenation.cpp index 06df7fe4128..4b70dd14647 100644 --- a/src/plugins/intel_gpu/src/graph/concatenation.cpp +++ b/src/plugins/intel_gpu/src/graph/concatenation.cpp @@ -82,18 +82,18 @@ std::string concatenation_inst::to_string(concatenation_node const& node) { std::stringstream ss_inputs; std::stringstream primitive_description; - for (size_t i = 0; i < node.inputs_count(); ++i) { + for (size_t i = 0; i < node.get_inputs_count(); ++i) { ss_inputs << node.input(i).id(); if (node.input(i).get_output_layout().is_static()) ss_inputs << ", count: " << node.input(i).get_output_layout().count(); else ss_inputs << ", count: " << "?"; - i != (node.inputs_count() - 1) ? ss_inputs << ", " : ss_inputs << ""; + i != (node.get_inputs_count() - 1) ? ss_inputs << ", " : ss_inputs << ""; } json_composite concat_info; concat_info.add("concat axis", desc->axis); - concat_info.add("inputs count", node.inputs_count()); + concat_info.add("inputs count", node.get_inputs_count()); concat_info.add("inputs", ss_inputs.str()); node_info->add("concat info", concat_info); diff --git a/src/plugins/intel_gpu/src/graph/eltwise.cpp b/src/plugins/intel_gpu/src/graph/eltwise.cpp index c954900b898..785b4eda921 100644 --- a/src/plugins/intel_gpu/src/graph/eltwise.cpp +++ b/src/plugins/intel_gpu/src/graph/eltwise.cpp @@ -305,7 +305,7 @@ std::string eltwise_inst::to_string(eltwise_node const& node) { } json_composite eltwise_info; - for (size_t i = 0; i < node.inputs_count(); i++) { + for (size_t i = 0; i < node.get_inputs_count(); i++) { eltwise_info.add("input_" + std::to_string(i), node.input(i).id()); } eltwise_info.add("mode", str_mode); @@ -322,7 +322,7 @@ eltwise_inst::typed_primitive_inst(network& network, eltwise_node const& node) : check_inputs_count(node); // check for stride auto prim = node.get_primitive(); - auto inputs_count = node.inputs_count(); + auto inputs_count = node.get_inputs_count(); if (is_dynamic()) return; @@ -363,10 +363,10 @@ eltwise_inst::typed_primitive_inst(network& network, eltwise_node const& node) : } } else { bool use_new_shape_infer = network.get_config().get_property(ov::intel_gpu::allow_new_shape_infer); - auto input0_pshape = node.input().get_output_layout().get_partial_shape(); + auto input0_pshape = node.get_input_pshape(0); for (size_t i = 1; i < inputs_count; ++i) { - auto input_pshape = node.input(i).get_output_layout().get_partial_shape(); + auto input_pshape = node.get_input_pshape(i); if (input0_pshape.size() > input_pshape.size()) { if (use_new_shape_infer) { diff --git a/src/plugins/intel_gpu/src/graph/gemm.cpp b/src/plugins/intel_gpu/src/graph/gemm.cpp index 0234d11d086..c801f029e1c 100644 --- a/src/plugins/intel_gpu/src/graph/gemm.cpp +++ b/src/plugins/intel_gpu/src/graph/gemm.cpp @@ -216,7 +216,7 @@ std::string gemm_inst::to_string(gemm_node const& node) { std::stringstream primitive_description; json_composite gemm_info; - for (size_t i = 0; i < node.inputs_count(); i++) { + for (size_t i = 0; i < node.get_inputs_count(); i++) { gemm_info.add("input_" + std::to_string(i), node.input(i).id()); } gemm_info.add("alpha", alpha); diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/compile_graph.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/compile_graph.cpp index 1938c76e1ed..b2fa05c7ff2 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/compile_graph.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/compile_graph.cpp @@ -59,7 +59,7 @@ void compile_graph::run(program& p) { // TODO: need to come up with better handling of unsupported shape agnostic cases // e.g. process exceptions from choose_impl() and ignore those for dynamic parameters - if (node->is_type() && node->is_dynamic() && node->get_output_layout().get_partial_shape().size() > 3) + if (node->is_type() && node->is_dynamic() && node->get_output_pshape().size() > 3) can_select_impl = false; // TODO: Remove this WA once we have shape agnostic arg_max_min_axis kernel with non-const k input diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/handle_reshape.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/handle_reshape.cpp index 95ea2410ad7..a30c7d2666f 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/handle_reshape.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/handle_reshape.cpp @@ -104,7 +104,7 @@ void handle_reshape::run(program& p) { if (user->is_type() || user->is_type()) { bool is_fc = user->is_type(); auto wei_dt = is_fc ? user->as().weights().get_output_layout().data_type : - user->as().get_dependency(1).get_output_layout().data_type; + user->as().get_input_layout(1).data_type; onednn_support = layout_optimizer::onednn_check_data_types_for_fc_gemm(output_data_type, wei_dt, out_dt); } else if (user->is_type() || user->is_type()) { bool is_conv = user->is_type(); diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/pre_replace_deconv.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/pre_replace_deconv.cpp index a70e951cf58..f9a6978f065 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/pre_replace_deconv.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/pre_replace_deconv.cpp @@ -39,6 +39,7 @@ void pre_replace_deconv::run(program& p) { auto weights_nodes_id = deconv_prim->weights; auto biases_nodes_id = deconv_prim->bias; auto& input_node = deconv_node.get_dependency(0); + auto input_layout = deconv_node.get_input_layout(0); const primitive_id deconv_node_id = deconv_node.id(); const primitive_id& input_node_id = input_node.id(); @@ -50,12 +51,12 @@ void pre_replace_deconv::run(program& p) { bool perform_opt = false; // fp16 and fp32 bfyx implementation supports transposed convolution - perform_opt |= cldnn::format::dimension(input_node.get_output_layout().format) == 4 && - (input_node.get_output_layout().data_type == data_types::f32 || input_node.get_output_layout().data_type == data_types::f16) && - !((_lo.get_optimization_attributes().b_fs_yx_fsv16_network || input_node.get_output_layout().format == format::b_fs_yx_fsv16) && + perform_opt |= cldnn::format::dimension(input_layout.format) == 4 && + (input_layout.data_type == data_types::f32 || input_layout.data_type == data_types::f16) && + !((_lo.get_optimization_attributes().b_fs_yx_fsv16_network || input_layout.format == format::b_fs_yx_fsv16) && _lo.is_format_optimized(deconv_node, format::b_fs_yx_fsv16)); // int8/uint8 input - perform_opt |= (input_node.get_output_layout().data_type == data_types::i8 || input_node.get_output_layout().data_type == data_types::u8); + perform_opt |= (input_layout.data_type == data_types::i8 || input_layout.data_type == data_types::u8); if (!perform_opt) continue; @@ -64,7 +65,7 @@ void pre_replace_deconv::run(program& p) { // setting convolution parameters based on deconvolution params auto output_layout = deconv_node.get_output_layout(); auto output_pshape = output_layout.get_partial_shape(); - auto input_pshape = input_node.get_output_layout().get_partial_shape(); + auto input_pshape = input_layout.get_partial_shape(); auto spatial_rank = output_layout.get_spatial_rank(); auto stride = deconv_prim->stride; auto pad = deconv_prim->pad; diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp index 7532a0d842c..4432f7b7ade 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp @@ -342,7 +342,7 @@ void prepare_buffer_fusing::run(program& p) { auto can_optimize = [](const program_node* node) { bool is_dynamic = node->is_dynamic(); bool is_planar = format::is_default_format(node->get_output_layout().format); - bool no_pad = !node->get_output_layout().data_padding && !node->get_input_layouts().empty() && !node->get_input_layouts()[0].data_padding; + bool no_pad = !node->get_output_layout().data_padding && !node->get_input_layouts().empty() && !node->get_input_layout(0).data_padding; if (node->is_type() && is_dynamic && is_planar && no_pad && !node->is_output() && !node->has_fused_primitives()) { return true; } @@ -398,7 +398,7 @@ void prepare_buffer_fusing::run(program& p) { const auto& crop_layout = node.get_output_layout(); auto format = crop_layout.format; auto crop_prim = node.get_primitive(); - auto input_layout = node.get_dependency(0).get_output_layout(); + auto input_layout = node.get_input_layout(0); const auto& crop_size = crop_layout.get_tensor(); const auto& out_padd = crop_layout.data_padding; auto opt_lower_pad = crop_prim->offsets.feature[0]; diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp index 4ad51dae1cf..6ca6a654d6a 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp @@ -184,7 +184,7 @@ void prepare_primitive_fusing::fuse_sigmoid_mul_to_swish(program &p) { p.get_processing_order().insert_next(&input, &swish); - swish.calc_output_layout(); + swish.recalc_output_layout(); }); } } @@ -291,10 +291,10 @@ void prepare_primitive_fusing::fuse_bias(program &p) { // Change out_features value to proper dimension for 3D FC case if (is_3d_fully_connected(node->get_dependency(0))) { - out_features = node->get_dependency(0).get_output_layout().spatial(1); + out_features = node->get_input_layout(0).spatial(1); is_3d_fc = true; } else if (is_3d_fully_connected(node->get_dependency(1))) { - out_features = node->get_dependency(1).get_output_layout().spatial(1); + out_features = node->get_input_layout(1).spatial(1); is_3d_fc = true; } auto& const_dep = eltw_node.get_dependency(const_dep_idx); @@ -486,7 +486,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) { continue; auto is_grouped_conv = [](convolution_node& node) -> bool { - auto in_layout = node.get_dependency(0).get_output_layout(); + auto in_layout = node.get_input_layout(0); return (node.get_groups() > 1 && node.get_groups() != static_cast(in_layout.feature())); }; @@ -504,7 +504,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) { // Since reorder inputs is called after this pass // we have to check that blocked formats can be used in the network and layer is optimized for it. if ((node.get_output_layout().format == format::b_fs_yx_fsv16 || - _lo.should_select_b_fs_yx_fsv16_layout(node, node.get_dependency(1).get_output_layout())) && + _lo.should_select_b_fs_yx_fsv16_layout(node, node.get_input_layout(1))) && !is_grouped_conv(node)) return true; @@ -517,7 +517,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) { _lo.is_format_optimized(node, format::fs_b_yx_fsv32) && node.get_primitive()->groups == 1))) return true; - const size_t in_feature = node.get_dependency(0).get_output_layout().feature(); + const size_t in_feature = node.get_input_layout(0).feature(); if ((node.get_output_layout().format == format::b_fs_zyx_fsv16 || (_lo.is_format_optimized(node, format::b_fs_zyx_fsv16) && _lo.get_optimization_attributes().b_fs_zyx_fsv16_network)) && in_feature != 3) @@ -534,7 +534,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) { if (node.get_output_layout().format == format::bs_fs_yx_bsv32_fsv16 || _lo.is_format_optimized(node, format::bs_fs_yx_bsv32_fsv16)) return true; - auto in_dt = node.get_dependency(0).get_output_layout().data_type; + auto in_dt = node.get_input_layout(0).data_type; // TODO: check if that's enough for correct work return data_type_traits::is_i8_u8(in_dt); @@ -547,7 +547,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) { if (eltw_node.get_dependencies().size() < 2) return false; - auto const_layout = eltw_node.get_dependency(1).get_output_layout(); + auto const_layout = eltw_node.get_input_layout(1); auto conv_layout = conv_node.get_output_layout(); auto per_channel_eltwise = const_layout.feature() == conv_layout.feature(); @@ -564,17 +564,17 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) { _lo.get_preferred_impl_type(node, format::any /*dummy*/) == impl_types::onednn) { return true; } else { - auto in_dt = node.get_dependency(0).get_output_layout().data_type; + auto in_dt = node.get_input_layout(0).data_type; return data_type_traits::is_i8_u8(in_dt); } }; auto gemm_supports_fusings = [](gemm_node& node) -> bool { bool does_support_fusings = false; - auto in0_dt = node.get_dependency(0).get_output_layout().data_type; - auto in1_dt = node.get_dependency(1).get_output_layout().data_type; - auto in0_fmt = node.get_dependency(0).get_output_layout().format; - auto in1_fmt = node.get_dependency(1).get_output_layout().format; + auto in0_dt = node.get_input_layout(0).data_type; + auto in1_dt = node.get_input_layout(1).data_type; + auto in0_fmt = node.get_input_layout(0).format; + auto in1_fmt = node.get_input_layout(1).format; if (data_type_traits::is_floating_point(in0_dt) && data_type_traits::is_floating_point(in1_dt)) @@ -582,9 +582,9 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) { if (data_type_traits::is_i8_u8(in0_dt) && in0_fmt == format::bfyx && data_type_traits::is_i8_u8(in1_dt) && in1_fmt == format::bfyx) { - if (node.inputs_count() == 3) { - auto in2_dt = node.get_dependency(2).get_output_layout().data_type; - auto in2_fmt = node.get_dependency(2).get_output_layout().format; + if (node.get_inputs_count() == 3) { + auto in2_dt = node.get_input_layout(2).data_type; + auto in2_fmt = node.get_input_layout(2).format; does_support_fusings = data_type_traits::is_i8_u8(in2_dt) && in2_fmt == format::bfyx ? true : false; } else { does_support_fusings = true; @@ -595,7 +595,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) { }; auto mvn_supports_fusings = [](mvn_node& node, bool for_eltwise = false) -> bool { - auto in_layout = node.get_dependency(0).get_output_layout(); + auto in_layout = node.get_input_layout(0); if (node.get_primitive()->requires_alignment(in_layout.get_partial_shape())) return false; return data_type_traits::is_i8_u8(in_layout.data_type) || for_eltwise; @@ -608,8 +608,8 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) { auto& eltw = static_cast(*node.get_users().front()->get_primitive()); auto& conv = node.get_dependency(0).as(); auto eltw_mode = eltw.mode == eltwise_mode::sum; - auto conv_size = conv.get_dependency(0).get_output_layout().spatial(0) % 128 == 0 && - conv.get_dependency(0).get_output_layout().spatial(1) % 2 == 0; + auto conv_size = conv.get_input_layout(0).spatial(0) % 128 == 0 && + conv.get_input_layout(0).spatial(1) % 2 == 0; auto format = conv.get_output_layout().format == format::bfyx; auto dt = conv.get_output_layout().data_type == data_types::f16; if (eltw_mode && conv_size && format && dt) @@ -743,7 +743,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) { should_fuse |= input.is_type(); - should_fuse |= input.is_type() && data_type_traits::is_i8_u8(input.get_dependency(0).get_output_layout().data_type); + should_fuse |= input.is_type() && data_type_traits::is_i8_u8(input.get_input_layout(0).data_type); should_fuse |= input.is_type(); @@ -832,7 +832,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) { auto out_layout = quantize_node.get_output_layout(); auto in_layout = input_data.get_output_layout(); auto out_dt = out_layout.data_type; - auto in_dt = input_data.get_dependency(0).get_output_layout().data_type; + auto in_dt = input_data.get_input_layout(0).data_type; auto out_dt_is_i8_u8 = data_type_traits::is_i8_u8(out_dt); auto in_dt_is_i8_u8 = data_type_traits::is_i8_u8(in_dt); @@ -858,7 +858,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) { ((out_dt == data_types::f32 || out_dt == data_types::f16) || in_layout.format == format::b_fs_yx_fsv16 || in_layout.format == format::bs_fs_yx_bsv32_fsv16 || - (_lo.should_select_b_fs_yx_fsv16_layout(input_data.as(), input_data.get_dependency(1).get_output_layout()) && + (_lo.should_select_b_fs_yx_fsv16_layout(input_data.as(), input_data.get_input_layout(1)) && !is_grouped_conv(input_data.as())) || // Avoid fusing to b_fs_yx_fsv16 (and similar) kernels _lo.get_optimization_attributes().use_onednn_impls || @@ -935,7 +935,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) { eltwise_mode::div }; - if (node.is_output() || node.inputs_count() != 2 || + if (node.is_output() || node.get_inputs_count() != 2 || std::find(supported_modes.begin(), supported_modes.end(), prim->mode) == supported_modes.end() || !prim->stride.empty()) return; @@ -1008,9 +1008,9 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) { // E.g. parent1 [?,?,768], parent2 [?,?,1] // expected eltw out shape: [?,?,768] // but w/o this check we can fuse eltwise to parent2 and return [?,?,1] as output shape which is unexpected - auto parent1_pshape = parent1.first->get_output_layout().get_partial_shape(); - auto parent2_pshape = parent2.first->get_output_layout().get_partial_shape(); - auto out_pshape = node.get_output_layout().get_partial_shape(); + auto parent1_pshape = parent1.first->get_output_pshape(0); + auto parent2_pshape = parent2.first->get_output_pshape(0); + auto out_pshape = node.get_output_pshape(0); auto are_compatible = [](const ov::PartialShape& out_shape, const ov::PartialShape& in_shape) -> bool { if (out_shape.rank().get_length() != in_shape.rank().get_length()) diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp index 0d09ca97139..c85e4cbd8de 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing_through.cpp @@ -39,7 +39,7 @@ void prepare_primitive_fusing_through::run(program& p) { return false; if (node->is_type() && - node->get_output_layout().data_type != node->get_dependency(0).get_output_layout().data_type) + node->get_output_layout().data_type != node->get_input_layout(0).data_type) return false; // Not to fuse reshape after Reduce changing the order of un-reduced axes. It is expected to be optimized out. @@ -48,7 +48,7 @@ void prepare_primitive_fusing_through::run(program& p) { // Not to raise up target node through reshape where the size of dimension is changed (e.g. Unsqueeze) if (node->is_type() && - node->get_output_layout().get_partial_shape().size() != node->get_dependency(0).get_output_layout().get_partial_shape().size()) + node->get_output_pshape().size() != node->get_input_pshape(0).size()) return false; return true; diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_quantization.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_quantization.cpp index 50adb3d21b0..ba09b64676e 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_quantization.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_quantization.cpp @@ -444,7 +444,7 @@ void prepare_quantization::remove_fake_reorders(program& p, reorder_node& reorde auto &usr = reorder_node.get_users().front(); auto &dep = reorder_node.get_dependency(0); - if (!(usr->is_type() && usr->get_dependency(1).get_output_layout().data_type == data_types::i8) || + if (!(usr->is_type() && usr->get_input_layout(1).data_type == data_types::i8) || !dep.is_input() || dep.get_output_layout().data_type != data_types::u8 || (reorder_node.get_output_layout().data_type != data_types::f32 && reorder_node.get_output_layout().data_type != data_types::f16) || @@ -492,8 +492,8 @@ void prepare_quantization::prepare_asymmetric_quantization(program &p, convoluti if (node.get_users().size() != 1) return false; - auto in0_layout = node.get_dependency(0).get_output_layout(); - auto in1_layout = node.get_dependency(1).get_output_layout(); + auto in0_layout = node.get_input_layout(0); + auto in1_layout = node.get_input_layout(1); if (!node.get_dependency(1).is_type()) return false; diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp index 8a160a4f1dc..8bd5dbb74d8 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp @@ -279,7 +279,7 @@ void remove_redundant_reorders::run(program& p) { continue; auto o_layout = r_node.get_output_layout(); - auto i_layout = r_node.get_dependency(0).get_output_layout(); + auto i_layout = r_node.get_input_layout(0); // Optimize reorder b_fs_yx_fsv16 -> bfyx when spatials are equal to 1. In this case we can reinterpret buffer, // but pads need to be handled correctly. @@ -513,9 +513,9 @@ void remove_redundant_reorders::run(program& p) { return false; auto node_format = node->get_output_layout().format; - for (size_t axis = 0; axis < node->get_dependency(0).get_output_layout().data_padding.lower_size().sizes(node_format).size(); axis++) { + for (size_t axis = 0; axis < node->get_input_layout(0).data_padding.lower_size().sizes(node_format).size(); axis++) { if (!user->is_padding_supported(static_cast(axis), - node->get_dependency(0).get_output_layout().data_padding.lower_size().sizes(node_format)[axis])) + node->get_input_layout(0).data_padding.lower_size().sizes(node_format)[axis])) return false; } } @@ -580,7 +580,7 @@ void remove_redundant_reorders::run(program& p) { // Add fused_primitive_desc of reorder to convolution which propagate original output layout to jitter fused_primitive_desc local_desc(node->get_primitive()); - local_desc.input_layout = input.get_dependency(0).get_output_layout(); // original convolution's output layout + local_desc.input_layout = input.get_input_layout(0); // original convolution's output layout node->set_input_layout(local_desc.input_layout); local_desc.f_param = node->get_fuse_params(); local_desc.outer_dep_start_idx = -1; @@ -654,7 +654,7 @@ void remove_redundant_reorders::run(program& p) { bool remove_dep = reshape_input_node.get_users().size() == 1 && !reshape_input_node.is_output() && !reshape_input_node.has_fused_primitives(); bool remove_current = remove_dep && !reshape_input_node.get_dependencies().empty() && - reshape_input_node.get_dependency(0).get_output_layout() == reshape_node.get_output_layout() && + reshape_input_node.get_input_layout(0) == reshape_node.get_output_layout() && !reshape_node.has_fused_primitives(); if (remove_dep) { @@ -692,7 +692,7 @@ void remove_redundant_reorders::run(program& p) { for (auto n : p.get_processing_order()) { if (n->is_in_data_flow() && n->is_type()) { - auto preferred_impl = lo.get_preferred_impl_type(*n, n->get_dependency(0).get_output_layout().format); + auto preferred_impl = lo.get_preferred_impl_type(*n, n->get_input_layout(0).format); n->set_preferred_impl_type(preferred_impl); } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/quantize.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/quantize.cpp index a7adbaf1e9e..88624da50d1 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/quantize.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/quantize.cpp @@ -76,7 +76,7 @@ public: quantize_params.out_scale = arg.get_output_scale_val(); quantize_params.out_shift = arg.get_output_shift_val(); - for (size_t i = 1; i < arg.inputs_count(); i++) { + for (size_t i = 1; i < arg.get_inputs_count(); i++) { quantize_params.inputs.push_back(convert_data_tensor(impl_param.input_layouts[i])); } diff --git a/src/plugins/intel_gpu/src/graph/include/concatenation_inst.h b/src/plugins/intel_gpu/src/graph/include/concatenation_inst.h index a83a1aee2b0..21fdf3fedab 100644 --- a/src/plugins/intel_gpu/src/graph/include/concatenation_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/concatenation_inst.h @@ -23,7 +23,6 @@ public: program_node& input(size_t idx = 0) const { return get_dependency(idx); } - size_t inputs_count() const { return desc->input.size(); } std::vector get_shape_infer_dependencies() const override { return {}; } }; diff --git a/src/plugins/intel_gpu/src/graph/include/condition_inst.h b/src/plugins/intel_gpu/src/graph/include/condition_inst.h index e3a5a12758c..9562d38d151 100644 --- a/src/plugins/intel_gpu/src/graph/include/condition_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/condition_inst.h @@ -36,7 +36,7 @@ private: program::ptr _program = nullptr; void add_or_change_input_layout(const program_node& node) { - auto layout = node.get_dependency(0).get_output_layout(); + auto layout = node.get_input_layout(0); auto input_id = node.as().result_id(); if (_topology.get_primitives().count(input_id) == 0) { _topology.add_primitive(std::make_shared(input_id, layout)); diff --git a/src/plugins/intel_gpu/src/graph/include/convert_color_inst.h b/src/plugins/intel_gpu/src/graph/include/convert_color_inst.h index 1e41d95c09a..e7001d74154 100644 --- a/src/plugins/intel_gpu/src/graph/include/convert_color_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/convert_color_inst.h @@ -15,7 +15,6 @@ struct typed_program_node : public typed_program_node_baseinput.size(); } }; using convert_color_node = typed_program_node; diff --git a/src/plugins/intel_gpu/src/graph/include/cum_sum_inst.h b/src/plugins/intel_gpu/src/graph/include/cum_sum_inst.h index 2f5538e46e2..3209383bad6 100644 --- a/src/plugins/intel_gpu/src/graph/include/cum_sum_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/cum_sum_inst.h @@ -15,7 +15,6 @@ public: using parent::parent; program_node& input(size_t index = 0) const { return get_dependency(index); } - size_t inputs_count() const { return get_dependencies().size(); } std::vector get_shape_infer_dependencies() const override { return {}; } }; diff --git a/src/plugins/intel_gpu/src/graph/include/eltwise_inst.h b/src/plugins/intel_gpu/src/graph/include/eltwise_inst.h index e1a7256e2cc..dba8ed8dab0 100644 --- a/src/plugins/intel_gpu/src/graph/include/eltwise_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/eltwise_inst.h @@ -30,7 +30,6 @@ public: } program_node& input(size_t idx = 0) const { return get_dependency(idx); } - size_t inputs_count() const { return get_primitive()->input.size(); } std::shared_ptr get_fuse_params() const override { return std::make_shared(typed_desc()); diff --git a/src/plugins/intel_gpu/src/graph/include/embedding_bag_inst.h b/src/plugins/intel_gpu/src/graph/include/embedding_bag_inst.h index 1d4f15ec776..2fb5c34f2a3 100644 --- a/src/plugins/intel_gpu/src/graph/include/embedding_bag_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/embedding_bag_inst.h @@ -17,7 +17,6 @@ public: using parent::parent; program_node& input(size_t index = 0) const { return get_dependency(index); } - size_t inputs_count() const { return get_dependencies().size(); } }; using embedding_bag_node = typed_program_node; diff --git a/src/plugins/intel_gpu/src/graph/include/gemm_inst.h b/src/plugins/intel_gpu/src/graph/include/gemm_inst.h index fe81d863548..d6a52f018fc 100644 --- a/src/plugins/intel_gpu/src/graph/include/gemm_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/gemm_inst.h @@ -17,7 +17,6 @@ public: using parent::parent; program_node& input(size_t idx = 0) const { return get_dependency(idx); } - size_t inputs_count() const { return this->get_primitive()->input_size(); } std::vector get_shape_infer_dependencies() const override { return {}; } }; diff --git a/src/plugins/intel_gpu/src/graph/include/layout_optimizer.h b/src/plugins/intel_gpu/src/graph/include/layout_optimizer.h index 739788a570f..de382fe3d51 100644 --- a/src/plugins/intel_gpu/src/graph/include/layout_optimizer.h +++ b/src/plugins/intel_gpu/src/graph/include/layout_optimizer.h @@ -16,6 +16,7 @@ #include "deconvolution_inst.h" #include "detection_output_inst.h" #include "binary_convolution_inst.h" +#include "quantize_inst.h" #include #include @@ -111,18 +112,9 @@ private: size_t _total_conv; std::map, size_t> _optimized_conv_count; - layout get_expected_layout(layout const& current_layout, - convolution_node const& node, - layout const& output_or_weights_layout); - layout get_expected_layout(layout const& current_layout, - deconvolution_node const& node, - layout const& output_or_weights_layout); - layout get_expected_layout(layout const& current_layout, - detection_output_node const& node, - layout const& output_or_weights_layout); - layout get_expected_layout(layout const& current_layout, - binary_convolution_node const& node, - layout const& output_or_weights_layout); + format get_expected_format(convolution_node const& node); + format get_expected_format(deconvolution_node const& node); + format get_expected_format(quantize_node const& node); bool is_depthwise(const convolution_node& node) const; format imad_case(convolution_node const& node) const; diff --git a/src/plugins/intel_gpu/src/graph/include/program_node.h b/src/plugins/intel_gpu/src/graph/include/program_node.h index 32d6da5f8d8..21937f94b65 100644 --- a/src/plugins/intel_gpu/src/graph/include/program_node.h +++ b/src/plugins/intel_gpu/src/graph/include/program_node.h @@ -166,6 +166,11 @@ public: program_node& get_dependency(size_t idx) const { return *dependencies.at(idx).first; } std::pair get_dependency_with_port(size_t idx) const { return dependencies.at(idx); } + // Count of original primitive inputs, i.e. it doesn't include fused dependencies + size_t get_inputs_count() const { return desc->input_size(); } + // Count of original primitive outputs + size_t get_outputs_count() const { return desc->output_size(); } + std::vector const get_input_layouts() const { std::vector layouts; for (const auto& i : dependencies) { @@ -174,6 +179,20 @@ public: return layouts; } + layout get_input_layout(size_t idx = 0) const { + return get_dependency(idx).get_output_layout(false); + } + + ov::PartialShape get_input_pshape(size_t idx = 0) const { + return get_input_layout(idx).get_partial_shape(); + } + + ov::PartialShape get_output_pshape(size_t idx = 0) const { + if (!is_valid_output_layout(idx)) + return calc_output_layouts()[idx].get_partial_shape(); + return get_output_layout(idx).get_partial_shape(); + } + // replaces idx-th dependency of 'this' with 'new_dep', calls program::remove_if_dangling(old_dep) void replace_dependency(size_t idx, program_node& new_dep, bool remove_if_dangling = true); // searches for 'old_dep' in dependencies list of 'this' and replaces it with 'new_dep', calls @@ -185,8 +204,8 @@ public: void remove_dependency(size_t idx); void remove_dependency(program_node& node); - size_t get_dependency_index(program_node& node) const; - size_t get_user_index(program_node& node) const; + size_t get_dependency_index(const program_node& node) const; + size_t get_user_index(const program_node& node) const; std::set get_memory_dependencies() const; void add_memory_dependency(primitive_id); @@ -242,8 +261,6 @@ public: bool set_output_layout(layout& new_layout, bool invalidate_users_if_changed = true, size_t idx = 0); bool set_output_layouts(std::vector& new_layout, bool invalidate_users_if_changed = true); - size_t get_outputs_count() const { return num_outputs; } - // forces recalculation of cached output layout, invalidates users if new layout is different than previous one and // @p invalidate_users_if_changed is set to true returns whether output layout has changed bool recalc_output_layout(bool invalidate_users_if_changed = true); diff --git a/src/plugins/intel_gpu/src/graph/include/quantize_inst.h b/src/plugins/intel_gpu/src/graph/include/quantize_inst.h index 0336178ff57..ef3be9ba34c 100644 --- a/src/plugins/intel_gpu/src/graph/include/quantize_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/quantize_inst.h @@ -141,7 +141,6 @@ public: using parent::parent; program_node& input(size_t index = 0) const { return get_dependency(index); } - size_t inputs_count() const { return get_dependencies().size(); } int get_levels() const { return get_primitive()->levels; } bool get_packed_binary_output() const { return get_output_layout().data_type == data_types::bin; } bool get_scale_shift_opt() const { return get_primitive()->scale_shift_opt; } diff --git a/src/plugins/intel_gpu/src/graph/include/reorder_inst.h b/src/plugins/intel_gpu/src/graph/include/reorder_inst.h index d7b8e3f3be5..c7b881ada90 100644 --- a/src/plugins/intel_gpu/src/graph/include/reorder_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/reorder_inst.h @@ -29,7 +29,6 @@ public: support_padding_all(true); } - size_t inputs_count() const { return get_primitive()->input.size(); } program_node& mean_nv12() const { return get_dependency(2); } program_node& input(size_t idx = 0) const { return get_dependency(idx); } program_node& mean() const { return get_dependency(1); } diff --git a/src/plugins/intel_gpu/src/graph/include/select_inst.h b/src/plugins/intel_gpu/src/graph/include/select_inst.h index ce6b9d67109..e904d998757 100644 --- a/src/plugins/intel_gpu/src/graph/include/select_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/select_inst.h @@ -17,7 +17,6 @@ public: using parent::parent; program_node& input(size_t idx = 0) const { return get_dependency(idx); } - size_t inputs_count() const { return get_dependencies().size(); } std::vector get_shape_infer_dependencies() const override { return {}; } }; diff --git a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp index 2669d3c918f..2680623efce 100644 --- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp +++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp @@ -183,8 +183,8 @@ bool layout_optimizer::is_format_supported(program_node& node, format::type fmt) return false; if (node.is_type() && fmt == format::b_fs_yx_fsv16 && - node.get_dependency(0).get_output_layout().data_type != data_types::i8 && - node.get_dependency(0).get_output_layout().data_type != data_types::u8) + node.get_input_layout(0).data_type != data_types::i8 && + node.get_input_layout(0).data_type != data_types::u8) return false; if (node.is_type()) @@ -271,7 +271,7 @@ bool layout_optimizer::can_fuse_reorder(program_node& prev, program_node& next, next.get_preferred_impl_type() == impl_types::onednn && ((fmt_prev == format::byxf && fmt_next == format::byxf) || (fmt_prev == format::bfyx && fmt_next == format::byxf && - (prev_dt == data_types::f16 && next.get_dependency(0).get_output_layout().feature() <= 8))) && + (prev_dt == data_types::f16 && next.get_input_layout(0).feature() <= 8))) && is_input_reorder(prev, next)) return true; @@ -531,8 +531,8 @@ layout_optimizer::layout_optimizer(bool output_size_handling_enabled) } bool layout_optimizer::is_depthwise(const convolution_node& node) const { - const int32_t output_channels = node.get_output_layout().feature(); - const int32_t input_channels = node.get_dependency(0).get_output_layout().feature(); + const int32_t output_channels = node.get_output_layout(0).feature(); + const int32_t input_channels = node.get_input_layout(0).feature(); return node.get_groups() == static_cast(input_channels) && input_channels == output_channels; } @@ -667,7 +667,7 @@ bool layout_optimizer::convolution_b_fs_yx_fsv16_opt(const layout& input_layout, bool layout_optimizer::should_select_b_fs_yx_fsv16_layout(convolution_node const& node, layout const& weights_layout) { auto prim = node.get_primitive(); - auto input_layout = node.get_dependency(0).get_output_layout(); + auto input_layout = node.get_input_layout(0); auto const cond_denom = _total_conv > 0 ? 1.0f / static_cast(_total_conv) : 1.0f; auto fully_support_conv_num = _optimized_conv_count.at({format::b_fs_yx_fsv16, false}); auto partially_support_conv_num = _optimized_conv_count.at({format::b_fs_yx_fsv16, true}); @@ -843,8 +843,8 @@ static bool is_node_for_onednn(reduce_node const& node, format preferred_format) static bool is_node_for_onednn(deconvolution_node const& node) { auto prim = node.get_primitive(); - auto input_layout = node.get_dependency(0).get_output_layout(); - auto output_layout = node.get_output_layout(); + auto input_layout = node.get_input_layout(0); + auto output_layout = node.get_output_layout(0); if (input_layout.is_dynamic() || output_layout.is_dynamic()) return false; @@ -920,7 +920,7 @@ bool layout_optimizer::users_for_convolution_byxf_opt(program_node const& node, } else if (user->type() == cldnn::convolution::type_id()) { if (convolution_byxf_opt(node.get_output_layout(), user->calc_output_layout(), - user->get_dependency(1).get_output_layout(), + user->get_input_layout(1), user->as())) { if (!users_for_convolution_byxf_opt(*user, depth - 1)) return false; @@ -1032,18 +1032,15 @@ bool layout_optimizer::is_mixed_layout(program_node& prev, program_node& next, b return false; } -layout layout_optimizer::get_expected_layout(layout const& current_layout, - convolution_node const& node, - layout const& weights_layout) { +format layout_optimizer::get_expected_format(convolution_node const& node) { auto prim = node.get_primitive(); - auto expected_data_type = current_layout.data_type; - auto expected_format = current_layout.format; - auto input_layout = node.get_dependency(0).get_output_layout(); - auto output_layout = node.calc_output_layout(); + auto input_layout = node.get_input_layout(0); + auto output_layout = node.get_output_layout(0); + auto weights_layout = node.weights().get_output_layout().convert_to_weights_layout(prim->grouped_weights_shape); + auto expected_format = output_layout.format; if (prim->deformable_mode) { - output_layout.format = format::adjust_to_rank(format::bfyx, output_layout.get_partial_shape().size()); - return output_layout; + return format::adjust_to_rank(format::bfyx, output_layout.get_partial_shape().size()); } if (input_layout.is_dynamic() || output_layout.is_dynamic()) { @@ -1051,10 +1048,9 @@ layout layout_optimizer::get_expected_layout(layout const& current_layout, expected_format = format::b_fs_yx_fsv16; else if (input_layout.get_partial_shape().size() == 5) expected_format = format::b_fs_zyx_fsv16; - return layout(current_layout.get_partial_shape(), expected_data_type, expected_format); + return expected_format; } - auto expected_tensor = current_layout.get_tensor(); const float cond_denom = _total_conv > 0 ? 1.0f / static_cast(_total_conv) : 1.0f; bool onednn_valid_post_ops = get_post_ops_count(node) <= 32; @@ -1079,22 +1075,18 @@ layout layout_optimizer::get_expected_layout(layout const& current_layout, } else { expected_format = imad_case(node); } - expected_tensor = current_layout.get_tensor(); } else if (_optimization_attributes.b_fs_zyx_fsv16_network && convolution_b_fs_zyx_fsv16_opt(input_layout, output_layout, weights_layout, prim)) { - expected_tensor = current_layout.get_tensor(); - if ((current_layout.data_type == data_types::f32 && current_layout.batch() % 16 == 0) || - (current_layout.data_type == data_types::f16 && current_layout.batch() % 32 == 0)) + if ((output_layout.data_type == data_types::f32 && output_layout.batch() % 16 == 0) || + (output_layout.data_type == data_types::f16 && output_layout.batch() % 32 == 0)) expected_format = cldnn::format::bs_fs_zyx_bsv16_fsv16; else expected_format = cldnn::format::b_fs_zyx_fsv16; - } else if (current_layout.format == format::bfzyx) { - expected_tensor = current_layout.get_tensor(); + } else if (output_layout.format == format::bfzyx) { expected_format = cldnn::format::bfzyx; } else if (_optimization_attributes.bs_fs_yx_bsv16_fsv16_network && convolution_bs_fs_yx_bsv16_fsv16_opt(node.input().get_output_layout(), output_layout, weights_layout, prim)) { - expected_tensor = current_layout.get_tensor(); expected_format = cldnn::format::bs_fs_yx_bsv16_fsv16; } else if (_optimization_attributes.fs_b_yx_fsv32_network && !node.get_transposed() && ((convolution_fs_b_yx_fsv32_opt(input_layout, @@ -1109,72 +1101,67 @@ layout layout_optimizer::get_expected_layout(layout const& current_layout, // 2-nd: the previous conv primitive supports fs_b_yx_fsv32 layout and // current conv primitives supports this one with weak restrictions - // that should be cheaper than reordering data to another layout - expected_tensor = current_layout.get_tensor(); expected_format = format::fs_b_yx_fsv32; } else if (should_select_b_fs_yx_fsv16_layout(node, weights_layout)) { - expected_tensor = current_layout.get_tensor(); expected_format = cldnn::format::b_fs_yx_fsv16; - } else if (current_layout.data_type == data_types::f16 && - layout_optimizer::convolution_byxf_opt(input_layout, current_layout, weights_layout, node) && + } else if (output_layout.data_type == data_types::f16 && + layout_optimizer::convolution_byxf_opt(input_layout, output_layout, weights_layout, node) && (users_for_convolution_byxf_opt(node, 2) || deps_for_convolution_byxf_opt(node, 2)) && // todo: remove this condition when yxfb optimizations will be disabled - current_layout.format != cldnn::format::yxfb && current_layout.batch() == 1) { - expected_tensor = current_layout.get_tensor(); + output_layout.format != cldnn::format::yxfb && output_layout.batch() == 1) { expected_format = cldnn::format::byxf; - } else if (current_layout.format == format::b_fs_yx_fsv4 || - current_layout.format == format::os_is_yx_osv16_isv4) { + } else if (output_layout.format == format::b_fs_yx_fsv4 || + output_layout.format == format::os_is_yx_osv16_isv4) { // imad case // nothing to do, just go out from here. - } else if (layout_optimizer::convolution_bfyx_opt(current_layout, weights_layout, prim) || _output_size_handling_enabled || node.get_transposed()) { - expected_tensor = current_layout.get_tensor(); - if (current_layout.format == format::b_fs_zyx_fsv16 || current_layout.format == format::bs_fs_zyx_bsv16_fsv16) - expected_format = cldnn::format::bfzyx; - else - expected_format = cldnn::format::bfyx; + } else if (layout_optimizer::convolution_bfyx_opt(output_layout, weights_layout, prim) || _output_size_handling_enabled || node.get_transposed()) { + { + if (output_layout.format == format::b_fs_zyx_fsv16 || output_layout.format == format::bs_fs_zyx_bsv16_fsv16) + expected_format = cldnn::format::bfzyx; + else + expected_format = cldnn::format::bfyx; + } } else { - expected_tensor = current_layout.get_tensor(); expected_format = cldnn::format::yxfb; } } - return layout(expected_data_type, expected_format, expected_tensor); + return expected_format; } -layout layout_optimizer::get_expected_layout(layout const& current_layout, - deconvolution_node const& node, - layout const& output_or_weights_layout) { +format layout_optimizer::get_expected_format(deconvolution_node const& node) { auto prim = node.get_primitive(); - auto expected_data_type = current_layout.data_type; - auto expected_format = current_layout.format; - auto input_layout = node.get_dependency(0).get_output_layout(); - auto output_layout = node.calc_output_layout(); + auto input_layout = node.get_input_layout(0); + auto output_layout = node.get_output_layout(0); + auto weights_layout = node.weights().get_output_layout().convert_to_weights_layout(node.get_primitive()->grouped_weights_shape); + auto expected_format = output_layout.format; if (input_layout.is_dynamic() || output_layout.is_dynamic()) { if (input_layout.get_partial_shape().size() <= 4) expected_format = format::b_fs_yx_fsv16; else if (input_layout.get_partial_shape().size() == 5) expected_format = format::b_fs_zyx_fsv16; - return layout(current_layout.get_partial_shape(), expected_data_type, expected_format); + return expected_format; } - auto expected_tensor = current_layout.get_tensor(); + auto expected_shape = output_layout.get_shape(); bool use_onednn_impls = _optimization_attributes.use_onednn_impls; if (use_onednn_impls && is_node_for_onednn(node)) { // XXX: need to take the situation into consideration where it is called from prepare_primitive_fusing expected_format = node.get_preferred_output_fmt(); } else if (_optimization_attributes.b_fs_zyx_fsv16_network && - deconvolution_b_fs_zyx_fsv16_opt(current_layout, output_or_weights_layout, prim)) { - if ((current_layout.data_type == data_types::f32 && expected_tensor.batch[0] % 16 == 0) || - (current_layout.data_type == data_types::f16 && expected_tensor.batch[0] % 32 == 0)) + deconvolution_b_fs_zyx_fsv16_opt(output_layout, weights_layout, prim)) { + if ((output_layout.data_type == data_types::f32 && expected_shape[0] % 16 == 0) || + (output_layout.data_type == data_types::f16 && expected_shape[0] % 32 == 0)) expected_format = cldnn::format::bs_fs_zyx_bsv16_fsv16; else expected_format = cldnn::format::b_fs_zyx_fsv16; } else if ((_optimization_attributes.b_fs_yx_fsv16_network) && - deconvolution_b_fs_yx_fsv16_opt(current_layout, output_or_weights_layout, prim)) { - auto input_tensor = node.get_dependency(0).get_output_layout().get_tensor(); - int input_features = input_tensor.feature[0]; - int output_features = expected_tensor.feature[0]; + deconvolution_b_fs_yx_fsv16_opt(output_layout, weights_layout, prim)) { + auto input_shape = input_layout.get_shape(); + auto input_features = input_shape[1]; + auto output_features = expected_shape[1]; float f_cost = static_cast(input_features * output_features) / (align_to(input_features, 16) * align_to(output_features, 16)); float stride_cost = 1 / static_cast(prim->stride[prim->stride.size() - 1]); if (f_cost * stride_cost > 0.1f) @@ -1182,33 +1169,80 @@ layout layout_optimizer::get_expected_layout(layout const& current_layout, else expected_format = cldnn::format::bfyx; } - return layout(expected_data_type, expected_format, expected_tensor); + return expected_format; } -layout layout_optimizer::get_expected_layout(layout const& current_layout, - detection_output_node const& node, - layout const& output_or_weights_layout) { - auto prim = node.get_primitive(); - auto expected_tensor = current_layout.get_tensor(); - auto expected_data_type = data_types::f32; - auto expected_format = output_or_weights_layout.format; +format layout_optimizer::get_expected_format(quantize_node const& node) { + auto layout = node.get_output_layout(); + auto expected = format::any; - return layout(expected_data_type, expected_format, expected_tensor); -} + std::function only_gemm_users = [&](const program_node& node) { + bool all_users_gemm = true; -layout layout_optimizer::get_expected_layout(layout const& current_layout, - binary_convolution_node const& node, - layout const& /*output_or_weights_layout*/) { - auto prim = node.get_primitive(); - auto expected_tensor = current_layout.get_tensor(); - auto expected_data_type = data_types::bin; - auto expected_format = cldnn::format::b_fs_yx_32fp; + for (auto user : node.get_users()) { + if (user->is_type() || user->is_type()) + all_users_gemm &= only_gemm_users(*user); + else if (user->is_type()) + all_users_gemm &= true; + else + return false; + } - return layout(expected_data_type, expected_format, expected_tensor); + return all_users_gemm; + }; + + auto use_onednn_impls = _optimization_attributes.use_onednn_impls; + + if (use_onednn_impls) { + auto& user = node.get_users().front(); + if (user->get_preferred_input_fmt(user->get_dependency_index(node)) != format::any) { + expected = user->get_preferred_input_fmt(user->get_dependency_index(node)); + } else { + expected = format::any; + } + } else if (only_gemm_users(node)) { + // TODO: Gemm is not supporting fsv layouts + expected = format::get_default_format(node.get_output_layout().format.dimension()); + // TODO: check other types for first conv + } else if (layout.is_static() && layout.format.spatial_num() == 2 && + (layout.data_type == data_types::i8 || layout.data_type == data_types::u8) && + layout.batch() % 16 == 0) { + if (use_onednn_impls && layout.batch() % 32 == 0) { + if (node.get_users().size() == 1 && node.get_users().front()->is_type()) { + auto& conv = node.get_users().front()->as(); + auto ws = conv.get_input_layout(1).get_tensor(); + if (ws.spatial[0] != 7 || conv.get_primitive()->groups > 1 || layout.feature() == 1) + expected = format::bfyx; + else + expected = format::bs_fs_yx_bsv16_fsv4; + + auto conv_output_layout = conv.get_output_layout(); + auto weights_layout = conv.weights().get_output_layout().convert_to_weights_layout(conv.get_primitive()->grouped_weights_shape); + format expected_conv_fmt = get_expected_format(conv); + if (expected == format::bfyx && expected_conv_fmt == format::bs_fs_yx_bsv32_fsv32 && layout.feature() % 32 == 0) + expected = expected_conv_fmt; + } + } else if (layout.feature() > 8) { + expected = format::b_fs_yx_fsv16; + } else { + expected = format::b_fs_yx_fsv4; + } + } else if (layout.format.spatial_num() == 3 && (layout.data_type == data_types::i8 || layout.data_type == data_types::u8)) { + expected = format::b_fs_zyx_fsv16; + } + + // In case of input -> ... -> quantize -> concat + if (layout.is_static() && expected == format::any + && (node.get_users().size() == 1 && node.get_users().front()->is_type()) + && (layout.batch() < 4 && layout.feature() < 4)) { + expected = format::get_default_format(layout.get_rank(), false, false); + } + + return expected; } bool layout_optimizer::are_data_types_suitable_for_onednn(program_node& node) { - auto in_dt = node.get_dependency(0).get_output_layout(false).data_type; + auto in_dt = node.get_input_layout(0).data_type; auto out_dt = node.get_output_layout(false).data_type; // Generally, fp32 input does NOT use oneDNN @@ -1230,10 +1264,10 @@ bool layout_optimizer::are_data_types_suitable_for_onednn(program_node& node) { } else if (node.is_type() || node.is_type()) { bool is_fc = node.is_type(); auto wei_dt = is_fc ? node.as().weights().get_output_layout().data_type : - node.as().get_dependency(1).get_output_layout().data_type; + node.as().get_input_layout(1).data_type; return onednn_check_data_types_for_fc_gemm(in_dt, wei_dt, out_dt); } else if (node.is_type()) { - auto input_fmt = node.get_dependency(0).get_output_layout().format; + auto input_fmt = node.get_input_layout(0).format; auto output_fmt = node.get_output_layout().format; // For mixed precision case, oneDNN is slower than clDNN @@ -1398,7 +1432,7 @@ impl_types layout_optimizer::get_preferred_impl_type(program_node& node, format format::bs_fs_yx_bsv32_fsv16, format::bs_fs_yx_bsv32_fsv32, }; - if (blocked_formats.find(node.get_dependency(0).get_output_layout().format) != blocked_formats.end()) { + if (blocked_formats.find(node.get_input_layout(0).format) != blocked_formats.end()) { preferred_impl = impl_types::ocl; } else { auto& nms_node = node.as(); @@ -1440,7 +1474,7 @@ impl_types layout_optimizer::get_preferred_impl_type(program_node& node, format format::bs_fs_yx_bsv32_fsv32, }; - auto input_layout = node.get_dependency(0).get_output_layout(); + auto input_layout = node.get_input_layout(0); auto output_layout = node.get_output_layout(); auto input_fmt = input_layout.format; @@ -1597,13 +1631,13 @@ format layout_optimizer::get_preferred_format(program_node& node) { if (allow_new_shape_infer) { if (node.is_type()) - return format::get_default_format(node.get_dependency(0).get_output_layout(false).get_rank()); + return format::get_default_format(node.get_input_layout(0).get_rank()); // Let reorder_input pass to check input format instead of output_format in forward investigation, vice versa auto out_lay_rank = node.get_output_layout(false).get_rank(); auto dep_size = node.get_dependencies().size(); for (size_t i = 0; i < dep_size; i++) { - auto in_lay_rank = node.get_dependency(i).get_output_layout(false).get_rank(); + auto in_lay_rank = node.get_input_layout(i).get_rank(); const auto& shape_infer_deps = node.get_shape_infer_dependencies(); if (std::find(shape_infer_deps.begin(), shape_infer_deps.end(), i) != shape_infer_deps.end()) { auto fmt = format::get_default_format(in_lay_rank, false, false); @@ -1631,80 +1665,11 @@ format layout_optimizer::get_preferred_format(program_node& node) { if (!_forcing_map.empty() && _forcing_map.count(node.id()) != 0) { expected = _forcing_map.at(node.id()).first; } else if (node.is_type()) { - auto& conv_node = node.as(); - auto weights_layout = conv_node.weights().get_output_layout().convert_to_weights_layout(conv_node.get_primitive()->grouped_weights_shape); - expected = get_expected_layout(output_layout, conv_node, weights_layout).format; + expected = get_expected_format(node.as()); } else if (node.is_type()) { - auto& bconv_node = node.as(); - auto weights_layout = bconv_node.weights().get_output_layout().convert_to_weights_layout(false); - expected = get_expected_layout(output_layout, bconv_node, weights_layout).format; - } else if (node.is_type()) { - expected = get_expected_layout( - output_layout, - node.as(), - layout{ data_types::f32, format::bfyx, tensor{} }).format; + expected = cldnn::format::b_fs_yx_32fp; } else if (node.is_type()) { - auto layout = node.get_output_layout(); - - std::function only_gemm_users = [&](const program_node& node) { - bool all_users_gemm = true; - - for (auto user : node.get_users()) { - if (user->is_type() || user->is_type()) - all_users_gemm &= only_gemm_users(*user); - else if (user->is_type()) - all_users_gemm &= true; - else - return false; - } - - return all_users_gemm; - }; - - if (use_onednn_impls) { - auto& user = node.get_users().front(); - if (user->get_preferred_input_fmt(user->get_dependency_index(node)) != format::any) { - expected = user->get_preferred_input_fmt(user->get_dependency_index(node)); - } else { - expected = format::any; - } - } else if (only_gemm_users(node)) { - // TODO: Gemm is not supporting fsv layouts - expected = format::get_default_format(node.get_output_layout().format.dimension()); - // TODO: check other types for first conv - } else if (layout.is_static() && layout.format.spatial_num() == 2 && - (layout.data_type == data_types::i8 || layout.data_type == data_types::u8) && - layout.batch() % 16 == 0) { - if (use_onednn_impls && layout.batch() % 32 == 0) { - if (node.get_users().size() == 1 && node.get_users().front()->is_type()) { - auto& conv = node.get_users().front()->as(); - auto ws = conv.get_dependency(1).get_output_layout().get_tensor(); - if (ws.spatial[0] != 7 || conv.get_primitive()->groups > 1 || layout.feature() == 1) - expected = format::bfyx; - else - expected = format::bs_fs_yx_bsv16_fsv4; - - auto conv_output_layout = conv.get_output_layout(); - auto weights_layout = conv.weights().get_output_layout().convert_to_weights_layout(conv.get_primitive()->grouped_weights_shape); - format expected_conv_fmt = get_expected_layout(conv_output_layout, conv, weights_layout).format; - if (expected == format::bfyx && expected_conv_fmt == format::bs_fs_yx_bsv32_fsv32 && layout.feature() % 32 == 0) - expected = expected_conv_fmt; - } - } else if (layout.feature() > 8) { - expected = format::b_fs_yx_fsv16; - } else { - expected = format::b_fs_yx_fsv4; - } - } else if (layout.format.spatial_num() == 3 && (layout.data_type == data_types::i8 || layout.data_type == data_types::u8)) { - expected = format::b_fs_zyx_fsv16; - } - - // In case of input -> ... -> quantize -> concat - if (layout.is_static() && expected == format::any - && (node.get_users().size() == 1 && node.get_users().front()->is_type()) - && (layout.batch() < 4 && layout.feature() < 4)) { - expected = format::get_default_format(layout.get_rank(), false, false); - } + expected = get_expected_format(node.as()); } else if (node.is_type() || node.is_type()) { if (node.is_type() && node.as().get_primitive()->has_surface_input()) { expected = format::nv12; @@ -1712,13 +1677,11 @@ format layout_optimizer::get_preferred_format(program_node& node) { expected = node.get_output_layout().format; } } else if (node.is_type()) { - expected = format::get_default_format(node.get_output_layout().format.dimension()); + expected = format::get_default_format(node.get_output_layout().get_rank()); } else if (node.is_type()) { - auto& deconv_node = node.as(); - auto weights_layout = deconv_node.weights().get_output_layout().convert_to_weights_layout(deconv_node.get_primitive()->grouped_weights_shape); - expected = get_expected_layout(output_layout, deconv_node, weights_layout).format; + expected = get_expected_format(node.as()); } else if (node.is_type()) { - auto input_layout = node.get_dependency(0).get_output_layout(); + auto input_layout = node.get_input_layout(0); if (input_layout.format.dimension() == 5 && (input_layout.data_type == data_types::f32 || input_layout.data_type == data_types::f16)) expected = format::bfzyx; @@ -1810,7 +1773,7 @@ void layout_optimizer::select_preferred_formats_for_onednn(program_node& node, d can_optimize_permute = pnode.get_users().size() == 1 && pnode.get_output_layout().data_type == node.get_output_layout().data_type && !pnode.has_fused_primitives() - && !pnode.is_output() && pnode.get_dependency(0).get_output_layout().is_static() + && !pnode.is_output() && pnode.get_input_layout(0).is_static() && pnode.is_reverse_rotating_except_batch(); } if (!can_optimize_permute) { @@ -1846,7 +1809,7 @@ void layout_optimizer::select_preferred_formats_for_onednn(program_node& node, d auto& pnode = node.get_users().front()->as(); auto can_optimize_permute = pnode.get_output_layout().data_type == node.get_output_layout().data_type && !pnode.has_fused_primitives() - && !pnode.is_output() && pnode.get_dependency(0).get_output_layout().is_static() + && !pnode.is_output() && pnode.get_input_layout(0).is_static() && pnode.is_rotating_except_batch(); if (can_optimize_permute) { dst_fmt = format::byxf; diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index 4f5e9acc27b..53e1cafbd01 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -799,8 +799,8 @@ primitive_inst::primitive_inst(network& network, program_node const& node, bool , _org_id(node.get_org_primitive_id()) , _is_input(node.is_input()) , _is_output(node.is_output()) - , _inputs_memory_count(node.get_primitive()->input_size()) - , _outputs_memory_count(node.get_primitive()->output_size()) + , _inputs_memory_count(node.get_inputs_count()) + , _outputs_memory_count(node.get_outputs_count()) , _fused_mem_count(node.get_fused_inputs_count()) , _fused_mem_offset((_fused_mem_count > 0 && node.has_fused_dep()) ? node.get_first_fused_dep_idx() : 0) , _can_be_optimized(node.can_be_optimized()) diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index 11a4464edec..76719be1511 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -1054,7 +1054,7 @@ void program::fuse_nodes(program_node &fused_node, fused_primitive_desc local_desc(peer_node.get_primitive()); local_desc.f_param = get_node_ptr(peer_node.id())->get_fuse_params(); local_desc.total_num_deps = peer_node.get_dependencies().size(); - local_desc.input_layout = peer_node.get_dependency(0).get_output_layout(); + local_desc.input_layout = peer_node.get_input_layout(0); local_desc.output_layout = peer_layout; if (fused_node.in_shape_of_subgraph && !peer_node.in_shape_of_subgraph) { @@ -1350,7 +1350,7 @@ void program::set_layout_optimizer_attributes(layout_optimizer& lo) { if (!conv.is_dynamic()) { // In dynamic shape, conv is fixed as a predefined format b_fs_yx_fsv16 - auto input_size = node->get_dependency(0).get_output_layout().get_tensor(); + auto input_size = node->get_input_layout(0).get_tensor(); auto ifm = static_cast(input_size.feature[0]); if (conv.get_primitive()->groups == ifm && conv.get_primitive()->groups >= 16) total_dw_conv_layers++; diff --git a/src/plugins/intel_gpu/src/graph/program_node.cpp b/src/plugins/intel_gpu/src/graph/program_node.cpp index ac0b4d1f8f8..03f9592f2bd 100644 --- a/src/plugins/intel_gpu/src/graph/program_node.cpp +++ b/src/plugins/intel_gpu/src/graph/program_node.cpp @@ -219,7 +219,7 @@ void program_node::remove_dependency(program_node& node) { remove_dependency(i); } -size_t program_node::get_user_index(program_node& node) const { +size_t program_node::get_user_index(const program_node& node) const { size_t idx = 0; for (auto& user : users) { if (user == &node) @@ -231,7 +231,7 @@ size_t program_node::get_user_index(program_node& node) const { OPENVINO_ASSERT(false, "Search invalid user node" + node.id() + " node"); } -size_t program_node::get_dependency_index(program_node& node) const { +size_t program_node::get_dependency_index(const program_node& node) const { for (size_t i = 0; i < dependencies.size(); ++i) if (dependencies[i].first == &node) return i; @@ -1268,4 +1268,3 @@ void program_node::init_onednn_primitive_attributes() { #endif // ENABLE_ONEDNN_FOR_GPU - diff --git a/src/plugins/intel_gpu/src/graph/select.cpp b/src/plugins/intel_gpu/src/graph/select.cpp index d95ddbd72f3..74cef26e0a9 100644 --- a/src/plugins/intel_gpu/src/graph/select.cpp +++ b/src/plugins/intel_gpu/src/graph/select.cpp @@ -64,7 +64,7 @@ std::string select_inst::to_string(select_node const& node) { std::stringstream primitive_description; json_composite select_info; - for (size_t i = 0; i < node.inputs_count(); i++) { + for (size_t i = 0; i < node.get_inputs_count(); i++) { select_info.add("input_" + std::to_string(i), node.input(i).id()); } diff --git a/src/plugins/intel_gpu/tests/unit/passes/handle_reshape.cpp b/src/plugins/intel_gpu/tests/unit/passes/handle_reshape.cpp index d28437d6e3a..d4cb1847bd7 100644 --- a/src/plugins/intel_gpu/tests/unit/passes/handle_reshape.cpp +++ b/src/plugins/intel_gpu/tests/unit/passes/handle_reshape.cpp @@ -91,7 +91,7 @@ TEST(handle_reshape, skip_reorder_node_to_split_when_onndnn_not_support) { ASSERT_NE(prog, nullptr); - ASSERT_TRUE(prog->get_node("matmul").get_dependency(0).get_output_layout().data_type == data_types::f16); + ASSERT_TRUE(prog->get_node("matmul").get_input_layout(0).data_type == data_types::f16); } TEST(handle_reshape, correct_parameters_propagation) { @@ -123,8 +123,8 @@ TEST(handle_reshape, correct_parameters_propagation) { ASSERT_TRUE(prog->get_node("reshape").can_be_optimized()); - auto out_shape0 = prog->get_node("e2").get_output_layout().get_partial_shape(); - auto out_shape1 = prog->get_node("reorder").get_output_layout().get_partial_shape(); + auto out_shape0 = prog->get_node("e2").get_output_pshape(); + auto out_shape1 = prog->get_node("reorder").get_output_pshape(); ov::PartialShape expected_out_shape{2, 12}; @@ -171,8 +171,8 @@ TEST(handle_reshape, correct_parameters_propagation_2_inputs) { ASSERT_TRUE(reshape_split_node.is_type()); ASSERT_EQ(reshape_split_node.get_dependencies().size(), 2); - auto out_shape0 = prog->get_node("e2").get_output_layout().get_partial_shape(); - auto out_shape1 = prog->get_node("reorder").get_output_layout().get_partial_shape(); + auto out_shape0 = prog->get_node("e2").get_output_pshape(); + auto out_shape1 = prog->get_node("reorder").get_output_pshape(); ov::PartialShape expected_out_shape{2, 12};