[GPU] Minor layout optimizer refactoring (#17553)
commit 3d79bd1ac5
parent 55156f9a6c
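At a glance, the refactor replaces verbose dependency/layout chains and the per-node inputs_count() helpers with new program_node accessors (get_input_layout(), get_input_pshape(), get_output_pshape(), get_inputs_count(), get_outputs_count()) added in program_node.h below. A minimal, self-contained sketch of the call-site pattern follows; the types here are simplified stand-ins for illustration, not the real cldnn classes.

// Stand-in types; only the accessor pattern mirrors the real program_node API.
#include <cstddef>
#include <vector>

struct layout {
    bool static_shape = true;
    bool is_static() const { return static_shape; }
};

struct program_node {
    std::vector<program_node*> deps;
    layout out_layout;

    program_node& get_dependency(size_t idx) const { return *deps.at(idx); }
    layout get_output_layout(bool /*unused*/ = true) const { return out_layout; }

    // New-style helper mirroring the one this commit adds to program_node.h.
    layout get_input_layout(size_t idx = 0) const {
        return get_dependency(idx).get_output_layout(false);
    }
};

// Before: call sites chained get_dependency(idx).get_output_layout().
bool input_is_static_old(const program_node& n) {
    return n.get_dependency(0).get_output_layout().is_static();
}

// After: the same check through the new accessor.
bool input_is_static_new(const program_node& n) {
    return n.get_input_layout(0).is_static();
}

int main() {
    program_node producer;
    program_node consumer;
    consumer.deps.push_back(&producer);
    return input_is_static_old(consumer) == input_is_static_new(consumer) ? 0 : 1;
}

The intent, as the diff below shows, is a mechanical cleanup of call sites rather than a functional change.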
@@ -82,18 +82,18 @@ std::string concatenation_inst::to_string(concatenation_node const& node) {
 std::stringstream ss_inputs;
 std::stringstream primitive_description;

-for (size_t i = 0; i < node.inputs_count(); ++i) {
+for (size_t i = 0; i < node.get_inputs_count(); ++i) {
     ss_inputs << node.input(i).id();
     if (node.input(i).get_output_layout().is_static())
         ss_inputs << ", count: " << node.input(i).get_output_layout().count();
     else
         ss_inputs << ", count: " << "?";
-    i != (node.inputs_count() - 1) ? ss_inputs << ", " : ss_inputs << "";
+    i != (node.get_inputs_count() - 1) ? ss_inputs << ", " : ss_inputs << "";
 }

 json_composite concat_info;
 concat_info.add("concat axis", desc->axis);
-concat_info.add("inputs count", node.inputs_count());
+concat_info.add("inputs count", node.get_inputs_count());
 concat_info.add("inputs", ss_inputs.str());

 node_info->add("concat info", concat_info);
@@ -305,7 +305,7 @@ std::string eltwise_inst::to_string(eltwise_node const& node) {
 }

 json_composite eltwise_info;
-for (size_t i = 0; i < node.inputs_count(); i++) {
+for (size_t i = 0; i < node.get_inputs_count(); i++) {
     eltwise_info.add("input_" + std::to_string(i), node.input(i).id());
 }
 eltwise_info.add("mode", str_mode);
@@ -322,7 +322,7 @@ eltwise_inst::typed_primitive_inst(network& network, eltwise_node const& node) :
 check_inputs_count(node);
 // check for stride
 auto prim = node.get_primitive();
-auto inputs_count = node.inputs_count();
+auto inputs_count = node.get_inputs_count();

 if (is_dynamic())
     return;
@@ -363,10 +363,10 @@ eltwise_inst::typed_primitive_inst(network& network, eltwise_node const& node) :
     }
 } else {
     bool use_new_shape_infer = network.get_config().get_property(ov::intel_gpu::allow_new_shape_infer);
-    auto input0_pshape = node.input().get_output_layout().get_partial_shape();
+    auto input0_pshape = node.get_input_pshape(0);

     for (size_t i = 1; i < inputs_count; ++i) {
-        auto input_pshape = node.input(i).get_output_layout().get_partial_shape();
+        auto input_pshape = node.get_input_pshape(i);

         if (input0_pshape.size() > input_pshape.size()) {
             if (use_new_shape_infer) {
@@ -216,7 +216,7 @@ std::string gemm_inst::to_string(gemm_node const& node) {
 std::stringstream primitive_description;

 json_composite gemm_info;
-for (size_t i = 0; i < node.inputs_count(); i++) {
+for (size_t i = 0; i < node.get_inputs_count(); i++) {
     gemm_info.add("input_" + std::to_string(i), node.input(i).id());
 }
 gemm_info.add("alpha", alpha);
@@ -59,7 +59,7 @@ void compile_graph::run(program& p) {

 // TODO: need to come up with better handling of unsupported shape agnostic cases
 // e.g. process exceptions from choose_impl() and ignore those for dynamic parameters
-if (node->is_type<fully_connected>() && node->is_dynamic() && node->get_output_layout().get_partial_shape().size() > 3)
+if (node->is_type<fully_connected>() && node->is_dynamic() && node->get_output_pshape().size() > 3)
     can_select_impl = false;

 // TODO: Remove this WA once we have shape agnostic arg_max_min_axis kernel with non-const k input
@@ -104,7 +104,7 @@ void handle_reshape::run(program& p) {
 if (user->is_type<fully_connected>() || user->is_type<gemm>()) {
     bool is_fc = user->is_type<fully_connected>();
     auto wei_dt = is_fc ? user->as<fully_connected>().weights().get_output_layout().data_type :
-                          user->as<gemm>().get_dependency(1).get_output_layout().data_type;
+                          user->as<gemm>().get_input_layout(1).data_type;
     onednn_support = layout_optimizer::onednn_check_data_types_for_fc_gemm(output_data_type, wei_dt, out_dt);
 } else if (user->is_type<convolution>() || user->is_type<deconvolution>()) {
     bool is_conv = user->is_type<convolution>();
@@ -39,6 +39,7 @@ void pre_replace_deconv::run(program& p) {
 auto weights_nodes_id = deconv_prim->weights;
 auto biases_nodes_id = deconv_prim->bias;
 auto& input_node = deconv_node.get_dependency(0);
+auto input_layout = deconv_node.get_input_layout(0);
 const primitive_id deconv_node_id = deconv_node.id();
 const primitive_id& input_node_id = input_node.id();

@@ -50,12 +51,12 @@ void pre_replace_deconv::run(program& p) {

 bool perform_opt = false;
 // fp16 and fp32 bfyx implementation supports transposed convolution
-perform_opt |= cldnn::format::dimension(input_node.get_output_layout().format) == 4 &&
-               (input_node.get_output_layout().data_type == data_types::f32 || input_node.get_output_layout().data_type == data_types::f16) &&
-               !((_lo.get_optimization_attributes().b_fs_yx_fsv16_network || input_node.get_output_layout().format == format::b_fs_yx_fsv16) &&
+perform_opt |= cldnn::format::dimension(input_layout.format) == 4 &&
+               (input_layout.data_type == data_types::f32 || input_layout.data_type == data_types::f16) &&
+               !((_lo.get_optimization_attributes().b_fs_yx_fsv16_network || input_layout.format == format::b_fs_yx_fsv16) &&
                _lo.is_format_optimized(deconv_node, format::b_fs_yx_fsv16));
 // int8/uint8 input
-perform_opt |= (input_node.get_output_layout().data_type == data_types::i8 || input_node.get_output_layout().data_type == data_types::u8);
+perform_opt |= (input_layout.data_type == data_types::i8 || input_layout.data_type == data_types::u8);

 if (!perform_opt)
     continue;
@@ -64,7 +65,7 @@ void pre_replace_deconv::run(program& p) {
 // setting convolution parameters based on deconvolution params
 auto output_layout = deconv_node.get_output_layout();
 auto output_pshape = output_layout.get_partial_shape();
-auto input_pshape = input_node.get_output_layout().get_partial_shape();
+auto input_pshape = input_layout.get_partial_shape();
 auto spatial_rank = output_layout.get_spatial_rank();
 auto stride = deconv_prim->stride;
 auto pad = deconv_prim->pad;
@@ -342,7 +342,7 @@ void prepare_buffer_fusing::run(program& p) {
 auto can_optimize = [](const program_node* node) {
     bool is_dynamic = node->is_dynamic();
     bool is_planar = format::is_default_format(node->get_output_layout().format);
-    bool no_pad = !node->get_output_layout().data_padding && !node->get_input_layouts().empty() && !node->get_input_layouts()[0].data_padding;
+    bool no_pad = !node->get_output_layout().data_padding && !node->get_input_layouts().empty() && !node->get_input_layout(0).data_padding;
     if (node->is_type<reshape>() && is_dynamic && is_planar && no_pad && !node->is_output() && !node->has_fused_primitives()) {
         return true;
     }
@@ -398,7 +398,7 @@ void prepare_buffer_fusing::run(program& p) {
 const auto& crop_layout = node.get_output_layout();
 auto format = crop_layout.format;
 auto crop_prim = node.get_primitive();
-auto input_layout = node.get_dependency(0).get_output_layout();
+auto input_layout = node.get_input_layout(0);
 const auto& crop_size = crop_layout.get_tensor();
 const auto& out_padd = crop_layout.data_padding;
 auto opt_lower_pad = crop_prim->offsets.feature[0];
@@ -184,7 +184,7 @@ void prepare_primitive_fusing::fuse_sigmoid_mul_to_swish(program &p) {

 p.get_processing_order().insert_next(&input, &swish);

-swish.calc_output_layout();
+swish.recalc_output_layout();
 });
 }
 }
@@ -291,10 +291,10 @@ void prepare_primitive_fusing::fuse_bias(program &p) {

 // Change out_features value to proper dimension for 3D FC case
 if (is_3d_fully_connected(node->get_dependency(0))) {
-    out_features = node->get_dependency(0).get_output_layout().spatial(1);
+    out_features = node->get_input_layout(0).spatial(1);
     is_3d_fc = true;
 } else if (is_3d_fully_connected(node->get_dependency(1))) {
-    out_features = node->get_dependency(1).get_output_layout().spatial(1);
+    out_features = node->get_input_layout(1).spatial(1);
     is_3d_fc = true;
 }
 auto& const_dep = eltw_node.get_dependency(const_dep_idx);
@@ -486,7 +486,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
 continue;

 auto is_grouped_conv = [](convolution_node& node) -> bool {
-    auto in_layout = node.get_dependency(0).get_output_layout();
+    auto in_layout = node.get_input_layout(0);
     return (node.get_groups() > 1 && node.get_groups() != static_cast<uint32_t>(in_layout.feature()));
 };

@@ -504,7 +504,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
 // Since reorder inputs is called after this pass
 // we have to check that blocked formats can be used in the network and layer is optimized for it.
 if ((node.get_output_layout().format == format::b_fs_yx_fsv16 ||
-     _lo.should_select_b_fs_yx_fsv16_layout(node, node.get_dependency(1).get_output_layout())) &&
+     _lo.should_select_b_fs_yx_fsv16_layout(node, node.get_input_layout(1))) &&
     !is_grouped_conv(node))
     return true;

@@ -517,7 +517,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
 _lo.is_format_optimized(node, format::fs_b_yx_fsv32) && node.get_primitive()->groups == 1)))
     return true;

-const size_t in_feature = node.get_dependency(0).get_output_layout().feature();
+const size_t in_feature = node.get_input_layout(0).feature();
 if ((node.get_output_layout().format == format::b_fs_zyx_fsv16 ||
     (_lo.is_format_optimized(node, format::b_fs_zyx_fsv16) &&
     _lo.get_optimization_attributes().b_fs_zyx_fsv16_network)) && in_feature != 3)
@@ -534,7 +534,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
 if (node.get_output_layout().format == format::bs_fs_yx_bsv32_fsv16 || _lo.is_format_optimized(node, format::bs_fs_yx_bsv32_fsv16))
     return true;

-auto in_dt = node.get_dependency(0).get_output_layout().data_type;
+auto in_dt = node.get_input_layout(0).data_type;

 // TODO: check if that's enough for correct work
 return data_type_traits::is_i8_u8(in_dt);
@@ -547,7 +547,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
 if (eltw_node.get_dependencies().size() < 2)
     return false;

-auto const_layout = eltw_node.get_dependency(1).get_output_layout();
+auto const_layout = eltw_node.get_input_layout(1);
 auto conv_layout = conv_node.get_output_layout();
 auto per_channel_eltwise = const_layout.feature() == conv_layout.feature();

@@ -564,17 +564,17 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
 _lo.get_preferred_impl_type(node, format::any /*dummy*/) == impl_types::onednn) {
     return true;
 } else {
-    auto in_dt = node.get_dependency(0).get_output_layout().data_type;
+    auto in_dt = node.get_input_layout(0).data_type;
     return data_type_traits::is_i8_u8(in_dt);
 }
 };

 auto gemm_supports_fusings = [](gemm_node& node) -> bool {
     bool does_support_fusings = false;
-    auto in0_dt = node.get_dependency(0).get_output_layout().data_type;
-    auto in1_dt = node.get_dependency(1).get_output_layout().data_type;
-    auto in0_fmt = node.get_dependency(0).get_output_layout().format;
-    auto in1_fmt = node.get_dependency(1).get_output_layout().format;
+    auto in0_dt = node.get_input_layout(0).data_type;
+    auto in1_dt = node.get_input_layout(1).data_type;
+    auto in0_fmt = node.get_input_layout(0).format;
+    auto in1_fmt = node.get_input_layout(1).format;

     if (data_type_traits::is_floating_point(in0_dt) &&
         data_type_traits::is_floating_point(in1_dt))
@@ -582,9 +582,9 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {

 if (data_type_traits::is_i8_u8(in0_dt) && in0_fmt == format::bfyx &&
     data_type_traits::is_i8_u8(in1_dt) && in1_fmt == format::bfyx) {
-    if (node.inputs_count() == 3) {
-        auto in2_dt = node.get_dependency(2).get_output_layout().data_type;
-        auto in2_fmt = node.get_dependency(2).get_output_layout().format;
+    if (node.get_inputs_count() == 3) {
+        auto in2_dt = node.get_input_layout(2).data_type;
+        auto in2_fmt = node.get_input_layout(2).format;
         does_support_fusings = data_type_traits::is_i8_u8(in2_dt) && in2_fmt == format::bfyx ? true : false;
     } else {
         does_support_fusings = true;
@@ -595,7 +595,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
 };

 auto mvn_supports_fusings = [](mvn_node& node, bool for_eltwise = false) -> bool {
-    auto in_layout = node.get_dependency(0).get_output_layout();
+    auto in_layout = node.get_input_layout(0);
     if (node.get_primitive()->requires_alignment(in_layout.get_partial_shape()))
         return false;
     return data_type_traits::is_i8_u8(in_layout.data_type) || for_eltwise;
@@ -608,8 +608,8 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
 auto& eltw = static_cast<const eltwise&>(*node.get_users().front()->get_primitive());
 auto& conv = node.get_dependency(0).as<convolution>();
 auto eltw_mode = eltw.mode == eltwise_mode::sum;
-auto conv_size = conv.get_dependency(0).get_output_layout().spatial(0) % 128 == 0 &&
-                 conv.get_dependency(0).get_output_layout().spatial(1) % 2 == 0;
+auto conv_size = conv.get_input_layout(0).spatial(0) % 128 == 0 &&
+                 conv.get_input_layout(0).spatial(1) % 2 == 0;
 auto format = conv.get_output_layout().format == format::bfyx;
 auto dt = conv.get_output_layout().data_type == data_types::f16;
 if (eltw_mode && conv_size && format && dt)
@@ -743,7 +743,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {

 should_fuse |= input.is_type<mvn>();

-should_fuse |= input.is_type<normalize>() && data_type_traits::is_i8_u8(input.get_dependency(0).get_output_layout().data_type);
+should_fuse |= input.is_type<normalize>() && data_type_traits::is_i8_u8(input.get_input_layout(0).data_type);

 should_fuse |= input.is_type<deconvolution>();

@@ -832,7 +832,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
 auto out_layout = quantize_node.get_output_layout();
 auto in_layout = input_data.get_output_layout();
 auto out_dt = out_layout.data_type;
-auto in_dt = input_data.get_dependency(0).get_output_layout().data_type;
+auto in_dt = input_data.get_input_layout(0).data_type;
 auto out_dt_is_i8_u8 = data_type_traits::is_i8_u8(out_dt);
 auto in_dt_is_i8_u8 = data_type_traits::is_i8_u8(in_dt);

@@ -858,7 +858,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
 ((out_dt == data_types::f32 || out_dt == data_types::f16) ||
  in_layout.format == format::b_fs_yx_fsv16 ||
  in_layout.format == format::bs_fs_yx_bsv32_fsv16 ||
- (_lo.should_select_b_fs_yx_fsv16_layout(input_data.as<convolution>(), input_data.get_dependency(1).get_output_layout()) &&
+ (_lo.should_select_b_fs_yx_fsv16_layout(input_data.as<convolution>(), input_data.get_input_layout(1)) &&
  !is_grouped_conv(input_data.as<convolution>())) ||
 // Avoid fusing to b_fs_yx_fsv16 (and similar) kernels
 _lo.get_optimization_attributes().use_onednn_impls ||
@@ -935,7 +935,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
 eltwise_mode::div
 };

-if (node.is_output() || node.inputs_count() != 2 ||
+if (node.is_output() || node.get_inputs_count() != 2 ||
     std::find(supported_modes.begin(), supported_modes.end(), prim->mode) == supported_modes.end() ||
     !prim->stride.empty())
     return;
@@ -1008,9 +1008,9 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
 // E.g. parent1 [?,?,768], parent2 [?,?,1]
 // expected eltw out shape: [?,?,768]
 // but w/o this check we can fuse eltwise to parent2 and return [?,?,1] as output shape which is unexpected
-auto parent1_pshape = parent1.first->get_output_layout().get_partial_shape();
-auto parent2_pshape = parent2.first->get_output_layout().get_partial_shape();
-auto out_pshape = node.get_output_layout().get_partial_shape();
+auto parent1_pshape = parent1.first->get_output_pshape(0);
+auto parent2_pshape = parent2.first->get_output_pshape(0);
+auto out_pshape = node.get_output_pshape(0);

 auto are_compatible = [](const ov::PartialShape& out_shape, const ov::PartialShape& in_shape) -> bool {
     if (out_shape.rank().get_length() != in_shape.rank().get_length())
@@ -39,7 +39,7 @@ void prepare_primitive_fusing_through::run(program& p) {
 return false;

 if (node->is_type<reorder>() &&
-    node->get_output_layout().data_type != node->get_dependency(0).get_output_layout().data_type)
+    node->get_output_layout().data_type != node->get_input_layout(0).data_type)
     return false;

 // Not to fuse reshape after Reduce changing the order of un-reduced axes. It is expected to be optimized out.
@@ -48,7 +48,7 @@ void prepare_primitive_fusing_through::run(program& p) {

 // Not to raise up target node through reshape where the size of dimension is changed (e.g. Unsqueeze)
 if (node->is_type<reshape>() &&
-    node->get_output_layout().get_partial_shape().size() != node->get_dependency(0).get_output_layout().get_partial_shape().size())
+    node->get_output_pshape().size() != node->get_input_pshape(0).size())
     return false;

 return true;
@@ -444,7 +444,7 @@ void prepare_quantization::remove_fake_reorders(program& p, reorder_node& reorder_node) {

 auto &usr = reorder_node.get_users().front();
 auto &dep = reorder_node.get_dependency(0);
-if (!(usr->is_type<convolution>() && usr->get_dependency(1).get_output_layout().data_type == data_types::i8) ||
+if (!(usr->is_type<convolution>() && usr->get_input_layout(1).data_type == data_types::i8) ||
     !dep.is_input() ||
     dep.get_output_layout().data_type != data_types::u8 ||
     (reorder_node.get_output_layout().data_type != data_types::f32 && reorder_node.get_output_layout().data_type != data_types::f16) ||
@@ -492,8 +492,8 @@ void prepare_quantization::prepare_asymmetric_quantization(program &p, convolution_node& node) {
 if (node.get_users().size() != 1)
     return false;

-auto in0_layout = node.get_dependency(0).get_output_layout();
-auto in1_layout = node.get_dependency(1).get_output_layout();
+auto in0_layout = node.get_input_layout(0);
+auto in1_layout = node.get_input_layout(1);

 if (!node.get_dependency(1).is_type<data>())
     return false;
@@ -279,7 +279,7 @@ void remove_redundant_reorders::run(program& p) {
 continue;

 auto o_layout = r_node.get_output_layout();
-auto i_layout = r_node.get_dependency(0).get_output_layout();
+auto i_layout = r_node.get_input_layout(0);

 // Optimize reorder b_fs_yx_fsv16 -> bfyx when spatials are equal to 1. In this case we can reinterpret buffer,
 // but pads need to be handled correctly.
@@ -513,9 +513,9 @@ void remove_redundant_reorders::run(program& p) {
 return false;

 auto node_format = node->get_output_layout().format;
-for (size_t axis = 0; axis < node->get_dependency(0).get_output_layout().data_padding.lower_size().sizes(node_format).size(); axis++) {
+for (size_t axis = 0; axis < node->get_input_layout(0).data_padding.lower_size().sizes(node_format).size(); axis++) {
     if (!user->is_padding_supported(static_cast<int>(axis),
-                                    node->get_dependency(0).get_output_layout().data_padding.lower_size().sizes(node_format)[axis]))
+                                    node->get_input_layout(0).data_padding.lower_size().sizes(node_format)[axis]))
         return false;
 }
 }
@@ -580,7 +580,7 @@ void remove_redundant_reorders::run(program& p) {

 // Add fused_primitive_desc of reorder to convolution which propagate original output layout to jitter
 fused_primitive_desc local_desc(node->get_primitive());
-local_desc.input_layout = input.get_dependency(0).get_output_layout(); // original convolution's output layout
+local_desc.input_layout = input.get_input_layout(0); // original convolution's output layout
 node->set_input_layout(local_desc.input_layout);
 local_desc.f_param = node->get_fuse_params();
 local_desc.outer_dep_start_idx = -1;
@@ -654,7 +654,7 @@ void remove_redundant_reorders::run(program& p) {
 bool remove_dep = reshape_input_node.get_users().size() == 1 && !reshape_input_node.is_output() &&
                   !reshape_input_node.has_fused_primitives();
 bool remove_current = remove_dep && !reshape_input_node.get_dependencies().empty() &&
-                      reshape_input_node.get_dependency(0).get_output_layout() == reshape_node.get_output_layout() &&
+                      reshape_input_node.get_input_layout(0) == reshape_node.get_output_layout() &&
                       !reshape_node.has_fused_primitives();

 if (remove_dep) {
@@ -692,7 +692,7 @@ void remove_redundant_reorders::run(program& p) {

 for (auto n : p.get_processing_order()) {
     if (n->is_in_data_flow() && n->is_type<reorder>()) {
-        auto preferred_impl = lo.get_preferred_impl_type(*n, n->get_dependency(0).get_output_layout().format);
+        auto preferred_impl = lo.get_preferred_impl_type(*n, n->get_input_layout(0).format);
         n->set_preferred_impl_type(preferred_impl);
     }

@@ -76,7 +76,7 @@ public:
 quantize_params.out_scale = arg.get_output_scale_val();
 quantize_params.out_shift = arg.get_output_shift_val();

-for (size_t i = 1; i < arg.inputs_count(); i++) {
+for (size_t i = 1; i < arg.get_inputs_count(); i++) {
     quantize_params.inputs.push_back(convert_data_tensor(impl_param.input_layouts[i]));
 }

@@ -23,7 +23,6 @@ public:

 program_node& input(size_t idx = 0) const { return get_dependency(idx); }

-size_t inputs_count() const { return desc->input.size(); }
 std::vector<size_t> get_shape_infer_dependencies() const override { return {}; }
 };

@@ -36,7 +36,7 @@ private:
 program::ptr _program = nullptr;

 void add_or_change_input_layout(const program_node& node) {
-    auto layout = node.get_dependency(0).get_output_layout();
+    auto layout = node.get_input_layout(0);
     auto input_id = node.as<condition>().result_id();
     if (_topology.get_primitives().count(input_id) == 0) {
         _topology.add_primitive(std::make_shared<input_layout>(input_id, layout));
@@ -15,7 +15,6 @@ struct typed_program_node<convert_color> : public typed_program_node_base<convert_color> {
 public:
 using parent::parent;
 program_node& input(size_t index = 0) const { return get_dependency(index); }
-size_t inputs_count() const { return get_primitive()->input.size(); }
 };

 using convert_color_node = typed_program_node<convert_color>;
@@ -15,7 +15,6 @@ public:
 using parent::parent;

 program_node& input(size_t index = 0) const { return get_dependency(index); }
-size_t inputs_count() const { return get_dependencies().size(); }

 std::vector<size_t> get_shape_infer_dependencies() const override { return {}; }
 };
@@ -30,7 +30,6 @@ public:
 }

 program_node& input(size_t idx = 0) const { return get_dependency(idx); }
-size_t inputs_count() const { return get_primitive()->input.size(); }

 std::shared_ptr<NodeFuseParams> get_fuse_params() const override {
     return std::make_shared<EltwiseFuseParams>(typed_desc());
@@ -17,7 +17,6 @@ public:
 using parent::parent;

 program_node& input(size_t index = 0) const { return get_dependency(index); }
-size_t inputs_count() const { return get_dependencies().size(); }
 };

 using embedding_bag_node = typed_program_node<embedding_bag>;
@@ -17,7 +17,6 @@ public:
 using parent::parent;

 program_node& input(size_t idx = 0) const { return get_dependency(idx); }
-size_t inputs_count() const { return this->get_primitive()->input_size(); }
 std::vector<size_t> get_shape_infer_dependencies() const override { return {}; }
 };

@@ -16,6 +16,7 @@
 #include "deconvolution_inst.h"
 #include "detection_output_inst.h"
 #include "binary_convolution_inst.h"
+#include "quantize_inst.h"

 #include <vector>
 #include <memory>
@@ -111,18 +112,9 @@ private:
 size_t _total_conv;
 std::map<std::pair<format::type, bool>, size_t> _optimized_conv_count;

-layout get_expected_layout(layout const& current_layout,
-                           convolution_node const& node,
-                           layout const& output_or_weights_layout);
-layout get_expected_layout(layout const& current_layout,
-                           deconvolution_node const& node,
-                           layout const& output_or_weights_layout);
-layout get_expected_layout(layout const& current_layout,
-                           detection_output_node const& node,
-                           layout const& output_or_weights_layout);
-layout get_expected_layout(layout const& current_layout,
-                           binary_convolution_node const& node,
-                           layout const& output_or_weights_layout);
+format get_expected_format(convolution_node const& node);
+format get_expected_format(deconvolution_node const& node);
+format get_expected_format(quantize_node const& node);

 bool is_depthwise(const convolution_node& node) const;
 format imad_case(convolution_node const& node) const;
@@ -166,6 +166,11 @@ public:
 program_node& get_dependency(size_t idx) const { return *dependencies.at(idx).first; }
 std::pair<program_node*, int32_t> get_dependency_with_port(size_t idx) const { return dependencies.at(idx); }

+// Count of original primitive inputs, i.e. it doesn't include fused dependencies
+size_t get_inputs_count() const { return desc->input_size(); }
+// Count of original primitive outputs
+size_t get_outputs_count() const { return desc->output_size(); }
+
 std::vector<layout> const get_input_layouts() const {
     std::vector<layout> layouts;
     for (const auto& i : dependencies) {
@@ -174,6 +179,20 @@ public:
     return layouts;
 }

+layout get_input_layout(size_t idx = 0) const {
+    return get_dependency(idx).get_output_layout(false);
+}
+
+ov::PartialShape get_input_pshape(size_t idx = 0) const {
+    return get_input_layout(idx).get_partial_shape();
+}
+
+ov::PartialShape get_output_pshape(size_t idx = 0) const {
+    if (!is_valid_output_layout(idx))
+        return calc_output_layouts()[idx].get_partial_shape();
+    return get_output_layout(idx).get_partial_shape();
+}
+
 // replaces idx-th dependency of 'this' with 'new_dep', calls program::remove_if_dangling(old_dep)
 void replace_dependency(size_t idx, program_node& new_dep, bool remove_if_dangling = true);
 // searches for 'old_dep' in dependencies list of 'this' and replaces it with 'new_dep', calls
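The helpers added above are thin wrappers, so rank and shape checks collapse into a single expression at call sites (see the prepare_primitive_fusing_through and fuse_simple_primitives hunks). A self-contained sketch with stand-in types, assuming only what the definitions above show:

#include <cstddef>
#include <vector>

struct partial_shape {
    size_t rank = 0;
    size_t size() const { return rank; }
};

struct layout {
    partial_shape ps;
    partial_shape get_partial_shape() const { return ps; }
};

struct node_stub {
    std::vector<node_stub*> deps;
    layout out_layout;

    layout get_output_layout(bool = true) const { return out_layout; }
    layout get_input_layout(size_t idx = 0) const { return deps.at(idx)->get_output_layout(false); }

    // Mirrors get_input_pshape()/get_output_pshape() from the hunk above.
    partial_shape get_input_pshape(size_t idx = 0) const { return get_input_layout(idx).get_partial_shape(); }
    partial_shape get_output_pshape(size_t = 0) const { return get_output_layout().get_partial_shape(); }
};

// A reshape that changes rank (e.g. Unsqueeze) can now be detected in one line,
// which is how prepare_primitive_fusing_through uses these helpers.
bool rank_preserved(const node_stub& reshape_node) {
    return reshape_node.get_output_pshape().size() == reshape_node.get_input_pshape(0).size();
}

int main() {
    node_stub producer{{}, {{4}}};
    node_stub reshape{{&producer}, {{4}}};
    return rank_preserved(reshape) ? 0 : 1;
}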
@@ -185,8 +204,8 @@ public:
 void remove_dependency(size_t idx);
 void remove_dependency(program_node& node);

-size_t get_dependency_index(program_node& node) const;
-size_t get_user_index(program_node& node) const;
+size_t get_dependency_index(const program_node& node) const;
+size_t get_user_index(const program_node& node) const;

 std::set<primitive_id> get_memory_dependencies() const;
 void add_memory_dependency(primitive_id);
@@ -242,8 +261,6 @@ public:
 bool set_output_layout(layout& new_layout, bool invalidate_users_if_changed = true, size_t idx = 0);
 bool set_output_layouts(std::vector<layout>& new_layout, bool invalidate_users_if_changed = true);

-size_t get_outputs_count() const { return num_outputs; }
-
 // forces recalculation of cached output layout, invalidates users if new layout is different than previous one and
 // @p invalidate_users_if_changed is set to true returns whether output layout has changed
 bool recalc_output_layout(bool invalidate_users_if_changed = true);
@@ -141,7 +141,6 @@ public:
 using parent::parent;

 program_node& input(size_t index = 0) const { return get_dependency(index); }
-size_t inputs_count() const { return get_dependencies().size(); }
 int get_levels() const { return get_primitive()->levels; }
 bool get_packed_binary_output() const { return get_output_layout().data_type == data_types::bin; }
 bool get_scale_shift_opt() const { return get_primitive()->scale_shift_opt; }
@@ -29,7 +29,6 @@ public:
 support_padding_all(true);
 }

-size_t inputs_count() const { return get_primitive()->input.size(); }
 program_node& mean_nv12() const { return get_dependency(2); }
 program_node& input(size_t idx = 0) const { return get_dependency(idx); }
 program_node& mean() const { return get_dependency(1); }
@@ -17,7 +17,6 @@ public:
 using parent::parent;

 program_node& input(size_t idx = 0) const { return get_dependency(idx); }
-size_t inputs_count() const { return get_dependencies().size(); }
 std::vector<size_t> get_shape_infer_dependencies() const override { return {}; }
 };

@@ -183,8 +183,8 @@ bool layout_optimizer::is_format_supported(program_node& node, format::type fmt) {
 return false;

 if (node.is_type<mvn>() && fmt == format::b_fs_yx_fsv16 &&
-    node.get_dependency(0).get_output_layout().data_type != data_types::i8 &&
-    node.get_dependency(0).get_output_layout().data_type != data_types::u8)
+    node.get_input_layout(0).data_type != data_types::i8 &&
+    node.get_input_layout(0).data_type != data_types::u8)
     return false;

 if (node.is_type<input_layout>())
@@ -271,7 +271,7 @@ bool layout_optimizer::can_fuse_reorder(program_node& prev, program_node& next, format fmt_prev, format fmt_next) {
 next.get_preferred_impl_type() == impl_types::onednn &&
 ((fmt_prev == format::byxf && fmt_next == format::byxf) ||
  (fmt_prev == format::bfyx && fmt_next == format::byxf &&
-  (prev_dt == data_types::f16 && next.get_dependency(0).get_output_layout().feature() <= 8))) &&
+  (prev_dt == data_types::f16 && next.get_input_layout(0).feature() <= 8))) &&
 is_input_reorder(prev, next))
     return true;

@@ -531,8 +531,8 @@ layout_optimizer::layout_optimizer(bool output_size_handling_enabled)
 }

 bool layout_optimizer::is_depthwise(const convolution_node& node) const {
-    const int32_t output_channels = node.get_output_layout().feature();
-    const int32_t input_channels = node.get_dependency(0).get_output_layout().feature();
+    const int32_t output_channels = node.get_output_layout(0).feature();
+    const int32_t input_channels = node.get_input_layout(0).feature();

     return node.get_groups() == static_cast<uint32_t>(input_channels) && input_channels == output_channels;
 }
@@ -667,7 +667,7 @@ bool layout_optimizer::convolution_b_fs_yx_fsv16_opt(const layout& input_layout,

 bool layout_optimizer::should_select_b_fs_yx_fsv16_layout(convolution_node const& node, layout const& weights_layout) {
     auto prim = node.get_primitive();
-    auto input_layout = node.get_dependency(0).get_output_layout();
+    auto input_layout = node.get_input_layout(0);
     auto const cond_denom = _total_conv > 0 ? 1.0f / static_cast<float>(_total_conv) : 1.0f;
     auto fully_support_conv_num = _optimized_conv_count.at({format::b_fs_yx_fsv16, false});
     auto partially_support_conv_num = _optimized_conv_count.at({format::b_fs_yx_fsv16, true});
@@ -843,8 +843,8 @@ static bool is_node_for_onednn(reduce_node const& node, format preferred_format)

 static bool is_node_for_onednn(deconvolution_node const& node) {
     auto prim = node.get_primitive();
-    auto input_layout = node.get_dependency(0).get_output_layout();
-    auto output_layout = node.get_output_layout();
+    auto input_layout = node.get_input_layout(0);
+    auto output_layout = node.get_output_layout(0);

     if (input_layout.is_dynamic() || output_layout.is_dynamic())
         return false;
@@ -920,7 +920,7 @@ bool layout_optimizer::users_for_convolution_byxf_opt(program_node const& node, uint32_t depth) {
 } else if (user->type() == cldnn::convolution::type_id()) {
     if (convolution_byxf_opt(node.get_output_layout(),
                              user->calc_output_layout(),
-                             user->get_dependency(1).get_output_layout(),
+                             user->get_input_layout(1),
                              user->as<convolution>())) {
         if (!users_for_convolution_byxf_opt(*user, depth - 1))
             return false;
@@ -1032,18 +1032,15 @@ bool layout_optimizer::is_mixed_layout(program_node& prev, program_node& next, bool check_data_type, std::vector<std::pair<format, format>> custom_list) {
     return false;
 }

-layout layout_optimizer::get_expected_layout(layout const& current_layout,
-                                             convolution_node const& node,
-                                             layout const& weights_layout) {
+format layout_optimizer::get_expected_format(convolution_node const& node) {
     auto prim = node.get_primitive();
-    auto expected_data_type = current_layout.data_type;
-    auto expected_format = current_layout.format;
-    auto input_layout = node.get_dependency(0).get_output_layout();
-    auto output_layout = node.calc_output_layout();
+    auto input_layout = node.get_input_layout(0);
+    auto output_layout = node.get_output_layout(0);
+    auto weights_layout = node.weights().get_output_layout().convert_to_weights_layout(prim->grouped_weights_shape);
+    auto expected_format = output_layout.format;

     if (prim->deformable_mode) {
-        output_layout.format = format::adjust_to_rank(format::bfyx, output_layout.get_partial_shape().size());
-        return output_layout;
+        return format::adjust_to_rank(format::bfyx, output_layout.get_partial_shape().size());
     }

     if (input_layout.is_dynamic() || output_layout.is_dynamic()) {
@@ -1051,10 +1048,9 @@ layout layout_optimizer::get_expected_layout(layout const& current_layout,
         expected_format = format::b_fs_yx_fsv16;
     else if (input_layout.get_partial_shape().size() == 5)
         expected_format = format::b_fs_zyx_fsv16;
-    return layout(current_layout.get_partial_shape(), expected_data_type, expected_format);
+    return expected_format;
 }

-auto expected_tensor = current_layout.get_tensor();
 const float cond_denom = _total_conv > 0 ? 1.0f / static_cast<float>(_total_conv) : 1.0f;

 bool onednn_valid_post_ops = get_post_ops_count(node) <= 32;
@@ -1079,22 +1075,18 @@ layout layout_optimizer::get_expected_layout(layout const& current_layout,
 } else {
     expected_format = imad_case(node);
 }
-expected_tensor = current_layout.get_tensor();
 } else if (_optimization_attributes.b_fs_zyx_fsv16_network &&
            convolution_b_fs_zyx_fsv16_opt(input_layout, output_layout, weights_layout, prim)) {
-    expected_tensor = current_layout.get_tensor();
-    if ((current_layout.data_type == data_types::f32 && current_layout.batch() % 16 == 0) ||
-        (current_layout.data_type == data_types::f16 && current_layout.batch() % 32 == 0))
+    if ((output_layout.data_type == data_types::f32 && output_layout.batch() % 16 == 0) ||
+        (output_layout.data_type == data_types::f16 && output_layout.batch() % 32 == 0))
         expected_format = cldnn::format::bs_fs_zyx_bsv16_fsv16;
     else
         expected_format = cldnn::format::b_fs_zyx_fsv16;

-} else if (current_layout.format == format::bfzyx) {
-    expected_tensor = current_layout.get_tensor();
+} else if (output_layout.format == format::bfzyx) {
     expected_format = cldnn::format::bfzyx;
 } else if (_optimization_attributes.bs_fs_yx_bsv16_fsv16_network &&
            convolution_bs_fs_yx_bsv16_fsv16_opt(node.input().get_output_layout(), output_layout, weights_layout, prim)) {
-    expected_tensor = current_layout.get_tensor();
     expected_format = cldnn::format::bs_fs_yx_bsv16_fsv16;
 } else if (_optimization_attributes.fs_b_yx_fsv32_network && !node.get_transposed() &&
            ((convolution_fs_b_yx_fsv32_opt(input_layout,
@@ -1109,72 +1101,67 @@ layout layout_optimizer::get_expected_layout(layout const& current_layout,
 // 2-nd: the previous conv primitive supports fs_b_yx_fsv32 layout and
 // current conv primitives supports this one with weak restrictions -
 // that should be cheaper than reordering data to another layout
-    expected_tensor = current_layout.get_tensor();
     expected_format = format::fs_b_yx_fsv32;
 } else if (should_select_b_fs_yx_fsv16_layout(node, weights_layout)) {
-    expected_tensor = current_layout.get_tensor();
     expected_format = cldnn::format::b_fs_yx_fsv16;
-} else if (current_layout.data_type == data_types::f16 &&
-           layout_optimizer::convolution_byxf_opt(input_layout, current_layout, weights_layout, node) &&
+} else if (output_layout.data_type == data_types::f16 &&
+           layout_optimizer::convolution_byxf_opt(input_layout, output_layout, weights_layout, node) &&
            (users_for_convolution_byxf_opt(node, 2) || deps_for_convolution_byxf_opt(node, 2)) &&
            // todo: remove this condition when yxfb optimizations will be disabled
-           current_layout.format != cldnn::format::yxfb && current_layout.batch() == 1) {
-    expected_tensor = current_layout.get_tensor();
+           output_layout.format != cldnn::format::yxfb && output_layout.batch() == 1) {
     expected_format = cldnn::format::byxf;
-} else if (current_layout.format == format::b_fs_yx_fsv4 ||
-           current_layout.format == format::os_is_yx_osv16_isv4) {
+} else if (output_layout.format == format::b_fs_yx_fsv4 ||
+           output_layout.format == format::os_is_yx_osv16_isv4) {
     // imad case
     // nothing to do, just go out from here.
-} else if (layout_optimizer::convolution_bfyx_opt(current_layout, weights_layout, prim) || _output_size_handling_enabled || node.get_transposed()) {
-    expected_tensor = current_layout.get_tensor();
-    if (current_layout.format == format::b_fs_zyx_fsv16 || current_layout.format == format::bs_fs_zyx_bsv16_fsv16)
+} else if (layout_optimizer::convolution_bfyx_opt(output_layout, weights_layout, prim) || _output_size_handling_enabled || node.get_transposed()) {
+    {
+        if (output_layout.format == format::b_fs_zyx_fsv16 || output_layout.format == format::bs_fs_zyx_bsv16_fsv16)
             expected_format = cldnn::format::bfzyx;
         else
             expected_format = cldnn::format::bfyx;
+    }
 } else {
-    expected_tensor = current_layout.get_tensor();
     expected_format = cldnn::format::yxfb;
 }
 }

-return layout(expected_data_type, expected_format, expected_tensor);
+return expected_format;
 }

-layout layout_optimizer::get_expected_layout(layout const& current_layout,
-                                             deconvolution_node const& node,
-                                             layout const& output_or_weights_layout) {
+format layout_optimizer::get_expected_format(deconvolution_node const& node) {
     auto prim = node.get_primitive();
-    auto expected_data_type = current_layout.data_type;
-    auto expected_format = current_layout.format;
-    auto input_layout = node.get_dependency(0).get_output_layout();
-    auto output_layout = node.calc_output_layout();
+    auto input_layout = node.get_input_layout(0);
+    auto output_layout = node.get_output_layout(0);
+    auto weights_layout = node.weights().get_output_layout().convert_to_weights_layout(node.get_primitive()->grouped_weights_shape);
+    auto expected_format = output_layout.format;

     if (input_layout.is_dynamic() || output_layout.is_dynamic()) {
         if (input_layout.get_partial_shape().size() <= 4)
             expected_format = format::b_fs_yx_fsv16;
         else if (input_layout.get_partial_shape().size() == 5)
             expected_format = format::b_fs_zyx_fsv16;
-        return layout(current_layout.get_partial_shape(), expected_data_type, expected_format);
+        return expected_format;
     }

-    auto expected_tensor = current_layout.get_tensor();
+    auto expected_shape = output_layout.get_shape();
     bool use_onednn_impls = _optimization_attributes.use_onednn_impls;

     if (use_onednn_impls && is_node_for_onednn(node)) {
         // XXX: need to take the situation into consideration where it is called from prepare_primitive_fusing
         expected_format = node.get_preferred_output_fmt();
     } else if (_optimization_attributes.b_fs_zyx_fsv16_network &&
-               deconvolution_b_fs_zyx_fsv16_opt(current_layout, output_or_weights_layout, prim)) {
-        if ((current_layout.data_type == data_types::f32 && expected_tensor.batch[0] % 16 == 0) ||
-            (current_layout.data_type == data_types::f16 && expected_tensor.batch[0] % 32 == 0))
+               deconvolution_b_fs_zyx_fsv16_opt(output_layout, weights_layout, prim)) {
+        if ((output_layout.data_type == data_types::f32 && expected_shape[0] % 16 == 0) ||
+            (output_layout.data_type == data_types::f16 && expected_shape[0] % 32 == 0))
             expected_format = cldnn::format::bs_fs_zyx_bsv16_fsv16;
         else
             expected_format = cldnn::format::b_fs_zyx_fsv16;
     } else if ((_optimization_attributes.b_fs_yx_fsv16_network) &&
-               deconvolution_b_fs_yx_fsv16_opt(current_layout, output_or_weights_layout, prim)) {
-        auto input_tensor = node.get_dependency(0).get_output_layout().get_tensor();
-        int input_features = input_tensor.feature[0];
-        int output_features = expected_tensor.feature[0];
+               deconvolution_b_fs_yx_fsv16_opt(output_layout, weights_layout, prim)) {
+        auto input_shape = input_layout.get_shape();
+        auto input_features = input_shape[1];
+        auto output_features = expected_shape[1];
         float f_cost = static_cast<float>(input_features * output_features) / (align_to(input_features, 16) * align_to(output_features, 16));
         float stride_cost = 1 / static_cast<float>(prim->stride[prim->stride.size() - 1]);
         if (f_cost * stride_cost > 0.1f)
@@ -1182,33 +1169,80 @@ layout layout_optimizer::get_expected_layout(layout const& current_layout,
     else
         expected_format = cldnn::format::bfyx;
 }
-return layout(expected_data_type, expected_format, expected_tensor);
+return expected_format;
 }

-layout layout_optimizer::get_expected_layout(layout const& current_layout,
-                                             detection_output_node const& node,
-                                             layout const& output_or_weights_layout) {
-    auto prim = node.get_primitive();
-    auto expected_tensor = current_layout.get_tensor();
-    auto expected_data_type = data_types::f32;
-    auto expected_format = output_or_weights_layout.format;
+format layout_optimizer::get_expected_format(quantize_node const& node) {
+    auto layout = node.get_output_layout();
+    auto expected = format::any;

-    return layout(expected_data_type, expected_format, expected_tensor);
-}
+    std::function<bool(const program_node& node)> only_gemm_users = [&](const program_node& node) {
+        bool all_users_gemm = true;

-layout layout_optimizer::get_expected_layout(layout const& current_layout,
-                                             binary_convolution_node const& node,
-                                             layout const& /*output_or_weights_layout*/) {
-    auto prim = node.get_primitive();
-    auto expected_tensor = current_layout.get_tensor();
-    auto expected_data_type = data_types::bin;
-    auto expected_format = cldnn::format::b_fs_yx_32fp;
+        for (auto user : node.get_users()) {
+            if (user->is_type<reorder>() || user->is_type<reshape>())
+                all_users_gemm &= only_gemm_users(*user);
+            else if (user->is_type<gemm>())
+                all_users_gemm &= true;
+            else
+                return false;
+        }

-    return layout(expected_data_type, expected_format, expected_tensor);
-}
+        return all_users_gemm;
+    };
+
+    auto use_onednn_impls = _optimization_attributes.use_onednn_impls;
+
+    if (use_onednn_impls) {
+        auto& user = node.get_users().front();
+        if (user->get_preferred_input_fmt(user->get_dependency_index(node)) != format::any) {
+            expected = user->get_preferred_input_fmt(user->get_dependency_index(node));
+        } else {
+            expected = format::any;
+        }
+    } else if (only_gemm_users(node)) {
+        // TODO: Gemm is not supporting fsv layouts
+        expected = format::get_default_format(node.get_output_layout().format.dimension());
+        // TODO: check other types for first conv
+    } else if (layout.is_static() && layout.format.spatial_num() == 2 &&
+               (layout.data_type == data_types::i8 || layout.data_type == data_types::u8) &&
+               layout.batch() % 16 == 0) {
+        if (use_onednn_impls && layout.batch() % 32 == 0) {
+            if (node.get_users().size() == 1 && node.get_users().front()->is_type<convolution>()) {
+                auto& conv = node.get_users().front()->as<convolution>();
+                auto ws = conv.get_input_layout(1).get_tensor();
+                if (ws.spatial[0] != 7 || conv.get_primitive()->groups > 1 || layout.feature() == 1)
+                    expected = format::bfyx;
+                else
+                    expected = format::bs_fs_yx_bsv16_fsv4;
+
+                auto conv_output_layout = conv.get_output_layout();
+                auto weights_layout = conv.weights().get_output_layout().convert_to_weights_layout(conv.get_primitive()->grouped_weights_shape);
+                format expected_conv_fmt = get_expected_format(conv);
+                if (expected == format::bfyx && expected_conv_fmt == format::bs_fs_yx_bsv32_fsv32 && layout.feature() % 32 == 0)
+                    expected = expected_conv_fmt;
+            }
+        } else if (layout.feature() > 8) {
+            expected = format::b_fs_yx_fsv16;
+        } else {
+            expected = format::b_fs_yx_fsv4;
+        }
+    } else if (layout.format.spatial_num() == 3 && (layout.data_type == data_types::i8 || layout.data_type == data_types::u8)) {
+        expected = format::b_fs_zyx_fsv16;
+    }
+
+    // In case of input -> ... -> quantize -> concat
+    if (layout.is_static() && expected == format::any
+        && (node.get_users().size() == 1 && node.get_users().front()->is_type<concatenation>())
+        && (layout.batch() < 4 && layout.feature() < 4)) {
+        expected = format::get_default_format(layout.get_rank(), false, false);
+    }
+
+    return expected;
+}

 bool layout_optimizer::are_data_types_suitable_for_onednn(program_node& node) {
-    auto in_dt = node.get_dependency(0).get_output_layout(false).data_type;
+    auto in_dt = node.get_input_layout(0).data_type;
     auto out_dt = node.get_output_layout(false).data_type;

     // Generally, fp32 input does NOT use oneDNN
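The only_gemm_users lambda above recursively walks a node's users, looking through reorder/reshape and accepting the node only if every transitive consumer is a gemm. A minimal stand-alone version of the same traversal is sketched below; node_stub and the string type tags are hypothetical, used only to make the sketch runnable.

#include <string>
#include <vector>

struct node_stub {
    std::string type;                 // e.g. "gemm", "reorder", "reshape", "eltwise"
    std::vector<node_stub*> users;
};

bool only_gemm_users(const node_stub& node) {
    for (const node_stub* user : node.users) {
        if (user->type == "reorder" || user->type == "reshape") {
            if (!only_gemm_users(*user))      // look through layout-only ops
                return false;
        } else if (user->type != "gemm") {
            return false;                     // any other consumer disqualifies the node
        }
    }
    return true;                              // a node with no users also passes, as in the original
}

int main() {
    node_stub gemm{"gemm", {}};
    node_stub reshape{"reshape", {&gemm}};
    node_stub quantize{"quantize", {&reshape}};
    return only_gemm_users(quantize) ? 0 : 1;
}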
@@ -1230,10 +1264,10 @@ bool layout_optimizer::are_data_types_suitable_for_onednn(program_node& node) {
 } else if (node.is_type<fully_connected>() || node.is_type<gemm>()) {
     bool is_fc = node.is_type<fully_connected>();
     auto wei_dt = is_fc ? node.as<fully_connected>().weights().get_output_layout().data_type :
-                          node.as<gemm>().get_dependency(1).get_output_layout().data_type;
+                          node.as<gemm>().get_input_layout(1).data_type;
     return onednn_check_data_types_for_fc_gemm(in_dt, wei_dt, out_dt);
 } else if (node.is_type<reorder>()) {
-    auto input_fmt = node.get_dependency(0).get_output_layout().format;
+    auto input_fmt = node.get_input_layout(0).format;
     auto output_fmt = node.get_output_layout().format;

     // For mixed precision case, oneDNN is slower than clDNN
@@ -1398,7 +1432,7 @@ impl_types layout_optimizer::get_preferred_impl_type(program_node& node, format preferred_format) {
 format::bs_fs_yx_bsv32_fsv16,
 format::bs_fs_yx_bsv32_fsv32,
 };
-if (blocked_formats.find(node.get_dependency(0).get_output_layout().format) != blocked_formats.end()) {
+if (blocked_formats.find(node.get_input_layout(0).format) != blocked_formats.end()) {
     preferred_impl = impl_types::ocl;
 } else {
     auto& nms_node = node.as<non_max_suppression>();
@@ -1440,7 +1474,7 @@ impl_types layout_optimizer::get_preferred_impl_type(program_node& node, format preferred_format) {
 format::bs_fs_yx_bsv32_fsv32,
 };

-auto input_layout = node.get_dependency(0).get_output_layout();
+auto input_layout = node.get_input_layout(0);
 auto output_layout = node.get_output_layout();

 auto input_fmt = input_layout.format;
@@ -1597,13 +1631,13 @@ format layout_optimizer::get_preferred_format(program_node& node) {

 if (allow_new_shape_infer) {
     if (node.is_type<shape_of>())
-        return format::get_default_format(node.get_dependency(0).get_output_layout(false).get_rank());
+        return format::get_default_format(node.get_input_layout(0).get_rank());

     // Let reorder_input pass to check input format instead of output_format in forward investigation, vice versa
     auto out_lay_rank = node.get_output_layout(false).get_rank();
     auto dep_size = node.get_dependencies().size();
     for (size_t i = 0; i < dep_size; i++) {
-        auto in_lay_rank = node.get_dependency(i).get_output_layout(false).get_rank();
+        auto in_lay_rank = node.get_input_layout(i).get_rank();
         const auto& shape_infer_deps = node.get_shape_infer_dependencies();
         if (std::find(shape_infer_deps.begin(), shape_infer_deps.end(), i) != shape_infer_deps.end()) {
             auto fmt = format::get_default_format(in_lay_rank, false, false);
@@ -1631,80 +1665,11 @@ format layout_optimizer::get_preferred_format(program_node& node) {
 if (!_forcing_map.empty() && _forcing_map.count(node.id()) != 0) {
     expected = _forcing_map.at(node.id()).first;
 } else if (node.is_type<convolution>()) {
-    auto& conv_node = node.as<convolution>();
-    auto weights_layout = conv_node.weights().get_output_layout().convert_to_weights_layout(conv_node.get_primitive()->grouped_weights_shape);
-    expected = get_expected_layout(output_layout, conv_node, weights_layout).format;
+    expected = get_expected_format(node.as<convolution>());
 } else if (node.is_type<binary_convolution>()) {
-    auto& bconv_node = node.as<binary_convolution>();
-    auto weights_layout = bconv_node.weights().get_output_layout().convert_to_weights_layout(false);
-    expected = get_expected_layout(output_layout, bconv_node, weights_layout).format;
-} else if (node.is_type<detection_output>()) {
-    expected = get_expected_layout(
-        output_layout,
-        node.as<detection_output>(),
-        layout{ data_types::f32, format::bfyx, tensor{} }).format;
+    expected = cldnn::format::b_fs_yx_32fp;
 } else if (node.is_type<quantize>()) {
-    auto layout = node.get_output_layout();
-
-    std::function<bool(const program_node& node)> only_gemm_users = [&](const program_node& node) {
-        bool all_users_gemm = true;
-
-        for (auto user : node.get_users()) {
-            if (user->is_type<reorder>() || user->is_type<reshape>())
-                all_users_gemm &= only_gemm_users(*user);
-            else if (user->is_type<gemm>())
-                all_users_gemm &= true;
-            else
-                return false;
-        }
-
-        return all_users_gemm;
-    };
-
-    if (use_onednn_impls) {
-        auto& user = node.get_users().front();
-        if (user->get_preferred_input_fmt(user->get_dependency_index(node)) != format::any) {
-            expected = user->get_preferred_input_fmt(user->get_dependency_index(node));
-        } else {
-            expected = format::any;
-        }
-    } else if (only_gemm_users(node)) {
-        // TODO: Gemm is not supporting fsv layouts
-        expected = format::get_default_format(node.get_output_layout().format.dimension());
-        // TODO: check other types for first conv
-    } else if (layout.is_static() && layout.format.spatial_num() == 2 &&
-               (layout.data_type == data_types::i8 || layout.data_type == data_types::u8) &&
-               layout.batch() % 16 == 0) {
-        if (use_onednn_impls && layout.batch() % 32 == 0) {
-            if (node.get_users().size() == 1 && node.get_users().front()->is_type<convolution>()) {
-                auto& conv = node.get_users().front()->as<convolution>();
-                auto ws = conv.get_dependency(1).get_output_layout().get_tensor();
-                if (ws.spatial[0] != 7 || conv.get_primitive()->groups > 1 || layout.feature() == 1)
-                    expected = format::bfyx;
-                else
-                    expected = format::bs_fs_yx_bsv16_fsv4;
-
-                auto conv_output_layout = conv.get_output_layout();
-                auto weights_layout = conv.weights().get_output_layout().convert_to_weights_layout(conv.get_primitive()->grouped_weights_shape);
-                format expected_conv_fmt = get_expected_layout(conv_output_layout, conv, weights_layout).format;
-                if (expected == format::bfyx && expected_conv_fmt == format::bs_fs_yx_bsv32_fsv32 && layout.feature() % 32 == 0)
-                    expected = expected_conv_fmt;
-            }
-        } else if (layout.feature() > 8) {
-            expected = format::b_fs_yx_fsv16;
-        } else {
-            expected = format::b_fs_yx_fsv4;
-        }
-    } else if (layout.format.spatial_num() == 3 && (layout.data_type == data_types::i8 || layout.data_type == data_types::u8)) {
-        expected = format::b_fs_zyx_fsv16;
-    }
-
-    // In case of input -> ... -> quantize -> concat
-    if (layout.is_static() && expected == format::any
-        && (node.get_users().size() == 1 && node.get_users().front()->is_type<concatenation>())
-        && (layout.batch() < 4 && layout.feature() < 4)) {
-        expected = format::get_default_format(layout.get_rank(), false, false);
-    }
+    expected = get_expected_format(node.as<quantize>());
 } else if (node.is_type<reorder>() || node.is_type<input_layout>()) {
     if (node.is_type<reorder>() && node.as<reorder>().get_primitive()->has_surface_input()) {
         expected = format::nv12;
@@ -1712,13 +1677,11 @@ format layout_optimizer::get_preferred_format(program_node& node) {
 expected = node.get_output_layout().format;
 }
 } else if (node.is_type<reshape>()) {
-    expected = format::get_default_format(node.get_output_layout().format.dimension());
+    expected = format::get_default_format(node.get_output_layout().get_rank());
 } else if (node.is_type<deconvolution>()) {
-    auto& deconv_node = node.as<deconvolution>();
-    auto weights_layout = deconv_node.weights().get_output_layout().convert_to_weights_layout(deconv_node.get_primitive()->grouped_weights_shape);
-    expected = get_expected_layout(output_layout, deconv_node, weights_layout).format;
+    expected = get_expected_format(node.as<deconvolution>());
 } else if (node.is_type<mvn>()) {
-    auto input_layout = node.get_dependency(0).get_output_layout();
+    auto input_layout = node.get_input_layout(0);
     if (input_layout.format.dimension() == 5 &&
         (input_layout.data_type == data_types::f32 || input_layout.data_type == data_types::f16))
         expected = format::bfzyx;
@@ -1810,7 +1773,7 @@ void layout_optimizer::select_preferred_formats_for_onednn(program_node& node, dnnl::primitive_desc prim_desc) {
 can_optimize_permute = pnode.get_users().size() == 1
     && pnode.get_output_layout().data_type == node.get_output_layout().data_type
     && !pnode.has_fused_primitives()
-    && !pnode.is_output() && pnode.get_dependency(0).get_output_layout().is_static()
+    && !pnode.is_output() && pnode.get_input_layout(0).is_static()
    && pnode.is_reverse_rotating_except_batch();
 }
 if (!can_optimize_permute) {
@@ -1846,7 +1809,7 @@ void layout_optimizer::select_preferred_formats_for_onednn(program_node& node, dnnl::primitive_desc prim_desc) {
 auto& pnode = node.get_users().front()->as<permute>();
 auto can_optimize_permute = pnode.get_output_layout().data_type == node.get_output_layout().data_type
     && !pnode.has_fused_primitives()
-    && !pnode.is_output() && pnode.get_dependency(0).get_output_layout().is_static()
+    && !pnode.is_output() && pnode.get_input_layout(0).is_static()
     && pnode.is_rotating_except_batch();
 if (can_optimize_permute) {
     dst_fmt = format::byxf;
@@ -799,8 +799,8 @@ primitive_inst::primitive_inst(network& network, program_node const& node, bool allocate_memory)
 , _org_id(node.get_org_primitive_id())
 , _is_input(node.is_input())
 , _is_output(node.is_output())
-, _inputs_memory_count(node.get_primitive()->input_size())
-, _outputs_memory_count(node.get_primitive()->output_size())
+, _inputs_memory_count(node.get_inputs_count())
+, _outputs_memory_count(node.get_outputs_count())
 , _fused_mem_count(node.get_fused_inputs_count())
 , _fused_mem_offset((_fused_mem_count > 0 && node.has_fused_dep()) ? node.get_first_fused_dep_idx() : 0)
 , _can_be_optimized(node.can_be_optimized())
@@ -1054,7 +1054,7 @@ void program::fuse_nodes(program_node &fused_node,
 fused_primitive_desc local_desc(peer_node.get_primitive());
 local_desc.f_param = get_node_ptr(peer_node.id())->get_fuse_params();
 local_desc.total_num_deps = peer_node.get_dependencies().size();
-local_desc.input_layout = peer_node.get_dependency(0).get_output_layout();
+local_desc.input_layout = peer_node.get_input_layout(0);
 local_desc.output_layout = peer_layout;

 if (fused_node.in_shape_of_subgraph && !peer_node.in_shape_of_subgraph) {
@@ -1350,7 +1350,7 @@ void program::set_layout_optimizer_attributes(layout_optimizer& lo) {

 if (!conv.is_dynamic()) {
     // In dynamic shape, conv is fixed as a predefined format b_fs_yx_fsv16
-    auto input_size = node->get_dependency(0).get_output_layout().get_tensor();
+    auto input_size = node->get_input_layout(0).get_tensor();
     auto ifm = static_cast<uint32_t>(input_size.feature[0]);
     if (conv.get_primitive()->groups == ifm && conv.get_primitive()->groups >= 16)
         total_dw_conv_layers++;
@@ -219,7 +219,7 @@ void program_node::remove_dependency(program_node& node) {
 remove_dependency(i);
 }

-size_t program_node::get_user_index(program_node& node) const {
+size_t program_node::get_user_index(const program_node& node) const {
     size_t idx = 0;
     for (auto& user : users) {
         if (user == &node)
@@ -231,7 +231,7 @@ size_t program_node::get_user_index(program_node& node) const {
 OPENVINO_ASSERT(false, "Search invalid user node" + node.id() + " node");
 }

-size_t program_node::get_dependency_index(program_node& node) const {
+size_t program_node::get_dependency_index(const program_node& node) const {
     for (size_t i = 0; i < dependencies.size(); ++i)
         if (dependencies[i].first == &node)
             return i;
@@ -1268,4 +1268,3 @@ void program_node::init_onednn_primitive_attributes() {


 #endif // ENABLE_ONEDNN_FOR_GPU
-
@@ -64,7 +64,7 @@ std::string select_inst::to_string(select_node const& node) {
 std::stringstream primitive_description;

 json_composite select_info;
-for (size_t i = 0; i < node.inputs_count(); i++) {
+for (size_t i = 0; i < node.get_inputs_count(); i++) {
     select_info.add("input_" + std::to_string(i), node.input(i).id());
 }

@@ -91,7 +91,7 @@ TEST(handle_reshape, skip_reorder_node_to_split_when_onndnn_not_support) {

 ASSERT_NE(prog, nullptr);

-ASSERT_TRUE(prog->get_node("matmul").get_dependency(0).get_output_layout().data_type == data_types::f16);
+ASSERT_TRUE(prog->get_node("matmul").get_input_layout(0).data_type == data_types::f16);
 }

 TEST(handle_reshape, correct_parameters_propagation) {
@@ -123,8 +123,8 @@ TEST(handle_reshape, correct_parameters_propagation) {

 ASSERT_TRUE(prog->get_node("reshape").can_be_optimized());

-auto out_shape0 = prog->get_node("e2").get_output_layout().get_partial_shape();
-auto out_shape1 = prog->get_node("reorder").get_output_layout().get_partial_shape();
+auto out_shape0 = prog->get_node("e2").get_output_pshape();
+auto out_shape1 = prog->get_node("reorder").get_output_pshape();

 ov::PartialShape expected_out_shape{2, 12};

@@ -171,8 +171,8 @@ TEST(handle_reshape, correct_parameters_propagation_2_inputs) {
 ASSERT_TRUE(reshape_split_node.is_type<reshape>());
 ASSERT_EQ(reshape_split_node.get_dependencies().size(), 2);

-auto out_shape0 = prog->get_node("e2").get_output_layout().get_partial_shape();
-auto out_shape1 = prog->get_node("reorder").get_output_layout().get_partial_shape();
+auto out_shape0 = prog->get_node("e2").get_output_pshape();
+auto out_shape1 = prog->get_node("reorder").get_output_pshape();

 ov::PartialShape expected_out_shape{2, 12};