diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp index abbdbe1ce1b..e87b90537e7 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp @@ -70,7 +70,7 @@ void prepare_primitive_fusing::remove_redundant_reshape(program &p) { for (auto prev : node.get_dependencies()) { if (!prev.first->is_type()) return; - if (prev.first->get_users().size() > 1) + if (prev.first->get_users().size() > 1 || prev.first->get_dependencies().size() > 1) return; if (prev.first->as().input().get_output_layout() == node.get_output_layout()) { p.add_optimized_primitive_info(prev.first->id()); diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index a6f7755d908..f072f798fac 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -153,6 +153,9 @@ void primitive_inst::update_shape() { auto idx = _deps[i].second; auto new_shape = _deps[i].first->_impl_params->get_output_layout(idx); if (_impl_params->get_input_layout(i) != new_shape) { + GPU_DEBUG_TRACE_DETAIL << id() << ": update shape dep: " << _deps[i].first->id() + << " was: " << _impl_params->get_input_layout(i).to_short_string() + << " now: " << new_shape.to_short_string() << std::endl; _impl_params->input_layouts[i] = new_shape; input_shape_changed = true; } diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/gather/gather_kernel_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/gather/gather_kernel_ref.cpp index c5dc9a2db62..4e15c2d8fef 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/gather/gather_kernel_ref.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/gather/gather_kernel_ref.cpp @@ -297,7 +297,7 @@ KernelsData 
GatherKernelRef::GetKernelsData(const Params& params, const optional 2, GetFusedPrimitiveInputsCount(params), 1, - newParams.outputs[0].is_dynamic()); + newParams.has_dynamic_tensors()); return {kd}; } diff --git a/src/plugins/intel_gpu/tests/passes/prepare_primitive_fusing_test.cpp b/src/plugins/intel_gpu/tests/passes/prepare_primitive_fusing_test.cpp index 6a7d397edb2..b4175a32ed7 100644 --- a/src/plugins/intel_gpu/tests/passes/prepare_primitive_fusing_test.cpp +++ b/src/plugins/intel_gpu/tests/passes/prepare_primitive_fusing_test.cpp @@ -11,6 +11,8 @@ #include "data_inst.h" #include "eltwise_inst.h" #include "reduce_inst.h" +#include "reshape_inst.h" +#include "gemm_inst.h" #include "pass_manager.h" #include "to_string_utils.h" @@ -403,3 +405,38 @@ TEST(prepare_primitive_fusing, fuse_eltwise_to_fc_dyn_illegal_2) { ASSERT_EQ(lock[2], 93); ASSERT_EQ(lock[3], 94); } + +TEST(prepare_primitive_fusing, dont_remove_only_dep_reshape) { + // Topology: + // input -> reshape(w/ 2nd non-const input) -> reshape(w/ 2nd const input) -> gemm + // + // Expectation: + // If a dependency reshape in the sequence of reshapes has more than one input (here: a non-const pattern), + // the current reshape must not be removed on its own, so redundant-reshape removal is skipped and "reshape2" remains + + auto& engine = get_test_engine(); + auto in_layout = layout{ ov::PartialShape::dynamic(4), data_types::f32, format::bfyx }; + auto pattern_layout = layout{ ov::PartialShape{ 4 }, data_types::i64, format::bfyx }; + + std::vector output_pattern { 0, 1, -1, 0 }; + + topology topology; + topology.add(input_layout("input1", in_layout)); + topology.add(input_layout("pattern1", pattern_layout)); + topology.add(input_layout("input2", in_layout)); + topology.add(reshape("reshape1", input_info("input1"), input_info("pattern1"), true, ov::PartialShape::dynamic(4))); + topology.add(reshape("reshape2", input_info("reshape1"), true, output_pattern, ov::PartialShape::dynamic(4))); + topology.add(gemm("gemm", { input_info("reshape2"), 
input_info("input2") }, data_types::f32, false, false)); + + ExecutionConfig config; + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + auto prog = program::build_program(engine, topology, config, false, true); + + layout_optimizer lo(true); + + program_wrapper::apply_opt_pass(*prog, lo); + + ASSERT_NE(prog, nullptr); + ASSERT_TRUE(has_node(*prog, "reshape2")); +}