diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp index a478cf290fd..1eb61449675 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp @@ -332,7 +332,7 @@ void remove_redundant_reorders::run(program& p) { bool same_data_type = input.get_output_layout().data_type == output_layout.data_type; bool allowed_dt_conversion_fuse = (input.is_type() || input.is_type() || - input.is_type() || input.is_type()); + input.is_type() || input.is_type() || input.is_type()); if (!same_data_type && !allowed_dt_conversion_fuse) continue; diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp index 5128dc6aa47..5d16e800a97 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp @@ -581,16 +581,18 @@ void reorder_inputs::run(program& p, layout_optimizer& lo, reorder_factory& rf) } const auto reorder_input_detection_output = [&p, &rf](typed_program_node& detection_output_node) { - auto detection_output_prim = detection_output_node.get_primitive(); + if (detection_output_node.get_preferred_impl_type() == impl_types::cpu) { + auto detection_output_prim = detection_output_node.get_primitive(); - for (size_t i = 0; i < detection_output_node.get_dependencies().size(); i++) { - auto& input = detection_output_node.get_dependency(i); - auto new_input = rf.get_reorder(input.id(), - input.get_output_layout(), - layout{ data_types::f32, format::bfyx, input.get_output_layout().get_tensor() }); + for (size_t i = 0; i < detection_output_node.get_dependencies().size(); i++) { + auto& input = detection_output_node.get_dependency(i); + auto new_input = rf.get_reorder(input.id(), + input.get_output_layout(), + layout{ data_types::f32, format::bfyx, input.get_output_layout().get_tensor() }); - if (new_input.first) { - p.add_intermediate(new_input.first, detection_output_node, i, !new_input.second); + if (new_input.first) { + p.add_intermediate(new_input.first, detection_output_node, i, !new_input.second); + } } } }; diff --git a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp index d739c409156..eea226dff0c 100644 --- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp +++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp @@ -365,8 +365,8 @@ bool layout_optimizer::can_fuse_reorder_to_prev(program_node& prev, program_node if (prev.is_dynamic() || (next && next->is_dynamic())) return false; - // Ref kernels are the main for depth_to_space and region_yolo. It can do anything. Should not see next. - if (prev.is_type() || prev.is_type()) + // Ref kernels are the main for depth_to_space, region_yolo and detection_output. It can do anything. Should not see next. + if (prev.is_type() || prev.is_type() || prev.is_type()) return true; if (next == nullptr) diff --git a/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_ref.cpp index e2b57ade263..18f55477d8b 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_ref.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_ref.cpp @@ -17,6 +17,7 @@ ParamsKey DetectionOutputKernelRef::GetSupportedKey() const { k.EnableOutputDataType(Datatype::F32); k.EnableInputLayout(DataLayout::bfyx); k.EnableOutputLayout(DataLayout::bfyx); + k.EnableDifferentTypes(); k.EnableTensorOffset(); k.EnableTensorPitches(); k.EnableBatching();