[GPU] Support cross-data type from detection output (#13125)

* The detection_output kernel now supports cross-type (fp16/fp32) operation,
  so the graph optimization passes support cross-type detection_output as well.

Signed-off-by: hyunback <hyunback.kim@intel.com>
This commit is contained in:
hyunback kim 2022-09-22 14:07:10 +09:00 committed by GitHub
parent f2c0e0b4d7
commit 079021c673
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 14 additions and 11 deletions

View File

@ -332,7 +332,7 @@ void remove_redundant_reorders::run(program& p) {
bool same_data_type = input.get_output_layout().data_type == output_layout.data_type;
bool allowed_dt_conversion_fuse = (input.is_type<one_hot>() || input.is_type<permute>() ||
input.is_type<depth_to_space>() || input.is_type<region_yolo>());
input.is_type<depth_to_space>() || input.is_type<region_yolo>() || input.is_type<detection_output>());
if (!same_data_type && !allowed_dt_conversion_fuse)
continue;

View File

@ -581,6 +581,7 @@ void reorder_inputs::run(program& p, layout_optimizer& lo, reorder_factory& rf)
}
const auto reorder_input_detection_output = [&p, &rf](typed_program_node<detection_output>& detection_output_node) {
if (detection_output_node.get_preferred_impl_type() == impl_types::cpu) {
auto detection_output_prim = detection_output_node.get_primitive();
for (size_t i = 0; i < detection_output_node.get_dependencies().size(); i++) {
@ -593,6 +594,7 @@ void reorder_inputs::run(program& p, layout_optimizer& lo, reorder_factory& rf)
p.add_intermediate(new_input.first, detection_output_node, i, !new_input.second);
}
}
}
};
const auto reorder_input_binary_convolution = [&p, &rf](typed_program_node<binary_convolution>& binary_conv_node) {

View File

@ -365,8 +365,8 @@ bool layout_optimizer::can_fuse_reorder_to_prev(program_node& prev, program_node
if (prev.is_dynamic() || (next && next->is_dynamic()))
return false;
// Ref kernels are the main for depth_to_space and region_yolo. It can do anything. Should not see next.
if (prev.is_type<depth_to_space>() || prev.is_type<region_yolo>())
// Ref kernels are the main for depth_to_space, region_yolo and detection_output. It can do anything. Should not see next.
if (prev.is_type<depth_to_space>() || prev.is_type<region_yolo>() || prev.is_type<detection_output>())
return true;
if (next == nullptr)

View File

@ -17,6 +17,7 @@ ParamsKey DetectionOutputKernelRef::GetSupportedKey() const {
k.EnableOutputDataType(Datatype::F32);
k.EnableInputLayout(DataLayout::bfyx);
k.EnableOutputLayout(DataLayout::bfyx);
k.EnableDifferentTypes();
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableBatching();