[GPU] apply LoadType change condition when input1 fs != output fs(broadcast) in eltwise fusing (#12710)
* apply condition when input1 fs != output fs(broadcast) * modify condition of LT unaligned read transition
This commit is contained in:
parent
2d04f93486
commit
cf5d93b273
@ -1877,10 +1877,13 @@ std::string FusedOpsCodeGenerator::GetJitLoad(const FusedOpsConfiguration& conf,
|
||||
|
||||
bool safe_load = conf.boundary_check == FusedOpsConfiguration::BoundaryCheck::ENABLED;
|
||||
|
||||
// Fsv16 Eltwise which requires f axis broadcast, such as input[1,1,z,1,1] with output[b,f,z,y,x], needs to use LT unaligned read.
|
||||
// In this case, intel_sub_group_block_read() introduces an increasing index within the feature block.
|
||||
bool f_axis_broadcast = ((input_tensor.Feature().v != prim_output.Feature().v) && (input_tensor.Feature().v == 1) && (vec_size == 1));
|
||||
// Change JitLoad to ignore LT_ALIGNED_READ LoadType if this input tensor has a planar format(SimpleLayout)
|
||||
if (desc.GetType() == KernelType::ELTWISE && input_tensor.SimpleLayout() && input_tensor.GetLayout() != orig_output_layout &&
|
||||
conf.load_type == FusedOpsConfiguration::LoadType::LT_ALIGNED_READ &&
|
||||
input_tensor.SameDimsSizes(prim_output) && input_tensor.LogicalSize() != 1) {
|
||||
(input_tensor.SameDimsSizes(prim_output) || f_axis_broadcast) && input_tensor.LogicalSize() != 1) {
|
||||
std::string sub_group_local_id_str = "get_sub_group_local_id";
|
||||
size_t found_sub = conf.bfzyx_idx_order[1].rfind(sub_group_local_id_str);
|
||||
if (found_sub != std::string::npos) {
|
||||
|
Loading…
Reference in New Issue
Block a user