[GPU] Optimize DepthToSpace (#11761)

* lws optimize
This commit is contained in:
Felix Dohyun Kim
2022-06-13 11:17:22 +09:00
committed by GitHub
parent 0066ddbd22
commit d831047f30

View File

@@ -40,8 +40,14 @@ CommonDispatchData DepthToSpaceKernelRef::SetDefault(const depth_to_space_params
params.outputs[0].Feature().v,
params.outputs[0].Z().v * params.outputs[0].Y().v * params.outputs[0].X().v };
// this kernel only supports bfyx and b_fs_yx_fsv16 layout.
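// Why the arguments and result of GetOptimalLocalWorkGroupSizes() are reversed:
// a large lws along X*Y*Z performs better than a large lws along batch, but the current
// GetOptimalLocalWorkGroupSizes() does not pick sizes that way. So gws and dims_by_gws are
// reversed before the call, and lws and gws are reversed back afterwards.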
reverse(dims_by_gws.begin(), dims_by_gws.end());
reverse(dispatchData.gws.begin(), dispatchData.gws.end());
dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo, in_layout, out_layout, dims_by_gws);
reverse(dispatchData.lws.begin(), dispatchData.lws.end());
reverse(dispatchData.gws.begin(), dispatchData.gws.end());
return dispatchData;
}