[GPU] Update tuning params of shape agnostic version of fully_connected_bf_tiled kernel for dGPUs (#16482)
parent 7601e8a874
commit e434c320f5
@@ -201,12 +201,21 @@ FullyConnected_bf_tiled::GetAutoTuneParams(const fully_connected_params& params,
         max_tile_ofm *= 2;
 
     if (params.is_shape_agnostic) {
+        // Use special tuning params for Gen12HP dGPUs, since these parameters demonstrate higher performance
+        // due to better HW utilization (reduced TILE_OFM parameter) and better assembler kernel's code
+        // generation (extended TILE_K parameter) for both FP16 and FP32 data types
         if (dtype == Datatype::F16) {
             // tune_params(tile_b, tile_ofm, tile_ifm, tile_k, dispatch_bsv, dispatch_fsv, exec_options)
-            selector.Case(tune_params(8, std::min(max_tile_ofm, 2u), 1, 2, 1, 1, EXE_MODE_AGE_BASED));
+            if (params.engineInfo.supports_immad)
+                selector.Case(tune_params(8, 1, 1, 4, 1, 1, EXE_MODE_AGE_BASED));
+            else
+                selector.Case(tune_params(8, std::min(max_tile_ofm, 2u), 1, 2, 1, 1, EXE_MODE_AGE_BASED));
         } else if (dtype == Datatype::F32) {
             // tune_params(tile_b, tile_ofm, tile_ifm, tile_k, dispatch_bsv, dispatch_fsv, exec_options)
-            selector.Case(tune_params(8, std::min(max_tile_ofm, 2u), 1, 1, 1, 1, EXE_MODE_AGE_BASED));
+            if (params.engineInfo.supports_immad)
+                selector.Case(tune_params(8, 1, 1, 4, 1, 1, EXE_MODE_AGE_BASED));
+            else
+                selector.Case(tune_params(8, std::min(max_tile_ofm, 2u), 1, 1, 1, 1, EXE_MODE_AGE_BASED));
         }
     } else {
         if (dtype == Datatype::F16) {
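For reference, the positional meaning of the tune_params arguments matches the in-code comment above: tile_b, tile_ofm, tile_ifm, tile_k, dispatch_bsv, dispatch_fsv, exec_options. Below is a minimal, hypothetical C++ sketch (not the kernel's actual selector implementation) of the first-match case-selector pattern the diff relies on: candidates registered via Case() are tried in order, and the supports_immad branch simply registers a different candidate (TILE_OFM reduced to 1, TILE_K extended to 4). The class name, Select() helper, and the assumption that supports_immad marks XMX/DPAS-capable dGPUs are illustrative, not taken from the source.

// Illustrative sketch only: a first-match-wins selector over candidate tuning parameter sets.
#include <algorithm>
#include <cstdint>
#include <functional>
#include <iostream>
#include <string>
#include <vector>

struct tune_params {
    uint32_t tile_b, tile_ofm, tile_ifm, tile_k;
    uint32_t dispatch_bsv, dispatch_fsv;
    std::string exec_options;
};

class TuneSelector {  // hypothetical name, for illustration
public:
    // Register a candidate; candidates are tried in registration order.
    TuneSelector& Case(const tune_params& t) {
        candidates_.push_back(t);
        return *this;
    }
    // Return the first candidate accepted by the validity predicate,
    // or the fallback if none matches.
    tune_params Select(const std::function<bool(const tune_params&)>& is_valid,
                       const tune_params& fallback) const {
        for (const auto& t : candidates_)
            if (is_valid(t))
                return t;
        return fallback;
    }
private:
    std::vector<tune_params> candidates_;
};

int main() {
    const bool supports_immad = true;   // assumed: dGPU with XMX/DPAS support
    const uint32_t max_tile_ofm = 4;

    TuneSelector selector;
    if (supports_immad)
        selector.Case({8, 1, 1, 4, 1, 1, "AGE_BASED"});   // reduced TILE_OFM, extended TILE_K
    else
        selector.Case({8, std::min<uint32_t>(max_tile_ofm, 2u), 1, 2, 1, 1, "AGE_BASED"});

    // Accept every candidate here; the real kernel applies shape/HW validity checks.
    tune_params picked = selector.Select([](const tune_params&) { return true; },
                                          {1, 1, 1, 1, 1, 1, "AGE_BASED"});
    std::cout << "tile_ofm=" << picked.tile_ofm << " tile_k=" << picked.tile_k << "\n";
    return 0;
}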