[GPU] Gemm opt tile_n min size fix (#10369)
This commit is contained in:
@@ -60,17 +60,14 @@ GemmKernelTiledOpt::GemmTuningData GemmKernelTiledOpt::SetTuningParams(const gem
|
||||
auto total_batches = output.LogicalSize() / (output.X().v * output.Y().v);
|
||||
tuning_data.simd_size = 8;
|
||||
|
||||
if (n_size >= 8) {
|
||||
tuning_data.tile_n_size = tuning_data.simd_size;
|
||||
|
||||
while (tuning_data.tile_n_size < 64 && n_size / (tuning_data.tile_n_size * 2) >= 1) {
|
||||
tuning_data.tile_n_size *= 2;
|
||||
}
|
||||
tuning_data.tile_n_size = tuning_data.simd_size;
|
||||
while (tuning_data.tile_n_size < 64 && n_size / (tuning_data.tile_n_size * 2) >= 1) {
|
||||
tuning_data.tile_n_size *= 2;
|
||||
}
|
||||
|
||||
// tuning_data.tile_k_size must be the same as simd_size when k % tile_k != 0
|
||||
tuning_data.tile_k_size = tuning_data.simd_size;
|
||||
tuning_data.tile_m_size = 8;
|
||||
tuning_data.tile_m_size = tuning_data.simd_size;
|
||||
|
||||
bool leftovers = m_size % tuning_data.tile_m_size || k_size % tuning_data.tile_k_size || n_size % tuning_data.tile_n_size;
|
||||
|
||||
@@ -78,7 +75,7 @@ GemmKernelTiledOpt::GemmTuningData GemmKernelTiledOpt::SetTuningParams(const gem
|
||||
tuning_data.simd_size = 16;
|
||||
tuning_data.tile_n_size = tuning_data.simd_size;
|
||||
tuning_data.tile_k_size = tuning_data.simd_size;
|
||||
tuning_data.tile_m_size = 16;
|
||||
tuning_data.tile_m_size = tuning_data.simd_size;
|
||||
}
|
||||
|
||||
return tuning_data;
|
||||
|
||||
@@ -16,7 +16,7 @@ public:
|
||||
size_t simd_size = 8;
|
||||
size_t tile_m_size = 1;
|
||||
size_t tile_k_size = 1;
|
||||
size_t tile_n_size = 1;
|
||||
size_t tile_n_size = 8;
|
||||
};
|
||||
|
||||
GemmKernelTiledOpt() : GemmKernelBase("gemm_tiled_opt") {}
|
||||
|
||||
Reference in New Issue
Block a user