From bee357bcf80927bf90ccd4657d04ed1d2718498f Mon Sep 17 00:00:00 2001 From: Taylor Yeonbok Lee Date: Wed, 12 Apr 2023 12:01:31 -0700 Subject: [PATCH] Fix softmax perf of stable diffusion (#16869) --- .../src/kernel_selector/kernels/softmax/softmax_kernel_bf.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/softmax/softmax_kernel_bf.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/softmax/softmax_kernel_bf.cpp index 1474ee9d1d6..8bd8fabed72 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/softmax/softmax_kernel_bf.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/softmax/softmax_kernel_bf.cpp @@ -54,9 +54,9 @@ SoftmaxKernel_bf::Parent::DispatchData SoftmaxKernel_bf::SetDefault(const softma } dispatchData.leftovers = dispatchData.dataSetSize % dispatchData.lws[0]; - //if (dispatchData.leftovers % subgroup_size) { // To use subgroup read/write, the starting address should be aligned to 128 bit - if ((dispatchData.dataSetSize * params.inputs[0].ElementSize()) >> 4) { + size_t dataSetSizeInByte = dispatchData.dataSetSize * params.inputs[0].ElementSize(); + if ((dispatchData.dataSetsCount > 1) && ((dataSetSizeInByte - ((dataSetSizeInByte >> 4) << 4)))) { dispatchData.subgroupBlockSize = 1; } else { if (dispatchData.itemsNum >> 3)