diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/softmax/softmax_kernel_bf.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/softmax/softmax_kernel_bf.cpp index 1474ee9d1d6..8bd8fabed72 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/softmax/softmax_kernel_bf.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/softmax/softmax_kernel_bf.cpp @@ -54,9 +54,9 @@ SoftmaxKernel_bf::Parent::DispatchData SoftmaxKernel_bf::SetDefault(const softma } dispatchData.leftovers = dispatchData.dataSetSize % dispatchData.lws[0]; - //if (dispatchData.leftovers % subgroup_size) { // To use subgroup read/write, the starting address should be aligned to 128 bit - if ((dispatchData.dataSetSize * params.inputs[0].ElementSize()) >> 4) { + size_t dataSetSizeInByte = dispatchData.dataSetSize * params.inputs[0].ElementSize(); + if ((dispatchData.dataSetsCount > 1) && ((dataSetSizeInByte - ((dataSetSizeInByte >> 4) << 4)))) { dispatchData.subgroupBlockSize = 1; } else { if (dispatchData.itemsNum >> 3)