fix(lsp): str_byteindex_enc bounds checking #30747

Problem:
Previously the index was only checked against the UTF-8 byte length of the
line, whatever the requested encoding. This could cause unexpected behaviour
for strings containing multibyte characters.

Solution:
Check each index against the maximum value for its encoding (the UTF-8,
UTF-16 or UTF-32 length of the line) before returning the fallback length.
This commit is contained in:
Tristan Knight 2024-10-16 17:12:19 +01:00 committed by GitHub
parent f72dc2b4c8
commit 80e37aa533
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -171,25 +171,24 @@ end
---@param encoding string utf-8|utf-16|utf-32| defaults to utf-16
---@return integer byte (utf-8) index of `encoding` index `index` in `line`
function M._str_byteindex_enc(line, index, encoding)
local len = #line
if index > len then
-- LSP spec: if character > line length, default to the line length.
-- https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#position
return len
end
-- LSP spec: if character > line length, default to the line length.
-- https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#position
local len8 = #line
if not encoding then
encoding = 'utf-16'
end
if encoding == 'utf-8' then
if index then
if index and index <= len8 then
return index
else
return len
return len8
end
elseif encoding == 'utf-16' then
return vim.str_byteindex(line, index, true)
end
local len32, len16 = vim.str_utfindex(line)
if encoding == 'utf-16' then
return index <= len16 and vim.str_byteindex(line, index, true) or len8
elseif encoding == 'utf-32' then
return vim.str_byteindex(line, index)
return index <= len32 and vim.str_byteindex(line, index) or len8
else
error('Invalid encoding: ' .. vim.inspect(encoding))
end