mirror of
https://github.com/neovim/neovim.git
synced 2025-02-25 18:55:25 -06:00
refactor(lsp): use LPeg for watchfiles matching (#23788)
This commit is contained in:
parent
3c6d971e54
commit
416fe8d185
@ -1,152 +1,81 @@
|
||||
local bit = require('bit')
|
||||
local lpeg = require('lpeg')
|
||||
local watch = require('vim._watch')
|
||||
local protocol = require('vim.lsp.protocol')
|
||||
|
||||
local M = {}
|
||||
|
||||
---@private
|
||||
---Parses the raw pattern into a number of Lua-native patterns.
|
||||
--- Parses the raw pattern into an |lpeg| pattern. LPeg patterns natively support the "this" or "that"
|
||||
--- alternative constructions described in the LSP spec that cannot be expressed in a standard Lua pattern.
|
||||
---
|
||||
---@param pattern string The raw glob pattern
|
||||
---@return table A list of Lua patterns. A match with any of them matches the input glob pattern.
|
||||
---@return userdata|nil An |lpeg| representation of the pattern, or nil if the pattern is invalid.
|
||||
local function parse(pattern)
|
||||
local patterns = { '' }
|
||||
local l = lpeg
|
||||
|
||||
local path_sep = '[/\\]'
|
||||
local non_path_sep = '[^/\\]'
|
||||
local P, S, V = lpeg.P, lpeg.S, lpeg.V
|
||||
local C, Cc, Ct, Cf = lpeg.C, lpeg.Cc, lpeg.Ct, lpeg.Cf
|
||||
|
||||
local function append(chunks)
|
||||
local new_patterns = {}
|
||||
for _, p in ipairs(patterns) do
|
||||
for _, chunk in ipairs(chunks) do
|
||||
table.insert(new_patterns, p .. chunk)
|
||||
end
|
||||
local pathsep = '/'
|
||||
|
||||
local function class(inv, ranges)
|
||||
for i, r in ipairs(ranges) do
|
||||
ranges[i] = r[1] .. r[2]
|
||||
end
|
||||
patterns = new_patterns
|
||||
local patt = l.R(unpack(ranges))
|
||||
if inv == '!' then
|
||||
patt = P(1) - patt
|
||||
end
|
||||
return patt
|
||||
end
|
||||
|
||||
local function split(s, sep)
|
||||
local segments = {}
|
||||
local segment = ''
|
||||
local in_braces = false
|
||||
local in_brackets = false
|
||||
for i = 1, #s do
|
||||
local c = string.sub(s, i, i)
|
||||
if c == sep and not in_braces and not in_brackets then
|
||||
table.insert(segments, segment)
|
||||
segment = ''
|
||||
else
|
||||
if c == '{' then
|
||||
in_braces = true
|
||||
elseif c == '}' then
|
||||
in_braces = false
|
||||
elseif c == '[' then
|
||||
in_brackets = true
|
||||
elseif c == ']' then
|
||||
in_brackets = false
|
||||
end
|
||||
segment = segment .. c
|
||||
end
|
||||
end
|
||||
if segment ~= '' then
|
||||
table.insert(segments, segment)
|
||||
end
|
||||
return segments
|
||||
local function add(acc, a)
|
||||
return acc + a
|
||||
end
|
||||
|
||||
local function escape(c)
|
||||
if
|
||||
c == '?'
|
||||
or c == '.'
|
||||
or c == '('
|
||||
or c == ')'
|
||||
or c == '%'
|
||||
or c == '['
|
||||
or c == ']'
|
||||
or c == '*'
|
||||
or c == '+'
|
||||
or c == '-'
|
||||
then
|
||||
return '%' .. c
|
||||
end
|
||||
return c
|
||||
local function mul(acc, m)
|
||||
return acc * m
|
||||
end
|
||||
|
||||
local segments = split(pattern, '/')
|
||||
for i, segment in ipairs(segments) do
|
||||
local last_seg = i == #segments
|
||||
if segment == '**' then
|
||||
local chunks = {
|
||||
path_sep .. '-',
|
||||
'.-' .. path_sep,
|
||||
}
|
||||
if last_seg then
|
||||
chunks = { '.-' }
|
||||
end
|
||||
append(chunks)
|
||||
else
|
||||
local in_braces = false
|
||||
local brace_val = ''
|
||||
local in_brackets = false
|
||||
local bracket_val = ''
|
||||
for j = 1, #segment do
|
||||
local char = string.sub(segment, j, j)
|
||||
if char ~= '}' and in_braces then
|
||||
brace_val = brace_val .. char
|
||||
else
|
||||
if in_brackets and (char ~= ']' or bracket_val == '') then
|
||||
local res
|
||||
if char == '-' then
|
||||
res = char
|
||||
elseif bracket_val == '' and char == '!' then
|
||||
res = '^'
|
||||
elseif char == '/' then
|
||||
res = ''
|
||||
else
|
||||
res = escape(char)
|
||||
end
|
||||
bracket_val = bracket_val .. res
|
||||
else
|
||||
if char == '{' then
|
||||
in_braces = true
|
||||
elseif char == '[' then
|
||||
in_brackets = true
|
||||
elseif char == '}' then
|
||||
local choices = split(brace_val, ',')
|
||||
local parsed_choices = {}
|
||||
for _, choice in ipairs(choices) do
|
||||
table.insert(parsed_choices, parse(choice))
|
||||
end
|
||||
append(vim.tbl_flatten(parsed_choices))
|
||||
in_braces = false
|
||||
brace_val = ''
|
||||
elseif char == ']' then
|
||||
append({ '[' .. bracket_val .. ']' })
|
||||
in_brackets = false
|
||||
bracket_val = ''
|
||||
elseif char == '?' then
|
||||
append({ non_path_sep })
|
||||
elseif char == '*' then
|
||||
append({ non_path_sep .. '-' })
|
||||
else
|
||||
append({ escape(char) })
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
if not last_seg and (segments[i + 1] ~= '**' or i + 1 < #segments) then
|
||||
append({ path_sep })
|
||||
end
|
||||
end
|
||||
local function star(stars, after)
|
||||
return (-after * (l.P(1) - pathsep)) ^ #stars * after
|
||||
end
|
||||
|
||||
return patterns
|
||||
local function dstar(after)
|
||||
return (-after * l.P(1)) ^ 0 * after
|
||||
end
|
||||
|
||||
local p = P({
|
||||
'Pattern',
|
||||
Pattern = V('Elem') ^ -1 * V('End'),
|
||||
Elem = Cf(
|
||||
(V('DStar') + V('Star') + V('Ques') + V('Class') + V('CondList') + V('Literal'))
|
||||
* (V('Elem') + V('End')),
|
||||
mul
|
||||
),
|
||||
DStar = P('**') * (P(pathsep) * (V('Elem') + V('End')) + V('End')) / dstar,
|
||||
Star = C(P('*') ^ 1) * (V('Elem') + V('End')) / star,
|
||||
Ques = P('?') * Cc(l.P(1) - pathsep),
|
||||
Class = P('[') * C(P('!') ^ -1) * Ct(Ct(C(1) * '-' * C(P(1) - ']')) ^ 1 * ']') / class,
|
||||
CondList = P('{') * Cf(V('Cond') * (P(',') * V('Cond')) ^ 0, add) * '}',
|
||||
-- TODO: '*' inside a {} condition is interpreted literally but should probably have the same
|
||||
-- wildcard semantics it usually has.
|
||||
-- Fixing this is non-trivial because '*' should match non-greedily up to "the rest of the
|
||||
-- pattern" which in all other cases is the entire succeeding part of the pattern, but at the end of a {}
|
||||
-- condition means "everything after the {}" where several other options separated by ',' may
|
||||
-- exist in between that should not be matched by '*'.
|
||||
Cond = Cf((V('Ques') + V('Class') + V('CondList') + (V('Literal') - S(',}'))) ^ 1, mul)
|
||||
+ Cc(l.P(0)),
|
||||
Literal = P(1) / l.P,
|
||||
End = P(-1) * Cc(l.P(-1)),
|
||||
})
|
||||
|
||||
return p:match(pattern)
|
||||
end
|
||||
|
||||
---@private
|
||||
--- Implementation of LSP 3.17.0's pattern matching: https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#pattern
|
||||
--- Modeled after VSCode's implementation: https://github.com/microsoft/vscode/blob/0319eed971719ad48e9093daba9d65a5013ec5ab/src/vs/base/common/glob.ts#L509
|
||||
---
|
||||
---@param pattern string|userdata The glob pattern (raw string or parsed |lpeg| pattern) to match.
|
||||
---@param s string The string to match against pattern.
|
||||
@ -155,15 +84,7 @@ function M._match(pattern, s)
|
||||
if type(pattern) == 'string' then
|
||||
pattern = parse(pattern)
|
||||
end
|
||||
-- Since Lua's built-in string pattern matching does not have an alternate
|
||||
-- operator like '|', `parse` will construct one pattern for each possible
|
||||
-- alternative. Any pattern that matches thus matches the glob.
|
||||
for _, p in ipairs(pattern) do
|
||||
if s:match('^' .. p .. '$') then
|
||||
return true
|
||||
end
|
||||
end
|
||||
return false
|
||||
return pattern:match(s) ~= nil
|
||||
end
|
||||
|
||||
M._watchfunc = (vim.fn.has('win32') == 1 or vim.fn.has('mac') == 1) and watch.watch or watch.poll
|
||||
@ -226,11 +147,11 @@ function M.register(reg, ctx)
|
||||
local kind = w.kind
|
||||
or protocol.WatchKind.Create + protocol.WatchKind.Change + protocol.WatchKind.Delete
|
||||
|
||||
local pattern = glob_pattern.pattern
|
||||
local pattern = parse(glob_pattern.pattern)
|
||||
assert(pattern, 'invalid pattern: ' .. glob_pattern.pattern)
|
||||
if relative_pattern then
|
||||
pattern = base_dir .. '/' .. pattern
|
||||
pattern = lpeg.P(base_dir .. '/') * pattern
|
||||
end
|
||||
pattern = parse(pattern)
|
||||
|
||||
table.insert(watch_regs, {
|
||||
base_dir = base_dir,
|
||||
|
@ -2,7 +2,6 @@ local helpers = require('test.functional.helpers')(after_each)
|
||||
|
||||
local eq = helpers.eq
|
||||
local exec_lua = helpers.exec_lua
|
||||
local has_err = require('luassert').has.errors
|
||||
|
||||
describe('vim.lsp._watchfiles', function()
|
||||
before_each(helpers.clear)
|
||||
@ -17,21 +16,31 @@ describe('vim.lsp._watchfiles', function()
|
||||
eq(true, match('', ''))
|
||||
eq(false, match('', 'a'))
|
||||
eq(true, match('a', 'a'))
|
||||
eq(true, match('/', '/'))
|
||||
eq(true, match('abc', 'abc'))
|
||||
eq(false, match('abc', 'abcdef'))
|
||||
eq(false, match('abc', 'a'))
|
||||
eq(false, match('abc', 'bc'))
|
||||
eq(false, match('a', 'b'))
|
||||
eq(false, match('.', 'a'))
|
||||
eq(true, match('$', '$'))
|
||||
eq(true, match('/dir', '/dir'))
|
||||
eq(true, match('dir/', 'dir/'))
|
||||
eq(true, match('dir/subdir', 'dir/subdir'))
|
||||
eq(false, match('dir/subdir', 'subdir'))
|
||||
eq(false, match('dir/subdir', 'dir/subdir/file'))
|
||||
eq(true, match('🤠', '🤠'))
|
||||
end)
|
||||
|
||||
it('should match * wildcards', function()
|
||||
-- eq(false, match('*', '')) -- TODO: this fails
|
||||
eq(false, match('*', ''))
|
||||
eq(true, match('*', 'a'))
|
||||
eq(false, match('*', '/'))
|
||||
eq(false, match('*', '/a'))
|
||||
eq(false, match('*', 'a/'))
|
||||
eq(true, match('*', 'aaa'))
|
||||
eq(true, match('*a', 'aa'))
|
||||
eq(true, match('*a', 'abca'))
|
||||
eq(true, match('*.txt', 'file.txt'))
|
||||
eq(false, match('*.txt', 'file.txtxt'))
|
||||
eq(false, match('*.txt', 'dir/file.txt'))
|
||||
@ -40,9 +49,31 @@ describe('vim.lsp._watchfiles', function()
|
||||
eq(false, match('*.dir', 'test.dir/file'))
|
||||
eq(true, match('file.*', 'file.txt'))
|
||||
eq(false, match('file.*', 'not-file.txt'))
|
||||
eq(true, match('*/file.txt', 'dir/file.txt'))
|
||||
eq(false, match('*/file.txt', 'dir/subdir/file.txt'))
|
||||
eq(false, match('*/file.txt', '/dir/file.txt'))
|
||||
eq(true, match('dir/*', 'dir/file.txt'))
|
||||
eq(false, match('dir/*', 'dir'))
|
||||
eq(false, match('dir/*.txt', 'file.txt'))
|
||||
eq(true, match('dir/*.txt', 'dir/file.txt'))
|
||||
eq(false, match('dir/*.txt', 'dir/subdir/file.txt'))
|
||||
eq(false, match('dir/*/file.txt', 'dir/file.txt'))
|
||||
eq(true, match('dir/*/file.txt', 'dir/subdir/file.txt'))
|
||||
eq(false, match('dir/*/file.txt', 'dir/subdir/subdir/file.txt'))
|
||||
|
||||
-- TODO: The spec does not describe this, but VSCode only interprets ** when it's by
|
||||
-- itself in a path segment, and otherwise interprets ** as consecutive * directives.
|
||||
-- The following tests show how this behavior should work, but it is not yet fully implemented.
|
||||
-- Currently, "a**" parses incorrectly as "a" "**" and "**a" parses correctly as "*" "*" "a".
|
||||
-- see: https://github.com/microsoft/vscode/blob/eef30e7165e19b33daa1e15e92fa34ff4a5df0d3/src/vs/base/common/glob.ts#L112
|
||||
eq(true, match('a**', 'abc')) -- '**' should parse as two '*'s when not by itself in a path segment
|
||||
eq(true, match('**c', 'abc'))
|
||||
-- eq(false, match('a**', 'ab')) -- each '*' should still represent at least one character
|
||||
eq(false, match('**c', 'bc'))
|
||||
eq(true, match('a**', 'abcd'))
|
||||
eq(true, match('**d', 'abcd'))
|
||||
-- eq(false, match('a**', 'abc/d'))
|
||||
eq(false, match('**d', 'abc/d'))
|
||||
end)
|
||||
|
||||
it('should match ? wildcards', function()
|
||||
@ -58,52 +89,64 @@ describe('vim.lsp._watchfiles', function()
|
||||
it('should match ** wildcards', function()
|
||||
eq(true, match('**', ''))
|
||||
eq(true, match('**', 'a'))
|
||||
eq(true, match('**', '/'))
|
||||
eq(true, match('**', 'a/'))
|
||||
eq(true, match('**', '/a'))
|
||||
eq(true, match('**', 'C:/a'))
|
||||
eq(true, match('**', 'a/a'))
|
||||
eq(true, match('**', 'a/a/a'))
|
||||
eq(false, match('a**', ''))
|
||||
eq(true, match('a**', 'a'))
|
||||
eq(true, match('a**', 'abcd'))
|
||||
eq(false, match('a**', 'ba'))
|
||||
eq(false, match('a**', 'a/b'))
|
||||
eq(false, match('**a', ''))
|
||||
eq(true, match('**a', 'a'))
|
||||
eq(true, match('**a', 'dcba'))
|
||||
eq(false, match('**a', 'ab'))
|
||||
eq(false, match('**a', 'b/a'))
|
||||
eq(false, match('/**', '')) -- /** matches leading / literally
|
||||
eq(true, match('/**', '/'))
|
||||
eq(true, match('/**', '/a/b/c'))
|
||||
eq(true, match('**/', '')) -- **/ absorbs trailing /
|
||||
eq(true, match('**/', '/a/b/c'))
|
||||
eq(true, match('**/**', ''))
|
||||
eq(true, match('**/**', 'a'))
|
||||
eq(false, match('a/**', ''))
|
||||
eq(true, match('a/**', 'a'))
|
||||
eq(false, match('a/**', 'a'))
|
||||
eq(true, match('a/**', 'a/b'))
|
||||
eq(true, match('a/**', 'a/b/c'))
|
||||
eq(false, match('a/**', 'b/a'))
|
||||
eq(false, match('a/**', '/a'))
|
||||
eq(false, match('**/a', ''))
|
||||
eq(true, match('**/a', 'a'))
|
||||
eq(false, match('**/a', 'a/b'))
|
||||
eq(true, match('**/a', '/a'))
|
||||
eq(true, match('**/a', '/b/a'))
|
||||
eq(true, match('**/a', '/c/b/a'))
|
||||
eq(true, match('**/a', '/a/a'))
|
||||
eq(true, match('**/a', '/abc/a'))
|
||||
eq(false, match('a/**/c', 'a'))
|
||||
eq(false, match('a/**/c', 'c'))
|
||||
eq(true, match('a/**/c', 'a/c'))
|
||||
eq(true, match('a/**/c', 'a/b/c'))
|
||||
eq(true, match('a/**/c', 'a/b/b/c'))
|
||||
eq(true, match('**/a/**', 'a'))
|
||||
eq(true, match('**/a/**', '/dir/a'))
|
||||
eq(false, match('**/a/**', 'a'))
|
||||
eq(true, match('**/a/**', 'a/'))
|
||||
eq(false, match('**/a/**', '/dir/a'))
|
||||
eq(false, match('**/a/**', 'dir/a'))
|
||||
eq(true, match('**/a/**', 'dir/a/'))
|
||||
eq(true, match('**/a/**', 'a/dir'))
|
||||
eq(true, match('**/a/**', 'dir/a/dir'))
|
||||
eq(true, match('**/a/**', '/a/dir'))
|
||||
eq(true, match('**/a/**', 'C:/a/dir'))
|
||||
-- eq(false, match('**/a/**', 'a.txt')) -- TODO: this fails
|
||||
eq(false, match('**/a/**', 'a.txt'))
|
||||
end)
|
||||
|
||||
it('should match {} groups', function()
|
||||
eq(false, match('{}', ''))
|
||||
eq(true, match('{,}', ''))
|
||||
eq(true, match('{}', ''))
|
||||
eq(false, match('{}', 'a'))
|
||||
eq(true, match('a{}', 'a'))
|
||||
eq(true, match('{}a', 'a'))
|
||||
eq(true, match('{,}', ''))
|
||||
eq(true, match('{a,}', ''))
|
||||
eq(true, match('{a,}', 'a'))
|
||||
eq(true, match('{a}', 'a'))
|
||||
eq(false, match('{a}', 'aa'))
|
||||
eq(false, match('{a}', 'ab'))
|
||||
eq(true, match('{a?c}', 'abc'))
|
||||
eq(false, match('{ab}', 'a'))
|
||||
eq(false, match('{ab}', 'b'))
|
||||
eq(true, match('{ab}', 'ab'))
|
||||
eq(true, match('{a,b}', 'a'))
|
||||
eq(true, match('{a,b}', 'b'))
|
||||
@ -112,11 +155,11 @@ describe('vim.lsp._watchfiles', function()
|
||||
eq(false, match('{ab,cd}', 'a'))
|
||||
eq(true, match('{ab,cd}', 'cd'))
|
||||
eq(true, match('{a,b,c}', 'c'))
|
||||
eq(false, match('{a,{b,c}}', 'c')) -- {} can't nest
|
||||
eq(true, match('{a,{b,c}}', 'c'))
|
||||
end)
|
||||
|
||||
it('should match [] groups', function()
|
||||
eq(true, match('[]', ''))
|
||||
eq(true, match('[]', '[]')) -- empty [] is a literal
|
||||
eq(false, match('[a-z]', ''))
|
||||
eq(true, match('[a-z]', 'a'))
|
||||
eq(false, match('[a-z]', 'ab'))
|
||||
@ -141,7 +184,7 @@ describe('vim.lsp._watchfiles', function()
|
||||
end)
|
||||
|
||||
it('should match [!...] groups', function()
|
||||
has_err(function() match('[!]', '') end) -- not a valid pattern
|
||||
eq(true, match('[!]', '[!]')) -- [!] is a literal
|
||||
eq(false, match('[!a-z]', ''))
|
||||
eq(false, match('[!a-z]', 'a'))
|
||||
eq(false, match('[!a-z]', 'z'))
|
||||
@ -159,11 +202,17 @@ describe('vim.lsp._watchfiles', function()
|
||||
it('should match complex patterns', function()
|
||||
eq(false, match('**/*.{c,h}', ''))
|
||||
eq(false, match('**/*.{c,h}', 'c'))
|
||||
eq(false, match('**/*.{c,h}', 'file.m'))
|
||||
eq(true, match('**/*.{c,h}', 'file.c'))
|
||||
eq(true, match('**/*.{c,h}', 'file.h'))
|
||||
eq(true, match('**/*.{c,h}', '/file.c'))
|
||||
eq(true, match('**/*.{c,h}', 'dir/subdir/file.c'))
|
||||
eq(true, match('**/*.{c,h}', 'dir/subdir/file.h'))
|
||||
eq(true, match('**/*.{c,h}', '/dir/subdir/file.c'))
|
||||
eq(true, match('**/*.{c,h}', 'C:/dir/subdir/file.c'))
|
||||
eq(true, match('/dir/**/*.{c,h}', '/dir/file.c'))
|
||||
eq(false, match('/dir/**/*.{c,h}', 'dir/file.c'))
|
||||
eq(true, match('/dir/**/*.{c,h}', '/dir/subdir/subdir/file.c'))
|
||||
|
||||
eq(true, match('{[0-9],[a-z]}', '0'))
|
||||
eq(true, match('{[0-9],[a-z]}', 'a'))
|
||||
|
Loading…
Reference in New Issue
Block a user