Merge #32082 refactor(treesitter): use coroutines for resuming _parse()

This commit is contained in:
Justin M. Keyes 2025-02-03 09:11:04 -08:00 committed by GitHub
commit eacd662ccb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 132 additions and 95 deletions

View File

@ -379,6 +379,8 @@ TREESITTER
activated by passing the `on_parse` callback parameter.
• |vim.treesitter.query.set()| can now inherit and/or extend runtime file
queries in addition to overriding.
• |LanguageTree:is_valid()| now accepts a range parameter to narrow the scope
of the validity check.
TUI TUI

View File

@ -1581,7 +1581,8 @@ LanguageTree:invalidate({reload}) *LanguageTree:invalidate()*
Parameters: ~ Parameters: ~
• {reload} (`boolean?`) • {reload} (`boolean?`)
LanguageTree:is_valid({exclude_children}) *LanguageTree:is_valid()* *LanguageTree:is_valid()*
LanguageTree:is_valid({exclude_children}, {range})
Returns whether this LanguageTree is valid, i.e., |LanguageTree:trees()|
reflects the latest state of the source. If invalid, user should call
|LanguageTree:parse()|.
@ -1589,6 +1590,7 @@ LanguageTree:is_valid({exclude_children}) *LanguageTree:is_valid()*
Parameters: ~
• {exclude_children} (`boolean?`) whether to ignore the validity of
children (default `false`)
• {range} (`Range?`) range to check for validity
Return: ~ Return: ~
(`boolean`) (`boolean`)

View File

@ -60,6 +60,8 @@ local default_parse_timeout_ms = 3
---| 'on_child_added' ---| 'on_child_added'
---| 'on_child_removed' ---| 'on_child_removed'
---@alias ParserThreadState { timeout: integer? }
--- @type table<TSCallbackNameOn,TSCallbackName> --- @type table<TSCallbackNameOn,TSCallbackName>
local TSCallbackNames = { local TSCallbackNames = {
on_changedtree = 'changedtree', on_changedtree = 'changedtree',
@ -83,6 +85,8 @@ local TSCallbackNames = {
---Table of callback queues, keyed by each region for which the callbacks should be run ---Table of callback queues, keyed by each region for which the callbacks should be run
---@field private _cb_queues table<string, fun(err?: string, trees?: table<integer, TSTree>)[]> ---@field private _cb_queues table<string, fun(err?: string, trees?: table<integer, TSTree>)[]>
---@field private _regions table<integer, Range6[]>? ---@field private _regions table<integer, Range6[]>?
---The total number of regions. Since _regions can have holes, we cannot simply read this value from #_regions.
---@field private _num_regions integer
---List of regions this tree should manage and parse. If nil then regions are ---List of regions this tree should manage and parse. If nil then regions are
---taken from _trees. This is mostly a short-lived cache for included_regions() ---taken from _trees. This is mostly a short-lived cache for included_regions()
---@field private _lang string Language name ---@field private _lang string Language name
@ -90,7 +94,8 @@ local TSCallbackNames = {
---@field private _source (integer|string) Buffer or string to parse ---@field private _source (integer|string) Buffer or string to parse
---@field private _trees table<integer, TSTree> Reference to parsed tree (one for each language). ---@field private _trees table<integer, TSTree> Reference to parsed tree (one for each language).
---Each key is the index of region, which is synced with _regions and _valid. ---Each key is the index of region, which is synced with _regions and _valid.
---@field private _valid boolean|table<integer,boolean> If the parsed tree is valid ---@field private _valid_regions table<integer,true> Set of valid region IDs.
---@field private _is_entirely_valid boolean Whether the entire tree (excluding children) is valid.
---@field private _logger? fun(logtype: string, msg: string) ---@field private _logger? fun(logtype: string, msg: string)
---@field private _logfile? file* ---@field private _logfile? file*
local LanguageTree = {} local LanguageTree = {}
@ -132,7 +137,9 @@ function LanguageTree.new(source, lang, opts)
_injection_query = injections[lang] and query.parse(lang, injections[lang]) _injection_query = injections[lang] and query.parse(lang, injections[lang])
or query.get(lang, 'injections'), or query.get(lang, 'injections'),
_injections_processed = false, _injections_processed = false,
_valid = false, _valid_regions = {},
_num_regions = 1,
_is_entirely_valid = false,
_parser = vim._create_ts_parser(lang), _parser = vim._create_ts_parser(lang),
_ranges_being_parsed = {}, _ranges_being_parsed = {},
_cb_queues = {}, _cb_queues = {},
@ -238,7 +245,8 @@ end
--- tree in treesitter. Doesn't clear filesystem cache. Called often, so needs to be fast. --- tree in treesitter. Doesn't clear filesystem cache. Called often, so needs to be fast.
---@param reload boolean|nil ---@param reload boolean|nil
function LanguageTree:invalidate(reload) function LanguageTree:invalidate(reload)
self._valid = false self._valid_regions = {}
self._is_entirely_valid = false
self._parser:reset() self._parser:reset()
-- buffer was reloaded, reparse all trees -- buffer was reloaded, reparse all trees
@ -271,53 +279,6 @@ function LanguageTree:lang()
return self._lang return self._lang
end end
--- Reports whether this LanguageTree is up to date, meaning |LanguageTree:trees()| mirrors the
--- current state of the source. When it is not, the caller should run |LanguageTree:parse()|.
---@param exclude_children boolean|nil when truthy, child trees are not consulted (default `false`)
---@return boolean
function LanguageTree:is_valid(exclude_children)
  local state = self._valid
  local state_kind = type(state)

  -- Per-region bookkeeping: every included region must be flagged valid.
  if state_kind == 'table' then
    for index in pairs(self:included_regions()) do
      if not state[index] then
        return false
      end
    end
  end

  if not exclude_children then
    -- Injections that have not been processed yet count as invalid.
    if not self._injections_processed then
      return false
    end

    for _, subtree in pairs(self._children) do
      if not subtree:is_valid(exclude_children) then
        return false
      end
    end
  end

  -- A plain boolean is returned as stored; anything else has passed every check above,
  -- so the result is cached as `true` on the tree.
  if state_kind ~= 'boolean' then
    self._valid = true
    return true
  end

  return state
end
--- Returns a map of language to child tree.
---
--- The returned value is the tree's own `_children` table, not a copy.
--- @return table<string,vim.treesitter.LanguageTree>
function LanguageTree:children()
return self._children
end
--- Returns the source content of the language tree (bufnr or string).
---
--- This is the value stored in the `_source` field, returned unchanged.
--- @return integer|string
function LanguageTree:source()
return self._source
end
--- @param region Range6[] --- @param region Range6[]
--- @param range? boolean|Range --- @param range? boolean|Range
--- @return boolean --- @return boolean
@ -343,39 +304,88 @@ local function intercepts_region(region, range)
return false return false
end end
--- Reports whether this LanguageTree is up to date, meaning |LanguageTree:trees()| mirrors the
--- current state of the source. When it is not, the caller should run |LanguageTree:parse()|.
---@param exclude_children boolean? when truthy, child trees are not consulted (default `false`)
---@param range Range? when given, only regions intersecting this range are checked
---@return boolean
function LanguageTree:is_valid(exclude_children, range)
  if not self._is_entirely_valid then
    -- Without a range to narrow the check, a tree that is not entirely valid is invalid.
    if not range then
      return false
    end

    local valid = self._valid_regions
    -- TODO: Efficiently search for possibly intersecting regions using a binary search
    for id, region in pairs(self:included_regions()) do
      local is_stale = not valid[id]
      if is_stale and intercepts_region(region, range) then
        return false
      end
    end
  end

  if exclude_children then
    return true
  end

  -- Injections that have not been processed yet count as invalid.
  if not self._injections_processed then
    return false
  end

  for _, subtree in pairs(self._children) do
    if not subtree:is_valid(exclude_children, range) then
      return false
    end
  end

  return true
end
--- Returns a map of language to child tree.
---
--- The returned value is the tree's own `_children` table, not a copy.
--- @return table<string,vim.treesitter.LanguageTree>
function LanguageTree:children()
return self._children
end
--- Returns the source content of the language tree (bufnr or string).
---
--- This is the value stored in the `_source` field, returned unchanged.
--- @return integer|string
function LanguageTree:source()
return self._source
end
--- @private --- @private
--- @param range boolean|Range? --- @param range boolean|Range?
--- @param timeout integer? --- @param thread_state ParserThreadState
--- @return Range6[] changes --- @return Range6[] changes
--- @return integer no_regions_parsed --- @return integer no_regions_parsed
--- @return number total_parse_time --- @return number total_parse_time
--- @return boolean finished whether async parsing still needs time --- @return boolean finished whether async parsing still needs time
function LanguageTree:_parse_regions(range, timeout) function LanguageTree:_parse_regions(range, thread_state)
local changes = {} local changes = {}
local no_regions_parsed = 0 local no_regions_parsed = 0
local total_parse_time = 0 local total_parse_time = 0
if type(self._valid) ~= 'table' then
self._valid = {}
end
-- If there are no ranges, set to an empty list -- If there are no ranges, set to an empty list
-- so the included ranges in the parser are cleared. -- so the included ranges in the parser are cleared.
for i, ranges in pairs(self:included_regions()) do for i, ranges in pairs(self:included_regions()) do
if if
not self._valid[i] not self._valid_regions[i]
and ( and (
intercepts_region(ranges, range) intercepts_region(ranges, range)
or (self._trees[i] and intercepts_region(self._trees[i]:included_ranges(false), range)) or (self._trees[i] and intercepts_region(self._trees[i]:included_ranges(false), range))
) )
then then
self._parser:set_included_ranges(ranges) self._parser:set_included_ranges(ranges)
self._parser:set_timeout(timeout and timeout * 1000 or 0) -- ms -> micros self._parser:set_timeout(thread_state.timeout and thread_state.timeout * 1000 or 0) -- ms -> micros
local parse_time, tree, tree_changes = local parse_time, tree, tree_changes =
tcall(self._parser.parse, self._parser, self._trees[i], self._source, true) tcall(self._parser.parse, self._parser, self._trees[i], self._source, true)
while true do
if tree then
break
end
coroutine.yield(changes, no_regions_parsed, total_parse_time, false)
if not tree then parse_time, tree, tree_changes =
return changes, no_regions_parsed, total_parse_time, false tcall(self._parser.parse, self._parser, self._trees[i], self._source, true)
end end
self:_do_callback('changedtree', tree_changes, tree) self:_do_callback('changedtree', tree_changes, tree)
@ -384,7 +394,13 @@ function LanguageTree:_parse_regions(range, timeout)
total_parse_time = total_parse_time + parse_time total_parse_time = total_parse_time + parse_time
no_regions_parsed = no_regions_parsed + 1 no_regions_parsed = no_regions_parsed + 1
self._valid[i] = true self._valid_regions[i] = true
-- _valid_regions can have holes, but that is okay because this equality is only true when it
-- has no holes (meaning all regions are valid)
if #self._valid_regions == self._num_regions then
self._is_entirely_valid = true
end
end end
end end
@ -476,7 +492,11 @@ function LanguageTree:_async_parse(range, on_parse)
local ct = is_buffer_parser and buf.changedtick or nil local ct = is_buffer_parser and buf.changedtick or nil
local total_parse_time = 0 local total_parse_time = 0
local redrawtime = vim.o.redrawtime local redrawtime = vim.o.redrawtime
local timeout = not vim.g._ts_force_sync_parsing and default_parse_timeout_ms or nil
local thread_state = {} ---@type ParserThreadState
---@type fun(): table<integer, TSTree>, boolean
local parse = coroutine.wrap(self._parse)
local function step() local function step()
if is_buffer_parser then if is_buffer_parser then
@ -490,10 +510,12 @@ function LanguageTree:_async_parse(range, on_parse)
if buf.changedtick ~= ct then if buf.changedtick ~= ct then
ct = buf.changedtick ct = buf.changedtick
total_parse_time = 0 total_parse_time = 0
parse = coroutine.wrap(self._parse)
end end
end end
local parse_time, trees, finished = tcall(self._parse, self, range, timeout) thread_state.timeout = not vim.g._ts_force_sync_parsing and default_parse_timeout_ms or nil
local parse_time, trees, finished = tcall(parse, self, range, thread_state)
total_parse_time = total_parse_time + parse_time total_parse_time = total_parse_time + parse_time
if finished then if finished then
@ -535,17 +557,17 @@ function LanguageTree:parse(range, on_parse)
if on_parse then if on_parse then
return self:_async_parse(range, on_parse) return self:_async_parse(range, on_parse)
end end
local trees, _ = self:_parse(range) local trees, _ = self:_parse(range, {})
return trees return trees
end end
--- @private --- @private
--- @param range boolean|Range|nil --- @param range boolean|Range|nil
--- @param timeout integer? --- @param thread_state ParserThreadState
--- @return table<integer, TSTree> trees --- @return table<integer, TSTree> trees
--- @return boolean finished --- @return boolean finished
function LanguageTree:_parse(range, timeout) function LanguageTree:_parse(range, thread_state)
if self:is_valid() then if self:is_valid(nil, type(range) == 'table' and range or nil) then
self:_log('valid') self:_log('valid')
return self._trees, true return self._trees, true
end end
@ -558,12 +580,19 @@ function LanguageTree:_parse(range, timeout)
local total_parse_time = 0 local total_parse_time = 0
-- At least 1 region is invalid -- At least 1 region is invalid
if not self:is_valid(true) then if not self:is_valid(true, type(range) == 'table' and range or nil) then
local is_finished ---@type fun(self: vim.treesitter.LanguageTree, range: boolean|Range?, thread_state: ParserThreadState): Range6[], integer, number, boolean
changes, no_regions_parsed, total_parse_time, is_finished = self:_parse_regions(range, timeout) local parse_regions = coroutine.wrap(self._parse_regions)
timeout = timeout and math.max(timeout - total_parse_time, 0) while true do
if not is_finished then local is_finished
return self._trees, false changes, no_regions_parsed, total_parse_time, is_finished =
parse_regions(self, range, thread_state)
thread_state.timeout = thread_state.timeout
and math.max(thread_state.timeout - total_parse_time, 0)
if is_finished then
break
end
coroutine.yield(self._trees, false)
end end
-- Need to run injections when we parsed something -- Need to run injections when we parsed something
if no_regions_parsed > 0 then if no_regions_parsed > 0 then
@ -585,13 +614,20 @@ function LanguageTree:_parse(range, timeout)
}) })
for _, child in pairs(self._children) do for _, child in pairs(self._children) do
if timeout == 0 then if thread_state.timeout == 0 then
return self._trees, false coroutine.yield(self._trees, false)
end end
local ctime, _, child_finished = tcall(child._parse, child, range, timeout)
timeout = timeout and math.max(timeout - ctime, 0) ---@type fun(): table<integer, TSTree>, boolean
if not child_finished then local parse = coroutine.wrap(child._parse)
return self._trees, child_finished
while true do
local ctime, _, child_finished = tcall(parse, child, range, thread_state)
if child_finished then
thread_state.timeout = thread_state.timeout and math.max(thread_state.timeout - ctime, 0)
break
end
coroutine.yield(self._trees, child_finished)
end end
end end
@ -687,38 +723,34 @@ end
---region is valid or not. ---region is valid or not.
---@param fn fun(index: integer, region: Range6[]): boolean ---@param fn fun(index: integer, region: Range6[]): boolean
function LanguageTree:_iter_regions(fn) function LanguageTree:_iter_regions(fn)
if not self._valid then if vim.deep_equal(self._valid_regions, {}) then
return return
end end
local was_valid = type(self._valid) ~= 'table' if self._is_entirely_valid then
self:_log('was valid')
if was_valid then
self:_log('was valid', self._valid)
self._valid = {}
end end
local all_valid = true local all_valid = true
for i, region in pairs(self:included_regions()) do for i, region in pairs(self:included_regions()) do
if was_valid or self._valid[i] then if self._valid_regions[i] then
self._valid[i] = fn(i, region) -- Setting this to nil rather than false allows us to determine if all regions were parsed
if not self._valid[i] then -- just by checking the length of _valid_regions.
self._valid_regions[i] = fn(i, region) and true or nil
if not self._valid_regions[i] then
self:_log(function() self:_log(function()
return 'invalidating region', i, region_tostr(region) return 'invalidating region', i, region_tostr(region)
end) end)
end end
end end
if not self._valid[i] then if not self._valid_regions[i] then
all_valid = false all_valid = false
end end
end end
-- Compress the valid value to 'true' if there are no invalid regions self._is_entirely_valid = all_valid
if all_valid then
self._valid = all_valid
end
end end
--- Sets the included regions that should be parsed by this |LanguageTree|. --- Sets the included regions that should be parsed by this |LanguageTree|.
@ -768,6 +800,7 @@ function LanguageTree:set_included_regions(new_regions)
end end
self._regions = new_regions self._regions = new_regions
self._num_regions = #new_regions
end end
---Gets the set of included regions managed by this LanguageTree. This can be different from the ---Gets the set of included regions managed by this LanguageTree. This can be different from the

View File

@ -633,7 +633,7 @@ int x = INT_MAX;
}, get_ranges()) }, get_ranges())
n.feed('7ggI//<esc>') n.feed('7ggI//<esc>')
exec_lua([[parser:parse({6, 7})]]) exec_lua([[parser:parse({5, 6})]])
eq('table', exec_lua('return type(parser:children().c)')) eq('table', exec_lua('return type(parser:children().c)'))
eq(2, exec_lua('return #parser:children().c:trees()')) eq(2, exec_lua('return #parser:children().c:trees()'))
eq({ eq({