feat(treesitter): use upstream format for injection queries

This commit is contained in:
Lewis Russell 2023-03-08 11:03:11 +00:00 committed by GitHub
parent 06aed7c177
commit ddd257f753
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 284 additions and 112 deletions

View File

@ -196,6 +196,10 @@ The following new APIs or features were added.
• Added an omnifunc implementation for lua, |vim.lua_omnifunc()| • Added an omnifunc implementation for lua, |vim.lua_omnifunc()|
• Treesitter injection queries now use the format described at
https://tree-sitter.github.io/tree-sitter/syntax-highlighting#language-injection .
Support for the previous format will be removed in a future release.
============================================================================== ==============================================================================
CHANGED FEATURES *news-changes* CHANGED FEATURES *news-changes*

View File

@ -441,7 +441,53 @@ individual query pattern manually by setting its `"priority"` metadata
attribute: > attribute: >
(super_important_node) @ImportantHighlight (#set! "priority" 105) (super_important_node) @ImportantHighlight (#set! "priority" 105)
==============================================================================
TREESITTER LANGUAGE INJECTIONS *treesitter-language-injections*
< <
Note: the following information is adapted from:
https://tree-sitter.github.io/tree-sitter/syntax-highlighting#language-injection
Some source files contain code written in multiple different languages.
Examples include:
• HTML files, which can contain JavaScript inside of `<script>` tags and
CSS inside of `<style>` tags
• ERB files, which contain Ruby inside of `<%` `%>` tags, and HTML outside of
those tags
• PHP files, which can contain HTML between the `<?php` tags
• JavaScript files, which contain regular expression syntax within regex
literals
• Ruby, which can contain snippets of code inside of heredoc literals,
where the heredoc delimiter often indicates the language
• Lua, which can contain snippets of Vimscript inside |vim.cmd()| calls.
• Vimscript, which can contain snippets of Lua inside |:lua-heredoc|
blocks.
All of these examples can be modeled in terms of a parent syntax tree and one
or more injected syntax trees, which reside inside of certain nodes in the
parent tree. The language injection query allows you to specify these
“injections” using the following captures:
• `@injection.content` - indicates that the captured node should have its
contents re-parsed using another language.
• `@injection.language` - indicates that the captured node's text may
contain the name of a language that should be used to re-parse the
`@injection.content`.
The language injection behavior can also be configured by some properties
associated with patterns:
• `injection.language` - can be used to hard-code the name of a specific
language.
• `injection.combined` - indicates that all of the matching nodes in the
tree should have their content parsed as one nested document.
• `injection.include-children` - indicates that the `@injection.content`
node's entire text should be re-parsed, including the text of its child
nodes. By default, child nodes' text will be excluded from the injected
document.
============================================================================== ==============================================================================
VIM.TREESITTER *lua-treesitter* VIM.TREESITTER *lua-treesitter*

View File

@ -14,7 +14,7 @@
---@field child_count fun(self: TSNode): integer ---@field child_count fun(self: TSNode): integer
---@field named_child_count fun(self: TSNode): integer ---@field named_child_count fun(self: TSNode): integer
---@field child fun(self: TSNode, integer): TSNode ---@field child fun(self: TSNode, integer): TSNode
---@field name_child fun(self: TSNode, integer): TSNode ---@field named_child fun(self: TSNode, integer): TSNode
---@field descendant_for_range fun(self: TSNode, integer, integer, integer, integer): TSNode ---@field descendant_for_range fun(self: TSNode, integer, integer, integer, integer): TSNode
---@field named_descendant_for_range fun(self: TSNode, integer, integer, integer, integer): TSNode ---@field named_descendant_for_range fun(self: TSNode, integer, integer, integer, integer): TSNode
---@field parent fun(self: TSNode): TSNode ---@field parent fun(self: TSNode): TSNode
@ -43,10 +43,10 @@ function TSNode:_rawquery(query, captures, start, end_) end
function TSNode:_rawquery(query, captures, start, end_) end function TSNode:_rawquery(query, captures, start, end_) end
---@class TSParser ---@class TSParser
---@field parse fun(self: TSParser, tree, source: integer|string): TSTree, integer[] ---@field parse fun(self: TSParser, tree, source: integer|string): TSTree, Range4[]
---@field reset fun(self: TSParser) ---@field reset fun(self: TSParser)
---@field included_ranges fun(self: TSParser): integer[] ---@field included_ranges fun(self: TSParser): Range4[]
---@field set_included_ranges fun(self: TSParser, ranges: integer[][]) ---@field set_included_ranges fun(self: TSParser, ranges: Range6[])
---@field set_timeout fun(self: TSParser, timeout: integer) ---@field set_timeout fun(self: TSParser, timeout: integer)
---@field timeout fun(self: TSParser): integer ---@field timeout fun(self: TSParser): integer

View File

@ -399,6 +399,169 @@ local function get_range_from_metadata(node, id, metadata)
return { node:range() } return { node:range() }
end end
---@private
--- Returns true when position (arow, acol) lies strictly before (brow, bcol).
---
--- Positions are ordered by row first and by column only within the same row.
---@param arow integer
---@param acol integer
---@param brow integer
---@param bcol integer
---@return boolean
local function position_before(arow, acol, brow, bcol)
  return arow < brow or (arow == brow and acol < bcol)
end

---@private
--- Compute the ranges of {node} that should be handed to an injected parser.
---
--- The starting point is the range reported by `get_range_from_metadata()`.
--- When {include_children} is truthy that range is returned as the only entry.
--- Otherwise the ranges of the node's named children are cut out, so only the
--- gaps before, between and after the named children remain.
---
--- TODO(lewis6991): cleanup of the node_range interface
---@param node TSNode
---@param id integer
---@param metadata TSMetadata
---@param include_children boolean? keep the text of child nodes in the result
---@return Range4[]
local function get_node_ranges(node, id, metadata, include_children)
  local range = get_range_from_metadata(node, id, metadata)

  if include_children then
    return { range }
  end

  local ranges = {} ---@type Range4[]

  -- (srow, scol) is the start of the gap currently being tracked; it advances
  -- to the end of each child as the children are visited.
  local srow, scol, erow, ecol = range[1], range[2], range[3], range[4]

  for i = 0, node:named_child_count() - 1 do
    local child = node:named_child(i)
    local child_srow, child_scol, child_erow, child_ecol = child:range()

    -- Only emit a gap when the child really starts after the current position.
    -- Comparing rows and columns independently would emit an inverted range
    -- for a child on an earlier row but a larger column.
    if position_before(srow, scol, child_srow, child_scol) then
      table.insert(ranges, { srow, scol, child_srow, child_scol })
    end

    srow, scol = child_erow, child_ecol
  end

  -- Trailing text after the last named child (or the whole range when there
  -- are no named children at all).
  if position_before(srow, scol, erow, ecol) then
    table.insert(ranges, { srow, scol, erow, ecol })
  end

  return ranges
end
---@alias TSInjection table<string,table<integer,table>>
---@private
--- Record one injection region in the accumulator {t}.
---
--- Entries are nested as `t[tree_index][lang][pattern]`; missing levels are
--- created on demand. The `combined` flag is stored only when the pattern
--- entry is first created; later calls for the same pattern just append
--- their {ranges} to its `regions` list.
---@param t table<integer,TSInjection> accumulator, modified in place
---@param tree_index integer
---@param pattern integer
---@param lang string
---@param combined boolean
---@param ranges Range4[]
local function add_injection(t, tree_index, pattern, lang, combined, ranges)
  assert(type(lang) == 'string')

  -- Injections found in one tree are kept apart from those of other trees.
  local per_tree = t[tree_index]
  if not per_tree then
    per_tree = {}
    t[tree_index] = per_tree
  end

  local per_lang = per_tree[lang]
  if not per_lang then
    per_lang = {}
    per_tree[lang] = per_lang
  end

  -- Regions are grouped by pattern: with `combined` set, every capture of the
  -- pattern is parsed as one "source"; otherwise each region is its own source.
  local entry = per_lang[pattern]
  if not entry then
    entry = { combined = combined, regions = {} }
    per_lang[pattern] = entry
  end

  entry.regions[#entry.regions + 1] = ranges
end
---@private
--- Fetch the text of {node} as a string.
---
--- `query.get_node_text` is annotated as returning string|string[]|nil, so this
--- small wrapper exists only to narrow the annotated result to `string`.
---
--- TODO(lewis6991): use [at]overload annotations on `query.get_node_text`
---@param node TSNode
---@param source integer|string
---@param metadata table forwarded as the `metadata` option
---@return string
local function get_node_text(node, source, metadata)
  local opts = { metadata = metadata }
  return query.get_node_text(node, source, opts) --[[@as string]]
end
---@private
--- Extract a single injection from a query match, following the format at
--- https://tree-sitter.github.io/tree-sitter/syntax-highlighting#language-injection
---
--- Pattern properties read from {metadata}:
---   - `injection.language`: language name, used unless a capture overrides it
---   - `injection.combined`: any non-nil value marks the injection as combined
---   - `injection.include-children`: any non-nil value keeps child node text
---
--- Captures read from {match}:
---   - `@injection.language`: the node text becomes the language name
---   - `@injection.content`: the node supplies the ranges to parse
---
---@param match table<integer,TSNode>
---@param metadata table
---@return string, boolean, Range4[] # language (nil when neither the metadata nor a capture provides one), combined flag, ranges
function LanguageTree:_get_injection(match, metadata)
  local lang = metadata['injection.language'] ---@type string
  local combined = metadata['injection.combined'] ~= nil
  local include_children = metadata['injection.include-children'] ~= nil
  local ranges = {} ---@type Range4[]

  for id, node in pairs(match) do
    local capture_name = self._injection_query.captures[id]

    if capture_name == 'injection.content' then
      ranges = get_node_ranges(node, id, metadata, include_children)
    elseif capture_name == 'injection.language' then
      -- A captured language takes precedence over the `injection.language`
      -- property set on the pattern.
      lang = get_node_text(node, self._source, metadata[id])
    end
  end

  return lang, combined, ranges
end
---@private
--- Extract a single injection from a query match using the previous
--- (pre "injection.*") capture and metadata names.
---
--- Sources consulted, in order:
---   1. `metadata.content` (a capture id or a list of at least 4 numbers) and
---      `metadata.language`; any non-nil `metadata.combined` sets the flag.
---   2. Captures named `language`, `combined` and `content`.
---   3. Any other capture whose name does not start with "_": its name is
---      used as the language and its node as the content, but only where no
---      language / no range has been found yet.
---
--- Captures are visited with `pairs()`, so when several captures could supply
--- the same value the winner depends on iteration order.
---@param match table<integer,TSNode>
---@param metadata table
---@return string, boolean, Range4[] # language (nil when nothing provides one), combined flag, ranges
function LanguageTree:_get_injection_deprecated(match, metadata)
local lang = nil ---@type string
local ranges = {} ---@type Range4[]
local combined = metadata.combined ~= nil
-- Directives can configure how injections are captured as well as actual node captures.
-- This allows more advanced processing for determining ranges and language resolution.
if metadata.content then
local content = metadata.content ---@type any
-- Allow for captured nodes to be used
if type(content) == 'number' then
content = { match[content]:range() }
end
-- NOTE(review): this appends the individual numbers of `content` to `ranges`
-- (a flat list), whereas the capture branches below insert one range table
-- per element; confirm that consumers accept both shapes.
if type(content) == 'table' and #content >= 4 then
vim.list_extend(ranges, content)
end
end
if metadata.language then
lang = metadata.language ---@type string
end
-- You can specify the content and language together
-- using a tag with the language, for example
-- @javascript
for id, node in pairs(match) do
local name = self._injection_query.captures[id]
-- A language already set (e.g. via metadata.language) is not overridden by
-- a `language` capture.
if name == 'language' and not lang then
lang = get_node_text(node, self._source, metadata[id])
elseif name == 'combined' then
combined = true
-- A `content` capture is only used when no range has been collected yet.
elseif name == 'content' and #ranges == 0 then
table.insert(ranges, get_range_from_metadata(node, id, metadata))
-- Ignore any tags that start with "_"
-- Allows for other tags to be used in matches
elseif string.sub(name, 1, 1) ~= '_' then
-- The capture name itself acts as the language name (e.g. @javascript).
if not lang then
lang = name
end
if #ranges == 0 then
table.insert(ranges, get_range_from_metadata(node, id, metadata))
end
end
end
return lang, combined, ranges
end
--- Gets language injection points by language. --- Gets language injection points by language.
--- ---
--- This is where most of the injection processing occurs. --- This is where most of the injection processing occurs.
@ -406,13 +569,13 @@ end
--- TODO: Allow for an offset predicate to tailor the injection range --- TODO: Allow for an offset predicate to tailor the injection range
--- instead of using the entire nodes range. --- instead of using the entire nodes range.
---@private ---@private
---@return table<string, integer[][]> ---@return table<string, Range4[][]>
function LanguageTree:_get_injections() function LanguageTree:_get_injections()
if not self._injection_query then if not self._injection_query then
return {} return {}
end end
---@type table<integer,table<string,table<integer,table>>> ---@type table<integer,TSInjection>
local injections = {} local injections = {}
for tree_index, tree in ipairs(self._trees) do for tree_index, tree in ipairs(self._trees) do
@ -422,75 +585,12 @@ function LanguageTree:_get_injections()
for pattern, match, metadata in for pattern, match, metadata in
self._injection_query:iter_matches(root_node, self._source, start_line, end_line + 1) self._injection_query:iter_matches(root_node, self._source, start_line, end_line + 1)
do do
local lang = nil ---@type string local lang, combined, ranges = self:_get_injection(match, metadata)
local ranges = {} ---@type Range4[] if not lang then
local combined = metadata.combined ---@type boolean -- TODO(lewis6991): remove after 0.9 (#20434)
lang, combined, ranges = self:_get_injection_deprecated(match, metadata)
-- Directives can configure how injections are captured as well as actual node captures.
-- This allows more advanced processing for determining ranges and language resolution.
if metadata.content then
local content = metadata.content ---@type any
-- Allow for captured nodes to be used
if type(content) == 'number' then
content = { match[content]:range() }
end
if type(content) == 'table' and #content >= 4 then
vim.list_extend(ranges, content)
end
end end
add_injection(injections, tree_index, pattern, lang, combined, ranges)
if metadata.language then
lang = metadata.language ---@type string
end
-- You can specify the content and language together
-- using a tag with the language, for example
-- @javascript
for id, node in pairs(match) do
local name = self._injection_query.captures[id]
-- Lang should override any other language tag
if name == 'language' and not lang then
---@diagnostic disable-next-line
lang = query.get_node_text(node, self._source, { metadata = metadata[id] })
elseif name == 'combined' then
combined = true
elseif name == 'content' and #ranges == 0 then
table.insert(ranges, get_range_from_metadata(node, id, metadata))
-- Ignore any tags that start with "_"
-- Allows for other tags to be used in matches
elseif string.sub(name, 1, 1) ~= '_' then
if not lang then
lang = name
end
if #ranges == 0 then
table.insert(ranges, get_range_from_metadata(node, id, metadata))
end
end
end
assert(type(lang) == 'string')
-- Each tree index should be isolated from the other nodes.
if not injections[tree_index] then
injections[tree_index] = {}
end
if not injections[tree_index][lang] then
injections[tree_index][lang] = {}
end
-- Key this by pattern. If combined is set to true all captures of this pattern
-- will be parsed by treesitter as the same "source".
-- If combined is false, each "region" will be parsed as a single source.
if not injections[tree_index][lang][pattern] then
injections[tree_index][lang][pattern] = { combined = combined, regions = {} }
end
table.insert(injections[tree_index][lang][pattern].regions, ranges)
end end
end end

View File

@ -407,7 +407,7 @@ predicate_handlers['vim-match?'] = predicate_handlers['match?']
---@field [string] integer|string ---@field [string] integer|string
---@field range Range4 ---@field range Range4
---@alias TSDirective fun(match: TSMatch, _, _, predicate: any[], metadata: TSMetadata) ---@alias TSDirective fun(match: TSMatch, _, _, predicate: (string|integer)[], metadata: TSMetadata)
-- Predicate handler receive the following arguments -- Predicate handler receive the following arguments
-- (match, pattern, bufnr, predicate) -- (match, pattern, bufnr, predicate)
@ -419,24 +419,17 @@ predicate_handlers['vim-match?'] = predicate_handlers['match?']
---@type table<string,TSDirective> ---@type table<string,TSDirective>
local directive_handlers = { local directive_handlers = {
['set!'] = function(_, _, _, pred, metadata) ['set!'] = function(_, _, _, pred, metadata)
if #pred == 4 then if #pred >= 3 and type(pred[2]) == 'number' then
-- (#set! @capture "key" "value") -- (#set! @capture key value)
---@diagnostic disable-next-line:no-unknown local capture_id, key, value = pred[2], pred[3], pred[4]
local _, capture_id, key, value = unpack(pred)
---@cast value integer|string
---@cast capture_id integer
---@cast key string
if not metadata[capture_id] then if not metadata[capture_id] then
metadata[capture_id] = {} metadata[capture_id] = {}
end end
metadata[capture_id][key] = value metadata[capture_id][key] = value
else else
---@diagnostic disable-next-line:no-unknown -- (#set! key value)
local _, key, value = unpack(pred) local key, value = pred[2], pred[3]
---@cast value integer|string metadata[key] = value or true
---@cast key string
-- (#set! "key" "value")
metadata[key] = value
end end
end, end,
-- Shifts the range of a node. -- Shifts the range of a node.

View File

@ -1,3 +1,5 @@
(preproc_arg) @c ((preproc_arg) @injection.content
(#set! injection.language "c"))
; (comment) @comment ; ((comment) @injection.content
; (#set! injection.language "comment"))

View File

@ -1,3 +1,4 @@
(codeblock ((codeblock
(language) @language (language) @injection.language
(code) @content) (code) @injection.content)
(#set! injection.include-children))

View File

@ -3,20 +3,26 @@
(identifier) @_cdef_identifier (identifier) @_cdef_identifier
(_ _ (identifier) @_cdef_identifier) (_ _ (identifier) @_cdef_identifier)
] ]
arguments: (arguments (string content: _ @c))) arguments: (arguments (string content: _ @injection.content)))
(#set! injection.language "c")
(#eq? @_cdef_identifier "cdef")) (#eq? @_cdef_identifier "cdef"))
((function_call ((function_call
name: (_) @_vimcmd_identifier name: (_) @_vimcmd_identifier
arguments: (arguments (string content: _ @vim))) arguments: (arguments (string content: _ @injection.content)))
(#set! injection.language "vim")
(#any-of? @_vimcmd_identifier "vim.cmd" "vim.api.nvim_command" "vim.api.nvim_exec" "vim.api.nvim_cmd")) (#any-of? @_vimcmd_identifier "vim.cmd" "vim.api.nvim_command" "vim.api.nvim_exec" "vim.api.nvim_cmd"))
((function_call ((function_call
name: (_) @_vimcmd_identifier name: (_) @_vimcmd_identifier
arguments: (arguments (string content: _ @query) .)) arguments: (arguments (string content: _ @injection.content) .))
(#set! injection.language "query")
(#eq? @_vimcmd_identifier "vim.treesitter.query.set_query")) (#eq? @_vimcmd_identifier "vim.treesitter.query.set_query"))
; ;; highlight string as query if starts with `;; query` ; ;; highlight string as query if starts with `;; query`
; ((string ("string_content") @query) (#lua-match? @query "^%s*;+%s?query")) ; ((string ("string_content") @injection.content)
; (#set! injection.language "query")
; (#lua-match? @injection.content "^%s*;+%s?query"))
; (comment) @comment ; ((comment) @injection.content
; (#set! injection.language "comment"))

View File

@ -1,18 +1,33 @@
(lua_statement (script (body) @lua)) ((lua_statement (script (body) @injection.content))
(lua_statement (chunk) @lua) (#set! injection.language "lua"))
(ruby_statement (script (body) @ruby))
(ruby_statement (chunk) @ruby)
(python_statement (script (body) @python))
(python_statement (chunk) @python)
;; If we support perl at some point...
;; (perl_statement (script (body) @perl))
;; (perl_statement (chunk) @perl)
(autocmd_statement (pattern) @regex) ((lua_statement (chunk) @injection.content)
(#set! injection.language "lua"))
((ruby_statement (script (body) @injection.content))
(#set! injection.language "ruby"))
((ruby_statement (chunk) @injection.content)
(#set! injection.language "ruby"))
((python_statement (script (body) @injection.content))
(#set! injection.language "python"))
((python_statement (chunk) @injection.content)
(#set! injection.language "python"))
;; If we support perl at some point...
;; ((perl_statement (script (body) @injection.content))
;; (#set! injection.language "perl"))
;; ((perl_statement (chunk) @injection.content)
;; (#set! injection.language "perl"))
((autocmd_statement (pattern) @injection.content)
(#set! injection.language "regex"))
((set_item ((set_item
option: (option_name) @_option option: (option_name) @_option
value: (set_value) @vim) value: (set_value) @injection.content)
(#any-of? @_option (#any-of? @_option
"includeexpr" "inex" "includeexpr" "inex"
"printexpr" "pexpr" "printexpr" "pexpr"
@ -22,7 +37,12 @@
"foldexpr" "fde" "foldexpr" "fde"
"diffexpr" "dex" "diffexpr" "dex"
"patchexpr" "pex" "patchexpr" "pex"
"charconvert" "ccv")) "charconvert" "ccv")
(#set! injection.language "vim"))
; (comment) @comment
; (line_continuation_comment) @comment ; ((comment) @injection.content
; (#set! injection.language "comment"))
; ((line_continuation_comment) @injection.content
; (#set! injection.language "comment"))