mirror of
https://github.com/neovim/neovim.git
synced 2025-02-25 18:55:25 -06:00
feat(treesitter): use upstream format for injection queries
This commit is contained in:
parent
06aed7c177
commit
ddd257f753
@ -196,6 +196,10 @@ The following new APIs or features were added.
|
||||
|
||||
• Added an omnifunc implementation for lua, |vim.lua_omnifunc()|
|
||||
|
||||
• Treesitter injection queries now use the format described at
|
||||
https://tree-sitter.github.io/tree-sitter/syntax-highlighting#language-injection .
|
||||
Support for the previous format will be removed in a future release.
|
||||
|
||||
==============================================================================
|
||||
CHANGED FEATURES *news-changes*
|
||||
|
||||
|
@ -441,7 +441,53 @@ individual query pattern manually by setting its `"priority"` metadata
|
||||
attribute: >
|
||||
|
||||
(super_important_node) @ImportantHighlight (#set! "priority" 105)
|
||||
|
||||
==============================================================================
|
||||
TREESITTER LANGUAGE INJECTIONS *treesitter-language-injections*
|
||||
<
|
||||
|
||||
Note: the following information is adapted from:
|
||||
https://tree-sitter.github.io/tree-sitter/syntax-highlighting#language-injection
|
||||
|
||||
Some source files contain code written in multiple different languages.
|
||||
Examples include:
|
||||
|
||||
• HTML files, which can contain JavaScript inside of `<script>` tags and
|
||||
CSS inside of `<style>` tags
|
||||
• ERB files, which contain Ruby inside of `<%` `%>` tags, and HTML outside of
|
||||
those tags
|
||||
• PHP files, which can contain HTML between the `<?php` tags
|
||||
• JavaScript files, which contain regular expression syntax within regex
|
||||
literals
|
||||
• Ruby, which can contain snippets of code inside of heredoc literals,
|
||||
where the heredoc delimiter often indicates the language
|
||||
• Lua, which can contain snippets of Vimscript inside |vim.cmd()| calls.
|
||||
• Vimscript, which can contain snippets of Lua inside |:lua-heredoc|
|
||||
blocks.
|
||||
|
||||
All of these examples can be modeled in terms of a parent syntax tree and one
|
||||
or more injected syntax trees, which reside inside of certain nodes in the
|
||||
parent tree. The language injection query allows you to specify these
|
||||
“injections” using the following captures:
|
||||
|
||||
• `@injection.content` - indicates that the captured node should have its
|
||||
contents re-parsed using another language.
|
||||
• `@injection.language` - indicates that the captured node’s text may
|
||||
contain the name of a language that should be used to re-parse the
|
||||
`@injection.content`.
|
||||
|
||||
The language injection behavior can also be configured by some properties
|
||||
associated with patterns:
|
||||
|
||||
• `injection.language` - can be used to hard-code the name of a specific
|
||||
language.
|
||||
• `injection.combined` - indicates that all of the matching nodes in the
|
||||
tree should have their content parsed as one nested document.
|
||||
• `injection.include-children` - indicates that the `@injection.content`
|
||||
node's entire text should be re-parsed, including the text of its child
|
||||
nodes. By default, child nodes' text will be excluded from the injected
|
||||
document.
|
||||
|
||||
==============================================================================
|
||||
VIM.TREESITTER *lua-treesitter*
|
||||
|
||||
|
@ -14,7 +14,7 @@
|
||||
---@field child_count fun(self: TSNode): integer
|
||||
---@field named_child_count fun(self: TSNode): integer
|
||||
---@field child fun(self: TSNode, integer): TSNode
|
||||
---@field name_child fun(self: TSNode, integer): TSNode
|
||||
---@field named_child fun(self: TSNode, integer): TSNode
|
||||
---@field descendant_for_range fun(self: TSNode, integer, integer, integer, integer): TSNode
|
||||
---@field named_descendant_for_range fun(self: TSNode, integer, integer, integer, integer): TSNode
|
||||
---@field parent fun(self: TSNode): TSNode
|
||||
@ -43,10 +43,10 @@ function TSNode:_rawquery(query, captures, start, end_) end
|
||||
function TSNode:_rawquery(query, captures, start, end_) end
|
||||
|
||||
---@class TSParser
|
||||
---@field parse fun(self: TSParser, tree, source: integer|string): TSTree, integer[]
|
||||
---@field parse fun(self: TSParser, tree, source: integer|string): TSTree, Range4[]
|
||||
---@field reset fun(self: TSParser)
|
||||
---@field included_ranges fun(self: TSParser): integer[]
|
||||
---@field set_included_ranges fun(self: TSParser, ranges: integer[][])
|
||||
---@field included_ranges fun(self: TSParser): Range4[]
|
||||
---@field set_included_ranges fun(self: TSParser, ranges: Range6[])
|
||||
---@field set_timeout fun(self: TSParser, timeout: integer)
|
||||
---@field timeout fun(self: TSParser): integer
|
||||
|
||||
|
@ -399,32 +399,120 @@ local function get_range_from_metadata(node, id, metadata)
|
||||
return { node:range() }
|
||||
end
|
||||
|
||||
--- Gets language injection points by language.
|
||||
---
|
||||
--- This is where most of the injection processing occurs.
|
||||
---
|
||||
--- TODO: Allow for an offset predicate to tailor the injection range
|
||||
--- instead of using the entire node's range.
|
||||
---@private
|
||||
---@return table<string, integer[][]>
|
||||
function LanguageTree:_get_injections()
|
||||
if not self._injection_query then
|
||||
return {}
|
||||
--- TODO(lewis6991): cleanup of the node_range interface
|
||||
---@param node TSNode
|
||||
---@param id integer
|
||||
---@param metadata TSMetadata
|
||||
---@return Range4[]
|
||||
local function get_node_ranges(node, id, metadata, include_children)
|
||||
local range = get_range_from_metadata(node, id, metadata)
|
||||
|
||||
if include_children then
|
||||
return { range }
|
||||
end
|
||||
|
||||
---@type table<integer,table<string,table<integer,table>>>
|
||||
local injections = {}
|
||||
local ranges = {} ---@type Range4[]
|
||||
|
||||
for tree_index, tree in ipairs(self._trees) do
|
||||
local root_node = tree:root()
|
||||
local start_line, _, end_line, _ = root_node:range()
|
||||
local srow, scol, erow, ecol = range[1], range[2], range[3], range[4]
|
||||
|
||||
for pattern, match, metadata in
|
||||
self._injection_query:iter_matches(root_node, self._source, start_line, end_line + 1)
|
||||
do
|
||||
for i = 0, node:named_child_count() - 1 do
|
||||
local child = node:named_child(i)
|
||||
local child_srow, child_scol, child_erow, child_ecol = child:range()
|
||||
if child_srow > srow or child_scol > scol then
|
||||
table.insert(ranges, { srow, scol, child_srow, child_scol })
|
||||
end
|
||||
srow = child_erow
|
||||
scol = child_ecol
|
||||
end
|
||||
|
||||
if erow > srow or ecol > scol then
|
||||
table.insert(ranges, { srow, scol, erow, ecol })
|
||||
end
|
||||
|
||||
return ranges
|
||||
end
|
||||
|
||||
---@alias TSInjection table<string,table<integer,table>>
|
||||
|
||||
---@private
--- Records the ranges of one injection match in the accumulator `t`.
---
--- `t` is nested as: tree index -> language name -> pattern index ->
--- `{ combined = boolean, regions = Range4[][] }`. Missing levels are created
--- on demand; `ranges` is appended to the `regions` list of the pattern entry.
---@param t table<integer,TSInjection>
---@param tree_index integer
---@param pattern integer
---@param lang string
---@param combined boolean
---@param ranges Range4[]
local function add_injection(t, tree_index, pattern, lang, combined, ranges)
  assert(type(lang) == 'string')

  -- Injections found in different trees are kept in separate buckets.
  local by_lang = t[tree_index]
  if not by_lang then
    by_lang = {}
    t[tree_index] = by_lang
  end

  local by_pattern = by_lang[lang]
  if not by_pattern then
    by_pattern = {}
    by_lang[lang] = by_pattern
  end

  -- Entries are keyed by pattern. When `combined` is true, every capture of the
  -- pattern is parsed by treesitter as the same "source"; otherwise each
  -- "region" is parsed as a source of its own. The flag is taken from the first
  -- match seen for the pattern.
  local entry = by_pattern[pattern]
  if not entry then
    entry = { combined = combined, regions = {} }
    by_pattern[pattern] = entry
  end

  table.insert(entry.regions, ranges)
end
|
||||
|
||||
---@private
--- Returns the text of `node`, annotated as a plain string.
---
--- Note: `query.get_node_text` returns string|string[]|nil, so this small
--- wrapper exists only to narrow the annotated return type to string.
---
---TODO(lewis6991): use [at]overload annotations on `query.get_node_text`
---@param node TSNode
---@param source integer|string
---@param metadata table
---@return string
local function get_node_text(node, source, metadata)
  local opts = { metadata = metadata }
  return query.get_node_text(node, source, opts) --[[@as string]]
end
|
||||
|
||||
---@private
--- Resolves a single injection query match into the language to inject,
--- whether the pattern is combined, and the ranges to parse, according to:
--- https://tree-sitter.github.io/tree-sitter/syntax-highlighting#language-injection
---@param match table<integer,TSNode>
---@param metadata table
---@return string, boolean, Range4[]
function LanguageTree:_get_injection(match, metadata)
  -- Pattern-level properties read from the match metadata.
  local lang = metadata['injection.language'] ---@type string
  local combined = metadata['injection.combined'] ~= nil
  local include_children = metadata['injection.include-children'] ~= nil
  local ranges = {} ---@type Range4[]

  for id, node in pairs(match) do
    local name = self._injection_query.captures[id]

    if name == 'injection.content' then
      ranges = get_node_ranges(node, id, metadata, include_children)
    elseif name == 'injection.language' then
      -- A captured language name overrides any other language tag.
      lang = get_node_text(node, self._source, metadata[id])
    end
  end

  return lang, combined, ranges
end
|
||||
|
||||
---@private
|
||||
---@param match table<integer,TSNode>
|
||||
---@param metadata table
|
||||
---@return string, boolean, Range4[]
|
||||
function LanguageTree:_get_injection_deprecated(match, metadata)
|
||||
local lang = nil ---@type string
|
||||
local ranges = {} ---@type Range4[]
|
||||
local combined = metadata.combined ---@type boolean
|
||||
local combined = metadata.combined ~= nil
|
||||
|
||||
-- Directives can configure how injections are captured as well as actual node captures.
|
||||
-- This allows more advanced processing for determining ranges and language resolution.
|
||||
@ -453,8 +541,7 @@ function LanguageTree:_get_injections()
|
||||
|
||||
-- Lang should override any other language tag
|
||||
if name == 'language' and not lang then
|
||||
---@diagnostic disable-next-line
|
||||
lang = query.get_node_text(node, self._source, { metadata = metadata[id] })
|
||||
lang = get_node_text(node, self._source, metadata[id])
|
||||
elseif name == 'combined' then
|
||||
combined = true
|
||||
elseif name == 'content' and #ranges == 0 then
|
||||
@ -472,25 +559,38 @@ function LanguageTree:_get_injections()
|
||||
end
|
||||
end
|
||||
|
||||
assert(type(lang) == 'string')
|
||||
|
||||
-- Each tree index should be isolated from the other nodes.
|
||||
if not injections[tree_index] then
|
||||
injections[tree_index] = {}
|
||||
return lang, combined, ranges
|
||||
end
|
||||
|
||||
if not injections[tree_index][lang] then
|
||||
injections[tree_index][lang] = {}
|
||||
--- Gets language injection points by language.
|
||||
---
|
||||
--- This is where most of the injection processing occurs.
|
||||
---
|
||||
--- TODO: Allow for an offset predicate to tailor the injection range
|
||||
--- instead of using the entire node's range.
|
||||
---@private
|
||||
---@return table<string, Range4[][]>
|
||||
function LanguageTree:_get_injections()
|
||||
if not self._injection_query then
|
||||
return {}
|
||||
end
|
||||
|
||||
-- Key this by pattern. If combined is set to true all captures of this pattern
|
||||
-- will be parsed by treesitter as the same "source".
|
||||
-- If combined is false, each "region" will be parsed as a single source.
|
||||
if not injections[tree_index][lang][pattern] then
|
||||
injections[tree_index][lang][pattern] = { combined = combined, regions = {} }
|
||||
end
|
||||
---@type table<integer,TSInjection>
|
||||
local injections = {}
|
||||
|
||||
table.insert(injections[tree_index][lang][pattern].regions, ranges)
|
||||
for tree_index, tree in ipairs(self._trees) do
|
||||
local root_node = tree:root()
|
||||
local start_line, _, end_line, _ = root_node:range()
|
||||
|
||||
for pattern, match, metadata in
|
||||
self._injection_query:iter_matches(root_node, self._source, start_line, end_line + 1)
|
||||
do
|
||||
local lang, combined, ranges = self:_get_injection(match, metadata)
|
||||
if not lang then
|
||||
-- TODO(lewis6991): remove after 0.9 (#20434)
|
||||
lang, combined, ranges = self:_get_injection_deprecated(match, metadata)
|
||||
end
|
||||
add_injection(injections, tree_index, pattern, lang, combined, ranges)
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -407,7 +407,7 @@ predicate_handlers['vim-match?'] = predicate_handlers['match?']
|
||||
---@field [string] integer|string
|
||||
---@field range Range4
|
||||
|
||||
---@alias TSDirective fun(match: TSMatch, _, _, predicate: any[], metadata: TSMetadata)
|
||||
---@alias TSDirective fun(match: TSMatch, _, _, predicate: (string|integer)[], metadata: TSMetadata)
|
||||
|
||||
-- Predicate handlers receive the following arguments
|
||||
-- (match, pattern, bufnr, predicate)
|
||||
@ -419,24 +419,17 @@ predicate_handlers['vim-match?'] = predicate_handlers['match?']
|
||||
---@type table<string,TSDirective>
|
||||
local directive_handlers = {
|
||||
['set!'] = function(_, _, _, pred, metadata)
|
||||
if #pred == 4 then
|
||||
-- (#set! @capture "key" "value")
|
||||
---@diagnostic disable-next-line:no-unknown
|
||||
local _, capture_id, key, value = unpack(pred)
|
||||
---@cast value integer|string
|
||||
---@cast capture_id integer
|
||||
---@cast key string
|
||||
if #pred >= 3 and type(pred[2]) == 'number' then
|
||||
-- (#set! @capture key value)
|
||||
local capture_id, key, value = pred[2], pred[3], pred[4]
|
||||
if not metadata[capture_id] then
|
||||
metadata[capture_id] = {}
|
||||
end
|
||||
metadata[capture_id][key] = value
|
||||
else
|
||||
---@diagnostic disable-next-line:no-unknown
|
||||
local _, key, value = unpack(pred)
|
||||
---@cast value integer|string
|
||||
---@cast key string
|
||||
-- (#set! "key" "value")
|
||||
metadata[key] = value
|
||||
-- (#set! key value)
|
||||
local key, value = pred[2], pred[3]
|
||||
metadata[key] = value or true
|
||||
end
|
||||
end,
|
||||
-- Shifts the range of a node.
|
||||
|
@ -1,3 +1,5 @@
|
||||
(preproc_arg) @c
|
||||
((preproc_arg) @injection.content
|
||||
(#set! injection.language "c"))
|
||||
|
||||
; (comment) @comment
|
||||
; ((comment) @injection.content
|
||||
; (#set! injection.language "comment"))
|
||||
|
@ -1,3 +1,4 @@
|
||||
(codeblock
|
||||
(language) @language
|
||||
(code) @content)
|
||||
((codeblock
|
||||
(language) @injection.language
|
||||
(code) @injection.content)
|
||||
(#set! injection.include-children))
|
||||
|
@ -3,20 +3,26 @@
|
||||
(identifier) @_cdef_identifier
|
||||
(_ _ (identifier) @_cdef_identifier)
|
||||
]
|
||||
arguments: (arguments (string content: _ @c)))
|
||||
arguments: (arguments (string content: _ @injection.content)))
|
||||
(#set! injection.language "c")
|
||||
(#eq? @_cdef_identifier "cdef"))
|
||||
|
||||
((function_call
|
||||
name: (_) @_vimcmd_identifier
|
||||
arguments: (arguments (string content: _ @vim)))
|
||||
arguments: (arguments (string content: _ @injection.content)))
|
||||
(#set! injection.language "vim")
|
||||
(#any-of? @_vimcmd_identifier "vim.cmd" "vim.api.nvim_command" "vim.api.nvim_exec" "vim.api.nvim_cmd"))
|
||||
|
||||
((function_call
|
||||
name: (_) @_vimcmd_identifier
|
||||
arguments: (arguments (string content: _ @query) .))
|
||||
arguments: (arguments (string content: _ @injection.content) .))
|
||||
(#set! injection.language "query")
|
||||
(#eq? @_vimcmd_identifier "vim.treesitter.query.set_query"))
|
||||
|
||||
; ;; highlight string as query if starts with `;; query`
|
||||
; ((string ("string_content") @query) (#lua-match? @query "^%s*;+%s?query"))
|
||||
; ((string ("string_content") @injection.content)
|
||||
; (#set! injection.language "query")
|
||||
; (#lua-match? @injection.content "^%s*;+%s?query"))
|
||||
|
||||
; (comment) @comment
|
||||
; ((comment) @injection.content
|
||||
; (#set! injection.language "comment"))
|
||||
|
@ -1,18 +1,33 @@
|
||||
(lua_statement (script (body) @lua))
|
||||
(lua_statement (chunk) @lua)
|
||||
(ruby_statement (script (body) @ruby))
|
||||
(ruby_statement (chunk) @ruby)
|
||||
(python_statement (script (body) @python))
|
||||
(python_statement (chunk) @python)
|
||||
;; If we support perl at some point...
|
||||
;; (perl_statement (script (body) @perl))
|
||||
;; (perl_statement (chunk) @perl)
|
||||
((lua_statement (script (body) @injection.content))
|
||||
(#set! injection.language "lua"))
|
||||
|
||||
(autocmd_statement (pattern) @regex)
|
||||
((lua_statement (chunk) @injection.content)
|
||||
(#set! injection.language "lua"))
|
||||
|
||||
((ruby_statement (script (body) @injection.content))
|
||||
(#set! injection.language "ruby"))
|
||||
|
||||
((ruby_statement (chunk) @injection.content)
|
||||
(#set! injection.language "ruby"))
|
||||
|
||||
((python_statement (script (body) @injection.content))
|
||||
(#set! injection.language "python"))
|
||||
|
||||
((python_statement (chunk) @injection.content)
|
||||
(#set! injection.language "python"))
|
||||
|
||||
;; If we support perl at some point...
|
||||
;; ((perl_statement (script (body) @injection.content))
|
||||
;; (#set! injection.language "perl"))
|
||||
;; ((perl_statement (chunk) @injection.content)
|
||||
;; (#set! injection.language "perl"))
|
||||
|
||||
((autocmd_statement (pattern) @injection.content)
|
||||
(#set! injection.language "regex"))
|
||||
|
||||
((set_item
|
||||
option: (option_name) @_option
|
||||
value: (set_value) @vim)
|
||||
value: (set_value) @injection.content)
|
||||
(#any-of? @_option
|
||||
"includeexpr" "inex"
|
||||
"printexpr" "pexpr"
|
||||
@ -22,7 +37,12 @@
|
||||
"foldexpr" "fde"
|
||||
"diffexpr" "dex"
|
||||
"patchexpr" "pex"
|
||||
"charconvert" "ccv"))
|
||||
"charconvert" "ccv")
|
||||
(#set! injection.language "vim"))
|
||||
|
||||
; (comment) @comment
|
||||
; (line_continuation_comment) @comment
|
||||
|
||||
; ((comment) @injection.content
|
||||
; (#set! injection.language "comment"))
|
||||
|
||||
; ((line_continuation_comment) @injection.content
|
||||
; (#set! injection.language "comment"))
|
||||
|
Loading…
Reference in New Issue
Block a user