feat(treesitter): allow injections to be configured through directives

This commit is contained in:
Steven Sojka 2021-03-02 13:51:08 -06:00
parent f2df01900e
commit 8bea39f372
5 changed files with 171 additions and 69 deletions

View File

@ -365,4 +365,91 @@ identical identifiers, highlighting both as |hl-WarningMsg|: >
((binary_expression left: (identifier) @WarningMsg.left right: (identifier) @WarningMsg.right) ((binary_expression left: (identifier) @WarningMsg.left right: (identifier) @WarningMsg.right)
(eq? @WarningMsg.left @WarningMsg.right)) (eq? @WarningMsg.left @WarningMsg.right))
Treesitter language injection (WIP) *lua-treesitter-language-injection*
NOTE: This is a partially implemented feature, and not usable as a default
solution yet. What is documented here is a temporary interface intended
for those who want to experiment with this feature and contribute to
its development.
Languages can have nested languages within them, for example javascript inside
HTML. We can "inject" a treesitter parser for a child language by configuring
injection queries. Here is an example of Javascript and CSS injected into
HTML. >
local query = [[
(script_element (raw_text) @javascript)
(style_element (raw_text) @css)
]];
local parser = vim.treesitter.get_parser(nil, nil, {
injections = {html = query}
})
parser:parse()
Any capture will be treated as the node treesitter will use for the injected
language. The capture name will be used as the language. There are a couple of
reserved captures that do not have this behavior:
`@language`
This will use a node's text content as the language to be injected.
`@content`
This will use the captured node's content as the injected content.
`@combined`
This will combine all matches of a pattern as one single block of content.
By default, each match of a pattern is treated as its own block of content
and parsed independently of the others.
`@<language>`
Any other capture name will be treated as both the language and the content.
`@_<name>`
Any capture with a leading "_" will not be treated as a language and will have
no special processing. This is useful for capturing nodes for directives.
Injections can be configured using `directives` instead of using capture
names. Here is an example of a directive that resolves the language based on a
buffer variable instead of statically in the query. >
local query = require("vim.treesitter.query")
query.add_directive("inject-preprocessor!", function(_, _, bufnr, _, data)
local success, lang = pcall(vim.api.nvim_buf_get_var, bufnr, "css_preprocessor")
data.language = success and lang or "css"
end)
Here is the same HTML query using this directive. >
local query = [[
(script_element (raw_text) @javascript)
(style_element
((raw_text) @content
(#inject-preprocessor!)))
]];
local parser = vim.treesitter.get_parser(nil, nil, {
injections = {html = query}
})
parser:parse()
The following properties can be attached to the metadata object provided to
the directive.
`language`
Same as the language capture.
`content`
A list of ranges or nodes to inject as content. These ranges and/or nodes will
be treated as combined source and will be parsed within the same context. This
differs from the `@content` capture which only captures a single node as
content. This can also be a single number that references a captured node.
`combined`
Same as the combined capture.
vim:tw=78:ts=8:ft=help:norl: vim:tw=78:ts=8:ft=help:norl:

View File

@ -12,14 +12,19 @@ LanguageTree.__index = LanguageTree
-- @param source Can be a bufnr or a string of text to parse -- @param source Can be a bufnr or a string of text to parse
-- @param lang The language this tree represents -- @param lang The language this tree represents
-- @param opts Options table -- @param opts Options table
-- @param opts.queries A table of language to injection query strings. -- @param opts.injections A table of language to injection query strings.
-- This is useful for overriding the built-in runtime file -- This is useful for overriding the built-in runtime file
-- searching for the injection language query per language. -- searching for the injection language query per language.
function LanguageTree.new(source, lang, opts) function LanguageTree.new(source, lang, opts)
language.require_language(lang) language.require_language(lang)
opts = opts or {} opts = opts or {}
local custom_queries = opts.queries or {} if opts.queries then
a.nvim_err_writeln("'queries' is no longer supported. Use 'injections' now")
opts.injections = opts.queries
end
local injections = opts.injections or {}
local self = setmetatable({ local self = setmetatable({
_source = source, _source = source,
_lang = lang, _lang = lang,
@ -27,8 +32,8 @@ function LanguageTree.new(source, lang, opts)
_regions = {}, _regions = {},
_trees = {}, _trees = {},
_opts = opts, _opts = opts,
_injection_query = custom_queries[lang] _injection_query = injections[lang]
and query.parse_query(lang, custom_queries[lang]) and query.parse_query(lang, injections[lang])
or query.get_query(lang, "injections"), or query.get_query(lang, "injections"),
_valid = false, _valid = false,
_parser = vim._create_ts_parser(lang), _parser = vim._create_ts_parser(lang),
@ -297,33 +302,50 @@ function LanguageTree:_get_injections()
for pattern, match, metadata in self._injection_query:iter_matches(root_node, self._source, start_line, end_line+1) do for pattern, match, metadata in self._injection_query:iter_matches(root_node, self._source, start_line, end_line+1) do
local lang = nil local lang = nil
local injection_node = nil local ranges = {}
local combined = false local combined = metadata.combined
-- Directives can configure how injections are captured as well as actual node captures.
-- This allows more advanced processing for determining ranges and language resolution.
if metadata.content then
local content = metadata.content
-- Allow for captured nodes to be used
if type(content) == "number" then
content = {match[content]}
end
if content then
vim.list_extend(ranges, content)
end
end
if metadata.language then
lang = metadata.language
end
-- You can specify the content and language together -- You can specify the content and language together
-- using a tag with the language, for example -- using a tag with the language, for example
-- @javascript -- @javascript
for id, node in pairs(match) do for id, node in pairs(match) do
local data = metadata[id]
local name = self._injection_query.captures[id] local name = self._injection_query.captures[id]
local offset_range = data and data.offset
-- Lang should override any other language tag -- Lang should override any other language tag
if name == "language" then if name == "language" and not lang then
lang = query.get_node_text(node, self._source) lang = query.get_node_text(node, self._source)
elseif name == "combined" then elseif name == "combined" then
combined = true combined = true
elseif name == "content" then elseif name == "content" and #ranges == 0 then
injection_node = offset_range or node table.insert(ranges, node)
-- Ignore any tags that start with "_" -- Ignore any tags that start with "_"
-- Allows for other tags to be used in matches -- Allows for other tags to be used in matches
elseif string.sub(name, 1, 1) ~= "_" then elseif string.sub(name, 1, 1) ~= "_" then
if lang == nil then if not lang then
lang = name lang = name
end end
if not injection_node then if #ranges == 0 then
injection_node = offset_range or node table.insert(ranges, node)
end end
end end
end end
@ -337,21 +359,21 @@ function LanguageTree:_get_injections()
injections[tree_index][lang] = {} injections[tree_index][lang] = {}
end end
-- Key by pattern so we can either combine each node to parse in the same -- Key this by pattern. If combined is set to true all captures of this pattern
-- context or treat each node independently. -- will be parsed by treesitter as the same "source".
-- If combined is false, each "region" will be parsed as a single source.
if not injections[tree_index][lang][pattern] then if not injections[tree_index][lang][pattern] then
injections[tree_index][lang][pattern] = { combined = combined, nodes = {} } injections[tree_index][lang][pattern] = { combined = combined, regions = {} }
end end
table.insert(injections[tree_index][lang][pattern].nodes, injection_node) table.insert(injections[tree_index][lang][pattern].regions, ranges)
end end
end end
local result = {} local result = {}
-- Generate a map by lang of node lists. -- Generate a map by lang of node lists.
-- Each list is a set of ranges that should be parsed -- Each list is a set of ranges that should be parsed together.
-- together.
for _, lang_map in ipairs(injections) do for _, lang_map in ipairs(injections) do
for lang, patterns in pairs(lang_map) do for lang, patterns in pairs(lang_map) do
if not result[lang] then if not result[lang] then
@ -360,10 +382,10 @@ function LanguageTree:_get_injections()
for _, entry in pairs(patterns) do for _, entry in pairs(patterns) do
if entry.combined then if entry.combined then
table.insert(result[lang], entry.nodes) table.insert(result[lang], vim.tbl_flatten(entry.regions))
else else
for _, node in ipairs(entry.nodes) do for _, ranges in ipairs(entry.regions) do
table.insert(result[lang], {node}) table.insert(result[lang], ranges)
end end
end end
end end

View File

@ -79,17 +79,6 @@ local function read_query_files(filenames)
return table.concat(contents, '') return table.concat(contents, '')
end end
-- Metatable for match metadata tables: reading a missing key stores a new
-- empty table under that key and returns it, so nested fields can be assigned
-- without creating the per-key table first.
local match_metatable = {
__index = function(tbl, key)
-- rawset stores the new table directly, bypassing metamethods.
rawset(tbl, key, {})
return tbl[key]
end
}
-- Returns a new empty table with `match_metatable` attached.
local function new_match_metadata()
return setmetatable({}, match_metatable)
end
--- The explicitly set queries from |vim.treesitter.query.set_query()| --- The explicitly set queries from |vim.treesitter.query.set_query()|
local explicit_queries = setmetatable({}, { local explicit_queries = setmetatable({}, {
__index = function(t, k) __index = function(t, k)
@ -249,7 +238,7 @@ predicate_handlers["vim-match?"] = predicate_handlers["match?"]
-- Directives store metadata or perform side effects against a match. -- Directives store metadata or perform side effects against a match.
-- Directives should always end with a `!`. -- Directives should always end with a `!`.
-- Directive handler receive the following arguments -- Directive handler receive the following arguments
-- (match, pattern, bufnr, predicate) -- (match, pattern, bufnr, predicate, metadata)
local directive_handlers = { local directive_handlers = {
["set!"] = function(_, _, _, pred, metadata) ["set!"] = function(_, _, _, pred, metadata)
if #pred == 4 then if #pred == 4 then
@ -269,7 +258,6 @@ local directive_handlers = {
local start_col_offset = pred[4] or 0 local start_col_offset = pred[4] or 0
local end_row_offset = pred[5] or 0 local end_row_offset = pred[5] or 0
local end_col_offset = pred[6] or 0 local end_col_offset = pred[6] or 0
local key = pred[7] or "offset"
range[1] = range[1] + start_row_offset range[1] = range[1] + start_row_offset
range[2] = range[2] + start_col_offset range[2] = range[2] + start_col_offset
@ -278,7 +266,7 @@ local directive_handlers = {
-- If this produces an invalid range, we just skip it. -- If this produces an invalid range, we just skip it.
if range[1] < range[3] or (range[1] == range[3] and range[2] <= range[4]) then if range[1] < range[3] or (range[1] == range[3] and range[2] <= range[4]) then
metadata[pred[2]][key] = range metadata.content = {range}
end end
end end
} }
@ -410,7 +398,7 @@ function Query:iter_captures(node, source, start, stop)
local raw_iter = node:_rawquery(self.query, true, start, stop) local raw_iter = node:_rawquery(self.query, true, start, stop)
local function iter() local function iter()
local capture, captured_node, match = raw_iter() local capture, captured_node, match = raw_iter()
local metadata = new_match_metadata() local metadata = {}
if match ~= nil then if match ~= nil then
local active = self:match_preds(match, match.pattern, source) local active = self:match_preds(match, match.pattern, source)
@ -445,7 +433,7 @@ function Query:iter_matches(node, source, start, stop)
local raw_iter = node:_rawquery(self.query, false, start, stop) local raw_iter = node:_rawquery(self.query, false, start, stop)
local function iter() local function iter()
local pattern, match = raw_iter() local pattern, match = raw_iter()
local metadata = new_match_metadata() local metadata = {}
if match ~= nil then if match ~= nil then
local active = self:match_preds(match, pattern, source) local active = self:match_preds(match, pattern, source)

View File

@ -445,7 +445,7 @@ describe('treesitter highlighting', function()
exec_lua [[ exec_lua [[
local parser = vim.treesitter.get_parser(0, "c", { local parser = vim.treesitter.get_parser(0, "c", {
queries = {c = "(preproc_def (preproc_arg) @c) (preproc_function_def value: (preproc_arg) @c)"} injections = {c = "(preproc_def (preproc_arg) @c) (preproc_function_def value: (preproc_arg) @c)"}
}) })
local highlighter = vim.treesitter.highlighter local highlighter = vim.treesitter.highlighter
test_hl = highlighter.new(parser, {queries = {c = hl_query}}) test_hl = highlighter.new(parser, {queries = {c = hl_query}})

View File

@ -468,7 +468,7 @@ int x = INT_MAX;
it("should inject a language", function() it("should inject a language", function()
exec_lua([[ exec_lua([[
parser = vim.treesitter.get_parser(0, "c", { parser = vim.treesitter.get_parser(0, "c", {
queries = { injections = {
c = "(preproc_def (preproc_arg) @c) (preproc_function_def value: (preproc_arg) @c)"}}) c = "(preproc_def (preproc_arg) @c) (preproc_function_def value: (preproc_arg) @c)"}})
]]) ]])
@ -489,7 +489,7 @@ int x = INT_MAX;
it("should inject a language", function() it("should inject a language", function()
exec_lua([[ exec_lua([[
parser = vim.treesitter.get_parser(0, "c", { parser = vim.treesitter.get_parser(0, "c", {
queries = { injections = {
c = "(preproc_def (preproc_arg) @c @combined) (preproc_function_def value: (preproc_arg) @c @combined)"}}) c = "(preproc_def (preproc_arg) @c @combined) (preproc_function_def value: (preproc_arg) @c @combined)"}})
]]) ]])
@ -506,11 +506,39 @@ int x = INT_MAX;
end) end)
end) end)
-- Registers an "inject-clang!" directive that sets `language`, `combined` and
-- `content` on the match metadata, uses it from the injection query, and then
-- checks the resulting injected "c" child parser.
describe("when providing parsing information through a directive", function()
it("should inject a language", function()
exec_lua([=[
vim.treesitter.add_directive("inject-clang!", function(match, _, _, pred, metadata)
metadata.language = "c"
metadata.combined = true
metadata.content = pred[2]
end)
parser = vim.treesitter.get_parser(0, "c", {
injections = {
c = "(preproc_def ((preproc_arg) @_c (#inject-clang! @_c)))" ..
"(preproc_function_def value: ((preproc_arg) @_a (#inject-clang! @_a)))"}})
]=])
-- A child parser for "c" must have been created.
eq("table", exec_lua("return type(parser:children().c)"))
-- The child parser is expected to hold two trees.
eq(2, exec_lua("return #parser:children().c:trees()"))
eq({
{0, 0, 7, 0}, -- root tree
{3, 14, 5, 18}, -- VALUE 123
-- VALUE1 123
-- VALUE2 123
{1, 26, 2, 68} -- READ_STRING(x, y) (char_u *)read_string((x), (size_t)(y))
-- READ_STRING_OK(x, y) (char_u *)read_string((x), (size_t)(y))
}, get_ranges())
end)
end)
describe("when using the offset directive", function() describe("when using the offset directive", function()
it("should shift the range by the directive amount", function() it("should shift the range by the directive amount", function()
exec_lua([[ exec_lua([[
parser = vim.treesitter.get_parser(0, "c", { parser = vim.treesitter.get_parser(0, "c", {
queries = { injections = {
c = "(preproc_def ((preproc_arg) @c (#offset! @c 0 2 0 -1))) (preproc_function_def value: (preproc_arg) @c)"}}) c = "(preproc_def ((preproc_arg) @c (#offset! @c 0 2 0 -1))) (preproc_function_def value: (preproc_arg) @c)"}})
]]) ]])
@ -538,7 +566,7 @@ int x = INT_MAX;
it("should return the correct language tree", function() it("should return the correct language tree", function()
local result = exec_lua([[ local result = exec_lua([[
parser = vim.treesitter.get_parser(0, "c", { parser = vim.treesitter.get_parser(0, "c", {
queries = { c = "(preproc_def (preproc_arg) @c)"}}) injections = { c = "(preproc_def (preproc_arg) @c)"}})
local sub_tree = parser:language_for_range({1, 18, 1, 19}) local sub_tree = parser:language_for_range({1, 18, 1, 19})
@ -572,28 +600,5 @@ int x = INT_MAX;
eq(result, "value") eq(result, "value")
end) end)
end) end)
-- Runs a query whose `#set!` directive targets the @number capture and
-- expects the stored "key" to read back as "value" from the metadata yielded
-- by iter_matches.
-- NOTE(review): the lookup indexes metadata by `pattern`, not by a capture id
-- -- confirm this is intended.
describe("when setting for a capture match", function()
it("should set/get the data correctly", function()
insert([[
int x = 3;
]])
local result = exec_lua([[
local result
query = vim.treesitter.parse_query("c", '((number_literal) @number (#set! @number "key" "value"))')
parser = vim.treesitter.get_parser(0, "c")
for pattern, match, metadata in query:iter_matches(parser:parse()[1]:root(), 0) do
result = metadata[pattern].key
end
return result
]])
eq(result, "value")
end)
end)
end) end)
end) end)