lua: minimal UTF-16 support needed for LSP

This commit is contained in:
Björn Linse 2019-08-04 15:12:07 +02:00
parent ce628e1187
commit 1f54f68732
4 changed files with 148 additions and 15 deletions

View File

@ -459,6 +459,24 @@ vim.stricmp({a}, {b}) *vim.stricmp()*
are equal, {a} is greater than {b} or {a} is lesser than {b},
respectively.
vim.str_utfindex({str}[, {index}]) *vim.str_utfindex()*
Convert byte index to UTF-32 and UTF-16 indices. If {index} is not
supplied, the length of the string is used. All indices are zero-based.
Returns two values: the UTF-32 and UTF-16 indices respectively.
Embedded NUL bytes are treated as terminating the string. Invalid
UTF-8 bytes, and embedded surrogates are counted as one code
point each. An {index} in the middle of a UTF-8 sequence is rounded
upwards to the end of that sequence.
vim.str_byteindex({str}, {index}[, {use_utf16}]) *vim.str_byteindex()*
Convert UTF-32 or UTF-16 {index} to byte index. If {use_utf16} is not
supplied, it defaults to false (use UTF-32). Returns the byte index.
Invalid UTF-8 and NUL are treated as in |vim.str_utfindex()|. An {index}
in the middle of a UTF-16 sequence is rounded upwards to the end of that
sequence.
vim.schedule({callback}) *vim.schedule()*
Schedules {callback} to be invoked soon by the main event-loop. Useful
to avoid |textlock| or other temporary restrictions.

View File

@ -112,6 +112,65 @@ static int nlua_stricmp(lua_State *const lstate) FUNC_ATTR_NONNULL_ALL
return 1;
}
/// Convert a byte index into UTF-32 and UTF-16 indices.
///
/// Lua arguments: a string and an optional byte index. When the index is
/// omitted, the byte length of the string is used as the index.
///
/// Raises the Lua error "index out of range" when the supplied index is
/// negative or larger than the byte length of the string.
///
/// Pushes two values: the UTF-32 index followed by the UTF-16 index.
static int nlua_str_utfindex(lua_State *const lstate) FUNC_ATTR_NONNULL_ALL
{
  size_t str_len;
  const char *str = luaL_checklstring(lstate, 1, &str_len);

  // Default to the whole string; an explicit second argument overrides it.
  intptr_t byte_idx = (intptr_t)str_len;
  if (lua_gettop(lstate) >= 2) {
    byte_idx = luaL_checkinteger(lstate, 2);
    if (!(byte_idx >= 0 && byte_idx <= (intptr_t)str_len)) {
      return luaL_error(lstate, "index out of range");
    }
  }

  size_t utf32_units = 0;
  size_t utf16_units = 0;
  mb_utflen((const char_u *)str, (size_t)byte_idx, &utf32_units, &utf16_units);

  lua_pushinteger(lstate, (long)utf32_units);
  lua_pushinteger(lstate, (long)utf16_units);
  return 2;
}
/// Convert a UTF-32 or UTF-16 index into a byte index.
///
/// Lua arguments: string, index and an optional use_utf16 flag. Without the
/// flag the index is interpreted as a UTF-32 index.
///
/// Raises the Lua error "index out of range" when the index is negative or
/// when mb_utf_index_to_bytes() reports failure (-1).
///
/// Pushes one value: the byte index.
static int nlua_str_byteindex(lua_State *const lstate) FUNC_ATTR_NONNULL_ALL
{
  size_t str_len;
  const char *str = luaL_checklstring(lstate, 1, &str_len);

  const intptr_t unit_idx = luaL_checkinteger(lstate, 2);
  if (unit_idx < 0) {
    return luaL_error(lstate, "index out of range");
  }

  // The third argument is only consulted when it is actually present.
  const bool want_utf16 = lua_gettop(lstate) >= 3 && lua_toboolean(lstate, 3);

  const ssize_t byte_idx = mb_utf_index_to_bytes((const char_u *)str, str_len,
                                                 (size_t)unit_idx, want_utf16);
  if (byte_idx != -1) {
    lua_pushinteger(lstate, (long)byte_idx);
    return 1;
  }
  return luaL_error(lstate, "index out of range");
}
static void nlua_luv_error_event(void **argv)
{
char *error = (char *)argv[0];
@ -220,6 +279,12 @@ static int nlua_state_init(lua_State *const lstate) FUNC_ATTR_NONNULL_ALL
// stricmp
lua_pushcfunction(lstate, &nlua_stricmp);
lua_setfield(lstate, -2, "stricmp");
// str_utfindex
lua_pushcfunction(lstate, &nlua_str_utfindex);
lua_setfield(lstate, -2, "str_utfindex");
// str_byteindex
lua_pushcfunction(lstate, &nlua_str_byteindex);
lua_setfield(lstate, -2, "str_byteindex");
// schedule
lua_pushcfunction(lstate, &nlua_schedule);
lua_setfield(lstate, -2, "schedule");

View File

@ -1470,6 +1470,31 @@ void mb_utflen(const char_u *s, size_t len, size_t *codepoints,
*codeunits += count + extra;
}
/// Convert a UTF-32 or UTF-16 index into a byte offset in a UTF-8 string.
///
/// Walks `s` one UTF-8 sequence at a time. Every sequence counts as one unit;
/// with `use_utf16_units` a character above U+FFFF counts as two units.
/// Scanning stops after `len` bytes or at the first NUL byte.
///
/// @param s                string to scan
/// @param len              number of bytes of `s` to consider
/// @param index            zero-based UTF-32 index, or UTF-16 index when
///                         `use_utf16_units` is true
/// @param use_utf16_units  count UTF-16 code units instead of code points
///
/// @return byte offset just past the sequence at which the running count
///         reaches `index` (0 when `index` is 0), never larger than `len`;
///         -1 when the scanned text holds fewer than `index` units.
ssize_t mb_utf_index_to_bytes(const char_u *s, size_t len,
                              size_t index, bool use_utf16_units)
  FUNC_ATTR_NONNULL_ALL
{
  size_t count = 0;
  size_t clen, i;
  if (index == 0) {
    return 0;
  }
  for (i = 0; i < len && s[i] != NUL; i += clen) {
    clen = (size_t)utf_ptr2len_len(s+i, len-i);
    // NB: gets the byte value of invalid sequence bytes.
    // we only care whether the char fits in the BMP or not
    int c = (clen > 1) ? utf_ptr2char(s+i) : s[i];
    count++;
    if (use_utf16_units && c > 0xFFFF) {
      // Characters outside the BMP occupy a surrogate pair in UTF-16.
      count++;
    }
    if (count >= index) {
      // NOTE(review): utf_ptr2len_len() is assumed to report the nominal
      // length of a sequence that is cut short by `len`; clamp so that the
      // returned offset never points past the end of the string.
      size_t end = i + clen;
      if (end > len) {
        end = len;
      }
      return (ssize_t)end;
    }
  }
  return -1;
}
/*
* Version of strnicmp() that handles multi-byte characters.

View File

@ -2,12 +2,12 @@
local helpers = require('test.functional.helpers')(after_each)
local funcs = helpers.funcs
local meths = helpers.meths
local clear = helpers.clear
local eq = helpers.eq
local eval = helpers.eval
local feed = helpers.feed
local meth_pcall = helpers.meth_pcall
local exec_lua = helpers.exec_lua
before_each(clear)
@ -110,28 +110,53 @@ describe('lua function', function()
eq(1, funcs.luaeval('vim.stricmp("\\0C\\0", "\\0B\\0")'))
end)
it("vim.str_utfindex/str_byteindex", function()
  -- Sample text mixing 1-, 2-, 3- and 4-byte UTF-8 sequences.
  exec_lua([[_G.test_text = "xy åäö ɧ 汉语 ↥ 🤦x🦄 å بِيَّ"]])

  -- Expected byte offset for every UTF-32 index (zero-based).
  local bytes_by_utf32 = {[0]=0,1,2,3,5,7,9,10,12,13,16,19,20,23,24,28,29,33,34,35,37,38,40,42,44,46,48}
  -- Expected byte offset for every UTF-16 index (zero-based). A repeated
  -- value means two consecutive UTF-16 indices map to the same byte offset.
  local bytes_by_utf16 = {[0]=0,1,2,3,5,7,9,10,12,13,16,19,20,23,24,28,28,29,33,33,34,35,37,38,40,42,44,46,48}

  -- str_byteindex: UTF-32 index -> byte offset
  for idx = 0, #bytes_by_utf32 do
    local actual = exec_lua("return vim.str_byteindex(_G.test_text, ...)", idx)
    eq(bytes_by_utf32[idx], actual, idx)
  end

  -- str_byteindex: UTF-16 index -> byte offset
  for idx = 0, #bytes_by_utf16 do
    local actual = exec_lua("return vim.str_byteindex(_G.test_text, ..., true)", idx)
    eq(bytes_by_utf16[idx], actual, idx)
  end

  -- str_utfindex: walk every byte offset and track which UTF-32/UTF-16 index
  -- it is expected to round up to.
  local utf32_idx, utf16_idx = 0, 0
  for byte = 0, 48 do
    if bytes_by_utf32[utf32_idx] < byte then
      utf32_idx = utf32_idx + 1
    end
    if bytes_by_utf16[utf16_idx] < byte then
      utf16_idx = utf16_idx + 1
      -- Step over the duplicated table entry so both halves are counted.
      if bytes_by_utf16[utf16_idx + 1] == bytes_by_utf16[utf16_idx] then
        utf16_idx = utf16_idx + 1
      end
    end
    local expected = {utf32_idx, utf16_idx}
    local actual = exec_lua("return {vim.str_utfindex(_G.test_text, ...)}", byte)
    eq(expected, actual, byte)
  end
end)
it("vim.schedule", function()
meths.execute_lua([[
exec_lua([[
test_table = {}
vim.schedule(function()
table.insert(test_table, "xx")
end)
table.insert(test_table, "yy")
]], {})
eq({"yy","xx"}, meths.execute_lua("return test_table", {}))
]])
eq({"yy","xx"}, exec_lua("return test_table"))
-- type checked args
eq({false, 'Error executing lua: vim.schedule: expected function'},
meth_pcall(meths.execute_lua, "vim.schedule('stringly')", {}))
meth_pcall(exec_lua, "vim.schedule('stringly')"))
eq({false, 'Error executing lua: vim.schedule: expected function'},
meth_pcall(meths.execute_lua, "vim.schedule()", {}))
meth_pcall(exec_lua, "vim.schedule()"))
meths.execute_lua([[
exec_lua([[
vim.schedule(function()
error("big failure\nvery async")
end)
]], {})
]])
feed("<cr>")
eq('Error executing vim.schedule lua callback: [string "<nvim>"]:2: big failure\nvery async', eval("v:errmsg"))
@ -139,7 +164,7 @@ describe('lua function', function()
it("vim.split", function()
local split = function(str, sep)
return meths.execute_lua('return vim.split(...)', {str, sep})
return exec_lua('return vim.split(...)', str, sep)
end
local tests = {
@ -172,7 +197,7 @@ describe('lua function', function()
it('vim.trim', function()
local trim = function(s)
return meths.execute_lua('return vim.trim(...)', { s })
return exec_lua('return vim.trim(...)', s)
end
local trims = {
@ -194,7 +219,7 @@ describe('lua function', function()
it('vim.inspect', function()
-- just make sure it basically works, it has its own test suite
local inspect = function(t, opts)
return meths.execute_lua('return vim.inspect(...)', { t, opts })
return exec_lua('return vim.inspect(...)', t, opts)
end
eq('2', inspect(2))
@ -202,18 +227,18 @@ describe('lua function', function()
inspect({ a = { b = 1 } }, { newline = '+', indent = '' }))
-- special value vim.inspect.KEY works
eq('{ KEY_a = "x", KEY_b = "y"}', meths.execute_lua([[
eq('{ KEY_a = "x", KEY_b = "y"}', exec_lua([[
return vim.inspect({a="x", b="y"}, {newline = '', process = function(item, path)
if path[#path] == vim.inspect.KEY then
return 'KEY_'..item
end
return item
end})
]], {}))
]]))
end)
it("vim.deepcopy", function()
local is_dc = meths.execute_lua([[
local is_dc = exec_lua([[
local a = { x = { 1, 2 }, y = 5}
local b = vim.deepcopy(a)
@ -222,7 +247,7 @@ describe('lua function', function()
return b.x[1] == 1 and b.x[2] == 2 and b.y == 5 and count == 2
and tostring(a) ~= tostring(b)
]], {})
]])
assert(is_dc)
end)