lua: minimal UTF-16 support needed for LSP

This commit is contained in:
Björn Linse 2019-08-04 15:12:07 +02:00
parent ce628e1187
commit 1f54f68732
4 changed files with 148 additions and 15 deletions

View File

@ -459,6 +459,24 @@ vim.stricmp({a}, {b}) *vim.stricmp()*
are equal, {a} is greater than {b} or {a} is lesser than {b}, are equal, {a} is greater than {b} or {a} is lesser than {b},
respectively. respectively.
vim.str_utfindex({str}[, {index}]) *vim.str_utfindex()*
Convert byte index to UTF-32 and UTF-16 indices. If {index} is not
supplied, the length of the string is used. All indices are zero-based.
Returns two values: the UTF-32 and UTF-16 indices respectively.
Embedded NUL bytes are treated as terminating the string. Invalid
UTF-8 bytes and embedded surrogates are counted as one code
point each. An {index} in the middle of a UTF-8 sequence is rounded
upwards to the end of that sequence.
vim.str_byteindex({str}, {index}[, {use_utf16}]) *vim.str_byteindex()*
Convert UTF-32 or UTF-16 {index} to byte index. If {use_utf16} is not
supplied, it defaults to false (use UTF-32). Returns the byte index.
Invalid UTF-8 and NUL are treated as in |vim.str_utfindex()|. An {index}
in the middle of a UTF-16 sequence is rounded upwards to the end of that
sequence.
vim.schedule({callback}) *vim.schedule()* vim.schedule({callback}) *vim.schedule()*
Schedules {callback} to be invoked soon by the main event-loop. Useful Schedules {callback} to be invoked soon by the main event-loop. Useful
to avoid |textlock| or other temporary restrictions. to avoid |textlock| or other temporary restrictions.

View File

@ -112,6 +112,65 @@ static int nlua_stricmp(lua_State *const lstate) FUNC_ATTR_NONNULL_ALL
return 1; return 1;
} }
/// Convert a byte index into UTF-32 and UTF-16 indices.
///
/// Lua arguments: a string and an optional byte index. When the index is
/// omitted, the byte length of the string is used as the index.
///
/// Raises a Lua error ("index out of range") when the supplied index is
/// negative or larger than the byte length of the string.
///
/// @return 2, the number of Lua return values: the UTF-32 index followed by
///         the UTF-16 index.
static int nlua_str_utfindex(lua_State *const lstate) FUNC_ATTR_NONNULL_ALL
{
  size_t str_len;
  const char *str = luaL_checklstring(lstate, 1, &str_len);

  // Start from the end of the string; an explicit second argument overrides.
  intptr_t byte_idx = (intptr_t)str_len;
  if (lua_gettop(lstate) >= 2) {
    byte_idx = luaL_checkinteger(lstate, 2);
    if (byte_idx < 0 || byte_idx > (intptr_t)str_len) {
      return luaL_error(lstate, "index out of range");
    }
  }

  // mb_utflen() adds to its output counters, so they must start at zero.
  size_t utf32_idx = 0;
  size_t utf16_idx = 0;
  mb_utflen((const char_u *)str, (size_t)byte_idx, &utf32_idx, &utf16_idx);

  lua_pushinteger(lstate, (long)utf32_idx);
  lua_pushinteger(lstate, (long)utf16_idx);
  return 2;
}
/// Convert a UTF-32 or UTF-16 index into a byte index.
///
/// Lua arguments: a string, an index, and an optional `use_utf16` flag.
/// Without the flag the index is interpreted as UTF-32; a truthy third
/// argument selects UTF-16.
///
/// Raises a Lua error ("index out of range") when the index is negative or
/// when mb_utf_index_to_bytes() reports that it cannot be resolved (-1).
///
/// @return 1, the number of Lua return values: the byte index.
static int nlua_str_byteindex(lua_State *const lstate) FUNC_ATTR_NONNULL_ALL
{
  size_t str_len;
  const char *str = luaL_checklstring(lstate, 1, &str_len);

  const intptr_t utf_idx = luaL_checkinteger(lstate, 2);
  if (utf_idx < 0) {
    return luaL_error(lstate, "index out of range");
  }

  // The third argument is only consulted when the caller actually passed it.
  const bool use_utf16 = lua_gettop(lstate) >= 3 && lua_toboolean(lstate, 3);

  const ssize_t byte_idx = mb_utf_index_to_bytes((const char_u *)str, str_len,
                                                 (size_t)utf_idx, use_utf16);
  if (byte_idx == -1) {
    return luaL_error(lstate, "index out of range");
  }

  lua_pushinteger(lstate, (long)byte_idx);
  return 1;
}
static void nlua_luv_error_event(void **argv) static void nlua_luv_error_event(void **argv)
{ {
char *error = (char *)argv[0]; char *error = (char *)argv[0];
@ -220,6 +279,12 @@ static int nlua_state_init(lua_State *const lstate) FUNC_ATTR_NONNULL_ALL
// stricmp // stricmp
lua_pushcfunction(lstate, &nlua_stricmp); lua_pushcfunction(lstate, &nlua_stricmp);
lua_setfield(lstate, -2, "stricmp"); lua_setfield(lstate, -2, "stricmp");
// str_utfindex
lua_pushcfunction(lstate, &nlua_str_utfindex);
lua_setfield(lstate, -2, "str_utfindex");
// str_byteindex
lua_pushcfunction(lstate, &nlua_str_byteindex);
lua_setfield(lstate, -2, "str_byteindex");
// schedule // schedule
lua_pushcfunction(lstate, &nlua_schedule); lua_pushcfunction(lstate, &nlua_schedule);
lua_setfield(lstate, -2, "schedule"); lua_setfield(lstate, -2, "schedule");

View File

@ -1470,6 +1470,31 @@ void mb_utflen(const char_u *s, size_t len, size_t *codepoints,
*codeunits += count + extra; *codeunits += count + extra;
} }
/// Convert a UTF-32 or UTF-16 index into a byte index.
///
/// Scans `s` one character at a time, stopping after `len` bytes or at the
/// first NUL byte. Every character counts as one unit; when
/// `use_utf16_units` is true, a character above U+FFFF counts as two units.
/// An index that lands inside such a two-unit character resolves to the end
/// of that character.
///
/// @param s                Text to scan.
/// @param len              Number of bytes of `s` to consider.
/// @param index            Zero-based index in UTF-32 or UTF-16 units.
/// @param use_utf16_units  Count UTF-16 code units instead of code points.
///
/// @return The byte index that corresponds to `index` (never larger than
///         `len`), or -1 when `index` lies beyond the scanned text.
ssize_t mb_utf_index_to_bytes(const char_u *s, size_t len,
                              size_t index, bool use_utf16_units)
  FUNC_ATTR_NONNULL_ALL
{
  // Index 0 always maps to byte 0, even for an empty string.
  if (index == 0) {
    return 0;
  }

  size_t count = 0;
  size_t clen;
  for (size_t i = 0; i < len && s[i] != NUL; i += clen) {
    clen = utf_ptr2len_len(s+i, len-i);
    // NB: gets the byte value of invalid sequence bytes.
    // we only care whether the char fits in the BMP or not
    int c = (clen > 1) ? utf_ptr2char(s+i) : s[i];
    count++;
    if (use_utf16_units && c > 0xFFFF) {
      count++;
    }
    if (count >= index) {
      // NOTE(review): utf_ptr2len_len() is believed to report the full
      // expected length for a sequence cut short by `len` -- confirm.
      // Clamp so the result can never point past the end of the text.
      const size_t end = i + clen;
      return (ssize_t)(end < len ? end : len);
    }
  }
  return -1;
}
/* /*
* Version of strnicmp() that handles multi-byte characters. * Version of strnicmp() that handles multi-byte characters.

View File

@ -2,12 +2,12 @@
local helpers = require('test.functional.helpers')(after_each) local helpers = require('test.functional.helpers')(after_each)
local funcs = helpers.funcs local funcs = helpers.funcs
local meths = helpers.meths
local clear = helpers.clear local clear = helpers.clear
local eq = helpers.eq local eq = helpers.eq
local eval = helpers.eval local eval = helpers.eval
local feed = helpers.feed local feed = helpers.feed
local meth_pcall = helpers.meth_pcall local meth_pcall = helpers.meth_pcall
local exec_lua = helpers.exec_lua
before_each(clear) before_each(clear)
@ -110,28 +110,53 @@ describe('lua function', function()
eq(1, funcs.luaeval('vim.stricmp("\\0C\\0", "\\0B\\0")')) eq(1, funcs.luaeval('vim.stricmp("\\0C\\0", "\\0B\\0")'))
end) end)
it("vim.str_utfindex/str_byteindex", function()
  exec_lua([[_G.test_text = "xy åäö ɧ 汉语 ↥ 🤦x🦄 å بِيَّ"]])

  -- Expected byte index for each UTF-32 index (keys start at 0).
  local bytes_by_utf32 = {[0]=0,1,2,3,5,7,9,10,12,13,16,19,20,23,24,28,29,33,34,35,37,38,40,42,44,46,48}
  -- Expected byte index for each UTF-16 index (keys start at 0). A value that
  -- appears twice in a row presumably marks a character taking two UTF-16
  -- units: both units resolve to the same byte index.
  local bytes_by_utf16 = {[0]=0,1,2,3,5,7,9,10,12,13,16,19,20,23,24,28,28,29,33,33,34,35,37,38,40,42,44,46,48}

  -- UTF index -> byte index, in both encodings.
  for utf32_idx, byte_idx in pairs(bytes_by_utf32) do
    local got = exec_lua("return vim.str_byteindex(_G.test_text, ...)", utf32_idx)
    eq(byte_idx, got, utf32_idx)
  end
  for utf16_idx, byte_idx in pairs(bytes_by_utf16) do
    local got = exec_lua("return vim.str_byteindex(_G.test_text, ..., true)", utf16_idx)
    eq(byte_idx, got, utf16_idx)
  end

  -- Byte index -> UTF indices. Two cursors walk the expectation tables in
  -- step with the byte index, advancing whenever the byte index moves past
  -- the entry they currently point at.
  local want32, want16 = 0, 0
  for byte_idx = 0,48 do
    if bytes_by_utf32[want32] < byte_idx then
      want32 = want32 + 1
    end
    if bytes_by_utf16[want16] < byte_idx then
      want16 = want16 + 1
      -- Skip over the duplicated entry so the cursor lands on the last
      -- UTF-16 index that shares this byte index.
      if bytes_by_utf16[want16+1] == bytes_by_utf16[want16] then
        want16 = want16 + 1
      end
    end
    local got = exec_lua("return {vim.str_utfindex(_G.test_text, ...)}", byte_idx)
    eq({want32, want16}, got, byte_idx)
  end
end)
it("vim.schedule", function() it("vim.schedule", function()
meths.execute_lua([[ exec_lua([[
test_table = {} test_table = {}
vim.schedule(function() vim.schedule(function()
table.insert(test_table, "xx") table.insert(test_table, "xx")
end) end)
table.insert(test_table, "yy") table.insert(test_table, "yy")
]], {}) ]])
eq({"yy","xx"}, meths.execute_lua("return test_table", {})) eq({"yy","xx"}, exec_lua("return test_table"))
-- type checked args -- type checked args
eq({false, 'Error executing lua: vim.schedule: expected function'}, eq({false, 'Error executing lua: vim.schedule: expected function'},
meth_pcall(meths.execute_lua, "vim.schedule('stringly')", {})) meth_pcall(exec_lua, "vim.schedule('stringly')"))
eq({false, 'Error executing lua: vim.schedule: expected function'}, eq({false, 'Error executing lua: vim.schedule: expected function'},
meth_pcall(meths.execute_lua, "vim.schedule()", {})) meth_pcall(exec_lua, "vim.schedule()"))
meths.execute_lua([[ exec_lua([[
vim.schedule(function() vim.schedule(function()
error("big failure\nvery async") error("big failure\nvery async")
end) end)
]], {}) ]])
feed("<cr>") feed("<cr>")
eq('Error executing vim.schedule lua callback: [string "<nvim>"]:2: big failure\nvery async', eval("v:errmsg")) eq('Error executing vim.schedule lua callback: [string "<nvim>"]:2: big failure\nvery async', eval("v:errmsg"))
@ -139,7 +164,7 @@ describe('lua function', function()
it("vim.split", function() it("vim.split", function()
local split = function(str, sep) local split = function(str, sep)
return meths.execute_lua('return vim.split(...)', {str, sep}) return exec_lua('return vim.split(...)', str, sep)
end end
local tests = { local tests = {
@ -172,7 +197,7 @@ describe('lua function', function()
it('vim.trim', function() it('vim.trim', function()
local trim = function(s) local trim = function(s)
return meths.execute_lua('return vim.trim(...)', { s }) return exec_lua('return vim.trim(...)', s)
end end
local trims = { local trims = {
@ -194,7 +219,7 @@ describe('lua function', function()
it('vim.inspect', function() it('vim.inspect', function()
-- just make sure it basically works, it has its own test suite -- just make sure it basically works, it has its own test suite
local inspect = function(t, opts) local inspect = function(t, opts)
return meths.execute_lua('return vim.inspect(...)', { t, opts }) return exec_lua('return vim.inspect(...)', t, opts)
end end
eq('2', inspect(2)) eq('2', inspect(2))
@ -202,18 +227,18 @@ describe('lua function', function()
inspect({ a = { b = 1 } }, { newline = '+', indent = '' })) inspect({ a = { b = 1 } }, { newline = '+', indent = '' }))
-- special value vim.inspect.KEY works -- special value vim.inspect.KEY works
eq('{ KEY_a = "x", KEY_b = "y"}', meths.execute_lua([[ eq('{ KEY_a = "x", KEY_b = "y"}', exec_lua([[
return vim.inspect({a="x", b="y"}, {newline = '', process = function(item, path) return vim.inspect({a="x", b="y"}, {newline = '', process = function(item, path)
if path[#path] == vim.inspect.KEY then if path[#path] == vim.inspect.KEY then
return 'KEY_'..item return 'KEY_'..item
end end
return item return item
end}) end})
]], {})) ]]))
end) end)
it("vim.deepcopy", function() it("vim.deepcopy", function()
local is_dc = meths.execute_lua([[ local is_dc = exec_lua([[
local a = { x = { 1, 2 }, y = 5} local a = { x = { 1, 2 }, y = 5}
local b = vim.deepcopy(a) local b = vim.deepcopy(a)
@ -222,7 +247,7 @@ describe('lua function', function()
return b.x[1] == 1 and b.x[2] == 2 and b.y == 5 and count == 2 return b.x[1] == 1 and b.x[2] == 2 and b.y == 5 and count == 2
and tostring(a) ~= tostring(b) and tostring(a) ~= tostring(b)
]], {}) ]])
assert(is_dc) assert(is_dc)
end) end)