mirror of
https://github.com/neovim/neovim.git
synced 2025-02-25 18:55:25 -06:00
eval/decode: Fix surrogate pairs processing
This commit is contained in:
parent
ea82270d30
commit
5814e29cdb
@ -340,12 +340,12 @@ int json_decode_string(const char *const buf, const size_t len,
|
||||
goto json_decode_string_fail;
|
||||
}
|
||||
char *str = xmalloc(len + 1);
|
||||
uint16_t fst_in_pair = 0;
|
||||
int fst_in_pair = 0;
|
||||
char *str_end = str;
|
||||
for (const char *t = s; t < p; t++) {
|
||||
if (t[0] != '\\' || t[1] != 'u') {
|
||||
if (fst_in_pair != 0) {
|
||||
str_end += utf_char2bytes((int) fst_in_pair, (char_u *) str_end);
|
||||
str_end += utf_char2bytes(fst_in_pair, (char_u *) str_end);
|
||||
fst_in_pair = 0;
|
||||
}
|
||||
}
|
||||
@ -353,20 +353,21 @@ int json_decode_string(const char *const buf, const size_t len,
|
||||
t++;
|
||||
switch (*t) {
|
||||
case 'u': {
|
||||
char ubuf[] = { t[1], t[2], t[3], t[4], 0 };
|
||||
const char ubuf[] = { t[1], t[2], t[3], t[4], 0 };
|
||||
t += 4;
|
||||
unsigned long ch;
|
||||
vim_str2nr((char_u *) ubuf, NULL, NULL, 0, 0, 2, NULL, &ch);
|
||||
if (0xD800UL <= ch && ch <= 0xDB7FUL) {
|
||||
fst_in_pair = (uint16_t) ch;
|
||||
} else if (0xDC00ULL <= ch && ch <= 0xDB7FUL) {
|
||||
if (fst_in_pair != 0) {
|
||||
int full_char = (
|
||||
(int) (ch - 0xDC00UL)
|
||||
+ (((int) (fst_in_pair - 0xD800)) << 10)
|
||||
);
|
||||
str_end += utf_char2bytes(full_char, (char_u *) str_end);
|
||||
}
|
||||
// A \uXXXX escape has been decoded into `ch`.  A high surrogate is held back
// in fst_in_pair so that the next escape can complete the pair.
if (SURROGATE_HI_START <= ch && ch <= SURROGATE_HI_END) {
  // Emit a still-pending high surrogate first: otherwise the earlier of two
  // consecutive high surrogates would be overwritten and silently dropped.
  if (fst_in_pair != 0) {
    str_end += utf_char2bytes(fst_in_pair, (char_u *) str_end);
  }
  fst_in_pair = (int) ch;
} else if (SURROGATE_LO_START <= ch && ch <= SURROGATE_LO_END
           && fst_in_pair != 0) {
  // Complete pair: combine both halves into a single codepoint at or above
  // SURROGATE_FIRST_CHAR.
  const int full_char = (
      (int) (ch - SURROGATE_LO_START)
      + ((fst_in_pair - SURROGATE_HI_START) << 10)
      + SURROGATE_FIRST_CHAR
  );
  str_end += utf_char2bytes(full_char, (char_u *) str_end);
  fst_in_pair = 0;
} else {
  // Ordinary codepoint or unpaired low surrogate.  Emit any pending high
  // surrogate before it so that output order matches input order.
  if (fst_in_pair != 0) {
    str_end += utf_char2bytes(fst_in_pair, (char_u *) str_end);
    fst_in_pair = 0;
  }
  str_end += utf_char2bytes((int) ch, (char_u *) str_end);
}
|
||||
|
@ -970,7 +970,7 @@ static inline int convert_to_json_string(garray_T *const gap,
|
||||
default: {
|
||||
if (vim_isprintc(ch)) {
|
||||
ga_concat_len(gap, buf + i, shift);
|
||||
} else if (ch <= 0xFFFF) {
|
||||
} else if (ch < SURROGATE_FIRST_CHAR) {
|
||||
ga_concat_len(gap, ((const char []) {
|
||||
'\\', 'u',
|
||||
xdigits[(ch >> (4 * 3)) & 0xF],
|
||||
@ -979,9 +979,9 @@ static inline int convert_to_json_string(garray_T *const gap,
|
||||
xdigits[(ch >> (4 * 0)) & 0xF],
|
||||
}), sizeof("\\u1234") - 1);
|
||||
} else {
|
||||
uint32_t tmp = (uint32_t) ch - 0x010000;
|
||||
uint16_t hi = 0xD800 + ((tmp >> 10) & 0x03FF);
|
||||
uint16_t lo = 0xDC00 + ((tmp >> 0) & 0x03FF);
|
||||
uint32_t tmp = (uint32_t) ch - SURROGATE_FIRST_CHAR;
|
||||
uint16_t hi = SURROGATE_HI_START + ((tmp >> 10) & ((1 << 10) - 1));
|
||||
uint16_t lo = SURROGATE_LO_END + ((tmp >> 0) & ((1 << 10) - 1));
|
||||
ga_concat_len(gap, ((const char []) {
|
||||
'\\', 'u',
|
||||
xdigits[(hi >> (4 * 3)) & 0xF],
|
||||
|
@ -54,6 +54,21 @@ static inline ListReaderState encode_init_lrstate(const list_T *const list)
|
||||
/// Array mapping values from SpecialVarValue enum to names
|
||||
extern const char *const encode_special_var_names[];
|
||||
|
||||
/// First codepoint in high surrogates block
|
||||
#define SURROGATE_HI_START 0xD800
|
||||
|
||||
/// Last codepoint in high surrogates block
|
||||
#define SURROGATE_HI_END 0xDBFF
|
||||
|
||||
/// First codepoint in low surrogates block
|
||||
#define SURROGATE_LO_START 0xDC00
|
||||
|
||||
/// Last codepoint in low surrogates block
|
||||
#define SURROGATE_LO_END 0xDFFF
|
||||
|
||||
/// First character that needs to be encoded as surrogate pair
|
||||
#define SURROGATE_FIRST_CHAR 0x10000
|
||||
|
||||
#ifdef INCLUDE_GENERATED_DECLARATIONS
|
||||
# include "eval/encode.h.generated.h"
|
||||
#endif
|
||||
|
@ -297,6 +297,24 @@ describe('jsondecode() function', function()
|
||||
-- '"\xF9\x80\x80\x80\x80"',
|
||||
-- '"\xFC\x90\x80\x80\x80\x80"',
|
||||
end)
|
||||
|
||||
it('parses surrogate pairs properly', function()
|
||||
eq('\xF0\x90\x80\x80', funcs.jsondecode('"\\uD800\\uDC00"'))
|
||||
eq('\xED\xA0\x80a\xED\xB0\x80', funcs.jsondecode('"\\uD800a\\uDC00"'))
|
||||
eq('\xED\xA0\x80\t\xED\xB0\x80', funcs.jsondecode('"\\uD800\\t\\uDC00"'))
|
||||
|
||||
eq('\xED\xA0\x80', funcs.jsondecode('"\\uD800"'))
|
||||
eq('\xED\xA0\x80a', funcs.jsondecode('"\\uD800a"'))
|
||||
eq('\xED\xA0\x80\t', funcs.jsondecode('"\\uD800\\t"'))
|
||||
|
||||
eq('\xED\xB0\x80', funcs.jsondecode('"\\uDC00"'))
|
||||
eq('\xED\xB0\x80a', funcs.jsondecode('"\\uDC00a"'))
|
||||
eq('\xED\xB0\x80\t', funcs.jsondecode('"\\uDC00\\t"'))
|
||||
|
||||
eq('\xED\xB0\x80', funcs.jsondecode('"\\uDC00"'))
|
||||
eq('a\xED\xB0\x80', funcs.jsondecode('"a\\uDC00"'))
|
||||
eq('\t\xED\xB0\x80', funcs.jsondecode('"\\t\\uDC00"'))
|
||||
end)
|
||||
end)
|
||||
|
||||
describe('jsonencode() function', function()
|
||||
|
Loading…
Reference in New Issue
Block a user