mirror of
https://github.com/neovim/neovim.git
synced 2025-02-25 18:55:25 -06:00
eval/decode: Fix surrogate pairs processing
This commit is contained in:
parent
ea82270d30
commit
5814e29cdb
@ -340,12 +340,12 @@ int json_decode_string(const char *const buf, const size_t len,
|
|||||||
goto json_decode_string_fail;
|
goto json_decode_string_fail;
|
||||||
}
|
}
|
||||||
char *str = xmalloc(len + 1);
|
char *str = xmalloc(len + 1);
|
||||||
uint16_t fst_in_pair = 0;
|
int fst_in_pair = 0;
|
||||||
char *str_end = str;
|
char *str_end = str;
|
||||||
for (const char *t = s; t < p; t++) {
|
for (const char *t = s; t < p; t++) {
|
||||||
if (t[0] != '\\' || t[1] != 'u') {
|
if (t[0] != '\\' || t[1] != 'u') {
|
||||||
if (fst_in_pair != 0) {
|
if (fst_in_pair != 0) {
|
||||||
str_end += utf_char2bytes((int) fst_in_pair, (char_u *) str_end);
|
str_end += utf_char2bytes(fst_in_pair, (char_u *) str_end);
|
||||||
fst_in_pair = 0;
|
fst_in_pair = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -353,20 +353,21 @@ int json_decode_string(const char *const buf, const size_t len,
|
|||||||
t++;
|
t++;
|
||||||
switch (*t) {
|
switch (*t) {
|
||||||
case 'u': {
|
case 'u': {
|
||||||
char ubuf[] = { t[1], t[2], t[3], t[4], 0 };
|
const char ubuf[] = { t[1], t[2], t[3], t[4], 0 };
|
||||||
t += 4;
|
t += 4;
|
||||||
unsigned long ch;
|
unsigned long ch;
|
||||||
vim_str2nr((char_u *) ubuf, NULL, NULL, 0, 0, 2, NULL, &ch);
|
vim_str2nr((char_u *) ubuf, NULL, NULL, 0, 0, 2, NULL, &ch);
|
||||||
if (0xD800UL <= ch && ch <= 0xDB7FUL) {
|
if (SURROGATE_HI_START <= ch && ch <= SURROGATE_HI_END) {
|
||||||
fst_in_pair = (uint16_t) ch;
|
fst_in_pair = (int) ch;
|
||||||
} else if (0xDC00ULL <= ch && ch <= 0xDB7FUL) {
|
} else if (SURROGATE_LO_START <= ch && ch <= SURROGATE_LO_END
|
||||||
if (fst_in_pair != 0) {
|
&& fst_in_pair != 0) {
|
||||||
int full_char = (
|
const int full_char = (
|
||||||
(int) (ch - 0xDC00UL)
|
(int) (ch - SURROGATE_LO_START)
|
||||||
+ (((int) (fst_in_pair - 0xD800)) << 10)
|
+ ((fst_in_pair - SURROGATE_HI_START) << 10)
|
||||||
);
|
+ SURROGATE_FIRST_CHAR
|
||||||
str_end += utf_char2bytes(full_char, (char_u *) str_end);
|
);
|
||||||
}
|
str_end += utf_char2bytes(full_char, (char_u *) str_end);
|
||||||
|
fst_in_pair = 0;
|
||||||
} else {
|
} else {
|
||||||
str_end += utf_char2bytes((int) ch, (char_u *) str_end);
|
str_end += utf_char2bytes((int) ch, (char_u *) str_end);
|
||||||
}
|
}
|
||||||
|
@ -970,7 +970,7 @@ static inline int convert_to_json_string(garray_T *const gap,
|
|||||||
default: {
|
default: {
|
||||||
if (vim_isprintc(ch)) {
|
if (vim_isprintc(ch)) {
|
||||||
ga_concat_len(gap, buf + i, shift);
|
ga_concat_len(gap, buf + i, shift);
|
||||||
} else if (ch <= 0xFFFF) {
|
} else if (ch < SURROGATE_FIRST_CHAR) {
|
||||||
ga_concat_len(gap, ((const char []) {
|
ga_concat_len(gap, ((const char []) {
|
||||||
'\\', 'u',
|
'\\', 'u',
|
||||||
xdigits[(ch >> (4 * 3)) & 0xF],
|
xdigits[(ch >> (4 * 3)) & 0xF],
|
||||||
@ -979,9 +979,9 @@ static inline int convert_to_json_string(garray_T *const gap,
|
|||||||
xdigits[(ch >> (4 * 0)) & 0xF],
|
xdigits[(ch >> (4 * 0)) & 0xF],
|
||||||
}), sizeof("\\u1234") - 1);
|
}), sizeof("\\u1234") - 1);
|
||||||
} else {
|
} else {
|
||||||
uint32_t tmp = (uint32_t) ch - 0x010000;
|
uint32_t tmp = (uint32_t) ch - SURROGATE_FIRST_CHAR;
|
||||||
uint16_t hi = 0xD800 + ((tmp >> 10) & 0x03FF);
|
uint16_t hi = SURROGATE_HI_START + ((tmp >> 10) & ((1 << 10) - 1));
|
||||||
uint16_t lo = 0xDC00 + ((tmp >> 0) & 0x03FF);
|
uint16_t lo = SURROGATE_LO_END + ((tmp >> 0) & ((1 << 10) - 1));
|
||||||
ga_concat_len(gap, ((const char []) {
|
ga_concat_len(gap, ((const char []) {
|
||||||
'\\', 'u',
|
'\\', 'u',
|
||||||
xdigits[(hi >> (4 * 3)) & 0xF],
|
xdigits[(hi >> (4 * 3)) & 0xF],
|
||||||
|
@ -54,6 +54,21 @@ static inline ListReaderState encode_init_lrstate(const list_T *const list)
|
|||||||
/// Array mapping values from SpecialVarValue enum to names
|
/// Array mapping values from SpecialVarValue enum to names
|
||||||
extern const char *const encode_special_var_names[];
|
extern const char *const encode_special_var_names[];
|
||||||
|
|
||||||
|
/// First codepoint in high surrogates block
|
||||||
|
#define SURROGATE_HI_START 0xD800
|
||||||
|
|
||||||
|
/// Last codepoint in high surrogates block
|
||||||
|
#define SURROGATE_HI_END 0xDBFF
|
||||||
|
|
||||||
|
/// First codepoint in low surrogates block
|
||||||
|
#define SURROGATE_LO_START 0xDC00
|
||||||
|
|
||||||
|
/// Last codepoint in low surrogates block
|
||||||
|
#define SURROGATE_LO_END 0xDFFF
|
||||||
|
|
||||||
|
/// First character that needs to be encoded as surrogate pair
|
||||||
|
#define SURROGATE_FIRST_CHAR 0x10000
|
||||||
|
|
||||||
#ifdef INCLUDE_GENERATED_DECLARATIONS
|
#ifdef INCLUDE_GENERATED_DECLARATIONS
|
||||||
# include "eval/encode.h.generated.h"
|
# include "eval/encode.h.generated.h"
|
||||||
#endif
|
#endif
|
||||||
|
@ -297,6 +297,24 @@ describe('jsondecode() function', function()
|
|||||||
-- '"\xF9\x80\x80\x80\x80"',
|
-- '"\xF9\x80\x80\x80\x80"',
|
||||||
-- '"\xFC\x90\x80\x80\x80\x80"',
|
-- '"\xFC\x90\x80\x80\x80\x80"',
|
||||||
end)
|
end)
|
||||||
|
|
||||||
|
it('parses surrogate pairs properly', function()
|
||||||
|
eq('\xF0\x90\x80\x80', funcs.jsondecode('"\\uD800\\uDC00"'))
|
||||||
|
eq('\xED\xA0\x80a\xED\xB0\x80', funcs.jsondecode('"\\uD800a\\uDC00"'))
|
||||||
|
eq('\xED\xA0\x80\t\xED\xB0\x80', funcs.jsondecode('"\\uD800\\t\\uDC00"'))
|
||||||
|
|
||||||
|
eq('\xED\xA0\x80', funcs.jsondecode('"\\uD800"'))
|
||||||
|
eq('\xED\xA0\x80a', funcs.jsondecode('"\\uD800a"'))
|
||||||
|
eq('\xED\xA0\x80\t', funcs.jsondecode('"\\uD800\\t"'))
|
||||||
|
|
||||||
|
eq('\xED\xB0\x80', funcs.jsondecode('"\\uDC00"'))
|
||||||
|
eq('\xED\xB0\x80a', funcs.jsondecode('"\\uDC00a"'))
|
||||||
|
eq('\xED\xB0\x80\t', funcs.jsondecode('"\\uDC00\\t"'))
|
||||||
|
|
||||||
|
eq('\xED\xB0\x80', funcs.jsondecode('"\\uDC00"'))
|
||||||
|
eq('a\xED\xB0\x80', funcs.jsondecode('"a\\uDC00"'))
|
||||||
|
eq('\t\xED\xB0\x80', funcs.jsondecode('"\\t\\uDC00"'))
|
||||||
|
end)
|
||||||
end)
|
end)
|
||||||
|
|
||||||
describe('jsonencode() function', function()
|
describe('jsonencode() function', function()
|
||||||
|
Loading…
Reference in New Issue
Block a user