mirror of
https://github.com/neovim/neovim.git
synced 2025-02-25 18:55:25 -06:00
Merge pull request #25934 from bfredl/screenlinechar
refactor(grid): make screen rendering more multibyte than ever before
This commit is contained in:
commit
7af89ef464
@ -646,7 +646,8 @@ widespread as file format.
|
||||
A composing or combining character is used to change the meaning of the
|
||||
character before it. The combining characters are drawn on top of the
|
||||
preceding character.
|
||||
Up to six combining characters can be displayed.
|
||||
Too big combined characters cannot be displayed, but they can still be
|
||||
inspected using the |g8| and |ga| commands described below.
|
||||
When editing text a composing character is mostly considered part of the
|
||||
preceding character. For example "x" will delete a character and its
|
||||
following composing characters by default.
|
||||
|
@ -294,6 +294,13 @@ The following changes to existing APIs or features add new behavior.
|
||||
Note that syntax highlighting of code examples requires a matching parser
|
||||
and may be affected by custom queries.
|
||||
|
||||
• Support for rendering multibyte characters using composing characters has been
|
||||
enhanced. The maximum limit has been increased from 1+6 codepoints to
|
||||
31 bytes, which is guaranteed to fit all chars from before but often more.
|
||||
|
||||
NOTE: the regexp engine still has a hard-coded limit of considering
|
||||
6 composing chars only.
|
||||
|
||||
==============================================================================
|
||||
REMOVED FEATURES *news-removed*
|
||||
|
||||
|
@ -722,9 +722,16 @@ Options:
|
||||
<
|
||||
*'macatsui'*
|
||||
*'maxcombine'* *'mco'*
|
||||
Nvim always displays up to 6 combining characters. You can still edit
|
||||
text with more than 6 combining characters, you just can't see them.
|
||||
Use |g8| or |ga|. See |mbyte-combining|.
|
||||
Nvim counts maximum character sizes in bytes, not codepoints. This is
|
||||
guaranteed to be big enough to always fit all chars properly displayed
|
||||
in vim with 'maxcombine' set to 6.
|
||||
|
||||
You can still edit text with characters larger than fit in the screen buffer,
|
||||
you just can't see them. Use |g8| or |ga|. See |mbyte-combining|.
|
||||
|
||||
NOTE: the regexp engine still has a hard-coded limit of considering
|
||||
6 composing chars only.
|
||||
|
||||
*'maxmem'* Nvim delegates memory-management to the OS.
|
||||
*'maxmemtot'* Nvim delegates memory-management to the OS.
|
||||
printoptions
|
||||
|
2
runtime/lua/vim/_meta/options.lua
generated
2
runtime/lua/vim/_meta/options.lua
generated
@ -2576,7 +2576,7 @@ vim.go.fp = vim.go.formatprg
|
||||
--- security reasons.
|
||||
---
|
||||
--- @type boolean
|
||||
vim.o.fsync = false
|
||||
vim.o.fsync = true
|
||||
vim.o.fs = vim.o.fsync
|
||||
vim.go.fsync = vim.o.fsync
|
||||
vim.go.fs = vim.go.fsync
|
||||
|
@ -665,7 +665,7 @@ void ins_bytes_len(char *p, size_t len)
|
||||
/// convert bytes to a character.
|
||||
void ins_char(int c)
|
||||
{
|
||||
char buf[MB_MAXBYTES + 1];
|
||||
char buf[MB_MAXCHAR + 1];
|
||||
size_t n = (size_t)utf_char2bytes(c, buf);
|
||||
|
||||
// When "c" is 0x100, 0x200, etc. we don't want to insert a NUL byte.
|
||||
@ -869,12 +869,9 @@ int del_bytes(colnr_T count, bool fixpos_arg, bool use_delcombine)
|
||||
|
||||
// If 'delcombine' is set and deleting (less than) one character, only
|
||||
// delete the last combining character.
|
||||
if (p_deco && use_delcombine
|
||||
&& utfc_ptr2len(oldp + col) >= count) {
|
||||
int cc[MAX_MCO];
|
||||
|
||||
(void)utfc_ptr2char(oldp + col, cc);
|
||||
if (cc[0] != NUL) {
|
||||
if (p_deco && use_delcombine && utfc_ptr2len(oldp + col) >= count) {
|
||||
char *p0 = oldp + col;
|
||||
if (utf_composinglike(p0, p0 + utf_ptr2len(p0))) {
|
||||
// Find the last composing char, there can be several.
|
||||
int n = col;
|
||||
do {
|
||||
|
@ -302,15 +302,13 @@ size_t transstr_len(const char *const s, bool untab)
|
||||
while (*p) {
|
||||
const size_t l = (size_t)utfc_ptr2len(p);
|
||||
if (l > 1) {
|
||||
int pcc[MAX_MCO + 1];
|
||||
pcc[0] = utfc_ptr2char(p, &pcc[1]);
|
||||
|
||||
if (vim_isprintc(pcc[0])) {
|
||||
if (vim_isprintc(utf_ptr2char(p))) {
|
||||
len += l;
|
||||
} else {
|
||||
for (size_t i = 0; i < ARRAY_SIZE(pcc) && pcc[i]; i++) {
|
||||
for (size_t off = 0; off < l; off += (size_t)utf_ptr2len(p + off)) {
|
||||
int c = utf_ptr2char(p + off);
|
||||
char hexbuf[9];
|
||||
len += transchar_hex(hexbuf, pcc[i]);
|
||||
len += transchar_hex(hexbuf, c);
|
||||
}
|
||||
}
|
||||
p += l;
|
||||
@ -349,16 +347,15 @@ size_t transstr_buf(const char *const s, const ssize_t slen, char *const buf, co
|
||||
if (buf_p + l > buf_e) {
|
||||
break; // Exceeded `buf` size.
|
||||
}
|
||||
int pcc[MAX_MCO + 1];
|
||||
pcc[0] = utfc_ptr2char(p, &pcc[1]);
|
||||
|
||||
if (vim_isprintc(pcc[0])) {
|
||||
if (vim_isprintc(utf_ptr2char(p))) {
|
||||
memmove(buf_p, p, l);
|
||||
buf_p += l;
|
||||
} else {
|
||||
for (size_t i = 0; i < ARRAY_SIZE(pcc) && pcc[i]; i++) {
|
||||
for (size_t off = 0; off < l; off += (size_t)utf_ptr2len(p + off)) {
|
||||
int c = utf_ptr2char(p + off);
|
||||
char hexbuf[9]; // <up to 6 bytes>NUL
|
||||
const size_t hexlen = transchar_hex(hexbuf, pcc[i]);
|
||||
const size_t hexlen = transchar_hex(hexbuf, c);
|
||||
if (buf_p + hexlen > buf_e) {
|
||||
break;
|
||||
}
|
||||
|
@ -1654,7 +1654,7 @@ static void registerdigraph(int char1, int char2, int n)
|
||||
bool check_digraph_chars_valid(int char1, int char2)
|
||||
{
|
||||
if (char2 == 0) {
|
||||
char msg[MB_MAXBYTES + 1];
|
||||
char msg[MB_MAXCHAR + 1];
|
||||
msg[utf_char2bytes(char1, msg)] = NUL;
|
||||
semsg(_(e_digraph_must_be_just_two_characters_str), msg);
|
||||
return false;
|
||||
|
@ -228,14 +228,12 @@ static int line_putchar(buf_T *buf, const char **pp, schar_T *dest, int maxcells
|
||||
const char *p = *pp;
|
||||
int cells = utf_ptr2cells(p);
|
||||
int c_len = utfc_ptr2len(p);
|
||||
int u8c, u8cc[MAX_MCO];
|
||||
assert(maxcells > 0);
|
||||
if (cells > maxcells) {
|
||||
dest[0] = schar_from_ascii(' ');
|
||||
return 1;
|
||||
}
|
||||
|
||||
u8c = utfc_ptr2char(p, u8cc);
|
||||
if (*p == TAB) {
|
||||
cells = MIN(tabstop_padding(vcol, buf->b_p_ts, buf->b_p_vts_array), maxcells);
|
||||
}
|
||||
@ -247,16 +245,14 @@ static int line_putchar(buf_T *buf, const char **pp, schar_T *dest, int maxcells
|
||||
for (int c = 0; c < cells; c++) {
|
||||
dest[c] = schar_from_ascii(' ');
|
||||
}
|
||||
goto done;
|
||||
} else if ((uint8_t)(*p) < 0x80 && u8cc[0] == 0) {
|
||||
dest[0] = schar_from_ascii(*p);
|
||||
} else {
|
||||
dest[0] = schar_from_cc(u8c, u8cc);
|
||||
}
|
||||
int u8c;
|
||||
dest[0] = utfc_ptr2schar(p, &u8c);
|
||||
if (cells > 1) {
|
||||
dest[1] = 0;
|
||||
}
|
||||
done:
|
||||
}
|
||||
|
||||
*pp += c_len;
|
||||
return cells;
|
||||
}
|
||||
@ -946,16 +942,6 @@ static void handle_inline_virtual_text(win_T *wp, winlinevars_T *wlv, ptrdiff_t
|
||||
}
|
||||
}
|
||||
|
||||
static bool check_mb_utf8(int *c, int *u8cc)
|
||||
{
|
||||
if (utf_char2len(*c) > 1) {
|
||||
*u8cc = 0;
|
||||
*c = 0xc0;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static colnr_T get_trailcol(win_T *wp, const char *ptr, const char *line)
|
||||
{
|
||||
colnr_T trailcol = MAXCOL;
|
||||
@ -1051,7 +1037,6 @@ int win_line(win_T *wp, linenr_T lnum, int startrow, int endrow, bool number_onl
|
||||
{
|
||||
winlinevars_T wlv; // variables passed between functions
|
||||
|
||||
int c = 0; // init for GCC
|
||||
colnr_T vcol_prev = -1; // "wlv.vcol" of previous character
|
||||
char *line; // current line
|
||||
char *ptr; // current position in "line"
|
||||
@ -1096,8 +1081,7 @@ int win_line(win_T *wp, linenr_T lnum, int startrow, int endrow, bool number_onl
|
||||
int multi_attr = 0; // attributes desired by multibyte
|
||||
int mb_l = 1; // multi-byte byte length
|
||||
int mb_c = 0; // decoded multi-byte character
|
||||
bool mb_utf8 = false; // screen char is UTF-8 char
|
||||
int u8cc[MAX_MCO]; // composing UTF-8 chars
|
||||
schar_T mb_schar; // complete screen char
|
||||
int change_start = MAXCOL; // first col of changed area
|
||||
int change_end = -1; // last col of changed area
|
||||
bool in_multispace = false; // in multiple consecutive spaces
|
||||
@ -1951,34 +1935,25 @@ int win_line(win_T *wp, linenr_T lnum, int startrow, int endrow, bool number_onl
|
||||
// For the '$' of the 'list' option, n_extra == 1, p_extra == "".
|
||||
if (wlv.n_extra > 0) {
|
||||
if (wlv.c_extra != NUL || (wlv.n_extra == 1 && wlv.c_final != NUL)) {
|
||||
c = (wlv.n_extra == 1 && wlv.c_final != NUL) ? wlv.c_final : wlv.c_extra;
|
||||
mb_c = c; // doesn't handle non-utf-8 multi-byte!
|
||||
mb_utf8 = check_mb_utf8(&c, u8cc);
|
||||
mb_c = (wlv.n_extra == 1 && wlv.c_final != NUL) ? wlv.c_final : wlv.c_extra;
|
||||
mb_schar = schar_from_char(mb_c);
|
||||
wlv.n_extra--;
|
||||
} else {
|
||||
assert(wlv.p_extra != NULL);
|
||||
c = (uint8_t)(*wlv.p_extra);
|
||||
mb_c = c;
|
||||
// If the UTF-8 character is more than one byte:
|
||||
// Decode it into "mb_c".
|
||||
mb_l = utfc_ptr2len(wlv.p_extra);
|
||||
mb_utf8 = false;
|
||||
if (mb_l > wlv.n_extra) {
|
||||
mb_l = 1;
|
||||
} else if (mb_l > 1) {
|
||||
mb_c = utfc_ptr2char(wlv.p_extra, u8cc);
|
||||
mb_utf8 = true;
|
||||
c = 0xc0;
|
||||
}
|
||||
if (mb_l == 0) { // at the NUL at end-of-line
|
||||
mb_schar = utfc_ptr2schar(wlv.p_extra, &mb_c);
|
||||
// mb_l=0 at the end-of-line NUL
|
||||
if (mb_l > wlv.n_extra || mb_l == 0) {
|
||||
mb_l = 1;
|
||||
}
|
||||
|
||||
// If a double-width char doesn't fit display a '>' in the last column.
|
||||
// Don't advance the pointer but put the character at the start of the next line.
|
||||
if (wlv.col >= grid->cols - 1 && utf_char2cells(mb_c) == 2) {
|
||||
c = '>';
|
||||
mb_c = c;
|
||||
mb_c = '>';
|
||||
mb_l = 1;
|
||||
(void)mb_l;
|
||||
mb_schar = schar_from_ascii(mb_c);
|
||||
multi_attr = win_hl_attr(wp, HLF_AT);
|
||||
|
||||
if (wlv.cul_attr) {
|
||||
@ -1986,18 +1961,11 @@ int win_line(win_T *wp, linenr_T lnum, int startrow, int endrow, bool number_onl
|
||||
? hl_combine_attr(wlv.cul_attr, multi_attr)
|
||||
: hl_combine_attr(multi_attr, wlv.cul_attr);
|
||||
}
|
||||
|
||||
// put the pointer back to output the double-width
|
||||
// character at the start of the next line.
|
||||
wlv.n_extra++;
|
||||
wlv.p_extra--;
|
||||
} else {
|
||||
wlv.n_extra -= mb_l - 1;
|
||||
wlv.p_extra += mb_l - 1;
|
||||
wlv.n_extra -= mb_l;
|
||||
wlv.p_extra += mb_l;
|
||||
}
|
||||
wlv.p_extra++;
|
||||
}
|
||||
wlv.n_extra--;
|
||||
|
||||
// Only restore search_attr and area_attr after "n_extra" in
|
||||
// the next screen line is also done.
|
||||
@ -2026,58 +1994,40 @@ int win_line(win_T *wp, linenr_T lnum, int startrow, int endrow, bool number_onl
|
||||
}
|
||||
} else if (has_fold) {
|
||||
// skip writing the buffer line itself
|
||||
c = NUL;
|
||||
mb_c = NUL;
|
||||
} else {
|
||||
int c0;
|
||||
char *prev_ptr = ptr;
|
||||
|
||||
// Get a character from the line itself.
|
||||
c0 = c = (uint8_t)(*ptr);
|
||||
mb_c = c;
|
||||
|
||||
if (c == NUL) {
|
||||
// first byte of next char
|
||||
int c0 = (uint8_t)(*ptr);
|
||||
if (c0 == NUL) {
|
||||
// no more cells to skip
|
||||
wlv.skip_cells = 0;
|
||||
}
|
||||
|
||||
// If the UTF-8 character is more than one byte: Decode it
|
||||
// into "mb_c".
|
||||
// Get a character from the line itself.
|
||||
mb_l = utfc_ptr2len(ptr);
|
||||
mb_utf8 = false;
|
||||
if (mb_l > 1) {
|
||||
mb_c = utfc_ptr2char(ptr, u8cc);
|
||||
mb_schar = utfc_ptr2schar(ptr, &mb_c);
|
||||
|
||||
// Overlong encoded ASCII or ASCII with composing char
|
||||
// is displayed normally, except a NUL.
|
||||
if (mb_c < 0x80) {
|
||||
c0 = c = mb_c;
|
||||
}
|
||||
mb_utf8 = true;
|
||||
|
||||
// At start of the line we can have a composing char.
|
||||
// Draw it as a space with a composing char.
|
||||
if (utf_iscomposing(mb_c)) {
|
||||
for (int i = MAX_MCO - 1; i > 0; i--) {
|
||||
u8cc[i] = u8cc[i - 1];
|
||||
}
|
||||
u8cc[0] = mb_c;
|
||||
mb_c = ' ';
|
||||
}
|
||||
if (mb_l > 1 && mb_c < 0x80) {
|
||||
c0 = mb_c;
|
||||
}
|
||||
|
||||
if ((mb_l == 1 && c >= 0x80)
|
||||
if ((mb_l == 1 && c0 >= 0x80)
|
||||
|| (mb_l >= 1 && mb_c == 0)
|
||||
|| (mb_l > 1 && (!vim_isprintc(mb_c)))) {
|
||||
// Illegal UTF-8 byte: display as <xx>.
|
||||
// Non-BMP character : display as ? or fullwidth ?.
|
||||
// Non-printable character : display as ? or fullwidth ?.
|
||||
transchar_hex(wlv.extra, mb_c);
|
||||
if (wp->w_p_rl) { // reverse
|
||||
rl_mirror_ascii(wlv.extra, NULL);
|
||||
}
|
||||
|
||||
wlv.p_extra = wlv.extra;
|
||||
c = (uint8_t)(*wlv.p_extra);
|
||||
mb_c = mb_ptr2char_adv((const char **)&wlv.p_extra);
|
||||
mb_utf8 = (c >= 0x80);
|
||||
mb_schar = schar_from_char(mb_c);
|
||||
wlv.n_extra = (int)strlen(wlv.p_extra);
|
||||
wlv.c_extra = NUL;
|
||||
wlv.c_final = NUL;
|
||||
@ -2093,10 +2043,9 @@ int win_line(win_T *wp, linenr_T lnum, int startrow, int endrow, bool number_onl
|
||||
// last column; the character is displayed at the start of the
|
||||
// next line.
|
||||
if (wlv.col >= grid->cols - 1 && utf_char2cells(mb_c) == 2) {
|
||||
c = '>';
|
||||
mb_c = c;
|
||||
mb_utf8 = false;
|
||||
mb_c = '>';
|
||||
mb_l = 1;
|
||||
mb_schar = schar_from_ascii(mb_c);
|
||||
multi_attr = win_hl_attr(wp, HLF_AT);
|
||||
// Put pointer back so that the character will be
|
||||
// displayed at the start of the next line.
|
||||
@ -2112,15 +2061,14 @@ int win_line(win_T *wp, linenr_T lnum, int startrow, int endrow, bool number_onl
|
||||
wlv.n_extra = 1;
|
||||
wlv.c_extra = MB_FILLER_CHAR;
|
||||
wlv.c_final = NUL;
|
||||
c = ' ';
|
||||
mb_c = ' ';
|
||||
mb_l = 1;
|
||||
mb_schar = schar_from_ascii(mb_c);
|
||||
if (area_attr == 0 && search_attr == 0) {
|
||||
wlv.n_attr = wlv.n_extra + 1;
|
||||
wlv.extra_attr = win_hl_attr(wp, HLF_AT);
|
||||
saved_attr2 = wlv.char_attr; // save current attr
|
||||
}
|
||||
mb_c = c;
|
||||
mb_utf8 = false;
|
||||
mb_l = 1;
|
||||
}
|
||||
ptr++;
|
||||
|
||||
@ -2159,11 +2107,7 @@ int win_line(win_T *wp, linenr_T lnum, int startrow, int endrow, bool number_onl
|
||||
|
||||
// no concealing past the end of the line, it interferes
|
||||
// with line highlighting.
|
||||
if (c == NUL) {
|
||||
syntax_flags = 0;
|
||||
} else {
|
||||
syntax_flags = get_syntax_info(&syntax_seqnr);
|
||||
}
|
||||
syntax_flags = (mb_c == 0) ? 0 : get_syntax_info(&syntax_seqnr);
|
||||
}
|
||||
|
||||
if (has_decor && v > 0) {
|
||||
@ -2198,7 +2142,7 @@ int win_line(win_T *wp, linenr_T lnum, int startrow, int endrow, bool number_onl
|
||||
spell_attr = 0;
|
||||
// do not calculate cap_col at the end of the line or when
|
||||
// only white space is following
|
||||
if (c != 0 && (*skipwhite(prev_ptr) != NUL) && can_spell) {
|
||||
if (mb_c != 0 && (*skipwhite(prev_ptr) != NUL) && can_spell) {
|
||||
char *p;
|
||||
hlf_T spell_hlf = HLF_COUNT;
|
||||
v -= mb_l - 1;
|
||||
@ -2272,13 +2216,13 @@ int win_line(win_T *wp, linenr_T lnum, int startrow, int endrow, bool number_onl
|
||||
//
|
||||
// So only allow to linebreak, once we have found chars not in
|
||||
// 'breakat' in the line.
|
||||
if (wp->w_p_lbr && !wlv.need_lbr && c != NUL
|
||||
if (wp->w_p_lbr && !wlv.need_lbr && mb_c != NUL
|
||||
&& !vim_isbreak((uint8_t)(*ptr))) {
|
||||
wlv.need_lbr = true;
|
||||
}
|
||||
// Found last space before word: check for line break.
|
||||
if (wp->w_p_lbr && c0 == c && wlv.need_lbr
|
||||
&& vim_isbreak(c) && !vim_isbreak((uint8_t)(*ptr))) {
|
||||
if (wp->w_p_lbr && c0 == mb_c && mb_c < 128 && wlv.need_lbr
|
||||
&& vim_isbreak(mb_c) && !vim_isbreak((uint8_t)(*ptr))) {
|
||||
int mb_off = utf_head_off(line, ptr - 1);
|
||||
char *p = ptr - (mb_off + 1);
|
||||
chartabsize_T cts;
|
||||
@ -2289,33 +2233,33 @@ int win_line(win_T *wp, linenr_T lnum, int startrow, int endrow, bool number_onl
|
||||
wlv.n_extra = win_lbr_chartabsize(&cts, NULL) - 1;
|
||||
clear_chartabsize_arg(&cts);
|
||||
|
||||
if (on_last_col && c != TAB) {
|
||||
if (on_last_col && mb_c != TAB) {
|
||||
// Do not continue search/match highlighting over the
|
||||
// line break, but for TABs the highlighting should
|
||||
// include the complete width of the character
|
||||
search_attr = 0;
|
||||
}
|
||||
|
||||
if (c == TAB && wlv.n_extra + wlv.col > grid->cols) {
|
||||
if (mb_c == TAB && wlv.n_extra + wlv.col > grid->cols) {
|
||||
wlv.n_extra = tabstop_padding(wlv.vcol, wp->w_buffer->b_p_ts,
|
||||
wp->w_buffer->b_p_vts_array) - 1;
|
||||
}
|
||||
wlv.c_extra = mb_off > 0 ? MB_FILLER_CHAR : ' ';
|
||||
wlv.c_final = NUL;
|
||||
if (ascii_iswhite(c)) {
|
||||
if (c == TAB) {
|
||||
if (mb_c < 128 && ascii_iswhite(mb_c)) {
|
||||
if (mb_c == TAB) {
|
||||
// See "Tab alignment" below.
|
||||
FIX_FOR_BOGUSCOLS;
|
||||
}
|
||||
if (!wp->w_p_list) {
|
||||
c = ' ';
|
||||
mb_c = ' ';
|
||||
mb_schar = schar_from_ascii(mb_c);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (wp->w_p_list) {
|
||||
in_multispace = c == ' ' && (*ptr == ' '
|
||||
|| (prev_ptr > line && prev_ptr[-1] == ' '));
|
||||
in_multispace = mb_c == ' ' && (*ptr == ' ' || (prev_ptr > line && prev_ptr[-1] == ' '));
|
||||
if (!in_multispace) {
|
||||
multispace_pos = 0;
|
||||
}
|
||||
@ -2325,61 +2269,56 @@ int win_line(win_T *wp, linenr_T lnum, int startrow, int endrow, bool number_onl
|
||||
// But not when the character is followed by a composing
|
||||
// character (use mb_l to check that).
|
||||
if (wp->w_p_list
|
||||
&& ((((c == 160 && mb_l == 1)
|
||||
|| (mb_utf8
|
||||
&& ((mb_c == 160 && mb_l == 2)
|
||||
|| (mb_c == 0x202f && mb_l == 3))))
|
||||
&& ((((mb_c == 160 && mb_l == 2) || (mb_c == 0x202f && mb_l == 3))
|
||||
&& wp->w_p_lcs_chars.nbsp)
|
||||
|| (c == ' '
|
||||
|| (mb_c == ' '
|
||||
&& mb_l == 1
|
||||
&& (wp->w_p_lcs_chars.space
|
||||
|| (in_multispace && wp->w_p_lcs_chars.multispace != NULL))
|
||||
&& ptr - line >= leadcol
|
||||
&& ptr - line <= trailcol))) {
|
||||
if (in_multispace && wp->w_p_lcs_chars.multispace != NULL) {
|
||||
c = wp->w_p_lcs_chars.multispace[multispace_pos++];
|
||||
mb_c = wp->w_p_lcs_chars.multispace[multispace_pos++];
|
||||
if (wp->w_p_lcs_chars.multispace[multispace_pos] == NUL) {
|
||||
multispace_pos = 0;
|
||||
}
|
||||
} else {
|
||||
c = (c == ' ') ? wp->w_p_lcs_chars.space : wp->w_p_lcs_chars.nbsp;
|
||||
mb_c = (mb_c == ' ') ? wp->w_p_lcs_chars.space : wp->w_p_lcs_chars.nbsp;
|
||||
}
|
||||
wlv.n_attr = 1;
|
||||
wlv.extra_attr = win_hl_attr(wp, HLF_0);
|
||||
saved_attr2 = wlv.char_attr; // save current attr
|
||||
mb_c = c;
|
||||
mb_utf8 = check_mb_utf8(&c, u8cc);
|
||||
mb_schar = schar_from_char(mb_c);
|
||||
}
|
||||
|
||||
if (c == ' ' && ((trailcol != MAXCOL && ptr > line + trailcol)
|
||||
if (mb_c == ' ' && mb_l == 1 && ((trailcol != MAXCOL && ptr > line + trailcol)
|
||||
|| (leadcol != 0 && ptr < line + leadcol))) {
|
||||
if (leadcol != 0 && in_multispace && ptr < line + leadcol
|
||||
&& wp->w_p_lcs_chars.leadmultispace != NULL) {
|
||||
c = wp->w_p_lcs_chars.leadmultispace[multispace_pos++];
|
||||
mb_c = wp->w_p_lcs_chars.leadmultispace[multispace_pos++];
|
||||
if (wp->w_p_lcs_chars.leadmultispace[multispace_pos] == NUL) {
|
||||
multispace_pos = 0;
|
||||
}
|
||||
} else if (ptr > line + trailcol && wp->w_p_lcs_chars.trail) {
|
||||
c = wp->w_p_lcs_chars.trail;
|
||||
mb_c = wp->w_p_lcs_chars.trail;
|
||||
} else if (ptr < line + leadcol && wp->w_p_lcs_chars.lead) {
|
||||
c = wp->w_p_lcs_chars.lead;
|
||||
mb_c = wp->w_p_lcs_chars.lead;
|
||||
} else if (leadcol != 0 && wp->w_p_lcs_chars.space) {
|
||||
c = wp->w_p_lcs_chars.space;
|
||||
mb_c = wp->w_p_lcs_chars.space;
|
||||
}
|
||||
|
||||
wlv.n_attr = 1;
|
||||
wlv.extra_attr = win_hl_attr(wp, HLF_0);
|
||||
saved_attr2 = wlv.char_attr; // save current attr
|
||||
mb_c = c;
|
||||
mb_utf8 = check_mb_utf8(&c, u8cc);
|
||||
mb_schar = schar_from_char(mb_c);
|
||||
}
|
||||
}
|
||||
|
||||
// Handling of non-printable characters.
|
||||
if (!vim_isprintc(c)) {
|
||||
if (!vim_isprintc(mb_c)) {
|
||||
// when getting a character from the file, we may have to
|
||||
// turn it into something else on the way to putting it on the screen.
|
||||
if (c == TAB && (!wp->w_p_list || wp->w_p_lcs_chars.tab1)) {
|
||||
if (mb_c == TAB && (!wp->w_p_list || wp->w_p_lcs_chars.tab1)) {
|
||||
int tab_len = 0;
|
||||
colnr_T vcol_adjusted = wlv.vcol; // removed showbreak length
|
||||
char *const sbr = get_showbreak_value(wp);
|
||||
@ -2422,7 +2361,7 @@ int win_line(win_T *wp, linenr_T lnum, int startrow, int endrow, bool number_onl
|
||||
if (wlv.n_extra > 0) {
|
||||
len += wlv.n_extra - tab_len;
|
||||
}
|
||||
c = wp->w_p_lcs_chars.tab1;
|
||||
mb_c = wp->w_p_lcs_chars.tab1;
|
||||
p = get_extra_buf((size_t)len + 1);
|
||||
memset(p, ' ', (size_t)len);
|
||||
p[len] = NUL;
|
||||
@ -2470,11 +2409,9 @@ int win_line(win_T *wp, linenr_T lnum, int startrow, int endrow, bool number_onl
|
||||
}
|
||||
}
|
||||
|
||||
mb_utf8 = false; // don't draw as UTF-8
|
||||
if (wp->w_p_list) {
|
||||
c = (wlv.n_extra == 0 && wp->w_p_lcs_chars.tab3)
|
||||
? wp->w_p_lcs_chars.tab3
|
||||
: wp->w_p_lcs_chars.tab1;
|
||||
mb_c = (wlv.n_extra == 0 && wp->w_p_lcs_chars.tab3)
|
||||
? wp->w_p_lcs_chars.tab3 : wp->w_p_lcs_chars.tab1;
|
||||
if (wp->w_p_lbr && wlv.p_extra != NULL && *wlv.p_extra != NUL) {
|
||||
wlv.c_extra = NUL; // using p_extra from above
|
||||
} else {
|
||||
@ -2484,14 +2421,13 @@ int win_line(win_T *wp, linenr_T lnum, int startrow, int endrow, bool number_onl
|
||||
wlv.n_attr = tab_len + 1;
|
||||
wlv.extra_attr = win_hl_attr(wp, HLF_0);
|
||||
saved_attr2 = wlv.char_attr; // save current attr
|
||||
mb_c = c;
|
||||
mb_utf8 = check_mb_utf8(&c, u8cc);
|
||||
} else {
|
||||
wlv.c_final = NUL;
|
||||
wlv.c_extra = ' ';
|
||||
c = ' ';
|
||||
mb_c = ' ';
|
||||
}
|
||||
} else if (c == NUL
|
||||
mb_schar = schar_from_char(mb_c);
|
||||
} else if (mb_c == NUL
|
||||
&& (wp->w_p_list
|
||||
|| ((wlv.fromcol >= 0 || fromcol_prev >= 0)
|
||||
&& wlv.tocol > wlv.vcol
|
||||
@ -2515,20 +2451,19 @@ int win_line(win_T *wp, linenr_T lnum, int startrow, int endrow, bool number_onl
|
||||
wlv.n_extra = 0;
|
||||
}
|
||||
if (wp->w_p_list && wp->w_p_lcs_chars.eol > 0) {
|
||||
c = wp->w_p_lcs_chars.eol;
|
||||
mb_c = wp->w_p_lcs_chars.eol;
|
||||
} else {
|
||||
c = ' ';
|
||||
mb_c = ' ';
|
||||
}
|
||||
lcs_eol_one = -1;
|
||||
ptr--; // put it back at the NUL
|
||||
wlv.extra_attr = win_hl_attr(wp, HLF_AT);
|
||||
wlv.n_attr = 1;
|
||||
mb_c = c;
|
||||
mb_utf8 = check_mb_utf8(&c, u8cc);
|
||||
} else if (c != NUL) {
|
||||
wlv.p_extra = transchar_buf(wp->w_buffer, c);
|
||||
mb_schar = schar_from_char(mb_c);
|
||||
} else if (mb_c != NUL) {
|
||||
wlv.p_extra = transchar_buf(wp->w_buffer, mb_c);
|
||||
if (wlv.n_extra == 0) {
|
||||
wlv.n_extra = byte2cells(c) - 1;
|
||||
wlv.n_extra = byte2cells(mb_c) - 1;
|
||||
}
|
||||
if ((dy_flags & DY_UHEX) && wp->w_p_rl) {
|
||||
rl_mirror_ascii(wlv.p_extra, NULL); // reverse "<12>"
|
||||
@ -2538,7 +2473,7 @@ int win_line(win_T *wp, linenr_T lnum, int startrow, int endrow, bool number_onl
|
||||
if (wp->w_p_lbr) {
|
||||
char *p;
|
||||
|
||||
c = (uint8_t)(*wlv.p_extra);
|
||||
mb_c = (uint8_t)(*wlv.p_extra);
|
||||
p = get_extra_buf((size_t)wlv.n_extra + 1);
|
||||
memset(p, ' ', (size_t)wlv.n_extra);
|
||||
strncpy(p, // NOLINT(runtime/printf)
|
||||
@ -2547,20 +2482,21 @@ int win_line(win_T *wp, linenr_T lnum, int startrow, int endrow, bool number_onl
|
||||
p[wlv.n_extra] = NUL;
|
||||
wlv.p_extra = p;
|
||||
} else {
|
||||
wlv.n_extra = byte2cells(c) - 1;
|
||||
c = (uint8_t)(*wlv.p_extra++);
|
||||
wlv.n_extra = byte2cells(mb_c) - 1;
|
||||
mb_c = (uint8_t)(*wlv.p_extra++);
|
||||
}
|
||||
wlv.n_attr = wlv.n_extra + 1;
|
||||
wlv.extra_attr = win_hl_attr(wp, HLF_8);
|
||||
saved_attr2 = wlv.char_attr; // save current attr
|
||||
mb_utf8 = false; // don't draw as UTF-8
|
||||
mb_schar = schar_from_ascii(mb_c);
|
||||
} else if (VIsual_active
|
||||
&& (VIsual_mode == Ctrl_V || VIsual_mode == 'v')
|
||||
&& virtual_active()
|
||||
&& wlv.tocol != MAXCOL
|
||||
&& wlv.vcol < wlv.tocol
|
||||
&& wlv.col < grid->cols) {
|
||||
c = ' ';
|
||||
mb_c = ' ';
|
||||
mb_schar = schar_from_char(mb_c);
|
||||
ptr--; // put it back at the NUL
|
||||
}
|
||||
}
|
||||
@ -2580,18 +2516,18 @@ int win_line(win_T *wp, linenr_T lnum, int startrow, int endrow, bool number_onl
|
||||
// First time at this concealed item: display one
|
||||
// character.
|
||||
if (has_match_conc && match_conc) {
|
||||
c = match_conc;
|
||||
mb_c = match_conc;
|
||||
} else if (decor_conceal && decor_state.conceal_char) {
|
||||
c = decor_state.conceal_char;
|
||||
mb_c = decor_state.conceal_char;
|
||||
if (decor_state.conceal_attr) {
|
||||
wlv.char_attr = decor_state.conceal_attr;
|
||||
}
|
||||
} else if (syn_get_sub_char() != NUL) {
|
||||
c = syn_get_sub_char();
|
||||
mb_c = syn_get_sub_char();
|
||||
} else if (wp->w_p_lcs_chars.conceal != NUL) {
|
||||
c = wp->w_p_lcs_chars.conceal;
|
||||
mb_c = wp->w_p_lcs_chars.conceal;
|
||||
} else {
|
||||
c = ' ';
|
||||
mb_c = ' ';
|
||||
}
|
||||
|
||||
prev_syntax_id = syntax_seqnr;
|
||||
@ -2610,8 +2546,7 @@ int win_line(win_T *wp, linenr_T lnum, int startrow, int endrow, bool number_onl
|
||||
is_concealing = true;
|
||||
wlv.skip_cells = 1;
|
||||
}
|
||||
mb_c = c;
|
||||
mb_utf8 = check_mb_utf8(&c, u8cc);
|
||||
mb_schar = schar_from_char(mb_c);
|
||||
} else {
|
||||
prev_syntax_id = 0;
|
||||
is_concealing = false;
|
||||
@ -2654,8 +2589,8 @@ int win_line(win_T *wp, linenr_T lnum, int startrow, int endrow, bool number_onl
|
||||
&& (wp->w_p_wrap ? (wp->w_skipcol > 0 && wlv.row == 0) : wp->w_leftcol > 0)
|
||||
&& wlv.filler_todo <= 0
|
||||
&& wlv.draw_state > WL_STC
|
||||
&& c != NUL) {
|
||||
c = wp->w_p_lcs_chars.prec;
|
||||
&& mb_c != NUL) {
|
||||
mb_c = wp->w_p_lcs_chars.prec;
|
||||
lcs_prec_todo = NUL;
|
||||
if (utf_char2cells(mb_c) > 1) {
|
||||
// Double-width character being overwritten by the "precedes"
|
||||
@ -2666,15 +2601,14 @@ int win_line(win_T *wp, linenr_T lnum, int startrow, int endrow, bool number_onl
|
||||
wlv.n_attr = 2;
|
||||
wlv.extra_attr = win_hl_attr(wp, HLF_AT);
|
||||
}
|
||||
mb_c = c;
|
||||
mb_utf8 = check_mb_utf8(&c, u8cc);
|
||||
mb_schar = schar_from_char(mb_c);
|
||||
saved_attr3 = wlv.char_attr; // save current attr
|
||||
wlv.char_attr = win_hl_attr(wp, HLF_AT); // overwriting char_attr
|
||||
n_attr3 = 1;
|
||||
}
|
||||
|
||||
// At end of the text line or just after the last character.
|
||||
if (c == NUL && eol_hl_off == 0) {
|
||||
if (mb_c == NUL && eol_hl_off == 0) {
|
||||
// flag to indicate whether prevcol equals startcol of search_hl or
|
||||
// one of the matches
|
||||
bool prevcol_hl_flag = get_prevcol_hl_flag(wp, &screen_search_hl,
|
||||
@ -2728,7 +2662,7 @@ int win_line(win_T *wp, linenr_T lnum, int startrow, int endrow, bool number_onl
|
||||
}
|
||||
|
||||
// At end of the text line.
|
||||
if (c == NUL) {
|
||||
if (mb_c == NUL) {
|
||||
// Highlight 'cursorcolumn' & 'colorcolumn' past end of the line.
|
||||
if (wp->w_p_wrap) {
|
||||
v = wlv.startrow == 0 ? wp->w_skipcol : 0;
|
||||
@ -2874,10 +2808,9 @@ int win_line(win_T *wp, linenr_T lnum, int startrow, int endrow, bool number_onl
|
||||
|| lcs_eol_one > 0
|
||||
|| (wlv.n_extra > 0 && (wlv.c_extra != NUL || *wlv.p_extra != NUL))
|
||||
|| has_more_inline_virt(&wlv, v)) {
|
||||
c = wp->w_p_lcs_chars.ext;
|
||||
mb_c = wp->w_p_lcs_chars.ext;
|
||||
wlv.char_attr = win_hl_attr(wp, HLF_AT);
|
||||
mb_c = c;
|
||||
mb_utf8 = check_mb_utf8(&c, u8cc);
|
||||
mb_schar = schar_from_char(mb_c);
|
||||
}
|
||||
}
|
||||
|
||||
@ -2923,11 +2856,7 @@ int win_line(win_T *wp, linenr_T lnum, int startrow, int endrow, bool number_onl
|
||||
// Skip characters that are left of the screen for 'nowrap'.
|
||||
if (wlv.draw_state < WL_LINE || wlv.skip_cells <= 0) {
|
||||
// Store the character.
|
||||
if (mb_utf8) {
|
||||
linebuf_char[wlv.off] = schar_from_cc(mb_c, u8cc);
|
||||
} else {
|
||||
linebuf_char[wlv.off] = schar_from_ascii((char)c);
|
||||
}
|
||||
linebuf_char[wlv.off] = mb_schar;
|
||||
if (multi_attr) {
|
||||
linebuf_attr[wlv.off] = multi_attr;
|
||||
multi_attr = 0;
|
||||
|
@ -1462,7 +1462,7 @@ void edit_putchar(int c, bool highlight)
|
||||
pc_status = PC_STATUS_SET;
|
||||
}
|
||||
|
||||
char buf[MB_MAXBYTES + 1];
|
||||
char buf[MB_MAXCHAR + 1];
|
||||
grid_line_puts(pc_col, buf, utf_char2bytes(c, buf), attr);
|
||||
grid_line_flush();
|
||||
}
|
||||
@ -2176,7 +2176,7 @@ void insertchar(int c, int flags, int second_indent)
|
||||
int cc;
|
||||
|
||||
if ((cc = utf_char2len(c)) > 1) {
|
||||
char buf[MB_MAXBYTES + 1];
|
||||
char buf[MB_MAXCHAR + 1];
|
||||
|
||||
utf_char2bytes(c, buf);
|
||||
buf[cc] = NUL;
|
||||
@ -3681,7 +3681,6 @@ static bool ins_bs(int c, int mode, int *inserted_space_p)
|
||||
int cc;
|
||||
int temp = 0; // init for GCC
|
||||
bool did_backspace = false;
|
||||
int cpc[MAX_MCO]; // composing characters
|
||||
bool call_fix_indent = false;
|
||||
|
||||
// can't delete anything in an empty file
|
||||
@ -3910,15 +3909,15 @@ static bool ins_bs(int c, int mode, int *inserted_space_p)
|
||||
if (State & REPLACE_FLAG) {
|
||||
replace_do_bs(-1);
|
||||
} else {
|
||||
const int l_p_deco = p_deco;
|
||||
if (l_p_deco) {
|
||||
(void)utfc_ptr2char(get_cursor_pos_ptr(), cpc);
|
||||
bool has_composing = false;
|
||||
if (p_deco) {
|
||||
char *p0 = get_cursor_pos_ptr();
|
||||
has_composing = utf_composinglike(p0, p0 + utf_ptr2len(p0));
|
||||
}
|
||||
(void)del_char(false);
|
||||
// If there are combining characters and 'delcombine' is set
|
||||
// move the cursor back. Don't back up before the base
|
||||
// character.
|
||||
if (l_p_deco && cpc[0] != NUL) {
|
||||
// move the cursor back. Don't back up before the base character.
|
||||
if (has_composing) {
|
||||
inc_cursor();
|
||||
}
|
||||
if (revins_chars) {
|
||||
|
@ -7117,7 +7117,7 @@ dict_T *get_vim_var_dict(int idx) FUNC_ATTR_PURE
|
||||
/// Set v:char to character "c".
|
||||
void set_vim_var_char(int c)
|
||||
{
|
||||
char buf[MB_MAXBYTES + 1];
|
||||
char buf[MB_MAXCHAR + 1];
|
||||
|
||||
buf[utf_char2bytes(c, buf)] = NUL;
|
||||
set_vim_var_string(VV_CHAR, buf, -1);
|
||||
|
@ -5134,7 +5134,7 @@ static void f_nr2char(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
|
||||
return;
|
||||
}
|
||||
|
||||
char buf[MB_MAXBYTES];
|
||||
char buf[MB_MAXCHAR];
|
||||
const int len = utf_char2bytes((int)num, buf);
|
||||
|
||||
rettv->v_type = VAR_STRING;
|
||||
@ -6891,7 +6891,7 @@ static void f_screenchar(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
|
||||
if (row < 0 || row >= grid->rows || col < 0 || col >= grid->cols) {
|
||||
c = -1;
|
||||
} else {
|
||||
char buf[MB_MAXBYTES + 1];
|
||||
char buf[MAX_SCHAR_SIZE + 1];
|
||||
schar_get(buf, grid_getchar(grid, row, col, NULL));
|
||||
c = utf_ptr2char(buf);
|
||||
}
|
||||
@ -6907,24 +6907,22 @@ static void f_screenchars(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
|
||||
ScreenGrid *grid;
|
||||
screenchar_adjust(&grid, &row, &col);
|
||||
|
||||
tv_list_alloc_ret(rettv, kListLenMayKnow);
|
||||
if (row < 0 || row >= grid->rows || col < 0 || col >= grid->cols) {
|
||||
tv_list_alloc_ret(rettv, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
char buf[MB_MAXBYTES + 1];
|
||||
char buf[MAX_SCHAR_SIZE + 1];
|
||||
schar_get(buf, grid_getchar(grid, row, col, NULL));
|
||||
int pcc[MAX_MCO];
|
||||
int c = utfc_ptr2char(buf, pcc);
|
||||
int composing_len = 0;
|
||||
while (composing_len < MAX_MCO && pcc[composing_len] != 0) {
|
||||
composing_len++;
|
||||
}
|
||||
tv_list_alloc_ret(rettv, composing_len + 1);
|
||||
|
||||
// schar values are already processed chars which are always NUL-terminated.
|
||||
// A single [0] is expected when char is NUL.
|
||||
size_t i = 0;
|
||||
do {
|
||||
int c = utf_ptr2char(buf + i);
|
||||
tv_list_append_number(rettv->vval.v_list, c);
|
||||
for (int i = 0; i < composing_len; i++) {
|
||||
tv_list_append_number(rettv->vval.v_list, pcc[i]);
|
||||
}
|
||||
i += (size_t)utf_ptr2len(buf + i);
|
||||
} while (buf[i] != NUL);
|
||||
}
|
||||
|
||||
/// "screencol()" function
|
||||
@ -6957,7 +6955,7 @@ static void f_screenstring(typval_T *argvars, typval_T *rettv, EvalFuncData fptr
|
||||
return;
|
||||
}
|
||||
|
||||
char buf[MB_MAXBYTES + 1];
|
||||
char buf[MAX_SCHAR_SIZE + 1];
|
||||
schar_get(buf, grid_getchar(grid, row, col, NULL));
|
||||
rettv->vval.v_string = xstrdup(buf);
|
||||
}
|
||||
@ -7413,8 +7411,7 @@ static void f_setcharsearch(typval_T *argvars, typval_T *rettv, EvalFuncData fpt
|
||||
|
||||
char *const csearch = tv_dict_get_string(d, "char", false);
|
||||
if (csearch != NULL) {
|
||||
int pcc[MAX_MCO];
|
||||
const int c = utfc_ptr2char(csearch, pcc);
|
||||
int c = utf_ptr2char(csearch);
|
||||
set_last_csearch(c, csearch, utfc_ptr2len(csearch));
|
||||
}
|
||||
|
||||
|
@ -131,17 +131,22 @@ static const char e_non_numeric_argument_to_z[]
|
||||
/// ":ascii" and "ga" implementation
|
||||
void do_ascii(exarg_T *eap)
|
||||
{
|
||||
char *dig;
|
||||
int cc[MAX_MCO];
|
||||
int c = utfc_ptr2char(get_cursor_pos_ptr(), cc);
|
||||
if (c == NUL) {
|
||||
char *data = get_cursor_pos_ptr();
|
||||
size_t len = (size_t)utfc_ptr2len(data);
|
||||
|
||||
if (len == 0) {
|
||||
msg("NUL", 0);
|
||||
return;
|
||||
}
|
||||
|
||||
size_t iobuff_len = 0;
|
||||
bool need_clear = true;
|
||||
msg_sb_eol();
|
||||
msg_start();
|
||||
|
||||
int ci = 0;
|
||||
int c = utf_ptr2char(data);
|
||||
size_t off = 0;
|
||||
|
||||
// TODO(bfredl): merge this with the main loop
|
||||
if (c < 0x80) {
|
||||
if (c == NL) { // NUL is stored as NL.
|
||||
c = NUL;
|
||||
@ -160,46 +165,29 @@ void do_ascii(exarg_T *eap)
|
||||
char buf2[20];
|
||||
buf2[0] = NUL;
|
||||
|
||||
dig = get_digraph_for_char(cval);
|
||||
char *dig = get_digraph_for_char(cval);
|
||||
if (dig != NULL) {
|
||||
iobuff_len += (size_t)vim_snprintf(IObuff + iobuff_len,
|
||||
sizeof(IObuff) - iobuff_len,
|
||||
vim_snprintf(IObuff, sizeof(IObuff),
|
||||
_("<%s>%s%s %d, Hex %02x, Oct %03o, Digr %s"),
|
||||
transchar(c), buf1, buf2, cval, cval, cval, dig);
|
||||
} else {
|
||||
iobuff_len += (size_t)vim_snprintf(IObuff + iobuff_len,
|
||||
sizeof(IObuff) - iobuff_len,
|
||||
vim_snprintf(IObuff, sizeof(IObuff),
|
||||
_("<%s>%s%s %d, Hex %02x, Octal %03o"),
|
||||
transchar(c), buf1, buf2, cval, cval, cval);
|
||||
}
|
||||
|
||||
c = cc[ci++];
|
||||
msg_multiline(IObuff, 0, true, &need_clear);
|
||||
|
||||
off += (size_t)utf_ptr2len(data); // needed for overlong ascii?
|
||||
}
|
||||
|
||||
#define SPACE_FOR_DESC (1 + 1 + 1 + MB_MAXBYTES + 16 + 4 + 3 + 3 + 1)
|
||||
// Space for description:
|
||||
// - 1 byte for separator (starting from second entry)
|
||||
// - 1 byte for "<"
|
||||
// - 1 byte for space to draw composing character on (optional, but really
|
||||
// mostly required)
|
||||
// - up to MB_MAXBYTES bytes for character itself
|
||||
// - 16 bytes for raw text ("> , Hex , Octal ").
|
||||
// - at least 4 bytes for hexadecimal representation
|
||||
// - at least 3 bytes for decimal representation
|
||||
// - at least 3 bytes for octal representation
|
||||
// - 1 byte for NUL
|
||||
//
|
||||
// Taking into account MAX_MCO and characters which need 8 bytes for
|
||||
// hexadecimal representation, but not taking translation into account:
|
||||
// resulting string will occupy less than 400 bytes (conservative estimate).
|
||||
//
|
||||
// Less than 1000 bytes if translation multiplies number of bytes needed for
|
||||
// raw text by 6, so it should always fit into 1025 bytes reserved for IObuff.
|
||||
|
||||
// Repeat for combining characters, also handle multibyte here.
|
||||
while (c >= 0x80 && iobuff_len < sizeof(IObuff) - SPACE_FOR_DESC) {
|
||||
while (off < len) {
|
||||
c = utf_ptr2char(data + off);
|
||||
|
||||
size_t iobuff_len = 0;
|
||||
// This assumes every multi-byte char is printable...
|
||||
if (iobuff_len > 0) {
|
||||
if (off > 0) {
|
||||
IObuff[iobuff_len++] = ' ';
|
||||
}
|
||||
IObuff[iobuff_len++] = '<';
|
||||
@ -208,32 +196,30 @@ void do_ascii(exarg_T *eap)
|
||||
}
|
||||
iobuff_len += (size_t)utf_char2bytes(c, IObuff + iobuff_len);
|
||||
|
||||
dig = get_digraph_for_char(c);
|
||||
char *dig = get_digraph_for_char(c);
|
||||
if (dig != NULL) {
|
||||
iobuff_len += (size_t)vim_snprintf(IObuff + iobuff_len,
|
||||
sizeof(IObuff) - iobuff_len,
|
||||
vim_snprintf(IObuff + iobuff_len, sizeof(IObuff) - iobuff_len,
|
||||
(c < 0x10000
|
||||
? _("> %d, Hex %04x, Oct %o, Digr %s")
|
||||
: _("> %d, Hex %08x, Oct %o, Digr %s")),
|
||||
c, c, c, dig);
|
||||
} else {
|
||||
iobuff_len += (size_t)vim_snprintf(IObuff + iobuff_len,
|
||||
sizeof(IObuff) - iobuff_len,
|
||||
vim_snprintf(IObuff + iobuff_len, sizeof(IObuff) - iobuff_len,
|
||||
(c < 0x10000
|
||||
? _("> %d, Hex %04x, Octal %o")
|
||||
: _("> %d, Hex %08x, Octal %o")),
|
||||
c, c, c);
|
||||
}
|
||||
if (ci == MAX_MCO) {
|
||||
break;
|
||||
}
|
||||
c = cc[ci++];
|
||||
}
|
||||
if (ci != MAX_MCO && c != 0) {
|
||||
xstrlcpy(IObuff + iobuff_len, " ...", sizeof(IObuff) - iobuff_len);
|
||||
|
||||
msg_multiline(IObuff, 0, true, &need_clear);
|
||||
|
||||
off += (size_t)utf_ptr2len(data + off); // needed for overlong ascii?
|
||||
}
|
||||
|
||||
msg(IObuff, 0);
|
||||
if (need_clear) {
|
||||
msg_clr_eos();
|
||||
}
|
||||
msg_end();
|
||||
}
|
||||
|
||||
/// ":left", ":center" and ":right": align text.
|
||||
|
@ -68,21 +68,6 @@ void grid_adjust(ScreenGrid **grid, int *row_off, int *col_off)
|
||||
}
|
||||
}
|
||||
|
||||
/// Put a unicode char, and up to MAX_MCO composing chars, in a screen cell.
|
||||
schar_T schar_from_cc(int c, int u8cc[MAX_MCO])
|
||||
{
|
||||
char buf[MAX_SCHAR_SIZE];
|
||||
int len = utf_char2bytes(c, buf);
|
||||
for (int i = 0; i < MAX_MCO; i++) {
|
||||
if (u8cc[i] == 0) {
|
||||
break;
|
||||
}
|
||||
len += utf_char2bytes(u8cc[i], buf + len);
|
||||
}
|
||||
buf[len] = 0;
|
||||
return schar_from_buf(buf, (size_t)len);
|
||||
}
|
||||
|
||||
schar_T schar_from_str(char *str)
|
||||
{
|
||||
if (str == NULL) {
|
||||
@ -243,22 +228,21 @@ void line_do_arabic_shape(schar_T *buf, int cols)
|
||||
schar_get(scbuf, buf[i]);
|
||||
|
||||
char scbuf_new[MAX_SCHAR_SIZE];
|
||||
int len = utf_char2bytes(c0new, scbuf_new);
|
||||
size_t len = (size_t)utf_char2bytes(c0new, scbuf_new);
|
||||
if (c1new) {
|
||||
len += utf_char2bytes(c1new, scbuf_new + len);
|
||||
len += (size_t)utf_char2bytes(c1new, scbuf_new + len);
|
||||
}
|
||||
|
||||
int off = utf_char2len(c0) + (c1 ? utf_char2len(c1) : 0);
|
||||
size_t rest = strlen(scbuf + off);
|
||||
if (rest + (size_t)off + 1 > MAX_SCHAR_SIZE) {
|
||||
// TODO(bfredl): this cannot happen just yet, as we only construct
|
||||
// schar_T values with up to MAX_MCO+1 composing codepoints. When code
|
||||
// is improved so that MAX_SCHAR_SIZE becomes the only/sharp limit,
|
||||
// we need to be able to peel off a composing char which doesn't fit anymore.
|
||||
abort();
|
||||
if (rest + len + 1 > MAX_SCHAR_SIZE) {
|
||||
// Too bigly, discard one code-point.
|
||||
// This should be enough as c0 cannot grow more than from 2 to 4 bytes
|
||||
// (base arabic to extended arabic)
|
||||
rest -= (size_t)utf_cp_head_off(scbuf + off, scbuf + off + rest - 1) + 1;
|
||||
}
|
||||
memcpy(scbuf_new + len, scbuf + off, rest);
|
||||
buf[i] = schar_from_buf(scbuf_new, (size_t)len + rest);
|
||||
buf[i] = schar_from_buf(scbuf_new, len + rest);
|
||||
|
||||
next:
|
||||
c0prev = c0;
|
||||
@ -289,9 +273,9 @@ static bool grid_invalid_row(ScreenGrid *grid, int row)
|
||||
return grid->attrs[grid->line_offset[row]] < 0;
|
||||
}
|
||||
|
||||
/// Get a single character directly from grid.chars into "bytes", which must
|
||||
/// have a size of "MB_MAXBYTES + 1".
|
||||
/// If "attrp" is not NULL, return the character's attribute in "*attrp".
|
||||
/// Get a single character directly from grid.chars
|
||||
///
|
||||
/// @param[out] attrp set to the character's attribute (optional)
|
||||
schar_T grid_getchar(ScreenGrid *grid, int row, int col, int *attrp)
|
||||
{
|
||||
grid_adjust(&grid, &row, &col);
|
||||
@ -385,42 +369,35 @@ int grid_line_puts(int col, const char *text, int textlen, int attr)
|
||||
{
|
||||
const char *ptr = text;
|
||||
int len = textlen;
|
||||
int u8cc[MAX_MCO];
|
||||
|
||||
assert(grid_line_grid);
|
||||
|
||||
int start_col = col;
|
||||
|
||||
int max_col = grid_line_maxcol;
|
||||
while (col < max_col
|
||||
&& (len < 0 || (int)(ptr - text) < len)
|
||||
&& *ptr != NUL) {
|
||||
while (col < max_col && (len < 0 || (int)(ptr - text) < len) && *ptr != NUL) {
|
||||
// check if this is the first byte of a multibyte
|
||||
int mbyte_blen = len > 0
|
||||
? utfc_ptr2len_len(ptr, (int)((text + len) - ptr))
|
||||
: utfc_ptr2len(ptr);
|
||||
int u8c = len >= 0
|
||||
? utfc_ptr2char_len(ptr, u8cc, (int)((text + len) - ptr))
|
||||
: utfc_ptr2char(ptr, u8cc);
|
||||
int mbyte_cells = utf_char2cells(u8c);
|
||||
int firstc;
|
||||
schar_T schar = len >= 0
|
||||
? utfc_ptr2schar_len(ptr, (int)((text + len) - ptr), &firstc)
|
||||
: utfc_ptr2schar(ptr, &firstc);
|
||||
int mbyte_cells = utf_char2cells(firstc);
|
||||
if (mbyte_cells > 2) {
|
||||
mbyte_cells = 1;
|
||||
u8c = 0xFFFD;
|
||||
u8cc[0] = 0;
|
||||
|
||||
schar = schar_from_char(0xFFFD);
|
||||
}
|
||||
|
||||
if (col + mbyte_cells > max_col) {
|
||||
// Only 1 cell left, but character requires 2 cells:
|
||||
// display a '>' in the last column to avoid wrapping.
|
||||
u8c = '>';
|
||||
u8cc[0] = 0;
|
||||
schar = schar_from_ascii('>');
|
||||
mbyte_cells = 1;
|
||||
}
|
||||
|
||||
schar_T buf;
|
||||
// TODO(bfredl): why not just keep the original byte sequence.
|
||||
buf = schar_from_cc(u8c, u8cc);
|
||||
|
||||
// When at the start of the text and overwriting the right half of a
|
||||
// two-cell character in the same grid, truncate that into a '>'.
|
||||
if (ptr == text && col > grid_line_first && col < grid_line_last
|
||||
@ -428,7 +405,7 @@ int grid_line_puts(int col, const char *text, int textlen, int attr)
|
||||
linebuf_char[col - 1] = schar_from_ascii('>');
|
||||
}
|
||||
|
||||
linebuf_char[col] = buf;
|
||||
linebuf_char[col] = schar;
|
||||
linebuf_attr[col] = attr;
|
||||
linebuf_vcol[col] = -1;
|
||||
if (mbyte_cells == 2) {
|
||||
|
@ -7,8 +7,8 @@
|
||||
#include "nvim/pos.h"
|
||||
#include "nvim/types.h"
|
||||
|
||||
#define MAX_MCO 6 // fixed value for 'maxcombine'
|
||||
// Includes final NUL. at least 4*(MAX_MCO+1)+1
|
||||
// Includes final NUL. MAX_MCO is no longer used, but at least 4*(MAX_MCO+1)+1=29
|
||||
// ensures we can fit all composed chars which did fit before.
|
||||
#define MAX_SCHAR_SIZE 32
|
||||
|
||||
// if data[0] is 0xFF, then data[1..4] is a 24-bit index (in machine endianness)
|
||||
@ -35,7 +35,7 @@ enum {
|
||||
/// we can avoid sending bigger updates than necessary to the UI layer.
|
||||
///
|
||||
/// Screen cells are stored as NUL-terminated UTF-8 strings, and a cell can
|
||||
/// contain up to MAX_MCO composing characters after the base character.
|
||||
/// contain as many composing characters as fit in MAX_SCHAR_SIZE-1 bytes.
|
||||
/// The composing characters are to be drawn on top of the original character.
|
||||
/// The content after the NUL is not defined (so comparison must be done a
|
||||
/// single cell at a time). Double-width characters are stored in the left cell,
|
||||
|
@ -1743,7 +1743,7 @@ void ins_compl_addleader(int c)
|
||||
return;
|
||||
}
|
||||
if ((cc = utf_char2len(c)) > 1) {
|
||||
char buf[MB_MAXBYTES + 1];
|
||||
char buf[MB_MAXCHAR + 1];
|
||||
|
||||
utf_char2bytes(c, buf);
|
||||
buf[cc] = NUL;
|
||||
|
@ -224,7 +224,7 @@ static int nlua_str_utf_start(lua_State *const lstate) FUNC_ATTR_NONNULL_ALL
|
||||
if (offset < 0 || offset > (intptr_t)s1_len) {
|
||||
return luaL_error(lstate, "index out of range");
|
||||
}
|
||||
int head_offset = utf_cp_head_off(s1, s1 + offset - 1);
|
||||
int head_offset = -utf_cp_head_off(s1, s1 + offset - 1);
|
||||
lua_pushinteger(lstate, head_offset);
|
||||
return 1;
|
||||
}
|
||||
|
@ -939,7 +939,7 @@ void f_getmatches(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
|
||||
tv_dict_add_nr(dict, S_LEN("id"), (varnumber_T)cur->mit_id);
|
||||
|
||||
if (cur->mit_conceal_char) {
|
||||
char buf[MB_MAXBYTES + 1];
|
||||
char buf[MB_MAXCHAR + 1];
|
||||
|
||||
buf[utf_char2bytes(cur->mit_conceal_char, buf)] = NUL;
|
||||
tv_dict_add_str(dict, S_LEN("conceal"), buf);
|
||||
|
123
src/nvim/mbyte.c
123
src/nvim/mbyte.c
@ -48,6 +48,7 @@
|
||||
#include "nvim/getchar.h"
|
||||
#include "nvim/gettext.h"
|
||||
#include "nvim/globals.h"
|
||||
#include "nvim/grid.h"
|
||||
#include "nvim/grid_defs.h"
|
||||
#include "nvim/iconv.h"
|
||||
#include "nvim/keycodes.h"
|
||||
@ -722,80 +723,68 @@ bool utf_composinglike(const char *p1, const char *p2)
|
||||
return arabic_combine(utf_ptr2char(p1), c2);
|
||||
}
|
||||
|
||||
/// Convert a UTF-8 string to a wide character
|
||||
/// Get the screen char at the beginning of a string
|
||||
///
|
||||
/// Also gets up to #MAX_MCO composing characters.
|
||||
/// Caller is expected to check for things like unprintable chars etc
|
||||
/// If first char in string is a composing char, prepend a space to display it correctly.
|
||||
///
|
||||
/// @param[out] pcc Location where to store composing characters. Must have
|
||||
/// space at least for #MAX_MCO + 1 elements.
|
||||
/// If "p" starts with an invalid sequence, zero is returned.
|
||||
///
|
||||
/// @return leading character.
|
||||
int utfc_ptr2char(const char *p, int *pcc)
|
||||
/// @param[out] firstc (required) The first codepoint of the screen char,
|
||||
/// or the first byte of an invalid sequence
|
||||
///
|
||||
/// @return the char
|
||||
schar_T utfc_ptr2schar(const char *p, int *firstc)
|
||||
FUNC_ATTR_NONNULL_ALL
|
||||
{
|
||||
int i = 0;
|
||||
|
||||
int c = utf_ptr2char(p);
|
||||
int len = utf_ptr2len(p);
|
||||
*firstc = c; // NOT optional, you are gonna need it
|
||||
bool first_compose = utf_iscomposing(c);
|
||||
size_t maxlen = MAX_SCHAR_SIZE - 1 - first_compose;
|
||||
size_t len = (size_t)utfc_ptr2len_len(p, (int)maxlen);
|
||||
|
||||
// Only accept a composing char when the first char isn't illegal.
|
||||
if ((len > 1 || (uint8_t)(*p) < 0x80)
|
||||
&& (uint8_t)p[len] >= 0x80
|
||||
&& utf_composinglike(p, p + len)) {
|
||||
int cc = utf_ptr2char(p + len);
|
||||
while (true) {
|
||||
pcc[i++] = cc;
|
||||
if (i == MAX_MCO) {
|
||||
break;
|
||||
}
|
||||
len += utf_ptr2len(p + len);
|
||||
if ((uint8_t)p[len] < 0x80 || !utf_iscomposing(cc = utf_ptr2char(p + len))) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (len == 1 && (uint8_t)(*p) >= 0x80) {
|
||||
return 0; // invalid sequence
|
||||
}
|
||||
|
||||
if (i < MAX_MCO) { // last composing char must be 0
|
||||
pcc[i] = 0;
|
||||
return schar_from_buf_first(p, len, first_compose);
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
// Convert a UTF-8 byte string to a wide character. Also get up to MAX_MCO
|
||||
// composing characters. Use no more than p[maxlen].
|
||||
//
|
||||
// @param [out] pcc: composing chars, last one is 0
|
||||
int utfc_ptr2char_len(const char *p, int *pcc, int maxlen)
|
||||
/// Get the screen char at the beginning of a string with length
|
||||
///
|
||||
/// Like utfc_ptr2schar but use no more than p[maxlen].
|
||||
schar_T utfc_ptr2schar_len(const char *p, int maxlen, int *firstc)
|
||||
FUNC_ATTR_NONNULL_ALL
|
||||
{
|
||||
assert(maxlen > 0);
|
||||
|
||||
int i = 0;
|
||||
|
||||
int len = utf_ptr2len_len(p, maxlen);
|
||||
// Is it safe to use utf_ptr2char()?
|
||||
bool safe = len > 1 && len <= maxlen;
|
||||
int c = safe ? utf_ptr2char(p) : (uint8_t)(*p);
|
||||
|
||||
// Only accept a composing char when the first char isn't illegal.
|
||||
if ((safe || c < 0x80) && len < maxlen && (uint8_t)p[len] >= 0x80) {
|
||||
for (; i < MAX_MCO; i++) {
|
||||
int len_cc = utf_ptr2len_len(p + len, maxlen - len);
|
||||
safe = len_cc > 1 && len_cc <= maxlen - len;
|
||||
if (!safe || (pcc[i] = utf_ptr2char(p + len)) < 0x80
|
||||
|| !(i == 0 ? utf_composinglike(p, p + len) : utf_iscomposing(pcc[i]))) {
|
||||
break;
|
||||
}
|
||||
len += len_cc;
|
||||
}
|
||||
size_t len = (size_t)utf_ptr2len_len(p, maxlen);
|
||||
if (len > (size_t)maxlen || (len == 1 && (uint8_t)(*p) >= 0x80) || len == 0) {
|
||||
// invalid or truncated sequence
|
||||
*firstc = (uint8_t)(*p);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (i < MAX_MCO) {
|
||||
// last composing char must be 0
|
||||
pcc[i] = 0;
|
||||
int c = utf_ptr2char(p);
|
||||
*firstc = c;
|
||||
bool first_compose = utf_iscomposing(c);
|
||||
maxlen = MIN(maxlen, MAX_SCHAR_SIZE - 1 - first_compose);
|
||||
len = (size_t)utfc_ptr2len_len(p, maxlen);
|
||||
|
||||
return schar_from_buf_first(p, len, first_compose);
|
||||
}
|
||||
|
||||
return c;
|
||||
#undef ISCOMPOSING
|
||||
/// Caller must ensure there is space for `first_compose`
|
||||
static schar_T schar_from_buf_first(const char *buf, size_t len, bool first_compose)
|
||||
{
|
||||
if (first_compose) {
|
||||
char cbuf[MAX_SCHAR_SIZE];
|
||||
cbuf[0] = ' ';
|
||||
memcpy(cbuf + 1, buf, len);
|
||||
return schar_from_buf(cbuf, len + 1);
|
||||
} else {
|
||||
return schar_from_buf(buf, len);
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the length of a UTF-8 byte sequence representing a single codepoint
|
||||
@ -878,8 +867,7 @@ int utfc_ptr2len(const char *const p)
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Check for composing characters. We can handle only the first six, but
|
||||
// skip all of them (otherwise the cursor would get stuck).
|
||||
// Check for composing characters.
|
||||
int prevlen = 0;
|
||||
while (true) {
|
||||
if ((uint8_t)p[len] < 0x80 || !utf_composinglike(p + prevlen, p + len)) {
|
||||
@ -1815,12 +1803,12 @@ int utf_cp_tail_off(const char *base, const char *p_in)
|
||||
/// Return the offset from "p" to the first byte of the codepoint it points
|
||||
/// to. Can start anywhere in a stream of bytes.
|
||||
/// Note: Unlike `utf_head_off`, this counts individual codepoints of composed characters
|
||||
/// separately and returns a negative offset.
|
||||
/// separately.
|
||||
///
|
||||
/// @param[in] base Pointer to start of string
|
||||
/// @param[in] p Pointer to byte for which to return the offset to the previous codepoint
|
||||
//
|
||||
/// @return 0 if invalid sequence, else offset to previous codepoint
|
||||
/// @return 0 if invalid sequence, else number of bytes to previous codepoint
|
||||
int utf_cp_head_off(const char *base, const char *p)
|
||||
{
|
||||
int i;
|
||||
@ -1830,17 +1818,20 @@ int utf_cp_head_off(const char *base, const char *p)
|
||||
}
|
||||
|
||||
// Find the first character that is not 10xx.xxxx
|
||||
for (i = 0; p - i > base; i--) {
|
||||
if (((uint8_t)p[i] & 0xc0) != 0x80) {
|
||||
for (i = 0; p - i >= base; i++) {
|
||||
if (((uint8_t)p[-i] & 0xc0) != 0x80) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Find the last character that is 10xx.xxxx
|
||||
for (int j = 0; ((uint8_t)p[j + 1] & 0xc0) == 0x80; j++) {}
|
||||
// Find the last character that is 10xx.xxxx (condition terminates on NUL)
|
||||
int j = 1;
|
||||
while (((uint8_t)p[j] & 0xc0) == 0x80) {
|
||||
j++;
|
||||
}
|
||||
|
||||
// Check for illegal sequence.
|
||||
if (utf8len_tab[(uint8_t)p[i]] == 1) {
|
||||
if (utf8len_tab[(uint8_t)p[-i]] != j + i) {
|
||||
return 0;
|
||||
}
|
||||
return i;
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include "nvim/cmdexpand_defs.h"
|
||||
#include "nvim/eval/typval_defs.h"
|
||||
#include "nvim/func_attr.h"
|
||||
#include "nvim/grid_defs.h"
|
||||
#include "nvim/mbyte_defs.h"
|
||||
#include "nvim/os/os_defs.h"
|
||||
#include "nvim/types.h"
|
||||
|
@ -139,7 +139,7 @@ static int msg_grid_pos_at_flush = 0;
|
||||
|
||||
static void ui_ext_msg_set_pos(int row, bool scrolled)
|
||||
{
|
||||
char buf[MAX_MCO + 1];
|
||||
char buf[MB_MAXCHAR + 1];
|
||||
size_t size = (size_t)utf_char2bytes(curwin->w_p_fcs_chars.msgsep, buf);
|
||||
buf[size] = '\0';
|
||||
ui_call_msg_set_pos(msg_grid.handle, row, scrolled,
|
||||
@ -1471,7 +1471,7 @@ void msg_putchar(int c)
|
||||
|
||||
void msg_putchar_attr(int c, int attr)
|
||||
{
|
||||
char buf[MB_MAXBYTES + 1];
|
||||
char buf[MB_MAXCHAR + 1];
|
||||
|
||||
if (IS_SPECIAL(c)) {
|
||||
buf[0] = (char)K_SPECIAL;
|
||||
@ -1560,12 +1560,6 @@ int msg_outtrans_len(const char *msgstr, int len, int attr)
|
||||
mode_displayed = false;
|
||||
}
|
||||
|
||||
// If the string starts with a composing character first draw a space on
|
||||
// which the composing char can be drawn.
|
||||
if (utf_iscomposing(utf_ptr2char(msgstr))) {
|
||||
msg_puts_attr(" ", attr);
|
||||
}
|
||||
|
||||
// Go over the string. Special characters are translated and printed.
|
||||
// Normal characters are printed several at a time.
|
||||
while (--len >= 0 && !got_int) {
|
||||
|
@ -556,6 +556,7 @@ EXTERN char *p_mp; ///< 'makeprg'
|
||||
EXTERN char *p_mps; ///< 'matchpairs'
|
||||
EXTERN OptInt p_mat; ///< 'matchtime'
|
||||
EXTERN OptInt p_mco; ///< 'maxcombine'
|
||||
#define MAX_MCO 6 // fixed value for 'maxcombine'
|
||||
EXTERN OptInt p_mfd; ///< 'maxfuncdepth'
|
||||
EXTERN OptInt p_mmd; ///< 'maxmapdepth'
|
||||
EXTERN OptInt p_mmp; ///< 'maxmempattern'
|
||||
|
@ -3019,7 +3019,7 @@ static int soundfold_find(slang_T *slang, char *word)
|
||||
static bool similar_chars(slang_T *slang, int c1, int c2)
|
||||
{
|
||||
int m1, m2;
|
||||
char buf[MB_MAXBYTES + 1];
|
||||
char buf[MB_MAXCHAR + 1];
|
||||
hashitem_T *hi;
|
||||
|
||||
if (c1 >= 256) {
|
||||
|
@ -1102,8 +1102,6 @@ describe("folded lines", function()
|
||||
end)
|
||||
|
||||
it("works with multibyte text", function()
|
||||
-- Currently the only allowed value of 'maxcombine'
|
||||
eq(6, meths.get_option_value('maxcombine', {}))
|
||||
eq(true, meths.get_option_value('arabicshape', {}))
|
||||
insert([[
|
||||
å 语 x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢͟ العَرَبِيَّة
|
||||
@ -1120,7 +1118,7 @@ describe("folded lines", function()
|
||||
[2:---------------------------------------------]|
|
||||
[3:---------------------------------------------]|
|
||||
## grid 2
|
||||
å 语 x̎͂̀̂͛͛ ﺎﻠﻋَﺮَﺒِﻳَّﺓ |
|
||||
å 语 x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ ﺎﻠﻋَﺮَﺒِﻳَّﺓ |
|
||||
möre tex^t |
|
||||
{1:~ }|
|
||||
{1:~ }|
|
||||
@ -1132,7 +1130,7 @@ describe("folded lines", function()
|
||||
]])
|
||||
else
|
||||
screen:expect([[
|
||||
å 语 x̎͂̀̂͛͛ ﺎﻠﻋَﺮَﺒِﻳَّﺓ |
|
||||
å 语 x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ ﺎﻠﻋَﺮَﺒِﻳَّﺓ |
|
||||
möre tex^t |
|
||||
{1:~ }|
|
||||
{1:~ }|
|
||||
@ -1156,7 +1154,7 @@ describe("folded lines", function()
|
||||
[2:---------------------------------------------]|
|
||||
[3:---------------------------------------------]|
|
||||
## grid 2
|
||||
{5:^+-- 2 lines: å 语 x̎͂̀̂͛͛ ﺎﻠﻋَﺮَﺒِﻳَّﺓ·················}|
|
||||
{5:^+-- 2 lines: å 语 x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ ﺎﻠﻋَﺮَﺒِﻳَّﺓ·················}|
|
||||
{1:~ }|
|
||||
{1:~ }|
|
||||
{1:~ }|
|
||||
@ -1168,7 +1166,7 @@ describe("folded lines", function()
|
||||
]])
|
||||
else
|
||||
screen:expect([[
|
||||
{5:^+-- 2 lines: å 语 x̎͂̀̂͛͛ ﺎﻠﻋَﺮَﺒِﻳَّﺓ·················}|
|
||||
{5:^+-- 2 lines: å 语 x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ ﺎﻠﻋَﺮَﺒِﻳَّﺓ·················}|
|
||||
{1:~ }|
|
||||
{1:~ }|
|
||||
{1:~ }|
|
||||
@ -1192,7 +1190,7 @@ describe("folded lines", function()
|
||||
[2:---------------------------------------------]|
|
||||
[3:---------------------------------------------]|
|
||||
## grid 2
|
||||
{5:^+-- 2 lines: å 语 x̎͂̀̂͛͛ العَرَبِيَّة·················}|
|
||||
{5:^+-- 2 lines: å 语 x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ العَرَبِيَّة·················}|
|
||||
{1:~ }|
|
||||
{1:~ }|
|
||||
{1:~ }|
|
||||
@ -1204,7 +1202,7 @@ describe("folded lines", function()
|
||||
]])
|
||||
else
|
||||
screen:expect([[
|
||||
{5:^+-- 2 lines: å 语 x̎͂̀̂͛͛ العَرَبِيَّة·················}|
|
||||
{5:^+-- 2 lines: å 语 x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ العَرَبِيَّة·················}|
|
||||
{1:~ }|
|
||||
{1:~ }|
|
||||
{1:~ }|
|
||||
@ -1228,7 +1226,7 @@ describe("folded lines", function()
|
||||
[2:---------------------------------------------]|
|
||||
[3:---------------------------------------------]|
|
||||
## grid 2
|
||||
{7:+ }{8: 1 }{5:^+-- 2 lines: å 语 x̎͂̀̂͛͛ العَرَبِيَّة···········}|
|
||||
{7:+ }{8: 1 }{5:^+-- 2 lines: å 语 x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ العَرَبِيَّة···········}|
|
||||
{1:~ }|
|
||||
{1:~ }|
|
||||
{1:~ }|
|
||||
@ -1240,7 +1238,7 @@ describe("folded lines", function()
|
||||
]])
|
||||
else
|
||||
screen:expect([[
|
||||
{7:+ }{8: 1 }{5:^+-- 2 lines: å 语 x̎͂̀̂͛͛ العَرَبِيَّة···········}|
|
||||
{7:+ }{8: 1 }{5:^+-- 2 lines: å 语 x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ العَرَبِيَّة···········}|
|
||||
{1:~ }|
|
||||
{1:~ }|
|
||||
{1:~ }|
|
||||
@ -1265,7 +1263,7 @@ describe("folded lines", function()
|
||||
[2:---------------------------------------------]|
|
||||
[3:---------------------------------------------]|
|
||||
## grid 2
|
||||
{5:···········ةيَّبِرَعَلا x̎͂̀̂͛͛ 语 å :senil 2 --^+}{8: 1 }{7: +}|
|
||||
{5:···········ةيَّبِرَعَلا x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ 语 å :senil 2 --^+}{8: 1 }{7: +}|
|
||||
{1: ~}|
|
||||
{1: ~}|
|
||||
{1: ~}|
|
||||
@ -1277,7 +1275,7 @@ describe("folded lines", function()
|
||||
]])
|
||||
else
|
||||
screen:expect([[
|
||||
{5:···········ةيَّبِرَعَلا x̎͂̀̂͛͛ 语 å :senil 2 --^+}{8: 1 }{7: +}|
|
||||
{5:···········ةيَّبِرَعَلا x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ 语 å :senil 2 --^+}{8: 1 }{7: +}|
|
||||
{1: ~}|
|
||||
{1: ~}|
|
||||
{1: ~}|
|
||||
@ -1301,7 +1299,7 @@ describe("folded lines", function()
|
||||
[2:---------------------------------------------]|
|
||||
[3:---------------------------------------------]|
|
||||
## grid 2
|
||||
{5:·················ةيَّبِرَعَلا x̎͂̀̂͛͛ 语 å :senil 2 --^+}|
|
||||
{5:·················ةيَّبِرَعَلا x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ 语 å :senil 2 --^+}|
|
||||
{1: ~}|
|
||||
{1: ~}|
|
||||
{1: ~}|
|
||||
@ -1313,7 +1311,7 @@ describe("folded lines", function()
|
||||
]])
|
||||
else
|
||||
screen:expect([[
|
||||
{5:·················ةيَّبِرَعَلا x̎͂̀̂͛͛ 语 å :senil 2 --^+}|
|
||||
{5:·················ةيَّبِرَعَلا x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ 语 å :senil 2 --^+}|
|
||||
{1: ~}|
|
||||
{1: ~}|
|
||||
{1: ~}|
|
||||
@ -1337,7 +1335,7 @@ describe("folded lines", function()
|
||||
[2:---------------------------------------------]|
|
||||
[3:---------------------------------------------]|
|
||||
## grid 2
|
||||
{5:·················ﺔﻴَّﺑِﺮَﻌَﻟﺍ x̎͂̀̂͛͛ 语 å :senil 2 --^+}|
|
||||
{5:·················ﺔﻴَّﺑِﺮَﻌَﻟﺍ x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ 语 å :senil 2 --^+}|
|
||||
{1: ~}|
|
||||
{1: ~}|
|
||||
{1: ~}|
|
||||
@ -1349,7 +1347,7 @@ describe("folded lines", function()
|
||||
]])
|
||||
else
|
||||
screen:expect([[
|
||||
{5:·················ﺔﻴَّﺑِﺮَﻌَﻟﺍ x̎͂̀̂͛͛ 语 å :senil 2 --^+}|
|
||||
{5:·················ﺔﻴَّﺑِﺮَﻌَﻟﺍ x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ 语 å :senil 2 --^+}|
|
||||
{1: ~}|
|
||||
{1: ~}|
|
||||
{1: ~}|
|
||||
@ -1373,7 +1371,7 @@ describe("folded lines", function()
|
||||
[2:---------------------------------------------]|
|
||||
[3:---------------------------------------------]|
|
||||
## grid 2
|
||||
ﺔﻴَّﺑِﺮَﻌَ^ﻟﺍ x̎͂̀̂͛͛ 语 å|
|
||||
ﺔﻴَّﺑِﺮَﻌَ^ﻟﺍ x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ 语 å|
|
||||
txet eröm|
|
||||
{1: ~}|
|
||||
{1: ~}|
|
||||
@ -1385,7 +1383,7 @@ describe("folded lines", function()
|
||||
]])
|
||||
else
|
||||
screen:expect([[
|
||||
ﺔﻴَّﺑِﺮَﻌَ^ﻟﺍ x̎͂̀̂͛͛ 语 å|
|
||||
ﺔﻴَّﺑِﺮَﻌَ^ﻟﺍ x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ 语 å|
|
||||
txet eröm|
|
||||
{1: ~}|
|
||||
{1: ~}|
|
||||
@ -1409,7 +1407,7 @@ describe("folded lines", function()
|
||||
[2:---------------------------------------------]|
|
||||
[3:---------------------------------------------]|
|
||||
## grid 2
|
||||
ةيَّبِرَعَ^لا x̎͂̀̂͛͛ 语 å|
|
||||
ةيَّبِرَعَ^لا x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ 语 å|
|
||||
txet eröm|
|
||||
{1: ~}|
|
||||
{1: ~}|
|
||||
@ -1421,7 +1419,7 @@ describe("folded lines", function()
|
||||
]])
|
||||
else
|
||||
screen:expect([[
|
||||
ةيَّبِرَعَ^لا x̎͂̀̂͛͛ 语 å|
|
||||
ةيَّبِرَعَ^لا x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ 语 å|
|
||||
txet eröm|
|
||||
{1: ~}|
|
||||
{1: ~}|
|
||||
|
@ -228,6 +228,36 @@ describe("multibyte rendering", function()
|
||||
]]}
|
||||
|
||||
end)
|
||||
|
||||
it('works with arabicshape and multiple composing chars', function()
|
||||
-- this tests an important edge case: arabicshape might increase the byte size of the base
|
||||
-- character in a way so that the last composing char no longer fits. use "g8" on the text
|
||||
-- to observe what is happening (the final E1 80 B7 gets deleted with 'arabicshape')
|
||||
-- If we would increase the schar_t size, say from 32 to 64 bytes, we need to extend the
|
||||
-- test text with even more zalgo energy to still touch this edge case.
|
||||
|
||||
meths.buf_set_lines(0,0,-1,true, {"سلام့̀́̂̃̄̅̆̇̈̉̊̋̌"})
|
||||
command('set noarabicshape')
|
||||
|
||||
screen:expect{grid=[[
|
||||
^سلام့̀́̂̃̄̅̆̇̈̉̊̋̌ |
|
||||
{1:~ }|
|
||||
{1:~ }|
|
||||
{1:~ }|
|
||||
{1:~ }|
|
||||
|
|
||||
]]}
|
||||
|
||||
command('set arabicshape')
|
||||
screen:expect{grid=[[
|
||||
^ﺱﻼﻣ̀́̂̃̄̅̆̇̈̉̊̋̌ |
|
||||
{1:~ }|
|
||||
{1:~ }|
|
||||
{1:~ }|
|
||||
{1:~ }|
|
||||
|
|
||||
]]}
|
||||
end)
|
||||
end)
|
||||
|
||||
describe('multibyte rendering: statusline', function()
|
||||
|
@ -4,17 +4,9 @@ local itp = helpers.gen_itp(it)
|
||||
local ffi = helpers.ffi
|
||||
local eq = helpers.eq
|
||||
|
||||
local mbyte = helpers.cimport("./src/nvim/mbyte.h")
|
||||
local charset = helpers.cimport('./src/nvim/charset.h')
|
||||
local lib = helpers.cimport('./src/nvim/mbyte.h', './src/nvim/charset.h', './src/nvim/grid.h')
|
||||
|
||||
describe('mbyte', function()
|
||||
-- Array for composing characters
|
||||
local intp = ffi.typeof('int[?]')
|
||||
local function to_intp()
|
||||
-- how to get MAX_MCO from globals.h?
|
||||
return intp(7, 1)
|
||||
end
|
||||
|
||||
-- Convert from bytes to string
|
||||
local function to_string(bytes)
|
||||
local s = {}
|
||||
@ -30,14 +22,14 @@ describe('mbyte', function()
|
||||
itp('utf_ptr2char', function()
|
||||
-- For strings with length 1 the first byte is returned.
|
||||
for c = 0, 255 do
|
||||
eq(c, mbyte.utf_ptr2char(to_string({c, 0})))
|
||||
eq(c, lib.utf_ptr2char(to_string({c, 0})))
|
||||
end
|
||||
|
||||
-- Some ill formed byte sequences that should not be recognized as UTF-8
|
||||
-- First byte: 0xc0 or 0xc1
|
||||
-- Second byte: 0x80 .. 0xbf
|
||||
--eq(0x00c0, mbyte.utf_ptr2char(to_string({0xc0, 0x80})))
|
||||
--eq(0x00c1, mbyte.utf_ptr2char(to_string({0xc1, 0xbf})))
|
||||
--eq(0x00c0, lib.utf_ptr2char(to_string({0xc0, 0x80})))
|
||||
--eq(0x00c1, lib.utf_ptr2char(to_string({0xc1, 0xbf})))
|
||||
--
|
||||
-- Sequences with more than four bytes
|
||||
end)
|
||||
@ -47,240 +39,133 @@ describe('mbyte', function()
|
||||
local char_p = ffi.typeof('char[?]')
|
||||
for c = n * 0x1000, n * 0x1000 + 0xFFF do
|
||||
local p = char_p(4, 0)
|
||||
mbyte.utf_char2bytes(c, p)
|
||||
eq(c, mbyte.utf_ptr2char(p))
|
||||
eq(charset.vim_iswordc(c), charset.vim_iswordp(p))
|
||||
lib.utf_char2bytes(c, p)
|
||||
eq(c, lib.utf_ptr2char(p))
|
||||
eq(lib.vim_iswordc(c), lib.vim_iswordp(p))
|
||||
end
|
||||
end)
|
||||
end
|
||||
|
||||
describe('utfc_ptr2char_len', function()
|
||||
describe('utfc_ptr2schar_len', function()
|
||||
-- Feeds the byte list `seq` (converted with to_string, length #seq) to
-- lib.utfc_ptr2schar_len and returns a two-element table:
--   [1] the Lua string read back from `buf` after lib.schar_get has filled it
--   [2] the int that utfc_ptr2schar_len stored through `firstc`
-- NOTE(review): presumably [1] is the UTF-8 text kept for one screen cell and
-- [2] is the first codepoint of the sequence — confirm against grid.h/mbyte.h.
local function test_seq(seq)
|
||||
-- one-element int array used as an out-parameter
local firstc = ffi.new("int[1]")
|
||||
-- NOTE(review): 32 bytes is presumably the maximum schar size plus a NUL
-- terminator — TODO confirm
local buf = ffi.new("char[32]")
|
||||
lib.schar_get(buf, lib.utfc_ptr2schar_len(to_string(seq), #seq, firstc))
|
||||
-- ffi.string without a length reads `buf` up to the first NUL byte
return {ffi.string(buf), firstc[0]}
|
||||
end
|
||||
|
||||
-- Builds an expected-result pair in the same {string, int} shape that
-- test_seq returns, for a single byte value `val`: the one-byte string made
-- by string.char(val), and `val` itself.
local function byte(val)
|
||||
return {string.char(val), val}
|
||||
end
|
||||
|
||||
itp('1-byte sequences', function()
|
||||
local pcc = to_intp()
|
||||
for c = 0, 255 do
|
||||
eq(c, mbyte.utfc_ptr2char_len(to_string({c}), pcc, 1))
|
||||
eq(0, pcc[0])
|
||||
eq({'', 0}, test_seq{0})
|
||||
for c = 1, 127 do
|
||||
eq(byte(c), test_seq{c})
|
||||
end
|
||||
for c = 128, 255 do
|
||||
eq({'', c}, test_seq{c})
|
||||
end
|
||||
end)
|
||||
|
||||
itp('2-byte sequences', function()
|
||||
local pcc = to_intp()
|
||||
-- No combining characters
|
||||
eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0x7f}), pcc, 2))
|
||||
eq(0, pcc[0])
|
||||
eq(byte(0x7f), test_seq{0x7f, 0x7f})
|
||||
-- No combining characters
|
||||
pcc = to_intp()
|
||||
eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0x80}), pcc, 2))
|
||||
eq(0, pcc[0])
|
||||
eq(byte(0x7f), test_seq{0x7f, 0x80})
|
||||
|
||||
-- No UTF-8 sequence
|
||||
pcc = to_intp()
|
||||
eq(0x00c2, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x7f}), pcc, 2))
|
||||
eq(0, pcc[0])
|
||||
eq({'', 0xc2}, test_seq{0xc2, 0x7f})
|
||||
-- One UTF-8 character
|
||||
pcc = to_intp()
|
||||
eq(0x0080, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x80}), pcc, 2))
|
||||
eq(0, pcc[0])
|
||||
eq({'\xc2\x80', 0x80}, test_seq{0xc2, 0x80})
|
||||
-- No UTF-8 sequence
|
||||
pcc = to_intp()
|
||||
eq(0x00c2, mbyte.utfc_ptr2char_len(to_string({0xc2, 0xc0}), pcc, 2))
|
||||
eq(0, pcc[0])
|
||||
eq({'', 0xc2}, test_seq{0xc2, 0xc0})
|
||||
end)
|
||||
|
||||
itp('3-byte sequences', function()
|
||||
local pcc = to_intp()
|
||||
|
||||
-- No second UTF-8 character
|
||||
eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0x80, 0x80}), pcc, 3))
|
||||
eq(0, pcc[0])
|
||||
eq(byte(0x7f), test_seq{0x7f, 0x80, 0x80})
|
||||
-- No combining character
|
||||
pcc = to_intp()
|
||||
eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0xc2, 0x80}), pcc, 3))
|
||||
eq(0, pcc[0])
|
||||
eq(byte(0x7f), test_seq{0x7f, 0xc2, 0x80})
|
||||
|
||||
-- Combining character is U+0300
|
||||
pcc = to_intp()
|
||||
eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0xcc, 0x80}), pcc, 3))
|
||||
eq(0x0300, pcc[0])
|
||||
eq(0x0000, pcc[1])
|
||||
eq({"\x7f\xcc\x80", 0x7f}, test_seq{0x7f, 0xcc, 0x80})
|
||||
|
||||
-- No UTF-8 sequence
|
||||
pcc = to_intp()
|
||||
eq(0x00c2, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x7f, 0xcc}), pcc, 3))
|
||||
eq(0, pcc[0])
|
||||
eq({'', 0xc2}, test_seq{0xc2, 0x7f, 0xcc})
|
||||
-- Incomplete combining character
|
||||
pcc = to_intp()
|
||||
eq(0x0080, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x80, 0xcc}), pcc, 3))
|
||||
eq(0, pcc[0])
|
||||
eq({"\xc2\x80", 0x80}, test_seq{0xc2, 0x80, 0xcc})
|
||||
|
||||
-- One UTF-8 character
|
||||
pcc = to_intp()
|
||||
eq(0x20d0, mbyte.utfc_ptr2char_len(to_string({0xe2, 0x83, 0x90}), pcc, 3))
|
||||
eq(0, pcc[0])
|
||||
-- One UTF-8 character (composing only)
|
||||
eq({" \xe2\x83\x90", 0x20d0}, test_seq{0xe2, 0x83, 0x90})
|
||||
end)
|
||||
|
||||
itp('4-byte sequences', function()
|
||||
local pcc = to_intp()
|
||||
|
||||
-- No following combining character
|
||||
eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0x7f, 0xcc, 0x80}), pcc, 4))
|
||||
eq(0, pcc[0])
|
||||
eq(byte(0x7f), test_seq{0x7f, 0x7f, 0xcc, 0x80})
|
||||
-- No second UTF-8 character
|
||||
pcc = to_intp()
|
||||
eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0xc2, 0xcc, 0x80}), pcc, 4))
|
||||
eq(0, pcc[0])
|
||||
eq(byte(0x7f), test_seq{0x7f, 0xc2, 0xcc, 0x80})
|
||||
|
||||
-- Combining character U+0300
|
||||
pcc = to_intp()
|
||||
eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0xcc, 0x80, 0xcc}), pcc, 4))
|
||||
eq(0x0300, pcc[0])
|
||||
eq(0x0000, pcc[1])
|
||||
eq({"\x7f\xcc\x80", 0x7f}, test_seq{0x7f, 0xcc, 0x80, 0xcc})
|
||||
|
||||
-- No UTF-8 sequence
|
||||
pcc = to_intp()
|
||||
eq(0x00c2, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x7f, 0xcc, 0x80}), pcc, 4))
|
||||
eq(0, pcc[0])
|
||||
eq({'', 0xc2}, test_seq{0xc2, 0x7f, 0xcc, 0x80})
|
||||
-- No following UTF-8 character
|
||||
pcc = to_intp()
|
||||
eq(0x0080, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x80, 0xcc, 0xcc}), pcc, 4))
|
||||
eq(0, pcc[0])
|
||||
eq({"\xc2\x80", 0x80}, test_seq{0xc2, 0x80, 0xcc, 0xcc})
|
||||
-- Combining character U+0301
|
||||
pcc = to_intp()
|
||||
eq(0x0080, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x80, 0xcc, 0x81}), pcc, 4))
|
||||
eq(0x0301, pcc[0])
|
||||
eq(0x0000, pcc[1])
|
||||
eq({"\xc2\x80\xcc\x81", 0x80}, test_seq{0xc2, 0x80, 0xcc, 0x81})
|
||||
|
||||
-- One UTF-8 character
|
||||
pcc = to_intp()
|
||||
eq(0x100000, mbyte.utfc_ptr2char_len(to_string({0xf4, 0x80, 0x80, 0x80}), pcc, 4))
|
||||
eq(0, pcc[0])
|
||||
eq({"\xf4\x80\x80\x80", 0x100000}, test_seq{0xf4, 0x80, 0x80, 0x80})
|
||||
end)
|
||||
|
||||
itp('5+-byte sequences', function()
|
||||
local pcc = to_intp()
|
||||
|
||||
-- No following combining character
|
||||
eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0x7f, 0xcc, 0x80, 0x80}), pcc, 5))
|
||||
eq(0, pcc[0])
|
||||
eq(byte(0x7f), test_seq{0x7f, 0x7f, 0xcc, 0x80, 0x80})
|
||||
-- No second UTF-8 character
|
||||
pcc = to_intp()
|
||||
eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0xc2, 0xcc, 0x80, 0x80}), pcc, 5))
|
||||
eq(0, pcc[0])
|
||||
eq(byte(0x7f), test_seq{0x7f, 0xc2, 0xcc, 0x80, 0x80})
|
||||
|
||||
-- Combining character U+0300
|
||||
pcc = to_intp()
|
||||
eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0xcc, 0x80, 0xcc}), pcc, 5))
|
||||
eq(0x0300, pcc[0])
|
||||
eq(0x0000, pcc[1])
|
||||
eq({"\x7f\xcc\x80", 0x7f}, test_seq{0x7f, 0xcc, 0x80, 0xcc, 0x00})
|
||||
|
||||
-- Combining characters U+0300 and U+0301
|
||||
pcc = to_intp()
|
||||
eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0xcc, 0x80, 0xcc, 0x81}), pcc, 5))
|
||||
eq(0x0300, pcc[0])
|
||||
eq(0x0301, pcc[1])
|
||||
eq(0x0000, pcc[2])
|
||||
eq({"\x7f\xcc\x80\xcc\x81", 0x7f}, test_seq{0x7f, 0xcc, 0x80, 0xcc, 0x81})
|
||||
-- Combining characters U+0300, U+0301, U+0302
|
||||
pcc = to_intp()
|
||||
eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82}), pcc, 7))
|
||||
eq(0x0300, pcc[0])
|
||||
eq(0x0301, pcc[1])
|
||||
eq(0x0302, pcc[2])
|
||||
eq(0x0000, pcc[3])
|
||||
eq({"\x7f\xcc\x80\xcc\x81\xcc\x82", 0x7f}, test_seq{0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82})
|
||||
-- Combining characters U+0300, U+0301, U+0302, U+0303
|
||||
pcc = to_intp()
|
||||
eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xcc, 0x83}), pcc, 9))
|
||||
eq(0x0300, pcc[0])
|
||||
eq(0x0301, pcc[1])
|
||||
eq(0x0302, pcc[2])
|
||||
eq(0x0303, pcc[3])
|
||||
eq(0x0000, pcc[4])
|
||||
eq({"\x7f\xcc\x80\xcc\x81\xcc\x82\xcc\x83", 0x7f}, test_seq{0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xcc, 0x83})
|
||||
-- Combining characters U+0300, U+0301, U+0302, U+0303, U+0304
|
||||
pcc = to_intp()
|
||||
eq(0x007f, mbyte.utfc_ptr2char_len(to_string(
|
||||
{0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xcc, 0x83, 0xcc, 0x84}), pcc, 11))
|
||||
eq(0x0300, pcc[0])
|
||||
eq(0x0301, pcc[1])
|
||||
eq(0x0302, pcc[2])
|
||||
eq(0x0303, pcc[3])
|
||||
eq(0x0304, pcc[4])
|
||||
eq(0x0000, pcc[5])
|
||||
-- Combining characters U+0300, U+0301, U+0302, U+0303, U+0304,
|
||||
-- U+0305
|
||||
pcc = to_intp()
|
||||
eq(0x007f, mbyte.utfc_ptr2char_len(to_string(
|
||||
{0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xcc, 0x83, 0xcc, 0x84, 0xcc, 0x85}), pcc, 13))
|
||||
eq(0x0300, pcc[0])
|
||||
eq(0x0301, pcc[1])
|
||||
eq(0x0302, pcc[2])
|
||||
eq(0x0303, pcc[3])
|
||||
eq(0x0304, pcc[4])
|
||||
eq(0x0305, pcc[5])
|
||||
eq(1, pcc[6])
|
||||
eq({"\x7f\xcc\x80\xcc\x81\xcc\x82\xcc\x83\xcc\x84", 0x7f}, test_seq{0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xcc, 0x83, 0xcc, 0x84})
|
||||
-- Combining characters U+0300, U+0301, U+0302, U+0303, U+0304, U+0305
|
||||
eq({"\x7f\xcc\x80\xcc\x81\xcc\x82\xcc\x83\xcc\x84\xcc\x85", 0x7f}, test_seq{0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xcc, 0x83, 0xcc, 0x84, 0xcc, 0x85})
|
||||
|
||||
-- Combining characters U+0300, U+0301, U+0302, U+0303, U+0304,
|
||||
-- U+0305, U+0306, but only save six (= MAX_MCO).
|
||||
pcc = to_intp()
|
||||
eq(0x007f, mbyte.utfc_ptr2char_len(to_string(
|
||||
{0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xcc, 0x83, 0xcc, 0x84, 0xcc, 0x85, 0xcc, 0x86}), pcc, 15))
|
||||
eq(0x0300, pcc[0])
|
||||
eq(0x0301, pcc[1])
|
||||
eq(0x0302, pcc[2])
|
||||
eq(0x0303, pcc[3])
|
||||
eq(0x0304, pcc[4])
|
||||
eq(0x0305, pcc[5])
|
||||
eq(0x0001, pcc[6])
|
||||
-- Combining characters U+0300, U+0301, U+0302, U+0303, U+0304, U+0305, U+0306
|
||||
eq({"\x7f\xcc\x80\xcc\x81\xcc\x82\xcc\x83\xcc\x84\xcc\x85\xcc\x86", 0x7f}, test_seq{0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xcc, 0x83, 0xcc, 0x84, 0xcc, 0x85, 0xcc, 0x86})
|
||||
|
||||
-- Only three following combining characters U+0300, U+0301, U+0302
|
||||
pcc = to_intp()
|
||||
eq(0x007f, mbyte.utfc_ptr2char_len(to_string(
|
||||
{0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xc2, 0x80, 0xcc, 0x84, 0xcc, 0x85}), pcc, 13))
|
||||
eq(0x0300, pcc[0])
|
||||
eq(0x0301, pcc[1])
|
||||
eq(0x0302, pcc[2])
|
||||
eq(0x0000, pcc[3])
|
||||
|
||||
eq({"\x7f\xcc\x80\xcc\x81\xcc\x82", 0x7f}, test_seq{0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xc2, 0x80, 0xcc, 0x84, 0xcc, 0x85})
|
||||
|
||||
-- No UTF-8 sequence
|
||||
pcc = to_intp()
|
||||
eq(0x00c2, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x7f, 0xcc, 0x80, 0x80}), pcc, 5))
|
||||
eq(0, pcc[0])
|
||||
eq({'', 0xc2}, test_seq{0xc2, 0x7f, 0xcc, 0x80, 0x80})
|
||||
-- No following UTF-8 character
|
||||
pcc = to_intp()
|
||||
eq(0x0080, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x80, 0xcc, 0xcc, 0x80}), pcc, 5))
|
||||
eq(0, pcc[0])
|
||||
eq({"\xc2\x80", 0x80}, test_seq{0xc2, 0x80, 0xcc, 0xcc, 0x80})
|
||||
-- Combining character U+0301
|
||||
pcc = to_intp()
|
||||
eq(0x0080, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x80, 0xcc, 0x81, 0x7f}), pcc, 5))
|
||||
eq(0x0301, pcc[0])
|
||||
eq(0x0000, pcc[1])
|
||||
eq({"\xc2\x80\xcc\x81", 0x80}, test_seq{0xc2, 0x80, 0xcc, 0x81, 0x7f})
|
||||
-- Combining character U+0301
|
||||
pcc = to_intp()
|
||||
eq(0x0080, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x80, 0xcc, 0x81, 0xcc}), pcc, 5))
|
||||
eq(0x0301, pcc[0])
|
||||
eq(0x0000, pcc[1])
|
||||
eq({"\xc2\x80\xcc\x81", 0x80}, test_seq{0xc2, 0x80, 0xcc, 0x81, 0xcc})
|
||||
|
||||
-- One UTF-8 character
|
||||
pcc = to_intp()
|
||||
eq(0x100000, mbyte.utfc_ptr2char_len(to_string({0xf4, 0x80, 0x80, 0x80, 0x7f}), pcc, 5))
|
||||
eq(0, pcc[0])
|
||||
eq({"\xf4\x80\x80\x80", 0x100000}, test_seq{0xf4, 0x80, 0x80, 0x80, 0x7f})
|
||||
|
||||
-- One UTF-8 character
|
||||
pcc = to_intp()
|
||||
eq(0x100000, mbyte.utfc_ptr2char_len(to_string({0xf4, 0x80, 0x80, 0x80, 0x80}), pcc, 5))
|
||||
eq(0, pcc[0])
|
||||
eq({"\xf4\x80\x80\x80", 0x100000}, test_seq{0xf4, 0x80, 0x80, 0x80, 0x80})
|
||||
-- One UTF-8 character
|
||||
pcc = to_intp()
|
||||
eq(0x100000, mbyte.utfc_ptr2char_len(to_string({0xf4, 0x80, 0x80, 0x80, 0xcc}), pcc, 5))
|
||||
eq(0, pcc[0])
|
||||
eq({"\xf4\x80\x80\x80", 0x100000}, test_seq{0xf4, 0x80, 0x80, 0x80, 0xcc})
|
||||
|
||||
-- Combining characters U+1AB0 and U+0301
|
||||
pcc = to_intp()
|
||||
eq(0x100000, mbyte.utfc_ptr2char_len(to_string(
|
||||
{0xf4, 0x80, 0x80, 0x80, 0xe1, 0xaa, 0xb0, 0xcc, 0x81}), pcc, 9))
|
||||
eq(0x1ab0, pcc[0])
|
||||
eq(0x0301, pcc[1])
|
||||
eq(0x0000, pcc[2])
|
||||
eq({"\xf4\x80\x80\x80\xe1\xaa\xb0\xcc\x81", 0x100000}, test_seq{0xf4, 0x80, 0x80, 0x80, 0xe1, 0xaa, 0xb0, 0xcc, 0x81})
|
||||
end)
|
||||
|
||||
end)
|
||||
|
Loading…
Reference in New Issue
Block a user