screen: use UTF-8 representation

Store text in ScreenLines as UTF-8, so it can be sent as-is to the UI
layer. `utfc_char2bytes(off,buf)` is removed, as `ScreenLines[off]` now
already contains this representation.

To recover the codepoints that the screen arrays previously contained, use
utfc_ptr2char (or utf_ptr2char to ignore composing chars).

NB: This commit does NOT change how screen.c processes incoming UTF-8 data
from buffers, cmdline, messages etc. Any algorithm that operates on UCS-4
(like Arabic shaping, treatment of non-printable chars)
is left unchanged for now.
This commit is contained in:
Björn Linse 2018-02-10 11:03:59 +01:00
parent 315b7f8632
commit d8e18c96a9
5 changed files with 365 additions and 667 deletions

View File

@ -13994,10 +13994,7 @@ static void f_screenchar(typval_T *argvars, typval_T *rettv, FunPtr fptr)
c = -1;
} else {
off = LineOffset[row] + col;
if (enc_utf8 && ScreenLinesUC[off] != 0)
c = ScreenLinesUC[off];
else
c = ScreenLines[off];
c = utf_ptr2char(ScreenLines[off]);
}
rettv->vval.v_number = c;
}

View File

@ -131,38 +131,39 @@ typedef off_t off_T;
/*
* The characters and attributes cached for the screen.
*/
typedef char_u schar_T;
typedef unsigned short sattr_T;
typedef char_u schar_T[(MAX_MCO+1) * 4 + 1];
typedef int16_t sattr_T;
/*
* The characters that are currently on the screen are kept in ScreenLines[].
* It is a single block of characters, the size of the screen plus one line.
* The attributes for those characters are kept in ScreenAttrs[].
*
* "LineOffset[n]" is the offset from ScreenLines[] for the start of line 'n'.
* The same value is used for ScreenLinesUC[] and ScreenAttrs[].
*
* Note: before the screen is initialized and when out of memory these can be
* NULL.
*/
/// ScreenLines[] contains a copy of the whole screen, as it currently is
/// displayed. It is a single block of screen cells, the size of the screen
/// plus one line. The extra line is used as a buffer while redrawing a window
/// line, so it can be compared with the previous state of that line. This way
/// we can avoid sending bigger updates than necessary to the UI layer.
///
/// Screen cells are stored as NUL-terminated UTF-8 strings, and a cell can
/// contain up to MAX_MCO composing characters after the base character.
/// The composing characters are to be drawn on top of the original character.
/// The content after the NUL is not defined (so comparison must be done a
/// single cell at a time). Double-width characters are stored in the left cell,
/// and the right cell should only contain the empty string. When a part of the
/// screen is cleared, the cells should be filled with a single whitespace char.
///
/// ScreenAttrs[] contains the highlighting attribute for each cell.
/// LineOffset[n] is the offset from ScreenLines[] and ScreenAttrs[] for the
/// start of line 'n'. These offsets are in general not linear, as full screen
/// scrolling is implemented by rotating the offsets in the LineOffset array.
/// LineWraps[] is an array of boolean flags indicating if the screen line wraps
/// to the next line. It can only be true if a window occupies the entire screen
/// width.
///
///
/// Note: before the screen is initialized and when out of memory these can be
/// NULL.
EXTERN schar_T *ScreenLines INIT(= NULL);
EXTERN sattr_T *ScreenAttrs INIT(= NULL);
EXTERN unsigned *LineOffset INIT(= NULL);
EXTERN char_u *LineWraps INIT(= NULL); /* line wraps to next line */
/*
* When using Unicode characters (in UTF-8 encoding) the character in
* ScreenLinesUC[] contains the Unicode for the character at this position, or
* NUL when the character in ScreenLines[] is to be used (ASCII char).
* The composing characters are to be drawn on top of the original character.
* ScreenLinesC[0][off] is only to be used when ScreenLinesUC[off] != 0.
* Note: These three are only allocated when enc_utf8 is set!
*/
EXTERN u8char_T *ScreenLinesUC INIT(= NULL); /* decoded UTF-8 characters */
EXTERN u8char_T *ScreenLinesC[MAX_MCO]; /* composing characters */
EXTERN int Screen_mco INIT(= 0); /* value of p_mco used when
allocating ScreenLinesC[] */
EXTERN int screen_Rows INIT(= 0); /* actual size of ScreenLines[] */
EXTERN int screen_Columns INIT(= 0); /* actual size of ScreenLines[] */

View File

@ -560,7 +560,7 @@ size_t mb_string2cells(const char_u *str)
/// We make sure that the offset used is less than "max_off".
int utf_off2cells(unsigned off, unsigned max_off)
{
return (off + 1 < max_off && ScreenLines[off + 1] == 0) ? 2 : 1;
return (off + 1 < max_off && ScreenLines[off + 1][0] == 0) ? 2 : 1;
}
/// Convert a UTF-8 byte sequence to a wide character
@ -790,27 +790,6 @@ int utfc_ptr2char_len(const char_u *p, int *pcc, int maxlen)
#undef ISCOMPOSING
}
/*
* Convert the character at screen position "off" to a sequence of bytes.
* Includes the composing characters.
* "buf" must at least have the length MB_MAXBYTES + 1.
* Only to be used when ScreenLinesUC[off] != 0.
* Returns the produced number of bytes.
*/
int utfc_char2bytes(int off, char_u *buf)
{
int len;
int i;
len = utf_char2bytes(ScreenLinesUC[off], buf);
for (i = 0; i < Screen_mco; ++i) {
if (ScreenLinesC[i][off] == 0)
break;
len += utf_char2bytes(ScreenLinesC[i][off], buf + len);
}
return len;
}
/// Get the length of a UTF-8 byte sequence representing a single codepoint
///
/// @param[in] p UTF-8 string.
@ -1853,7 +1832,7 @@ int mb_fix_col(int col, int row)
col = check_col(col);
row = check_row(row);
if (ScreenLines != NULL && col > 0
&& ScreenLines[LineOffset[row] + col] == 0) {
&& ScreenLines[LineOffset[row] + col][0] == 0) {
return col - 1;
}
return col;

View File

@ -108,12 +108,13 @@ retnomove:
goto retnomove; // ugly goto...
// Remember the character under the mouse, it might be a '-' or '+' in the
// fold column.
// fold column. NB: only works for ASCII chars!
if (row >= 0 && row < Rows && col >= 0 && col <= Columns
&& ScreenLines != NULL)
mouse_char = ScreenLines[LineOffset[row] + (unsigned)col];
else
&& ScreenLines != NULL) {
mouse_char = ScreenLines[LineOffset[row] + (unsigned)col][0];
} else {
mouse_char = ' ';
}
old_curwin = curwin;
old_cursor = curwin->w_cursor;

File diff suppressed because it is too large Load Diff