mbyte: Lint some functions which are to be copied for symbolic tests

This commit is contained in:
ZyX 2017-10-08 21:19:10 +03:00
parent bd3a4166b2
commit 6f22b5afad
2 changed files with 137 additions and 134 deletions

View File

@ -725,29 +725,6 @@ EXTERN int vr_lines_changed INIT(= 0); /* #Lines changed by "gR" so far */
/// Encoding used when 'fencs' is set to "default" /// Encoding used when 'fencs' is set to "default"
EXTERN char_u *fenc_default INIT(= NULL); EXTERN char_u *fenc_default INIT(= NULL);
// To speed up BYTELEN(); keep a lookup table to quickly get the length in
// bytes of a UTF-8 character from the first byte of a UTF-8 string. Bytes
// which are illegal when used as the first byte have a 1. The NUL byte has
// length 1.
EXTERN char utf8len_tab[256] INIT(= {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1,
});
# if defined(USE_ICONV) && defined(DYNAMIC_ICONV) # if defined(USE_ICONV) && defined(DYNAMIC_ICONV)
/* Pointers to functions and variables to be loaded at runtime */ /* Pointers to functions and variables to be loaded at runtime */
EXTERN size_t (*iconv)(iconv_t cd, const char **inbuf, size_t *inbytesleft, EXTERN size_t (*iconv)(iconv_t cd, const char **inbuf, size_t *inbytesleft,

View File

@ -72,19 +72,41 @@ struct interval {
# include "unicode_tables.generated.h" # include "unicode_tables.generated.h"
#endif #endif
/* // To speed up BYTELEN(); keep a lookup table to quickly get the length in
* Like utf8len_tab above, but using a zero for illegal lead bytes. // bytes of a UTF-8 character from the first byte of a UTF-8 string. Bytes
*/ // which are illegal when used as the first byte have a 1. The NUL byte has
const uint8_t utf8len_tab_zero[256] = // length 1.
{ const uint8_t utf8len_tab[] = {
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // ?1 ?2 ?3 ?4 ?5 ?6 ?7 ?8 ?9 ?A ?B ?C ?D ?E ?F
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0?
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 1?
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 2?
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 3?
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4?
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 5?
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,0,0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6?
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 7?
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 8?
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 9?
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A?
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B?
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // C?
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // D?
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // E?
4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1, // F?
};
// Like utf8len_tab above, but using a zero for illegal lead bytes.
const uint8_t utf8len_tab_zero[] = {
//1 2 3 4 5 6 7 8 9 A B C D E F 0 1 2 3 4 5 6 7 8 9 A B C D E F
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 2
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 4
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 6
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 8
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // A
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // C
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,0,0, // E
}; };
/* /*
@ -528,45 +550,52 @@ int utf_off2cells(unsigned off, unsigned max_off)
return (off + 1 < max_off && ScreenLines[off + 1] == 0) ? 2 : 1; return (off + 1 < max_off && ScreenLines[off + 1] == 0) ? 2 : 1;
} }
/* /// Convert a UTF-8 byte sequence to a wide character
* Convert a UTF-8 byte sequence to a wide character. ///
* If the sequence is illegal or truncated by a NUL the first byte is /// If the sequence is illegal or truncated by a NUL then the first byte is
* returned. /// returned. Following composing characters are not included: only one codepoint is decoded.
* Does not include composing characters, of course. ///
*/ /// @param[in] p String to convert.
int utf_ptr2char(const char_u *p) ///
/// @return Unicode codepoint or byte value.
int utf_ptr2char(const char_u *const p)
FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT
{ {
uint8_t len; if (p[0] < 0x80) { // Be quick for ASCII.
if (p[0] < 0x80) /* be quick for ASCII */
return p[0]; return p[0];
}
len = utf8len_tab_zero[p[0]]; const uint8_t len = utf8len_tab_zero[p[0]];
if (len > 1 && (p[1] & 0xc0) == 0x80) { if (len > 1 && (p[1] & 0xc0) == 0x80) {
if (len == 2) if (len == 2) {
return ((p[0] & 0x1f) << 6) + (p[1] & 0x3f); return ((p[0] & 0x1f) << 6) + (p[1] & 0x3f);
}
if ((p[2] & 0xc0) == 0x80) { if ((p[2] & 0xc0) == 0x80) {
if (len == 3) if (len == 3) {
return ((p[0] & 0x0f) << 12) + ((p[1] & 0x3f) << 6) return (((p[0] & 0x0f) << 12) + ((p[1] & 0x3f) << 6)
+ (p[2] & 0x3f); + (p[2] & 0x3f));
}
if ((p[3] & 0xc0) == 0x80) { if ((p[3] & 0xc0) == 0x80) {
if (len == 4) if (len == 4) {
return ((p[0] & 0x07) << 18) + ((p[1] & 0x3f) << 12) return (((p[0] & 0x07) << 18) + ((p[1] & 0x3f) << 12)
+ ((p[2] & 0x3f) << 6) + (p[3] & 0x3f); + ((p[2] & 0x3f) << 6) + (p[3] & 0x3f));
}
if ((p[4] & 0xc0) == 0x80) { if ((p[4] & 0xc0) == 0x80) {
if (len == 5) if (len == 5) {
return ((p[0] & 0x03) << 24) + ((p[1] & 0x3f) << 18) return (((p[0] & 0x03) << 24) + ((p[1] & 0x3f) << 18)
+ ((p[2] & 0x3f) << 12) + ((p[3] & 0x3f) << 6) + ((p[2] & 0x3f) << 12) + ((p[3] & 0x3f) << 6)
+ (p[4] & 0x3f); + (p[4] & 0x3f));
if ((p[5] & 0xc0) == 0x80 && len == 6) }
return ((p[0] & 0x01) << 30) + ((p[1] & 0x3f) << 24) if ((p[5] & 0xc0) == 0x80 && len == 6) {
return (((p[0] & 0x01) << 30) + ((p[1] & 0x3f) << 24)
+ ((p[2] & 0x3f) << 18) + ((p[3] & 0x3f) << 12) + ((p[2] & 0x3f) << 18) + ((p[3] & 0x3f) << 12)
+ ((p[4] & 0x3f) << 6) + (p[5] & 0x3f); + ((p[4] & 0x3f) << 6) + (p[5] & 0x3f));
} }
} }
} }
} }
/* Illegal value, just return the first byte */ }
// Illegal value: just return the first byte.
return p[0]; return p[0];
} }
@ -767,23 +796,24 @@ int utfc_char2bytes(int off, char_u *buf)
return len; return len;
} }
/* /// Get the length of a UTF-8 byte sequence representing a single codepoint
* Get the length of a UTF-8 byte sequence, not including any following ///
* composing characters. /// @param[in] p UTF-8 string.
* Returns 0 for "". ///
* Returns 1 for an illegal byte sequence. /// @return Sequence length, 0 for empty string and 1 for non-UTF-8 byte
*/ /// sequence.
int utf_ptr2len(const char_u *p) int utf_ptr2len(const char_u *const p)
FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL
{ {
int len; if (*p == NUL) {
int i;
if (*p == NUL)
return 0; return 0;
len = utf8len_tab[*p]; }
for (i = 1; i < len; ++i) const int len = utf8len_tab[*p];
if ((p[i] & 0xc0) != 0x80) for (int i = 1; i < len; i++) {
if ((p[i] & 0xc0) != 0x80) {
return 1; return 1;
}
}
return len; return len;
} }
@ -824,38 +854,38 @@ int utf_ptr2len_len(const char_u *p, int size)
return len; return len;
} }
/* /// Return the number of bytes occupied by a UTF-8 character in a string
* Return the number of bytes the UTF-8 encoding of the character at "p" takes. ///
* This includes following composing characters. /// This includes following composing characters.
*/ int utfc_ptr2len(const char_u *const p)
int utfc_ptr2len(const char_u *p) FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL
{ {
int len; uint8_t b0 = (uint8_t)(*p);
int b0 = *p;
int prevlen;
if (b0 == NUL) if (b0 == NUL) {
return 0; return 0;
if (b0 < 0x80 && p[1] < 0x80) /* be quick for ASCII */ }
if (b0 < 0x80 && p[1] < 0x80) { // be quick for ASCII
return 1; return 1;
}
/* Skip over first UTF-8 char, stopping at a NUL byte. */ // Skip over first UTF-8 char, stopping at a NUL byte.
len = utf_ptr2len(p); int len = utf_ptr2len(p);
/* Check for illegal byte. */ // Check for illegal byte.
if (len == 1 && b0 >= 0x80) if (len == 1 && b0 >= 0x80) {
return 1; return 1;
}
/* // Check for composing characters. We can handle only the first six, but
* Check for composing characters. We can handle only the first six, but // skip all of them (otherwise the cursor would get stuck).
* skip all of them (otherwise the cursor would get stuck). int prevlen = 0;
*/ for (;;) {
prevlen = 0; if (p[len] < 0x80 || !UTF_COMPOSINGLIKE(p + prevlen, p + len)) {
for (;; ) {
if (p[len] < 0x80 || !UTF_COMPOSINGLIKE(p + prevlen, p + len))
return len; return len;
}
/* Skip over composing char */ // Skip over composing char.
prevlen = len; prevlen = len;
len += utf_ptr2len(p + len); len += utf_ptr2len(p + len);
} }
@ -913,23 +943,22 @@ int utfc_ptr2len_len(const char_u *p, int size)
return len; return len;
} }
/* /// Determine how many bytes a given Unicode codepoint will occupy
* Return the number of bytes the UTF-8 encoding of character "c" takes. int utf_char2len(const int c)
* This does not include composing characters.
*/
int utf_char2len(int c)
{ {
if (c < 0x80) if (c < 0x80) {
return 1; return 1;
if (c < 0x800) } else if (c < 0x800) {
return 2; return 2;
if (c < 0x10000) } else if (c < 0x10000) {
return 3; return 3;
if (c < 0x200000) } else if (c < 0x200000) {
return 4; return 4;
if (c < 0x4000000) } else if (c < 0x4000000) {
return 5; return 5;
} else {
return 6; return 6;
}
} }
/// Convert Unicode character to UTF-8 string /// Convert Unicode character to UTF-8 string
@ -937,39 +966,34 @@ int utf_char2len(int c)
/// @param c character to convert to \p buf /// @param c character to convert to \p buf
/// @param[out] buf UTF-8 string generated from \p c, does not add \0 /// @param[out] buf UTF-8 string generated from \p c, does not add \0
/// @return Number of bytes (1-6). Does not include composing characters. /// @return Number of bytes (1-6). Does not include composing characters.
int utf_char2bytes(int c, char_u *const buf) int utf_char2bytes(const int c, char_u *const buf)
{ {
if (c < 0x80) { /* 7 bits */ if (c < 0x80) { // 7 bits
buf[0] = c; buf[0] = c;
return 1; return 1;
} } else if (c < 0x800) { // 11 bits
if (c < 0x800) { /* 11 bits */
buf[0] = 0xc0 + ((unsigned)c >> 6); buf[0] = 0xc0 + ((unsigned)c >> 6);
buf[1] = 0x80 + (c & 0x3f); buf[1] = 0x80 + (c & 0x3f);
return 2; return 2;
} } else if (c < 0x10000) { // 16 bits
if (c < 0x10000) { /* 16 bits */
buf[0] = 0xe0 + ((unsigned)c >> 12); buf[0] = 0xe0 + ((unsigned)c >> 12);
buf[1] = 0x80 + (((unsigned)c >> 6) & 0x3f); buf[1] = 0x80 + (((unsigned)c >> 6) & 0x3f);
buf[2] = 0x80 + (c & 0x3f); buf[2] = 0x80 + (c & 0x3f);
return 3; return 3;
} } else if (c < 0x200000) { // 21 bits
if (c < 0x200000) { /* 21 bits */
buf[0] = 0xf0 + ((unsigned)c >> 18); buf[0] = 0xf0 + ((unsigned)c >> 18);
buf[1] = 0x80 + (((unsigned)c >> 12) & 0x3f); buf[1] = 0x80 + (((unsigned)c >> 12) & 0x3f);
buf[2] = 0x80 + (((unsigned)c >> 6) & 0x3f); buf[2] = 0x80 + (((unsigned)c >> 6) & 0x3f);
buf[3] = 0x80 + (c & 0x3f); buf[3] = 0x80 + (c & 0x3f);
return 4; return 4;
} } else if (c < 0x4000000) { // 26 bits
if (c < 0x4000000) { /* 26 bits */
buf[0] = 0xf8 + ((unsigned)c >> 24); buf[0] = 0xf8 + ((unsigned)c >> 24);
buf[1] = 0x80 + (((unsigned)c >> 18) & 0x3f); buf[1] = 0x80 + (((unsigned)c >> 18) & 0x3f);
buf[2] = 0x80 + (((unsigned)c >> 12) & 0x3f); buf[2] = 0x80 + (((unsigned)c >> 12) & 0x3f);
buf[3] = 0x80 + (((unsigned)c >> 6) & 0x3f); buf[3] = 0x80 + (((unsigned)c >> 6) & 0x3f);
buf[4] = 0x80 + (c & 0x3f); buf[4] = 0x80 + (c & 0x3f);
return 5; return 5;
} } else { // 31 bits
/* 31 bits */
buf[0] = 0xfc + ((unsigned)c >> 30); buf[0] = 0xfc + ((unsigned)c >> 30);
buf[1] = 0x80 + (((unsigned)c >> 24) & 0x3f); buf[1] = 0x80 + (((unsigned)c >> 24) & 0x3f);
buf[2] = 0x80 + (((unsigned)c >> 18) & 0x3f); buf[2] = 0x80 + (((unsigned)c >> 18) & 0x3f);
@ -977,6 +1001,7 @@ int utf_char2bytes(int c, char_u *const buf)
buf[4] = 0x80 + (((unsigned)c >> 6) & 0x3f); buf[4] = 0x80 + (((unsigned)c >> 6) & 0x3f);
buf[5] = 0x80 + (c & 0x3f); buf[5] = 0x80 + (c & 0x3f);
return 6; return 6;
}
} }
/* /*
@ -1513,14 +1538,15 @@ int utf_head_off(const char_u *base, const char_u *p)
return (int)(p - q); return (int)(p - q);
} }
/* /// Copy a character, advancing the pointers
* Copy a character from "*fp" to "*tp" and advance the pointers. ///
*/ /// @param[in,out] fp Source of the character to copy.
void mb_copy_char(const char_u **fp, char_u **tp) /// @param[in,out] tp Destination to copy to.
void mb_copy_char(const char_u **const fp, char_u **const tp)
{ {
int l = (*mb_ptr2len)(*fp); const size_t l = (size_t)utfc_ptr2len(*fp);
memmove(*tp, *fp, (size_t)l); memmove(*tp, *fp, l);
*tp += l; *tp += l;
*fp += l; *fp += l;
} }