mbyte: Lint some functions which are to be copied for symbolic tests

2025-02-25 18:55:25 -06:00 · 2017-10-08 21:19:10 +03:00 · 2017-10-08 21:19:10 +03:00 · 6f22b5afad
commit 6f22b5afad
parent bd3a4166b2
2 changed files with 137 additions and 134 deletions
--- a/src/nvim/globals.h
+++ b/src/nvim/globals.h
@@ -725,29 +725,6 @@ EXTERN int vr_lines_changed INIT(= 0);      /* #Lines changed by "gR" so far */
 /// Encoding used when 'fencs' is set to "default"
 EXTERN char_u *fenc_default INIT(= NULL);
 // To speed up BYTELEN(); keep a lookup table to quickly get the length in
 // bytes of a UTF-8 character from the first byte of a UTF-8 string.  Bytes
 // which are illegal when used as the first byte have a 1.  The NUL byte has
 // length 1.
 EXTERN char utf8len_tab[256] INIT(= {
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1,
 });
 # if defined(USE_ICONV) && defined(DYNAMIC_ICONV)
 /* Pointers to functions and variables to be loaded at runtime */
 EXTERN size_t (*iconv)(iconv_t cd, const char **inbuf, size_t *inbytesleft,
--- a/src/nvim/mbyte.c
+++ b/src/nvim/mbyte.c
@@ -72,19 +72,41 @@ struct interval {
 # include "unicode_tables.generated.h"
 #endif
-/*
+// To speed up BYTELEN(); keep a lookup table to quickly get the length in
- * Like utf8len_tab above, but using a zero for illegal lead bytes.
+// bytes of a UTF-8 character from the first byte of a UTF-8 string.  Bytes
- */
+// which are illegal when used as the first byte have a 1.  The NUL byte has
-const uint8_t utf8len_tab_zero[256] =
+// length 1.
-{
+const uint8_t utf8len_tab[] = {
-  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+  // ?1 ?2 ?3 ?4 ?5 ?6 ?7 ?8 ?9 ?A ?B ?C ?D ?E ?F
-  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0?
-  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 1?
-  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 2?
-  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 3?
-  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 4?
-  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 5?
-  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,0,0,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 6?
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 7?
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 8?
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 9?
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // A?
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // B?
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  // C?
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  // D?
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,  // E?
  4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1,  // F?
 };
 // Like utf8len_tab above, but using a zero for illegal lead bytes.
 const uint8_t utf8len_tab_zero[] = {
  //1 2 3 4 5 6 7 8 9 A B C D E F 0 1 2 3 4 5 6 7 8 9 A B C D E F
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  // 0
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  // 2
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  // 4
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  // 6
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // 8
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  // A
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  // C
  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,0,0,  // E
 };
 /*
@@ -528,45 +550,52 @@ int utf_off2cells(unsigned off, unsigned max_off)
  return (off + 1 < max_off && ScreenLines[off + 1] == 0) ? 2 : 1;
 }
-/*
+/// Convert a UTF-8 byte sequence to a wide character
- * Convert a UTF-8 byte sequence to a wide character.
+///
- * If the sequence is illegal or truncated by a NUL the first byte is
+/// If the sequence is illegal or truncated by a NUL then the first byte is
- * returned.
+/// returned. Does not include composing characters for obvious reasons.
- * Does not include composing characters, of course.
+///
- */
+/// @param[in]  p  String to convert.
-int utf_ptr2char(const char_u *p)
+///
 /// @return Unicode codepoint or byte value.
 int utf_ptr2char(const char_u *const p)
  FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT
 {
-  uint8_t len;
+  if (p[0] < 0x80) {  // Be quick for ASCII.
  if (p[0] < 0x80)      /* be quick for ASCII */
    return p[0];
  }
-  len = utf8len_tab_zero[p[0]];
+  const uint8_t len = utf8len_tab_zero[p[0]];
  if (len > 1 && (p[1] & 0xc0) == 0x80) {
-    if (len == 2)
+    if (len == 2) {
      return ((p[0] & 0x1f) << 6) + (p[1] & 0x3f);
    }
    if ((p[2] & 0xc0) == 0x80) {
-      if (len == 3)
+      if (len == 3) {
-        return ((p[0] & 0x0f) << 12) + ((p[1] & 0x3f) << 6)
+        return (((p[0] & 0x0f) << 12) + ((p[1] & 0x3f) << 6)
-          + (p[2] & 0x3f);
+                + (p[2] & 0x3f));
      }
      if ((p[3] & 0xc0) == 0x80) {
-        if (len == 4)
+        if (len == 4) {
-          return ((p[0] & 0x07) << 18) + ((p[1] & 0x3f) << 12)
+          return (((p[0] & 0x07) << 18) + ((p[1] & 0x3f) << 12)
-            + ((p[2] & 0x3f) << 6) + (p[3] & 0x3f);
+                  + ((p[2] & 0x3f) << 6) + (p[3] & 0x3f));
        }
        if ((p[4] & 0xc0) == 0x80) {
-          if (len == 5)
+          if (len == 5) {
-            return ((p[0] & 0x03) << 24) + ((p[1] & 0x3f) << 18)
+            return (((p[0] & 0x03) << 24) + ((p[1] & 0x3f) << 18)
                    + ((p[2] & 0x3f) << 12) + ((p[3] & 0x3f) << 6)
-              + (p[4] & 0x3f);
+                    + (p[4] & 0x3f));
-          if ((p[5] & 0xc0) == 0x80 && len == 6)
+          }
-            return ((p[0] & 0x01) << 30) + ((p[1] & 0x3f) << 24)
+          if ((p[5] & 0xc0) == 0x80 && len == 6) {
            return (((p[0] & 0x01) << 30) + ((p[1] & 0x3f) << 24)
                    + ((p[2] & 0x3f) << 18) + ((p[3] & 0x3f) << 12)
-              + ((p[4] & 0x3f) << 6) + (p[5] & 0x3f);
+                    + ((p[4] & 0x3f) << 6) + (p[5] & 0x3f));
          }
        }
      }
    }
-  /* Illegal value, just return the first byte */
+  }
  // Illegal value: just return the first byte.
  return p[0];
 }
@@ -767,23 +796,24 @@ int utfc_char2bytes(int off, char_u *buf)
  return len;
 }
-/*
+/// Get the length of a UTF-8 byte sequence representing a single codepoint
- * Get the length of a UTF-8 byte sequence, not including any following
+///
- * composing characters.
+/// @param[in]  p  UTF-8 string.
- * Returns 0 for "".
+///
- * Returns 1 for an illegal byte sequence.
+/// @return Sequence length, 0 for empty string and 1 for non-UTF-8 byte
- */
+///         sequence.
-int utf_ptr2len(const char_u *p)
+int utf_ptr2len(const char_u *const p)
  FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL
 {
-  int len;
+  if (*p == NUL) {
  int i;
  if (*p == NUL)
    return 0;
-  len = utf8len_tab[*p];
+  }
-  for (i = 1; i < len; ++i)
+  const int len = utf8len_tab[*p];
-    if ((p[i] & 0xc0) != 0x80)
+  for (int i = 1; i < len; i++) {
    if ((p[i] & 0xc0) != 0x80) {
      return 1;
    }
  }
  return len;
 }
@@ -824,38 +854,38 @@ int utf_ptr2len_len(const char_u *p, int size)
  return len;
 }
-/*
+/// Return the number of bytes occupied by a UTF-8 character in a string
- * Return the number of bytes the UTF-8 encoding of the character at "p" takes.
+///
- * This includes following composing characters.
+/// This includes following composing characters.
- */
+int utfc_ptr2len(const char_u *const p)
-int utfc_ptr2len(const char_u *p)
+  FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL
 {
-  int len;
+  uint8_t b0 = (uint8_t)(*p);
  int b0 = *p;
  int prevlen;
-  if (b0 == NUL)
+  if (b0 == NUL) {
    return 0;
-  if (b0 < 0x80 && p[1] < 0x80)         /* be quick for ASCII */
+  }
  if (b0 < 0x80 && p[1] < 0x80) {  // be quick for ASCII
    return 1;
  }
-  /* Skip over first UTF-8 char, stopping at a NUL byte. */
+  // Skip over first UTF-8 char, stopping at a NUL byte.
-  len = utf_ptr2len(p);
+  int len = utf_ptr2len(p);
-  /* Check for illegal byte. */
+  // Check for illegal byte.
-  if (len == 1 && b0 >= 0x80)
+  if (len == 1 && b0 >= 0x80) {
    return 1;
  }
-  /*
+  // Check for composing characters.  We can handle only the first six, but
-   * Check for composing characters.  We can handle only the first six, but
+  // skip all of them (otherwise the cursor would get stuck).
-   * skip all of them (otherwise the cursor would get stuck).
+  int prevlen = 0;
-   */
+  for (;;) {
-  prevlen = 0;
+    if (p[len] < 0x80 || !UTF_COMPOSINGLIKE(p + prevlen, p + len)) {
  for (;; ) {
    if (p[len] < 0x80 || !UTF_COMPOSINGLIKE(p + prevlen, p + len))
      return len;
    }
-    /* Skip over composing char */
+    // Skip over composing char.
    prevlen = len;
    len += utf_ptr2len(p + len);
  }
@@ -913,23 +943,22 @@ int utfc_ptr2len_len(const char_u *p, int size)
  return len;
 }
-/*
+/// Determine how many bytes certain unicode codepoint will occupy
- * Return the number of bytes the UTF-8 encoding of character "c" takes.
+int utf_char2len(const int c)
 * This does not include composing characters.
 */
 int utf_char2len(int c)
 {
-  if (c < 0x80)
+  if (c < 0x80) {
    return 1;
-  if (c < 0x800)
+  } else if (c < 0x800) {
    return 2;
-  if (c < 0x10000)
+  } else if (c < 0x10000) {
    return 3;
-  if (c < 0x200000)
+  } else if (c < 0x200000) {
    return 4;
-  if (c < 0x4000000)
+  } else if (c < 0x4000000) {
    return 5;
  } else {
    return 6;
  }
 }
 /// Convert Unicode character to UTF-8 string
@@ -937,39 +966,34 @@ int utf_char2len(int c)
 /// @param c character to convert to \p buf
 /// @param[out] buf UTF-8 string generated from \p c, does not add \0
 /// @return Number of bytes (1-6). Does not include composing characters.
-int utf_char2bytes(int c, char_u *const buf)
+int utf_char2bytes(const int c, char_u *const buf)
 {
-  if (c < 0x80) {               /* 7 bits */
+  if (c < 0x80) {  // 7 bits
    buf[0] = c;
    return 1;
-  }
+  } else if (c < 0x800) {  // 11 bits
  if (c < 0x800) {              /* 11 bits */
    buf[0] = 0xc0 + ((unsigned)c >> 6);
    buf[1] = 0x80 + (c & 0x3f);
    return 2;
-  }
+  } else if (c < 0x10000) {  // 16 bits
  if (c < 0x10000) {            /* 16 bits */
    buf[0] = 0xe0 + ((unsigned)c >> 12);
    buf[1] = 0x80 + (((unsigned)c >> 6) & 0x3f);
    buf[2] = 0x80 + (c & 0x3f);
    return 3;
-  }
+  } else if (c < 0x200000) {  // 21 bits
  if (c < 0x200000) {           /* 21 bits */
    buf[0] = 0xf0 + ((unsigned)c >> 18);
    buf[1] = 0x80 + (((unsigned)c >> 12) & 0x3f);
    buf[2] = 0x80 + (((unsigned)c >> 6) & 0x3f);
    buf[3] = 0x80 + (c & 0x3f);
    return 4;
-  }
+  } else if (c < 0x4000000) {  // 26 bits
  if (c < 0x4000000) {          /* 26 bits */
    buf[0] = 0xf8 + ((unsigned)c >> 24);
    buf[1] = 0x80 + (((unsigned)c >> 18) & 0x3f);
    buf[2] = 0x80 + (((unsigned)c >> 12) & 0x3f);
    buf[3] = 0x80 + (((unsigned)c >> 6) & 0x3f);
    buf[4] = 0x80 + (c & 0x3f);
    return 5;
-  }
+  } else {  // 31 bits
  /* 31 bits */
    buf[0] = 0xfc + ((unsigned)c >> 30);
    buf[1] = 0x80 + (((unsigned)c >> 24) & 0x3f);
    buf[2] = 0x80 + (((unsigned)c >> 18) & 0x3f);
@@ -977,6 +1001,7 @@ int utf_char2bytes(int c, char_u *const buf)
    buf[4] = 0x80 + (((unsigned)c >> 6) & 0x3f);
    buf[5] = 0x80 + (c & 0x3f);
    return 6;
  }
 }
 /*
@@ -1513,14 +1538,15 @@ int utf_head_off(const char_u *base, const char_u *p)
  return (int)(p - q);
 }
-/*
+/// Copy a character, advancing the pointers
- * Copy a character from "*fp" to "*tp" and advance the pointers.
+///
- */
+/// @param[in,out]  fp  Source of the character to copy.
-void mb_copy_char(const char_u **fp, char_u **tp)
+/// @param[in,out]  tp  Destination to copy to.
 void mb_copy_char(const char_u **const fp, char_u **const tp)
 {
-  int l = (*mb_ptr2len)(*fp);
+  const size_t l = (size_t)utfc_ptr2len(*fp);
-  memmove(*tp, *fp, (size_t)l);
+  memmove(*tp, *fp, l);
  *tp += l;
  *fp += l;
 }