vim-patch:8.0.0243

Problem: When making a character lower case with tolower() changes the byte cound, it is not made lower case. Solution: Add strlow_save(). (Dominique Pelle, closes vim/vim#1406) cc5b22b3bf Join almost identical strup_save and strlow_save functions to one Function.
2025-02-25 18:55:25 -06:00 · 2017-04-09 09:29:00 +02:00 · 2017-04-09 09:29:00 +02:00 · 4c857dae11
commit 4c857dae11
parent a3a06d0248
3 changed files with 167 additions and 52 deletions
--- a/src/nvim/eval.c
+++ b/src/nvim/eval.c
@ -16791,30 +16791,8 @@ void timer_teardown(void)
 */
 static void f_tolower(typval_T *argvars, typval_T *rettv, FunPtr fptr)
 {
-  char_u *p = (char_u *)xstrdup(tv_get_string(&argvars[0]));
  rettv->v_type = VAR_STRING;
-  rettv->vval.v_string = p;
-
-  while (*p != NUL) {
-    int l;
-
-    if (enc_utf8) {
-      int c, lc;
-
-      c = utf_ptr2char(p);
-      lc = mb_tolower(c);
-      l = utf_ptr2len(p);
-      /* TODO: reallocate string when byte count changes. */
-      if (utf_char2len(lc) == l)
-        utf_char2bytes(lc, p);
-      p += l;
-    } else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
-      p += l;                 /* skip multi-byte character */
-    else {
-      *p = TOLOWER_LOC(*p);         /* note that tolower() can be a macro */
-      ++p;
-    }
-  }
+  rettv->vval.v_string = (char_u *)strcase_save(tv_get_string(&argvars[0]), false);
 }

 /*
@ -16823,7 +16801,7 @@ static void f_tolower(typval_T *argvars, typval_T *rettv, FunPtr fptr)
 static void f_toupper(typval_T *argvars, typval_T *rettv, FunPtr fptr)
 {
  rettv->v_type = VAR_STRING;
-  rettv->vval.v_string = (char_u *)strup_save(tv_get_string(&argvars[0]));
+  rettv->vval.v_string = (char_u *)strcase_save(tv_get_string(&argvars[0]), true);
 }

 /*
--- a/src/nvim/strings.c
+++ b/src/nvim/strings.c
@ -291,14 +291,15 @@ void vim_strup(char_u *p)
  }
 }

-/// Make given string all upper-case
+/// Make given string all upper-case or all lower-case
 ///
-/// Handels multi-byte characters as good as possible.
+/// Handles multi-byte characters as good as possible.
 ///
 /// @param[in]  orig  Input string.
+/// @param[in]  upper If true make uppercase, otherwise lowercase
 ///
 /// @return [allocated] upper-cased string.
-char *strup_save(const char *const orig)
+char *strcase_save(const char *const orig, bool upper)
  FUNC_ATTR_NONNULL_RET FUNC_ATTR_MALLOC FUNC_ATTR_NONNULL_ALL
 {
  char *res = xstrdup(orig);
@ -307,33 +308,25 @@ char *strup_save(const char *const orig)
  while (*p != NUL) {
    int l;

-    if (enc_utf8) {
-      int c = utf_ptr2char((const char_u *)p);
-      int uc = mb_toupper(c);
+    int c = utf_ptr2char((const char_u *)p);
+    int uc = upper ? mb_toupper(c) : mb_tolower(c);

-      // Reallocate string when byte count changes.  This is rare,
-      // thus it's OK to do another malloc()/free().
-      l = utf_ptr2len((const char_u *)p);
-      int newl = utf_char2len(uc);
-      if (newl != l) {
-        // TODO(philix): use xrealloc() in strup_save()
-        char *s = xmalloc(STRLEN(res) + (size_t)(1 + newl - l));
-        memcpy(s, res, (size_t)(p - res));
-        STRCPY(s + (p - res) + newl, p + l);
-        p = s + (p - res);
-        xfree(res);
-        res = s;
-      }
-
-      utf_char2bytes(uc, (char_u *)p);
-      p += newl;
-    } else if (has_mbyte && (l = (*mb_ptr2len)((const char_u *)p)) > 1) {
-      p += l;  // Skip multi-byte character.
-    } else {
-      // note that toupper() can be a macro
-      *p = (char)(uint8_t)TOUPPER_LOC(*p);
-      p++;
+    // Reallocate string when byte count changes.  This is rare,
+    // thus it's OK to do another malloc()/free().
+    l = utf_ptr2len((const char_u *)p);
+    int newl = utf_char2len(uc);
+    if (newl != l) {
+      // TODO(philix): use xrealloc() in strup_save()
+      char *s = xmalloc(STRLEN(res) + (size_t)(1 + newl - l));
+      memcpy(s, res, (size_t)(p - res));
+      STRCPY(s + (p - res) + newl, p + l);
+      p = s + (p - res);
+      xfree(res);
+      res = s;
    }
+
+    utf_char2bytes(uc, (char_u *)p);
+    p += newl;
  }

  return res;
--- a/src/nvim/testdir/test_functions.vim
+++ b/src/nvim/testdir/test_functions.vim
@ -29,3 +29,147 @@ func Test_setbufvar_options()
  bwipe!
 endfunc

+func Test_tolower()
+  call assert_equal("", tolower(""))
+
+  " Test with all printable ASCII characters.
+  call assert_equal(' !"#$%&''()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\]^_`abcdefghijklmnopqrstuvwxyz{|}~',
+          \ tolower(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'))
+
+  if !has('multi_byte')
+    return
+  endif
+
+  " Test with a few uppercase diacritics.
+  call assert_equal("aàáâãäåāăąǎǟǡả", tolower("AÀÁÂÃÄÅĀĂĄǍǞǠẢ"))
+  call assert_equal("bḃḇ", tolower("BḂḆ"))
+  call assert_equal("cçćĉċč", tolower("CÇĆĈĊČ"))
+  call assert_equal("dďđḋḏḑ", tolower("DĎĐḊḎḐ"))
+  call assert_equal("eèéêëēĕėęěẻẽ", tolower("EÈÉÊËĒĔĖĘĚẺẼ"))
+  call assert_equal("fḟ ", tolower("FḞ "))
+  call assert_equal("gĝğġģǥǧǵḡ", tolower("GĜĞĠĢǤǦǴḠ"))
+  call assert_equal("hĥħḣḧḩ", tolower("HĤĦḢḦḨ"))
+  call assert_equal("iìíîïĩīĭįiǐỉ", tolower("IÌÍÎÏĨĪĬĮİǏỈ"))
+  call assert_equal("jĵ", tolower("JĴ"))
+  call assert_equal("kķǩḱḵ", tolower("KĶǨḰḴ"))
+  call assert_equal("lĺļľŀłḻ", tolower("LĹĻĽĿŁḺ"))
+  call assert_equal("mḿṁ", tolower("MḾṀ"))
+  call assert_equal("nñńņňṅṉ", tolower("NÑŃŅŇṄṈ"))
+  call assert_equal("oòóôõöøōŏőơǒǫǭỏ", tolower("OÒÓÔÕÖØŌŎŐƠǑǪǬỎ"))
+  call assert_equal("pṕṗ", tolower("PṔṖ"))
+  call assert_equal("q", tolower("Q"))
+  call assert_equal("rŕŗřṙṟ", tolower("RŔŖŘṘṞ"))
+  call assert_equal("sśŝşšṡ", tolower("SŚŜŞŠṠ"))
+  call assert_equal("tţťŧṫṯ", tolower("TŢŤŦṪṮ"))
+  call assert_equal("uùúûüũūŭůűųưǔủ", tolower("UÙÚÛÜŨŪŬŮŰŲƯǓỦ"))
+  call assert_equal("vṽ", tolower("VṼ"))
+  call assert_equal("wŵẁẃẅẇ", tolower("WŴẀẂẄẆ"))
+  call assert_equal("xẋẍ", tolower("XẊẌ"))
+  call assert_equal("yýŷÿẏỳỷỹ", tolower("YÝŶŸẎỲỶỸ"))
+  call assert_equal("zźżžƶẑẕ", tolower("ZŹŻŽƵẐẔ"))
+
+  " Test with a few lowercase diacritics, which should remain unchanged.
+  call assert_equal("aàáâãäåāăąǎǟǡả", tolower("aàáâãäåāăąǎǟǡả"))
+  call assert_equal("bḃḇ", tolower("bḃḇ"))
+  call assert_equal("cçćĉċč", tolower("cçćĉċč"))
+  call assert_equal("dďđḋḏḑ", tolower("dďđḋḏḑ"))
+  call assert_equal("eèéêëēĕėęěẻẽ", tolower("eèéêëēĕėęěẻẽ"))
+  call assert_equal("fḟ", tolower("fḟ"))
+  call assert_equal("gĝğġģǥǧǵḡ", tolower("gĝğġģǥǧǵḡ"))
+  call assert_equal("hĥħḣḧḩẖ", tolower("hĥħḣḧḩẖ"))
+  call assert_equal("iìíîïĩīĭįǐỉ", tolower("iìíîïĩīĭįǐỉ"))
+  call assert_equal("jĵǰ", tolower("jĵǰ"))
+  call assert_equal("kķǩḱḵ", tolower("kķǩḱḵ"))
+  call assert_equal("lĺļľŀłḻ", tolower("lĺļľŀłḻ"))
+  call assert_equal("mḿṁ ", tolower("mḿṁ "))
+  call assert_equal("nñńņňŉṅṉ", tolower("nñńņňŉṅṉ"))
+  call assert_equal("oòóôõöøōŏőơǒǫǭỏ", tolower("oòóôõöøōŏőơǒǫǭỏ"))
+  call assert_equal("pṕṗ", tolower("pṕṗ"))
+  call assert_equal("q", tolower("q"))
+  call assert_equal("rŕŗřṙṟ", tolower("rŕŗřṙṟ"))
+  call assert_equal("sśŝşšṡ", tolower("sśŝşšṡ"))
+  call assert_equal("tţťŧṫṯẗ", tolower("tţťŧṫṯẗ"))
+  call assert_equal("uùúûüũūŭůűųưǔủ", tolower("uùúûüũūŭůűųưǔủ"))
+  call assert_equal("vṽ", tolower("vṽ"))
+  call assert_equal("wŵẁẃẅẇẘ", tolower("wŵẁẃẅẇẘ"))
+  call assert_equal("ẋẍ", tolower("ẋẍ"))
+  call assert_equal("yýÿŷẏẙỳỷỹ", tolower("yýÿŷẏẙỳỷỹ"))
+  call assert_equal("zźżžƶẑẕ", tolower("zźżžƶẑẕ"))
+
+  " According to https://twitter.com/jifa/status/625776454479970304
+  " Ⱥ (U+023A) and Ⱦ (U+023E) are the *only* code points to increase
+  " in length (2 to 3 bytes) when lowercased. So let's test them.
+  call assert_equal("ⱥ ⱦ", tolower("Ⱥ Ⱦ"))
+endfunc
+
+func Test_toupper()
+  call assert_equal("", toupper(""))
+
+  " Test with all printable ASCII characters.
+  call assert_equal(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~',
+          \ toupper(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'))
+
+  if !has('multi_byte')
+    return
+  endif
+
+  " Test with a few lowercase diacritics.
+  call assert_equal("AÀÁÂÃÄÅĀĂĄǍǞǠẢ", toupper("aàáâãäåāăąǎǟǡả"))
+  call assert_equal("BḂḆ", toupper("bḃḇ"))
+  call assert_equal("CÇĆĈĊČ", toupper("cçćĉċč"))
+  call assert_equal("DĎĐḊḎḐ", toupper("dďđḋḏḑ"))
+  call assert_equal("EÈÉÊËĒĔĖĘĚẺẼ", toupper("eèéêëēĕėęěẻẽ"))
+  call assert_equal("FḞ", toupper("fḟ"))
+  call assert_equal("GĜĞĠĢǤǦǴḠ", toupper("gĝğġģǥǧǵḡ"))
+  call assert_equal("HĤĦḢḦḨẖ", toupper("hĥħḣḧḩẖ"))
+  call assert_equal("IÌÍÎÏĨĪĬĮǏỈ", toupper("iìíîïĩīĭįǐỉ"))
+  call assert_equal("JĴǰ", toupper("jĵǰ"))
+  call assert_equal("KĶǨḰḴ", toupper("kķǩḱḵ"))
+  call assert_equal("LĹĻĽĿŁḺ", toupper("lĺļľŀłḻ"))
+  call assert_equal("MḾṀ ", toupper("mḿṁ "))
+  call assert_equal("NÑŃŅŇŉṄṈ", toupper("nñńņňŉṅṉ"))
+  call assert_equal("OÒÓÔÕÖØŌŎŐƠǑǪǬỎ", toupper("oòóôõöøōŏőơǒǫǭỏ"))
+  call assert_equal("PṔṖ", toupper("pṕṗ"))
+  call assert_equal("Q", toupper("q"))
+  call assert_equal("RŔŖŘṘṞ", toupper("rŕŗřṙṟ"))
+  call assert_equal("SŚŜŞŠṠ", toupper("sśŝşšṡ"))
+  call assert_equal("TŢŤŦṪṮẗ", toupper("tţťŧṫṯẗ"))
+  call assert_equal("UÙÚÛÜŨŪŬŮŰŲƯǓỦ", toupper("uùúûüũūŭůűųưǔủ"))
+  call assert_equal("VṼ", toupper("vṽ"))
+  call assert_equal("WŴẀẂẄẆẘ", toupper("wŵẁẃẅẇẘ"))
+  call assert_equal("ẊẌ", toupper("ẋẍ"))
+  call assert_equal("YÝŸŶẎẙỲỶỸ", toupper("yýÿŷẏẙỳỷỹ"))
+  call assert_equal("ZŹŻŽƵẐẔ", toupper("zźżžƶẑẕ"))
+
+  " Test that uppercase diacritics, which should remain unchanged.
+  call assert_equal("AÀÁÂÃÄÅĀĂĄǍǞǠẢ", toupper("AÀÁÂÃÄÅĀĂĄǍǞǠẢ"))
+  call assert_equal("BḂḆ", toupper("BḂḆ"))
+  call assert_equal("CÇĆĈĊČ", toupper("CÇĆĈĊČ"))
+  call assert_equal("DĎĐḊḎḐ", toupper("DĎĐḊḎḐ"))
+  call assert_equal("EÈÉÊËĒĔĖĘĚẺẼ", toupper("EÈÉÊËĒĔĖĘĚẺẼ"))
+  call assert_equal("FḞ ", toupper("FḞ "))
+  call assert_equal("GĜĞĠĢǤǦǴḠ", toupper("GĜĞĠĢǤǦǴḠ"))
+  call assert_equal("HĤĦḢḦḨ", toupper("HĤĦḢḦḨ"))
+  call assert_equal("IÌÍÎÏĨĪĬĮİǏỈ", toupper("IÌÍÎÏĨĪĬĮİǏỈ"))
+  call assert_equal("JĴ", toupper("JĴ"))
+  call assert_equal("KĶǨḰḴ", toupper("KĶǨḰḴ"))
+  call assert_equal("LĹĻĽĿŁḺ", toupper("LĹĻĽĿŁḺ"))
+  call assert_equal("MḾṀ", toupper("MḾṀ"))
+  call assert_equal("NÑŃŅŇṄṈ", toupper("NÑŃŅŇṄṈ"))
+  call assert_equal("OÒÓÔÕÖØŌŎŐƠǑǪǬỎ", toupper("OÒÓÔÕÖØŌŎŐƠǑǪǬỎ"))
+  call assert_equal("PṔṖ", toupper("PṔṖ"))
+  call assert_equal("Q", toupper("Q"))
+  call assert_equal("RŔŖŘṘṞ", toupper("RŔŖŘṘṞ"))
+  call assert_equal("SŚŜŞŠṠ", toupper("SŚŜŞŠṠ"))
+  call assert_equal("TŢŤŦṪṮ", toupper("TŢŤŦṪṮ"))
+  call assert_equal("UÙÚÛÜŨŪŬŮŰŲƯǓỦ", toupper("UÙÚÛÜŨŪŬŮŰŲƯǓỦ"))
+  call assert_equal("VṼ", toupper("VṼ"))
+  call assert_equal("WŴẀẂẄẆ", toupper("WŴẀẂẄẆ"))
+  call assert_equal("XẊẌ", toupper("XẊẌ"))
+  call assert_equal("YÝŶŸẎỲỶỸ", toupper("YÝŶŸẎỲỶỸ"))
+  call assert_equal("ZŹŻŽƵẐẔ", toupper("ZŹŻŽƵẐẔ"))
+
+  call assert_equal("ⱥ ⱦ", tolower("Ⱥ Ⱦ"))
+endfunc
+
+