mirror of
https://github.com/neovim/neovim.git
synced 2025-02-25 18:55:25 -06:00
vim-patch:8.0.0243
Problem: When making a character lower case with tolower() changes the byte
count, it is not made lower case.
Solution: Add strlow_save(). (Dominique Pelle, closes vim/vim#1406)
cc5b22b3bf
Join the almost identical strup_save and strlow_save functions into one
function, strcase_save.
This commit is contained in:
parent
a3a06d0248
commit
4c857dae11
@ -16791,30 +16791,8 @@ void timer_teardown(void)
|
|||||||
*/
|
*/
|
||||||
static void f_tolower(typval_T *argvars, typval_T *rettv, FunPtr fptr)
|
static void f_tolower(typval_T *argvars, typval_T *rettv, FunPtr fptr)
|
||||||
{
|
{
|
||||||
char_u *p = (char_u *)xstrdup(tv_get_string(&argvars[0]));
|
|
||||||
rettv->v_type = VAR_STRING;
|
rettv->v_type = VAR_STRING;
|
||||||
rettv->vval.v_string = p;
|
rettv->vval.v_string = (char_u *)strcase_save(tv_get_string(&argvars[0]), false);
|
||||||
|
|
||||||
while (*p != NUL) {
|
|
||||||
int l;
|
|
||||||
|
|
||||||
if (enc_utf8) {
|
|
||||||
int c, lc;
|
|
||||||
|
|
||||||
c = utf_ptr2char(p);
|
|
||||||
lc = mb_tolower(c);
|
|
||||||
l = utf_ptr2len(p);
|
|
||||||
/* TODO: reallocate string when byte count changes. */
|
|
||||||
if (utf_char2len(lc) == l)
|
|
||||||
utf_char2bytes(lc, p);
|
|
||||||
p += l;
|
|
||||||
} else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
|
|
||||||
p += l; /* skip multi-byte character */
|
|
||||||
else {
|
|
||||||
*p = TOLOWER_LOC(*p); /* note that tolower() can be a macro */
|
|
||||||
++p;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -16823,7 +16801,7 @@ static void f_tolower(typval_T *argvars, typval_T *rettv, FunPtr fptr)
|
|||||||
static void f_toupper(typval_T *argvars, typval_T *rettv, FunPtr fptr)
|
static void f_toupper(typval_T *argvars, typval_T *rettv, FunPtr fptr)
|
||||||
{
|
{
|
||||||
rettv->v_type = VAR_STRING;
|
rettv->v_type = VAR_STRING;
|
||||||
rettv->vval.v_string = (char_u *)strup_save(tv_get_string(&argvars[0]));
|
rettv->vval.v_string = (char_u *)strcase_save(tv_get_string(&argvars[0]), true);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -291,14 +291,15 @@ void vim_strup(char_u *p)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Make given string all upper-case
|
/// Make given string all upper-case or all lower-case
|
||||||
///
|
///
|
||||||
/// Handels multi-byte characters as good as possible.
|
/// Handles multi-byte characters as good as possible.
|
||||||
///
|
///
|
||||||
/// @param[in] orig Input string.
|
/// @param[in] orig Input string.
|
||||||
|
/// @param[in] upper If true make uppercase, otherwise lowercase
|
||||||
///
|
///
|
||||||
/// @return [allocated] upper-cased string.
|
/// @return [allocated] upper-cased or lower-cased string.
|
||||||
char *strup_save(const char *const orig)
|
char *strcase_save(const char *const orig, bool upper)
|
||||||
FUNC_ATTR_NONNULL_RET FUNC_ATTR_MALLOC FUNC_ATTR_NONNULL_ALL
|
FUNC_ATTR_NONNULL_RET FUNC_ATTR_MALLOC FUNC_ATTR_NONNULL_ALL
|
||||||
{
|
{
|
||||||
char *res = xstrdup(orig);
|
char *res = xstrdup(orig);
|
||||||
@ -307,33 +308,25 @@ char *strup_save(const char *const orig)
|
|||||||
while (*p != NUL) {
|
while (*p != NUL) {
|
||||||
int l;
|
int l;
|
||||||
|
|
||||||
if (enc_utf8) {
|
int c = utf_ptr2char((const char_u *)p);
|
||||||
int c = utf_ptr2char((const char_u *)p);
|
int uc = upper ? mb_toupper(c) : mb_tolower(c);
|
||||||
int uc = mb_toupper(c);
|
|
||||||
|
|
||||||
// Reallocate string when byte count changes. This is rare,
|
// Reallocate string when byte count changes. This is rare,
|
||||||
// thus it's OK to do another malloc()/free().
|
// thus it's OK to do another malloc()/free().
|
||||||
l = utf_ptr2len((const char_u *)p);
|
l = utf_ptr2len((const char_u *)p);
|
||||||
int newl = utf_char2len(uc);
|
int newl = utf_char2len(uc);
|
||||||
if (newl != l) {
|
if (newl != l) {
|
||||||
// TODO(philix): use xrealloc() in strup_save()
|
// TODO(philix): use xrealloc() in strcase_save()
|
||||||
char *s = xmalloc(STRLEN(res) + (size_t)(1 + newl - l));
|
char *s = xmalloc(STRLEN(res) + (size_t)(1 + newl - l));
|
||||||
memcpy(s, res, (size_t)(p - res));
|
memcpy(s, res, (size_t)(p - res));
|
||||||
STRCPY(s + (p - res) + newl, p + l);
|
STRCPY(s + (p - res) + newl, p + l);
|
||||||
p = s + (p - res);
|
p = s + (p - res);
|
||||||
xfree(res);
|
xfree(res);
|
||||||
res = s;
|
res = s;
|
||||||
}
|
|
||||||
|
|
||||||
utf_char2bytes(uc, (char_u *)p);
|
|
||||||
p += newl;
|
|
||||||
} else if (has_mbyte && (l = (*mb_ptr2len)((const char_u *)p)) > 1) {
|
|
||||||
p += l; // Skip multi-byte character.
|
|
||||||
} else {
|
|
||||||
// note that toupper() can be a macro
|
|
||||||
*p = (char)(uint8_t)TOUPPER_LOC(*p);
|
|
||||||
p++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
utf_char2bytes(uc, (char_u *)p);
|
||||||
|
p += newl;
|
||||||
}
|
}
|
||||||
|
|
||||||
return res;
|
return res;
|
||||||
|
@ -29,3 +29,147 @@ func Test_setbufvar_options()
|
|||||||
bwipe!
|
bwipe!
|
||||||
endfunc
|
endfunc
|
||||||
|
|
||||||
|
func Test_tolower()
|
||||||
|
call assert_equal("", tolower(""))
|
||||||
|
|
||||||
|
" Test with all printable ASCII characters.
|
||||||
|
call assert_equal(' !"#$%&''()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\]^_`abcdefghijklmnopqrstuvwxyz{|}~',
|
||||||
|
\ tolower(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'))
|
||||||
|
|
||||||
|
if !has('multi_byte')
|
||||||
|
return
|
||||||
|
endif
|
||||||
|
|
||||||
|
" Test with a few uppercase diacritics.
|
||||||
|
call assert_equal("aàáâãäåāăąǎǟǡả", tolower("AÀÁÂÃÄÅĀĂĄǍǞǠẢ"))
|
||||||
|
call assert_equal("bḃḇ", tolower("BḂḆ"))
|
||||||
|
call assert_equal("cçćĉċč", tolower("CÇĆĈĊČ"))
|
||||||
|
call assert_equal("dďđḋḏḑ", tolower("DĎĐḊḎḐ"))
|
||||||
|
call assert_equal("eèéêëēĕėęěẻẽ", tolower("EÈÉÊËĒĔĖĘĚẺẼ"))
|
||||||
|
call assert_equal("fḟ ", tolower("FḞ "))
|
||||||
|
call assert_equal("gĝğġģǥǧǵḡ", tolower("GĜĞĠĢǤǦǴḠ"))
|
||||||
|
call assert_equal("hĥħḣḧḩ", tolower("HĤĦḢḦḨ"))
|
||||||
|
call assert_equal("iìíîïĩīĭįiǐỉ", tolower("IÌÍÎÏĨĪĬĮİǏỈ"))
|
||||||
|
call assert_equal("jĵ", tolower("JĴ"))
|
||||||
|
call assert_equal("kķǩḱḵ", tolower("KĶǨḰḴ"))
|
||||||
|
call assert_equal("lĺļľŀłḻ", tolower("LĹĻĽĿŁḺ"))
|
||||||
|
call assert_equal("mḿṁ", tolower("MḾṀ"))
|
||||||
|
call assert_equal("nñńņňṅṉ", tolower("NÑŃŅŇṄṈ"))
|
||||||
|
call assert_equal("oòóôõöøōŏőơǒǫǭỏ", tolower("OÒÓÔÕÖØŌŎŐƠǑǪǬỎ"))
|
||||||
|
call assert_equal("pṕṗ", tolower("PṔṖ"))
|
||||||
|
call assert_equal("q", tolower("Q"))
|
||||||
|
call assert_equal("rŕŗřṙṟ", tolower("RŔŖŘṘṞ"))
|
||||||
|
call assert_equal("sśŝşšṡ", tolower("SŚŜŞŠṠ"))
|
||||||
|
call assert_equal("tţťŧṫṯ", tolower("TŢŤŦṪṮ"))
|
||||||
|
call assert_equal("uùúûüũūŭůűųưǔủ", tolower("UÙÚÛÜŨŪŬŮŰŲƯǓỦ"))
|
||||||
|
call assert_equal("vṽ", tolower("VṼ"))
|
||||||
|
call assert_equal("wŵẁẃẅẇ", tolower("WŴẀẂẄẆ"))
|
||||||
|
call assert_equal("xẋẍ", tolower("XẊẌ"))
|
||||||
|
call assert_equal("yýŷÿẏỳỷỹ", tolower("YÝŶŸẎỲỶỸ"))
|
||||||
|
call assert_equal("zźżžƶẑẕ", tolower("ZŹŻŽƵẐẔ"))
|
||||||
|
|
||||||
|
" Test with a few lowercase diacritics, which should remain unchanged.
|
||||||
|
call assert_equal("aàáâãäåāăąǎǟǡả", tolower("aàáâãäåāăąǎǟǡả"))
|
||||||
|
call assert_equal("bḃḇ", tolower("bḃḇ"))
|
||||||
|
call assert_equal("cçćĉċč", tolower("cçćĉċč"))
|
||||||
|
call assert_equal("dďđḋḏḑ", tolower("dďđḋḏḑ"))
|
||||||
|
call assert_equal("eèéêëēĕėęěẻẽ", tolower("eèéêëēĕėęěẻẽ"))
|
||||||
|
call assert_equal("fḟ", tolower("fḟ"))
|
||||||
|
call assert_equal("gĝğġģǥǧǵḡ", tolower("gĝğġģǥǧǵḡ"))
|
||||||
|
call assert_equal("hĥħḣḧḩẖ", tolower("hĥħḣḧḩẖ"))
|
||||||
|
call assert_equal("iìíîïĩīĭįǐỉ", tolower("iìíîïĩīĭįǐỉ"))
|
||||||
|
call assert_equal("jĵǰ", tolower("jĵǰ"))
|
||||||
|
call assert_equal("kķǩḱḵ", tolower("kķǩḱḵ"))
|
||||||
|
call assert_equal("lĺļľŀłḻ", tolower("lĺļľŀłḻ"))
|
||||||
|
call assert_equal("mḿṁ ", tolower("mḿṁ "))
|
||||||
|
call assert_equal("nñńņňʼnṅṉ", tolower("nñńņňʼnṅṉ"))
|
||||||
|
call assert_equal("oòóôõöøōŏőơǒǫǭỏ", tolower("oòóôõöøōŏőơǒǫǭỏ"))
|
||||||
|
call assert_equal("pṕṗ", tolower("pṕṗ"))
|
||||||
|
call assert_equal("q", tolower("q"))
|
||||||
|
call assert_equal("rŕŗřṙṟ", tolower("rŕŗřṙṟ"))
|
||||||
|
call assert_equal("sśŝşšṡ", tolower("sśŝşšṡ"))
|
||||||
|
call assert_equal("tţťŧṫṯẗ", tolower("tţťŧṫṯẗ"))
|
||||||
|
call assert_equal("uùúûüũūŭůűųưǔủ", tolower("uùúûüũūŭůűųưǔủ"))
|
||||||
|
call assert_equal("vṽ", tolower("vṽ"))
|
||||||
|
call assert_equal("wŵẁẃẅẇẘ", tolower("wŵẁẃẅẇẘ"))
|
||||||
|
call assert_equal("ẋẍ", tolower("ẋẍ"))
|
||||||
|
call assert_equal("yýÿŷẏẙỳỷỹ", tolower("yýÿŷẏẙỳỷỹ"))
|
||||||
|
call assert_equal("zźżžƶẑẕ", tolower("zźżžƶẑẕ"))
|
||||||
|
|
||||||
|
" According to https://twitter.com/jifa/status/625776454479970304
|
||||||
|
" Ⱥ (U+023A) and Ⱦ (U+023E) are the *only* code points to increase
|
||||||
|
" in length (2 to 3 bytes) when lowercased. So let's test them.
|
||||||
|
call assert_equal("ⱥ ⱦ", tolower("Ⱥ Ⱦ"))
|
||||||
|
endfunc
|
||||||
|
|
||||||
|
func Test_toupper()
|
||||||
|
call assert_equal("", toupper(""))
|
||||||
|
|
||||||
|
" Test with all printable ASCII characters.
|
||||||
|
call assert_equal(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~',
|
||||||
|
\ toupper(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'))
|
||||||
|
|
||||||
|
if !has('multi_byte')
|
||||||
|
return
|
||||||
|
endif
|
||||||
|
|
||||||
|
" Test with a few lowercase diacritics.
|
||||||
|
call assert_equal("AÀÁÂÃÄÅĀĂĄǍǞǠẢ", toupper("aàáâãäåāăąǎǟǡả"))
|
||||||
|
call assert_equal("BḂḆ", toupper("bḃḇ"))
|
||||||
|
call assert_equal("CÇĆĈĊČ", toupper("cçćĉċč"))
|
||||||
|
call assert_equal("DĎĐḊḎḐ", toupper("dďđḋḏḑ"))
|
||||||
|
call assert_equal("EÈÉÊËĒĔĖĘĚẺẼ", toupper("eèéêëēĕėęěẻẽ"))
|
||||||
|
call assert_equal("FḞ", toupper("fḟ"))
|
||||||
|
call assert_equal("GĜĞĠĢǤǦǴḠ", toupper("gĝğġģǥǧǵḡ"))
|
||||||
|
call assert_equal("HĤĦḢḦḨẖ", toupper("hĥħḣḧḩẖ"))
|
||||||
|
call assert_equal("IÌÍÎÏĨĪĬĮǏỈ", toupper("iìíîïĩīĭįǐỉ"))
|
||||||
|
call assert_equal("JĴǰ", toupper("jĵǰ"))
|
||||||
|
call assert_equal("KĶǨḰḴ", toupper("kķǩḱḵ"))
|
||||||
|
call assert_equal("LĹĻĽĿŁḺ", toupper("lĺļľŀłḻ"))
|
||||||
|
call assert_equal("MḾṀ ", toupper("mḿṁ "))
|
||||||
|
call assert_equal("NÑŃŅŇʼnṄṈ", toupper("nñńņňʼnṅṉ"))
|
||||||
|
call assert_equal("OÒÓÔÕÖØŌŎŐƠǑǪǬỎ", toupper("oòóôõöøōŏőơǒǫǭỏ"))
|
||||||
|
call assert_equal("PṔṖ", toupper("pṕṗ"))
|
||||||
|
call assert_equal("Q", toupper("q"))
|
||||||
|
call assert_equal("RŔŖŘṘṞ", toupper("rŕŗřṙṟ"))
|
||||||
|
call assert_equal("SŚŜŞŠṠ", toupper("sśŝşšṡ"))
|
||||||
|
call assert_equal("TŢŤŦṪṮẗ", toupper("tţťŧṫṯẗ"))
|
||||||
|
call assert_equal("UÙÚÛÜŨŪŬŮŰŲƯǓỦ", toupper("uùúûüũūŭůűųưǔủ"))
|
||||||
|
call assert_equal("VṼ", toupper("vṽ"))
|
||||||
|
call assert_equal("WŴẀẂẄẆẘ", toupper("wŵẁẃẅẇẘ"))
|
||||||
|
call assert_equal("ẊẌ", toupper("ẋẍ"))
|
||||||
|
call assert_equal("YÝŸŶẎẙỲỶỸ", toupper("yýÿŷẏẙỳỷỹ"))
|
||||||
|
call assert_equal("ZŹŻŽƵẐẔ", toupper("zźżžƶẑẕ"))
|
||||||
|
|
||||||
|
" Test that uppercase diacritics, which should remain unchanged.
|
||||||
|
call assert_equal("AÀÁÂÃÄÅĀĂĄǍǞǠẢ", toupper("AÀÁÂÃÄÅĀĂĄǍǞǠẢ"))
|
||||||
|
call assert_equal("BḂḆ", toupper("BḂḆ"))
|
||||||
|
call assert_equal("CÇĆĈĊČ", toupper("CÇĆĈĊČ"))
|
||||||
|
call assert_equal("DĎĐḊḎḐ", toupper("DĎĐḊḎḐ"))
|
||||||
|
call assert_equal("EÈÉÊËĒĔĖĘĚẺẼ", toupper("EÈÉÊËĒĔĖĘĚẺẼ"))
|
||||||
|
call assert_equal("FḞ ", toupper("FḞ "))
|
||||||
|
call assert_equal("GĜĞĠĢǤǦǴḠ", toupper("GĜĞĠĢǤǦǴḠ"))
|
||||||
|
call assert_equal("HĤĦḢḦḨ", toupper("HĤĦḢḦḨ"))
|
||||||
|
call assert_equal("IÌÍÎÏĨĪĬĮİǏỈ", toupper("IÌÍÎÏĨĪĬĮİǏỈ"))
|
||||||
|
call assert_equal("JĴ", toupper("JĴ"))
|
||||||
|
call assert_equal("KĶǨḰḴ", toupper("KĶǨḰḴ"))
|
||||||
|
call assert_equal("LĹĻĽĿŁḺ", toupper("LĹĻĽĿŁḺ"))
|
||||||
|
call assert_equal("MḾṀ", toupper("MḾṀ"))
|
||||||
|
call assert_equal("NÑŃŅŇṄṈ", toupper("NÑŃŅŇṄṈ"))
|
||||||
|
call assert_equal("OÒÓÔÕÖØŌŎŐƠǑǪǬỎ", toupper("OÒÓÔÕÖØŌŎŐƠǑǪǬỎ"))
|
||||||
|
call assert_equal("PṔṖ", toupper("PṔṖ"))
|
||||||
|
call assert_equal("Q", toupper("Q"))
|
||||||
|
call assert_equal("RŔŖŘṘṞ", toupper("RŔŖŘṘṞ"))
|
||||||
|
call assert_equal("SŚŜŞŠṠ", toupper("SŚŜŞŠṠ"))
|
||||||
|
call assert_equal("TŢŤŦṪṮ", toupper("TŢŤŦṪṮ"))
|
||||||
|
call assert_equal("UÙÚÛÜŨŪŬŮŰŲƯǓỦ", toupper("UÙÚÛÜŨŪŬŮŰŲƯǓỦ"))
|
||||||
|
call assert_equal("VṼ", toupper("VṼ"))
|
||||||
|
call assert_equal("WŴẀẂẄẆ", toupper("WŴẀẂẄẆ"))
|
||||||
|
call assert_equal("XẊẌ", toupper("XẊẌ"))
|
||||||
|
call assert_equal("YÝŶŸẎỲỶỸ", toupper("YÝŶŸẎỲỶỸ"))
|
||||||
|
call assert_equal("ZŹŻŽƵẐẔ", toupper("ZŹŻŽƵẐẔ"))
|
||||||
|
|
||||||
|
call assert_equal("ⱥ ⱦ", tolower("Ⱥ Ⱦ"))
|
||||||
|
endfunc
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user