vim-patch:8.2.1933: cannot sort using locale ordering

Problem:    Cannot sort using locale ordering.
Solution:   Add a flag for :sort and sort() to use the locale. (Dominique
            Pellé, closes vim/vim#7237)
55e29611d2
This commit is contained in:
Sean Dewar 2021-04-19 19:57:19 +01:00
parent 1d72b6e4cd
commit 6a0b8cbd81
No known key found for this signature in database
GPG Key ID: 08CC2C83AD41B581
5 changed files with 109 additions and 11 deletions

View File

@ -1742,7 +1742,7 @@ Vim has a sorting function and a sorting command. The sorting function can be
found here: |sort()|, |uniq()|.
*:sor* *:sort*
:[range]sor[t][!] [b][f][i][n][o][r][u][x] [/{pattern}/]
:[range]sor[t][!] [b][f][i][l][n][o][r][u][x] [/{pattern}/]
Sort lines in [range]. When no range is given all
lines are sorted.
@ -1750,6 +1750,14 @@ found here: |sort()|, |uniq()|.
With [i] case is ignored.
With [l] sort uses the current locale. See
`language collate` to check or set the locale used
for ordering. For example, with "en_US.UTF8",
Ö will be ordered after O and before P,
whereas with the Swedish locale "sv_SE.UTF8",
it will be after Z.
Case is typically ignored by the locale.
Options [n][f][x][o][b] are mutually exclusive.
With [n] sorting is done on the first decimal number
@ -1816,8 +1824,7 @@ found here: |sort()|, |uniq()|.
Note that using `:sort` with `:global` doesn't sort the matching lines; it's
quite useless.
The details about sorting depend on the library function used. There is no
guarantee that sorting obeys the current locale. You will have to try it out.
`:sort` does not use the current locale unless the [l] flag is used.
Vim does do a "stable" sort.
The sorting can be interrupted, but if you interrupt it too late in the

View File

@ -8359,6 +8359,13 @@ sort({list} [, {func} [, {dict}]]) *sort()* *E702*
When {func} is given and it is '1' or 'i' then case is
ignored.
When {func} is given and it is 'l' then the current locale
is used for ordering. See `language collate` to check or set
the locale used for ordering. For example, with "en_US.UTF8",
Ö will be ordered after O and before P, whereas with the
Swedish locale "sv_SE.UTF8", it will be after Z.
Case is typically ignored by the locale.
When {func} is given and it is 'n' then all items will be
sorted numerically (Implementation detail: this uses the
strtod() function to parse numbers; Strings, Lists, Dicts and

View File

@ -9166,6 +9166,7 @@ static void f_sockconnect(typval_T *argvars, typval_T *rettv, FunPtr fptr)
/// struct storing information about current sort
typedef struct {
int item_compare_ic;
bool item_compare_lc;
bool item_compare_numeric;
bool item_compare_numbers;
bool item_compare_float;
@ -9240,10 +9241,10 @@ static int item_compare(const void *s1, const void *s2, bool keep_zero)
p2 = "";
}
if (!sortinfo->item_compare_numeric) {
if (sortinfo->item_compare_ic) {
res = STRICMP(p1, p2);
if (sortinfo->item_compare_lc) {
res = strcoll(p1, p2);
} else {
res = STRCMP(p1, p2);
res = sortinfo->item_compare_ic ? STRICMP(p1, p2): STRCMP(p1, p2);
}
} else {
double n1, n2;
@ -9378,6 +9379,7 @@ static void do_sort_uniq(typval_T *argvars, typval_T *rettv, bool sort)
}
info.item_compare_ic = false;
info.item_compare_lc = false;
info.item_compare_numeric = false;
info.item_compare_numbers = false;
info.item_compare_float = false;
@ -9422,6 +9424,9 @@ static void do_sort_uniq(typval_T *argvars, typval_T *rettv, bool sort)
} else if (strcmp(info.item_compare_func, "i") == 0) {
info.item_compare_func = NULL;
info.item_compare_ic = true;
} else if (strcmp(info.item_compare_func, "l") == 0) {
info.item_compare_func = NULL;
info.item_compare_lc = true;
}
}
}

View File

@ -358,6 +358,7 @@ static int linelen(int *has_tab)
static char_u *sortbuf1; ///< scratch buffer: first operand in sort_compare(), last copied line in ex_sort()
static char_u *sortbuf2; ///< scratch buffer: second operand in sort_compare()
static int sort_lc; ///< sort using locale (string_compare() then calls strcoll())
static int sort_ic; ///< ignore case
static int sort_nr; ///< sort on number
static int sort_rx; ///< sort on regex instead of skipping it
@ -381,6 +382,13 @@ typedef struct {
} st_u;
} sorti_T;
/// Compare two NUL-terminated strings according to the active `:sort` flags.
///
/// When sort_lc is set the comparison is done with strcoll(), and sort_ic is
/// not consulted. Otherwise STRICMP() is used when sort_ic is set and
/// STRCMP() when it is not.
///
/// @param s1  first string
/// @param s2  second string
///
/// @return  the result of the selected comparison function
static int string_compare(const void *s1, const void *s2) FUNC_ATTR_NONNULL_ALL
{
  if (sort_lc) {
    // strcoll() takes "const char *"; keep the qualifier instead of casting
    // it away.
    return strcoll((const char *)s1, (const char *)s2);
  }
  return sort_ic ? STRICMP(s1, s2) : STRCMP(s1, s2);
}
static int sort_compare(const void *s1, const void *s2)
{
@ -424,8 +432,7 @@ static int sort_compare(const void *s1, const void *s2)
l2.st_u.line.end_col_nr - l2.st_u.line.start_col_nr + 1);
sortbuf2[l2.st_u.line.end_col_nr - l2.st_u.line.start_col_nr] = NUL;
result = sort_ic ? STRICMP(sortbuf1, sortbuf2)
: STRCMP(sortbuf1, sortbuf2);
result = string_compare(sortbuf1, sortbuf2);
}
/* If two lines have the same value, preserve the original line order. */
@ -466,7 +473,7 @@ void ex_sort(exarg_T *eap)
regmatch.regprog = NULL;
sorti_T *nrs = xmalloc(count * sizeof(sorti_T));
sort_abort = sort_ic = sort_rx = sort_nr = sort_flt = 0;
sort_abort = sort_ic = sort_lc = sort_rx = sort_nr = sort_flt = 0;
size_t format_found = 0;
bool change_occurred = false; // Buffer contents changed.
@ -474,6 +481,8 @@ void ex_sort(exarg_T *eap)
if (ascii_iswhite(*p)) {
} else if (*p == 'i') {
sort_ic = true;
} else if (*p == 'l') {
sort_lc = true;
} else if (*p == 'r') {
sort_rx = true;
} else if (*p == 'n') {
@ -645,8 +654,7 @@ void ex_sort(exarg_T *eap)
s = ml_get(get_lnum);
size_t bytelen = STRLEN(s) + 1; // include EOL in bytelen
old_count += bytelen;
if (!unique || i == 0
|| (sort_ic ? STRICMP(s, sortbuf1) : STRCMP(s, sortbuf1)) != 0) {
if (!unique || i == 0 || string_compare(s, sortbuf1) != 0) {
// Copy the line into a buffer, it may become invalid in
// ml_append(). And it's needed for "unique".
STRCPY(sortbuf1, s);

View File

@ -13,6 +13,25 @@ func Test_sort_strings()
" numbers compared as strings
call assert_equal([1, 2, 3], sort([3, 2, 1]))
call assert_equal([13, 28, 3], sort([3, 28, 13]))
call assert_equal(['A', 'O', 'P', 'a', 'o', 'p', 'Ä', 'Ô', 'ä', 'ô', 'œ', 'œ'],
\ sort(['A', 'O', 'P', 'a', 'o', 'p', 'Ä', 'Ô', 'ä', 'ô', 'œ', 'œ']))
call assert_equal(['A', 'a', 'o', 'O', 'p', 'P', 'Ä', 'Ô', 'ä', 'ô', 'œ', 'œ'],
\ sort(['A', 'a', 'o', 'O', 'œ', 'œ', 'p', 'P', 'Ä', 'ä', 'ô', 'Ô'], 'i'))
let lc = execute('language collate')
" With the following locales, the accentuated letters are ordered
" similarly to the non-accentuated letters...
if lc =~? '"\(en\|es\|de\|fr\|it\|nl\).*\.utf-\?8"'
call assert_equal(['a', 'A', 'ä', 'Ä', 'o', 'O', 'ô', 'Ô', 'œ', 'œ', 'p', 'P'],
\ sort(['A', 'a', 'o', 'O', 'œ', 'œ', 'p', 'P', 'Ä', 'ä', 'ô', 'Ô'], 'l'))
" ... whereas with a Swedish locale, the accentuated letters are ordered
" after Z.
elseif lc =~? '"sv.*utf-\?8"'
call assert_equal(['a', 'A', 'o', 'O', 'p', 'P', 'ä', 'Ä', 'œ', 'œ', 'ô', 'Ô'],
\ sort(['A', 'a', 'o', 'O', 'œ', 'œ', 'p', 'P', 'Ä', 'ä', 'ô', 'Ô'], 'l'))
endif
endfunc
func Test_sort_numeric()
@ -1223,6 +1242,58 @@ func Test_sort_cmd()
\ },
\ ]
" With the following locales, the accentuated letters are ordered
" similarly to the non-accentuated letters...
let lc = execute('language collate')
if lc =~? '"\(en\|es\|de\|fr\|it\|nl\).*\.utf-\?8"'
let tests += [
\ {
\ 'name' : 'sort with locale',
\ 'cmd' : '%sort l',
\ 'input' : [
\ 'A',
\ 'E',
\ 'O',
\ 'À',
\ 'È',
\ 'É',
\ 'Ô',
\ 'Œ',
\ 'Z',
\ 'a',
\ 'e',
\ 'o',
\ 'à',
\ 'è',
\ 'é',
\ 'ô',
\ 'œ',
\ 'z'
\ ],
\ 'expected' : [
\ 'a',
\ 'A',
\ 'à',
\ 'À',
\ 'e',
\ 'E',
\ 'é',
\ 'É',
\ 'è',
\ 'È',
\ 'o',
\ 'O',
\ 'ô',
\ 'Ô',
\ 'œ',
\ 'Œ',
\ 'z',
\ 'Z'
\ ]
\ },
\ ]
endif
for t in tests
enew!
call append(0, t.input)