mirror of
https://github.com/neovim/neovim.git
synced 2025-02-25 18:55:25 -06:00
vim-patch:8.2.1933: cannot sort using locale ordering
Problem: Cannot sort using locale ordering.
Solution: Add a flag for :sort and sort() to use the locale. (Dominique
Pellé, closes vim/vim#7237)
55e29611d2
This commit is contained in:
parent
1d72b6e4cd
commit
6a0b8cbd81
@ -1742,7 +1742,7 @@ Vim has a sorting function and a sorting command. The sorting function can be
|
|||||||
found here: |sort()|, |uniq()|.
|
found here: |sort()|, |uniq()|.
|
||||||
|
|
||||||
*:sor* *:sort*
|
*:sor* *:sort*
|
||||||
:[range]sor[t][!] [b][f][i][n][o][r][u][x] [/{pattern}/]
|
:[range]sor[t][!] [b][f][i][l][n][o][r][u][x] [/{pattern}/]
|
||||||
Sort lines in [range]. When no range is given all
|
Sort lines in [range]. When no range is given all
|
||||||
lines are sorted.
|
lines are sorted.
|
||||||
|
|
||||||
@ -1750,6 +1750,14 @@ found here: |sort()|, |uniq()|.
|
|||||||
|
|
||||||
With [i] case is ignored.
|
With [i] case is ignored.
|
||||||
|
|
||||||
|
With [l] sort uses the current locale. See
|
||||||
|
`:language collate` to check or set the locale used
|
||||||
|
for ordering. For example, with "en_US.UTF8",
|
||||||
|
Ö will be ordered after O and before P,
|
||||||
|
whereas with the Swedish locale "sv_SE.UTF8",
|
||||||
|
it will be after Z.
|
||||||
|
Case is typically ignored by the locale.
|
||||||
|
|
||||||
Options [n][f][x][o][b] are mutually exclusive.
|
Options [n][f][x][o][b] are mutually exclusive.
|
||||||
|
|
||||||
With [n] sorting is done on the first decimal number
|
With [n] sorting is done on the first decimal number
|
||||||
@ -1816,8 +1824,7 @@ found here: |sort()|, |uniq()|.
|
|||||||
Note that using `:sort` with `:global` doesn't sort the matching lines, it's
|
Note that using `:sort` with `:global` doesn't sort the matching lines, it's
|
||||||
quite useless.
|
quite useless.
|
||||||
|
|
||||||
The details about sorting depend on the library function used. There is no
|
`:sort` does not use the current locale unless the [l] flag is used.
|
||||||
guarantee that sorting obeys the current locale. You will have to try it out.
|
|
||||||
Vim does do a "stable" sort.
|
Vim does do a "stable" sort.
|
||||||
|
|
||||||
The sorting can be interrupted, but if you interrupt it too late in the
|
The sorting can be interrupted, but if you interrupt it too late in the
|
||||||
|
@ -8359,6 +8359,13 @@ sort({list} [, {func} [, {dict}]]) *sort()* *E702*
|
|||||||
When {func} is given and it is '1' or 'i' then case is
|
When {func} is given and it is '1' or 'i' then case is
|
||||||
ignored.
|
ignored.
|
||||||
|
|
||||||
|
When {func} is given and it is 'l' then the current locale
|
||||||
|
is used for ordering. See `:language collate` to check or set
|
||||||
|
the locale used for ordering. For example, with "en_US.UTF8",
|
||||||
|
Ö will be ordered after O and before P, whereas with the
|
||||||
|
Swedish locale "sv_SE.UTF8", it will be after Z.
|
||||||
|
Case is typically ignored by the locale.
|
||||||
|
|
||||||
When {func} is given and it is 'n' then all items will be
|
When {func} is given and it is 'n' then all items will be
|
||||||
sorted numerical (Implementation detail: This uses the
|
sorted numerical (Implementation detail: This uses the
|
||||||
strtod() function to parse numbers, Strings, Lists, Dicts and
|
strtod() function to parse numbers, Strings, Lists, Dicts and
|
||||||
|
@ -9166,6 +9166,7 @@ static void f_sockconnect(typval_T *argvars, typval_T *rettv, FunPtr fptr)
|
|||||||
/// struct storing information about current sort
|
/// struct storing information about current sort
|
||||||
typedef struct {
|
typedef struct {
|
||||||
int item_compare_ic;
|
int item_compare_ic;
|
||||||
|
bool item_compare_lc;
|
||||||
bool item_compare_numeric;
|
bool item_compare_numeric;
|
||||||
bool item_compare_numbers;
|
bool item_compare_numbers;
|
||||||
bool item_compare_float;
|
bool item_compare_float;
|
||||||
@ -9240,10 +9241,10 @@ static int item_compare(const void *s1, const void *s2, bool keep_zero)
|
|||||||
p2 = "";
|
p2 = "";
|
||||||
}
|
}
|
||||||
if (!sortinfo->item_compare_numeric) {
|
if (!sortinfo->item_compare_numeric) {
|
||||||
if (sortinfo->item_compare_ic) {
|
if (sortinfo->item_compare_lc) {
|
||||||
res = STRICMP(p1, p2);
|
res = strcoll(p1, p2);
|
||||||
} else {
|
} else {
|
||||||
res = STRCMP(p1, p2);
|
res = sortinfo->item_compare_ic ? STRICMP(p1, p2): STRCMP(p1, p2);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
double n1, n2;
|
double n1, n2;
|
||||||
@ -9378,6 +9379,7 @@ static void do_sort_uniq(typval_T *argvars, typval_T *rettv, bool sort)
|
|||||||
}
|
}
|
||||||
|
|
||||||
info.item_compare_ic = false;
|
info.item_compare_ic = false;
|
||||||
|
info.item_compare_lc = false;
|
||||||
info.item_compare_numeric = false;
|
info.item_compare_numeric = false;
|
||||||
info.item_compare_numbers = false;
|
info.item_compare_numbers = false;
|
||||||
info.item_compare_float = false;
|
info.item_compare_float = false;
|
||||||
@ -9422,6 +9424,9 @@ static void do_sort_uniq(typval_T *argvars, typval_T *rettv, bool sort)
|
|||||||
} else if (strcmp(info.item_compare_func, "i") == 0) {
|
} else if (strcmp(info.item_compare_func, "i") == 0) {
|
||||||
info.item_compare_func = NULL;
|
info.item_compare_func = NULL;
|
||||||
info.item_compare_ic = true;
|
info.item_compare_ic = true;
|
||||||
|
} else if (strcmp(info.item_compare_func, "l") == 0) {
|
||||||
|
info.item_compare_func = NULL;
|
||||||
|
info.item_compare_lc = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -358,6 +358,7 @@ static int linelen(int *has_tab)
|
|||||||
static char_u *sortbuf1;
|
static char_u *sortbuf1;
|
||||||
static char_u *sortbuf2;
|
static char_u *sortbuf2;
|
||||||
|
|
||||||
|
static int sort_lc; ///< sort using locale
|
||||||
static int sort_ic; ///< ignore case
|
static int sort_ic; ///< ignore case
|
||||||
static int sort_nr; ///< sort on number
|
static int sort_nr; ///< sort on number
|
||||||
static int sort_rx; ///< sort on regex instead of skipping it
|
static int sort_rx; ///< sort on regex instead of skipping it
|
||||||
@ -381,6 +382,13 @@ typedef struct {
|
|||||||
} st_u;
|
} st_u;
|
||||||
} sorti_T;
|
} sorti_T;
|
||||||
|
|
||||||
|
/// Compare two strings using the comparison mode selected by the sort flags.
///
/// When `sort_lc` is set the strings are compared with strcoll(), and
/// `sort_ic` is not consulted.  Otherwise STRICMP() is used when `sort_ic`
/// is set and STRCMP() when it is not.
///
/// @param s1  first string to compare
/// @param s2  second string to compare
///
/// @return the result of the selected comparison function
static int string_compare(const void *s1, const void *s2) FUNC_ATTR_NONNULL_ALL
{
  if (sort_lc) {
    // Keep the const qualifier: strcoll() takes `const char *` arguments.
    return strcoll((const char *)s1, (const char *)s2);
  }
  return sort_ic ? STRICMP(s1, s2) : STRCMP(s1, s2);
}
|
||||||
|
|
||||||
static int sort_compare(const void *s1, const void *s2)
|
static int sort_compare(const void *s1, const void *s2)
|
||||||
{
|
{
|
||||||
@ -424,8 +432,7 @@ static int sort_compare(const void *s1, const void *s2)
|
|||||||
l2.st_u.line.end_col_nr - l2.st_u.line.start_col_nr + 1);
|
l2.st_u.line.end_col_nr - l2.st_u.line.start_col_nr + 1);
|
||||||
sortbuf2[l2.st_u.line.end_col_nr - l2.st_u.line.start_col_nr] = NUL;
|
sortbuf2[l2.st_u.line.end_col_nr - l2.st_u.line.start_col_nr] = NUL;
|
||||||
|
|
||||||
result = sort_ic ? STRICMP(sortbuf1, sortbuf2)
|
result = string_compare(sortbuf1, sortbuf2);
|
||||||
: STRCMP(sortbuf1, sortbuf2);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* If two lines have the same value, preserve the original line order. */
|
/* If two lines have the same value, preserve the original line order. */
|
||||||
@ -466,7 +473,7 @@ void ex_sort(exarg_T *eap)
|
|||||||
regmatch.regprog = NULL;
|
regmatch.regprog = NULL;
|
||||||
sorti_T *nrs = xmalloc(count * sizeof(sorti_T));
|
sorti_T *nrs = xmalloc(count * sizeof(sorti_T));
|
||||||
|
|
||||||
sort_abort = sort_ic = sort_rx = sort_nr = sort_flt = 0;
|
sort_abort = sort_ic = sort_lc = sort_rx = sort_nr = sort_flt = 0;
|
||||||
size_t format_found = 0;
|
size_t format_found = 0;
|
||||||
bool change_occurred = false; // Buffer contents changed.
|
bool change_occurred = false; // Buffer contents changed.
|
||||||
|
|
||||||
@ -474,6 +481,8 @@ void ex_sort(exarg_T *eap)
|
|||||||
if (ascii_iswhite(*p)) {
|
if (ascii_iswhite(*p)) {
|
||||||
} else if (*p == 'i') {
|
} else if (*p == 'i') {
|
||||||
sort_ic = true;
|
sort_ic = true;
|
||||||
|
} else if (*p == 'l') {
|
||||||
|
sort_lc = true;
|
||||||
} else if (*p == 'r') {
|
} else if (*p == 'r') {
|
||||||
sort_rx = true;
|
sort_rx = true;
|
||||||
} else if (*p == 'n') {
|
} else if (*p == 'n') {
|
||||||
@ -645,8 +654,7 @@ void ex_sort(exarg_T *eap)
|
|||||||
s = ml_get(get_lnum);
|
s = ml_get(get_lnum);
|
||||||
size_t bytelen = STRLEN(s) + 1; // include EOL in bytelen
|
size_t bytelen = STRLEN(s) + 1; // include EOL in bytelen
|
||||||
old_count += bytelen;
|
old_count += bytelen;
|
||||||
if (!unique || i == 0
|
if (!unique || i == 0 || string_compare(s, sortbuf1) != 0) {
|
||||||
|| (sort_ic ? STRICMP(s, sortbuf1) : STRCMP(s, sortbuf1)) != 0) {
|
|
||||||
// Copy the line into a buffer, it may become invalid in
|
// Copy the line into a buffer, it may become invalid in
|
||||||
// ml_append(). And it's needed for "unique".
|
// ml_append(). And it's needed for "unique".
|
||||||
STRCPY(sortbuf1, s);
|
STRCPY(sortbuf1, s);
|
||||||
|
@ -13,6 +13,25 @@ func Test_sort_strings()
|
|||||||
" numbers compared as strings
|
" numbers compared as strings
|
||||||
call assert_equal([1, 2, 3], sort([3, 2, 1]))
|
call assert_equal([1, 2, 3], sort([3, 2, 1]))
|
||||||
call assert_equal([13, 28, 3], sort([3, 28, 13]))
|
call assert_equal([13, 28, 3], sort([3, 28, 13]))
|
||||||
|
|
||||||
|
call assert_equal(['A', 'O', 'P', 'a', 'o', 'p', 'Ä', 'Ô', 'ä', 'ô', 'œ', 'œ'],
|
||||||
|
\ sort(['A', 'O', 'P', 'a', 'o', 'p', 'Ä', 'Ô', 'ä', 'ô', 'œ', 'œ']))
|
||||||
|
|
||||||
|
call assert_equal(['A', 'a', 'o', 'O', 'p', 'P', 'Ä', 'Ô', 'ä', 'ô', 'œ', 'œ'],
|
||||||
|
\ sort(['A', 'a', 'o', 'O', 'œ', 'œ', 'p', 'P', 'Ä', 'ä', 'ô', 'Ô'], 'i'))
|
||||||
|
|
||||||
|
let lc = execute('language collate')
|
||||||
|
" With the following locales, the accented letters are ordered
|
||||||
|
" similarly to the non-accentuated letters...
|
||||||
|
if lc =~? '"\(en\|es\|de\|fr\|it\|nl\).*\.utf-\?8"'
|
||||||
|
call assert_equal(['a', 'A', 'ä', 'Ä', 'o', 'O', 'ô', 'Ô', 'œ', 'œ', 'p', 'P'],
|
||||||
|
\ sort(['A', 'a', 'o', 'O', 'œ', 'œ', 'p', 'P', 'Ä', 'ä', 'ô', 'Ô'], 'l'))
|
||||||
|
" ... whereas with a Swedish locale, the accented letters are ordered
|
||||||
|
" after Z.
|
||||||
|
elseif lc =~? '"sv.*utf-\?8"'
|
||||||
|
call assert_equal(['a', 'A', 'o', 'O', 'p', 'P', 'ä', 'Ä', 'œ', 'œ', 'ô', 'Ô'],
|
||||||
|
\ sort(['A', 'a', 'o', 'O', 'œ', 'œ', 'p', 'P', 'Ä', 'ä', 'ô', 'Ô'], 'l'))
|
||||||
|
endif
|
||||||
endfunc
|
endfunc
|
||||||
|
|
||||||
func Test_sort_numeric()
|
func Test_sort_numeric()
|
||||||
@ -1223,6 +1242,58 @@ func Test_sort_cmd()
|
|||||||
\ },
|
\ },
|
||||||
\ ]
|
\ ]
|
||||||
|
|
||||||
|
" With the following locales, the accented letters are ordered
|
||||||
|
" similarly to the non-accentuated letters...
|
||||||
|
let lc = execute('language collate')
|
||||||
|
if lc =~? '"\(en\|es\|de\|fr\|it\|nl\).*\.utf-\?8"'
|
||||||
|
let tests += [
|
||||||
|
\ {
|
||||||
|
\ 'name' : 'sort with locale',
|
||||||
|
\ 'cmd' : '%sort l',
|
||||||
|
\ 'input' : [
|
||||||
|
\ 'A',
|
||||||
|
\ 'E',
|
||||||
|
\ 'O',
|
||||||
|
\ 'À',
|
||||||
|
\ 'È',
|
||||||
|
\ 'É',
|
||||||
|
\ 'Ô',
|
||||||
|
\ 'Œ',
|
||||||
|
\ 'Z',
|
||||||
|
\ 'a',
|
||||||
|
\ 'e',
|
||||||
|
\ 'o',
|
||||||
|
\ 'à',
|
||||||
|
\ 'è',
|
||||||
|
\ 'é',
|
||||||
|
\ 'ô',
|
||||||
|
\ 'œ',
|
||||||
|
\ 'z'
|
||||||
|
\ ],
|
||||||
|
\ 'expected' : [
|
||||||
|
\ 'a',
|
||||||
|
\ 'A',
|
||||||
|
\ 'à',
|
||||||
|
\ 'À',
|
||||||
|
\ 'e',
|
||||||
|
\ 'E',
|
||||||
|
\ 'é',
|
||||||
|
\ 'É',
|
||||||
|
\ 'è',
|
||||||
|
\ 'È',
|
||||||
|
\ 'o',
|
||||||
|
\ 'O',
|
||||||
|
\ 'ô',
|
||||||
|
\ 'Ô',
|
||||||
|
\ 'œ',
|
||||||
|
\ 'Œ',
|
||||||
|
\ 'z',
|
||||||
|
\ 'Z'
|
||||||
|
\ ]
|
||||||
|
\ },
|
||||||
|
\ ]
|
||||||
|
endif
|
||||||
|
|
||||||
for t in tests
|
for t in tests
|
||||||
enew!
|
enew!
|
||||||
call append(0, t.input)
|
call append(0, t.input)
|
||||||
|
Loading…
Reference in New Issue
Block a user