vim-patch:8.2.2813: cannot grep using fuzzy matching

Problem:    Cannot grep using fuzzy matching.
Solution:   Add the "f" flag to :vimgrep. (Yegappan Lakshmanan, closes vim/vim#8152)
bb01a1ef3a
This commit is contained in:
Sean Dewar 2022-01-01 23:06:09 +00:00
parent 715fbcbb8c
commit ce797e08f5
No known key found for this signature in database
GPG Key ID: 08CC2C83AD41B581
7 changed files with 162 additions and 70 deletions

View File

@ -989,7 +989,7 @@ commands can be combined to create a NewGrep command: >
5.1 using Vim's internal grep 5.1 using Vim's internal grep
*:vim* *:vimgrep* *E682* *E683* *:vim* *:vimgrep* *E682* *E683*
:vim[grep][!] /{pattern}/[g][j] {file} ... :vim[grep][!] /{pattern}/[g][j][f] {file} ...
Search for {pattern} in the files {file} ... and set Search for {pattern} in the files {file} ... and set
the error list to the matches. Files matching the error list to the matches. Files matching
'wildignore' are ignored; files in 'suffixes' are 'wildignore' are ignored; files in 'suffixes' are
@ -1042,20 +1042,20 @@ commands can be combined to create a NewGrep command: >
:vimgrep Error *.c :vimgrep Error *.c
< <
*:lv* *:lvimgrep* *:lv* *:lvimgrep*
:lv[imgrep][!] /{pattern}/[g][j] {file} ... :lv[imgrep][!] /{pattern}/[g][j][f] {file} ...
:lv[imgrep][!] {pattern} {file} ... :lv[imgrep][!] {pattern} {file} ...
Same as ":vimgrep", except the location list for the Same as ":vimgrep", except the location list for the
current window is used instead of the quickfix list. current window is used instead of the quickfix list.
*:vimgrepa* *:vimgrepadd* *:vimgrepa* *:vimgrepadd*
:vimgrepa[dd][!] /{pattern}/[g][j] {file} ... :vimgrepa[dd][!] /{pattern}/[g][j][f] {file} ...
:vimgrepa[dd][!] {pattern} {file} ... :vimgrepa[dd][!] {pattern} {file} ...
Just like ":vimgrep", but instead of making a new list Just like ":vimgrep", but instead of making a new list
of errors the matches are appended to the current of errors the matches are appended to the current
list. list.
*:lvimgrepa* *:lvimgrepadd* *:lvimgrepa* *:lvimgrepadd*
:lvimgrepa[dd][!] /{pattern}/[g][j] {file} ... :lvimgrepa[dd][!] /{pattern}/[g][j][f] {file} ...
:lvimgrepa[dd][!] {pattern} {file} ... :lvimgrepa[dd][!] {pattern} {file} ...
Same as ":vimgrepadd", except the location list for Same as ":vimgrepadd", except the location list for
the current window is used instead of the quickfix the current window is used instead of the quickfix

View File

@ -6141,12 +6141,14 @@ char_u *skip_vimgrep_pat(char_u *p, char_u **s, int *flags)
p++; p++;
// Find the flags // Find the flags
while (*p == 'g' || *p == 'j') { while (*p == 'g' || *p == 'j' || *p == 'f') {
if (flags != NULL) { if (flags != NULL) {
if (*p == 'g') { if (*p == 'g') {
*flags |= VGR_GLOBAL; *flags |= VGR_GLOBAL;
} else { } else if (*p == 'j') {
*flags |= VGR_NOJUMP; *flags |= VGR_NOJUMP;
} else {
*flags |= VGR_FUZZY;
} }
} }
p++; p++;

View File

@ -5194,49 +5194,93 @@ static bool vgr_qflist_valid(win_T *wp, qf_info_T *qi, unsigned qfid, char_u *ti
/// Search for a pattern in all the lines in a buffer and add the matching lines /// Search for a pattern in all the lines in a buffer and add the matching lines
/// to a quickfix list. /// to a quickfix list.
static bool vgr_match_buflines(qf_list_T *qfl, char_u *fname, buf_T *buf, regmmatch_T *regmatch, static bool vgr_match_buflines(qf_list_T *qfl, char_u *fname, buf_T *buf, char_u *spat,
long *tomatch, int duplicate_name, int flags) regmmatch_T *regmatch, long *tomatch, int duplicate_name, int flags)
FUNC_ATTR_NONNULL_ARG(1, 3, 4, 5) FUNC_ATTR_NONNULL_ARG(1, 3, 4, 5, 6)
{ {
bool found_match = false; bool found_match = false;
for (long lnum = 1; lnum <= buf->b_ml.ml_line_count && *tomatch > 0; lnum++) { for (long lnum = 1; lnum <= buf->b_ml.ml_line_count && *tomatch > 0; lnum++) {
colnr_T col = 0; colnr_T col = 0;
while (vim_regexec_multi(regmatch, curwin, buf, lnum, col, NULL, if (!(flags & VGR_FUZZY)) {
NULL) > 0) { // Regular expression match
// Pass the buffer number so that it gets used even for a while (vim_regexec_multi(regmatch, curwin, buf, lnum, col, NULL, NULL) > 0) {
// dummy buffer, unless duplicate_name is set, then the // Pass the buffer number so that it gets used even for a
// buffer will be wiped out below. // dummy buffer, unless duplicate_name is set, then the
if (qf_add_entry(qfl, // buffer will be wiped out below.
NULL, // dir if (qf_add_entry(qfl,
fname, NULL, // dir
NULL, fname,
duplicate_name ? 0 : buf->b_fnum, NULL,
ml_get_buf(buf, regmatch->startpos[0].lnum + lnum, duplicate_name ? 0 : buf->b_fnum,
false), ml_get_buf(buf, regmatch->startpos[0].lnum + lnum, false),
regmatch->startpos[0].lnum + lnum, regmatch->startpos[0].lnum + lnum,
regmatch->endpos[0].lnum + lnum, regmatch->endpos[0].lnum + lnum,
regmatch->startpos[0].col + 1, regmatch->startpos[0].col + 1,
regmatch->endpos[0].col + 1, regmatch->endpos[0].col + 1,
false, // vis_col false, // vis_col
NULL, // search pattern NULL, // search pattern
0, // nr 0, // nr
0, // type 0, // type
true) // valid true) // valid
== QF_FAIL) { == QF_FAIL) {
got_int = true; got_int = true;
break; break;
}
found_match = true;
if (--*tomatch == 0) {
break;
}
if ((flags & VGR_GLOBAL) == 0 || regmatch->endpos[0].lnum > 0) {
break;
}
col = regmatch->endpos[0].col + (col == regmatch->endpos[0].col);
if (col > (colnr_T)STRLEN(ml_get_buf(buf, lnum, false))) {
break;
}
} }
found_match = true; } else {
if (--*tomatch == 0) { const size_t pat_len = STRLEN(spat);
break; char_u *const str = ml_get_buf(buf, lnum, false);
} int score;
if ((flags & VGR_GLOBAL) == 0 || regmatch->endpos[0].lnum > 0) { uint32_t matches[MAX_FUZZY_MATCHES];
break; const size_t sz = sizeof(matches) / sizeof(matches[0]);
}
col = regmatch->endpos[0].col + (col == regmatch->endpos[0].col); // Fuzzy string match
if (col > (colnr_T)STRLEN(ml_get_buf(buf, lnum, false))) { while (fuzzy_match(str + col, spat, false, &score, matches, (int)sz) > 0) {
break; // Pass the buffer number so that it gets used even for a
// dummy buffer, unless duplicate_name is set, then the
// buffer will be wiped out below.
if (qf_add_entry(qfl,
NULL, // dir
fname,
NULL,
duplicate_name ? 0 : buf->b_fnum,
str,
lnum,
0,
(colnr_T)matches[0] + col + 1,
0,
false, // vis_col
NULL, // search pattern
0, // nr
0, // type
true) // valid
== QF_FAIL) {
got_int = true;
break;
}
found_match = true;
if (--*tomatch == 0) {
break;
}
if ((flags & VGR_GLOBAL) == 0) {
break;
}
col = (colnr_T)matches[pat_len - 1] + col + 1;
if (col > (colnr_T)STRLEN(str)) {
break;
}
} }
} }
line_breakcheck(); line_breakcheck();
@ -5418,8 +5462,7 @@ void ex_vimgrep(exarg_T *eap)
} else { } else {
// Try for a match in all lines of the buffer. // Try for a match in all lines of the buffer.
// For ":1vimgrep" look for first match only. // For ":1vimgrep" look for first match only.
found_match = vgr_match_buflines(qf_get_curlist(qi), found_match = vgr_match_buflines(qf_get_curlist(qi), fname, buf, s, &regmatch, &tomatch,
fname, buf, &regmatch, &tomatch,
duplicate_name, flags); duplicate_name, flags);
if (using_dummy) { if (using_dummy) {

View File

@ -7,6 +7,7 @@
// flags for skip_vimgrep_pat() // flags for skip_vimgrep_pat()
#define VGR_GLOBAL 1 #define VGR_GLOBAL 1
#define VGR_NOJUMP 2 #define VGR_NOJUMP 2
#define VGR_FUZZY 4
#ifdef INCLUDE_GENERATED_DECLARATIONS #ifdef INCLUDE_GENERATED_DECLARATIONS
# include "quickfix.h.generated.h" # include "quickfix.h.generated.h"

View File

@ -48,8 +48,6 @@
#include "nvim/vim.h" #include "nvim/vim.h"
#include "nvim/window.h" #include "nvim/window.h"
typedef uint32_t matchidx_T;
#ifdef INCLUDE_GENERATED_DECLARATIONS #ifdef INCLUDE_GENERATED_DECLARATIONS
# include "search.c.generated.h" # include "search.c.generated.h"
#endif #endif
@ -4843,13 +4841,11 @@ typedef struct {
#define SCORE_NONE -9999 #define SCORE_NONE -9999
#define FUZZY_MATCH_RECURSION_LIMIT 10 #define FUZZY_MATCH_RECURSION_LIMIT 10
/// Maximum number of characters that can be fuzzy matched
#define MAXMATCHES 256
/// Compute a score for a fuzzy matched string. The matching character locations /// Compute a score for a fuzzy matched string. The matching character locations
/// are in 'matches'. /// are in 'matches'.
static int fuzzy_match_compute_score(const char_u *const str, const int strSz, static int fuzzy_match_compute_score(const char_u *const str, const int strSz,
const matchidx_T *const matches, const int numMatches) const uint32_t *const matches, const int numMatches)
FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_PURE FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_PURE
{ {
// Initialize score // Initialize score
@ -4868,10 +4864,10 @@ static int fuzzy_match_compute_score(const char_u *const str, const int strSz,
// Apply ordering bonuses // Apply ordering bonuses
for (int i = 0; i < numMatches; i++) { for (int i = 0; i < numMatches; i++) {
const matchidx_T currIdx = matches[i]; const uint32_t currIdx = matches[i];
if (i > 0) { if (i > 0) {
const matchidx_T prevIdx = matches[i - 1]; const uint32_t prevIdx = matches[i - 1];
// Sequential // Sequential
if (currIdx == prevIdx + 1) { if (currIdx == prevIdx + 1) {
@ -4887,7 +4883,7 @@ static int fuzzy_match_compute_score(const char_u *const str, const int strSz,
const char_u *p = str; const char_u *p = str;
int neighbor; int neighbor;
for (matchidx_T sidx = 0; sidx < currIdx; sidx++) { for (uint32_t sidx = 0; sidx < currIdx; sidx++) {
neighbor = utf_ptr2char(p); neighbor = utf_ptr2char(p);
MB_PTR_ADV(p); MB_PTR_ADV(p);
} }
@ -4913,16 +4909,16 @@ static int fuzzy_match_compute_score(const char_u *const str, const int strSz,
/// Perform a recursive search for fuzzy matching 'fuzpat' in 'str'. /// Perform a recursive search for fuzzy matching 'fuzpat' in 'str'.
/// @return the number of matching characters. /// @return the number of matching characters.
static int fuzzy_match_recursive(const char_u *fuzpat, const char_u *str, matchidx_T strIdx, static int fuzzy_match_recursive(const char_u *fuzpat, const char_u *str, uint32_t strIdx,
int *const outScore, const char_u *const strBegin, int *const outScore, const char_u *const strBegin,
const int strLen, const matchidx_T *const srcMatches, const int strLen, const uint32_t *const srcMatches,
matchidx_T *const matches, const int maxMatches, int nextMatch, uint32_t *const matches, const int maxMatches, int nextMatch,
int *const recursionCount) int *const recursionCount)
FUNC_ATTR_NONNULL_ARG(1, 2, 4, 5, 8, 11) FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ARG(1, 2, 4, 5, 8, 11) FUNC_ATTR_WARN_UNUSED_RESULT
{ {
// Recursion params // Recursion params
bool recursiveMatch = false; bool recursiveMatch = false;
matchidx_T bestRecursiveMatches[MAXMATCHES]; uint32_t bestRecursiveMatches[MAX_FUZZY_MATCHES];
int bestRecursiveScore = 0; int bestRecursiveScore = 0;
// Count recursions // Count recursions
@ -4932,7 +4928,7 @@ static int fuzzy_match_recursive(const char_u *fuzpat, const char_u *str, matchi
} }
// Detect end of strings // Detect end of strings
if (*fuzpat == '\0' || *str == '\0') { if (*fuzpat == NUL || *str == NUL) {
return 0; return 0;
} }
@ -4956,7 +4952,7 @@ static int fuzzy_match_recursive(const char_u *fuzpat, const char_u *str, matchi
} }
// Recursive call that "skips" this match // Recursive call that "skips" this match
matchidx_T recursiveMatches[MAXMATCHES]; uint32_t recursiveMatches[MAX_FUZZY_MATCHES];
int recursiveScore = 0; int recursiveScore = 0;
const char_u *const next_char = str + utfc_ptr2len(str); const char_u *const next_char = str + utfc_ptr2len(str);
if (fuzzy_match_recursive(fuzpat, next_char, strIdx + 1, &recursiveScore, strBegin, strLen, if (fuzzy_match_recursive(fuzpat, next_char, strIdx + 1, &recursiveScore, strBegin, strLen,
@ -4965,7 +4961,8 @@ static int fuzzy_match_recursive(const char_u *fuzpat, const char_u *str, matchi
recursionCount)) { recursionCount)) {
// Pick best recursive score // Pick best recursive score
if (!recursiveMatch || recursiveScore > bestRecursiveScore) { if (!recursiveMatch || recursiveScore > bestRecursiveScore) {
memcpy(bestRecursiveMatches, recursiveMatches, MAXMATCHES * sizeof(recursiveMatches[0])); memcpy(bestRecursiveMatches, recursiveMatches,
MAX_FUZZY_MATCHES * sizeof(recursiveMatches[0]));
bestRecursiveScore = recursiveScore; bestRecursiveScore = recursiveScore;
} }
recursiveMatch = true; recursiveMatch = true;
@ -5008,13 +5005,13 @@ static int fuzzy_match_recursive(const char_u *fuzpat, const char_u *str, matchi
/// normalized and varies with pattern. /// normalized and varies with pattern.
/// Recursion is limited internally (default=10) to prevent degenerate cases /// Recursion is limited internally (default=10) to prevent degenerate cases
/// (pat_arg="aaaaaa" str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"). /// (pat_arg="aaaaaa" str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").
/// Uses char_u for match indices. Therefore patterns are limited to MAXMATCHES /// Uses char_u for match indices. Therefore patterns are limited to
/// characters. /// MAX_FUZZY_MATCHES characters.
/// ///
/// @return true if 'pat_arg' matches 'str'. Also returns the match score in /// @return true if 'pat_arg' matches 'str'. Also returns the match score in
/// 'outScore' and the matching character positions in 'matches'. /// 'outScore' and the matching character positions in 'matches'.
static bool fuzzy_match(char_u *const str, const char_u *const pat_arg, const bool matchseq, bool fuzzy_match(char_u *const str, const char_u *const pat_arg, const bool matchseq,
int *const outScore, matchidx_T *const matches, const int maxMatches) int *const outScore, uint32_t *const matches, const int maxMatches)
FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT
{ {
const int len = mb_charlen(str); const int len = mb_charlen(str);
@ -5108,7 +5105,7 @@ static void fuzzy_match_in_list(list_T *const items, char_u *const str, const bo
fuzzyItem_T *const ptrs = xcalloc(len, sizeof(fuzzyItem_T)); fuzzyItem_T *const ptrs = xcalloc(len, sizeof(fuzzyItem_T));
long i = 0; long i = 0;
bool found_match = false; bool found_match = false;
matchidx_T matches[MAXMATCHES]; uint32_t matches[MAX_FUZZY_MATCHES];
// For all the string items in items, get the fuzzy matching score // For all the string items in items, get the fuzzy matching score
TV_LIST_ITER(items, li, { TV_LIST_ITER(items, li, {
@ -5250,8 +5247,8 @@ static void do_fuzzymatch(const typval_T *const argvars, typval_T *const rettv,
// To search a dict, either a callback function or a key can be // To search a dict, either a callback function or a key can be
// specified. // specified.
dict_T *const d = argvars[2].vval.v_dict; dict_T *const d = argvars[2].vval.v_dict;
const dictitem_T *di; const dictitem_T *const di = tv_dict_find(d, "key", -1);
if ((di = tv_dict_find(d, "key", -1)) != NULL) { if (di != NULL) {
if (di->di_tv.v_type != VAR_STRING || di->di_tv.vval.v_string == NULL if (di->di_tv.v_type != VAR_STRING || di->di_tv.vval.v_string == NULL
|| *di->di_tv.vval.v_string == NUL) { || *di->di_tv.vval.v_string == NUL) {
semsg(_(e_invarg2), tv_get_string(&di->di_tv)); semsg(_(e_invarg2), tv_get_string(&di->di_tv));
@ -5262,7 +5259,7 @@ static void do_fuzzymatch(const typval_T *const argvars, typval_T *const rettv,
semsg(_(e_invargval), "text_cb"); semsg(_(e_invargval), "text_cb");
return; return;
} }
if ((di = tv_dict_find(d, "matchseq", -1)) != NULL) { if (tv_dict_find(d, "matchseq", -1) != NULL) {
matchseq = true; matchseq = true;
} }
} }

View File

@ -55,6 +55,9 @@
#define SEARCH_STAT_DEF_MAX_COUNT 99 #define SEARCH_STAT_DEF_MAX_COUNT 99
#define SEARCH_STAT_BUF_LEN 12 #define SEARCH_STAT_BUF_LEN 12
/// Maximum number of characters that can be fuzzy matched
#define MAX_FUZZY_MATCHES 256
/// Structure containing offset definition for the last search pattern /// Structure containing offset definition for the last search pattern
/// ///
/// @note Only offset for the last search pattern is used, not for the last /// @note Only offset for the last search pattern is used, not for the last

View File

@ -32,7 +32,7 @@ func s:setup_commands(cchar)
command! -count -nargs=* -bang Xnfile <mods><count>cnfile<bang> <args> command! -count -nargs=* -bang Xnfile <mods><count>cnfile<bang> <args>
command! -nargs=* -bang Xpfile <mods>cpfile<bang> <args> command! -nargs=* -bang Xpfile <mods>cpfile<bang> <args>
command! -nargs=* Xexpr <mods>cexpr <args> command! -nargs=* Xexpr <mods>cexpr <args>
command! -count -nargs=* Xvimgrep <mods> <count>vimgrep <args> command! -count=999 -nargs=* Xvimgrep <mods> <count>vimgrep <args>
command! -nargs=* Xvimgrepadd <mods> vimgrepadd <args> command! -nargs=* Xvimgrepadd <mods> vimgrepadd <args>
command! -nargs=* Xgrep <mods> grep <args> command! -nargs=* Xgrep <mods> grep <args>
command! -nargs=* Xgrepadd <mods> grepadd <args> command! -nargs=* Xgrepadd <mods> grepadd <args>
@ -69,7 +69,7 @@ func s:setup_commands(cchar)
command! -count -nargs=* -bang Xnfile <mods><count>lnfile<bang> <args> command! -count -nargs=* -bang Xnfile <mods><count>lnfile<bang> <args>
command! -nargs=* -bang Xpfile <mods>lpfile<bang> <args> command! -nargs=* -bang Xpfile <mods>lpfile<bang> <args>
command! -nargs=* Xexpr <mods>lexpr <args> command! -nargs=* Xexpr <mods>lexpr <args>
command! -count -nargs=* Xvimgrep <mods> <count>lvimgrep <args> command! -count=999 -nargs=* Xvimgrep <mods> <count>lvimgrep <args>
command! -nargs=* Xvimgrepadd <mods> lvimgrepadd <args> command! -nargs=* Xvimgrepadd <mods> lvimgrepadd <args>
command! -nargs=* Xgrep <mods> lgrep <args> command! -nargs=* Xgrep <mods> lgrep <args>
command! -nargs=* Xgrepadd <mods> lgrepadd <args> command! -nargs=* Xgrepadd <mods> lgrepadd <args>
@ -5028,6 +5028,52 @@ func Test_qfbuf_update()
call Xqfbuf_update('l') call Xqfbuf_update('l')
endfunc endfunc
" Test for the :vimgrep 'f' flag (fuzzy match)
func Xvimgrep_fuzzy_match(cchar)
" Exercise the 'f' (fuzzy) flag of the Xvimgrep wrapper command for the
" command family selected by a:cchar.  Each case searches the Xfile* glob and
" compares the resulting list entries (file, lnum, col, text) against fixed
" expectations.  The files matching Xfile* must already exist when this is
" called.
call s:setup_commands(a:cchar)
" Multi-word pattern, no 'g' flag: two entries are expected, one per line.
" Column 9 is where 'three' starts in 'one two three'.
Xvimgrep /three one/f Xfile*
let l = g:Xgetlist()
call assert_equal(2, len(l))
call assert_equal(['Xfile1', 1, 9, 'one two three'],
\ [bufname(l[0].bufnr), l[0].lnum, l[0].col, l[0].text])
call assert_equal(['Xfile2', 2, 1, 'three one two'],
\ [bufname(l[1].bufnr), l[1].lnum, l[1].col, l[1].text])
" Short pattern: three entries are expected.  In every expected line the
" reported column is the position of the first 't' (9, 1 and 4).
Xvimgrep /the/f Xfile*
let l = g:Xgetlist()
call assert_equal(3, len(l))
call assert_equal(['Xfile1', 1, 9, 'one two three'],
\ [bufname(l[0].bufnr), l[0].lnum, l[0].col, l[0].text])
call assert_equal(['Xfile2', 2, 1, 'three one two'],
\ [bufname(l[1].bufnr), l[1].lnum, l[1].col, l[1].text])
call assert_equal(['Xfile2', 4, 4, 'aaathreeaaa'],
\ [bufname(l[2].bufnr), l[2].lnum, l[2].col, l[2].text])
" 'f' combined with 'g': the same line may be listed more than once.  Two
" entries each are expected for 'aaaaaa' (columns 1 and 4) and for
" 'aaathreeaaa' (columns 1 and 9).
Xvimgrep /aaa/fg Xfile*
let l = g:Xgetlist()
call assert_equal(4, len(l))
call assert_equal(['Xfile1', 2, 1, 'aaaaaa'],
\ [bufname(l[0].bufnr), l[0].lnum, l[0].col, l[0].text])
call assert_equal(['Xfile1', 2, 4, 'aaaaaa'],
\ [bufname(l[1].bufnr), l[1].lnum, l[1].col, l[1].text])
call assert_equal(['Xfile2', 4, 1, 'aaathreeaaa'],
\ [bufname(l[2].bufnr), l[2].lnum, l[2].col, l[2].text])
call assert_equal(['Xfile2', 4, 9, 'aaathreeaaa'],
\ [bufname(l[3].bufnr), l[3].lnum, l[3].col, l[3].text])
" A pattern that is expected to match nothing must fail with E480.
call assert_fails('Xvimgrep /xyz/fg Xfile*', 'E480:')
endfunc
func Test_vimgrep_fuzzy_match()
  " Fixture files (name, lines) searched through the Xfile* glob by
  " Xvimgrep_fuzzy_match().  Kept as an ordered list so the files are
  " created and removed in a fixed order.
  let fixtures = [
        \ ['Xfile1', ['one two three', 'aaaaaa']],
        \ ['Xfile2', ['one', 'three one two', 'two', 'aaathreeaaa']],
        \ ]
  for [fname, content] in fixtures
    call writefile(content, fname)
  endfor

  " Run the same checks once with 'c' and once with 'l'.
  for cchar in ['c', 'l']
    call Xvimgrep_fuzzy_match(cchar)
  endfor

  " Remove the fixture files again.
  for [fname, content] in fixtures
    call delete(fname)
  endfor
endfunc
" Test for getting a specific item from a quickfix list " Test for getting a specific item from a quickfix list
func Xtest_getqflist_by_idx(cchar) func Xtest_getqflist_by_idx(cchar)
call s:setup_commands(a:cchar) call s:setup_commands(a:cchar)