From fba00b5e7ef2b6903a4588a2c080d8b33a8a2b68 Mon Sep 17 00:00:00 2001 From: Sean Dewar Date: Sat, 1 Jan 2022 14:58:32 +0000 Subject: [PATCH 1/8] vim-patch:8.2.1665: cannot do fuzzy string matching Problem: Cannot do fuzzy string matching. Solution: Add matchfuzzy(). (Yegappan Lakshmanan, closes vim/vim#6932) https://github.com/vim/vim/commit/635414dd2f3ae7d4d972d79b806348a6516cb91a Adjust Test_matchfuzzy's 2nd assert to expect the last error thrown, as v8.2.1183 hasn't been ported yet (to be honest, the error message is kinda weird if the 2nd argument is not convertible to string). We can still port this fully as porting v8.2.1183 would require removing this change to pass CI. --- runtime/doc/builtin.txt | 22 ++ runtime/doc/usr_41.txt | 1 + src/nvim/eval.lua | 1 + src/nvim/search.c | 298 ++++++++++++++++++++++++++++ src/nvim/testdir/test_functions.vim | 26 +++ 5 files changed, 348 insertions(+) diff --git a/runtime/doc/builtin.txt b/runtime/doc/builtin.txt index 56bc8bfb3e..198ef25ed9 100644 --- a/runtime/doc/builtin.txt +++ b/runtime/doc/builtin.txt @@ -296,6 +296,7 @@ matcharg({nr}) List arguments of |:match| matchdelete({id} [, {win}]) Number delete match identified by {id} matchend({expr}, {pat}[, {start}[, {count}]]) Number position where {pat} ends in {expr} +matchfuzzy({list}, {str}) List fuzzy match {str} in {list} matchlist({expr}, {pat}[, {start}[, {count}]]) List match and submatches of {pat} in {expr} matchstr({expr}, {pat}[, {start}[, {count}]]) @@ -4857,6 +4858,27 @@ matchend({expr}, {pat} [, {start} [, {count}]]) *matchend()* Can also be used as a |method|: > GetText()->matchend('word') +matchfuzzy({list}, {str}) *matchfuzzy()* + Returns a list with all the strings in {list} that fuzzy + match {str}. The strings in the returned list are sorted + based on the matching score. {str} is treated as a literal + string and regular expression matching is NOT supported. + The maximum supported {str} length is 256. + + If there are no matching strings or there is an error, then an + empty list is returned. If length of {str} is greater than + 256, then returns an empty list. + + Example: > + :echo matchfuzzy(["clay", "crow"], "cay") +< results in ["clay"]. > + :echo getbufinfo()->map({_, v -> v.name})->matchfuzzy("ndl") +< results in a list of buffer names fuzzy matching "ndl". > + :echo v:oldfiles->matchfuzzy("test") +< results in a list of file names fuzzy matching "test". > + :let l = readfile("buffer.c")->matchfuzzy("str") +< results in a list of lines in "buffer.c" fuzzy matching "str". + matchlist({expr}, {pat} [, {start} [, {count}]]) *matchlist()* Same as |match()|, but return a |List|. The first item in the list is the matched string, same as what matchstr() would diff --git a/runtime/doc/usr_41.txt b/runtime/doc/usr_41.txt index bf29c94d51..19f58cddd6 100644 --- a/runtime/doc/usr_41.txt +++ b/runtime/doc/usr_41.txt @@ -608,6 +608,7 @@ String manipulation: *string-functions* toupper() turn a string to uppercase match() position where a pattern matches in a string matchend() position where a pattern match ends in a string + matchfuzzy() fuzzy matches a string in a list of strings matchstr() match of a pattern in a string matchstrpos() match and positions of a pattern in a string matchlist() like matchstr() and also return submatches diff --git a/src/nvim/eval.lua b/src/nvim/eval.lua index eedc8ac45d..37b051222e 100644 --- a/src/nvim/eval.lua +++ b/src/nvim/eval.lua @@ -249,6 +249,7 @@ return { matcharg={args=1, base=1}, matchdelete={args={1, 2}, base=1}, matchend={args={2, 4}, base=1}, + matchfuzzy={args=2, base=1}, matchlist={args={2, 4}, base=1}, matchstr={args={2, 4}, base=1}, matchstrpos={args={2,4}, base=1}, diff --git a/src/nvim/search.c b/src/nvim/search.c index 93180f97fe..7bc994fad3 100644 --- a/src/nvim/search.c +++ b/src/nvim/search.c @@ -4764,6 +4764,304 @@ the_end: restore_last_search_pattern(); } +/// Fuzzy string matching +/// +/// Ported from the lib_fts library authored by Forrest Smith. +/// https://github.com/forrestthewoods/lib_fts/tree/master/code +/// +/// Blog describing the algorithm: +/// https://www.forrestthewoods.com/blog/reverse_engineering_sublime_texts_fuzzy_match/ +/// +/// Each matching string is assigned a score. The following factors are checked: +/// Matched letter +/// Unmatched letter +/// Consecutively matched letters +/// Proximity to start +/// Letter following a separator (space, underscore) +/// Uppercase letter following lowercase (aka CamelCase) +/// +/// Matched letters are good. Unmatched letters are bad. Matching near the start +/// is good. Matching the first letter in the middle of a phrase is good. +/// Matching the uppercase letters in camel case entries is good. +/// +/// The score assigned for each factor is explained below. +/// File paths are different from file names. File extensions may be ignorable. +/// Single words care about consecutive matches but not separators or camel +/// case. +/// Score starts at 0 +/// Matched letter: +0 points +/// Unmatched letter: -1 point +/// Consecutive match bonus: +5 points +/// Separator bonus: +10 points +/// Camel case bonus: +10 points +/// Unmatched leading letter: -3 points (max: -9) +/// +/// There is some nuance to this. Scores don’t have an intrinsic meaning. The +/// score range isn’t 0 to 100. It’s roughly [-50, 50]. Longer words have a +/// lower minimum score due to unmatched letter penalty. Longer search patterns +/// have a higher maximum score due to match bonuses. +/// +/// Separator and camel case bonus is worth a LOT. Consecutive matches are worth +/// quite a bit. +/// +/// There is a penalty if you DON’T match the first three letters. Which +/// effectively rewards matching near the start. However there’s no difference +/// in matching between the middle and end. +/// +/// There is not an explicit bonus for an exact match. Unmatched letters receive +/// a penalty. So shorter strings and closer matches are worth more. +typedef struct { + const listitem_T *item; + int score; +} fuzzyItem_T; + +static bool fuzzy_match_recursive(const char_u *fuzpat, const char_u *str, int *const outScore, + const char_u *const strBegin, const char_u *const srcMatches, + char_u *const matches, const int maxMatches, int nextMatch, + int *const recursionCount, const int recursionLimit) + FUNC_ATTR_NONNULL_ARG(1, 2, 3, 4, 6, 9) FUNC_ATTR_WARN_UNUSED_RESULT +{ + // Recursion params + bool recursiveMatch = false; + char_u bestRecursiveMatches[256]; + int bestRecursiveScore = 0; + + // Count recursions + (*recursionCount)++; + if (*recursionCount >= recursionLimit) { + return false; + } + + // Detect end of strings + if (*fuzpat == '\0' || *str == '\0') { + return false; + } + + // Loop through fuzpat and str looking for a match + bool first_match = true; + while (*fuzpat != '\0' && *str != '\0') { + // Found match + if (mb_tolower(*fuzpat) == mb_tolower(*str)) { + // Supplied matches buffer was too short + if (nextMatch >= maxMatches) { + return false; + } + + // "Copy-on-Write" srcMatches into matches + if (first_match && srcMatches) { + memcpy(matches, srcMatches, nextMatch); + first_match = false; + } + + // Recursive call that "skips" this match + char_u recursiveMatches[256]; + int recursiveScore = 0; + if (fuzzy_match_recursive(fuzpat, str + 1, &recursiveScore, strBegin, matches, + recursiveMatches, sizeof(recursiveMatches), nextMatch, + recursionCount, recursionLimit)) { + // Pick best recursive score + if (!recursiveMatch || recursiveScore > bestRecursiveScore) { + memcpy(bestRecursiveMatches, recursiveMatches, 256); + bestRecursiveScore = recursiveScore; + } + recursiveMatch = true; + } + + // Advance + matches[nextMatch++] = (char_u)(str - strBegin); + fuzpat++; + } + str++; + } + + // Determine if full fuzpat was matched + const bool matched = *fuzpat == '\0'; + + // Calculate score + if (matched) { + // bonus for adjacent matches + const int sequential_bonus = 15; + // bonus if match occurs after a separator + const int separator_bonus = 30; + // bonus if match is uppercase and prev is lower + const int camel_bonus = 30; + // bonus if the first letter is matched + const int first_letter_bonus = 15; + // penalty applied for every letter in str before the first match + const int leading_letter_penalty = -5; + // maximum penalty for leading letters + const int max_leading_letter_penalty = -15; + // penalty for every letter that doesn't matter + const int unmatched_letter_penalty = -1; + + // Iterate str to end + while (*str != '\0') { + str++; + } + + // Initialize score + *outScore = 100; + + // Apply leading letter penalty + int penalty = leading_letter_penalty * matches[0]; + if (penalty < max_leading_letter_penalty) { + penalty = max_leading_letter_penalty; + } + *outScore += penalty; + + // Apply unmatched penalty + const int unmatched = (int)(str - strBegin) - nextMatch; + *outScore += unmatched_letter_penalty * unmatched; + + // Apply ordering bonuses + for (int i = 0; i < nextMatch; i++) { + const char_u currIdx = matches[i]; + + if (i > 0) { + const char_u prevIdx = matches[i - 1]; + + // Sequential + if (currIdx == (prevIdx + 1)) { + *outScore += sequential_bonus; + } + } + + // Check for bonuses based on neighbor character value + if (currIdx > 0) { + // Camel case + const char_u neighbor = strBegin[currIdx - 1]; + const char_u curr = strBegin[currIdx]; + + if (islower(neighbor) && isupper(curr)) { + *outScore += camel_bonus; + } + + // Separator + const bool neighborSeparator = neighbor == '_' || neighbor == ' '; + if (neighborSeparator) { + *outScore += separator_bonus; + } + } else { + // First letter + *outScore += first_letter_bonus; + } + } + } + + // Return best result + if (recursiveMatch && (!matched || bestRecursiveScore > *outScore)) { + // Recursive score is better than "this" + memcpy(matches, bestRecursiveMatches, maxMatches); + *outScore = bestRecursiveScore; + return true; + } else if (matched) { + return true; // "this" score is better than recursive + } + + return false; // no match +} + +/// fuzzy_match() +/// +/// Performs exhaustive search via recursion to find all possible matches and +/// match with highest score. +/// Scores values have no intrinsic meaning. Possible score range is not +/// normalized and varies with pattern. +/// Recursion is limited internally (default=10) to prevent degenerate cases +/// (fuzpat="aaaaaa" str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"). +/// Uses char_u for match indices. Therefore patterns are limited to 256 +/// characters. +/// +/// Returns true if fuzpat is found AND calculates a score. +static bool fuzzy_match(const char_u *const str, const char_u *const fuzpat, int *const outScore) + FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT +{ + char_u matches[256]; + int recursionCount = 0; + int recursionLimit = 10; + + *outScore = 0; + + return fuzzy_match_recursive(fuzpat, str, outScore, str, NULL, matches, sizeof(matches), 0, + &recursionCount, recursionLimit); +} + +/// Sort the fuzzy matches in the descending order of the match score. +static int fuzzy_item_compare(const void *const s1, const void *const s2) + FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_PURE +{ + const int v1 = ((const fuzzyItem_T *)s1)->score; + const int v2 = ((const fuzzyItem_T *)s2)->score; + + return v1 == v2 ? 0 : v1 > v2 ? -1 : 1; +} + +/// Fuzzy search the string 'str' in 'strlist' and return the matching strings +/// in 'fmatchlist'. +static void match_fuzzy(const list_T *const strlist, const char_u *const str, + list_T *const fmatchlist) + FUNC_ATTR_NONNULL_ARG(2, 3) +{ + const long len = tv_list_len(strlist); + if (len == 0) { + return; + } + + fuzzyItem_T *const ptrs = xmalloc(sizeof(fuzzyItem_T) * len); + long i = 0; + bool found_match = false; + + // For all the string items in strlist, get the fuzzy matching score + TV_LIST_ITER_CONST(strlist, li, { + ptrs[i].item = li; + ptrs[i].score = -9999; + // ignore non-string items in the list + const typval_T *const tv = TV_LIST_ITEM_TV(li); + if (tv->v_type == VAR_STRING && tv->vval.v_string != NULL) { + int score; + if (fuzzy_match(tv->vval.v_string, str, &score)) { + ptrs[i].score = score; + found_match = true; + } + } + i++; + }); + + if (found_match) { + // Sort the list by the descending order of the match score + qsort(ptrs, (size_t)len, sizeof(fuzzyItem_T), fuzzy_item_compare); + + // Copy the matching strings with 'score != -9999' to the return list + for (i = 0; i < len; i++) { + if (ptrs[i].score == -9999) { + break; + } + const typval_T *const tv = TV_LIST_ITEM_TV(ptrs[i].item); + tv_list_append_string(fmatchlist, (const char *)tv->vval.v_string, -1); + } + } + + xfree(ptrs); +} + +/// "matchfuzzy()" function +void f_matchfuzzy(typval_T *argvars, typval_T *rettv, FunPtr fptr) +{ + if (argvars[0].v_type != VAR_LIST) { + emsg(_(e_listreq)); + return; + } + if (argvars[0].vval.v_list == NULL) { + return; + } + if (argvars[1].v_type != VAR_STRING || argvars[1].vval.v_string == NULL) { + semsg(_(e_invarg2), tv_get_string(&argvars[1])); + return; + } + match_fuzzy(argvars[0].vval.v_list, (const char_u *)tv_get_string(&argvars[1]), + tv_list_alloc_ret(rettv, kListLenUnknown)); +} + /// Find identifiers or defines in included files. /// If p_ic && (compl_cont_status & CONT_SOL) then ptr must be in lowercase. /// diff --git a/src/nvim/testdir/test_functions.vim b/src/nvim/testdir/test_functions.vim index 438bed51c6..57ae0d3020 100644 --- a/src/nvim/testdir/test_functions.vim +++ b/src/nvim/testdir/test_functions.vim @@ -1731,6 +1731,32 @@ func Test_nr2char() call assert_equal("\x80\xfc\b\xfd\x80\xfeX\x80\xfeX\x80\xfeX\x80\xfeX\x80\xfeX", eval('"\"')) endfunc +" Test for matchfuzzy() +func Test_matchfuzzy() + call assert_fails('call matchfuzzy(10, "abc")', 'E714:') + " Needs v8.2.1183. + " call assert_fails('call matchfuzzy(["abc"], [])', 'E730:') + call assert_fails('call matchfuzzy(["abc"], [])', 'E475:') + call assert_equal([], matchfuzzy([], 'abc')) + call assert_equal([], matchfuzzy(['abc'], '')) + call assert_equal(['abc'], matchfuzzy(['abc', 10], 'ac')) + call assert_equal([], matchfuzzy([10, 20], 'ac')) + call assert_equal(['abc'], matchfuzzy(['abc'], 'abc')) + call assert_equal(['crayon', 'camera'], matchfuzzy(['camera', 'crayon'], 'cra')) + call assert_equal(['aabbaa', 'aaabbbaaa', 'aaaabbbbaaaa', 'aba'], matchfuzzy(['aba', 'aabbaa', 'aaabbbaaa', 'aaaabbbbaaaa'], 'aa')) + call assert_equal(['one'], matchfuzzy(['one', 'two'], 'one')) + call assert_equal(['oneTwo', 'onetwo'], matchfuzzy(['onetwo', 'oneTwo'], 'oneTwo')) + call assert_equal(['one_two', 'onetwo'], matchfuzzy(['onetwo', 'one_two'], 'oneTwo')) + call assert_equal(['aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'], matchfuzzy(['aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'], 'aa')) + call assert_equal([], matchfuzzy([repeat('a', 300)], repeat('a', 257))) + + %bw! + eval ['somebuf', 'anotherone', 'needle', 'yetanotherone']->map({_, v -> bufadd(v) + bufload(v)}) + let l = getbufinfo()->map({_, v -> v.name})->matchfuzzy('ndl') + call assert_equal(1, len(l)) + call assert_match('needle', l[0]) +endfunc + " Test for getcurpos() and setpos() func Test_getcurpos_setpos() new From 960ea01972ad5fd291a846dce67f96a95222c310 Mon Sep 17 00:00:00 2001 From: Sean Dewar Date: Sat, 1 Jan 2022 16:40:28 +0000 Subject: [PATCH 2/8] vim-patch:8.2.1726: fuzzy matching only works on strings Problem: Fuzzy matching only works on strings. Solution: Support passing a dict. Add matchfuzzypos() to also get the match positions. (Yegappan Lakshmanan, closes vim/vim#6947) https://github.com/vim/vim/commit/4f73b8e9cc83f647b34002554a8bdf9abec0a82f Also remove some N/A and seemingly useless NULL checks -- Nvim allocs can't return NULL. I'm not sure why the retmatchpos stuff in match_fuzzy checks for NULL too, given that Vim checks for NULL alloc in do_fuzzymatch; assert that the li stuff is not NULL as that's the one check I'm ever-so-slightly unsure about. Adjust tests. Note that the text_cb tests actually throw E6000 in Nvim, but we also can't assert that error due to v8.2.1183 not being ported yet. --- runtime/doc/builtin.txt | 57 +++- runtime/doc/usr_41.txt | 1 + src/nvim/eval.lua | 3 +- src/nvim/eval/funcs.c | 1 - src/nvim/globals.h | 1 + src/nvim/search.c | 400 ++++++++++++++++++--------- src/nvim/testdir/test_functions.vim | 26 -- src/nvim/testdir/test_matchfuzzy.vim | 203 ++++++++++++++ 8 files changed, 530 insertions(+), 162 deletions(-) create mode 100644 src/nvim/testdir/test_matchfuzzy.vim diff --git a/runtime/doc/builtin.txt b/runtime/doc/builtin.txt index 198ef25ed9..14cf9f09fc 100644 --- a/runtime/doc/builtin.txt +++ b/runtime/doc/builtin.txt @@ -296,7 +296,10 @@ matcharg({nr}) List arguments of |:match| matchdelete({id} [, {win}]) Number delete match identified by {id} matchend({expr}, {pat}[, {start}[, {count}]]) Number position where {pat} ends in {expr} -matchfuzzy({list}, {str}) List fuzzy match {str} in {list} +matchfuzzy({list}, {str} [, {dict}]) + List fuzzy match {str} in {list} +matchfuzzypos({list}, {str} [, {dict}]) + List fuzzy match {str} in {list} matchlist({expr}, {pat}[, {start}[, {count}]]) List match and submatches of {pat} in {expr} matchstr({expr}, {pat}[, {start}[, {count}]]) @@ -4858,12 +4861,25 @@ matchend({expr}, {pat} [, {start} [, {count}]]) *matchend()* Can also be used as a |method|: > GetText()->matchend('word') -matchfuzzy({list}, {str}) *matchfuzzy()* - Returns a list with all the strings in {list} that fuzzy - match {str}. The strings in the returned list are sorted - based on the matching score. {str} is treated as a literal - string and regular expression matching is NOT supported. - The maximum supported {str} length is 256. +matchfuzzy({list}, {str} [, {dict}]) *matchfuzzy()* + If {list} is a list of strings, then returns a list with all + the strings in {list} that fuzzy match {str}. The strings in + the returned list are sorted based on the matching score. + + If {list} is a list of dictionaries, then the optional {dict} + argument supports the following items: + key key of the item which is fuzzy matched against + {str}. The value of this item should be a + string. + text_cb |Funcref| that will be called for every item + in {list} to get the text for fuzzy matching. + This should accept a dictionary item as the + argument and return the text for that item to + use for fuzzy matching. + + {str} is treated as a literal string and regular expression + matching is NOT supported. The maximum supported {str} length + is 256. If there are no matching strings or there is an error, then an empty list is returned. If length of {str} is greater than @@ -4874,11 +4890,38 @@ matchfuzzy({list}, {str}) *matchfuzzy()* < results in ["clay"]. > :echo getbufinfo()->map({_, v -> v.name})->matchfuzzy("ndl") < results in a list of buffer names fuzzy matching "ndl". > + :echo getbufinfo()->matchfuzzy("ndl", {'key' : 'name'}) +< results in a list of buffer information dicts with buffer + names fuzzy matching "ndl". > + :echo getbufinfo()->matchfuzzy("spl", + \ {'text_cb' : {v -> v.name}}) +< results in a list of buffer information dicts with buffer + names fuzzy matching "spl". > :echo v:oldfiles->matchfuzzy("test") < results in a list of file names fuzzy matching "test". > :let l = readfile("buffer.c")->matchfuzzy("str") < results in a list of lines in "buffer.c" fuzzy matching "str". +matchfuzzypos({list}, {str} [, {dict}]) *matchfuzzypos()* + Same as |matchfuzzy()|, but returns the list of matched + strings and the list of character positions where characters + in {str} matches. + + If {str} matches multiple times in a string, then only the + positions for the best match is returned. + + If there are no matching strings or there is an error, then a + list with two empty list items is returned. + + Example: > + :echo matchfuzzypos(['testing'], 'tsg') +< results in [['testing'], [[0, 2, 6]]] > + :echo matchfuzzypos(['clay', 'lacy'], 'la') +< results in [['lacy', 'clay'], [[0, 1], [1, 2]]] > + :echo [{'text': 'hello', 'id' : 10}] + \ ->matchfuzzypos('ll', {'key' : 'text'}) +< results in [{'id': 10, 'text': 'hello'}] [[2, 3]] + matchlist({expr}, {pat} [, {start} [, {count}]]) *matchlist()* Same as |match()|, but return a |List|. The first item in the list is the matched string, same as what matchstr() would diff --git a/runtime/doc/usr_41.txt b/runtime/doc/usr_41.txt index 19f58cddd6..bf024315f6 100644 --- a/runtime/doc/usr_41.txt +++ b/runtime/doc/usr_41.txt @@ -609,6 +609,7 @@ String manipulation: *string-functions* match() position where a pattern matches in a string matchend() position where a pattern match ends in a string matchfuzzy() fuzzy matches a string in a list of strings + matchfuzzypos() fuzzy matches a string in a list of strings matchstr() match of a pattern in a string matchstrpos() match and positions of a pattern in a string matchlist() like matchstr() and also return submatches diff --git a/src/nvim/eval.lua b/src/nvim/eval.lua index 37b051222e..6dedd0f745 100644 --- a/src/nvim/eval.lua +++ b/src/nvim/eval.lua @@ -249,7 +249,8 @@ return { matcharg={args=1, base=1}, matchdelete={args={1, 2}, base=1}, matchend={args={2, 4}, base=1}, - matchfuzzy={args=2, base=1}, + matchfuzzy={args={2, 3}, base=1}, + matchfuzzypos={args={2, 3}, base=1}, matchlist={args={2, 4}, base=1}, matchstr={args={2, 4}, base=1}, matchstrpos={args={2,4}, base=1}, diff --git a/src/nvim/eval/funcs.c b/src/nvim/eval/funcs.c index 138745094c..111fae0928 100644 --- a/src/nvim/eval/funcs.c +++ b/src/nvim/eval/funcs.c @@ -98,7 +98,6 @@ PRAGMA_DIAG_POP #endif -static char *e_listarg = N_("E686: Argument of %s must be a List"); static char *e_listblobarg = N_("E899: Argument of %s must be a List or Blob"); static char *e_invalwindow = N_("E957: Invalid window number"); diff --git a/src/nvim/globals.h b/src/nvim/globals.h index 041b60d838..f6fbe98ff0 100644 --- a/src/nvim/globals.h +++ b/src/nvim/globals.h @@ -979,6 +979,7 @@ EXTERN char e_invalidreg[] INIT(= N_("E850: Invalid register name")); EXTERN char e_dirnotf[] INIT(= N_("E919: Directory not found in '%s': \"%s\"")); EXTERN char e_au_recursive[] INIT(= N_("E952: Autocommand caused recursive behavior")); EXTERN char e_autocmd_close[] INIT(= N_("E813: Cannot close autocmd window")); +EXTERN char e_listarg[] INIT(= N_("E686: Argument of %s must be a List")); EXTERN char e_unsupportedoption[] INIT(= N_("E519: Option not supported")); EXTERN char e_fnametoolong[] INIT(= N_("E856: Filename too long")); EXTERN char e_float_as_string[] INIT(= N_("E806: using Float as a String")); diff --git a/src/nvim/search.c b/src/nvim/search.c index 7bc994fad3..8f3e66744f 100644 --- a/src/nvim/search.c +++ b/src/nvim/search.c @@ -48,6 +48,8 @@ #include "nvim/vim.h" #include "nvim/window.h" +typedef uint32_t matchidx_T; + #ifdef INCLUDE_GENERATED_DECLARATIONS # include "search.c.generated.h" #endif @@ -4811,24 +4813,109 @@ the_end: /// There is not an explicit bonus for an exact match. Unmatched letters receive /// a penalty. So shorter strings and closer matches are worth more. typedef struct { - const listitem_T *item; + listitem_T *item; int score; + list_T *lmatchpos; } fuzzyItem_T; -static bool fuzzy_match_recursive(const char_u *fuzpat, const char_u *str, int *const outScore, - const char_u *const strBegin, const char_u *const srcMatches, - char_u *const matches, const int maxMatches, int nextMatch, - int *const recursionCount, const int recursionLimit) - FUNC_ATTR_NONNULL_ARG(1, 2, 3, 4, 6, 9) FUNC_ATTR_WARN_UNUSED_RESULT +/// bonus for adjacent matches +#define SEQUENTIAL_BONUS 15 +/// bonus if match occurs after a separator +#define SEPARATOR_BONUS 30 +/// bonus if match is uppercase and prev is lower +#define CAMEL_BONUS 30 +/// bonus if the first letter is matched +#define FIRST_LETTER_BONUS 15 +/// penalty applied for every letter in str before the first match +#define LEADING_LETTER_PENALTY -5 +/// maximum penalty for leading letters +#define MAX_LEADING_LETTER_PENALTY -15 +/// penalty for every letter that doesn't match +#define UNMATCHED_LETTER_PENALTY -1 +/// Score for a string that doesn't fuzzy match the pattern +#define SCORE_NONE -9999 + +#define FUZZY_MATCH_RECURSION_LIMIT 10 +/// Maximum number of characters that can be fuzzy matched +#define MAXMATCHES 256 + +/// Compute a score for a fuzzy matched string. The matching character locations +/// are in 'matches'. +static int fuzzy_match_compute_score(const char_u *const str, const int strSz, + const matchidx_T *const matches, const int numMatches) + FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_PURE +{ + // Initialize score + int score = 100; + + // Apply leading letter penalty + int penalty = LEADING_LETTER_PENALTY * matches[0]; + if (penalty < MAX_LEADING_LETTER_PENALTY) { + penalty = MAX_LEADING_LETTER_PENALTY; + } + score += penalty; + + // Apply unmatched penalty + const int unmatched = strSz - numMatches; + score += UNMATCHED_LETTER_PENALTY * unmatched; + + // Apply ordering bonuses + for (int i = 0; i < numMatches; i++) { + const matchidx_T currIdx = matches[i]; + + if (i > 0) { + const matchidx_T prevIdx = matches[i - 1]; + + // Sequential + if (currIdx == prevIdx + 1) { + score += SEQUENTIAL_BONUS; + } + } + + // Check for bonuses based on neighbor character value + if (currIdx > 0) { + // Camel case + const char_u *p = str; + int neighbor; + + for (matchidx_T sidx = 0; sidx < currIdx; sidx++) { + neighbor = utf_ptr2char(p); + mb_ptr2char_adv(&p); + } + const int curr = utf_ptr2char(p); + + if (mb_islower(neighbor) && mb_isupper(curr)) { + score += CAMEL_BONUS; + } + + // Separator + const bool neighborSeparator = neighbor == '_' || neighbor == ' '; + if (neighborSeparator) { + score += SEPARATOR_BONUS; + } + } else { + // First letter + score += FIRST_LETTER_BONUS; + } + } + return score; +} + +static bool fuzzy_match_recursive(const char_u *fuzpat, const char_u *str, matchidx_T strIdx, + int *const outScore, const char_u *const strBegin, + const int strLen, const matchidx_T *const srcMatches, + matchidx_T *const matches, const int maxMatches, int nextMatch, + int *const recursionCount) + FUNC_ATTR_NONNULL_ARG(1, 2, 4, 5, 8, 11) FUNC_ATTR_WARN_UNUSED_RESULT { // Recursion params bool recursiveMatch = false; - char_u bestRecursiveMatches[256]; + matchidx_T bestRecursiveMatches[MAXMATCHES]; int bestRecursiveScore = 0; // Count recursions (*recursionCount)++; - if (*recursionCount >= recursionLimit) { + if (*recursionCount >= FUZZY_MATCH_RECURSION_LIMIT) { return false; } @@ -4839,119 +4926,59 @@ static bool fuzzy_match_recursive(const char_u *fuzpat, const char_u *str, int * // Loop through fuzpat and str looking for a match bool first_match = true; - while (*fuzpat != '\0' && *str != '\0') { + while (*fuzpat != NUL && *str != NUL) { + const int c1 = utf_ptr2char(fuzpat); + const int c2 = utf_ptr2char(str); + // Found match - if (mb_tolower(*fuzpat) == mb_tolower(*str)) { + if (mb_tolower(c1) == mb_tolower(c2)) { // Supplied matches buffer was too short if (nextMatch >= maxMatches) { return false; } // "Copy-on-Write" srcMatches into matches - if (first_match && srcMatches) { - memcpy(matches, srcMatches, nextMatch); + if (first_match && srcMatches != NULL) { + memcpy(matches, srcMatches, nextMatch * sizeof(srcMatches[0])); first_match = false; } // Recursive call that "skips" this match - char_u recursiveMatches[256]; + matchidx_T recursiveMatches[MAXMATCHES]; int recursiveScore = 0; - if (fuzzy_match_recursive(fuzpat, str + 1, &recursiveScore, strBegin, matches, - recursiveMatches, sizeof(recursiveMatches), nextMatch, - recursionCount, recursionLimit)) { + const char_u *const next_char = str + utfc_ptr2len(str); + if (fuzzy_match_recursive(fuzpat, next_char, strIdx + 1, &recursiveScore, strBegin, strLen, + matches, recursiveMatches, + sizeof(recursiveMatches) / sizeof(recursiveMatches[0]), nextMatch, + recursionCount)) { // Pick best recursive score if (!recursiveMatch || recursiveScore > bestRecursiveScore) { - memcpy(bestRecursiveMatches, recursiveMatches, 256); + memcpy(bestRecursiveMatches, recursiveMatches, MAXMATCHES * sizeof(recursiveMatches[0])); bestRecursiveScore = recursiveScore; } recursiveMatch = true; } // Advance - matches[nextMatch++] = (char_u)(str - strBegin); - fuzpat++; + matches[nextMatch++] = strIdx; + mb_ptr2char_adv(&fuzpat); } - str++; + mb_ptr2char_adv(&str); + strIdx++; } // Determine if full fuzpat was matched - const bool matched = *fuzpat == '\0'; + const bool matched = *fuzpat == NUL; // Calculate score if (matched) { - // bonus for adjacent matches - const int sequential_bonus = 15; - // bonus if match occurs after a separator - const int separator_bonus = 30; - // bonus if match is uppercase and prev is lower - const int camel_bonus = 30; - // bonus if the first letter is matched - const int first_letter_bonus = 15; - // penalty applied for every letter in str before the first match - const int leading_letter_penalty = -5; - // maximum penalty for leading letters - const int max_leading_letter_penalty = -15; - // penalty for every letter that doesn't matter - const int unmatched_letter_penalty = -1; - - // Iterate str to end - while (*str != '\0') { - str++; - } - - // Initialize score - *outScore = 100; - - // Apply leading letter penalty - int penalty = leading_letter_penalty * matches[0]; - if (penalty < max_leading_letter_penalty) { - penalty = max_leading_letter_penalty; - } - *outScore += penalty; - - // Apply unmatched penalty - const int unmatched = (int)(str - strBegin) - nextMatch; - *outScore += unmatched_letter_penalty * unmatched; - - // Apply ordering bonuses - for (int i = 0; i < nextMatch; i++) { - const char_u currIdx = matches[i]; - - if (i > 0) { - const char_u prevIdx = matches[i - 1]; - - // Sequential - if (currIdx == (prevIdx + 1)) { - *outScore += sequential_bonus; - } - } - - // Check for bonuses based on neighbor character value - if (currIdx > 0) { - // Camel case - const char_u neighbor = strBegin[currIdx - 1]; - const char_u curr = strBegin[currIdx]; - - if (islower(neighbor) && isupper(curr)) { - *outScore += camel_bonus; - } - - // Separator - const bool neighborSeparator = neighbor == '_' || neighbor == ' '; - if (neighborSeparator) { - *outScore += separator_bonus; - } - } else { - // First letter - *outScore += first_letter_bonus; - } - } + *outScore = fuzzy_match_compute_score(strBegin, strLen, matches, nextMatch); } // Return best result if (recursiveMatch && (!matched || bestRecursiveScore > *outScore)) { // Recursive score is better than "this" - memcpy(matches, bestRecursiveMatches, maxMatches); + memcpy(matches, bestRecursiveMatches, maxMatches * sizeof(matches[0])); *outScore = bestRecursiveScore; return true; } else if (matched) { @@ -4969,21 +4996,22 @@ static bool fuzzy_match_recursive(const char_u *fuzpat, const char_u *str, int * /// normalized and varies with pattern. /// Recursion is limited internally (default=10) to prevent degenerate cases /// (fuzpat="aaaaaa" str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"). -/// Uses char_u for match indices. Therefore patterns are limited to 256 +/// Uses char_u for match indices. Therefore patterns are limited to MAXMATCHES /// characters. /// -/// Returns true if fuzpat is found AND calculates a score. -static bool fuzzy_match(const char_u *const str, const char_u *const fuzpat, int *const outScore) +/// @return true if 'fuzpat' matches 'str'. Also returns the match score in +/// 'outScore' and the matching character positions in 'matches'. +static bool fuzzy_match(char_u *const str, const char_u *const fuzpat, int *const outScore, + matchidx_T *const matches, const int maxMatches) FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT { - char_u matches[256]; int recursionCount = 0; - int recursionLimit = 10; + const int len = mb_charlen(str); *outScore = 0; - return fuzzy_match_recursive(fuzpat, str, outScore, str, NULL, matches, sizeof(matches), 0, - &recursionCount, recursionLimit); + return fuzzy_match_recursive(fuzpat, str, 0, outScore, str, len, NULL, matches, maxMatches, 0, + &recursionCount); } /// Sort the fuzzy matches in the descending order of the match score. @@ -4996,70 +5024,188 @@ static int fuzzy_item_compare(const void *const s1, const void *const s2) return v1 == v2 ? 0 : v1 > v2 ? -1 : 1; } -/// Fuzzy search the string 'str' in 'strlist' and return the matching strings -/// in 'fmatchlist'. -static void match_fuzzy(const list_T *const strlist, const char_u *const str, - list_T *const fmatchlist) - FUNC_ATTR_NONNULL_ARG(2, 3) +/// Fuzzy search the string 'str' in a list of 'items' and return the matching +/// strings in 'fmatchlist'. +/// If 'items' is a list of strings, then search for 'str' in the list. +/// If 'items' is a list of dicts, then either use 'key' to lookup the string +/// for each item or use 'item_cb' Funcref function to get the string. +/// If 'retmatchpos' is true, then return a list of positions where 'str' +/// matches for each item. +static void match_fuzzy(list_T *const items, char_u *const str, const char_u *const key, + Callback *const item_cb, const bool retmatchpos, list_T *const fmatchlist) + FUNC_ATTR_NONNULL_ARG(2, 4, 6) { - const long len = tv_list_len(strlist); + const long len = tv_list_len(items); if (len == 0) { return; } - fuzzyItem_T *const ptrs = xmalloc(sizeof(fuzzyItem_T) * len); + fuzzyItem_T *const ptrs = xcalloc(len, sizeof(fuzzyItem_T)); long i = 0; bool found_match = false; + matchidx_T matches[MAXMATCHES]; - // For all the string items in strlist, get the fuzzy matching score - TV_LIST_ITER_CONST(strlist, li, { + // For all the string items in items, get the fuzzy matching score + TV_LIST_ITER(items, li, { ptrs[i].item = li; - ptrs[i].score = -9999; - // ignore non-string items in the list + ptrs[i].score = SCORE_NONE; + char_u *itemstr = NULL; + typval_T rettv; + rettv.v_type = VAR_UNKNOWN; const typval_T *const tv = TV_LIST_ITEM_TV(li); - if (tv->v_type == VAR_STRING && tv->vval.v_string != NULL) { - int score; - if (fuzzy_match(tv->vval.v_string, str, &score)) { - ptrs[i].score = score; - found_match = true; + if (tv->v_type == VAR_STRING) { // list of strings + itemstr = tv->vval.v_string; + } else if (tv->v_type == VAR_DICT && (key != NULL || item_cb->type != kCallbackNone)) { + // For a dict, either use the specified key to lookup the string or + // use the specified callback function to get the string. + if (key != NULL) { + itemstr = (char_u *)tv_dict_get_string(tv->vval.v_dict, (const char *)key, false); + } else { + typval_T argv[2]; + + // Invoke the supplied callback (if any) to get the dict item + tv->vval.v_dict->dv_refcount++; + argv[0].v_type = VAR_DICT; + argv[0].vval.v_dict = tv->vval.v_dict; + argv[1].v_type = VAR_UNKNOWN; + if (callback_call(item_cb, 1, argv, &rettv)) { + if (rettv.v_type == VAR_STRING) { + itemstr = rettv.vval.v_string; + } + } + tv_dict_unref(tv->vval.v_dict); } } + + int score; + if (itemstr != NULL + && fuzzy_match(itemstr, str, &score, matches, sizeof(matches) / sizeof(matches[0]))) { + // Copy the list of matching positions in itemstr to a list, if + // 'retmatchpos' is set. + if (retmatchpos) { + const int strsz = mb_charlen(str); + ptrs[i].lmatchpos = tv_list_alloc(strsz); + for (int j = 0; j < strsz; j++) { + tv_list_append_number(ptrs[i].lmatchpos, matches[j]); + } + } + ptrs[i].score = score; + found_match = true; + } i++; + tv_clear(&rettv); }); if (found_match) { // Sort the list by the descending order of the match score - qsort(ptrs, (size_t)len, sizeof(fuzzyItem_T), fuzzy_item_compare); + qsort(ptrs, len, sizeof(fuzzyItem_T), fuzzy_item_compare); - // Copy the matching strings with 'score != -9999' to the return list + // For matchfuzzy(), return a list of matched strings. + // ['str1', 'str2', 'str3'] + // For matchfuzzypos(), return a list with two items. + // The first item is a list of matched strings. The second item + // is a list of lists where each list item is a list of matched + // character positions. + // [['str1', 'str2', 'str3'], [[1, 3], [1, 3], [1, 3]]] + list_T *l; + if (retmatchpos) { + const listitem_T *const li = tv_list_find(fmatchlist, 0); + assert(li != NULL && TV_LIST_ITEM_TV(li)->vval.v_list != NULL); + l = TV_LIST_ITEM_TV(li)->vval.v_list; + } else { + l = fmatchlist; + } + + // Copy the matching strings with a valid score to the return list for (i = 0; i < len; i++) { - if (ptrs[i].score == -9999) { + if (ptrs[i].score == SCORE_NONE) { break; } - const typval_T *const tv = TV_LIST_ITEM_TV(ptrs[i].item); - tv_list_append_string(fmatchlist, (const char *)tv->vval.v_string, -1); + tv_list_append_tv(l, TV_LIST_ITEM_TV(ptrs[i].item)); + } + + // next copy the list of matching positions + if (retmatchpos) { + const listitem_T *const li = tv_list_find(fmatchlist, -1); + assert(li != NULL && TV_LIST_ITEM_TV(li)->vval.v_list != NULL); + l = TV_LIST_ITEM_TV(li)->vval.v_list; + for (i = 0; i < len; i++) { + if (ptrs[i].score == SCORE_NONE) { + break; + } + tv_list_append_list(l, ptrs[i].lmatchpos); + } } } - xfree(ptrs); } -/// "matchfuzzy()" function -void f_matchfuzzy(typval_T *argvars, typval_T *rettv, FunPtr fptr) +/// Do fuzzy matching. Returns the list of matched strings in 'rettv'. +/// If 'retmatchpos' is true, also returns the matching character positions. +static void do_fuzzymatch(const typval_T *const argvars, typval_T *const rettv, + const bool retmatchpos) + FUNC_ATTR_NONNULL_ALL { - if (argvars[0].v_type != VAR_LIST) { - emsg(_(e_listreq)); - return; - } - if (argvars[0].vval.v_list == NULL) { + // validate and get the arguments + if (argvars[0].v_type != VAR_LIST || argvars[0].vval.v_list == NULL) { + semsg(_(e_listarg), retmatchpos ? "matchfuzzypos()" : "matchfuzzy()"); return; } if (argvars[1].v_type != VAR_STRING || argvars[1].vval.v_string == NULL) { semsg(_(e_invarg2), tv_get_string(&argvars[1])); return; } - match_fuzzy(argvars[0].vval.v_list, (const char_u *)tv_get_string(&argvars[1]), - tv_list_alloc_ret(rettv, kListLenUnknown)); + + Callback cb = CALLBACK_NONE; + const char_u *key = NULL; + if (argvars[2].v_type != VAR_UNKNOWN) { + if (argvars[2].v_type != VAR_DICT || argvars[2].vval.v_dict == NULL) { + emsg(_(e_dictreq)); + return; + } + + // To search a dict, either a callback function or a key can be + // specified. + dict_T *const d = argvars[2].vval.v_dict; + const dictitem_T *const di = tv_dict_find(d, "key", -1); + if (di != NULL) { + if (di->di_tv.v_type != VAR_STRING || di->di_tv.vval.v_string == NULL + || *di->di_tv.vval.v_string == NUL) { + semsg(_(e_invarg2), tv_get_string(&di->di_tv)); + return; + } + key = (const char_u *)tv_get_string(&di->di_tv); + } else if (!tv_dict_get_callback(d, "text_cb", -1, &cb)) { + semsg(_(e_invargval), "text_cb"); + return; + } + } + + // get the fuzzy matches + tv_list_alloc_ret(rettv, retmatchpos ? 2 : kListLenUnknown); + if (retmatchpos) { + // For matchfuzzypos(), a list with two items are returned. First item + // is a list of matching strings and the second item is a list of + // lists with matching positions within each string. + tv_list_append_list(rettv->vval.v_list, tv_list_alloc(kListLenUnknown)); + tv_list_append_list(rettv->vval.v_list, tv_list_alloc(kListLenUnknown)); + } + + match_fuzzy(argvars[0].vval.v_list, (char_u *)tv_get_string(&argvars[1]), key, &cb, retmatchpos, + rettv->vval.v_list); + callback_free(&cb); +} + +/// "matchfuzzy()" function +void f_matchfuzzy(typval_T *argvars, typval_T *rettv, FunPtr fptr) +{ + do_fuzzymatch(argvars, rettv, false); +} + +/// "matchfuzzypos()" function +void f_matchfuzzypos(typval_T *argvars, typval_T *rettv, FunPtr fptr) +{ + do_fuzzymatch(argvars, rettv, true); } /// Find identifiers or defines in included files. diff --git a/src/nvim/testdir/test_functions.vim b/src/nvim/testdir/test_functions.vim index 57ae0d3020..438bed51c6 100644 --- a/src/nvim/testdir/test_functions.vim +++ b/src/nvim/testdir/test_functions.vim @@ -1731,32 +1731,6 @@ func Test_nr2char() call assert_equal("\x80\xfc\b\xfd\x80\xfeX\x80\xfeX\x80\xfeX\x80\xfeX\x80\xfeX", eval('"\"')) endfunc -" Test for matchfuzzy() -func Test_matchfuzzy() - call assert_fails('call matchfuzzy(10, "abc")', 'E714:') - " Needs v8.2.1183. - " call assert_fails('call matchfuzzy(["abc"], [])', 'E730:') - call assert_fails('call matchfuzzy(["abc"], [])', 'E475:') - call assert_equal([], matchfuzzy([], 'abc')) - call assert_equal([], matchfuzzy(['abc'], '')) - call assert_equal(['abc'], matchfuzzy(['abc', 10], 'ac')) - call assert_equal([], matchfuzzy([10, 20], 'ac')) - call assert_equal(['abc'], matchfuzzy(['abc'], 'abc')) - call assert_equal(['crayon', 'camera'], matchfuzzy(['camera', 'crayon'], 'cra')) - call assert_equal(['aabbaa', 'aaabbbaaa', 'aaaabbbbaaaa', 'aba'], matchfuzzy(['aba', 'aabbaa', 'aaabbbaaa', 'aaaabbbbaaaa'], 'aa')) - call assert_equal(['one'], matchfuzzy(['one', 'two'], 'one')) - call assert_equal(['oneTwo', 'onetwo'], matchfuzzy(['onetwo', 'oneTwo'], 'oneTwo')) - call assert_equal(['one_two', 'onetwo'], matchfuzzy(['onetwo', 'one_two'], 'oneTwo')) - call assert_equal(['aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'], matchfuzzy(['aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'], 'aa')) - call assert_equal([], matchfuzzy([repeat('a', 300)], repeat('a', 257))) - - %bw! - eval ['somebuf', 'anotherone', 'needle', 'yetanotherone']->map({_, v -> bufadd(v) + bufload(v)}) - let l = getbufinfo()->map({_, v -> v.name})->matchfuzzy('ndl') - call assert_equal(1, len(l)) - call assert_match('needle', l[0]) -endfunc - " Test for getcurpos() and setpos() func Test_getcurpos_setpos() new diff --git a/src/nvim/testdir/test_matchfuzzy.vim b/src/nvim/testdir/test_matchfuzzy.vim new file mode 100644 index 0000000000..4390852639 --- /dev/null +++ b/src/nvim/testdir/test_matchfuzzy.vim @@ -0,0 +1,203 @@ +" Tests for fuzzy matching + +source shared.vim +source check.vim + +" Test for matchfuzzy() +func Test_matchfuzzy() + call assert_fails('call matchfuzzy(10, "abc")', 'E686:') + " Needs v8.2.1183; match the final error that's thrown for now + " call assert_fails('call matchfuzzy(["abc"], [])', 'E730:') + call assert_fails('call matchfuzzy(["abc"], [])', 'E475:') + call assert_fails("let x = matchfuzzy(v:_null_list, 'foo')", 'E686:') + call assert_fails('call matchfuzzy(["abc"], v:_null_string)', 'E475:') + call assert_equal([], matchfuzzy([], 'abc')) + call assert_equal([], matchfuzzy(['abc'], '')) + call assert_equal(['abc'], matchfuzzy(['abc', 10], 'ac')) + call assert_equal([], matchfuzzy([10, 20], 'ac')) + call assert_equal(['abc'], matchfuzzy(['abc'], 'abc')) + call assert_equal(['crayon', 'camera'], matchfuzzy(['camera', 'crayon'], 'cra')) + call assert_equal(['aabbaa', 'aaabbbaaa', 'aaaabbbbaaaa', 'aba'], matchfuzzy(['aba', 'aabbaa', 'aaabbbaaa', 'aaaabbbbaaaa'], 'aa')) + call assert_equal(['one'], matchfuzzy(['one', 'two'], 'one')) + call assert_equal(['oneTwo', 'onetwo'], matchfuzzy(['onetwo', 'oneTwo'], 'oneTwo')) + call assert_equal(['one_two', 'onetwo'], matchfuzzy(['onetwo', 'one_two'], 'oneTwo')) + call assert_equal(['aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'], matchfuzzy(['aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'], 'aa')) + call assert_equal(256, matchfuzzy([repeat('a', 256)], repeat('a', 256))[0]->len()) + call assert_equal([], matchfuzzy([repeat('a', 300)], repeat('a', 257))) + + " Tests for match preferences + " preference for camel case match + call assert_equal(['oneTwo', 'onetwo'], ['onetwo', 'oneTwo']->matchfuzzy('onetwo')) + " preference for match after a separator (_ or space) + call assert_equal(['one_two', 'one two', 'onetwo'], ['onetwo', 'one_two', 'one two']->matchfuzzy('onetwo')) + " preference for leading letter match + call assert_equal(['onetwo', 'xonetwo'], ['xonetwo', 'onetwo']->matchfuzzy('onetwo')) + " preference for sequential match + call assert_equal(['onetwo', 'oanbectdweo'], ['oanbectdweo', 'onetwo']->matchfuzzy('onetwo')) + " non-matching leading letter(s) penalty + call assert_equal(['xonetwo', 'xxonetwo'], ['xxonetwo', 'xonetwo']->matchfuzzy('onetwo')) + " total non-matching letter(s) penalty + call assert_equal(['one', 'onex', 'onexx'], ['onexx', 'one', 'onex']->matchfuzzy('one')) + + %bw! + eval ['somebuf', 'anotherone', 'needle', 'yetanotherone']->map({_, v -> bufadd(v) + bufload(v)}) + let l = getbufinfo()->map({_, v -> v.name})->matchfuzzy('ndl') + call assert_equal(1, len(l)) + call assert_match('needle', l[0]) + + let l = [{'id' : 5, 'val' : 'crayon'}, {'id' : 6, 'val' : 'camera'}] + call assert_equal([{'id' : 6, 'val' : 'camera'}], matchfuzzy(l, 'cam', {'text_cb' : {v -> v.val}})) + call assert_equal([{'id' : 6, 'val' : 'camera'}], matchfuzzy(l, 'cam', {'key' : 'val'})) + call assert_equal([], matchfuzzy(l, 'day', {'text_cb' : {v -> v.val}})) + call assert_equal([], matchfuzzy(l, 'day', {'key' : 'val'})) + call assert_fails("let x = matchfuzzy(l, 'cam', 'random')", 'E715:') + call assert_equal([], matchfuzzy(l, 'day', {'text_cb' : {v -> []}})) + call assert_equal([], matchfuzzy(l, 'day', {'text_cb' : {v -> 1}})) + call assert_fails("let x = matchfuzzy(l, 'day', {'text_cb' : {a, b -> 1}})", 'E119:') + call assert_equal([], matchfuzzy(l, 'cam')) + " Nvim's callback implementation is different, so E6000 is expected instead, + " but we need v8.2.1183 to assert it + " call assert_fails("let x = matchfuzzy(l, 'cam', {'text_cb' : []})", 'E921:') + " call assert_fails("let x = matchfuzzy(l, 'cam', {'text_cb' : []})", 'E6000:') + call assert_fails("let x = matchfuzzy(l, 'cam', {'text_cb' : []})", 'E475:') + " call assert_fails("let x = matchfuzzy(l, 'foo', {'key' : []})", 'E730:') + call assert_fails("let x = matchfuzzy(l, 'foo', {'key' : []})", 'E475:') + call assert_fails("let x = matchfuzzy(l, 'cam', v:_null_dict)", 'E715:') + call assert_fails("let x = matchfuzzy(l, 'foo', {'key' : v:_null_string})", 'E475:') + " Nvim doesn't have null functions + " call assert_fails("let x = matchfuzzy(l, 'foo', {'text_cb' : test_null_function()})", 'E475:') + + let l = [{'id' : 5, 'name' : 'foo'}, {'id' : 6, 'name' : []}, {'id' : 7}] + call assert_fails("let x = matchfuzzy(l, 'foo', {'key' : 'name'})", 'E730:') + + " Test in latin1 encoding + let save_enc = &encoding + " Nvim supports utf-8 encoding only + " set encoding=latin1 + call assert_equal(['abc'], matchfuzzy(['abc'], 'abc')) + let &encoding = save_enc +endfunc + +" Test for the fuzzymatchpos() function +func Test_matchfuzzypos() + call assert_equal([['curl', 'world'], [[2,3], [2,3]]], matchfuzzypos(['world', 'curl'], 'rl')) + call assert_equal([['curl', 'world'], [[2,3], [2,3]]], matchfuzzypos(['world', 'one', 'curl'], 'rl')) + call assert_equal([['hello', 'hello world hello world'], + \ [[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]]], + \ matchfuzzypos(['hello world hello world', 'hello', 'world'], 'hello')) + call assert_equal([['aaaaaaa'], [[0, 1, 2]]], matchfuzzypos(['aaaaaaa'], 'aaa')) + call assert_equal([[], []], matchfuzzypos(['world', 'curl'], 'ab')) + let x = matchfuzzypos([repeat('a', 256)], repeat('a', 256)) + call assert_equal(range(256), x[1][0]) + call assert_equal([[], []], matchfuzzypos([repeat('a', 300)], repeat('a', 257))) + call assert_equal([[], []], matchfuzzypos([], 'abc')) + + " match in a long string + call assert_equal([[repeat('x', 300) .. 'abc'], [[300, 301, 302]]], + \ matchfuzzypos([repeat('x', 300) .. 'abc'], 'abc')) + + " preference for camel case match + call assert_equal([['xabcxxaBc'], [[6, 7, 8]]], matchfuzzypos(['xabcxxaBc'], 'abc')) + " preference for match after a separator (_ or space) + call assert_equal([['xabx_ab'], [[5, 6]]], matchfuzzypos(['xabx_ab'], 'ab')) + " preference for leading letter match + call assert_equal([['abcxabc'], [[0, 1]]], matchfuzzypos(['abcxabc'], 'ab')) + " preference for sequential match + call assert_equal([['aobncedone'], [[7, 8, 9]]], matchfuzzypos(['aobncedone'], 'one')) + " best recursive match + call assert_equal([['xoone'], [[2, 3, 4]]], matchfuzzypos(['xoone'], 'one')) + + let l = [{'id' : 5, 'val' : 'crayon'}, {'id' : 6, 'val' : 'camera'}] + call assert_equal([[{'id' : 6, 'val' : 'camera'}], [[0, 1, 2]]], + \ matchfuzzypos(l, 'cam', {'text_cb' : {v -> v.val}})) + call assert_equal([[{'id' : 6, 'val' : 'camera'}], [[0, 1, 2]]], + \ matchfuzzypos(l, 'cam', {'key' : 'val'})) + call assert_equal([[], []], matchfuzzypos(l, 'day', {'text_cb' : {v -> v.val}})) + call assert_equal([[], []], matchfuzzypos(l, 'day', {'key' : 'val'})) + call assert_fails("let x = matchfuzzypos(l, 'cam', 'random')", 'E715:') + call assert_equal([[], []], matchfuzzypos(l, 'day', {'text_cb' : {v -> []}})) + call assert_equal([[], []], matchfuzzypos(l, 'day', {'text_cb' : {v -> 1}})) + call assert_fails("let x = matchfuzzypos(l, 'day', {'text_cb' : {a, b -> 1}})", 'E119:') + call assert_equal([[], []], matchfuzzypos(l, 'cam')) + " Nvim's callback implementation is different, so E6000 is expected instead, + " but we need v8.2.1183 to assert it + " call assert_fails("let x = matchfuzzypos(l, 'cam', {'text_cb' : []})", 'E921:') + " call assert_fails("let x = matchfuzzypos(l, 'cam', {'text_cb' : []})", 'E6000:') + call assert_fails("let x = matchfuzzypos(l, 'cam', {'text_cb' : []})", 'E475:') + " call assert_fails("let x = matchfuzzypos(l, 'foo', {'key' : []})", 'E730:') + call assert_fails("let x = matchfuzzypos(l, 'foo', {'key' : []})", 'E475:') + call assert_fails("let x = matchfuzzypos(l, 'cam', v:_null_dict)", 'E715:') + call assert_fails("let x = matchfuzzypos(l, 'foo', {'key' : v:_null_string})", 'E475:') + " Nvim doesn't have null functions + " call assert_fails("let x = matchfuzzypos(l, 'foo', {'text_cb' : test_null_function()})", 'E475:') + + let l = [{'id' : 5, 'name' : 'foo'}, {'id' : 6, 'name' : []}, {'id' : 7}] + call assert_fails("let x = matchfuzzypos(l, 'foo', {'key' : 'name'})", 'E730:') +endfunc + +func Test_matchfuzzy_mbyte() + CheckFeature multi_lang + call assert_equal(['ンヹㄇヺヴ'], matchfuzzy(['ンヹㄇヺヴ'], 'ヹヺ')) + " reverse the order of characters + call assert_equal([], matchfuzzy(['ンヹㄇヺヴ'], 'ヺヹ')) + call assert_equal(['αβΩxxx', 'xαxβxΩx'], + \ matchfuzzy(['αβΩxxx', 'xαxβxΩx'], 'αβΩ')) + call assert_equal(['ππbbππ', 'πππbbbπππ', 'ππππbbbbππππ', 'πbπ'], + \ matchfuzzy(['πbπ', 'ππbbππ', 'πππbbbπππ', 'ππππbbbbππππ'], 'ππ')) + + " preference for camel case match + call assert_equal(['oneĄwo', 'oneąwo'], + \ ['oneąwo', 'oneĄwo']->matchfuzzy('oneąwo')) + " preference for match after a separator (_ or space) + call assert_equal(['ⅠⅡa_bㄟㄠ', 'ⅠⅡa bㄟㄠ', 'ⅠⅡabㄟㄠ'], + \ ['ⅠⅡabㄟㄠ', 'ⅠⅡa_bㄟㄠ', 'ⅠⅡa bㄟㄠ']->matchfuzzy('ⅠⅡabㄟㄠ')) + " preference for leading letter match + call assert_equal(['ŗŝţũŵż', 'xŗŝţũŵż'], + \ ['xŗŝţũŵż', 'ŗŝţũŵż']->matchfuzzy('ŗŝţũŵż')) + " preference for sequential match + call assert_equal(['ㄞㄡㄤfffifl', 'ㄞaㄡbㄤcffdfiefl'], + \ ['ㄞaㄡbㄤcffdfiefl', 'ㄞㄡㄤfffifl']->matchfuzzy('ㄞㄡㄤfffifl')) + " non-matching leading letter(s) penalty + call assert_equal(['xㄞㄡㄤfffifl', 'xxㄞㄡㄤfffifl'], + \ ['xxㄞㄡㄤfffifl', 'xㄞㄡㄤfffifl']->matchfuzzy('ㄞㄡㄤfffifl')) + " total non-matching letter(s) penalty + call assert_equal(['ŗŝţ', 'ŗŝţx', 'ŗŝţxx'], + \ ['ŗŝţxx', 'ŗŝţ', 'ŗŝţx']->matchfuzzy('ŗŝţ')) +endfunc + +func Test_matchfuzzypos_mbyte() + CheckFeature multi_lang + call assert_equal([['こんにちは世界'], [[0, 1, 2, 3, 4]]], + \ matchfuzzypos(['こんにちは世界'], 'こんにちは')) + call assert_equal([['ンヹㄇヺヴ'], [[1, 3]]], matchfuzzypos(['ンヹㄇヺヴ'], 'ヹヺ')) + " reverse the order of characters + call assert_equal([[], []], matchfuzzypos(['ンヹㄇヺヴ'], 'ヺヹ')) + call assert_equal([['αβΩxxx', 'xαxβxΩx'], [[0, 1, 2], [1, 3, 5]]], + \ matchfuzzypos(['αβΩxxx', 'xαxβxΩx'], 'αβΩ')) + call assert_equal([['ππbbππ', 'πππbbbπππ', 'ππππbbbbππππ', 'πbπ'], + \ [[0, 1], [0, 1], [0, 1], [0, 2]]], + \ matchfuzzypos(['πbπ', 'ππbbππ', 'πππbbbπππ', 'ππππbbbbππππ'], 'ππ')) + call assert_equal([['ααααααα'], [[0, 1, 2]]], + \ matchfuzzypos(['ααααααα'], 'ααα')) + + call assert_equal([[], []], matchfuzzypos(['ンヹㄇ', 'ŗŝţ'], 'fffifl')) + let x = matchfuzzypos([repeat('Ψ', 256)], repeat('Ψ', 256)) + call assert_equal(range(256), x[1][0]) + call assert_equal([[], []], matchfuzzypos([repeat('✓', 300)], repeat('✓', 257))) + + " match in a long string + call assert_equal([[repeat('♪', 300) .. '✗✗✗'], [[300, 301, 302]]], + \ matchfuzzypos([repeat('♪', 300) .. '✗✗✗'], '✗✗✗')) + " preference for camel case match + call assert_equal([['xѳѵҁxxѳѴҁ'], [[6, 7, 8]]], matchfuzzypos(['xѳѵҁxxѳѴҁ'], 'ѳѵҁ')) + " preference for match after a separator (_ or space) + call assert_equal([['xちだx_ちだ'], [[5, 6]]], matchfuzzypos(['xちだx_ちだ'], 'ちだ')) + " preference for leading letter match + call assert_equal([['ѳѵҁxѳѵҁ'], [[0, 1]]], matchfuzzypos(['ѳѵҁxѳѵҁ'], 'ѳѵ')) + " preference for sequential match + call assert_equal([['aンbヹcㄇdンヹㄇ'], [[7, 8, 9]]], matchfuzzypos(['aンbヹcㄇdンヹㄇ'], 'ンヹㄇ')) + " best recursive match + call assert_equal([['xффйд'], [[2, 3, 4]]], matchfuzzypos(['xффйд'], 'фйд')) +endfunc + +" vim: shiftwidth=2 sts=2 expandtab From 8313d31e4a5065ac0e945ebc689fa083a50e41dc Mon Sep 17 00:00:00 2001 From: Sean Dewar Date: Sat, 1 Jan 2022 21:12:53 +0000 Subject: [PATCH 3/8] vim-patch:8.2.1872: matchfuzzy() does not prefer sequential matches Problem: Matchfuzzy() does not prefer sequential matches. Solution: Give sequential matches a higher bonus. (Christian Brabandt, closes vim/vim#7140) https://github.com/vim/vim/commit/e9f9f16387554e5b34ba42ce00c42c28dd66af58 --- src/nvim/search.c | 5 +++-- src/nvim/testdir/test_matchfuzzy.vim | 23 ++++++++++++++++++----- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/src/nvim/search.c b/src/nvim/search.c index 8f3e66744f..a5b7d8f5ee 100644 --- a/src/nvim/search.c +++ b/src/nvim/search.c @@ -4818,8 +4818,9 @@ typedef struct { list_T *lmatchpos; } fuzzyItem_T; -/// bonus for adjacent matches -#define SEQUENTIAL_BONUS 15 +/// bonus for adjacent matches; this is higher than SEPARATOR_BONUS so that +/// matching a whole word is preferred. +#define SEQUENTIAL_BONUS 40 /// bonus if match occurs after a separator #define SEPARATOR_BONUS 30 /// bonus if match is uppercase and prev is lower diff --git a/src/nvim/testdir/test_matchfuzzy.vim b/src/nvim/testdir/test_matchfuzzy.vim index 4390852639..293f7387b8 100644 --- a/src/nvim/testdir/test_matchfuzzy.vim +++ b/src/nvim/testdir/test_matchfuzzy.vim @@ -20,7 +20,7 @@ func Test_matchfuzzy() call assert_equal(['aabbaa', 'aaabbbaaa', 'aaaabbbbaaaa', 'aba'], matchfuzzy(['aba', 'aabbaa', 'aaabbbaaa', 'aaaabbbbaaaa'], 'aa')) call assert_equal(['one'], matchfuzzy(['one', 'two'], 'one')) call assert_equal(['oneTwo', 'onetwo'], matchfuzzy(['onetwo', 'oneTwo'], 'oneTwo')) - call assert_equal(['one_two', 'onetwo'], matchfuzzy(['onetwo', 'one_two'], 'oneTwo')) + call assert_equal(['onetwo', 'one_two'], matchfuzzy(['onetwo', 'one_two'], 'oneTwo')) call assert_equal(['aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'], matchfuzzy(['aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'], 'aa')) call assert_equal(256, matchfuzzy([repeat('a', 256)], repeat('a', 256))[0]->len()) call assert_equal([], matchfuzzy([repeat('a', 300)], repeat('a', 257))) @@ -29,7 +29,11 @@ func Test_matchfuzzy() " preference for camel case match call assert_equal(['oneTwo', 'onetwo'], ['onetwo', 'oneTwo']->matchfuzzy('onetwo')) " preference for match after a separator (_ or space) - call assert_equal(['one_two', 'one two', 'onetwo'], ['onetwo', 'one_two', 'one two']->matchfuzzy('onetwo')) + if has("win32") + call assert_equal(['onetwo', 'one two', 'one_two'], ['onetwo', 'one_two', 'one two']->matchfuzzy('onetwo')) + else + call assert_equal(['onetwo', 'one_two', 'one two'], ['onetwo', 'one_two', 'one two']->matchfuzzy('onetwo')) + endif " preference for leading letter match call assert_equal(['onetwo', 'xonetwo'], ['xonetwo', 'onetwo']->matchfuzzy('onetwo')) " preference for sequential match @@ -38,6 +42,8 @@ func Test_matchfuzzy() call assert_equal(['xonetwo', 'xxonetwo'], ['xxonetwo', 'xonetwo']->matchfuzzy('onetwo')) " total non-matching letter(s) penalty call assert_equal(['one', 'onex', 'onexx'], ['onexx', 'one', 'onex']->matchfuzzy('one')) + " prefer complete matches over separator matches + call assert_equal(['.vim/vimrc', '.vim/vimrc_colors', '.vim/v_i_m_r_c'], ['.vim/vimrc', '.vim/vimrc_colors', '.vim/v_i_m_r_c']->matchfuzzy('vimrc')) %bw! eval ['somebuf', 'anotherone', 'needle', 'yetanotherone']->map({_, v -> bufadd(v) + bufload(v)}) @@ -148,9 +154,16 @@ func Test_matchfuzzy_mbyte() " preference for camel case match call assert_equal(['oneĄwo', 'oneąwo'], \ ['oneąwo', 'oneĄwo']->matchfuzzy('oneąwo')) - " preference for match after a separator (_ or space) - call assert_equal(['ⅠⅡa_bㄟㄠ', 'ⅠⅡa bㄟㄠ', 'ⅠⅡabㄟㄠ'], - \ ['ⅠⅡabㄟㄠ', 'ⅠⅡa_bㄟㄠ', 'ⅠⅡa bㄟㄠ']->matchfuzzy('ⅠⅡabㄟㄠ')) + " preference for complete match then match after separator (_ or space) + if has("win32") + " order is different between Windows and Unix :( + " It's important that the complete match is first + call assert_equal(['ⅠⅡabㄟㄠ', 'ⅠⅡa bㄟㄠ', 'ⅠⅡa_bㄟㄠ'], + \ ['ⅠⅡabㄟㄠ', 'ⅠⅡa_bㄟㄠ', 'ⅠⅡa bㄟㄠ']->matchfuzzy('ⅠⅡabㄟㄠ')) + else + call assert_equal(['ⅠⅡabㄟㄠ'] + sort(['ⅠⅡa_bㄟㄠ', 'ⅠⅡa bㄟㄠ']), + \ ['ⅠⅡabㄟㄠ', 'ⅠⅡa bㄟㄠ', 'ⅠⅡa_bㄟㄠ']->matchfuzzy('ⅠⅡabㄟㄠ')) + endif " preference for leading letter match call assert_equal(['ŗŝţũŵż', 'xŗŝţũŵż'], \ ['xŗŝţũŵż', 'ŗŝţũŵż']->matchfuzzy('ŗŝţũŵż')) From 30deb14f397e576aedfa54600ed7408b3e03459d Mon Sep 17 00:00:00 2001 From: Sean Dewar Date: Sat, 1 Jan 2022 21:25:41 +0000 Subject: [PATCH 4/8] vim-patch:8.2.1893: fuzzy matching does not support multiple words Problem: Fuzzy matching does not support multiple words. Solution: Add support for matching white space separated words. (Yegappan Lakshmanan, closes vim/vim#7163) https://github.com/vim/vim/commit/8ded5b647aa4b3338da721b343e0bce0f86655f6 --- runtime/doc/builtin.txt | 19 ++- src/nvim/search.c | 171 +++++++++++++++++++-------- src/nvim/testdir/test_matchfuzzy.vim | 62 +++++++--- 3 files changed, 184 insertions(+), 68 deletions(-) diff --git a/runtime/doc/builtin.txt b/runtime/doc/builtin.txt index 14cf9f09fc..745f1d9116 100644 --- a/runtime/doc/builtin.txt +++ b/runtime/doc/builtin.txt @@ -4866,8 +4866,15 @@ matchfuzzy({list}, {str} [, {dict}]) *matchfuzzy()* the strings in {list} that fuzzy match {str}. The strings in the returned list are sorted based on the matching score. + The optional {dict} argument always supports the following + items: + matchseq When this item is present and {str} contains + multiple words separated by white space, then + returns only matches that contain the words in + the given sequence. + If {list} is a list of dictionaries, then the optional {dict} - argument supports the following items: + argument supports the following additional items: key key of the item which is fuzzy matched against {str}. The value of this item should be a string. @@ -4881,6 +4888,9 @@ matchfuzzy({list}, {str} [, {dict}]) *matchfuzzy()* matching is NOT supported. The maximum supported {str} length is 256. + When {str} has multiple words each separated by white space, + then the list of strings that have all the words is returned. + If there are no matching strings or there is an error, then an empty list is returned. If length of {str} is greater than 256, then returns an empty list. @@ -4900,7 +4910,12 @@ matchfuzzy({list}, {str} [, {dict}]) *matchfuzzy()* :echo v:oldfiles->matchfuzzy("test") < results in a list of file names fuzzy matching "test". > :let l = readfile("buffer.c")->matchfuzzy("str") -< results in a list of lines in "buffer.c" fuzzy matching "str". +< results in a list of lines in "buffer.c" fuzzy matching "str". > + :echo ['one two', 'two one']->matchfuzzy('two one') +< results in ['two one', 'one two']. > + :echo ['one two', 'two one']->matchfuzzy('two one', + \ {'matchseq': 1}) +< results in ['two one']. matchfuzzypos({list}, {str} [, {dict}]) *matchfuzzypos()* Same as |matchfuzzy()|, but returns the list of matched diff --git a/src/nvim/search.c b/src/nvim/search.c index a5b7d8f5ee..a021466446 100644 --- a/src/nvim/search.c +++ b/src/nvim/search.c @@ -4771,16 +4771,16 @@ the_end: /// Ported from the lib_fts library authored by Forrest Smith. /// https://github.com/forrestthewoods/lib_fts/tree/master/code /// -/// Blog describing the algorithm: +/// The following blog describes the fuzzy matching algorithm: /// https://www.forrestthewoods.com/blog/reverse_engineering_sublime_texts_fuzzy_match/ /// /// Each matching string is assigned a score. The following factors are checked: -/// Matched letter -/// Unmatched letter -/// Consecutively matched letters -/// Proximity to start -/// Letter following a separator (space, underscore) -/// Uppercase letter following lowercase (aka CamelCase) +/// - Matched letter +/// - Unmatched letter +/// - Consecutively matched letters +/// - Proximity to start +/// - Letter following a separator (space, underscore) +/// - Uppercase letter following lowercase (aka CamelCase) /// /// Matched letters are good. Unmatched letters are bad. Matching near the start /// is good. Matching the first letter in the middle of a phrase is good. @@ -4790,16 +4790,17 @@ the_end: /// File paths are different from file names. File extensions may be ignorable. /// Single words care about consecutive matches but not separators or camel /// case. -/// Score starts at 0 +/// Score starts at 100 /// Matched letter: +0 points /// Unmatched letter: -1 point -/// Consecutive match bonus: +5 points -/// Separator bonus: +10 points -/// Camel case bonus: +10 points -/// Unmatched leading letter: -3 points (max: -9) +/// Consecutive match bonus: +15 points +/// First letter bonus: +15 points +/// Separator bonus: +30 points +/// Camel case bonus: +30 points +/// Unmatched leading letter: -5 points (max: -15) /// /// There is some nuance to this. Scores don’t have an intrinsic meaning. The -/// score range isn’t 0 to 100. It’s roughly [-50, 50]. Longer words have a +/// score range isn’t 0 to 100. It’s roughly [50, 150]. Longer words have a /// lower minimum score due to unmatched letter penalty. Longer search patterns /// have a higher maximum score due to match bonuses. /// @@ -4813,6 +4814,7 @@ the_end: /// There is not an explicit bonus for an exact match. Unmatched letters receive /// a penalty. So shorter strings and closer matches are worth more. typedef struct { + int idx; ///< used for stable sort listitem_T *item; int score; list_T *lmatchpos; @@ -4833,6 +4835,8 @@ typedef struct { #define MAX_LEADING_LETTER_PENALTY -15 /// penalty for every letter that doesn't match #define UNMATCHED_LETTER_PENALTY -1 +/// penalty for gap in matching positions (-2 * k) +#define GAP_PENALTY -2 /// Score for a string that doesn't fuzzy match the pattern #define SCORE_NONE -9999 @@ -4870,6 +4874,8 @@ static int fuzzy_match_compute_score(const char_u *const str, const int strSz, // Sequential if (currIdx == prevIdx + 1) { score += SEQUENTIAL_BONUS; + } else { + score += GAP_PENALTY * (currIdx - prevIdx); } } @@ -4881,7 +4887,7 @@ static int fuzzy_match_compute_score(const char_u *const str, const int strSz, for (matchidx_T sidx = 0; sidx < currIdx; sidx++) { neighbor = utf_ptr2char(p); - mb_ptr2char_adv(&p); + MB_PTR_ADV(p); } const int curr = utf_ptr2char(p); @@ -4902,11 +4908,13 @@ static int fuzzy_match_compute_score(const char_u *const str, const int strSz, return score; } -static bool fuzzy_match_recursive(const char_u *fuzpat, const char_u *str, matchidx_T strIdx, - int *const outScore, const char_u *const strBegin, - const int strLen, const matchidx_T *const srcMatches, - matchidx_T *const matches, const int maxMatches, int nextMatch, - int *const recursionCount) +/// Perform a recursive search for fuzzy matching 'fuzpat' in 'str'. +/// @return the number of matching characters. +static int fuzzy_match_recursive(const char_u *fuzpat, const char_u *str, matchidx_T strIdx, + int *const outScore, const char_u *const strBegin, + const int strLen, const matchidx_T *const srcMatches, + matchidx_T *const matches, const int maxMatches, int nextMatch, + int *const recursionCount) FUNC_ATTR_NONNULL_ARG(1, 2, 4, 5, 8, 11) FUNC_ATTR_WARN_UNUSED_RESULT { // Recursion params @@ -4917,12 +4925,12 @@ static bool fuzzy_match_recursive(const char_u *fuzpat, const char_u *str, match // Count recursions (*recursionCount)++; if (*recursionCount >= FUZZY_MATCH_RECURSION_LIMIT) { - return false; + return 0; } // Detect end of strings if (*fuzpat == '\0' || *str == '\0') { - return false; + return 0; } // Loop through fuzpat and str looking for a match @@ -4935,7 +4943,7 @@ static bool fuzzy_match_recursive(const char_u *fuzpat, const char_u *str, match if (mb_tolower(c1) == mb_tolower(c2)) { // Supplied matches buffer was too short if (nextMatch >= maxMatches) { - return false; + return 0; } // "Copy-on-Write" srcMatches into matches @@ -4962,9 +4970,9 @@ static bool fuzzy_match_recursive(const char_u *fuzpat, const char_u *str, match // Advance matches[nextMatch++] = strIdx; - mb_ptr2char_adv(&fuzpat); + MB_PTR_ADV(fuzpat); } - mb_ptr2char_adv(&str); + MB_PTR_ADV(str); strIdx++; } @@ -4981,12 +4989,12 @@ static bool fuzzy_match_recursive(const char_u *fuzpat, const char_u *str, match // Recursive score is better than "this" memcpy(matches, bestRecursiveMatches, maxMatches * sizeof(matches[0])); *outScore = bestRecursiveScore; - return true; + return nextMatch; } else if (matched) { - return true; // "this" score is better than recursive + return nextMatch; // "this" score is better than recursive } - return false; // no match + return 0; // no match } /// fuzzy_match() @@ -4996,45 +5004,98 @@ static bool fuzzy_match_recursive(const char_u *fuzpat, const char_u *str, match /// Scores values have no intrinsic meaning. Possible score range is not /// normalized and varies with pattern. /// Recursion is limited internally (default=10) to prevent degenerate cases -/// (fuzpat="aaaaaa" str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"). +/// (pat_arg="aaaaaa" str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"). /// Uses char_u for match indices. Therefore patterns are limited to MAXMATCHES /// characters. /// -/// @return true if 'fuzpat' matches 'str'. Also returns the match score in +/// @return true if 'pat_arg' matches 'str'. Also returns the match score in /// 'outScore' and the matching character positions in 'matches'. -static bool fuzzy_match(char_u *const str, const char_u *const fuzpat, int *const outScore, - matchidx_T *const matches, const int maxMatches) +static bool fuzzy_match(char_u *const str, const char_u *const pat_arg, const bool matchseq, + int *const outScore, matchidx_T *const matches, const int maxMatches) FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT { - int recursionCount = 0; const int len = mb_charlen(str); + bool complete = false; + int numMatches = 0; *outScore = 0; - return fuzzy_match_recursive(fuzpat, str, 0, outScore, str, len, NULL, matches, maxMatches, 0, - &recursionCount); + char_u *const save_pat = vim_strsave(pat_arg); + char_u *pat = save_pat; + char_u *p = pat; + + // Try matching each word in 'pat_arg' in 'str' + while (true) { + if (matchseq) { + complete = true; + } else { + // Extract one word from the pattern (separated by space) + p = skipwhite(p); + if (*p == NUL) { + break; + } + pat = p; + while (*p != NUL && !ascii_iswhite(utf_ptr2char(p))) { + MB_PTR_ADV(p); + } + if (*p == NUL) { // processed all the words + complete = true; + } + *p = NUL; + } + + int score = 0; + int recursionCount = 0; + const int matchCount + = fuzzy_match_recursive(pat, str, 0, &score, str, len, NULL, matches + numMatches, + maxMatches - numMatches, 0, &recursionCount); + if (matchCount == 0) { + numMatches = 0; + break; + } + + // Accumulate the match score and the number of matches + *outScore += score; + numMatches += matchCount; + + if (complete) { + break; + } + + // try matching the next word + p++; + } + + xfree(save_pat); + return numMatches != 0; } /// Sort the fuzzy matches in the descending order of the match score. -static int fuzzy_item_compare(const void *const s1, const void *const s2) +/// For items with same score, retain the order using the index (stable sort) +static int fuzzy_match_item_compare(const void *const s1, const void *const s2) FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_PURE { const int v1 = ((const fuzzyItem_T *)s1)->score; const int v2 = ((const fuzzyItem_T *)s2)->score; + const int idx1 = ((const fuzzyItem_T *)s1)->idx; + const int idx2 = ((const fuzzyItem_T *)s2)->idx; - return v1 == v2 ? 0 : v1 > v2 ? -1 : 1; + return v1 == v2 ? (idx1 - idx2) : v1 > v2 ? -1 : 1; } /// Fuzzy search the string 'str' in a list of 'items' and return the matching /// strings in 'fmatchlist'. +/// If 'matchseq' is true, then for multi-word search strings, match all the +/// words in sequence. /// If 'items' is a list of strings, then search for 'str' in the list. /// If 'items' is a list of dicts, then either use 'key' to lookup the string /// for each item or use 'item_cb' Funcref function to get the string. /// If 'retmatchpos' is true, then return a list of positions where 'str' /// matches for each item. -static void match_fuzzy(list_T *const items, char_u *const str, const char_u *const key, - Callback *const item_cb, const bool retmatchpos, list_T *const fmatchlist) - FUNC_ATTR_NONNULL_ARG(2, 4, 6) +static void fuzzy_match_in_list(list_T *const items, char_u *const str, const bool matchseq, + const char_u *const key, Callback *const item_cb, + const bool retmatchpos, list_T *const fmatchlist) + FUNC_ATTR_NONNULL_ARG(2, 5, 7) { const long len = tv_list_len(items); if (len == 0) { @@ -5048,6 +5109,7 @@ static void match_fuzzy(list_T *const items, char_u *const str, const char_u *co // For all the string items in items, get the fuzzy matching score TV_LIST_ITER(items, li, { + ptrs[i].idx = i; ptrs[i].item = li; ptrs[i].score = SCORE_NONE; char_u *itemstr = NULL; @@ -5079,15 +5141,20 @@ static void match_fuzzy(list_T *const items, char_u *const str, const char_u *co } int score; - if (itemstr != NULL - && fuzzy_match(itemstr, str, &score, matches, sizeof(matches) / sizeof(matches[0]))) { + if (itemstr != NULL && fuzzy_match(itemstr, str, matchseq, &score, matches, + sizeof(matches) / sizeof(matches[0]))) { // Copy the list of matching positions in itemstr to a list, if // 'retmatchpos' is set. if (retmatchpos) { - const int strsz = mb_charlen(str); - ptrs[i].lmatchpos = tv_list_alloc(strsz); - for (int j = 0; j < strsz; j++) { - tv_list_append_number(ptrs[i].lmatchpos, matches[j]); + ptrs[i].lmatchpos = tv_list_alloc(kListLenMayKnow); + int j = 0; + const char_u *p = str; + while (*p != NUL) { + if (!ascii_iswhite(utf_ptr2char(p))) { + tv_list_append_number(ptrs[i].lmatchpos, matches[j]); + j++; + } + MB_PTR_ADV(p); } } ptrs[i].score = score; @@ -5099,7 +5166,7 @@ static void match_fuzzy(list_T *const items, char_u *const str, const char_u *co if (found_match) { // Sort the list by the descending order of the match score - qsort(ptrs, len, sizeof(fuzzyItem_T), fuzzy_item_compare); + qsort(ptrs, len, sizeof(fuzzyItem_T), fuzzy_match_item_compare); // For matchfuzzy(), return a list of matched strings. // ['str1', 'str2', 'str3'] @@ -5159,6 +5226,7 @@ static void do_fuzzymatch(const typval_T *const argvars, typval_T *const rettv, Callback cb = CALLBACK_NONE; const char_u *key = NULL; + bool matchseq = false; if (argvars[2].v_type != VAR_UNKNOWN) { if (argvars[2].v_type != VAR_DICT || argvars[2].vval.v_dict == NULL) { emsg(_(e_dictreq)); @@ -5168,8 +5236,8 @@ static void do_fuzzymatch(const typval_T *const argvars, typval_T *const rettv, // To search a dict, either a callback function or a key can be // specified. dict_T *const d = argvars[2].vval.v_dict; - const dictitem_T *const di = tv_dict_find(d, "key", -1); - if (di != NULL) { + const dictitem_T *di; + if ((di = tv_dict_find(d, "key", -1)) != NULL) { if (di->di_tv.v_type != VAR_STRING || di->di_tv.vval.v_string == NULL || *di->di_tv.vval.v_string == NUL) { semsg(_(e_invarg2), tv_get_string(&di->di_tv)); @@ -5180,6 +5248,9 @@ static void do_fuzzymatch(const typval_T *const argvars, typval_T *const rettv, semsg(_(e_invargval), "text_cb"); return; } + if ((di = tv_dict_find(d, "matchseq", -1)) != NULL) { + matchseq = true; + } } // get the fuzzy matches @@ -5192,8 +5263,8 @@ static void do_fuzzymatch(const typval_T *const argvars, typval_T *const rettv, tv_list_append_list(rettv->vval.v_list, tv_list_alloc(kListLenUnknown)); } - match_fuzzy(argvars[0].vval.v_list, (char_u *)tv_get_string(&argvars[1]), key, &cb, retmatchpos, - rettv->vval.v_list); + fuzzy_match_in_list(argvars[0].vval.v_list, (char_u *)tv_get_string(&argvars[1]), matchseq, key, + &cb, retmatchpos, rettv->vval.v_list); callback_free(&cb); } diff --git a/src/nvim/testdir/test_matchfuzzy.vim b/src/nvim/testdir/test_matchfuzzy.vim index 293f7387b8..28367b878d 100644 --- a/src/nvim/testdir/test_matchfuzzy.vim +++ b/src/nvim/testdir/test_matchfuzzy.vim @@ -24,16 +24,15 @@ func Test_matchfuzzy() call assert_equal(['aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'], matchfuzzy(['aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'], 'aa')) call assert_equal(256, matchfuzzy([repeat('a', 256)], repeat('a', 256))[0]->len()) call assert_equal([], matchfuzzy([repeat('a', 300)], repeat('a', 257))) + " matches with same score should not be reordered + let l = ['abc1', 'abc2', 'abc3'] + call assert_equal(l, l->matchfuzzy('abc')) " Tests for match preferences " preference for camel case match call assert_equal(['oneTwo', 'onetwo'], ['onetwo', 'oneTwo']->matchfuzzy('onetwo')) " preference for match after a separator (_ or space) - if has("win32") - call assert_equal(['onetwo', 'one two', 'one_two'], ['onetwo', 'one_two', 'one two']->matchfuzzy('onetwo')) - else - call assert_equal(['onetwo', 'one_two', 'one two'], ['onetwo', 'one_two', 'one two']->matchfuzzy('onetwo')) - endif + call assert_equal(['onetwo', 'one_two', 'one two'], ['onetwo', 'one_two', 'one two']->matchfuzzy('onetwo')) " preference for leading letter match call assert_equal(['onetwo', 'xonetwo'], ['xonetwo', 'onetwo']->matchfuzzy('onetwo')) " preference for sequential match @@ -44,6 +43,17 @@ func Test_matchfuzzy() call assert_equal(['one', 'onex', 'onexx'], ['onexx', 'one', 'onex']->matchfuzzy('one')) " prefer complete matches over separator matches call assert_equal(['.vim/vimrc', '.vim/vimrc_colors', '.vim/v_i_m_r_c'], ['.vim/vimrc', '.vim/vimrc_colors', '.vim/v_i_m_r_c']->matchfuzzy('vimrc')) + " gap penalty + call assert_equal(['xxayybxxxx', 'xxayyybxxx', 'xxayyyybxx'], ['xxayyyybxx', 'xxayyybxxx', 'xxayybxxxx']->matchfuzzy('ab')) + + " match multiple words (separated by space) + call assert_equal(['foo bar baz'], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzy('baz foo')) + call assert_equal([], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzy('one two')) + call assert_equal([], ['foo bar']->matchfuzzy(" \t ")) + + " test for matching a sequence of words + call assert_equal(['bar foo'], ['foo bar', 'bar foo', 'foobar', 'barfoo']->matchfuzzy('bar foo', {'matchseq' : 1})) + call assert_equal([#{text: 'two one'}], [#{text: 'one two'}, #{text: 'two one'}]->matchfuzzy('two one', #{key: 'text', matchseq: v:true})) %bw! eval ['somebuf', 'anotherone', 'needle', 'yetanotherone']->map({_, v -> bufadd(v) + bufload(v)}) @@ -51,6 +61,7 @@ func Test_matchfuzzy() call assert_equal(1, len(l)) call assert_match('needle', l[0]) + " Test for fuzzy matching dicts let l = [{'id' : 5, 'val' : 'crayon'}, {'id' : 6, 'val' : 'camera'}] call assert_equal([{'id' : 6, 'val' : 'camera'}], matchfuzzy(l, 'cam', {'text_cb' : {v -> v.val}})) call assert_equal([{'id' : 6, 'val' : 'camera'}], matchfuzzy(l, 'cam', {'key' : 'val'})) @@ -72,6 +83,9 @@ func Test_matchfuzzy() call assert_fails("let x = matchfuzzy(l, 'foo', {'key' : v:_null_string})", 'E475:') " Nvim doesn't have null functions " call assert_fails("let x = matchfuzzy(l, 'foo', {'text_cb' : test_null_function()})", 'E475:') + " matches with same score should not be reordered + let l = [#{text: 'abc', id: 1}, #{text: 'abc', id: 2}, #{text: 'abc', id: 3}] + call assert_equal(l, l->matchfuzzy('abc', #{key: 'text'})) let l = [{'id' : 5, 'name' : 'foo'}, {'id' : 6, 'name' : []}, {'id' : 7}] call assert_fails("let x = matchfuzzy(l, 'foo', {'key' : 'name'})", 'E730:') @@ -84,7 +98,7 @@ func Test_matchfuzzy() let &encoding = save_enc endfunc -" Test for the fuzzymatchpos() function +" Test for the matchfuzzypos() function func Test_matchfuzzypos() call assert_equal([['curl', 'world'], [[2,3], [2,3]]], matchfuzzypos(['world', 'curl'], 'rl')) call assert_equal([['curl', 'world'], [[2,3], [2,3]]], matchfuzzypos(['world', 'one', 'curl'], 'rl')) @@ -92,6 +106,10 @@ func Test_matchfuzzypos() \ [[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]]], \ matchfuzzypos(['hello world hello world', 'hello', 'world'], 'hello')) call assert_equal([['aaaaaaa'], [[0, 1, 2]]], matchfuzzypos(['aaaaaaa'], 'aaa')) + call assert_equal([['a b'], [[0, 3]]], matchfuzzypos(['a b'], 'a b')) + call assert_equal([['a b'], [[0, 3]]], matchfuzzypos(['a b'], 'a b')) + call assert_equal([['a b'], [[0]]], matchfuzzypos(['a b'], ' a ')) + call assert_equal([[], []], matchfuzzypos(['a b'], ' ')) call assert_equal([[], []], matchfuzzypos(['world', 'curl'], 'ab')) let x = matchfuzzypos([repeat('a', 256)], repeat('a', 256)) call assert_equal(range(256), x[1][0]) @@ -113,6 +131,12 @@ func Test_matchfuzzypos() " best recursive match call assert_equal([['xoone'], [[2, 3, 4]]], matchfuzzypos(['xoone'], 'one')) + " match multiple words (separated by space) + call assert_equal([['foo bar baz'], [[8, 9, 10, 0, 1, 2]]], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('baz foo')) + call assert_equal([[], []], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('one two')) + call assert_equal([[], []], ['foo bar']->matchfuzzypos(" \t ")) + call assert_equal([['grace'], [[1, 2, 3, 4, 2, 3, 4, 0, 1, 2, 3, 4]]], ['grace']->matchfuzzypos('race ace grace')) + let l = [{'id' : 5, 'val' : 'crayon'}, {'id' : 6, 'val' : 'camera'}] call assert_equal([[{'id' : 6, 'val' : 'camera'}], [[0, 1, 2]]], \ matchfuzzypos(l, 'cam', {'text_cb' : {v -> v.val}})) @@ -141,6 +165,7 @@ func Test_matchfuzzypos() call assert_fails("let x = matchfuzzypos(l, 'foo', {'key' : 'name'})", 'E730:') endfunc +" Test for matchfuzzy() with multibyte characters func Test_matchfuzzy_mbyte() CheckFeature multi_lang call assert_equal(['ンヹㄇヺヴ'], matchfuzzy(['ンヹㄇヺヴ'], 'ヹヺ')) @@ -151,19 +176,19 @@ func Test_matchfuzzy_mbyte() call assert_equal(['ππbbππ', 'πππbbbπππ', 'ππππbbbbππππ', 'πbπ'], \ matchfuzzy(['πbπ', 'ππbbππ', 'πππbbbπππ', 'ππππbbbbππππ'], 'ππ')) + " match multiple words (separated by space) + call assert_equal(['세 마리의 작은 돼지'], ['세 마리의 작은 돼지', '마리의', '마리의 작은', '작은 돼지']->matchfuzzy('돼지 마리의')) + call assert_equal([], ['세 마리의 작은 돼지', '마리의', '마리의 작은', '작은 돼지']->matchfuzzy('파란 하늘')) + " preference for camel case match call assert_equal(['oneĄwo', 'oneąwo'], \ ['oneąwo', 'oneĄwo']->matchfuzzy('oneąwo')) " preference for complete match then match after separator (_ or space) - if has("win32") - " order is different between Windows and Unix :( - " It's important that the complete match is first - call assert_equal(['ⅠⅡabㄟㄠ', 'ⅠⅡa bㄟㄠ', 'ⅠⅡa_bㄟㄠ'], - \ ['ⅠⅡabㄟㄠ', 'ⅠⅡa_bㄟㄠ', 'ⅠⅡa bㄟㄠ']->matchfuzzy('ⅠⅡabㄟㄠ')) - else - call assert_equal(['ⅠⅡabㄟㄠ'] + sort(['ⅠⅡa_bㄟㄠ', 'ⅠⅡa bㄟㄠ']), + call assert_equal(['ⅠⅡabㄟㄠ'] + sort(['ⅠⅡa_bㄟㄠ', 'ⅠⅡa bㄟㄠ']), \ ['ⅠⅡabㄟㄠ', 'ⅠⅡa bㄟㄠ', 'ⅠⅡa_bㄟㄠ']->matchfuzzy('ⅠⅡabㄟㄠ')) - endif + " preference for match after a separator (_ or space) + call assert_equal(['ㄓㄔabㄟㄠ', 'ㄓㄔa_bㄟㄠ', 'ㄓㄔa bㄟㄠ'], + \ ['ㄓㄔa_bㄟㄠ', 'ㄓㄔa bㄟㄠ', 'ㄓㄔabㄟㄠ']->matchfuzzy('ㄓㄔabㄟㄠ')) " preference for leading letter match call assert_equal(['ŗŝţũŵż', 'xŗŝţũŵż'], \ ['xŗŝţũŵż', 'ŗŝţũŵż']->matchfuzzy('ŗŝţũŵż')) @@ -178,6 +203,7 @@ func Test_matchfuzzy_mbyte() \ ['ŗŝţxx', 'ŗŝţ', 'ŗŝţx']->matchfuzzy('ŗŝţ')) endfunc +" Test for matchfuzzypos() with multibyte characters func Test_matchfuzzypos_mbyte() CheckFeature multi_lang call assert_equal([['こんにちは世界'], [[0, 1, 2, 3, 4]]], @@ -198,9 +224,13 @@ func Test_matchfuzzypos_mbyte() call assert_equal(range(256), x[1][0]) call assert_equal([[], []], matchfuzzypos([repeat('✓', 300)], repeat('✓', 257))) + " match multiple words (separated by space) + call assert_equal([['세 마리의 작은 돼지'], [[9, 10, 2, 3, 4]]], ['세 마리의 작은 돼지', '마리의', '마리의 작은', '작은 돼지']->matchfuzzypos('돼지 마리의')) + call assert_equal([[], []], ['세 마리의 작은 돼지', '마리의', '마리의 작은', '작은 돼지']->matchfuzzypos('파란 하늘')) + " match in a long string - call assert_equal([[repeat('♪', 300) .. '✗✗✗'], [[300, 301, 302]]], - \ matchfuzzypos([repeat('♪', 300) .. '✗✗✗'], '✗✗✗')) + call assert_equal([[repeat('ぶ', 300) .. 'ẼẼẼ'], [[300, 301, 302]]], + \ matchfuzzypos([repeat('ぶ', 300) .. 'ẼẼẼ'], 'ẼẼẼ')) " preference for camel case match call assert_equal([['xѳѵҁxxѳѴҁ'], [[6, 7, 8]]], matchfuzzypos(['xѳѵҁxxѳѴҁ'], 'ѳѵҁ')) " preference for match after a separator (_ or space) From 712c7e5d5fe8fbd93e47b8064a24ff9346720402 Mon Sep 17 00:00:00 2001 From: Sean Dewar Date: Sat, 1 Jan 2022 22:35:43 +0000 Subject: [PATCH 5/8] vim-patch:8.2.1921: fuzzy matching does not recognize path separators Problem: Fuzzy matching does not recognize path separators. Solution: Add a bonus for slash and backslash. (Yegappan Lakshmanan, closes vim/vim#7225) https://github.com/vim/vim/commit/dcdd42a8ccb9bafd857735d694b074269f337333 --- src/nvim/search.c | 15 +++++++++------ src/nvim/testdir/test_matchfuzzy.vim | 2 ++ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/nvim/search.c b/src/nvim/search.c index a021466446..b08de3177e 100644 --- a/src/nvim/search.c +++ b/src/nvim/search.c @@ -4823,8 +4823,10 @@ typedef struct { /// bonus for adjacent matches; this is higher than SEPARATOR_BONUS so that /// matching a whole word is preferred. #define SEQUENTIAL_BONUS 40 -/// bonus if match occurs after a separator -#define SEPARATOR_BONUS 30 +/// bonus if match occurs after a path separator +#define PATH_SEPARATOR_BONUS 30 +/// bonus if match occurs after a word separator +#define WORD_SEPARATOR_BONUS 25 /// bonus if match is uppercase and prev is lower #define CAMEL_BONUS 30 /// bonus if the first letter is matched @@ -4895,10 +4897,11 @@ static int fuzzy_match_compute_score(const char_u *const str, const int strSz, score += CAMEL_BONUS; } - // Separator - const bool neighborSeparator = neighbor == '_' || neighbor == ' '; - if (neighborSeparator) { - score += SEPARATOR_BONUS; + // Bonus if the match follows a separator character + if (neighbor == '/' || neighbor == '\\') { + score += PATH_SEPARATOR_BONUS; + } else if (neighbor == ' ' || neighbor == '_') { + score += WORD_SEPARATOR_BONUS; } } else { // First letter diff --git a/src/nvim/testdir/test_matchfuzzy.vim b/src/nvim/testdir/test_matchfuzzy.vim index 28367b878d..d67d9bd893 100644 --- a/src/nvim/testdir/test_matchfuzzy.vim +++ b/src/nvim/testdir/test_matchfuzzy.vim @@ -45,6 +45,8 @@ func Test_matchfuzzy() call assert_equal(['.vim/vimrc', '.vim/vimrc_colors', '.vim/v_i_m_r_c'], ['.vim/vimrc', '.vim/vimrc_colors', '.vim/v_i_m_r_c']->matchfuzzy('vimrc')) " gap penalty call assert_equal(['xxayybxxxx', 'xxayyybxxx', 'xxayyyybxx'], ['xxayyyybxx', 'xxayyybxxx', 'xxayybxxxx']->matchfuzzy('ab')) + " path separator vs word separator + call assert_equal(['color/setup.vim', 'color\\setup.vim', 'color setup.vim', 'color_setup.vim', 'colorsetup.vim'], matchfuzzy(['colorsetup.vim', 'color setup.vim', 'color/setup.vim', 'color_setup.vim', 'color\\setup.vim'], 'setup.vim')) " match multiple words (separated by space) call assert_equal(['foo bar baz'], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzy('baz foo')) From 715fbcbb8c6ec4385d1168b1260fdb991d4e6fc5 Mon Sep 17 00:00:00 2001 From: Sean Dewar Date: Sat, 1 Jan 2022 22:40:42 +0000 Subject: [PATCH 6/8] vim-patch:8.2.2280: fuzzy matching doesn't give access to the scores Problem: Fuzzy matching doesn't give access to the scores. Solution: Return the scores with a third list. (Yegappan Lakshmanan, closes vim/vim#7596) https://github.com/vim/vim/commit/9d19e4f4ba55f8bef18d4991abdf740ff6472dba Remove seemingly useless NULL checks. assert that removing the li one wasn't dumb. --- runtime/doc/builtin.txt | 14 +++-- src/nvim/search.c | 27 ++++++--- src/nvim/testdir/test_matchfuzzy.vim | 88 ++++++++++++++-------------- 3 files changed, 72 insertions(+), 57 deletions(-) diff --git a/runtime/doc/builtin.txt b/runtime/doc/builtin.txt index 745f1d9116..45c03fc447 100644 --- a/runtime/doc/builtin.txt +++ b/runtime/doc/builtin.txt @@ -4919,23 +4919,25 @@ matchfuzzy({list}, {str} [, {dict}]) *matchfuzzy()* matchfuzzypos({list}, {str} [, {dict}]) *matchfuzzypos()* Same as |matchfuzzy()|, but returns the list of matched - strings and the list of character positions where characters - in {str} matches. + strings, the list of character positions where characters + in {str} matches and a list of matching scores. You can + use |byteidx()| to convert a character position to a byte + position. If {str} matches multiple times in a string, then only the positions for the best match is returned. If there are no matching strings or there is an error, then a - list with two empty list items is returned. + list with three empty list items is returned. Example: > :echo matchfuzzypos(['testing'], 'tsg') -< results in [['testing'], [[0, 2, 6]]] > +< results in [['testing'], [[0, 2, 6]], [99]] > :echo matchfuzzypos(['clay', 'lacy'], 'la') -< results in [['lacy', 'clay'], [[0, 1], [1, 2]]] > +< results in [['lacy', 'clay'], [[0, 1], [1, 2]], [153, 133]] > :echo [{'text': 'hello', 'id' : 10}] \ ->matchfuzzypos('ll', {'key' : 'text'}) -< results in [{'id': 10, 'text': 'hello'}] [[2, 3]] +< results in [[{'id': 10, 'text': 'hello'}], [[2, 3]], [127]] matchlist({expr}, {pat} [, {start} [, {count}]]) *matchlist()* Same as |match()|, but return a |List|. The first item in the diff --git a/src/nvim/search.c b/src/nvim/search.c index b08de3177e..960c0e97c0 100644 --- a/src/nvim/search.c +++ b/src/nvim/search.c @@ -5173,10 +5173,10 @@ static void fuzzy_match_in_list(list_T *const items, char_u *const str, const bo // For matchfuzzy(), return a list of matched strings. // ['str1', 'str2', 'str3'] - // For matchfuzzypos(), return a list with two items. + // For matchfuzzypos(), return a list with three items. // The first item is a list of matched strings. The second item // is a list of lists where each list item is a list of matched - // character positions. + // character positions. The third item is a list of matching scores. // [['str1', 'str2', 'str3'], [[1, 3], [1, 3], [1, 3]]] list_T *l; if (retmatchpos) { @@ -5197,7 +5197,7 @@ static void fuzzy_match_in_list(list_T *const items, char_u *const str, const bo // next copy the list of matching positions if (retmatchpos) { - const listitem_T *const li = tv_list_find(fmatchlist, -1); + const listitem_T *li = tv_list_find(fmatchlist, -2); assert(li != NULL && TV_LIST_ITEM_TV(li)->vval.v_list != NULL); l = TV_LIST_ITEM_TV(li)->vval.v_list; for (i = 0; i < len; i++) { @@ -5206,6 +5206,17 @@ static void fuzzy_match_in_list(list_T *const items, char_u *const str, const bo } tv_list_append_list(l, ptrs[i].lmatchpos); } + + // copy the matching scores + li = tv_list_find(fmatchlist, -1); + assert(li != NULL && TV_LIST_ITEM_TV(li)->vval.v_list != NULL); + l = TV_LIST_ITEM_TV(li)->vval.v_list; + for (i = 0; i < len; i++) { + if (ptrs[i].score == SCORE_NONE) { + break; + } + tv_list_append_number(l, ptrs[i].score); + } } } xfree(ptrs); @@ -5257,11 +5268,13 @@ static void do_fuzzymatch(const typval_T *const argvars, typval_T *const rettv, } // get the fuzzy matches - tv_list_alloc_ret(rettv, retmatchpos ? 2 : kListLenUnknown); + tv_list_alloc_ret(rettv, retmatchpos ? 3 : kListLenUnknown); if (retmatchpos) { - // For matchfuzzypos(), a list with two items are returned. First item - // is a list of matching strings and the second item is a list of - // lists with matching positions within each string. + // For matchfuzzypos(), a list with three items are returned. First + // item is a list of matching strings, the second item is a list of + // lists with matching positions within each string and the third item + // is the list of scores of the matches. + tv_list_append_list(rettv->vval.v_list, tv_list_alloc(kListLenUnknown)); tv_list_append_list(rettv->vval.v_list, tv_list_alloc(kListLenUnknown)); tv_list_append_list(rettv->vval.v_list, tv_list_alloc(kListLenUnknown)); } diff --git a/src/nvim/testdir/test_matchfuzzy.vim b/src/nvim/testdir/test_matchfuzzy.vim index d67d9bd893..abcc9b40c1 100644 --- a/src/nvim/testdir/test_matchfuzzy.vim +++ b/src/nvim/testdir/test_matchfuzzy.vim @@ -102,55 +102,55 @@ endfunc " Test for the matchfuzzypos() function func Test_matchfuzzypos() - call assert_equal([['curl', 'world'], [[2,3], [2,3]]], matchfuzzypos(['world', 'curl'], 'rl')) - call assert_equal([['curl', 'world'], [[2,3], [2,3]]], matchfuzzypos(['world', 'one', 'curl'], 'rl')) + call assert_equal([['curl', 'world'], [[2,3], [2,3]], [128, 127]], matchfuzzypos(['world', 'curl'], 'rl')) + call assert_equal([['curl', 'world'], [[2,3], [2,3]], [128, 127]], matchfuzzypos(['world', 'one', 'curl'], 'rl')) call assert_equal([['hello', 'hello world hello world'], - \ [[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]]], + \ [[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]], [275, 257]], \ matchfuzzypos(['hello world hello world', 'hello', 'world'], 'hello')) - call assert_equal([['aaaaaaa'], [[0, 1, 2]]], matchfuzzypos(['aaaaaaa'], 'aaa')) - call assert_equal([['a b'], [[0, 3]]], matchfuzzypos(['a b'], 'a b')) - call assert_equal([['a b'], [[0, 3]]], matchfuzzypos(['a b'], 'a b')) - call assert_equal([['a b'], [[0]]], matchfuzzypos(['a b'], ' a ')) - call assert_equal([[], []], matchfuzzypos(['a b'], ' ')) - call assert_equal([[], []], matchfuzzypos(['world', 'curl'], 'ab')) + call assert_equal([['aaaaaaa'], [[0, 1, 2]], [191]], matchfuzzypos(['aaaaaaa'], 'aaa')) + call assert_equal([['a b'], [[0, 3]], [219]], matchfuzzypos(['a b'], 'a b')) + call assert_equal([['a b'], [[0, 3]], [219]], matchfuzzypos(['a b'], 'a b')) + call assert_equal([['a b'], [[0]], [112]], matchfuzzypos(['a b'], ' a ')) + call assert_equal([[], [], []], matchfuzzypos(['a b'], ' ')) + call assert_equal([[], [], []], matchfuzzypos(['world', 'curl'], 'ab')) let x = matchfuzzypos([repeat('a', 256)], repeat('a', 256)) call assert_equal(range(256), x[1][0]) - call assert_equal([[], []], matchfuzzypos([repeat('a', 300)], repeat('a', 257))) - call assert_equal([[], []], matchfuzzypos([], 'abc')) + call assert_equal([[], [], []], matchfuzzypos([repeat('a', 300)], repeat('a', 257))) + call assert_equal([[], [], []], matchfuzzypos([], 'abc')) " match in a long string - call assert_equal([[repeat('x', 300) .. 'abc'], [[300, 301, 302]]], + call assert_equal([[repeat('x', 300) .. 'abc'], [[300, 301, 302]], [-135]], \ matchfuzzypos([repeat('x', 300) .. 'abc'], 'abc')) " preference for camel case match - call assert_equal([['xabcxxaBc'], [[6, 7, 8]]], matchfuzzypos(['xabcxxaBc'], 'abc')) + call assert_equal([['xabcxxaBc'], [[6, 7, 8]], [189]], matchfuzzypos(['xabcxxaBc'], 'abc')) " preference for match after a separator (_ or space) - call assert_equal([['xabx_ab'], [[5, 6]]], matchfuzzypos(['xabx_ab'], 'ab')) + call assert_equal([['xabx_ab'], [[5, 6]], [145]], matchfuzzypos(['xabx_ab'], 'ab')) " preference for leading letter match - call assert_equal([['abcxabc'], [[0, 1]]], matchfuzzypos(['abcxabc'], 'ab')) + call assert_equal([['abcxabc'], [[0, 1]], [150]], matchfuzzypos(['abcxabc'], 'ab')) " preference for sequential match - call assert_equal([['aobncedone'], [[7, 8, 9]]], matchfuzzypos(['aobncedone'], 'one')) + call assert_equal([['aobncedone'], [[7, 8, 9]], [158]], matchfuzzypos(['aobncedone'], 'one')) " best recursive match - call assert_equal([['xoone'], [[2, 3, 4]]], matchfuzzypos(['xoone'], 'one')) + call assert_equal([['xoone'], [[2, 3, 4]], [168]], matchfuzzypos(['xoone'], 'one')) " match multiple words (separated by space) - call assert_equal([['foo bar baz'], [[8, 9, 10, 0, 1, 2]]], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('baz foo')) - call assert_equal([[], []], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('one two')) - call assert_equal([[], []], ['foo bar']->matchfuzzypos(" \t ")) - call assert_equal([['grace'], [[1, 2, 3, 4, 2, 3, 4, 0, 1, 2, 3, 4]]], ['grace']->matchfuzzypos('race ace grace')) + call assert_equal([['foo bar baz'], [[8, 9, 10, 0, 1, 2]], [369]], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('baz foo')) + call assert_equal([[], [], []], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('one two')) + call assert_equal([[], [], []], ['foo bar']->matchfuzzypos(" \t ")) + call assert_equal([['grace'], [[1, 2, 3, 4, 2, 3, 4, 0, 1, 2, 3, 4]], [657]], ['grace']->matchfuzzypos('race ace grace')) let l = [{'id' : 5, 'val' : 'crayon'}, {'id' : 6, 'val' : 'camera'}] - call assert_equal([[{'id' : 6, 'val' : 'camera'}], [[0, 1, 2]]], + call assert_equal([[{'id' : 6, 'val' : 'camera'}], [[0, 1, 2]], [192]], \ matchfuzzypos(l, 'cam', {'text_cb' : {v -> v.val}})) - call assert_equal([[{'id' : 6, 'val' : 'camera'}], [[0, 1, 2]]], + call assert_equal([[{'id' : 6, 'val' : 'camera'}], [[0, 1, 2]], [192]], \ matchfuzzypos(l, 'cam', {'key' : 'val'})) - call assert_equal([[], []], matchfuzzypos(l, 'day', {'text_cb' : {v -> v.val}})) - call assert_equal([[], []], matchfuzzypos(l, 'day', {'key' : 'val'})) + call assert_equal([[], [], []], matchfuzzypos(l, 'day', {'text_cb' : {v -> v.val}})) + call assert_equal([[], [], []], matchfuzzypos(l, 'day', {'key' : 'val'})) call assert_fails("let x = matchfuzzypos(l, 'cam', 'random')", 'E715:') - call assert_equal([[], []], matchfuzzypos(l, 'day', {'text_cb' : {v -> []}})) - call assert_equal([[], []], matchfuzzypos(l, 'day', {'text_cb' : {v -> 1}})) + call assert_equal([[], [], []], matchfuzzypos(l, 'day', {'text_cb' : {v -> []}})) + call assert_equal([[], [], []], matchfuzzypos(l, 'day', {'text_cb' : {v -> 1}})) call assert_fails("let x = matchfuzzypos(l, 'day', {'text_cb' : {a, b -> 1}})", 'E119:') - call assert_equal([[], []], matchfuzzypos(l, 'cam')) + call assert_equal([[], [], []], matchfuzzypos(l, 'cam')) " Nvim's callback implementation is different, so E6000 is expected instead, " but we need v8.2.1183 to assert it " call assert_fails("let x = matchfuzzypos(l, 'cam', {'text_cb' : []})", 'E921:') @@ -208,41 +208,41 @@ endfunc " Test for matchfuzzypos() with multibyte characters func Test_matchfuzzypos_mbyte() CheckFeature multi_lang - call assert_equal([['こんにちは世界'], [[0, 1, 2, 3, 4]]], + call assert_equal([['こんにちは世界'], [[0, 1, 2, 3, 4]], [273]], \ matchfuzzypos(['こんにちは世界'], 'こんにちは')) - call assert_equal([['ンヹㄇヺヴ'], [[1, 3]]], matchfuzzypos(['ンヹㄇヺヴ'], 'ヹヺ')) + call assert_equal([['ンヹㄇヺヴ'], [[1, 3]], [88]], matchfuzzypos(['ンヹㄇヺヴ'], 'ヹヺ')) " reverse the order of characters - call assert_equal([[], []], matchfuzzypos(['ンヹㄇヺヴ'], 'ヺヹ')) - call assert_equal([['αβΩxxx', 'xαxβxΩx'], [[0, 1, 2], [1, 3, 5]]], + call assert_equal([[], [], []], matchfuzzypos(['ンヹㄇヺヴ'], 'ヺヹ')) + call assert_equal([['αβΩxxx', 'xαxβxΩx'], [[0, 1, 2], [1, 3, 5]], [222, 113]], \ matchfuzzypos(['αβΩxxx', 'xαxβxΩx'], 'αβΩ')) call assert_equal([['ππbbππ', 'πππbbbπππ', 'ππππbbbbππππ', 'πbπ'], - \ [[0, 1], [0, 1], [0, 1], [0, 2]]], + \ [[0, 1], [0, 1], [0, 1], [0, 2]], [151, 148, 145, 110]], \ matchfuzzypos(['πbπ', 'ππbbππ', 'πππbbbπππ', 'ππππbbbbππππ'], 'ππ')) - call assert_equal([['ααααααα'], [[0, 1, 2]]], + call assert_equal([['ααααααα'], [[0, 1, 2]], [191]], \ matchfuzzypos(['ααααααα'], 'ααα')) - call assert_equal([[], []], matchfuzzypos(['ンヹㄇ', 'ŗŝţ'], 'fffifl')) + call assert_equal([[], [], []], matchfuzzypos(['ンヹㄇ', 'ŗŝţ'], 'fffifl')) let x = matchfuzzypos([repeat('Ψ', 256)], repeat('Ψ', 256)) call assert_equal(range(256), x[1][0]) - call assert_equal([[], []], matchfuzzypos([repeat('✓', 300)], repeat('✓', 257))) + call assert_equal([[], [], []], matchfuzzypos([repeat('✓', 300)], repeat('✓', 257))) " match multiple words (separated by space) - call assert_equal([['세 마리의 작은 돼지'], [[9, 10, 2, 3, 4]]], ['세 마리의 작은 돼지', '마리의', '마리의 작은', '작은 돼지']->matchfuzzypos('돼지 마리의')) - call assert_equal([[], []], ['세 마리의 작은 돼지', '마리의', '마리의 작은', '작은 돼지']->matchfuzzypos('파란 하늘')) + call assert_equal([['세 마리의 작은 돼지'], [[9, 10, 2, 3, 4]], [328]], ['세 마리의 작은 돼지', '마리의', '마리의 작은', '작은 돼지']->matchfuzzypos('돼지 마리의')) + call assert_equal([[], [], []], ['세 마리의 작은 돼지', '마리의', '마리의 작은', '작은 돼지']->matchfuzzypos('파란 하늘')) " match in a long string - call assert_equal([[repeat('ぶ', 300) .. 'ẼẼẼ'], [[300, 301, 302]]], + call assert_equal([[repeat('ぶ', 300) .. 'ẼẼẼ'], [[300, 301, 302]], [-135]], \ matchfuzzypos([repeat('ぶ', 300) .. 'ẼẼẼ'], 'ẼẼẼ')) " preference for camel case match - call assert_equal([['xѳѵҁxxѳѴҁ'], [[6, 7, 8]]], matchfuzzypos(['xѳѵҁxxѳѴҁ'], 'ѳѵҁ')) + call assert_equal([['xѳѵҁxxѳѴҁ'], [[6, 7, 8]], [189]], matchfuzzypos(['xѳѵҁxxѳѴҁ'], 'ѳѵҁ')) " preference for match after a separator (_ or space) - call assert_equal([['xちだx_ちだ'], [[5, 6]]], matchfuzzypos(['xちだx_ちだ'], 'ちだ')) + call assert_equal([['xちだx_ちだ'], [[5, 6]], [145]], matchfuzzypos(['xちだx_ちだ'], 'ちだ')) " preference for leading letter match - call assert_equal([['ѳѵҁxѳѵҁ'], [[0, 1]]], matchfuzzypos(['ѳѵҁxѳѵҁ'], 'ѳѵ')) + call assert_equal([['ѳѵҁxѳѵҁ'], [[0, 1]], [150]], matchfuzzypos(['ѳѵҁxѳѵҁ'], 'ѳѵ')) " preference for sequential match - call assert_equal([['aンbヹcㄇdンヹㄇ'], [[7, 8, 9]]], matchfuzzypos(['aンbヹcㄇdンヹㄇ'], 'ンヹㄇ')) + call assert_equal([['aンbヹcㄇdンヹㄇ'], [[7, 8, 9]], [158]], matchfuzzypos(['aンbヹcㄇdンヹㄇ'], 'ンヹㄇ')) " best recursive match - call assert_equal([['xффйд'], [[2, 3, 4]]], matchfuzzypos(['xффйд'], 'фйд')) + call assert_equal([['xффйд'], [[2, 3, 4]], [168]], matchfuzzypos(['xффйд'], 'фйд')) endfunc " vim: shiftwidth=2 sts=2 expandtab From ce797e08f539dc24d16ea8c02591296bbbc83772 Mon Sep 17 00:00:00 2001 From: Sean Dewar Date: Sat, 1 Jan 2022 23:06:09 +0000 Subject: [PATCH 7/8] vim-patch:8.2.2813: cannot grep using fuzzy matching Problem: Cannot grep using fuzzy matching. Solution: Add the "f" flag to :vimgrep. (Yegappan Lakshmanan, closes vim/vim#8152) https://github.com/vim/vim/commit/bb01a1ef3a093cdb36877ba73474719c531dc8cb --- runtime/doc/quickfix.txt | 8 +- src/nvim/ex_cmds.c | 6 +- src/nvim/quickfix.c | 121 +++++++++++++++++++---------- src/nvim/quickfix.h | 1 + src/nvim/search.c | 43 +++++----- src/nvim/search.h | 3 + src/nvim/testdir/test_quickfix.vim | 50 +++++++++++- 7 files changed, 162 insertions(+), 70 deletions(-) diff --git a/runtime/doc/quickfix.txt b/runtime/doc/quickfix.txt index bb4d807413..c7289cfca6 100644 --- a/runtime/doc/quickfix.txt +++ b/runtime/doc/quickfix.txt @@ -989,7 +989,7 @@ commands can be combined to create a NewGrep command: > 5.1 using Vim's internal grep *:vim* *:vimgrep* *E682* *E683* -:vim[grep][!] /{pattern}/[g][j] {file} ... +:vim[grep][!] /{pattern}/[g][j][f] {file} ... Search for {pattern} in the files {file} ... and set the error list to the matches. Files matching 'wildignore' are ignored; files in 'suffixes' are @@ -1042,20 +1042,20 @@ commands can be combined to create a NewGrep command: > :vimgrep Error *.c < *:lv* *:lvimgrep* -:lv[imgrep][!] /{pattern}/[g][j] {file} ... +:lv[imgrep][!] /{pattern}/[g][j][f] {file} ... :lv[imgrep][!] {pattern} {file} ... Same as ":vimgrep", except the location list for the current window is used instead of the quickfix list. *:vimgrepa* *:vimgrepadd* -:vimgrepa[dd][!] /{pattern}/[g][j] {file} ... +:vimgrepa[dd][!] /{pattern}/[g][j][f] {file} ... :vimgrepa[dd][!] {pattern} {file} ... Just like ":vimgrep", but instead of making a new list of errors the matches are appended to the current list. *:lvimgrepa* *:lvimgrepadd* -:lvimgrepa[dd][!] /{pattern}/[g][j] {file} ... +:lvimgrepa[dd][!] /{pattern}/[g][j][f] {file} ... :lvimgrepa[dd][!] {pattern} {file} ... Same as ":vimgrepadd", except the location list for the current window is used instead of the quickfix diff --git a/src/nvim/ex_cmds.c b/src/nvim/ex_cmds.c index 3b3d4e50cc..81fce3565a 100644 --- a/src/nvim/ex_cmds.c +++ b/src/nvim/ex_cmds.c @@ -6141,12 +6141,14 @@ char_u *skip_vimgrep_pat(char_u *p, char_u **s, int *flags) p++; // Find the flags - while (*p == 'g' || *p == 'j') { + while (*p == 'g' || *p == 'j' || *p == 'f') { if (flags != NULL) { if (*p == 'g') { *flags |= VGR_GLOBAL; - } else { + } else if (*p == 'j') { *flags |= VGR_NOJUMP; + } else { + *flags |= VGR_FUZZY; } } p++; diff --git a/src/nvim/quickfix.c b/src/nvim/quickfix.c index 0196e05455..c609daa55c 100644 --- a/src/nvim/quickfix.c +++ b/src/nvim/quickfix.c @@ -5194,49 +5194,93 @@ static bool vgr_qflist_valid(win_T *wp, qf_info_T *qi, unsigned qfid, char_u *ti /// Search for a pattern in all the lines in a buffer and add the matching lines /// to a quickfix list. -static bool vgr_match_buflines(qf_list_T *qfl, char_u *fname, buf_T *buf, regmmatch_T *regmatch, - long *tomatch, int duplicate_name, int flags) - FUNC_ATTR_NONNULL_ARG(1, 3, 4, 5) +static bool vgr_match_buflines(qf_list_T *qfl, char_u *fname, buf_T *buf, char_u *spat, + regmmatch_T *regmatch, long *tomatch, int duplicate_name, int flags) + FUNC_ATTR_NONNULL_ARG(1, 3, 4, 5, 6) { bool found_match = false; for (long lnum = 1; lnum <= buf->b_ml.ml_line_count && *tomatch > 0; lnum++) { colnr_T col = 0; - while (vim_regexec_multi(regmatch, curwin, buf, lnum, col, NULL, - NULL) > 0) { - // Pass the buffer number so that it gets used even for a - // dummy buffer, unless duplicate_name is set, then the - // buffer will be wiped out below. - if (qf_add_entry(qfl, - NULL, // dir - fname, - NULL, - duplicate_name ? 0 : buf->b_fnum, - ml_get_buf(buf, regmatch->startpos[0].lnum + lnum, - false), - regmatch->startpos[0].lnum + lnum, - regmatch->endpos[0].lnum + lnum, - regmatch->startpos[0].col + 1, - regmatch->endpos[0].col + 1, - false, // vis_col - NULL, // search pattern - 0, // nr - 0, // type - true) // valid - == QF_FAIL) { - got_int = true; - break; + if (!(flags & VGR_FUZZY)) { + // Regular expression match + while (vim_regexec_multi(regmatch, curwin, buf, lnum, col, NULL, NULL) > 0) { + // Pass the buffer number so that it gets used even for a + // dummy buffer, unless duplicate_name is set, then the + // buffer will be wiped out below. + if (qf_add_entry(qfl, + NULL, // dir + fname, + NULL, + duplicate_name ? 0 : buf->b_fnum, + ml_get_buf(buf, regmatch->startpos[0].lnum + lnum, false), + regmatch->startpos[0].lnum + lnum, + regmatch->endpos[0].lnum + lnum, + regmatch->startpos[0].col + 1, + regmatch->endpos[0].col + 1, + false, // vis_col + NULL, // search pattern + 0, // nr + 0, // type + true) // valid + == QF_FAIL) { + got_int = true; + break; + } + found_match = true; + if (--*tomatch == 0) { + break; + } + if ((flags & VGR_GLOBAL) == 0 || regmatch->endpos[0].lnum > 0) { + break; + } + col = regmatch->endpos[0].col + (col == regmatch->endpos[0].col); + if (col > (colnr_T)STRLEN(ml_get_buf(buf, lnum, false))) { + break; + } } - found_match = true; - if (--*tomatch == 0) { - break; - } - if ((flags & VGR_GLOBAL) == 0 || regmatch->endpos[0].lnum > 0) { - break; - } - col = regmatch->endpos[0].col + (col == regmatch->endpos[0].col); - if (col > (colnr_T)STRLEN(ml_get_buf(buf, lnum, false))) { - break; + } else { + const size_t pat_len = STRLEN(spat); + char_u *const str = ml_get_buf(buf, lnum, false); + int score; + uint32_t matches[MAX_FUZZY_MATCHES]; + const size_t sz = sizeof(matches) / sizeof(matches[0]); + + // Fuzzy string match + while (fuzzy_match(str + col, spat, false, &score, matches, (int)sz) > 0) { + // Pass the buffer number so that it gets used even for a + // dummy buffer, unless duplicate_name is set, then the + // buffer will be wiped out below. + if (qf_add_entry(qfl, + NULL, // dir + fname, + NULL, + duplicate_name ? 0 : buf->b_fnum, + str, + lnum, + 0, + (colnr_T)matches[0] + col + 1, + 0, + false, // vis_col + NULL, // search pattern + 0, // nr + 0, // type + true) // valid + == QF_FAIL) { + got_int = true; + break; + } + found_match = true; + if (--*tomatch == 0) { + break; + } + if ((flags & VGR_GLOBAL) == 0) { + break; + } + col = (colnr_T)matches[pat_len - 1] + col + 1; + if (col > (colnr_T)STRLEN(str)) { + break; + } } } line_breakcheck(); @@ -5418,8 +5462,7 @@ void ex_vimgrep(exarg_T *eap) } else { // Try for a match in all lines of the buffer. // For ":1vimgrep" look for first match only. - found_match = vgr_match_buflines(qf_get_curlist(qi), - fname, buf, ®match, &tomatch, + found_match = vgr_match_buflines(qf_get_curlist(qi), fname, buf, s, ®match, &tomatch, duplicate_name, flags); if (using_dummy) { diff --git a/src/nvim/quickfix.h b/src/nvim/quickfix.h index f5178e332a..0da43e436c 100644 --- a/src/nvim/quickfix.h +++ b/src/nvim/quickfix.h @@ -7,6 +7,7 @@ // flags for skip_vimgrep_pat() #define VGR_GLOBAL 1 #define VGR_NOJUMP 2 +#define VGR_FUZZY 4 #ifdef INCLUDE_GENERATED_DECLARATIONS # include "quickfix.h.generated.h" diff --git a/src/nvim/search.c b/src/nvim/search.c index 960c0e97c0..3a2435e07a 100644 --- a/src/nvim/search.c +++ b/src/nvim/search.c @@ -48,8 +48,6 @@ #include "nvim/vim.h" #include "nvim/window.h" -typedef uint32_t matchidx_T; - #ifdef INCLUDE_GENERATED_DECLARATIONS # include "search.c.generated.h" #endif @@ -4843,13 +4841,11 @@ typedef struct { #define SCORE_NONE -9999 #define FUZZY_MATCH_RECURSION_LIMIT 10 -/// Maximum number of characters that can be fuzzy matched -#define MAXMATCHES 256 /// Compute a score for a fuzzy matched string. The matching character locations /// are in 'matches'. static int fuzzy_match_compute_score(const char_u *const str, const int strSz, - const matchidx_T *const matches, const int numMatches) + const uint32_t *const matches, const int numMatches) FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_PURE { // Initialize score @@ -4868,10 +4864,10 @@ static int fuzzy_match_compute_score(const char_u *const str, const int strSz, // Apply ordering bonuses for (int i = 0; i < numMatches; i++) { - const matchidx_T currIdx = matches[i]; + const uint32_t currIdx = matches[i]; if (i > 0) { - const matchidx_T prevIdx = matches[i - 1]; + const uint32_t prevIdx = matches[i - 1]; // Sequential if (currIdx == prevIdx + 1) { @@ -4887,7 +4883,7 @@ static int fuzzy_match_compute_score(const char_u *const str, const int strSz, const char_u *p = str; int neighbor; - for (matchidx_T sidx = 0; sidx < currIdx; sidx++) { + for (uint32_t sidx = 0; sidx < currIdx; sidx++) { neighbor = utf_ptr2char(p); MB_PTR_ADV(p); } @@ -4913,16 +4909,16 @@ static int fuzzy_match_compute_score(const char_u *const str, const int strSz, /// Perform a recursive search for fuzzy matching 'fuzpat' in 'str'. /// @return the number of matching characters. -static int fuzzy_match_recursive(const char_u *fuzpat, const char_u *str, matchidx_T strIdx, +static int fuzzy_match_recursive(const char_u *fuzpat, const char_u *str, uint32_t strIdx, int *const outScore, const char_u *const strBegin, - const int strLen, const matchidx_T *const srcMatches, - matchidx_T *const matches, const int maxMatches, int nextMatch, + const int strLen, const uint32_t *const srcMatches, + uint32_t *const matches, const int maxMatches, int nextMatch, int *const recursionCount) FUNC_ATTR_NONNULL_ARG(1, 2, 4, 5, 8, 11) FUNC_ATTR_WARN_UNUSED_RESULT { // Recursion params bool recursiveMatch = false; - matchidx_T bestRecursiveMatches[MAXMATCHES]; + uint32_t bestRecursiveMatches[MAX_FUZZY_MATCHES]; int bestRecursiveScore = 0; // Count recursions @@ -4932,7 +4928,7 @@ static int fuzzy_match_recursive(const char_u *fuzpat, const char_u *str, matchi } // Detect end of strings - if (*fuzpat == '\0' || *str == '\0') { + if (*fuzpat == NUL || *str == NUL) { return 0; } @@ -4956,7 +4952,7 @@ static int fuzzy_match_recursive(const char_u *fuzpat, const char_u *str, matchi } // Recursive call that "skips" this match - matchidx_T recursiveMatches[MAXMATCHES]; + uint32_t recursiveMatches[MAX_FUZZY_MATCHES]; int recursiveScore = 0; const char_u *const next_char = str + utfc_ptr2len(str); if (fuzzy_match_recursive(fuzpat, next_char, strIdx + 1, &recursiveScore, strBegin, strLen, @@ -4965,7 +4961,8 @@ static int fuzzy_match_recursive(const char_u *fuzpat, const char_u *str, matchi recursionCount)) { // Pick best recursive score if (!recursiveMatch || recursiveScore > bestRecursiveScore) { - memcpy(bestRecursiveMatches, recursiveMatches, MAXMATCHES * sizeof(recursiveMatches[0])); + memcpy(bestRecursiveMatches, recursiveMatches, + MAX_FUZZY_MATCHES * sizeof(recursiveMatches[0])); bestRecursiveScore = recursiveScore; } recursiveMatch = true; @@ -5008,13 +5005,13 @@ static int fuzzy_match_recursive(const char_u *fuzpat, const char_u *str, matchi /// normalized and varies with pattern. /// Recursion is limited internally (default=10) to prevent degenerate cases /// (pat_arg="aaaaaa" str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"). -/// Uses char_u for match indices. Therefore patterns are limited to MAXMATCHES -/// characters. +/// Uses char_u for match indices. Therefore patterns are limited to +/// MAX_FUZZY_MATCHES characters. /// /// @return true if 'pat_arg' matches 'str'. Also returns the match score in /// 'outScore' and the matching character positions in 'matches'. -static bool fuzzy_match(char_u *const str, const char_u *const pat_arg, const bool matchseq, - int *const outScore, matchidx_T *const matches, const int maxMatches) +bool fuzzy_match(char_u *const str, const char_u *const pat_arg, const bool matchseq, + int *const outScore, uint32_t *const matches, const int maxMatches) FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT { const int len = mb_charlen(str); @@ -5108,7 +5105,7 @@ static void fuzzy_match_in_list(list_T *const items, char_u *const str, const bo fuzzyItem_T *const ptrs = xcalloc(len, sizeof(fuzzyItem_T)); long i = 0; bool found_match = false; - matchidx_T matches[MAXMATCHES]; + uint32_t matches[MAX_FUZZY_MATCHES]; // For all the string items in items, get the fuzzy matching score TV_LIST_ITER(items, li, { @@ -5250,8 +5247,8 @@ static void do_fuzzymatch(const typval_T *const argvars, typval_T *const rettv, // To search a dict, either a callback function or a key can be // specified. dict_T *const d = argvars[2].vval.v_dict; - const dictitem_T *di; - if ((di = tv_dict_find(d, "key", -1)) != NULL) { + const dictitem_T *const di = tv_dict_find(d, "key", -1); + if (di != NULL) { if (di->di_tv.v_type != VAR_STRING || di->di_tv.vval.v_string == NULL || *di->di_tv.vval.v_string == NUL) { semsg(_(e_invarg2), tv_get_string(&di->di_tv)); @@ -5262,7 +5259,7 @@ static void do_fuzzymatch(const typval_T *const argvars, typval_T *const rettv, semsg(_(e_invargval), "text_cb"); return; } - if ((di = tv_dict_find(d, "matchseq", -1)) != NULL) { + if (tv_dict_find(d, "matchseq", -1) != NULL) { matchseq = true; } } diff --git a/src/nvim/search.h b/src/nvim/search.h index 15b8d41f39..53059cc1ea 100644 --- a/src/nvim/search.h +++ b/src/nvim/search.h @@ -55,6 +55,9 @@ #define SEARCH_STAT_DEF_MAX_COUNT 99 #define SEARCH_STAT_BUF_LEN 12 +/// Maximum number of characters that can be fuzzy matched +#define MAX_FUZZY_MATCHES 256 + /// Structure containing offset definition for the last search pattern /// /// @note Only offset for the last search pattern is used, not for the last diff --git a/src/nvim/testdir/test_quickfix.vim b/src/nvim/testdir/test_quickfix.vim index f137ed5346..00679e1958 100644 --- a/src/nvim/testdir/test_quickfix.vim +++ b/src/nvim/testdir/test_quickfix.vim @@ -32,7 +32,7 @@ func s:setup_commands(cchar) command! -count -nargs=* -bang Xnfile cnfile command! -nargs=* -bang Xpfile cpfile command! -nargs=* Xexpr cexpr - command! -count -nargs=* Xvimgrep vimgrep + command! -count=999 -nargs=* Xvimgrep vimgrep command! -nargs=* Xvimgrepadd vimgrepadd command! -nargs=* Xgrep grep command! -nargs=* Xgrepadd grepadd @@ -69,7 +69,7 @@ func s:setup_commands(cchar) command! -count -nargs=* -bang Xnfile lnfile command! -nargs=* -bang Xpfile lpfile command! -nargs=* Xexpr lexpr - command! -count -nargs=* Xvimgrep lvimgrep + command! -count=999 -nargs=* Xvimgrep lvimgrep command! -nargs=* Xvimgrepadd lvimgrepadd command! -nargs=* Xgrep lgrep command! -nargs=* Xgrepadd lgrepadd @@ -5028,6 +5028,52 @@ func Test_qfbuf_update() call Xqfbuf_update('l') endfunc +" Test for the :vimgrep 'f' flag (fuzzy match) +func Xvimgrep_fuzzy_match(cchar) + call s:setup_commands(a:cchar) + + Xvimgrep /three one/f Xfile* + let l = g:Xgetlist() + call assert_equal(2, len(l)) + call assert_equal(['Xfile1', 1, 9, 'one two three'], + \ [bufname(l[0].bufnr), l[0].lnum, l[0].col, l[0].text]) + call assert_equal(['Xfile2', 2, 1, 'three one two'], + \ [bufname(l[1].bufnr), l[1].lnum, l[1].col, l[1].text]) + + Xvimgrep /the/f Xfile* + let l = g:Xgetlist() + call assert_equal(3, len(l)) + call assert_equal(['Xfile1', 1, 9, 'one two three'], + \ [bufname(l[0].bufnr), l[0].lnum, l[0].col, l[0].text]) + call assert_equal(['Xfile2', 2, 1, 'three one two'], + \ [bufname(l[1].bufnr), l[1].lnum, l[1].col, l[1].text]) + call assert_equal(['Xfile2', 4, 4, 'aaathreeaaa'], + \ [bufname(l[2].bufnr), l[2].lnum, l[2].col, l[2].text]) + + Xvimgrep /aaa/fg Xfile* + let l = g:Xgetlist() + call assert_equal(4, len(l)) + call assert_equal(['Xfile1', 2, 1, 'aaaaaa'], + \ [bufname(l[0].bufnr), l[0].lnum, l[0].col, l[0].text]) + call assert_equal(['Xfile1', 2, 4, 'aaaaaa'], + \ [bufname(l[1].bufnr), l[1].lnum, l[1].col, l[1].text]) + call assert_equal(['Xfile2', 4, 1, 'aaathreeaaa'], + \ [bufname(l[2].bufnr), l[2].lnum, l[2].col, l[2].text]) + call assert_equal(['Xfile2', 4, 9, 'aaathreeaaa'], + \ [bufname(l[3].bufnr), l[3].lnum, l[3].col, l[3].text]) + + call assert_fails('Xvimgrep /xyz/fg Xfile*', 'E480:') +endfunc + +func Test_vimgrep_fuzzy_match() + call writefile(['one two three', 'aaaaaa'], 'Xfile1') + call writefile(['one', 'three one two', 'two', 'aaathreeaaa'], 'Xfile2') + call Xvimgrep_fuzzy_match('c') + call Xvimgrep_fuzzy_match('l') + call delete('Xfile1') + call delete('Xfile2') +endfunc + " Test for getting a specific item from a quickfix list func Xtest_getqflist_by_idx(cchar) call s:setup_commands(a:cchar) From 02e74314459621bfd18b3b3a1e3ac261658dc096 Mon Sep 17 00:00:00 2001 From: Sean Dewar Date: Sun, 2 Jan 2022 00:42:09 +0000 Subject: [PATCH 8/8] docs(fuzzy-match): cherry-pick latest changes https://github.com/vim/vim/commit/53f7fccc9413c9f770694b56f40f242d383b2d5f https://github.com/vim/vim/commit/1b884a0053982335f644eec6c71027706bf3c522 https://github.com/vim/vim/commit/4c295027a426986566cd7a76c47a6d3a529727e7 https://github.com/vim/vim/commit/3ec3217f0491e9ba8aa8ea02f7e454cd19a287ef --- runtime/doc/builtin.txt | 5 ++++- runtime/doc/pattern.txt | 33 +++++++++++++++++++++++++++++++++ runtime/doc/quickfix.txt | 7 +++++++ 3 files changed, 44 insertions(+), 1 deletion(-) diff --git a/runtime/doc/builtin.txt b/runtime/doc/builtin.txt index 45c03fc447..b7a9a06b3c 100644 --- a/runtime/doc/builtin.txt +++ b/runtime/doc/builtin.txt @@ -4862,7 +4862,7 @@ matchend({expr}, {pat} [, {start} [, {count}]]) *matchend()* GetText()->matchend('word') matchfuzzy({list}, {str} [, {dict}]) *matchfuzzy()* - If {list} is a list of strings, then returns a list with all + If {list} is a list of strings, then returns a |List| with all the strings in {list} that fuzzy match {str}. The strings in the returned list are sorted based on the matching score. @@ -4895,6 +4895,9 @@ matchfuzzy({list}, {str} [, {dict}]) *matchfuzzy()* empty list is returned. If length of {str} is greater than 256, then returns an empty list. + Refer to |fuzzy-match| for more information about fuzzy + matching strings. + Example: > :echo matchfuzzy(["clay", "crow"], "cay") < results in ["clay"]. > diff --git a/runtime/doc/pattern.txt b/runtime/doc/pattern.txt index 634145da3e..42005b0d78 100644 --- a/runtime/doc/pattern.txt +++ b/runtime/doc/pattern.txt @@ -1421,5 +1421,38 @@ Finally, these constructs are unique to Perl: are suggested to use ":match" for manual matching and ":2match" for another plugin. +============================================================================== +11. Fuzzy matching *fuzzy-match* + +Fuzzy matching refers to matching strings using a non-exact search string. +Fuzzy matching will match a string, if all the characters in the search string +are present anywhere in the string in the same order. Case is ignored. In a +matched string, other characters can be present between two consecutive +characters in the search string. If the search string has multiple words, then +each word is matched separately. So the words in the search string can be +present in any order in a string. + +Fuzzy matching assigns a score for each matched string based on the following +criteria: + - The number of sequentially matching characters. + - The number of characters (distance) between two consecutive matching + characters. + - Matches at the beginning of a word + - Matches at a camel case character (e.g. Case in CamelCase) + - Matches after a path separator or a hyphen. + - The number of unmatched characters in a string. +The matching string with the highest score is returned first. + +For example, when you search for the "get pat" string using fuzzy matching, it +will match the strings "GetPattern", "PatternGet", "getPattern", "patGetter", +"getSomePattern", "MatchpatternGet" etc. + +The functions |matchfuzzy()| and |matchfuzzypos()| can be used to fuzzy search +a string in a List of strings. The matchfuzzy() function returns a List of +matching strings. The matchfuzzypos() functions returns the List of matches, +the matching positions and the fuzzy match scores. + +The "f" flag of `:vimgrep` enables fuzzy matching. + vim:tw=78:ts=8:noet:ft=help:norl: diff --git a/runtime/doc/quickfix.txt b/runtime/doc/quickfix.txt index c7289cfca6..ed736ad4eb 100644 --- a/runtime/doc/quickfix.txt +++ b/runtime/doc/quickfix.txt @@ -1014,6 +1014,13 @@ commands can be combined to create a NewGrep command: > updated. With the [!] any changes in the current buffer are abandoned. + 'f' When the 'f' flag is specified, fuzzy string + matching is used to find matching lines. In this + case, {pattern} is treated as a literal string + instead of a regular expression. See + |fuzzy-match| for more information about fuzzy + matching strings. + |QuickFixCmdPre| and |QuickFixCmdPost| are triggered. A file that is opened for matching may use a buffer number, but it is reused if possible to avoid