mirror of
https://github.com/neovim/neovim.git
synced 2025-02-25 18:55:25 -06:00
vim-patch:9.1.1046: fuzzymatching doesn't prefer matching camelcase (#32155)
Problem: fuzzy matching doesn't prefer CamelCase matches
(Tomasz N)
Solution: Add an extra score when the case matches (glepnir)
fixes: vim/vim#16434
closes: vim/vim#16439
9dfc7e5e61
Co-authored-by: glepnir <glephunter@gmail.com>
This commit is contained in:
parent
a8b6fa07c4
commit
a66f6add29
@ -2973,6 +2973,10 @@ typedef struct {
|
||||
#define CAMEL_BONUS 30
|
||||
/// bonus if the first letter is matched
|
||||
#define FIRST_LETTER_BONUS 15
|
||||
/// bonus if exact match
|
||||
#define EXACT_MATCH_BONUS 100
|
||||
/// bonus if the case matches when not ignoring case
|
||||
#define CASE_MATCH_BONUS 25
|
||||
/// penalty applied for every letter in str before the first match
|
||||
#define LEADING_LETTER_PENALTY (-5)
|
||||
/// maximum penalty for leading letters
|
||||
@ -2988,16 +2992,23 @@ typedef struct {
|
||||
|
||||
/// Compute a score for a fuzzy matched string. The matching character locations
|
||||
/// are in "matches".
|
||||
static int fuzzy_match_compute_score(const char *const str, const int strSz,
|
||||
const uint32_t *const matches, const int numMatches)
|
||||
static int fuzzy_match_compute_score(const char *const fuzpat, const char *const str,
|
||||
const int strSz, const uint32_t *const matches,
|
||||
const int numMatches)
|
||||
FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_PURE
|
||||
{
|
||||
assert(numMatches > 0); // suppress clang "result of operation is garbage"
|
||||
const char *p = str;
|
||||
uint32_t sidx = 0;
|
||||
bool is_exact_match = true;
|
||||
const char *const orig_fuzpat = fuzpat - numMatches;
|
||||
const char *curpat = orig_fuzpat;
|
||||
int pat_idx = 0;
|
||||
// Track consecutive camel case matches
|
||||
int consecutive_camel = 0;
|
||||
|
||||
// Initialize score
|
||||
int score = 100;
|
||||
bool is_exact_match = true;
|
||||
|
||||
// Apply leading letter penalty
|
||||
int penalty = LEADING_LETTER_PENALTY * (int)matches[0];
|
||||
@ -3013,6 +3024,7 @@ static int fuzzy_match_compute_score(const char *const str, const int strSz,
|
||||
// Apply ordering bonuses
|
||||
for (int i = 0; i < numMatches; i++) {
|
||||
const uint32_t currIdx = matches[i];
|
||||
bool is_camel = false;
|
||||
|
||||
if (i > 0) {
|
||||
const uint32_t prevIdx = matches[i - 1];
|
||||
@ -3022,9 +3034,12 @@ static int fuzzy_match_compute_score(const char *const str, const int strSz,
|
||||
score += SEQUENTIAL_BONUS;
|
||||
} else {
|
||||
score += GAP_PENALTY * (int)(currIdx - prevIdx);
|
||||
// Reset consecutive camel count on gap
|
||||
consecutive_camel = 0;
|
||||
}
|
||||
}
|
||||
|
||||
int curr;
|
||||
// Check for bonuses based on neighbor character value
|
||||
if (currIdx > 0) {
|
||||
// Camel case
|
||||
@ -3035,10 +3050,19 @@ static int fuzzy_match_compute_score(const char *const str, const int strSz,
|
||||
MB_PTR_ADV(p);
|
||||
sidx++;
|
||||
}
|
||||
const int curr = utf_ptr2char(p);
|
||||
curr = utf_ptr2char(p);
|
||||
|
||||
// Enhanced camel case scoring
|
||||
if (mb_islower(neighbor) && mb_isupper(curr)) {
|
||||
score += CAMEL_BONUS;
|
||||
score += CAMEL_BONUS * 2; // Double the camel case bonus
|
||||
is_camel = true;
|
||||
consecutive_camel++;
|
||||
// Additional bonus for consecutive camel
|
||||
if (consecutive_camel > 1) {
|
||||
score += CAMEL_BONUS;
|
||||
}
|
||||
} else {
|
||||
consecutive_camel = 0;
|
||||
}
|
||||
|
||||
// Bonus if the match follows a separator character
|
||||
@ -3050,16 +3074,36 @@ static int fuzzy_match_compute_score(const char *const str, const int strSz,
|
||||
} else {
|
||||
// First letter
|
||||
score += FIRST_LETTER_BONUS;
|
||||
curr = utf_ptr2char(p);
|
||||
}
|
||||
|
||||
// Case matching bonus
|
||||
if (mb_isalpha(curr)) {
|
||||
while (pat_idx < i && *curpat) {
|
||||
MB_PTR_ADV(curpat);
|
||||
pat_idx++;
|
||||
}
|
||||
|
||||
if (curr == utf_ptr2char(curpat)) {
|
||||
score += CASE_MATCH_BONUS;
|
||||
// Extra bonus for exact case match in camel
|
||||
if (is_camel) {
|
||||
score += CASE_MATCH_BONUS / 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check exact match condition
|
||||
if (currIdx != (uint32_t)i) {
|
||||
is_exact_match = false;
|
||||
}
|
||||
}
|
||||
|
||||
// Boost score for exact matches
|
||||
if (is_exact_match && numMatches == strSz) {
|
||||
score += 100;
|
||||
score += EXACT_MATCH_BONUS;
|
||||
}
|
||||
|
||||
return score;
|
||||
}
|
||||
|
||||
@ -3138,7 +3182,7 @@ static int fuzzy_match_recursive(const char *fuzpat, const char *str, uint32_t s
|
||||
|
||||
// Calculate score
|
||||
if (matched) {
|
||||
*outScore = fuzzy_match_compute_score(strBegin, strLen, matches, nextMatch);
|
||||
*outScore = fuzzy_match_compute_score(fuzpat, strBegin, strLen, matches, nextMatch);
|
||||
}
|
||||
|
||||
// Return best result
|
||||
|
@ -100,15 +100,15 @@ endfunc
|
||||
|
||||
" Test for the matchfuzzypos() function
|
||||
func Test_matchfuzzypos()
|
||||
call assert_equal([['curl', 'world'], [[2,3], [2,3]], [128, 127]], matchfuzzypos(['world', 'curl'], 'rl'))
|
||||
call assert_equal([['curl', 'world'], [[2,3], [2,3]], [128, 127]], matchfuzzypos(['world', 'one', 'curl'], 'rl'))
|
||||
call assert_equal([['curl', 'world'], [[2,3], [2,3]], [178, 177]], matchfuzzypos(['world', 'curl'], 'rl'))
|
||||
call assert_equal([['curl', 'world'], [[2,3], [2,3]], [178, 177]], matchfuzzypos(['world', 'one', 'curl'], 'rl'))
|
||||
call assert_equal([['hello', 'hello world hello world'],
|
||||
\ [[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]], [375, 257]],
|
||||
\ [[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]], [500, 382]],
|
||||
\ matchfuzzypos(['hello world hello world', 'hello', 'world'], 'hello'))
|
||||
call assert_equal([['aaaaaaa'], [[0, 1, 2]], [191]], matchfuzzypos(['aaaaaaa'], 'aaa'))
|
||||
call assert_equal([['a b'], [[0, 3]], [219]], matchfuzzypos(['a b'], 'a b'))
|
||||
call assert_equal([['a b'], [[0, 3]], [219]], matchfuzzypos(['a b'], 'a b'))
|
||||
call assert_equal([['a b'], [[0]], [112]], matchfuzzypos(['a b'], ' a '))
|
||||
call assert_equal([['aaaaaaa'], [[0, 1, 2]], [266]], matchfuzzypos(['aaaaaaa'], 'aaa'))
|
||||
call assert_equal([['a b'], [[0, 3]], [269]], matchfuzzypos(['a b'], 'a b'))
|
||||
call assert_equal([['a b'], [[0, 3]], [269]], matchfuzzypos(['a b'], 'a b'))
|
||||
call assert_equal([['a b'], [[0]], [137]], matchfuzzypos(['a b'], ' a '))
|
||||
call assert_equal([[], [], []], matchfuzzypos(['a b'], ' '))
|
||||
call assert_equal([[], [], []], matchfuzzypos(['world', 'curl'], 'ab'))
|
||||
let x = matchfuzzypos([repeat('a', 256)], repeat('a', 256))
|
||||
@ -117,33 +117,33 @@ func Test_matchfuzzypos()
|
||||
call assert_equal([[], [], []], matchfuzzypos([], 'abc'))
|
||||
|
||||
" match in a long string
|
||||
call assert_equal([[repeat('x', 300) .. 'abc'], [[300, 301, 302]], [-135]],
|
||||
call assert_equal([[repeat('x', 300) .. 'abc'], [[300, 301, 302]], [-60]],
|
||||
\ matchfuzzypos([repeat('x', 300) .. 'abc'], 'abc'))
|
||||
|
||||
" preference for camel case match
|
||||
call assert_equal([['xabcxxaBc'], [[6, 7, 8]], [189]], matchfuzzypos(['xabcxxaBc'], 'abc'))
|
||||
call assert_equal([['xabcxxaBc'], [[6, 7, 8]], [269]], matchfuzzypos(['xabcxxaBc'], 'abc'))
|
||||
" preference for match after a separator (_ or space)
|
||||
call assert_equal([['xabx_ab'], [[5, 6]], [145]], matchfuzzypos(['xabx_ab'], 'ab'))
|
||||
call assert_equal([['xabx_ab'], [[5, 6]], [195]], matchfuzzypos(['xabx_ab'], 'ab'))
|
||||
" preference for leading letter match
|
||||
call assert_equal([['abcxabc'], [[0, 1]], [150]], matchfuzzypos(['abcxabc'], 'ab'))
|
||||
call assert_equal([['abcxabc'], [[0, 1]], [200]], matchfuzzypos(['abcxabc'], 'ab'))
|
||||
" preference for sequential match
|
||||
call assert_equal([['aobncedone'], [[7, 8, 9]], [158]], matchfuzzypos(['aobncedone'], 'one'))
|
||||
call assert_equal([['aobncedone'], [[7, 8, 9]], [233]], matchfuzzypos(['aobncedone'], 'one'))
|
||||
" best recursive match
|
||||
call assert_equal([['xoone'], [[2, 3, 4]], [168]], matchfuzzypos(['xoone'], 'one'))
|
||||
call assert_equal([['xoone'], [[2, 3, 4]], [243]], matchfuzzypos(['xoone'], 'one'))
|
||||
|
||||
" match multiple words (separated by space)
|
||||
call assert_equal([['foo bar baz'], [[8, 9, 10, 0, 1, 2]], [369]], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('baz foo'))
|
||||
call assert_equal([['foo bar baz'], [[8, 9, 10, 0, 1, 2]], [519]], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('baz foo'))
|
||||
call assert_equal([[], [], []], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('baz foo', {'matchseq': 1}))
|
||||
call assert_equal([['foo bar baz'], [[0, 1, 2, 8, 9, 10]], [369]], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('foo baz'))
|
||||
call assert_equal([['foo bar baz'], [[0, 1, 2, 3, 4, 5, 10]], [326]], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('foo baz', {'matchseq': 1}))
|
||||
call assert_equal([['foo bar baz'], [[0, 1, 2, 8, 9, 10]], [519]], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('foo baz'))
|
||||
call assert_equal([['foo bar baz'], [[0, 1, 2, 3, 4, 5, 10]], [476]], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('foo baz', {'matchseq': 1}))
|
||||
call assert_equal([[], [], []], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('one two'))
|
||||
call assert_equal([[], [], []], ['foo bar']->matchfuzzypos(" \t "))
|
||||
call assert_equal([['grace'], [[1, 2, 3, 4, 2, 3, 4, 0, 1, 2, 3, 4]], [757]], ['grace']->matchfuzzypos('race ace grace'))
|
||||
call assert_equal([['grace'], [[1, 2, 3, 4, 2, 3, 4, 0, 1, 2, 3, 4]], [1057]], ['grace']->matchfuzzypos('race ace grace'))
|
||||
|
||||
let l = [{'id' : 5, 'val' : 'crayon'}, {'id' : 6, 'val' : 'camera'}]
|
||||
call assert_equal([[{'id' : 6, 'val' : 'camera'}], [[0, 1, 2]], [192]],
|
||||
call assert_equal([[{'id' : 6, 'val' : 'camera'}], [[0, 1, 2]], [267]],
|
||||
\ matchfuzzypos(l, 'cam', {'text_cb' : {v -> v.val}}))
|
||||
call assert_equal([[{'id' : 6, 'val' : 'camera'}], [[0, 1, 2]], [192]],
|
||||
call assert_equal([[{'id' : 6, 'val' : 'camera'}], [[0, 1, 2]], [267]],
|
||||
\ matchfuzzypos(l, 'cam', {'key' : 'val'}))
|
||||
call assert_equal([[], [], []], matchfuzzypos(l, 'day', {'text_cb' : {v -> v.val}}))
|
||||
call assert_equal([[], [], []], matchfuzzypos(l, 'day', {'key' : 'val'}))
|
||||
@ -161,6 +161,18 @@ func Test_matchfuzzypos()
|
||||
" Nvim doesn't have null functions
|
||||
" call assert_fails("let x = matchfuzzypos(l, 'foo', {'text_cb' : test_null_function()})", 'E475:')
|
||||
|
||||
" case match
|
||||
call assert_equal([['Match', 'match'], [[0, 1], [0, 1]], [202, 177]], matchfuzzypos(['match', 'Match'], 'Ma'))
|
||||
call assert_equal([['match', 'Match'], [[0, 1], [0, 1]], [202, 177]], matchfuzzypos(['Match', 'match'], 'ma'))
|
||||
" CamelCase has a high weight even with a case match
|
||||
call assert_equal(['MyTestCase', 'mytestcase'], matchfuzzy(['mytestcase', 'MyTestCase'], 'mtc'))
|
||||
call assert_equal(['MyTestCase', 'mytestcase'], matchfuzzy(['MyTestCase', 'mytestcase'], 'mtc'))
|
||||
call assert_equal(['MyTest', 'Mytest', 'mytest', ],matchfuzzy(['Mytest', 'mytest', 'MyTest'], 'MyT'))
|
||||
call assert_equal(['CamelCaseMatchIngAlg', 'camelCaseMatchingAlg', 'camelcasematchingalg'],
|
||||
\ matchfuzzy(['CamelCaseMatchIngAlg', 'camelcasematchingalg', 'camelCaseMatchingAlg'], 'CamelCase'))
|
||||
call assert_equal(['CamelCaseMatchIngAlg', 'camelCaseMatchingAlg', 'camelcasematchingalg'],
|
||||
\ matchfuzzy(['CamelCaseMatchIngAlg', 'camelcasematchingalg', 'camelCaseMatchingAlg'], 'CamelcaseM'))
|
||||
|
||||
let l = [{'id' : 5, 'name' : 'foo'}, {'id' : 6, 'name' : []}, {'id' : 7}]
|
||||
call assert_fails("let x = matchfuzzypos(l, 'foo', {'key' : 'name'})", 'E730:')
|
||||
endfunc
|
||||
@ -211,12 +223,12 @@ func Test_matchfuzzypos_mbyte()
|
||||
call assert_equal([['ンヹㄇヺヴ'], [[1, 3]], [88]], matchfuzzypos(['ンヹㄇヺヴ'], 'ヹヺ'))
|
||||
" reverse the order of characters
|
||||
call assert_equal([[], [], []], matchfuzzypos(['ンヹㄇヺヴ'], 'ヺヹ'))
|
||||
call assert_equal([['αβΩxxx', 'xαxβxΩx'], [[0, 1, 2], [1, 3, 5]], [222, 113]],
|
||||
call assert_equal([['αβΩxxx', 'xαxβxΩx'], [[0, 1, 2], [1, 3, 5]], [252, 143]],
|
||||
\ matchfuzzypos(['αβΩxxx', 'xαxβxΩx'], 'αβΩ'))
|
||||
call assert_equal([['ππbbππ', 'πππbbbπππ', 'ππππbbbbππππ', 'πbπ'],
|
||||
\ [[0, 1], [0, 1], [0, 1], [0, 2]], [151, 148, 145, 110]],
|
||||
\ [[0, 1], [0, 1], [0, 1], [0, 2]], [176, 173, 170, 135]],
|
||||
\ matchfuzzypos(['πbπ', 'ππbbππ', 'πππbbbπππ', 'ππππbbbbππππ'], 'ππ'))
|
||||
call assert_equal([['ααααααα'], [[0, 1, 2]], [191]],
|
||||
call assert_equal([['ααααααα'], [[0, 1, 2]], [216]],
|
||||
\ matchfuzzypos(['ααααααα'], 'ααα'))
|
||||
|
||||
call assert_equal([[], [], []], matchfuzzypos(['ンヹㄇ', 'ŗŝţ'], 'fffifl'))
|
||||
@ -229,10 +241,10 @@ func Test_matchfuzzypos_mbyte()
|
||||
call assert_equal([[], [], []], ['세 마리의 작은 돼지', '마리의', '마리의 작은', '작은 돼지']->matchfuzzypos('파란 하늘'))
|
||||
|
||||
" match in a long string
|
||||
call assert_equal([[repeat('ぶ', 300) .. 'ẼẼẼ'], [[300, 301, 302]], [-135]],
|
||||
call assert_equal([[repeat('ぶ', 300) .. 'ẼẼẼ'], [[300, 301, 302]], [-110]],
|
||||
\ matchfuzzypos([repeat('ぶ', 300) .. 'ẼẼẼ'], 'ẼẼẼ'))
|
||||
" preference for camel case match
|
||||
call assert_equal([['xѳѵҁxxѳѴҁ'], [[6, 7, 8]], [189]], matchfuzzypos(['xѳѵҁxxѳѴҁ'], 'ѳѵҁ'))
|
||||
call assert_equal([['xѳѵҁxxѳѴҁ'], [[6, 7, 8]], [219]], matchfuzzypos(['xѳѵҁxxѳѴҁ'], 'ѳѵҁ'))
|
||||
" preference for match after a separator (_ or space)
|
||||
call assert_equal([['xちだx_ちだ'], [[5, 6]], [145]], matchfuzzypos(['xちだx_ちだ'], 'ちだ'))
|
||||
" preference for leading letter match
|
||||
|
Loading…
Reference in New Issue
Block a user