vim-patch:7.4.293

Problem:    It is not possible to ignore composing characters at a
            specific point in a pattern.
Solution:   Add the \%C item, which skips over any composing characters.

https://code.google.com/p/vim/source/detail?r=10fc95f48546f438648b8357062e93c9c2c0a377
This commit is contained in:
Damián Silvani 2014-07-20 13:15:21 -03:00
parent 6d45309797
commit 9ea28e1903
3 changed files with 53 additions and 10 deletions

View File

@ -258,6 +258,7 @@
#define RE_MARK 207 /* mark cmp Match mark position */ #define RE_MARK 207 /* mark cmp Match mark position */
#define RE_VISUAL 208 /* Match Visual area */ #define RE_VISUAL 208 /* Match Visual area */
#define RE_COMPOSING 209 // any composing characters
/* /*
* Magic characters have a special meaning, they don't match literally. * Magic characters have a special meaning, they don't match literally.
@ -2024,6 +2025,10 @@ static char_u *regatom(int *flagp)
ret = regnode(RE_VISUAL); ret = regnode(RE_VISUAL);
break; break;
case 'C':
ret = regnode(RE_COMPOSING);
break;
/* \%[abc]: Emit as a list of branches, all ending at the last /* \%[abc]: Emit as a list of branches, all ending at the last
* branch which matches nothing. */ * branch which matches nothing. */
case '[': case '[':
@ -4099,10 +4104,12 @@ regmatch (
status = RA_NOMATCH; status = RA_NOMATCH;
} }
} }
// Check for following composing character. // Check for following composing character, unless %C
// follows (skips over all composing chars).
if (status != RA_NOMATCH && enc_utf8 if (status != RA_NOMATCH && enc_utf8
&& UTF_COMPOSINGLIKE(reginput, reginput + len) && UTF_COMPOSINGLIKE(reginput, reginput + len)
&& !ireg_icombine) { && !ireg_icombine
&& OP(next) != RE_COMPOSING) {
// raaron: This code makes a composing character get // raaron: This code makes a composing character get
// ignored, which is the correct behavior (sometimes) // ignored, which is the correct behavior (sometimes)
// for voweled Hebrew texts. // for voweled Hebrew texts.
@ -4167,6 +4174,15 @@ regmatch (
status = RA_NOMATCH; status = RA_NOMATCH;
break; break;
case RE_COMPOSING:
if (enc_utf8) {
// Skip composing characters.
while (utf_iscomposing(utf_ptr2char(reginput))) {
mb_cptr_adv(reginput);
}
}
break;
case NOTHING: case NOTHING:
break; break;

View File

@ -85,6 +85,7 @@ enum {
NFA_COMPOSING, /* Next nodes in NFA are part of the NFA_COMPOSING, /* Next nodes in NFA are part of the
composing multibyte char */ composing multibyte char */
NFA_END_COMPOSING, /* End of a composing char in the NFA */ NFA_END_COMPOSING, /* End of a composing char in the NFA */
NFA_ANY_COMPOSING, // \%C: Any composing characters.
NFA_OPT_CHARS, /* \%[abc] */ NFA_OPT_CHARS, /* \%[abc] */
/* The following are used only in the postfix form, not in the NFA */ /* The following are used only in the postfix form, not in the NFA */
@ -1350,6 +1351,10 @@ static int nfa_regatom(void)
EMIT(NFA_VISUAL); EMIT(NFA_VISUAL);
break; break;
case 'C':
EMIT(NFA_ANY_COMPOSING);
break;
case '[': case '[':
{ {
int n; int n;
@ -2259,6 +2264,7 @@ static void nfa_set_code(int c)
case NFA_MARK_LT: STRCPY(code, "NFA_MARK_LT "); break; case NFA_MARK_LT: STRCPY(code, "NFA_MARK_LT "); break;
case NFA_CURSOR: STRCPY(code, "NFA_CURSOR "); break; case NFA_CURSOR: STRCPY(code, "NFA_CURSOR "); break;
case NFA_VISUAL: STRCPY(code, "NFA_VISUAL "); break; case NFA_VISUAL: STRCPY(code, "NFA_VISUAL "); break;
case NFA_ANY_COMPOSING: STRCPY(code, "NFA_ANY_COMPOSING "); break;
case NFA_STAR: STRCPY(code, "NFA_STAR "); break; case NFA_STAR: STRCPY(code, "NFA_STAR "); break;
case NFA_STAR_NONGREEDY: STRCPY(code, "NFA_STAR_NONGREEDY "); break; case NFA_STAR_NONGREEDY: STRCPY(code, "NFA_STAR_NONGREEDY "); break;
@ -2716,6 +2722,7 @@ static int nfa_max_width(nfa_state_T *startstate, int depth)
case NFA_NLOWER_IC: case NFA_NLOWER_IC:
case NFA_UPPER_IC: case NFA_UPPER_IC:
case NFA_NUPPER_IC: case NFA_NUPPER_IC:
case NFA_ANY_COMPOSING:
/* possibly non-ascii */ /* possibly non-ascii */
if (has_mbyte) if (has_mbyte)
len += 3; len += 3;
@ -3714,6 +3721,7 @@ static int match_follows(nfa_state_T *startstate, int depth)
continue; continue;
case NFA_ANY: case NFA_ANY:
case NFA_ANY_COMPOSING:
case NFA_IDENT: case NFA_IDENT:
case NFA_SIDENT: case NFA_SIDENT:
case NFA_KWORD: case NFA_KWORD:
@ -3943,7 +3951,7 @@ skip_add:
#endif #endif
switch (state->c) { switch (state->c) {
case NFA_MATCH: case NFA_MATCH:
nfa_match = TRUE; //nfa_match = TRUE;
break; break;
case NFA_SPLIT: case NFA_SPLIT:
@ -4573,6 +4581,7 @@ static int failure_chance(nfa_state_T *state, int depth)
case NFA_MATCH: case NFA_MATCH:
case NFA_MCLOSE: case NFA_MCLOSE:
case NFA_ANY_COMPOSING:
/* empty match works always */ /* empty match works always */
return 0; return 0;
@ -4951,6 +4960,11 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, regsubs_T *subm
switch (t->state->c) { switch (t->state->c) {
case NFA_MATCH: case NFA_MATCH:
{ {
// If the match ends before a composing character and
// ireg_icombine is not set, that is not really a match.
if (enc_utf8 && !ireg_icombine && utf_iscomposing(curc)) {
break;
}
nfa_match = TRUE; nfa_match = TRUE;
copy_sub(&submatch->norm, &t->subs.norm); copy_sub(&submatch->norm, &t->subs.norm);
if (nfa_has_zsubexpr) if (nfa_has_zsubexpr)
@ -5430,6 +5444,18 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, regsubs_T *subm
} }
break; break;
case NFA_ANY_COMPOSING:
// On a composing character skip over it. Otherwise do
// nothing. Always matches.
if (enc_utf8 && utf_iscomposing(curc)) {
add_off = clen;
} else {
add_here = TRUE;
add_off = 0;
}
add_state = t->state->out;
break;
/* /*
* Character classes like \a for alpha, \d for digit etc. * Character classes like \a for alpha, \d for digit etc.
*/ */
@ -5769,12 +5795,13 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, regsubs_T *subm
if (!result && ireg_ic) if (!result && ireg_ic)
result = vim_tolower(c) == vim_tolower(curc); result = vim_tolower(c) == vim_tolower(curc);
/* If there is a composing character which is not being
* ignored there can be no match. Match with composing // If ireg_icombine is not set only skip over the character
* character uses NFA_COMPOSING above. */ // itself. When it is set skip over composing characters.
if (result && enc_utf8 && !ireg_icombine if (result && enc_utf8 && !ireg_icombine) {
&& clen != utf_char2len(curc)) clen = utf_char2len(curc);
result = FALSE; }
ADD_STATE_IF_MATCH(t->state); ADD_STATE_IF_MATCH(t->state);
break; break;
} }

View File

@ -256,7 +256,7 @@ static int included_patches[] = {
//296, //296,
295, 295,
//294, //294,
//293, 293,
292, 292,
291, 291,
290, 290,