vim-patch:8.0.0020

Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

6100d02aab
This commit is contained in:
Jurica Bradaric 2017-07-25 14:18:08 +02:00 committed by James McCoy
parent dc3c06e73d
commit fe0bcc0800
No known key found for this signature in database
GPG Key ID: DFE691AE331BA3DB
6 changed files with 674 additions and 558 deletions

View File

@ -861,8 +861,7 @@ Exceptions:
Substitute with an expression *sub-replace-expression*
*sub-replace-\=* *s/\=*
When the substitute string starts with "\=" the remainder is interpreted as an
expression. This does not work recursively: a |substitute()| function inside
the expression cannot use "\=" for the substitute string.
expression.
The special meaning for characters as mentioned at |sub-replace-special| does
not apply except for "<CR>". A <NL> character is used as a line break, you

View File

@ -6011,9 +6011,9 @@ range({expr} [, {max} [, {stride}]]) *range()*
*readfile()*
readfile({fname} [, {binary} [, {max}]])
Read file {fname} and return a |List|, each line of the file
as an item. Lines broken at NL characters. Macintosh files
separated with CR will result in a single long line (unless a
NL appears somewhere).
as an item. Lines are broken at NL characters. Macintosh
files separated with CR will result in a single long line
(unless a NL appears somewhere).
All NUL characters are replaced with a NL character.
When {binary} contains "b" binary mode is used:
- When the last line ends in a NL an extra empty list item is
@ -7330,6 +7330,9 @@ submatch({nr}[, {list}]) *submatch()* *E935*
|substitute()| this list will always contain one or zero
items, since there are no real line breaks.
When substitute() is used recursively only the submatches in
the current (deepest) call can be obtained.
Example: >
:s/\d\+/\=submatch(0) + 1/
< This finds the first number in the line and adds one to it.

File diff suppressed because it is too large Load Diff

View File

@ -4882,7 +4882,7 @@ static long find_match_text(colnr_T startcol, int regstart, char_u *match_text)
int c2_len = PTR2LEN(s2);
int c2 = PTR2CHAR(s2);
if ((c1 != c2 && (!ireg_ic || mb_tolower(c1) != mb_tolower(c2)))
if ((c1 != c2 && (!rex.reg_ic || mb_tolower(c1) != mb_tolower(c2)))
|| c1_len != c2_len) {
match = false;
break;
@ -4895,13 +4895,13 @@ static long find_match_text(colnr_T startcol, int regstart, char_u *match_text)
&& !(enc_utf8 && utf_iscomposing(PTR2CHAR(s2)))) {
cleanup_subexpr();
if (REG_MULTI) {
reg_startpos[0].lnum = reglnum;
reg_startpos[0].col = col;
reg_endpos[0].lnum = reglnum;
reg_endpos[0].col = s2 - regline;
rex.reg_startpos[0].lnum = reglnum;
rex.reg_startpos[0].col = col;
rex.reg_endpos[0].lnum = reglnum;
rex.reg_endpos[0].col = s2 - regline;
} else {
reg_startp[0] = regline + col;
reg_endp[0] = s2;
rex.reg_startp[0] = regline + col;
rex.reg_endp[0] = s2;
}
return 1L;
}
@ -5116,8 +5116,8 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
case NFA_MATCH:
{
// If the match ends before a composing character and
// ireg_icombine is not set, that is not really a match.
if (enc_utf8 && !ireg_icombine && utf_iscomposing(curc)) {
// rex.reg_icombine is not set, that is not really a match.
if (enc_utf8 && !rex.reg_icombine && utf_iscomposing(curc)) {
break;
}
nfa_match = true;
@ -5400,15 +5400,15 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
int this_class;
// Get class of current and previous char (if it exists).
this_class = mb_get_class_tab(reginput, reg_buf->b_chartab);
this_class = mb_get_class_tab(reginput, rex.reg_buf->b_chartab);
if (this_class <= 1) {
result = false;
} else if (reg_prev_class() == this_class) {
result = false;
}
} else if (!vim_iswordc_buf(curc, reg_buf)
} else if (!vim_iswordc_buf(curc, rex.reg_buf)
|| (reginput > regline
&& vim_iswordc_buf(reginput[-1], reg_buf))) {
&& vim_iswordc_buf(reginput[-1], rex.reg_buf))) {
result = false;
}
if (result) {
@ -5425,15 +5425,15 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
int this_class, prev_class;
// Get class of current and previous char (if it exists).
this_class = mb_get_class_tab(reginput, reg_buf->b_chartab);
this_class = mb_get_class_tab(reginput, rex.reg_buf->b_chartab);
prev_class = reg_prev_class();
if (this_class == prev_class
|| prev_class == 0 || prev_class == 1) {
result = false;
}
} else if (!vim_iswordc_buf(reginput[-1], reg_buf)
} else if (!vim_iswordc_buf(reginput[-1], rex.reg_buf)
|| (reginput[0] != NUL
&& vim_iswordc_buf(curc, reg_buf))) {
&& vim_iswordc_buf(curc, rex.reg_buf))) {
result = false;
}
if (result) {
@ -5444,14 +5444,14 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
case NFA_BOF:
if (reglnum == 0 && reginput == regline
&& (!REG_MULTI || reg_firstlnum == 1)) {
&& (!REG_MULTI || rex.reg_firstlnum == 1)) {
add_here = true;
add_state = t->state->out;
}
break;
case NFA_EOF:
if (reglnum == reg_maxline && curc == NUL) {
if (reglnum == rex.reg_maxline && curc == NUL) {
add_here = true;
add_state = t->state->out;
}
@ -5475,7 +5475,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
// (no preceding character).
len += mb_char2len(mc);
}
if (ireg_icombine && len == 0) {
if (rex.reg_icombine && len == 0) {
// If \Z was present, then ignore composing characters.
// When ignoring the base character this always matches.
if (sta->c != curc) {
@ -5526,14 +5526,14 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
}
case NFA_NEWL:
if (curc == NUL && !reg_line_lbr && REG_MULTI
&& reglnum <= reg_maxline) {
if (curc == NUL && !rex.reg_line_lbr && REG_MULTI
&& reglnum <= rex.reg_maxline) {
go_to_nextline = true;
// Pass -1 for the offset, which means taking the position
// at the start of the next line.
add_state = t->state->out;
add_off = -1;
} else if (curc == '\n' && reg_line_lbr) {
} else if (curc == '\n' && rex.reg_line_lbr) {
// match \n as if it is an ordinary character
add_state = t->state->out;
add_off = 1;
@ -5574,7 +5574,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
result = result_if_matched;
break;
}
if (ireg_ic) {
if (rex.reg_ic) {
int curc_low = mb_tolower(curc);
int done = false;
@ -5591,7 +5591,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
}
} else if (state->c < 0 ? check_char_class(state->c, curc)
: (curc == state->c
|| (ireg_ic && mb_tolower(curc)
|| (rex.reg_ic && mb_tolower(curc)
== mb_tolower(state->c)))) {
result = result_if_matched;
break;
@ -5639,13 +5639,13 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
break;
case NFA_KWORD: // \k
result = vim_iswordp_buf(reginput, reg_buf);
result = vim_iswordp_buf(reginput, rex.reg_buf);
ADD_STATE_IF_MATCH(t->state);
break;
case NFA_SKWORD: // \K
result = !ascii_isdigit(curc)
&& vim_iswordp_buf(reginput, reg_buf);
&& vim_iswordp_buf(reginput, rex.reg_buf);
ADD_STATE_IF_MATCH(t->state);
break;
@ -5760,24 +5760,24 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
break;
case NFA_LOWER_IC: // [a-z]
result = ri_lower(curc) || (ireg_ic && ri_upper(curc));
result = ri_lower(curc) || (rex.reg_ic && ri_upper(curc));
ADD_STATE_IF_MATCH(t->state);
break;
case NFA_NLOWER_IC: // [^a-z]
result = curc != NUL
&& !(ri_lower(curc) || (ireg_ic && ri_upper(curc)));
&& !(ri_lower(curc) || (rex.reg_ic && ri_upper(curc)));
ADD_STATE_IF_MATCH(t->state);
break;
case NFA_UPPER_IC: // [A-Z]
result = ri_upper(curc) || (ireg_ic && ri_lower(curc));
result = ri_upper(curc) || (rex.reg_ic && ri_lower(curc));
ADD_STATE_IF_MATCH(t->state);
break;
case NFA_NUPPER_IC: // [^A-Z]
result = curc != NUL
&& !(ri_upper(curc) || (ireg_ic && ri_lower(curc)));
&& !(ri_upper(curc) || (rex.reg_ic && ri_lower(curc)));
ADD_STATE_IF_MATCH(t->state);
break;
@ -5851,13 +5851,15 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
case NFA_LNUM_GT:
case NFA_LNUM_LT:
assert(t->state->val >= 0
&& !((reg_firstlnum > 0 && reglnum > LONG_MAX - reg_firstlnum)
|| (reg_firstlnum <0 && reglnum < LONG_MIN + reg_firstlnum))
&& reglnum + reg_firstlnum >= 0);
&& !((rex.reg_firstlnum > 0
&& reglnum > LONG_MAX - rex.reg_firstlnum)
|| (rex.reg_firstlnum < 0
&& reglnum < LONG_MIN + rex.reg_firstlnum))
&& reglnum + rex.reg_firstlnum >= 0);
result = (REG_MULTI
&& nfa_re_num_cmp((uintmax_t)t->state->val,
t->state->c - NFA_LNUM,
(uintmax_t)(reglnum + reg_firstlnum)));
(uintmax_t)(reglnum + rex.reg_firstlnum)));
if (result) {
add_here = true;
add_state = t->state->out;
@ -5893,7 +5895,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
}
result = false;
win_T *wp = reg_win == NULL ? curwin : reg_win;
win_T *wp = rex.reg_win == NULL ? curwin : rex.reg_win;
if (op == 1 && col - 1 > t->state->val && col > 100) {
long ts = wp->w_buffer->b_p_ts;
@ -5920,18 +5922,18 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
case NFA_MARK_GT:
case NFA_MARK_LT:
{
pos_T *pos = getmark_buf(reg_buf, t->state->val, FALSE);
pos_T *pos = getmark_buf(rex.reg_buf, t->state->val, false);
// Compare the mark position to the match position.
result = (pos != NULL // mark doesn't exist
&& pos->lnum > 0 // mark isn't set in reg_buf
&& (pos->lnum == reglnum + reg_firstlnum
&& (pos->lnum == reglnum + rex.reg_firstlnum
? (pos->col == (colnr_T)(reginput - regline)
? t->state->c == NFA_MARK
: (pos->col < (colnr_T)(reginput - regline)
? t->state->c == NFA_MARK_GT
: t->state->c == NFA_MARK_LT))
: (pos->lnum < reglnum + reg_firstlnum
: (pos->lnum < reglnum + rex.reg_firstlnum
? t->state->c == NFA_MARK_GT
: t->state->c == NFA_MARK_LT)));
if (result) {
@ -5942,10 +5944,10 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
}
case NFA_CURSOR:
result = (reg_win != NULL
&& (reglnum + reg_firstlnum == reg_win->w_cursor.lnum)
result = (rex.reg_win != NULL
&& (reglnum + rex.reg_firstlnum == rex.reg_win->w_cursor.lnum)
&& ((colnr_T)(reginput - regline)
== reg_win->w_cursor.col));
== rex.reg_win->w_cursor.col));
if (result) {
add_here = true;
add_state = t->state->out;
@ -5995,13 +5997,13 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
#endif
result = (c == curc);
if (!result && ireg_ic) {
if (!result && rex.reg_ic) {
result = mb_tolower(c) == mb_tolower(curc);
}
// If ireg_icombine is not set only skip over the character
// If rex.reg_icombine is not set only skip over the character
// itself. When it is set skip over composing characters.
if (result && enc_utf8 && !ireg_icombine) {
if (result && enc_utf8 && !rex.reg_icombine) {
clen = utf_ptr2len(reginput);
}
@ -6109,8 +6111,8 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
&& ((toplevel
&& reglnum == 0
&& clen != 0
&& (ireg_maxcol == 0
|| (colnr_T)(reginput - regline) < ireg_maxcol))
&& (rex.reg_maxcol == 0
|| (colnr_T)(reginput - regline) < rex.reg_maxcol))
|| (nfa_endp != NULL
&& (REG_MULTI
? (reglnum < nfa_endp->se_u.pos.lnum
@ -6145,7 +6147,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
// Checking if the required start character matches is
// cheaper than adding a state that won't match.
c = PTR2CHAR(reginput + clen);
if (c != prog->regstart && (!ireg_ic || mb_tolower(c)
if (c != prog->regstart && (!rex.reg_ic || mb_tolower(c)
!= mb_tolower(prog->regstart))) {
#ifdef REGEXP_DEBUG
fprintf(log_fd,
@ -6271,34 +6273,37 @@ static long nfa_regtry(nfa_regprog_T *prog, colnr_T col, proftime_T *tm)
cleanup_subexpr();
if (REG_MULTI) {
for (i = 0; i < subs.norm.in_use; i++) {
reg_startpos[i].lnum = subs.norm.list.multi[i].start_lnum;
reg_startpos[i].col = subs.norm.list.multi[i].start_col;
rex.reg_startpos[i].lnum = subs.norm.list.multi[i].start_lnum;
rex.reg_startpos[i].col = subs.norm.list.multi[i].start_col;
reg_endpos[i].lnum = subs.norm.list.multi[i].end_lnum;
reg_endpos[i].col = subs.norm.list.multi[i].end_col;
rex.reg_endpos[i].lnum = subs.norm.list.multi[i].end_lnum;
rex.reg_endpos[i].col = subs.norm.list.multi[i].end_col;
}
if (reg_startpos[0].lnum < 0) {
reg_startpos[0].lnum = 0;
reg_startpos[0].col = col;
if (rex.reg_startpos[0].lnum < 0) {
rex.reg_startpos[0].lnum = 0;
rex.reg_startpos[0].col = col;
}
if (rex.reg_endpos[0].lnum < 0) {
// pattern has a \ze but it didn't match, use current end
rex.reg_endpos[0].lnum = reglnum;
rex.reg_endpos[0].col = (int)(reginput - regline);
} else {
// Use line number of "\ze".
reglnum = rex.reg_endpos[0].lnum;
}
if (reg_endpos[0].lnum < 0) {
/* pattern has a \ze but it didn't match, use current end */
reg_endpos[0].lnum = reglnum;
reg_endpos[0].col = (int)(reginput - regline);
} else
/* Use line number of "\ze". */
reglnum = reg_endpos[0].lnum;
} else {
for (i = 0; i < subs.norm.in_use; i++) {
reg_startp[i] = subs.norm.list.line[i].start;
reg_endp[i] = subs.norm.list.line[i].end;
rex.reg_startp[i] = subs.norm.list.line[i].start;
rex.reg_endp[i] = subs.norm.list.line[i].end;
}
if (reg_startp[0] == NULL)
reg_startp[0] = regline + col;
if (reg_endp[0] == NULL)
reg_endp[0] = reginput;
if (rex.reg_startp[0] == NULL) {
rex.reg_startp[0] = regline + col;
}
if (rex.reg_endp[0] == NULL) {
rex.reg_endp[0] = reginput;
}
}
/* Package any found \z(...\) matches for export. Default is none. */
@ -6352,14 +6357,14 @@ static long nfa_regexec_both(char_u *line, colnr_T startcol, proftime_T *tm)
colnr_T col = startcol;
if (REG_MULTI) {
prog = (nfa_regprog_T *)reg_mmatch->regprog;
line = reg_getline((linenr_T)0); /* relative to the cursor */
reg_startpos = reg_mmatch->startpos;
reg_endpos = reg_mmatch->endpos;
prog = (nfa_regprog_T *)rex.reg_mmatch->regprog;
line = reg_getline((linenr_T)0); // relative to the cursor
rex.reg_startpos = rex.reg_mmatch->startpos;
rex.reg_endpos = rex.reg_mmatch->endpos;
} else {
prog = (nfa_regprog_T *)reg_match->regprog;
reg_startp = reg_match->startp;
reg_endp = reg_match->endp;
prog = (nfa_regprog_T *)rex.reg_match->regprog;
rex.reg_startp = rex.reg_match->startp;
rex.reg_endp = rex.reg_match->endp;
}
/* Be paranoid... */
@ -6368,15 +6373,17 @@ static long nfa_regexec_both(char_u *line, colnr_T startcol, proftime_T *tm)
goto theend;
}
/* If pattern contains "\c" or "\C": overrule value of ireg_ic */
if (prog->regflags & RF_ICASE)
ireg_ic = TRUE;
else if (prog->regflags & RF_NOICASE)
ireg_ic = FALSE;
// If pattern contains "\c" or "\C": overrule value of rex.reg_ic
if (prog->regflags & RF_ICASE) {
rex.reg_ic = true;
} else if (prog->regflags & RF_NOICASE) {
rex.reg_ic = false;
}
/* If pattern contains "\Z" overrule value of ireg_icombine */
if (prog->regflags & RF_ICOMBINE)
ireg_icombine = TRUE;
// If pattern contains "\Z" overrule value of rex.reg_icombine
if (prog->regflags & RF_ICOMBINE) {
rex.reg_icombine = true;
}
regline = line;
reglnum = 0; /* relative to line */
@ -6405,17 +6412,17 @@ static long nfa_regexec_both(char_u *line, colnr_T startcol, proftime_T *tm)
if (skip_to_start(prog->regstart, &col) == FAIL)
return 0L;
/* If match_text is set it contains the full text that must match.
* Nothing else to try. Doesn't handle combining chars well. */
if (prog->match_text != NULL
&& !ireg_icombine
)
// If match_text is set it contains the full text that must match.
// Nothing else to try. Doesn't handle combining chars well.
if (prog->match_text != NULL && !rex.reg_icombine) {
return find_match_text(col, prog->regstart, prog->match_text);
}
}
/* If the start column is past the maximum column: no need to try. */
if (ireg_maxcol > 0 && col >= ireg_maxcol)
// If the start column is past the maximum column: no need to try.
if (rex.reg_maxcol > 0 && col >= rex.reg_maxcol) {
goto theend;
}
nstate = prog->nstate;
for (i = 0; i < nstate; ++i) {
@ -6567,15 +6574,15 @@ nfa_regexec_nl (
bool line_lbr
)
{
reg_match = rmp;
reg_mmatch = NULL;
reg_maxline = 0;
reg_line_lbr = line_lbr;
reg_buf = curbuf;
reg_win = NULL;
ireg_ic = rmp->rm_ic;
ireg_icombine = FALSE;
ireg_maxcol = 0;
rex.reg_match = rmp;
rex.reg_mmatch = NULL;
rex.reg_maxline = 0;
rex.reg_line_lbr = line_lbr;
rex.reg_buf = curbuf;
rex.reg_win = NULL;
rex.reg_ic = rmp->rm_ic;
rex.reg_icombine = false;
rex.reg_maxcol = 0;
return nfa_regexec_both(line, col, NULL);
}
@ -6616,16 +6623,16 @@ nfa_regexec_nl (
static long nfa_regexec_multi(regmmatch_T *rmp, win_T *win, buf_T *buf,
linenr_T lnum, colnr_T col, proftime_T *tm)
{
reg_match = NULL;
reg_mmatch = rmp;
reg_buf = buf;
reg_win = win;
reg_firstlnum = lnum;
reg_maxline = reg_buf->b_ml.ml_line_count - lnum;
reg_line_lbr = FALSE;
ireg_ic = rmp->rmm_ic;
ireg_icombine = FALSE;
ireg_maxcol = rmp->rmm_maxcol;
rex.reg_match = NULL;
rex.reg_mmatch = rmp;
rex.reg_buf = buf;
rex.reg_win = win;
rex.reg_firstlnum = lnum;
rex.reg_maxline = rex.reg_buf->b_ml.ml_line_count - lnum;
rex.reg_line_lbr = false;
rex.reg_ic = rmp->rmm_ic;
rex.reg_icombine = false;
rex.reg_maxcol = rmp->rmm_maxcol;
return nfa_regexec_both(NULL, col, tm);
}

View File

@ -384,9 +384,10 @@ func Test_substitute_expr()
\ {-> submatch(2) . submatch(3) . submatch(1)}, ''))
func Recurse()
return substitute('yyy', 'y*', {-> g:val}, '')
return substitute('yyy', 'y\(.\)y', {-> submatch(1)}, '')
endfunc
call assert_equal('--', substitute('xxx', 'x*', {-> '-' . Recurse() . '-'}, ''))
" recursive call works
call assert_equal('-y-x-', substitute('xxx', 'x\(.\)x', {-> '-' . Recurse() . '-' . submatch(1) . '-'}, ''))
endfunc
func Test_invalid_submatch()

View File

@ -709,7 +709,7 @@ static const int included_patches[] = {
23,
// 22 NA
// 21,
// 20,
20,
19,
// 18,
17,