refactor: more clint (#20910)

This commit is contained in:
Lewis Russell 2022-11-07 10:21:44 +00:00 committed by GitHub
parent e9c1cb71f8
commit bdb98de2d1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 594 additions and 876 deletions

View File

@ -729,7 +729,7 @@ void ex_hardcopy(exarg_T *eap)
}
assert(prtpos.bytes_printed <= SIZE_MAX / 100);
sprintf((char *)IObuff, _("Printing page %d (%zu%%)"),
sprintf((char *)IObuff, _("Printing page %d (%zu%%)"), // NOLINT(runtime/printf)
page_count + 1 + side,
prtpos.bytes_printed * 100 / bytes_to_print);
if (!mch_print_begin_page((char_u *)IObuff)) {
@ -750,8 +750,7 @@ void ex_hardcopy(exarg_T *eap)
prtpos.file_line);
}
for (page_line = 0; page_line < settings.lines_per_page;
++page_line) {
for (page_line = 0; page_line < settings.lines_per_page; page_line++) {
prtpos.column = hardcopy_line(&settings,
page_line, &prtpos);
if (prtpos.column == 0) {
@ -2440,8 +2439,7 @@ bool mch_print_begin(prt_settings_T *psettings)
prt_dsc_font_resource("DocumentNeededResources", &prt_ps_courier_font);
}
if (prt_out_mbyte) {
prt_dsc_font_resource((prt_use_courier ? NULL
: "DocumentNeededResources"), &prt_ps_mb_font);
prt_dsc_font_resource((prt_use_courier ? NULL : "DocumentNeededResources"), &prt_ps_mb_font);
if (!prt_custom_cmap) {
prt_dsc_resources(NULL, "cmap", prt_cmap);
}
@ -2990,7 +2988,7 @@ int mch_print_text_out(char_u *const textp, size_t len)
ga_append(&prt_ps_buffer, '\\'); break;
default:
sprintf((char *)ch_buff, "%03o", (unsigned int)ch);
sprintf((char *)ch_buff, "%03o", (unsigned int)ch); // NOLINT(runtime/printf)
ga_append(&prt_ps_buffer, (char)ch_buff[0]);
ga_append(&prt_ps_buffer, (char)ch_buff[1]);
ga_append(&prt_ps_buffer, (char)ch_buff[2]);

View File

@ -1550,7 +1550,7 @@ void show_utf8(void)
}
clen = utf_ptr2len((char *)line + i);
}
sprintf((char *)IObuff + rlen, "%02x ",
sprintf((char *)IObuff + rlen, "%02x ", // NOLINT(runtime/printf)
(line[i] == NL) ? NUL : line[i]); // NUL is stored as NL
clen--;
rlen += (int)strlen(IObuff + rlen);

View File

@ -2135,13 +2135,13 @@ static int ml_append_int(buf_T *buf, linenr_T lnum, char_u *line, colnr_T len, b
buf->b_ml.ml_stack_top = stack_idx + 1; // truncate stack
if (lineadd) {
--(buf->b_ml.ml_stack_top);
(buf->b_ml.ml_stack_top)--;
// fix line count for rest of blocks in the stack
ml_lineadd(buf, lineadd);
// fix stack itself
buf->b_ml.ml_stack[buf->b_ml.ml_stack_top].ip_high +=
lineadd;
++(buf->b_ml.ml_stack_top);
(buf->b_ml.ml_stack_top)++;
}
// We are finished, break the loop here.
@ -2428,7 +2428,7 @@ static int ml_delete_int(buf_T *buf, linenr_T lnum, bool message)
buf->b_ml.ml_stack[buf->b_ml.ml_stack_top].ip_high +=
buf->b_ml.ml_locked_lineadd;
}
++(buf->b_ml.ml_stack_top);
(buf->b_ml.ml_stack_top)++;
break;
}
@ -2698,11 +2698,11 @@ static bhdr_T *ml_find_line(buf_T *buf, linenr_T lnum, int action)
&& buf->b_ml.ml_locked_high >= lnum) {
// remember to update pointer blocks and stack later
if (action == ML_INSERT) {
++(buf->b_ml.ml_locked_lineadd);
++(buf->b_ml.ml_locked_high);
(buf->b_ml.ml_locked_lineadd)++;
(buf->b_ml.ml_locked_high)++;
} else if (action == ML_DELETE) {
--(buf->b_ml.ml_locked_lineadd);
--(buf->b_ml.ml_locked_high);
(buf->b_ml.ml_locked_lineadd)--;
(buf->b_ml.ml_locked_high)--;
}
return buf->b_ml.ml_locked;
}

View File

@ -96,8 +96,7 @@ bool msgpack_rpc_to_object(const msgpack_object *const obj, Object *const arg)
.size = obj->via.attr.size, \
.data = (obj->via.attr.ptr == NULL || obj->via.attr.size == 0 \
? xmemdupz("", 0) \
: xmemdupz(obj->via.attr.ptr, obj->via.attr.size)), \
})); \
: xmemdupz(obj->via.attr.ptr, obj->via.attr.size)), })); \
break; \
}
STR_CASE(MSGPACK_OBJECT_STR, str, cur.mobj, *cur.aobj, STRING_OBJ)

View File

@ -2266,7 +2266,7 @@ bool find_decl(char_u *ptr, size_t len, bool locally, bool thisblock, int flags_
// Put "\V" before the pattern to avoid that the special meaning of "."
// and "~" causes trouble.
assert(len <= INT_MAX);
sprintf((char *)pat, vim_iswordp(ptr) ? "\\V\\<%.*s\\>" : "\\V%.*s",
sprintf((char *)pat, vim_iswordp(ptr) ? "\\V\\<%.*s\\>" : "\\V%.*s", // NOLINT(runtime/printf)
(int)len, ptr);
old_pos = curwin->w_cursor;
save_p_ws = p_ws;

View File

@ -1,9 +1,7 @@
// This is an open source non-commercial project. Dear PVS-Studio, please check
// it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com
/*
* Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
*/
// Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
// By default: do not create debugging logs or files related to regular
// expressions, even when compiling with -DDEBUG.
@ -41,21 +39,17 @@
# define BT_REGEXP_DEBUG_LOG_NAME "bt_regexp_debug.log"
#endif
/*
* Magic characters have a special meaning, they don't match literally.
* Magic characters are negative. This separates them from literal characters
* (possibly multi-byte). Only ASCII characters can be Magic.
*/
// Magic characters have a special meaning, they don't match literally.
// Magic characters are negative. This separates them from literal characters
// (possibly multi-byte). Only ASCII characters can be Magic.
#define Magic(x) ((int)(x) - 256)
#define un_Magic(x) ((x) + 256)
#define is_Magic(x) ((x) < 0)
/*
* We should define ftpr as a pointer to a function returning a pointer to
* a function returning a pointer to a function ...
* This is impossible, so we declare a pointer to a function returning a
* pointer to a function returning void. This should work for all compilers.
*/
// We should define ftpr as a pointer to a function returning a pointer to
// a function returning a pointer to a function ...
// This is impossible, so we declare a pointer to a function returning a
// pointer to a function returning void. This should work for all compilers.
typedef void (*(*fptr_T)(int *, int))(void);
static int no_Magic(int x)
@ -143,28 +137,24 @@ static int re_multi_type(int c)
static char *reg_prev_sub = NULL;
/*
* REGEXP_INRANGE contains all characters which are always special in a []
* range after '\'.
* REGEXP_ABBR contains all characters which act as abbreviations after '\'.
* These are:
* \n - New line (NL).
* \r - Carriage Return (CR).
* \t - Tab (TAB).
* \e - Escape (ESC).
* \b - Backspace (Ctrl_H).
* \d - Character code in decimal, eg \d123
* \o - Character code in octal, eg \o80
* \x - Character code in hex, eg \x4a
* \u - Multibyte character code, eg \u20ac
* \U - Long multibyte character code, eg \U12345678
*/
// REGEXP_INRANGE contains all characters which are always special in a []
// range after '\'.
// REGEXP_ABBR contains all characters which act as abbreviations after '\'.
// These are:
// \n - New line (NL).
// \r - Carriage Return (CR).
// \t - Tab (TAB).
// \e - Escape (ESC).
// \b - Backspace (Ctrl_H).
// \d - Character code in decimal, eg \d123
// \o - Character code in octal, eg \o80
// \x - Character code in hex, eg \x4a
// \u - Multibyte character code, eg \u20ac
// \U - Long multibyte character code, eg \U12345678
static char REGEXP_INRANGE[] = "]^-n\\";
static char REGEXP_ABBR[] = "nrtebdoxuU";
/*
* Translate '\x' to its control character, except "\n", which is Magic.
*/
// Translate '\x' to its control character, except "\n", which is Magic.
static int backslash_trans(int c)
{
switch (c) {
@ -239,10 +229,8 @@ static int get_char_class(char **pp)
return CLASS_NONE;
}
/*
* Specific version of character class functions.
* Using a table to keep this fast.
*/
// Specific version of character class functions.
// Using a table to keep this fast.
static int16_t class_tab[256];
#define RI_DIGIT 0x01
@ -325,9 +313,7 @@ static int reg_string; // matching with a string instead of a buffer
// line
static int reg_strict; // "[abc" is illegal
/*
* META contains all characters that may be magic, except '^' and '$'.
*/
// META contains all characters that may be magic, except '^' and '$'.
// uncrustify:off
@ -391,11 +377,9 @@ int re_multiline(const regprog_T *prog)
return prog->regflags & RF_HASNL;
}
/*
* Check for an equivalence class name "[=a=]". "pp" points to the '['.
* Returns a character representing the class. Zero means that no item was
* recognized. Otherwise "pp" is advanced to after the item.
*/
// Check for an equivalence class name "[=a=]". "pp" points to the '['.
// Returns a character representing the class. Zero means that no item was
// recognized. Otherwise "pp" is advanced to after the item.
static int get_equi_class(char **pp)
{
int c;
@ -413,12 +397,10 @@ static int get_equi_class(char **pp)
return 0;
}
/*
* Check for a collating element "[.a.]". "pp" points to the '['.
* Returns a character. Zero means that no item was recognized. Otherwise
* "pp" is advanced to after the item.
* Currently only single characters are recognized!
*/
// Check for a collating element "[.a.]". "pp" points to the '['.
// Returns a character. Zero means that no item was recognized. Otherwise
// "pp" is advanced to after the item.
// Currently only single characters are recognized!
static int get_coll_element(char **pp)
{
int c;
@ -562,9 +544,7 @@ static int prevchr_len; // byte length of previous char
static int at_start; // True when on the first character
static int prev_at_start; // True when on the second character
/*
* Start parsing at "str".
*/
// Start parsing at "str".
static void initchr(char_u *str)
{
regparse = (char *)str;
@ -574,10 +554,8 @@ static void initchr(char_u *str)
prev_at_start = false;
}
/*
* Save the current parse state, so that it can be restored and parsing
* starts in the same state again.
*/
// Save the current parse state, so that it can be restored and parsing
// starts in the same state again.
static void save_parse_state(parse_state_T *ps)
{
ps->regparse = (char_u *)regparse;
@ -591,9 +569,7 @@ static void save_parse_state(parse_state_T *ps)
ps->regnpar = regnpar;
}
/*
* Restore a previously saved parse state.
*/
// Restore a previously saved parse state.
static void restore_parse_state(parse_state_T *ps)
{
regparse = (char *)ps->regparse;
@ -607,9 +583,7 @@ static void restore_parse_state(parse_state_T *ps)
regnpar = ps->regnpar;
}
/*
* Get the next character without advancing.
*/
// Get the next character without advancing.
static int peekchr(void)
{
static int after_slash = false;
@ -736,9 +710,7 @@ static int peekchr(void)
after_slash--;
curchr = toggle_Magic(curchr);
} else if (vim_strchr(REGEXP_ABBR, c)) {
/*
* Handle abbreviations, like "\t" for TAB -- webb
*/
// Handle abbreviations, like "\t" for TAB -- webb
curchr = backslash_trans(c);
} else if (reg_magic == MAGIC_NONE && (c == '$' || c == '^')) {
curchr = toggle_Magic(c);
@ -757,9 +729,7 @@ static int peekchr(void)
return curchr;
}
/*
* Eat one lexed character. Do this in a way that we can undo it.
*/
// Eat one lexed character. Do this in a way that we can undo it.
static void skipchr(void)
{
// peekchr() eats a backslash, do the same here
@ -781,10 +751,8 @@ static void skipchr(void)
nextchr = -1;
}
/*
* Skip a character while keeping the value of prev_at_start for at_start.
* prevchr and prevprevchr are also kept.
*/
// Skip a character while keeping the value of prev_at_start for at_start.
// prevchr and prevprevchr are also kept.
static void skipchr_keepstart(void)
{
int as = prev_at_start;
@ -797,10 +765,8 @@ static void skipchr_keepstart(void)
prevprevchr = prpr;
}
/*
* Get the next character from the pattern. We know about magic and such, so
* therefore we need a lexical analyzer.
*/
// Get the next character from the pattern. We know about magic and such, so
// therefore we need a lexical analyzer.
static int getchr(void)
{
int chr = peekchr();
@ -809,9 +775,7 @@ static int getchr(void)
return chr;
}
/*
* put character back. Works only once!
*/
// Put a character back. Works only once!
static void ungetchr(void)
{
nextchr = curchr;
@ -825,15 +789,13 @@ static void ungetchr(void)
regparse -= prevchr_len;
}
/*
* Get and return the value of the hex string at the current position.
* Return -1 if there is no valid hex number.
* The position is updated:
* blahblah\%x20asdf
* before-^ ^-after
* The parameter controls the maximum number of input characters. This will be
* 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
*/
// Get and return the value of the hex string at the current position.
// Return -1 if there is no valid hex number.
// The position is updated:
// blahblah\%x20asdf
// before-^ ^-after
// The parameter controls the maximum number of input characters. This will be
// 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
static int64_t gethexchrs(int maxinputlen)
{
int64_t nr = 0;
@ -856,10 +818,8 @@ static int64_t gethexchrs(int maxinputlen)
return nr;
}
/*
* Get and return the value of the decimal string immediately after the
* current position. Return -1 for invalid. Consumes all digits.
*/
// Get and return the value of the decimal string immediately after the
// current position. Return -1 for invalid. Consumes all digits.
static int64_t getdecchrs(void)
{
int64_t nr = 0;
@ -883,14 +843,12 @@ static int64_t getdecchrs(void)
return nr;
}
/*
* get and return the value of the octal string immediately after the current
* position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
* numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
* treat 8 or 9 as recognised characters. Position is updated:
* blahblah\%o210asdf
* before-^ ^-after
*/
// Get and return the value of the octal string immediately after the current
// position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
// numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
// treat 8 or 9 as recognised characters. Position is updated:
// blahblah\%o210asdf
// before-^ ^-after
static int64_t getoctchrs(void)
{
int64_t nr = 0;
@ -913,12 +871,10 @@ static int64_t getoctchrs(void)
return nr;
}
/*
* read_limits - Read two integers to be taken as a minimum and maximum.
* If the first character is '-', then the range is reversed.
* Should end with 'end'. If minval is missing, zero is default, if maxval is
* missing, a very big number is the default.
*/
// read_limits - Read two integers to be taken as a minimum and maximum.
// If the first character is '-', then the range is reversed.
// Should end with 'end'. If minval is missing, zero is default, if maxval is
// missing, a very big number is the default.
static int read_limits(long *minval, long *maxval)
{
int reverse = false;
@ -950,10 +906,8 @@ static int read_limits(long *minval, long *maxval)
EMSG2_RET_FAIL(_("E554: Syntax error in %s{...}"), reg_magic == MAGIC_ALL);
}
/*
* Reverse the range if there was a '-', or make sure it is in the right
* order otherwise.
*/
// Reverse the range if there was a '-', or make sure it is in the right
// order otherwise.
if ((!reverse && *minval > *maxval) || (reverse && *minval < *maxval)) {
tmp = *minval;
*minval = *maxval;
@ -963,13 +917,9 @@ static int read_limits(long *minval, long *maxval)
return OK;
}
/*
* vim_regexec and friends
*/
// vim_regexec and friends
/*
* Global work variables for vim_regexec().
*/
// Global work variables for vim_regexec().
// Sometimes need to save a copy of a line. Since alloc()/free() is very
// slow, we keep one allocated piece of memory and only re-allocate it when
@ -1052,9 +1002,7 @@ static bool reg_iswordc(int c)
return vim_iswordc_buf(c, rex.reg_buf);
}
/*
* Get pointer to the line "lnum", which is relative to "reg_firstlnum".
*/
// Get pointer to the line "lnum", which is relative to "reg_firstlnum".
static char_u *reg_getline(linenr_T lnum)
{
// when looking behind for a match/no-match lnum is negative. But we
@ -1077,9 +1025,7 @@ static lpos_T reg_endzpos[NSUBEXP]; // idem, end pos
// true if using multi-line regexp.
#define REG_MULTI (rex.reg_match == NULL)
/*
* Create a new extmatch and mark it as referenced once.
*/
// Create a new extmatch and mark it as referenced once.
static reg_extmatch_T *make_extmatch(void)
FUNC_ATTR_NONNULL_RET
{
@ -1088,9 +1034,7 @@ static reg_extmatch_T *make_extmatch(void)
return em;
}
/*
* Add a reference to an extmatch.
*/
// Add a reference to an extmatch.
reg_extmatch_T *ref_extmatch(reg_extmatch_T *em)
{
if (em != NULL) {
@ -1099,10 +1043,8 @@ reg_extmatch_T *ref_extmatch(reg_extmatch_T *em)
return em;
}
/*
* Remove a reference to an extmatch. If there are no references left, free
* the info.
*/
// Remove a reference to an extmatch. If there are no references left, free
// the info.
void unref_extmatch(reg_extmatch_T *em)
{
int i;
@ -1201,10 +1143,8 @@ static bool reg_match_visual(void)
return true;
}
/*
* Check the regexp program for its magic number.
* Return true if it's wrong.
*/
// Check the regexp program for its magic number.
// Return true if it's wrong.
static int prog_magic_wrong(void)
{
regprog_T *prog;
@ -1222,11 +1162,9 @@ static int prog_magic_wrong(void)
return false;
}
/*
* Cleanup the subexpressions, if this wasn't done yet.
* This construction is used to clear the subexpressions only when they are
* used (to increase speed).
*/
// Cleanup the subexpressions, if this wasn't done yet.
// This construction is used to clear the subexpressions only when they are
// used (to increase speed).
static void cleanup_subexpr(void)
{
if (rex.need_clear_subexpr) {
@ -1265,12 +1203,10 @@ static void reg_nextline(void)
fast_breakcheck();
}
/*
* Check whether a backreference matches.
* Returns RA_FAIL, RA_NOMATCH or RA_MATCH.
* If "bytelen" is not NULL, it is set to the byte length of the match in the
* last line.
*/
// Check whether a backreference matches.
// Returns RA_FAIL, RA_NOMATCH or RA_MATCH.
// If "bytelen" is not NULL, it is set to the byte length of the match in the
// last line.
static int match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T end_lnum,
colnr_T end_col, int *bytelen)
{
@ -1449,9 +1385,9 @@ static int cstrncmp(char *s1, char *s2, int *n)
c1 = mb_ptr2char_adv((const char_u **)&str1);
c2 = mb_ptr2char_adv((const char_u **)&str2);
/* decompose the character if necessary, into 'base' characters
* because I don't care about Arabic, I will hard-code the Hebrew
* which I *do* care about! So sue me... */
// decompose the character if necessary, into 'base' characters
// because I don't care about Arabic, I will hard-code the Hebrew
// which I *do* care about! So sue me...
if (c1 != c2 && (!rex.reg_ic || utf_fold(c1) != utf_fold(c2))) {
// decomposition necessary?
mb_decompose(c1, &c11, &junk, &junk);
@ -1566,7 +1502,7 @@ char *regtilde(char *source, int magic, bool preview)
int len;
int prevlen;
for (p = newsub; *p; ++p) {
for (p = newsub; *p; p++) {
if ((*p == '~' && magic) || (*p == '\\' && *(p + 1) == '~' && !magic)) {
if (reg_prev_sub != NULL) {
// length = len(newsub) - 1 + len(prev_sub) + 1
@ -1871,12 +1807,11 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int des
*s = CAR;
} else if (*s == '\\' && s[1] != NUL) {
s++;
/* Change NL to CR here too, so that this works:
* :s/abc\\\ndef/\="aaa\\\nbbb"/ on text:
* abc\
* def
* Not when called from vim_regexec_nl().
*/
// Change NL to CR here too, so that this works:
// :s/abc\\\ndef/\="aaa\\\nbbb"/ on text:
// abc{backslash}
// def
// Not when called from vim_regexec_nl().
if (*s == NL && !rsm.sm_line_lbr) {
*s = CAR;
}
@ -2172,10 +2107,8 @@ char *reg_submatch(int no)
if (rsm.sm_match == NULL) {
ssize_t len;
/*
* First round: compute the length and allocate memory.
* Second round: copy the text.
*/
// First round: compute the length and allocate memory.
// Second round: copy the text.
for (round = 1; round <= 2; round++) {
lnum = rsm.sm_mmatch->startpos[no].lnum;
if (lnum < 0 || rsm.sm_mmatch->endpos[no].lnum < 0) {
@ -2216,7 +2149,7 @@ char *reg_submatch(int no)
len++;
}
if (round == 2) {
STRNCPY(retval + len, reg_getline_submatch(lnum),
STRNCPY(retval + len, reg_getline_submatch(lnum), // NOLINT(runtime/printf)
rsm.sm_mmatch->endpos[no].col);
}
len += rsm.sm_mmatch->endpos[no].col;
@ -2327,12 +2260,10 @@ static char_u regname[][30] = {
};
#endif
/*
* Compile a regular expression into internal code.
* Returns the program in allocated memory.
* Use vim_regfree() to free the memory.
* Returns NULL for an error.
*/
// Compile a regular expression into internal code.
// Returns the program in allocated memory.
// Use vim_regfree() to free the memory.
// Returns NULL for an error.
regprog_T *vim_regcomp(char *expr_arg, int re_flags)
{
regprog_T *prog = NULL;
@ -2413,9 +2344,7 @@ regprog_T *vim_regcomp(char *expr_arg, int re_flags)
return prog;
}
/*
* Free a compiled regexp program, returned by vim_regcomp().
*/
// Free a compiled regexp program, returned by vim_regcomp().
void vim_regfree(regprog_T *prog)
{
if (prog != NULL) {

View File

@ -1,137 +1,130 @@
// This is an open source non-commercial project. Dear PVS-Studio, please check
// it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com
/*
*
* Backtracking regular expression implementation.
*
* This file is included in "regexp.c".
*
* NOTICE:
*
* This is NOT the original regular expression code as written by Henry
* Spencer. This code has been modified specifically for use with the VIM
* editor, and should not be used separately from Vim. If you want a good
* regular expression library, get the original code. The copyright notice
* that follows is from the original.
*
* END NOTICE
*
* Copyright (c) 1986 by University of Toronto.
* Written by Henry Spencer. Not derived from licensed software.
*
* Permission is granted to anyone to use this software for any
* purpose on any computer system, and to redistribute it freely,
* subject to the following restrictions:
*
* 1. The author is not responsible for the consequences of use of
* this software, no matter how awful, even if they arise
* from defects in it.
*
* 2. The origin of this software must not be misrepresented, either
* by explicit claim or by omission.
*
* 3. Altered versions must be plainly marked as such, and must not
* be misrepresented as being the original software.
*
* Beware that some of this code is subtly aware of the way operator
* precedence is structured in regular expressions. Serious changes in
* regular-expression syntax might require a total rethink.
*
* Changes have been made by Tony Andrews, Olaf 'Rhialto' Seibert, Robert
* Webb, Ciaran McCreesh and Bram Moolenaar.
* Named character class support added by Walter Briscoe (1998 Jul 01)
*/
// Backtracking regular expression implementation.
//
// This file is included in "regexp.c".
//
// NOTICE:
//
// This is NOT the original regular expression code as written by Henry
// Spencer. This code has been modified specifically for use with the VIM
// editor, and should not be used separately from Vim. If you want a good
// regular expression library, get the original code. The copyright notice
// that follows is from the original.
//
// END NOTICE
//
// Copyright (c) 1986 by University of Toronto.
// Written by Henry Spencer. Not derived from licensed software.
//
// Permission is granted to anyone to use this software for any
// purpose on any computer system, and to redistribute it freely,
// subject to the following restrictions:
//
// 1. The author is not responsible for the consequences of use of
// this software, no matter how awful, even if they arise
// from defects in it.
//
// 2. The origin of this software must not be misrepresented, either
// by explicit claim or by omission.
//
// 3. Altered versions must be plainly marked as such, and must not
// be misrepresented as being the original software.
//
// Beware that some of this code is subtly aware of the way operator
// precedence is structured in regular expressions. Serious changes in
// regular-expression syntax might require a total rethink.
//
// Changes have been made by Tony Andrews, Olaf 'Rhialto' Seibert, Robert
// Webb, Ciaran McCreesh and Bram Moolenaar.
// Named character class support added by Walter Briscoe (1998 Jul 01)
/*
* The "internal use only" fields in regexp_defs.h are present to pass info from
* compile to execute that permits the execute phase to run lots faster on
* simple cases. They are:
*
* regstart char that must begin a match; NUL if none obvious; Can be a
* multi-byte character.
* reganch is the match anchored (at beginning-of-line only)?
* regmust string (pointer into program) that match must include, or NULL
* regmlen length of regmust string
* regflags RF_ values or'ed together
*
* Regstart and reganch permit very fast decisions on suitable starting points
* for a match, cutting down the work a lot. Regmust permits fast rejection
* of lines that cannot possibly match. The regmust tests are costly enough
* that vim_regcomp() supplies a regmust only if the r.e. contains something
* potentially expensive (at present, the only such thing detected is * or +
* at the start of the r.e., which can involve a lot of backup). Regmlen is
* supplied because the test in vim_regexec() needs it and vim_regcomp() is
* computing it anyway.
*/
// The "internal use only" fields in regexp_defs.h are present to pass info from
// compile to execute that permits the execute phase to run lots faster on
// simple cases. They are:
//
// regstart char that must begin a match; NUL if none obvious; Can be a
// multi-byte character.
// reganch is the match anchored (at beginning-of-line only)?
// regmust string (pointer into program) that match must include, or NULL
// regmlen length of regmust string
// regflags RF_ values or'ed together
//
// Regstart and reganch permit very fast decisions on suitable starting points
// for a match, cutting down the work a lot. Regmust permits fast rejection
// of lines that cannot possibly match. The regmust tests are costly enough
// that vim_regcomp() supplies a regmust only if the r.e. contains something
// potentially expensive (at present, the only such thing detected is * or +
// at the start of the r.e., which can involve a lot of backup). Regmlen is
// supplied because the test in vim_regexec() needs it and vim_regcomp() is
// computing it anyway.
/*
* Structure for regexp "program". This is essentially a linear encoding
* of a nondeterministic finite-state machine (aka syntax charts or
* "railroad normal form" in parsing technology). Each node is an opcode
* plus a "next" pointer, possibly plus an operand. "Next" pointers of
* all nodes except BRANCH and BRACES_COMPLEX implement concatenation; a "next"
* pointer with a BRANCH on both ends of it is connecting two alternatives.
* (Here we have one of the subtle syntax dependencies: an individual BRANCH
* (as opposed to a collection of them) is never concatenated with anything
* because of operator precedence). The "next" pointer of a BRACES_COMPLEX
* node points to the node after the stuff to be repeated.
* The operand of some types of node is a literal string; for others, it is a
* node leading into a sub-FSM. In particular, the operand of a BRANCH node
* is the first node of the branch.
* (NB this is *not* a tree structure: the tail of the branch connects to the
* thing following the set of BRANCHes.)
*
* pattern is coded like:
*
* +-----------------+
* | V
* <aa>\|<bb> BRANCH <aa> BRANCH <bb> --> END
* | ^ | ^
* +------+ +----------+
*
*
* +------------------+
* V |
* <aa>* BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END
* | | ^ ^
* | +---------------+ |
* +---------------------------------------------+
*
*
* +----------------------+
* V |
* <aa>\+ BRANCH <aa> --> BRANCH --> BACK BRANCH --> NOTHING --> END
* | | ^ ^
* | +-----------+ |
* +--------------------------------------------------+
*
*
* +-------------------------+
* V |
* <aa>\{} BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK END
* | | ^
* | +----------------+
* +-----------------------------------------------+
*
*
* <aa>\@!<bb> BRANCH NOMATCH <aa> --> END <bb> --> END
* | | ^ ^
* | +----------------+ |
* +--------------------------------+
*
* +---------+
* | V
* \z[abc] BRANCH BRANCH a BRANCH b BRANCH c BRANCH NOTHING --> END
* | | | | ^ ^
* | | | +-----+ |
* | | +----------------+ |
* | +---------------------------+ |
* +------------------------------------------------------+
*
* They all start with a BRANCH for "\|" alternatives, even when there is only
* one alternative.
*/
// Structure for regexp "program". This is essentially a linear encoding
// of a nondeterministic finite-state machine (aka syntax charts or
// "railroad normal form" in parsing technology). Each node is an opcode
// plus a "next" pointer, possibly plus an operand. "Next" pointers of
// all nodes except BRANCH and BRACE_COMPLEX implement concatenation; a "next"
// pointer with a BRANCH on both ends of it is connecting two alternatives.
// (Here we have one of the subtle syntax dependencies: an individual BRANCH
// (as opposed to a collection of them) is never concatenated with anything
// because of operator precedence). The "next" pointer of a BRACE_COMPLEX
// node points to the node after the stuff to be repeated.
// The operand of some types of node is a literal string; for others, it is a
// node leading into a sub-FSM. In particular, the operand of a BRANCH node
// is the first node of the branch.
// (NB this is *not* a tree structure: the tail of the branch connects to the
// thing following the set of BRANCHes.)
//
// pattern is coded like:
//
// +-----------------+
// | V
// <aa>\|<bb> BRANCH <aa> BRANCH <bb> --> END
// | ^ | ^
// +------+ +----------+
//
//
// +------------------+
// V |
// <aa>* BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END
// | | ^ ^
// | +---------------+ |
// +---------------------------------------------+
//
//
// +----------------------+
// V |
// <aa>\+ BRANCH <aa> --> BRANCH --> BACK BRANCH --> NOTHING --> END
// | | ^ ^
// | +-----------+ |
// +--------------------------------------------------+
//
//
// +-------------------------+
// V |
// <aa>\{} BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK END
// | | ^
// | +----------------+
// +-----------------------------------------------+
//
//
// <aa>\@!<bb> BRANCH NOMATCH <aa> --> END <bb> --> END
// | | ^ ^
// | +----------------+ |
// +--------------------------------+
//
// +---------+
// | V
// \z[abc] BRANCH BRANCH a BRANCH b BRANCH c BRANCH NOTHING --> END
// | | | | ^ ^
// | | | +-----+ |
// | | +----------------+ |
// | +---------------------------+ |
// +------------------------------------------------------+
//
// They all start with a BRANCH for "\|" alternatives, even when there is only
// one alternative.
#include <assert.h>
#include <inttypes.h>
@ -141,9 +134,7 @@
#include "nvim/garray.h"
#include "nvim/regexp.h"
/*
* The opcodes are:
*/
// The opcodes are:
// definition number opnd? meaning
#define END 0 // End of program or NOMATCH operand.
@ -240,9 +231,7 @@
#define RE_VISUAL 208 // Match Visual area
#define RE_COMPOSING 209 // any composing characters
/*
* Flags to be passed up and down.
*/
// Flags to be passed up and down.
#define HASWIDTH 0x1 // Known never to match null string.
#define SIMPLE 0x2 // Simple enough to be STAR/PLUS operand.
#define SPSTART 0x4 // Starts with * or +.
@ -273,10 +262,8 @@ static int classcodes[] = {
UPPER, NUPPER
};
/*
* When regcode is set to this value, code is not emitted and size is computed
* instead.
*/
// When regcode is set to this value, code is not emitted and size is computed
// instead.
#define JUST_CALC_SIZE ((char_u *)-1)
// Values for rs_state in regitem_T.
@ -297,11 +284,9 @@ typedef enum regstate_E {
RS_STAR_SHORT, // STAR/PLUS/BRACE_SIMPLE shortest match
} regstate_T;
/*
* Structure used to save the current input state, when it needs to be
* restored after trying a match. Used by reg_save() and reg_restore().
* Also stores the length of "backpos".
*/
// Structure used to save the current input state, when it needs to be
// restored after trying a match. Used by reg_save() and reg_restore().
// Also stores the length of "backpos".
typedef struct {
union {
char_u *ptr; // rex.input pointer, for single-line regexp
@ -327,12 +312,10 @@ typedef struct regbehind_S {
save_se_T save_end[NSUBEXP];
} regbehind_T;
/*
* When there are alternatives a regstate_T is put on the regstack to remember
* what we are doing.
* Before it may be another type of item, depending on rs_state, to remember
* more things.
*/
// When there are alternatives a regstate_T is put on the regstack to remember
// what we are doing.
// Before it may be another type of item, depending on rs_state, to remember
// more things.
typedef struct regitem_S {
regstate_T rs_state; // what we are doing, one of RS_ above
int16_t rs_no; // submatch nr or BEHIND/NOBEHIND
@ -359,69 +342,63 @@ typedef struct backpos_S {
regsave_T bp_pos; // last input position
} backpos_T;
/*
* "regstack" and "backpos" are used by regmatch(). They are kept over calls
* to avoid invoking malloc() and free() often.
* "regstack" is a stack with regitem_T items, sometimes preceded by regstar_T
* or regbehind_T.
* "backpos_T" is a table with backpos_T for BACK
*/
// "regstack" and "backpos" are used by regmatch(). They are kept over calls
// to avoid invoking malloc() and free() often.
// "regstack" is a stack with regitem_T items, sometimes preceded by regstar_T
// or regbehind_T.
// "backpos" is a table with backpos_T items for BACK
static garray_T regstack = GA_EMPTY_INIT_VALUE;
static garray_T backpos = GA_EMPTY_INIT_VALUE;
static regsave_T behind_pos;
/*
* Both for regstack and backpos tables we use the following strategy of
* allocation (to reduce malloc/free calls):
* - Initial size is fairly small.
* - When needed, the tables are grown bigger (8 times at first, double after
* that).
* - After executing the match we free the memory only if the array has grown.
* Thus the memory is kept allocated when it's at the initial size.
* This makes it fast while not keeping a lot of memory allocated.
* A three times speed increase was observed when using many simple patterns.
*/
// Both for regstack and backpos tables we use the following strategy of
// allocation (to reduce malloc/free calls):
// - Initial size is fairly small.
// - When needed, the tables are grown bigger (8 times at first, double after
// that).
// - After executing the match we free the memory only if the array has grown.
// Thus the memory is kept allocated when it's at the initial size.
// This makes it fast while not keeping a lot of memory allocated.
// A three times speed increase was observed when using many simple patterns.
#define REGSTACK_INITIAL 2048
#define BACKPOS_INITIAL 64
/*
* Opcode notes:
*
* BRANCH The set of branches constituting a single choice are hooked
* together with their "next" pointers, since precedence prevents
* anything being concatenated to any individual branch. The
* "next" pointer of the last BRANCH in a choice points to the
* thing following the whole choice. This is also where the
* final "next" pointer of each individual branch points; each
* branch starts with the operand node of a BRANCH node.
*
* BACK Normal "next" pointers all implicitly point forward; BACK
* exists to make loop structures possible.
*
* STAR,PLUS '=', and complex '*' and '+', are implemented as circular
* BRANCH structures using BACK. Simple cases (one character
* per match) are implemented with STAR and PLUS for speed
* and to minimize recursive plunges.
*
* BRACE_LIMITS This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX
* node, and defines the min and max limits to be used for that
* node.
*
* MOPEN,MCLOSE ...are numbered at compile time.
* ZOPEN,ZCLOSE ...ditto
*/
/*
* A node is one char of opcode followed by two chars of "next" pointer.
* "Next" pointers are stored as two 8-bit bytes, high order first. The
* value is a positive offset from the opcode of the node containing it.
* An operand, if any, simply follows the node. (Note that much of the
* code generation knows about this implicit relationship.)
*
* Using two bytes for the "next" pointer is vast overkill for most things,
* but allows patterns to get big without disasters.
*/
// Opcode notes:
//
// BRANCH The set of branches constituting a single choice are hooked
// together with their "next" pointers, since precedence prevents
// anything being concatenated to any individual branch. The
// "next" pointer of the last BRANCH in a choice points to the
// thing following the whole choice. This is also where the
// final "next" pointer of each individual branch points; each
// branch starts with the operand node of a BRANCH node.
//
// BACK Normal "next" pointers all implicitly point forward; BACK
// exists to make loop structures possible.
//
// STAR,PLUS '=', and complex '*' and '+', are implemented as circular
// BRANCH structures using BACK. Simple cases (one character
// per match) are implemented with STAR and PLUS for speed
// and to minimize recursive plunges.
//
// BRACE_LIMITS This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX
// node, and defines the min and max limits to be used for that
// node.
//
// MOPEN,MCLOSE ...are numbered at compile time.
// ZOPEN,ZCLOSE ...ditto
//
//
//
// A node is one char of opcode followed by two chars of "next" pointer.
// "Next" pointers are stored as two 8-bit bytes, high order first. The
// value is a positive offset from the opcode of the node containing it.
// An operand, if any, simply follows the node. (Note that much of the
// code generation knows about this implicit relationship.)
//
// Using two bytes for the "next" pointer is vast overkill for most things,
// but allows patterns to get big without disasters.
// Accessors for a compiled node; "p" points at the node's opcode byte.
//   OP(p)      - the opcode stored in the first byte, as an int.
//   NEXT(p)    - the "next" offset held in bytes 1 and 2, high-order byte
//                first; each byte is masked to 8 bits before combining.
//   OPERAND(p) - address of the byte following the three-byte node header.
#define OP(p) ((int)((p)[0]))
#define NEXT(p) ((((p)[1] & 0xff) << 8) | ((p)[2] & 0xff))
#define OPERAND(p) (&(p)[3])
@ -449,9 +426,7 @@ static int regnarrate = 0;
# include "regexp_bt.c.generated.h"
#endif
/*
* Setup to parse the regexp. Used once to get the length and once to do it.
*/
// Setup to parse the regexp. Used once to get the length and once to do it.
static void regcomp_start(char_u *expr, int re_flags) // see vim_regcomp()
{
initchr(expr);
@ -484,9 +459,7 @@ static bool use_multibytecode(int c)
|| utf_iscomposing(c));
}
/*
* Emit (if appropriate) a byte of code
*/
// Emit (if appropriate) a byte of code
static void regc(int b)
{
if (regcode == JUST_CALC_SIZE) {
@ -496,9 +469,7 @@ static void regc(int b)
}
}
/*
* Emit (if appropriate) a multi-byte character of code
*/
// Emit (if appropriate) a multi-byte character of code
static void regmbc(int c)
{
if (regcode == JUST_CALC_SIZE) {
@ -508,11 +479,9 @@ static void regmbc(int c)
}
}
/*
* Produce the bytes for equivalence class "c".
* Currently only handles latin1, latin9 and utf-8.
* NOTE: When changing this function, also change nfa_emit_equi_class()
*/
// Produce the bytes for equivalence class "c".
// Currently only handles latin1, latin9 and utf-8.
// NOTE: When changing this function, also change nfa_emit_equi_class()
static void reg_equi_class(int c)
{
{
@ -1481,10 +1450,8 @@ static void reg_equi_class(int c)
regmbc(c);
}
/*
* Emit a node.
* Return pointer to generated code.
*/
// Emit a node.
// Return pointer to generated code.
static char_u *regnode(int op)
{
char_u *ret;
@ -1500,9 +1467,7 @@ static char_u *regnode(int op)
return ret;
}
/*
* Write a four bytes number at "p" and return pointer to the next char.
*/
// Write a four-byte number at "p" and return pointer to the next char.
static char_u *re_put_uint32(char_u *p, uint32_t val)
{
*p++ = (char_u)((val >> 24) & 0377);
@ -1512,11 +1477,9 @@ static char_u *re_put_uint32(char_u *p, uint32_t val)
return p;
}
/*
* regnext - dig the "next" pointer out of a node
* Returns NULL when calculating size, when there is no next item and when
* there is an error.
*/
// regnext - dig the "next" pointer out of a node
// Returns NULL when calculating size, when there is no next item and when
// there is an error.
static char_u *regnext(char_u *p)
FUNC_ATTR_NONNULL_ALL
{
@ -1573,9 +1536,7 @@ static void regtail(char_u *p, char_u *val)
}
}
/*
* Like regtail, on item after a BRANCH; nop if none.
*/
// Like regtail, on item after a BRANCH; nop if none.
static void regoptail(char_u *p, char_u *val)
{
// When op is neither BRANCH nor BRACE_COMPLEX0-9, it is "operandless"
@ -1587,11 +1548,9 @@ static void regoptail(char_u *p, char_u *val)
regtail(OPERAND(p), val);
}
/*
* Insert an operator in front of already-emitted operand
*
* Means relocating the operand.
*/
// Insert an operator in front of already-emitted operand
//
// Means relocating the operand.
static void reginsert(int op, char_u *opnd)
{
char_u *src;
@ -1615,10 +1574,8 @@ static void reginsert(int op, char_u *opnd)
*place = NUL;
}
/*
* Insert an operator in front of already-emitted operand.
* Add a number to the operator.
*/
// Insert an operator in front of already-emitted operand.
// Add a number to the operator.
static void reginsert_nr(int op, long val, char_u *opnd)
{
char_u *src;
@ -1644,12 +1601,10 @@ static void reginsert_nr(int op, long val, char_u *opnd)
re_put_uint32(place, (uint32_t)val);
}
/*
* Insert an operator in front of already-emitted operand.
* The operator has the given limit values as operands. Also set next pointer.
*
* Means relocating the operand.
*/
// Insert an operator in front of already-emitted operand.
// The operator has the given limit values as operands. Also set next pointer.
//
// Means relocating the operand.
static void reginsert_limits(int op, long minval, long maxval, char_u *opnd)
{
char_u *src;
@ -1704,13 +1659,11 @@ static int seen_endbrace(int refnum)
return true;
}
/*
* Parse the lowest level.
*
* Optimization: gobbles an entire sequence of ordinary characters so that
* it can turn them into a single node, which is smaller to store and
* faster to run. Don't do this when one_exactly is set.
*/
// Parse the lowest level.
//
// Optimization: gobbles an entire sequence of ordinary characters so that
// it can turn them into a single node, which is smaller to store and
// faster to run. Don't do this when one_exactly is set.
static char_u *regatom(int *flagp)
{
char_u *ret;
@ -2289,8 +2242,7 @@ collection:
if (c_class != 0) {
// produce equivalence class
reg_equi_class(c_class);
} else if ((c_class =
get_coll_element(&regparse)) != 0) {
} else if ((c_class = get_coll_element(&regparse)) != 0) {
// produce a collating element
regmbc(c_class);
} else {
@ -2466,7 +2418,7 @@ do_multibyte:
for (len = 0; c != NUL && (len == 0
|| (re_multi_type(peekchr()) == NOT_MULTI
&& !one_exactly
&& !is_Magic(c))); ++len) {
&& !is_Magic(c))); len++) {
c = no_Magic(c);
{
regmbc(c);
@ -2500,15 +2452,13 @@ do_multibyte:
return ret;
}
/*
* Parse something followed by possible [*+=].
*
* Note that the branching code sequences used for = and the general cases
* of * and + are somewhat optimized: they use the same NOTHING node as
* both the endmarker for their branch list and the body of the last branch.
* It might seem that this node could be dispensed with entirely, but the
* endmarker role is not redundant.
*/
// Parse something followed by possible [*+=].
//
// Note that the branching code sequences used for = and the general cases
// of * and + are somewhat optimized: they use the same NOTHING node as
// both the endmarker for their branch list and the body of the last branch.
// It might seem that this node could be dispensed with entirely, but the
// endmarker role is not redundant.
static char_u *regpiece(int *flagp)
{
char_u *ret;
@ -2644,10 +2594,8 @@ static char_u *regpiece(int *flagp)
return ret;
}
/*
* Parse one alternative of an | or & operator.
* Implements the concatenation operator.
*/
// Parse one alternative of an | or & operator.
// Implements the concatenation operator.
static char_u *regconcat(int *flagp)
{
char_u *first = NULL;
@ -2722,10 +2670,8 @@ static char_u *regconcat(int *flagp)
return first;
}
/*
* Parse one alternative of an | operator.
* Implements the & operator.
*/
// Parse one alternative of an | operator.
// Implements the & operator.
static char_u *regbranch(int *flagp)
{
char_u *ret;
@ -2874,27 +2820,25 @@ static char_u *reg(int paren, int *flagp)
return ret;
}
/*
* bt_regcomp() - compile a regular expression into internal code for the
* traditional back track matcher.
* Returns the program in allocated space. Returns NULL for an error.
*
* We can't allocate space until we know how big the compiled form will be,
* but we can't compile it (and thus know how big it is) until we've got a
* place to put the code. So we cheat: we compile it twice, once with code
* generation turned off and size counting turned on, and once "for real".
* This also means that we don't allocate space until we are sure that the
* thing really will compile successfully, and we never have to move the
* code and thus invalidate pointers into it. (Note that it has to be in
* one piece because free() must be able to free it all.)
*
* Whether upper/lower case is to be ignored is decided when executing the
* program, it does not matter here.
*
* Beware that the optimization-preparation code in here knows about some
* of the structure of the compiled regexp.
* "re_flags": RE_MAGIC and/or RE_STRING.
*/
// bt_regcomp() - compile a regular expression into internal code for the
// traditional back track matcher.
// Returns the program in allocated space. Returns NULL for an error.
//
// We can't allocate space until we know how big the compiled form will be,
// but we can't compile it (and thus know how big it is) until we've got a
// place to put the code. So we cheat: we compile it twice, once with code
// generation turned off and size counting turned on, and once "for real".
// This also means that we don't allocate space until we are sure that the
// thing really will compile successfully, and we never have to move the
// code and thus invalidate pointers into it. (Note that it has to be in
// one piece because free() must be able to free it all.)
//
// Whether upper/lower case is to be ignored is decided when executing the
// program, it does not matter here.
//
// Beware that the optimization-preparation code in here knows about some
// of the structure of the compiled regexp.
// "re_flags": RE_MAGIC and/or RE_STRING.
static regprog_T *bt_regcomp(char_u *expr, int re_flags)
{
char_u *scan;
@ -2999,19 +2943,15 @@ static regprog_T *bt_regcomp(char_u *expr, int re_flags)
return (regprog_T *)r;
}
/*
* Check if during the previous call to vim_regcomp the EOL item "$" has been
* found. This is messy, but it works fine.
*/
// Check if during the previous call to vim_regcomp the EOL item "$" has been
// found.  This is messy, but it works fine.
//
// Takes no arguments and simply returns the current value of "had_eol".
// NOTE(review): "had_eol" is defined outside this view; presumably it is
// file-level state updated while a pattern is compiled, with non-zero meaning
// "$" was seen -- confirm against its definition.
int vim_regcomp_had_eol(void)
{
return had_eol;
}
/*
* Get a number after a backslash that is inside [].
* When nothing is recognized return a backslash.
*/
// Get a number after a backslash that is inside [].
// When nothing is recognized return a backslash.
static int coll_get_char(void)
{
int64_t nr = -1;
@ -3037,9 +2977,7 @@ static int coll_get_char(void)
return (int)nr;
}
/*
* Free a compiled regexp program, returned by bt_regcomp().
*/
// Free a compiled regexp program, returned by bt_regcomp().
static void bt_regfree(regprog_T *prog)
{
xfree(prog);
@ -3047,11 +2985,9 @@ static void bt_regfree(regprog_T *prog)
#define ADVANCE_REGINPUT() MB_PTR_ADV(rex.input)
/*
* The arguments from BRACE_LIMITS are stored here. They are actually local
* to regmatch(), but they are here to reduce the amount of stack space used
* (it can be called recursively many times).
*/
// The arguments from BRACE_LIMITS are stored here. They are actually local
// to regmatch(), but they are here to reduce the amount of stack space used
// (it can be called recursively many times).
static long bl_minval;
static long bl_maxval;
@ -3108,13 +3044,11 @@ static bool reg_save_equal(const regsave_T *save)
else /* NOLINT */ \
*(pp) = (savep)->se_u.ptr; }
/*
* Tentatively set the sub-expression start to the current position (after
* calling regmatch() they will have changed). Need to save the existing
* values for when there is no match.
* Use se_save() to use pointer (save_se_multi()) or position (save_se_one()),
* depending on REG_MULTI.
*/
// Tentatively set the sub-expression start to the current position (after
// calling regmatch() they will have changed). Need to save the existing
// values for when there is no match.
// Use se_save() to use pointer (save_se_multi()) or position (save_se_one()),
// depending on REG_MULTI.
static void save_se_multi(save_se_T *savep, lpos_T *posp)
{
savep->se_u.pos = *posp;
@ -3494,10 +3428,8 @@ do_class:
return (int)count;
}
/*
* Push an item onto the regstack.
* Returns pointer to new item. Returns NULL when out of memory.
*/
// Push an item onto the regstack.
// Returns pointer to new item. Returns NULL when out of memory.
static regitem_T *regstack_push(regstate_T state, char_u *scan)
{
regitem_T *rp;
@ -3516,9 +3448,7 @@ static regitem_T *regstack_push(regstate_T state, char_u *scan)
return rp;
}
/*
* Pop an item from the regstack.
*/
// Pop an item from the regstack.
static void regstack_pop(char_u **scan)
{
regitem_T *rp;
@ -4643,7 +4573,7 @@ static bool regmatch(char_u *scan, proftime_T *tm, int *timed_out)
// Pop the state. Restore pointers when there is no match.
if (status == RA_NOMATCH) {
reg_restore(&rp->rs_un.regsave, &backpos);
--brace_count[rp->rs_no]; // decrement match count
brace_count[rp->rs_no]--; // decrement match count
}
regstack_pop(&scan);
break;
@ -4653,7 +4583,7 @@ static bool regmatch(char_u *scan, proftime_T *tm, int *timed_out)
if (status == RA_NOMATCH) {
// There was no match, but we did find enough matches.
reg_restore(&rp->rs_un.regsave, &backpos);
--brace_count[rp->rs_no];
brace_count[rp->rs_no]--;
// continue with the items after "\{}"
status = RA_CONT;
}
@ -5247,9 +5177,7 @@ static long bt_regexec_multi(regmmatch_T *rmp, win_T *win, buf_T *buf, linenr_T
return bt_regexec_both(NULL, col, tm, timed_out);
}
/*
* Compare a number with the operand of RE_LNUM, RE_COL or RE_VCOL.
*/
// Compare a number with the operand of RE_LNUM, RE_COL or RE_VCOL.
static int re_num_cmp(uint32_t val, char_u *scan)
{
uint32_t n = (uint32_t)OPERAND_MIN(scan);
@ -5265,9 +5193,7 @@ static int re_num_cmp(uint32_t val, char_u *scan)
#ifdef BT_REGEXP_DUMP
/*
* regdump - dump a regexp onto stdout in vaguely comprehensible form
*/
// regdump - dump a regexp onto stdout in vaguely comprehensible form
static void regdump(char_u *pattern, bt_regprog_T *r)
{
char_u *s;
@ -5353,9 +5279,7 @@ static void regdump(char_u *pattern, bt_regprog_T *r)
#ifdef REGEXP_DEBUG
/*
* regprop - printable representation of opcode
*/
// regprop - printable representation of opcode
static char_u *regprop(char_u *op)
{
char *p;

View File

@ -1,13 +1,11 @@
/*
* NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE
*
* This is NOT the original regular expression code as written by Henry
* Spencer. This code has been modified specifically for use with Vim, and
* should not be used apart from compiling Vim. If you want a good regular
* expression library, get the original code.
*
* NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE
*/
// NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE
//
// This is NOT the original regular expression code as written by Henry
// Spencer. This code has been modified specifically for use with Vim, and
// should not be used apart from compiling Vim. If you want a good regular
// expression library, get the original code.
//
// NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE
#ifndef NVIM_REGEXP_DEFS_H
#define NVIM_REGEXP_DEFS_H
@ -17,18 +15,14 @@
#include "nvim/pos.h"
#include "nvim/types.h"
/*
* The number of sub-matches is limited to 10.
* The first one (index 0) is the whole match, referenced with "\0".
* The second one (index 1) is the first sub-match, referenced with "\1".
* This goes up to the tenth (index 9), referenced with "\9".
*/
// The number of sub-matches is limited to 10.
// The first one (index 0) is the whole match, referenced with "\0".
// The second one (index 1) is the first sub-match, referenced with "\1".
// This goes up to the tenth (index 9), referenced with "\9".
#define NSUBEXP 10
/*
* In the NFA engine: how many braces are allowed.
* TODO(RE): Use dynamic memory allocation instead of static, like here
*/
// In the NFA engine: how many braces are allowed.
// TODO(RE): Use dynamic memory allocation instead of static, like here
#define NFA_MAX_BRACES 20
// In the NFA engine: how many states are allowed.
@ -61,11 +55,9 @@ typedef struct {
#include "nvim/buffer_defs.h"
/*
* Structure returned by vim_regcomp() to pass on to vim_regexec().
* This is the general structure. For the actual matcher, two specific
* structures are used. See code below.
*/
// Structure returned by vim_regcomp() to pass on to vim_regexec().
// This is the general structure. For the actual matcher, two specific
// structures are used. See code below.
struct regprog {
regengine_T *engine;
unsigned regflags;
@ -74,11 +66,9 @@ struct regprog {
bool re_in_use; ///< prog is being executed
};
/*
* Structure used by the back track matcher.
* These fields are only to be used in regexp.c!
* See regexp.c for an explanation.
*/
// Structure used by the back track matcher.
// These fields are only to be used in regexp.c!
// See regexp.c for an explanation.
typedef struct {
// These four members implement regprog_T.
regengine_T *engine;
@ -107,9 +97,7 @@ struct nfa_state {
int val;
};
/*
* Structure used by the NFA matcher.
*/
// Structure used by the NFA matcher.
typedef struct {
// These four members implement regprog_T.
regengine_T *engine;
@ -133,11 +121,9 @@ typedef struct {
nfa_state_T state[1]; // actually longer..
} nfa_regprog_T;
/*
* Structure to be used for single-line matching.
* Sub-match "no" starts at "startp[no]" and ends just before "endp[no]".
* When there is no match, the pointer is NULL.
*/
// Structure to be used for single-line matching.
// Sub-match "no" starts at "startp[no]" and ends just before "endp[no]".
// When there is no match, the pointer is NULL.
typedef struct {
regprog_T *regprog;
char *startp[NSUBEXP];
@ -145,11 +131,9 @@ typedef struct {
bool rm_ic;
} regmatch_T;
/*
* Structure used to store external references: "\z\(\)" to "\z\1".
* Use a reference count to avoid the need to copy this around. When it goes
* from 1 to zero the matches need to be freed.
*/
// Structure used to store external references: "\z\(\)" to "\z\1".
// Use a reference count to avoid the need to copy this around. When it goes
// from 1 to zero the matches need to be freed.
struct reg_extmatch {
int16_t refcnt;
char_u *matches[NSUBEXP];

View File

@ -1,11 +1,9 @@
// This is an open source non-commercial project. Dear PVS-Studio, please check
// it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com
/*
* NFA regular expression implementation.
*
* This file is included in "regexp.c".
*/
// NFA regular expression implementation.
//
// This file is included in "regexp.c".
#include <assert.h>
#include <inttypes.h>
@ -383,10 +381,8 @@ static void nfa_regcomp_start(char_u *expr, int re_flags)
regcomp_start(expr, re_flags);
}
/*
* Figure out if the NFA state list starts with an anchor, must match at start
* of the line.
*/
// Figure out if the NFA state list starts with an anchor, must match at start
// of the line.
static int nfa_get_reganch(nfa_state_T *start, int depth)
{
nfa_state_T *p = start;
@ -441,10 +437,8 @@ static int nfa_get_reganch(nfa_state_T *start, int depth)
return 0;
}
/*
* Figure out if the NFA state list starts with a character which must match
* at start of the match.
*/
// Figure out if the NFA state list starts with a character which must match
// at start of the match.
static int nfa_get_regstart(nfa_state_T *start, int depth)
{
nfa_state_T *p = start;
@ -521,11 +515,9 @@ static int nfa_get_regstart(nfa_state_T *start, int depth)
return 0;
}
/*
* Figure out if the NFA state list contains just literal text and nothing
* else. If so return a string in allocated memory with what must match after
* regstart. Otherwise return NULL.
*/
// Figure out if the NFA state list contains just literal text and nothing
// else. If so return a string in allocated memory with what must match after
// regstart. Otherwise return NULL.
static char_u *nfa_get_match_text(nfa_state_T *start)
{
nfa_state_T *p = start;
@ -557,10 +549,8 @@ static char_u *nfa_get_match_text(nfa_state_T *start)
return ret;
}
/*
* Allocate more space for post_start. Called when
* running above the estimated number of states.
*/
// Allocate more space for post_start. Called when
// running above the estimated number of states.
static void realloc_post_list(void)
{
// For weird patterns the number of states can be very high. Increasing by
@ -572,16 +562,14 @@ static void realloc_post_list(void)
post_start = new_start;
}
/*
* Search between "start" and "end" and try to recognize a
* character class in expanded form. For example [0-9].
* On success, return the id the character class to be emitted.
* On failure, return 0 (=FAIL)
* Start points to the first char of the range, while end should point
* to the closing brace.
* Keep in mind that 'ignorecase' applies at execution time, thus [a-z] may
* need to be interpreted as [a-zA-Z].
*/
// Search between "start" and "end" and try to recognize a
// character class in expanded form. For example [0-9].
// On success, return the id of the character class to be emitted.
// On failure, return 0 (=FAIL)
// Start points to the first char of the range, while end should point
// to the closing brace.
// Keep in mind that 'ignorecase' applies at execution time, thus [a-z] may
// need to be interpreted as [a-zA-Z].
static int nfa_recognize_char_class(char_u *start, char_u *end, int extra_newl)
{
#define CLASS_not 0x80
@ -700,14 +688,12 @@ static int nfa_recognize_char_class(char_u *start, char_u *end, int extra_newl)
return FAIL;
}
/*
* Produce the bytes for equivalence class "c".
* Currently only handles latin1, latin9 and utf-8.
* Emits bytes in postfix notation: 'a,b,NFA_OR,c,NFA_OR' is
* equivalent to 'a OR b OR c'
*
* NOTE! When changing this function, also update reg_equi_class()
*/
// Produce the bytes for equivalence class "c".
// Currently only handles latin1, latin9 and utf-8.
// Emits bytes in postfix notation: 'a,b,NFA_OR,c,NFA_OR' is
// equivalent to 'a OR b OR c'
//
// NOTE! When changing this function, also update reg_equi_class()
static void nfa_emit_equi_class(int c)
{
#define EMIT2(c) EMIT(c); EMIT(NFA_CONCAT);
@ -1778,26 +1764,22 @@ static void nfa_emit_equi_class(int c)
#undef EMIT2
}
/*
* Code to parse regular expression.
*
* We try to reuse parsing functions in regexp.c to
* minimize surprise and keep the syntax consistent.
*/
// Code to parse regular expression.
//
// We try to reuse parsing functions in regexp.c to
// minimize surprise and keep the syntax consistent.
/*
* Parse the lowest level.
*
* An atom can be one of a long list of items. Many atoms match one character
* in the text. It is often an ordinary character or a character class.
* Braces can be used to make a pattern into an atom. The "\z(\)" construct
* is only for syntax highlighting.
*
* atom ::= ordinary-atom
* or \( pattern \)
* or \%( pattern \)
* or \z( pattern \)
*/
// Parse the lowest level.
//
// An atom can be one of a long list of items. Many atoms match one character
// in the text. It is often an ordinary character or a character class.
// Braces can be used to make a pattern into an atom. The "\z(\)" construct
// is only for syntax highlighting.
//
// atom ::= ordinary-atom
// or \( pattern \)
// or \%( pattern \)
// or \z( pattern \)
static int nfa_regatom(void)
{
int c;
@ -1862,9 +1844,7 @@ static int nfa_regatom(void)
// "\_x" is character class plus newline
FALLTHROUGH;
/*
* Character classes.
*/
// Character classes.
case Magic('.'):
case Magic('i'):
case Magic('I'):
@ -2228,24 +2208,20 @@ static int nfa_regatom(void)
case Magic('['):
collection:
/*
* [abc] uses NFA_START_COLL - NFA_END_COLL
* [^abc] uses NFA_START_NEG_COLL - NFA_END_NEG_COLL
* Each character is produced as a regular state, using
* NFA_CONCAT to bind them together.
* Besides normal characters there can be:
* - character classes NFA_CLASS_*
* - ranges, two characters followed by NFA_RANGE.
*/
// [abc] uses NFA_START_COLL - NFA_END_COLL
// [^abc] uses NFA_START_NEG_COLL - NFA_END_NEG_COLL
// Each character is produced as a regular state, using
// NFA_CONCAT to bind them together.
// Besides normal characters there can be:
// - character classes NFA_CLASS_*
// - ranges, two characters followed by NFA_RANGE.
p = (char_u *)regparse;
endp = skip_anyof((char *)p);
if (*endp == ']') {
/*
* Try to reverse engineer character classes. For example,
* recognize that [0-9] stands for \d and [A-Za-z_] for \h,
* and perform the necessary substitutions in the NFA.
*/
// Try to reverse engineer character classes. For example,
// recognize that [0-9] stands for \d and [A-Za-z_] for \h,
// and perform the necessary substitutions in the NFA.
int result = nfa_recognize_char_class((char_u *)regparse, endp, extra == NFA_ADD_NL);
if (result != FAIL) {
if (result >= NFA_FIRST_NL && result <= NFA_LAST_NL) {
@ -2259,10 +2235,8 @@ collection:
MB_PTR_ADV(regparse);
return OK;
}
/*
* Failed to recognize a character class. Use the simple
* version that turns [abc] into 'a' OR 'b' OR 'c'
*/
// Failed to recognize a character class. Use the simple
// version that turns [abc] into 'a' OR 'b' OR 'c'
startc = -1;
negated = false;
if (*regparse == '^') { // negated range
@ -2554,16 +2528,14 @@ nfa_do_multibyte:
return OK;
}
/*
* Parse something followed by possible [*+=].
*
* A piece is an atom, possibly followed by a multi, an indication of how many
* times the atom can be matched. Example: "a*" matches any sequence of "a"
* characters: "", "a", "aa", etc.
*
* piece ::= atom
* or atom multi
*/
// Parse something followed by possible [*+=].
//
// A piece is an atom, possibly followed by a multi, an indication of how many
// times the atom can be matched. Example: "a*" matches any sequence of "a"
// characters: "", "a", "aa", etc.
//
// piece ::= atom
// or atom multi
static int nfa_regpiece(void)
{
int i;
@ -2601,17 +2573,15 @@ static int nfa_regpiece(void)
break;
case Magic('+'):
/*
* Trick: Normally, (a*)\+ would match the whole input "aaa". The
* first and only submatch would be "aaa". But the backtracking
* engine interprets the plus as "try matching one more time", and
* a* matches a second time at the end of the input, the empty
* string.
* The submatch will be the empty string.
*
* In order to be consistent with the old engine, we replace
* <atom>+ with <atom><atom>*
*/
// Trick: Normally, (a*)\+ would match the whole input "aaa". The
// first and only submatch would be "aaa". But the backtracking
// engine interprets the plus as "try matching one more time", and
// a* matches a second time at the end of the input, the empty
// string.
// The submatch will be the empty string.
//
// In order to be consistent with the old engine, we replace
// <atom>+ with <atom><atom>*
restore_parse_state(&old_state);
curchr = -1;
if (nfa_regatom() == FAIL) {
@ -2770,16 +2740,14 @@ static int nfa_regpiece(void)
return OK;
}
/*
* Parse one or more pieces, concatenated. It matches a match for the
* first piece, followed by a match for the second piece, etc. Example:
* "f[0-9]b", first matches "f", then a digit and then "b".
*
* concat ::= piece
* or piece piece
* or piece piece piece
* etc.
*/
// Parse one or more pieces, concatenated. It matches a match for the
// first piece, followed by a match for the second piece, etc. Example:
// "f[0-9]b", first matches "f", then a digit and then "b".
//
// concat ::= piece
// or piece piece
// or piece piece piece
// etc.
static int nfa_regconcat(void)
{
bool cont = true;
@ -2843,18 +2811,16 @@ static int nfa_regconcat(void)
return OK;
}
/*
* Parse a branch, one or more concats, separated by "\&". It matches the
* last concat, but only if all the preceding concats also match at the same
* position. Examples:
* "foobeep\&..." matches "foo" in "foobeep".
* ".*Peter\&.*Bob" matches in a line containing both "Peter" and "Bob"
*
* branch ::= concat
* or concat \& concat
* or concat \& concat \& concat
* etc.
*/
// Parse a branch, one or more concats, separated by "\&". It matches the
// last concat, but only if all the preceding concats also match at the same
// position. Examples:
// "foobeep\&..." matches "foo" in "foobeep".
// ".*Peter\&.*Bob" matches in a line containing both "Peter" and "Bob"
//
// branch ::= concat
// or concat \& concat
// or concat \& concat \& concat
// etc.
static int nfa_regbranch(void)
{
int old_post_pos;
@ -3311,9 +3277,7 @@ static FILE *log_fd;
static char_u e_log_open_failed[] =
N_("Could not open temporary log file for writing, displaying on stderr... ");
/*
* Print the postfix notation of the current regexp.
*/
// Print the postfix notation of the current regexp.
static void nfa_postfix_dump(char_u *expr, int retval)
{
int *p;
@ -3341,9 +3305,7 @@ static void nfa_postfix_dump(char_u *expr, int retval)
}
}
/*
* Print the NFA starting with a root node "state".
*/
// Print the NFA starting with a root node "state".
static void nfa_print_state(FILE *debugf, nfa_state_T *state)
{
garray_T indent;
@ -3413,9 +3375,7 @@ static void nfa_print_state2(FILE *debugf, nfa_state_T *state, garray_T *indent)
ga_append(indent, NUL);
}
/*
* Print the NFA state machine.
*/
// Print the NFA state machine.
static void nfa_dump(nfa_regprog_T *prog)
{
FILE *debugf = fopen(NFA_REGEXP_DUMP_LOG, "a");
@ -3437,12 +3397,10 @@ static void nfa_dump(nfa_regprog_T *prog)
fclose(debugf);
}
}
#endif /* REGEXP_DEBUG */
#endif // REGEXP_DEBUG
/*
* Parse r.e. @expr and convert it into postfix form.
* Return the postfix string on success, NULL otherwise.
*/
// Parse r.e. @expr and convert it into postfix form.
// Return the postfix string on success, NULL otherwise.
static int *re2post(void)
{
if (nfa_reg(REG_NOPAREN) == FAIL) {
@ -3454,18 +3412,14 @@ static int *re2post(void)
// NB. Some of the code below is inspired by Russ's.
/*
* Represents an NFA state plus zero or one or two arrows exiting.
* if c == MATCH, no arrows out; matching state.
* If c == SPLIT, unlabeled arrows to out and out1 (if != NULL).
* If c < 256, labeled arrow with character c to out.
*/
// Represents an NFA state plus zero or one or two arrows exiting.
// If c == MATCH, no arrows out; matching state.
// If c == SPLIT, unlabeled arrows to out and out1 (if != NULL).
// If c < 256, labeled arrow with character c to out.
static nfa_state_T *state_ptr; // points to nfa_prog->state
/*
* Allocate and initialize nfa_state_T.
*/
// Allocate and initialize nfa_state_T.
static nfa_state_T *alloc_state(int c, nfa_state_T *out, nfa_state_T *out1)
{
nfa_state_T *s;
@ -3488,16 +3442,12 @@ static nfa_state_T *alloc_state(int c, nfa_state_T *out, nfa_state_T *out1)
return s;
}
/*
* A partially built NFA without the matching state filled in.
* Frag_T.start points at the start state.
* Frag_T.out is a list of places that need to be set to the
* next state for this fragment.
*/
// A partially built NFA without the matching state filled in.
// Frag_T.start points at the start state.
// Frag_T.out is a list of places that need to be set to the
// next state for this fragment.
/*
* Initialize a Frag_T struct and return it.
*/
// Initialize a Frag_T struct and return it.
static Frag_T frag(nfa_state_T *start, Ptrlist *out)
{
Frag_T n;
@ -3507,9 +3457,7 @@ static Frag_T frag(nfa_state_T *start, Ptrlist *out)
return n;
}
/*
* Create singleton list containing just outp.
*/
// Create singleton list containing just outp.
static Ptrlist *list1(nfa_state_T **outp)
{
Ptrlist *l;
@ -3519,9 +3467,7 @@ static Ptrlist *list1(nfa_state_T **outp)
return l;
}
/*
* Patch the list of states at out to point to start.
*/
// Patch the list of states at "l" to point to "s".
static void patch(Ptrlist *l, nfa_state_T *s)
{
Ptrlist *next;
@ -3532,9 +3478,7 @@ static void patch(Ptrlist *l, nfa_state_T *s)
}
}
/*
* Join the two lists l1 and l2, returning the combination.
*/
// Join the two lists l1 and l2, returning the combination.
static Ptrlist *append(Ptrlist *l1, Ptrlist *l2)
{
Ptrlist *oldl1;
@ -3547,9 +3491,7 @@ static Ptrlist *append(Ptrlist *l1, Ptrlist *l2)
return oldl1;
}
/*
* Stack used for transforming postfix form into NFA.
*/
// Stack used for transforming postfix form into NFA.
static Frag_T empty;
static void st_error(int *postfix, int *end, int *p)
@ -3592,9 +3534,7 @@ static void st_error(int *postfix, int *end, int *p)
emsg(_("E874: (NFA) Could not pop the stack!"));
}
/*
* Push an item onto the stack.
*/
// Push an item onto the stack.
static void st_push(Frag_T s, Frag_T **p, Frag_T *stack_end)
{
Frag_T *stackp = *p;
@ -3606,9 +3546,7 @@ static void st_push(Frag_T s, Frag_T **p, Frag_T *stack_end)
*p = *p + 1;
}
/*
* Pop an item from the stack.
*/
// Pop an item from the stack.
static Frag_T st_pop(Frag_T **p, Frag_T *stack)
{
Frag_T *stackp;
@ -3621,10 +3559,8 @@ static Frag_T st_pop(Frag_T **p, Frag_T *stack)
return **p;
}
/*
* Estimate the maximum byte length of anything matching "state".
* When unknown or unlimited return -1.
*/
// Estimate the maximum byte length of anything matching "state".
// When unknown or unlimited return -1.
static int nfa_max_width(nfa_state_T *startstate, int depth)
{
int l, r;
@ -3827,10 +3763,8 @@ static int nfa_max_width(nfa_state_T *startstate, int depth)
return -1;
}
/*
* Convert a postfix form into its equivalent NFA.
* Return the NFA start state on success, NULL otherwise.
*/
// Convert a postfix form into its equivalent NFA.
// Return the NFA start state on success, NULL otherwise.
static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size)
{
int *p;
@ -3866,7 +3800,7 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size)
stack_end = stack + (nstate + 1);
}
for (p = postfix; p < end; ++p) {
for (p = postfix; p < end; p++) {
switch (*p) {
case NFA_CONCAT:
// Concatenation.
@ -4350,15 +4284,13 @@ theend:
#undef PUSH
}
/*
* After building the NFA program, inspect it to add optimization hints.
*/
// After building the NFA program, inspect it to add optimization hints.
static void nfa_postprocess(nfa_regprog_T *prog)
{
int i;
int c;
for (i = 0; i < prog->nstate; ++i) {
for (i = 0; i < prog->nstate; i++) {
c = prog->state[i].c;
if (c == NFA_START_INVISIBLE
|| c == NFA_START_INVISIBLE_NEG
@ -4490,9 +4422,7 @@ static void clear_sub(regsub_T *sub)
sub->in_use = 0;
}
/*
* Copy the submatches from "from" to "to".
*/
// Copy the submatches from "from" to "to".
static void copy_sub(regsub_T *to, regsub_T *from)
{
to->in_use = from->in_use;
@ -4508,9 +4438,7 @@ static void copy_sub(regsub_T *to, regsub_T *from)
}
}
/*
* Like copy_sub() but exclude the main match.
*/
// Like copy_sub() but exclude the main match.
static void copy_sub_off(regsub_T *to, regsub_T *from)
{
if (to->in_use < from->in_use) {
@ -4528,9 +4456,7 @@ static void copy_sub_off(regsub_T *to, regsub_T *from)
}
}
/*
* Like copy_sub() but only do the end of the main match if \ze is present.
*/
// Like copy_sub() but only do the end of the main match if \ze is present.
static void copy_ze_off(regsub_T *to, regsub_T *from)
{
if (rex.nfa_has_zend) {
@ -4954,7 +4880,7 @@ static regsubs_T *addstate(nfa_list_T *l, nfa_state_T *state, regsubs_T *subs_ar
// When called from addstate_here() do insert before
// existing states.
if (add_here) {
for (k = 0; k < l->n && k < listindex; ++k) {
for (k = 0; k < l->n && k < listindex; k++) {
if (l->t[k].state->id == state->id) {
found = true;
break;
@ -5094,7 +5020,7 @@ skip_add:
save_in_use = -1;
} else {
save_in_use = sub->in_use;
for (i = sub->in_use; i < subidx; ++i) {
for (i = sub->in_use; i < subidx; i++) {
sub->list.multi[i].start_lnum = -1;
sub->list.multi[i].end_lnum = -1;
}
@ -5115,7 +5041,7 @@ skip_add:
save_in_use = -1;
} else {
save_in_use = sub->in_use;
for (i = sub->in_use; i < subidx; ++i) {
for (i = sub->in_use; i < subidx; i++) {
sub->list.line[i].start = NULL;
sub->list.line[i].end = NULL;
}
@ -5314,9 +5240,7 @@ static regsubs_T *addstate_here(nfa_list_T *l, nfa_state_T *state, regsubs_T *su
return r;
}
/*
* Check character class "class" against current character c.
*/
// Check character class "class" against current character c.
static int check_char_class(int class, int c)
{
switch (class) {
@ -5502,11 +5426,9 @@ static int match_zref(int subidx, int *bytelen)
return false;
}
/*
* Save list IDs for all NFA states of "prog" into "list".
* Also reset the IDs to zero.
* Only used for the recursive value lastlist[1].
*/
// Save list IDs for all NFA states of "prog" into "list".
// Also reset the IDs to zero.
// Only used for the recursive value lastlist[1].
static void nfa_save_listids(nfa_regprog_T *prog, int *list)
{
int i;
@ -5521,9 +5443,7 @@ static void nfa_save_listids(nfa_regprog_T *prog, int *list)
}
}
/*
* Restore list IDs from "list" to all NFA states.
*/
// Restore list IDs from "list" to all NFA states.
static void nfa_restore_listids(nfa_regprog_T *prog, int *list)
{
int i;
@ -5547,11 +5467,9 @@ static bool nfa_re_num_cmp(uintmax_t val, int op, uintmax_t pos)
return val == pos;
}
/*
* Recursively call nfa_regmatch()
* "pim" is NULL or contains info about a Postponed Invisible Match (start
* position).
*/
// Recursively call nfa_regmatch().
// "pim" is NULL or contains info about a Postponed Invisible Match (start
// position).
static int recursive_regmatch(nfa_state_T *state, nfa_pim_T *pim, nfa_regprog_T *prog,
regsubs_T *submatch, regsubs_T *m, int **listids, int *listids_len)
FUNC_ATTR_NONNULL_ARG(1, 3, 5, 6, 7)
@ -5691,12 +5609,10 @@ static int recursive_regmatch(nfa_state_T *state, nfa_pim_T *pim, nfa_regprog_T
return result;
}
/*
* Estimate the chance of a match with "state" failing.
* empty match: 0
* NFA_ANY: 1
* specific character: 99
*/
// Estimate the chance of a match with "state" failing.
// empty match: 0
// NFA_ANY: 1
// specific character: 99
static int failure_chance(nfa_state_T *state, int depth)
{
int c = state->c;
@ -5851,9 +5767,7 @@ static int failure_chance(nfa_state_T *state, int depth)
return 50;
}
/*
* Skip until the char "c" we know a match must start with.
*/
// Skip until the char "c" we know a match must start with.
static int skip_to_start(int c, colnr_T *colp)
{
const char_u *const s = cstrchr(rex.line + *colp, c);
@ -5864,11 +5778,9 @@ static int skip_to_start(int c, colnr_T *colp)
return OK;
}
/*
* Check for a match with match_text.
* Called after skip_to_start() has found regstart.
* Returns zero for no match, 1 for a match.
*/
// Check for a match with match_text.
// Called after skip_to_start() has found regstart.
// Returns zero for no match, 1 for a match.
static long find_match_text(colnr_T startcol, int regstart, char_u *match_text)
{
#define PTR2LEN(x) utf_ptr2len(x)
@ -6038,9 +5950,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, regsubs_T *subm
add_off = clen; \
}
/*
* Run for each character.
*/
// Run for each character.
for (;;) {
int curc = utf_ptr2char((char *)rex.input);
int clen = utfc_ptr2len((char *)rex.input);
@ -6086,9 +5996,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, regsubs_T *subm
#ifdef NFA_REGEXP_DEBUG_LOG
fprintf(debug, "\n-------------------\n");
#endif
/*
* If the state lists are empty we can stop.
*/
// If the state lists are empty we can stop.
if (thislist->n == 0) {
break;
}
@ -6131,10 +6039,8 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, regsubs_T *subm
}
#endif
/*
* Handle the possible codes of the current state.
* The most important is NFA_MATCH.
*/
// Handle the possible codes of the current state.
// The most important is NFA_MATCH.
add_state = NULL;
add_here = false;
add_count = 0;
@ -7525,10 +7431,8 @@ theend:
return retval;
}
/*
* Compile a regular expression into internal code for the NFA matcher.
* Returns the program in allocated space. Returns NULL for an error.
*/
// Compile a regular expression into internal code for the NFA matcher.
// Returns the program in allocated space. Returns NULL for an error.
static regprog_T *nfa_regcomp(char_u *expr, int re_flags)
{
nfa_regprog_T *prog = NULL;
@ -7554,11 +7458,9 @@ static regprog_T *nfa_regcomp(char_u *expr, int re_flags)
goto fail; // Cascaded (syntax?) error
}
/*
* In order to build the NFA, we parse the input regexp twice:
* 1. first pass to count size (so we can allocate space)
* 2. second to emit code
*/
// In order to build the NFA, we parse the input regexp twice:
// 1. first pass to count size (so we can allocate space)
// 2. second to emit code
#ifdef REGEXP_DEBUG
{
FILE *f = fopen(NFA_REGEXP_RUN_LOG, "a");
@ -7573,10 +7475,8 @@ static regprog_T *nfa_regcomp(char_u *expr, int re_flags)
}
#endif
/*
* PASS 1
* Count number of NFA states in "nstate". Do not build the NFA.
*/
// PASS 1
// Count number of NFA states in "nstate". Do not build the NFA.
post2nfa(postfix, post_ptr, true);
// allocate the regprog with space for the compiled regexp
@ -7585,10 +7485,8 @@ static regprog_T *nfa_regcomp(char_u *expr, int re_flags)
state_ptr = prog->state;
prog->re_in_use = false;
/*
* PASS 2
* Build the NFA
*/
// PASS 2
// Build the NFA
prog->start = post2nfa(postfix, post_ptr, false);
if (prog->start == NULL) {
goto fail;
@ -7632,9 +7530,7 @@ fail:
goto out;
}
/*
* Free a compiled regexp program, returned by nfa_regcomp().
*/
// Free a compiled regexp program, returned by nfa_regcomp().
static void nfa_regfree(regprog_T *prog)
{
if (prog != NULL) {

View File

@ -2465,9 +2465,9 @@ static afffile_T *spell_read_aff(spellinfo_T *spin, char_u *fname)
aff_entry->ae_cond = (char_u *)getroom_save(spin, (char_u *)items[4]);
if (*items[0] == 'P') {
sprintf((char *)buf, "^%s", items[4]);
sprintf((char *)buf, "^%s", items[4]); // NOLINT(runtime/printf)
} else {
sprintf((char *)buf, "%s$", items[4]);
sprintf((char *)buf, "%s$", items[4]); // NOLINT(runtime/printf)
}
aff_entry->ae_prog = vim_regcomp((char *)buf, RE_MAGIC + RE_STRING + RE_STRICT);
if (aff_entry->ae_prog == NULL) {
@ -2514,8 +2514,7 @@ static afffile_T *spell_read_aff(spellinfo_T *spin, char_u *fname)
onecap_copy((char_u *)items[4], buf, true);
aff_entry->ae_cond = (char_u *)getroom_save(spin, buf);
if (aff_entry->ae_cond != NULL) {
sprintf((char *)buf, "^%s",
aff_entry->ae_cond);
sprintf((char *)buf, "^%s", aff_entry->ae_cond); // NOLINT(runtime/printf)
vim_regfree(aff_entry->ae_prog);
aff_entry->ae_prog = vim_regcomp((char *)buf, RE_MAGIC + RE_STRING);
}

View File

@ -1035,9 +1035,7 @@ int vim_vsnprintf_typval(char *str, size_t str_m, const char *fmt, va_list ap, t
: va_arg(ap, long long)); // NOLINT (runtime/int)
break;
case 'z':
arg = (tvs
? (ptrdiff_t)tv_nr(tvs, &arg_idx)
: va_arg(ap, ptrdiff_t));
arg = (tvs ? (ptrdiff_t)tv_nr(tvs, &arg_idx) : va_arg(ap, ptrdiff_t));
break;
}
if (arg > 0) {
@ -1049,19 +1047,13 @@ int vim_vsnprintf_typval(char *str, size_t str_m, const char *fmt, va_list ap, t
// unsigned
switch (length_modifier) {
case '\0':
uarg = (unsigned int)(tvs
? tv_nr(tvs, &arg_idx)
: va_arg(ap, unsigned int));
uarg = (unsigned int)(tvs ? tv_nr(tvs, &arg_idx) : va_arg(ap, unsigned int));
break;
case 'h':
uarg = (uint16_t)(tvs
? tv_nr(tvs, &arg_idx)
: va_arg(ap, unsigned int));
uarg = (uint16_t)(tvs ? tv_nr(tvs, &arg_idx) : va_arg(ap, unsigned int));
break;
case 'l':
uarg = (tvs
? (unsigned long)tv_nr(tvs, &arg_idx)
: va_arg(ap, unsigned long));
uarg = (tvs ? (unsigned long)tv_nr(tvs, &arg_idx) : va_arg(ap, unsigned long));
break;
case '2':
uarg = (uintmax_t)(unsigned long long)( // NOLINT (runtime/int)
@ -1071,9 +1063,7 @@ int vim_vsnprintf_typval(char *str, size_t str_m, const char *fmt, va_list ap, t
: va_arg(ap, unsigned long long)); // NOLINT (runtime/int)
break;
case 'z':
uarg = (tvs
? (size_t)tv_nr(tvs, &arg_idx)
: va_arg(ap, size_t));
uarg = (tvs ? (size_t)tv_nr(tvs, &arg_idx) : va_arg(ap, size_t));
break;
}
arg_sign = (uarg != 0);

View File

@ -1655,8 +1655,7 @@ static int syn_current_attr(const bool syncing, const bool displaying, bool *con
&& (spp->sp_type == SPTYPE_MATCH
|| spp->sp_type == SPTYPE_START)
&& (current_next_list != NULL
? in_id_list(NULL, current_next_list,
&spp->sp_syn, 0)
? in_id_list(NULL, current_next_list, &spp->sp_syn, 0)
: (cur_si == NULL
? !(spp->sp_flags & HL_CONTAINED)
: in_id_list(cur_si,