refactor: more clint (#20910)

This commit is contained in:
Lewis Russell 2022-11-07 10:21:44 +00:00 committed by GitHub
parent e9c1cb71f8
commit bdb98de2d1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 594 additions and 876 deletions

View File

@ -122,8 +122,8 @@ int eexe_mod_op(typval_T *const tv1, const typval_T *const tv2, const char *cons
break; break;
} }
const float_T f = (tv2->v_type == VAR_FLOAT const float_T f = (tv2->v_type == VAR_FLOAT
? tv2->vval.v_float ? tv2->vval.v_float
: (float_T)tv_get_number(tv2)); : (float_T)tv_get_number(tv2));
switch (*op) { switch (*op) {
case '+': case '+':
tv1->vval.v_float += f; break; tv1->vval.v_float += f; break;

View File

@ -729,7 +729,7 @@ void ex_hardcopy(exarg_T *eap)
} }
assert(prtpos.bytes_printed <= SIZE_MAX / 100); assert(prtpos.bytes_printed <= SIZE_MAX / 100);
sprintf((char *)IObuff, _("Printing page %d (%zu%%)"), sprintf((char *)IObuff, _("Printing page %d (%zu%%)"), // NOLINT(runtime/printf)
page_count + 1 + side, page_count + 1 + side,
prtpos.bytes_printed * 100 / bytes_to_print); prtpos.bytes_printed * 100 / bytes_to_print);
if (!mch_print_begin_page((char_u *)IObuff)) { if (!mch_print_begin_page((char_u *)IObuff)) {
@ -750,8 +750,7 @@ void ex_hardcopy(exarg_T *eap)
prtpos.file_line); prtpos.file_line);
} }
for (page_line = 0; page_line < settings.lines_per_page; for (page_line = 0; page_line < settings.lines_per_page; page_line++) {
++page_line) {
prtpos.column = hardcopy_line(&settings, prtpos.column = hardcopy_line(&settings,
page_line, &prtpos); page_line, &prtpos);
if (prtpos.column == 0) { if (prtpos.column == 0) {
@ -2440,8 +2439,7 @@ bool mch_print_begin(prt_settings_T *psettings)
prt_dsc_font_resource("DocumentNeededResources", &prt_ps_courier_font); prt_dsc_font_resource("DocumentNeededResources", &prt_ps_courier_font);
} }
if (prt_out_mbyte) { if (prt_out_mbyte) {
prt_dsc_font_resource((prt_use_courier ? NULL prt_dsc_font_resource((prt_use_courier ? NULL : "DocumentNeededResources"), &prt_ps_mb_font);
: "DocumentNeededResources"), &prt_ps_mb_font);
if (!prt_custom_cmap) { if (!prt_custom_cmap) {
prt_dsc_resources(NULL, "cmap", prt_cmap); prt_dsc_resources(NULL, "cmap", prt_cmap);
} }
@ -2990,7 +2988,7 @@ int mch_print_text_out(char_u *const textp, size_t len)
ga_append(&prt_ps_buffer, '\\'); break; ga_append(&prt_ps_buffer, '\\'); break;
default: default:
sprintf((char *)ch_buff, "%03o", (unsigned int)ch); sprintf((char *)ch_buff, "%03o", (unsigned int)ch); // NOLINT(runtime/printf)
ga_append(&prt_ps_buffer, (char)ch_buff[0]); ga_append(&prt_ps_buffer, (char)ch_buff[0]);
ga_append(&prt_ps_buffer, (char)ch_buff[1]); ga_append(&prt_ps_buffer, (char)ch_buff[1]);
ga_append(&prt_ps_buffer, (char)ch_buff[2]); ga_append(&prt_ps_buffer, (char)ch_buff[2]);

View File

@ -1550,7 +1550,7 @@ void show_utf8(void)
} }
clen = utf_ptr2len((char *)line + i); clen = utf_ptr2len((char *)line + i);
} }
sprintf((char *)IObuff + rlen, "%02x ", sprintf((char *)IObuff + rlen, "%02x ", // NOLINT(runtime/printf)
(line[i] == NL) ? NUL : line[i]); // NUL is stored as NL (line[i] == NL) ? NUL : line[i]); // NUL is stored as NL
clen--; clen--;
rlen += (int)strlen(IObuff + rlen); rlen += (int)strlen(IObuff + rlen);

View File

@ -2135,13 +2135,13 @@ static int ml_append_int(buf_T *buf, linenr_T lnum, char_u *line, colnr_T len, b
buf->b_ml.ml_stack_top = stack_idx + 1; // truncate stack buf->b_ml.ml_stack_top = stack_idx + 1; // truncate stack
if (lineadd) { if (lineadd) {
--(buf->b_ml.ml_stack_top); (buf->b_ml.ml_stack_top)--;
// fix line count for rest of blocks in the stack // fix line count for rest of blocks in the stack
ml_lineadd(buf, lineadd); ml_lineadd(buf, lineadd);
// fix stack itself // fix stack itself
buf->b_ml.ml_stack[buf->b_ml.ml_stack_top].ip_high += buf->b_ml.ml_stack[buf->b_ml.ml_stack_top].ip_high +=
lineadd; lineadd;
++(buf->b_ml.ml_stack_top); (buf->b_ml.ml_stack_top)++;
} }
// We are finished, break the loop here. // We are finished, break the loop here.
@ -2428,7 +2428,7 @@ static int ml_delete_int(buf_T *buf, linenr_T lnum, bool message)
buf->b_ml.ml_stack[buf->b_ml.ml_stack_top].ip_high += buf->b_ml.ml_stack[buf->b_ml.ml_stack_top].ip_high +=
buf->b_ml.ml_locked_lineadd; buf->b_ml.ml_locked_lineadd;
} }
++(buf->b_ml.ml_stack_top); (buf->b_ml.ml_stack_top)++;
break; break;
} }
@ -2698,11 +2698,11 @@ static bhdr_T *ml_find_line(buf_T *buf, linenr_T lnum, int action)
&& buf->b_ml.ml_locked_high >= lnum) { && buf->b_ml.ml_locked_high >= lnum) {
// remember to update pointer blocks and stack later // remember to update pointer blocks and stack later
if (action == ML_INSERT) { if (action == ML_INSERT) {
++(buf->b_ml.ml_locked_lineadd); (buf->b_ml.ml_locked_lineadd)++;
++(buf->b_ml.ml_locked_high); (buf->b_ml.ml_locked_high)++;
} else if (action == ML_DELETE) { } else if (action == ML_DELETE) {
--(buf->b_ml.ml_locked_lineadd); (buf->b_ml.ml_locked_lineadd)--;
--(buf->b_ml.ml_locked_high); (buf->b_ml.ml_locked_high)--;
} }
return buf->b_ml.ml_locked; return buf->b_ml.ml_locked;
} }

View File

@ -95,9 +95,8 @@ bool msgpack_rpc_to_object(const msgpack_object *const obj, Object *const arg)
dest = conv(((String) { \ dest = conv(((String) { \
.size = obj->via.attr.size, \ .size = obj->via.attr.size, \
.data = (obj->via.attr.ptr == NULL || obj->via.attr.size == 0 \ .data = (obj->via.attr.ptr == NULL || obj->via.attr.size == 0 \
? xmemdupz("", 0) \ ? xmemdupz("", 0) \
: xmemdupz(obj->via.attr.ptr, obj->via.attr.size)), \ : xmemdupz(obj->via.attr.ptr, obj->via.attr.size)), })); \
})); \
break; \ break; \
} }
STR_CASE(MSGPACK_OBJECT_STR, str, cur.mobj, *cur.aobj, STRING_OBJ) STR_CASE(MSGPACK_OBJECT_STR, str, cur.mobj, *cur.aobj, STRING_OBJ)

View File

@ -2266,7 +2266,7 @@ bool find_decl(char_u *ptr, size_t len, bool locally, bool thisblock, int flags_
// Put "\V" before the pattern to avoid that the special meaning of "." // Put "\V" before the pattern to avoid that the special meaning of "."
// and "~" causes trouble. // and "~" causes trouble.
assert(len <= INT_MAX); assert(len <= INT_MAX);
sprintf((char *)pat, vim_iswordp(ptr) ? "\\V\\<%.*s\\>" : "\\V%.*s", sprintf((char *)pat, vim_iswordp(ptr) ? "\\V\\<%.*s\\>" : "\\V%.*s", // NOLINT(runtime/printf)
(int)len, ptr); (int)len, ptr);
old_pos = curwin->w_cursor; old_pos = curwin->w_cursor;
save_p_ws = p_ws; save_p_ws = p_ws;

View File

@ -1,9 +1,7 @@
// This is an open source non-commercial project. Dear PVS-Studio, please check // This is an open source non-commercial project. Dear PVS-Studio, please check
// it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com // it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com
/* // Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
* Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
*/
// By default: do not create debugging logs or files related to regular // By default: do not create debugging logs or files related to regular
// expressions, even when compiling with -DDEBUG. // expressions, even when compiling with -DDEBUG.
@ -41,21 +39,17 @@
# define BT_REGEXP_DEBUG_LOG_NAME "bt_regexp_debug.log" # define BT_REGEXP_DEBUG_LOG_NAME "bt_regexp_debug.log"
#endif #endif
/* // Magic characters have a special meaning, they don't match literally.
* Magic characters have a special meaning, they don't match literally. // Magic characters are negative. This separates them from literal characters
* Magic characters are negative. This separates them from literal characters // (possibly multi-byte). Only ASCII characters can be Magic.
* (possibly multi-byte). Only ASCII characters can be Magic.
*/
#define Magic(x) ((int)(x) - 256) #define Magic(x) ((int)(x) - 256)
#define un_Magic(x) ((x) + 256) #define un_Magic(x) ((x) + 256)
#define is_Magic(x) ((x) < 0) #define is_Magic(x) ((x) < 0)
/* // We should define ftpr as a pointer to a function returning a pointer to
* We should define ftpr as a pointer to a function returning a pointer to // a function returning a pointer to a function ...
* a function returning a pointer to a function ... // This is impossible, so we declare a pointer to a function returning a
* This is impossible, so we declare a pointer to a function returning a // pointer to a function returning void. This should work for all compilers.
* pointer to a function returning void. This should work for all compilers.
*/
typedef void (*(*fptr_T)(int *, int))(void); typedef void (*(*fptr_T)(int *, int))(void);
static int no_Magic(int x) static int no_Magic(int x)
@ -143,28 +137,24 @@ static int re_multi_type(int c)
static char *reg_prev_sub = NULL; static char *reg_prev_sub = NULL;
/* // REGEXP_INRANGE contains all characters which are always special in a []
* REGEXP_INRANGE contains all characters which are always special in a [] // range after '\'.
* range after '\'. // REGEXP_ABBR contains all characters which act as abbreviations after '\'.
* REGEXP_ABBR contains all characters which act as abbreviations after '\'. // These are:
* These are: // \n - New line (NL).
* \n - New line (NL). // \r - Carriage Return (CR).
* \r - Carriage Return (CR). // \t - Tab (TAB).
* \t - Tab (TAB). // \e - Escape (ESC).
* \e - Escape (ESC). // \b - Backspace (Ctrl_H).
* \b - Backspace (Ctrl_H). // \d - Character code in decimal, eg \d123
* \d - Character code in decimal, eg \d123 // \o - Character code in octal, eg \o80
* \o - Character code in octal, eg \o80 // \x - Character code in hex, eg \x4a
* \x - Character code in hex, eg \x4a // \u - Multibyte character code, eg \u20ac
* \u - Multibyte character code, eg \u20ac // \U - Long multibyte character code, eg \U12345678
* \U - Long multibyte character code, eg \U12345678
*/
static char REGEXP_INRANGE[] = "]^-n\\"; static char REGEXP_INRANGE[] = "]^-n\\";
static char REGEXP_ABBR[] = "nrtebdoxuU"; static char REGEXP_ABBR[] = "nrtebdoxuU";
/* // Translate '\x' to its control character, except "\n", which is Magic.
* Translate '\x' to its control character, except "\n", which is Magic.
*/
static int backslash_trans(int c) static int backslash_trans(int c)
{ {
switch (c) { switch (c) {
@ -239,10 +229,8 @@ static int get_char_class(char **pp)
return CLASS_NONE; return CLASS_NONE;
} }
/* // Specific version of character class functions.
* Specific version of character class functions. // Using a table to keep this fast.
* Using a table to keep this fast.
*/
static int16_t class_tab[256]; static int16_t class_tab[256];
#define RI_DIGIT 0x01 #define RI_DIGIT 0x01
@ -325,9 +313,7 @@ static int reg_string; // matching with a string instead of a buffer
// line // line
static int reg_strict; // "[abc" is illegal static int reg_strict; // "[abc" is illegal
/* // META contains all characters that may be magic, except '^' and '$'.
* META contains all characters that may be magic, except '^' and '$'.
*/
// uncrustify:off // uncrustify:off
@ -391,11 +377,9 @@ int re_multiline(const regprog_T *prog)
return prog->regflags & RF_HASNL; return prog->regflags & RF_HASNL;
} }
/* // Check for an equivalence class name "[=a=]". "pp" points to the '['.
* Check for an equivalence class name "[=a=]". "pp" points to the '['. // Returns a character representing the class. Zero means that no item was
* Returns a character representing the class. Zero means that no item was // recognized. Otherwise "pp" is advanced to after the item.
* recognized. Otherwise "pp" is advanced to after the item.
*/
static int get_equi_class(char **pp) static int get_equi_class(char **pp)
{ {
int c; int c;
@ -413,12 +397,10 @@ static int get_equi_class(char **pp)
return 0; return 0;
} }
/* // Check for a collating element "[.a.]". "pp" points to the '['.
* Check for a collating element "[.a.]". "pp" points to the '['. // Returns a character. Zero means that no item was recognized. Otherwise
* Returns a character. Zero means that no item was recognized. Otherwise // "pp" is advanced to after the item.
* "pp" is advanced to after the item. // Currently only single characters are recognized!
* Currently only single characters are recognized!
*/
static int get_coll_element(char **pp) static int get_coll_element(char **pp)
{ {
int c; int c;
@ -562,9 +544,7 @@ static int prevchr_len; // byte length of previous char
static int at_start; // True when on the first character static int at_start; // True when on the first character
static int prev_at_start; // True when on the second character static int prev_at_start; // True when on the second character
/* // Start parsing at "str".
* Start parsing at "str".
*/
static void initchr(char_u *str) static void initchr(char_u *str)
{ {
regparse = (char *)str; regparse = (char *)str;
@ -574,10 +554,8 @@ static void initchr(char_u *str)
prev_at_start = false; prev_at_start = false;
} }
/* // Save the current parse state, so that it can be restored and parsing
* Save the current parse state, so that it can be restored and parsing // starts in the same state again.
* starts in the same state again.
*/
static void save_parse_state(parse_state_T *ps) static void save_parse_state(parse_state_T *ps)
{ {
ps->regparse = (char_u *)regparse; ps->regparse = (char_u *)regparse;
@ -591,9 +569,7 @@ static void save_parse_state(parse_state_T *ps)
ps->regnpar = regnpar; ps->regnpar = regnpar;
} }
/* // Restore a previously saved parse state.
* Restore a previously saved parse state.
*/
static void restore_parse_state(parse_state_T *ps) static void restore_parse_state(parse_state_T *ps)
{ {
regparse = (char *)ps->regparse; regparse = (char *)ps->regparse;
@ -607,9 +583,7 @@ static void restore_parse_state(parse_state_T *ps)
regnpar = ps->regnpar; regnpar = ps->regnpar;
} }
/* // Get the next character without advancing.
* Get the next character without advancing.
*/
static int peekchr(void) static int peekchr(void)
{ {
static int after_slash = false; static int after_slash = false;
@ -736,9 +710,7 @@ static int peekchr(void)
after_slash--; after_slash--;
curchr = toggle_Magic(curchr); curchr = toggle_Magic(curchr);
} else if (vim_strchr(REGEXP_ABBR, c)) { } else if (vim_strchr(REGEXP_ABBR, c)) {
/* // Handle abbreviations, like "\t" for TAB -- webb
* Handle abbreviations, like "\t" for TAB -- webb
*/
curchr = backslash_trans(c); curchr = backslash_trans(c);
} else if (reg_magic == MAGIC_NONE && (c == '$' || c == '^')) { } else if (reg_magic == MAGIC_NONE && (c == '$' || c == '^')) {
curchr = toggle_Magic(c); curchr = toggle_Magic(c);
@ -757,9 +729,7 @@ static int peekchr(void)
return curchr; return curchr;
} }
/* // Eat one lexed character. Do this in a way that we can undo it.
* Eat one lexed character. Do this in a way that we can undo it.
*/
static void skipchr(void) static void skipchr(void)
{ {
// peekchr() eats a backslash, do the same here // peekchr() eats a backslash, do the same here
@ -781,10 +751,8 @@ static void skipchr(void)
nextchr = -1; nextchr = -1;
} }
/* // Skip a character while keeping the value of prev_at_start for at_start.
* Skip a character while keeping the value of prev_at_start for at_start. // prevchr and prevprevchr are also kept.
* prevchr and prevprevchr are also kept.
*/
static void skipchr_keepstart(void) static void skipchr_keepstart(void)
{ {
int as = prev_at_start; int as = prev_at_start;
@ -797,10 +765,8 @@ static void skipchr_keepstart(void)
prevprevchr = prpr; prevprevchr = prpr;
} }
/* // Get the next character from the pattern. We know about magic and such, so
* Get the next character from the pattern. We know about magic and such, so // therefore we need a lexical analyzer.
* therefore we need a lexical analyzer.
*/
static int getchr(void) static int getchr(void)
{ {
int chr = peekchr(); int chr = peekchr();
@ -809,9 +775,7 @@ static int getchr(void)
return chr; return chr;
} }
/* // put character back. Works only once!
* put character back. Works only once!
*/
static void ungetchr(void) static void ungetchr(void)
{ {
nextchr = curchr; nextchr = curchr;
@ -825,15 +789,13 @@ static void ungetchr(void)
regparse -= prevchr_len; regparse -= prevchr_len;
} }
/* // Get and return the value of the hex string at the current position.
* Get and return the value of the hex string at the current position. // Return -1 if there is no valid hex number.
* Return -1 if there is no valid hex number. // The position is updated:
* The position is updated: // blahblah\%x20asdf
* blahblah\%x20asdf // before-^ ^-after
* before-^ ^-after // The parameter controls the maximum number of input characters. This will be
* The parameter controls the maximum number of input characters. This will be // 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
* 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
*/
static int64_t gethexchrs(int maxinputlen) static int64_t gethexchrs(int maxinputlen)
{ {
int64_t nr = 0; int64_t nr = 0;
@ -856,10 +818,8 @@ static int64_t gethexchrs(int maxinputlen)
return nr; return nr;
} }
/* // Get and return the value of the decimal string immediately after the
* Get and return the value of the decimal string immediately after the // current position. Return -1 for invalid. Consumes all digits.
* current position. Return -1 for invalid. Consumes all digits.
*/
static int64_t getdecchrs(void) static int64_t getdecchrs(void)
{ {
int64_t nr = 0; int64_t nr = 0;
@ -883,14 +843,12 @@ static int64_t getdecchrs(void)
return nr; return nr;
} }
/* // get and return the value of the octal string immediately after the current
* get and return the value of the octal string immediately after the current // position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
* position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle // numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
* numbers > 377 correctly (for example, 400 is treated as 40) and doesn't // treat 8 or 9 as recognised characters. Position is updated:
* treat 8 or 9 as recognised characters. Position is updated: // blahblah\%o210asdf
* blahblah\%o210asdf // before-^ ^-after
* before-^ ^-after
*/
static int64_t getoctchrs(void) static int64_t getoctchrs(void)
{ {
int64_t nr = 0; int64_t nr = 0;
@ -913,12 +871,10 @@ static int64_t getoctchrs(void)
return nr; return nr;
} }
/* // read_limits - Read two integers to be taken as a minimum and maximum.
* read_limits - Read two integers to be taken as a minimum and maximum. // If the first character is '-', then the range is reversed.
* If the first character is '-', then the range is reversed. // Should end with 'end'. If minval is missing, zero is default, if maxval is
* Should end with 'end'. If minval is missing, zero is default, if maxval is // missing, a very big number is the default.
* missing, a very big number is the default.
*/
static int read_limits(long *minval, long *maxval) static int read_limits(long *minval, long *maxval)
{ {
int reverse = false; int reverse = false;
@ -950,10 +906,8 @@ static int read_limits(long *minval, long *maxval)
EMSG2_RET_FAIL(_("E554: Syntax error in %s{...}"), reg_magic == MAGIC_ALL); EMSG2_RET_FAIL(_("E554: Syntax error in %s{...}"), reg_magic == MAGIC_ALL);
} }
/* // Reverse the range if there was a '-', or make sure it is in the right
* Reverse the range if there was a '-', or make sure it is in the right // order otherwise.
* order otherwise.
*/
if ((!reverse && *minval > *maxval) || (reverse && *minval < *maxval)) { if ((!reverse && *minval > *maxval) || (reverse && *minval < *maxval)) {
tmp = *minval; tmp = *minval;
*minval = *maxval; *minval = *maxval;
@ -963,13 +917,9 @@ static int read_limits(long *minval, long *maxval)
return OK; return OK;
} }
/* // vim_regexec and friends
* vim_regexec and friends
*/
/* // Global work variables for vim_regexec().
* Global work variables for vim_regexec().
*/
// Sometimes need to save a copy of a line. Since alloc()/free() is very // Sometimes need to save a copy of a line. Since alloc()/free() is very
// slow, we keep one allocated piece of memory and only re-allocate it when // slow, we keep one allocated piece of memory and only re-allocate it when
@ -1052,9 +1002,7 @@ static bool reg_iswordc(int c)
return vim_iswordc_buf(c, rex.reg_buf); return vim_iswordc_buf(c, rex.reg_buf);
} }
/* // Get pointer to the line "lnum", which is relative to "reg_firstlnum".
* Get pointer to the line "lnum", which is relative to "reg_firstlnum".
*/
static char_u *reg_getline(linenr_T lnum) static char_u *reg_getline(linenr_T lnum)
{ {
// when looking behind for a match/no-match lnum is negative. But we // when looking behind for a match/no-match lnum is negative. But we
@ -1077,9 +1025,7 @@ static lpos_T reg_endzpos[NSUBEXP]; // idem, end pos
// true if using multi-line regexp. // true if using multi-line regexp.
#define REG_MULTI (rex.reg_match == NULL) #define REG_MULTI (rex.reg_match == NULL)
/* // Create a new extmatch and mark it as referenced once.
* Create a new extmatch and mark it as referenced once.
*/
static reg_extmatch_T *make_extmatch(void) static reg_extmatch_T *make_extmatch(void)
FUNC_ATTR_NONNULL_RET FUNC_ATTR_NONNULL_RET
{ {
@ -1088,9 +1034,7 @@ static reg_extmatch_T *make_extmatch(void)
return em; return em;
} }
/* // Add a reference to an extmatch.
* Add a reference to an extmatch.
*/
reg_extmatch_T *ref_extmatch(reg_extmatch_T *em) reg_extmatch_T *ref_extmatch(reg_extmatch_T *em)
{ {
if (em != NULL) { if (em != NULL) {
@ -1099,10 +1043,8 @@ reg_extmatch_T *ref_extmatch(reg_extmatch_T *em)
return em; return em;
} }
/* // Remove a reference to an extmatch. If there are no references left, free
* Remove a reference to an extmatch. If there are no references left, free // the info.
* the info.
*/
void unref_extmatch(reg_extmatch_T *em) void unref_extmatch(reg_extmatch_T *em)
{ {
int i; int i;
@ -1201,10 +1143,8 @@ static bool reg_match_visual(void)
return true; return true;
} }
/* // Check the regexp program for its magic number.
* Check the regexp program for its magic number. // Return true if it's wrong.
* Return true if it's wrong.
*/
static int prog_magic_wrong(void) static int prog_magic_wrong(void)
{ {
regprog_T *prog; regprog_T *prog;
@ -1222,11 +1162,9 @@ static int prog_magic_wrong(void)
return false; return false;
} }
/* // Cleanup the subexpressions, if this wasn't done yet.
* Cleanup the subexpressions, if this wasn't done yet. // This construction is used to clear the subexpressions only when they are
* This construction is used to clear the subexpressions only when they are // used (to increase speed).
* used (to increase speed).
*/
static void cleanup_subexpr(void) static void cleanup_subexpr(void)
{ {
if (rex.need_clear_subexpr) { if (rex.need_clear_subexpr) {
@ -1265,12 +1203,10 @@ static void reg_nextline(void)
fast_breakcheck(); fast_breakcheck();
} }
/* // Check whether a backreference matches.
* Check whether a backreference matches. // Returns RA_FAIL, RA_NOMATCH or RA_MATCH.
* Returns RA_FAIL, RA_NOMATCH or RA_MATCH. // If "bytelen" is not NULL, it is set to the byte length of the match in the
* If "bytelen" is not NULL, it is set to the byte length of the match in the // last line.
* last line.
*/
static int match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T end_lnum, static int match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T end_lnum,
colnr_T end_col, int *bytelen) colnr_T end_col, int *bytelen)
{ {
@ -1449,9 +1385,9 @@ static int cstrncmp(char *s1, char *s2, int *n)
c1 = mb_ptr2char_adv((const char_u **)&str1); c1 = mb_ptr2char_adv((const char_u **)&str1);
c2 = mb_ptr2char_adv((const char_u **)&str2); c2 = mb_ptr2char_adv((const char_u **)&str2);
/* decompose the character if necessary, into 'base' characters // decompose the character if necessary, into 'base' characters
* because I don't care about Arabic, I will hard-code the Hebrew // because I don't care about Arabic, I will hard-code the Hebrew
* which I *do* care about! So sue me... */ // which I *do* care about! So sue me...
if (c1 != c2 && (!rex.reg_ic || utf_fold(c1) != utf_fold(c2))) { if (c1 != c2 && (!rex.reg_ic || utf_fold(c1) != utf_fold(c2))) {
// decomposition necessary? // decomposition necessary?
mb_decompose(c1, &c11, &junk, &junk); mb_decompose(c1, &c11, &junk, &junk);
@ -1566,7 +1502,7 @@ char *regtilde(char *source, int magic, bool preview)
int len; int len;
int prevlen; int prevlen;
for (p = newsub; *p; ++p) { for (p = newsub; *p; p++) {
if ((*p == '~' && magic) || (*p == '\\' && *(p + 1) == '~' && !magic)) { if ((*p == '~' && magic) || (*p == '\\' && *(p + 1) == '~' && !magic)) {
if (reg_prev_sub != NULL) { if (reg_prev_sub != NULL) {
// length = len(newsub) - 1 + len(prev_sub) + 1 // length = len(newsub) - 1 + len(prev_sub) + 1
@ -1871,12 +1807,11 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int des
*s = CAR; *s = CAR;
} else if (*s == '\\' && s[1] != NUL) { } else if (*s == '\\' && s[1] != NUL) {
s++; s++;
/* Change NL to CR here too, so that this works: // Change NL to CR here too, so that this works:
* :s/abc\\\ndef/\="aaa\\\nbbb"/ on text: // :s/abc\\\ndef/\="aaa\\\nbbb"/ on text:
* abc\ // abc{backslash}
* def // def
* Not when called from vim_regexec_nl(). // Not when called from vim_regexec_nl().
*/
if (*s == NL && !rsm.sm_line_lbr) { if (*s == NL && !rsm.sm_line_lbr) {
*s = CAR; *s = CAR;
} }
@ -2172,10 +2107,8 @@ char *reg_submatch(int no)
if (rsm.sm_match == NULL) { if (rsm.sm_match == NULL) {
ssize_t len; ssize_t len;
/* // First round: compute the length and allocate memory.
* First round: compute the length and allocate memory. // Second round: copy the text.
* Second round: copy the text.
*/
for (round = 1; round <= 2; round++) { for (round = 1; round <= 2; round++) {
lnum = rsm.sm_mmatch->startpos[no].lnum; lnum = rsm.sm_mmatch->startpos[no].lnum;
if (lnum < 0 || rsm.sm_mmatch->endpos[no].lnum < 0) { if (lnum < 0 || rsm.sm_mmatch->endpos[no].lnum < 0) {
@ -2216,7 +2149,7 @@ char *reg_submatch(int no)
len++; len++;
} }
if (round == 2) { if (round == 2) {
STRNCPY(retval + len, reg_getline_submatch(lnum), STRNCPY(retval + len, reg_getline_submatch(lnum), // NOLINT(runtime/printf)
rsm.sm_mmatch->endpos[no].col); rsm.sm_mmatch->endpos[no].col);
} }
len += rsm.sm_mmatch->endpos[no].col; len += rsm.sm_mmatch->endpos[no].col;
@ -2327,12 +2260,10 @@ static char_u regname[][30] = {
}; };
#endif #endif
/* // Compile a regular expression into internal code.
* Compile a regular expression into internal code. // Returns the program in allocated memory.
* Returns the program in allocated memory. // Use vim_regfree() to free the memory.
* Use vim_regfree() to free the memory. // Returns NULL for an error.
* Returns NULL for an error.
*/
regprog_T *vim_regcomp(char *expr_arg, int re_flags) regprog_T *vim_regcomp(char *expr_arg, int re_flags)
{ {
regprog_T *prog = NULL; regprog_T *prog = NULL;
@ -2413,9 +2344,7 @@ regprog_T *vim_regcomp(char *expr_arg, int re_flags)
return prog; return prog;
} }
/* // Free a compiled regexp program, returned by vim_regcomp().
* Free a compiled regexp program, returned by vim_regcomp().
*/
void vim_regfree(regprog_T *prog) void vim_regfree(regprog_T *prog)
{ {
if (prog != NULL) { if (prog != NULL) {

View File

@ -1,137 +1,130 @@
// This is an open source non-commercial project. Dear PVS-Studio, please check // This is an open source non-commercial project. Dear PVS-Studio, please check
// it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com // it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com
/* // Backtracking regular expression implementation.
* //
* Backtracking regular expression implementation. // This file is included in "regexp.c".
* //
* This file is included in "regexp.c". // NOTICE:
* //
* NOTICE: // This is NOT the original regular expression code as written by Henry
* // Spencer. This code has been modified specifically for use with the VIM
* This is NOT the original regular expression code as written by Henry // editor, and should not be used separately from Vim. If you want a good
* Spencer. This code has been modified specifically for use with the VIM // regular expression library, get the original code. The copyright notice
* editor, and should not be used separately from Vim. If you want a good // that follows is from the original.
* regular expression library, get the original code. The copyright notice //
* that follows is from the original. // END NOTICE
* //
* END NOTICE // Copyright (c) 1986 by University of Toronto.
* // Written by Henry Spencer. Not derived from licensed software.
* Copyright (c) 1986 by University of Toronto. //
* Written by Henry Spencer. Not derived from licensed software. // Permission is granted to anyone to use this software for any
* // purpose on any computer system, and to redistribute it freely,
* Permission is granted to anyone to use this software for any // subject to the following restrictions:
* purpose on any computer system, and to redistribute it freely, //
* subject to the following restrictions: // 1. The author is not responsible for the consequences of use of
* // this software, no matter how awful, even if they arise
* 1. The author is not responsible for the consequences of use of // from defects in it.
* this software, no matter how awful, even if they arise //
* from defects in it. // 2. The origin of this software must not be misrepresented, either
* // by explicit claim or by omission.
* 2. The origin of this software must not be misrepresented, either //
* by explicit claim or by omission. // 3. Altered versions must be plainly marked as such, and must not
* // be misrepresented as being the original software.
* 3. Altered versions must be plainly marked as such, and must not //
* be misrepresented as being the original software. // Beware that some of this code is subtly aware of the way operator
* // precedence is structured in regular expressions. Serious changes in
* Beware that some of this code is subtly aware of the way operator // regular-expression syntax might require a total rethink.
* precedence is structured in regular expressions. Serious changes in //
* regular-expression syntax might require a total rethink. // Changes have been made by Tony Andrews, Olaf 'Rhialto' Seibert, Robert
* // Webb, Ciaran McCreesh and Bram Moolenaar.
* Changes have been made by Tony Andrews, Olaf 'Rhialto' Seibert, Robert // Named character class support added by Walter Briscoe (1998 Jul 01)
* Webb, Ciaran McCreesh and Bram Moolenaar.
* Named character class support added by Walter Briscoe (1998 Jul 01)
*/
/* // The "internal use only" fields in regexp_defs.h are present to pass info from
* The "internal use only" fields in regexp_defs.h are present to pass info from // compile to execute that permits the execute phase to run lots faster on
* compile to execute that permits the execute phase to run lots faster on // simple cases. They are:
* simple cases. They are: //
* // regstart char that must begin a match; NUL if none obvious; Can be a
* regstart char that must begin a match; NUL if none obvious; Can be a // multi-byte character.
* multi-byte character. // reganch is the match anchored (at beginning-of-line only)?
* reganch is the match anchored (at beginning-of-line only)? // regmust string (pointer into program) that match must include, or NULL
* regmust string (pointer into program) that match must include, or NULL // regmlen length of regmust string
* regmlen length of regmust string // regflags RF_ values or'ed together
* regflags RF_ values or'ed together //
* // Regstart and reganch permit very fast decisions on suitable starting points
* Regstart and reganch permit very fast decisions on suitable starting points // for a match, cutting down the work a lot. Regmust permits fast rejection
* for a match, cutting down the work a lot. Regmust permits fast rejection // of lines that cannot possibly match. The regmust tests are costly enough
* of lines that cannot possibly match. The regmust tests are costly enough // that vim_regcomp() supplies a regmust only if the r.e. contains something
* that vim_regcomp() supplies a regmust only if the r.e. contains something // potentially expensive (at present, the only such thing detected is * or +
* potentially expensive (at present, the only such thing detected is * or + // at the start of the r.e., which can involve a lot of backup). Regmlen is
* at the start of the r.e., which can involve a lot of backup). Regmlen is // supplied because the test in vim_regexec() needs it and vim_regcomp() is
* supplied because the test in vim_regexec() needs it and vim_regcomp() is // computing it anyway.
* computing it anyway.
*/
/* // Structure for regexp "program". This is essentially a linear encoding
* Structure for regexp "program". This is essentially a linear encoding // of a nondeterministic finite-state machine (aka syntax charts or
* of a nondeterministic finite-state machine (aka syntax charts or // "railroad normal form" in parsing technology). Each node is an opcode
* "railroad normal form" in parsing technology). Each node is an opcode // plus a "next" pointer, possibly plus an operand. "Next" pointers of
* plus a "next" pointer, possibly plus an operand. "Next" pointers of // all nodes except BRANCH and BRACE_COMPLEX implement concatenation; a "next"
* all nodes except BRANCH and BRACE_COMPLEX implement concatenation; a "next" // pointer with a BRANCH on both ends of it is connecting two alternatives.
* pointer with a BRANCH on both ends of it is connecting two alternatives. // (Here we have one of the subtle syntax dependencies: an individual BRANCH
* (Here we have one of the subtle syntax dependencies: an individual BRANCH // (as opposed to a collection of them) is never concatenated with anything
* (as opposed to a collection of them) is never concatenated with anything // because of operator precedence). The "next" pointer of a BRACE_COMPLEX
* because of operator precedence). The "next" pointer of a BRACE_COMPLEX // node points to the node after the stuff to be repeated.
* node points to the node after the stuff to be repeated. // The operand of some types of node is a literal string; for others, it is a
* The operand of some types of node is a literal string; for others, it is a // node leading into a sub-FSM. In particular, the operand of a BRANCH node
* node leading into a sub-FSM. In particular, the operand of a BRANCH node // is the first node of the branch.
* is the first node of the branch. // (NB this is *not* a tree structure: the tail of the branch connects to the
* (NB this is *not* a tree structure: the tail of the branch connects to the // thing following the set of BRANCHes.)
* thing following the set of BRANCHes.) //
* // pattern is coded like:
* pattern is coded like: //
* // +-----------------+
* +-----------------+ // | V
* | V // <aa>\|<bb> BRANCH <aa> BRANCH <bb> --> END
* <aa>\|<bb> BRANCH <aa> BRANCH <bb> --> END // | ^ | ^
* | ^ | ^ // +------+ +----------+
* +------+ +----------+ //
* //
* // +------------------+
* +------------------+ // V |
* V | // <aa>* BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END
* <aa>* BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END // | | ^ ^
* | | ^ ^ // | +---------------+ |
* | +---------------+ | // +---------------------------------------------+
* +---------------------------------------------+ //
* //
* // +----------------------+
* +----------------------+ // V |
* V | // <aa>\+ BRANCH <aa> --> BRANCH --> BACK BRANCH --> NOTHING --> END
* <aa>\+ BRANCH <aa> --> BRANCH --> BACK BRANCH --> NOTHING --> END // | | ^ ^
* | | ^ ^ // | +-----------+ |
* | +-----------+ | // +--------------------------------------------------+
* +--------------------------------------------------+ //
* //
* // +-------------------------+
* +-------------------------+ // V |
* V | // <aa>\{} BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK END
* <aa>\{} BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK END // | | ^
* | | ^ // | +----------------+
* | +----------------+ // +-----------------------------------------------+
* +-----------------------------------------------+ //
* //
* // <aa>\@!<bb> BRANCH NOMATCH <aa> --> END <bb> --> END
* <aa>\@!<bb> BRANCH NOMATCH <aa> --> END <bb> --> END // | | ^ ^
* | | ^ ^ // | +----------------+ |
* | +----------------+ | // +--------------------------------+
* +--------------------------------+ //
* // +---------+
* +---------+ // | V
* | V // \z[abc] BRANCH BRANCH a BRANCH b BRANCH c BRANCH NOTHING --> END
* \z[abc] BRANCH BRANCH a BRANCH b BRANCH c BRANCH NOTHING --> END // | | | | ^ ^
* | | | | ^ ^ // | | | +-----+ |
* | | | +-----+ | // | | +----------------+ |
* | | +----------------+ | // | +---------------------------+ |
* | +---------------------------+ | // +------------------------------------------------------+
* +------------------------------------------------------+ //
* // They all start with a BRANCH for "\|" alternatives, even when there is only
* They all start with a BRANCH for "\|" alternatives, even when there is only // one alternative.
* one alternative.
*/
#include <assert.h> #include <assert.h>
#include <inttypes.h> #include <inttypes.h>
@ -141,9 +134,7 @@
#include "nvim/garray.h" #include "nvim/garray.h"
#include "nvim/regexp.h" #include "nvim/regexp.h"
/* // The opcodes are:
* The opcodes are:
*/
// definition number opnd? meaning // definition number opnd? meaning
#define END 0 // End of program or NOMATCH operand. #define END 0 // End of program or NOMATCH operand.
@ -240,9 +231,7 @@
#define RE_VISUAL 208 // Match Visual area #define RE_VISUAL 208 // Match Visual area
#define RE_COMPOSING 209 // any composing characters #define RE_COMPOSING 209 // any composing characters
/* // Flags to be passed up and down.
* Flags to be passed up and down.
*/
#define HASWIDTH 0x1 // Known never to match null string. #define HASWIDTH 0x1 // Known never to match null string.
#define SIMPLE 0x2 // Simple enough to be STAR/PLUS operand. #define SIMPLE 0x2 // Simple enough to be STAR/PLUS operand.
#define SPSTART 0x4 // Starts with * or +. #define SPSTART 0x4 // Starts with * or +.
@ -273,10 +262,8 @@ static int classcodes[] = {
UPPER, NUPPER UPPER, NUPPER
}; };
/* // When regcode is set to this value, code is not emitted and size is computed
* When regcode is set to this value, code is not emitted and size is computed // instead.
* instead.
*/
#define JUST_CALC_SIZE ((char_u *)-1) #define JUST_CALC_SIZE ((char_u *)-1)
// Values for rs_state in regitem_T. // Values for rs_state in regitem_T.
@ -297,11 +284,9 @@ typedef enum regstate_E {
RS_STAR_SHORT, // STAR/PLUS/BRACE_SIMPLE shortest match RS_STAR_SHORT, // STAR/PLUS/BRACE_SIMPLE shortest match
} regstate_T; } regstate_T;
/* // Structure used to save the current input state, when it needs to be
* Structure used to save the current input state, when it needs to be // restored after trying a match. Used by reg_save() and reg_restore().
* restored after trying a match. Used by reg_save() and reg_restore(). // Also stores the length of "backpos".
* Also stores the length of "backpos".
*/
typedef struct { typedef struct {
union { union {
char_u *ptr; // rex.input pointer, for single-line regexp char_u *ptr; // rex.input pointer, for single-line regexp
@ -327,12 +312,10 @@ typedef struct regbehind_S {
save_se_T save_end[NSUBEXP]; save_se_T save_end[NSUBEXP];
} regbehind_T; } regbehind_T;
/* // When there are alternatives a regstate_T is put on the regstack to remember
* When there are alternatives a regstate_T is put on the regstack to remember // what we are doing.
* what we are doing. // Before it may be another type of item, depending on rs_state, to remember
* Before it may be another type of item, depending on rs_state, to remember // more things.
* more things.
*/
typedef struct regitem_S { typedef struct regitem_S {
regstate_T rs_state; // what we are doing, one of RS_ above regstate_T rs_state; // what we are doing, one of RS_ above
int16_t rs_no; // submatch nr or BEHIND/NOBEHIND int16_t rs_no; // submatch nr or BEHIND/NOBEHIND
@ -359,69 +342,63 @@ typedef struct backpos_S {
regsave_T bp_pos; // last input position regsave_T bp_pos; // last input position
} backpos_T; } backpos_T;
/* // "regstack" and "backpos" are used by regmatch(). They are kept over calls
* "regstack" and "backpos" are used by regmatch(). They are kept over calls // to avoid invoking malloc() and free() often.
* to avoid invoking malloc() and free() often. // "regstack" is a stack with regitem_T items, sometimes preceded by regstar_T
* "regstack" is a stack with regitem_T items, sometimes preceded by regstar_T // or regbehind_T.
* or regbehind_T. // "backpos" is a table with backpos_T for BACK
* "backpos" is a table with backpos_T for BACK
*/
static garray_T regstack = GA_EMPTY_INIT_VALUE; static garray_T regstack = GA_EMPTY_INIT_VALUE;
static garray_T backpos = GA_EMPTY_INIT_VALUE; static garray_T backpos = GA_EMPTY_INIT_VALUE;
static regsave_T behind_pos; static regsave_T behind_pos;
/* // Both for regstack and backpos tables we use the following strategy of
* Both for regstack and backpos tables we use the following strategy of // allocation (to reduce malloc/free calls):
* allocation (to reduce malloc/free calls): // - Initial size is fairly small.
* - Initial size is fairly small. // - When needed, the tables are grown bigger (8 times at first, double after
* - When needed, the tables are grown bigger (8 times at first, double after // that).
* that). // - After executing the match we free the memory only if the array has grown.
* - After executing the match we free the memory only if the array has grown. // Thus the memory is kept allocated when it's at the initial size.
* Thus the memory is kept allocated when it's at the initial size. // This makes it fast while not keeping a lot of memory allocated.
* This makes it fast while not keeping a lot of memory allocated. // A three times speed increase was observed when using many simple patterns.
* A three times speed increase was observed when using many simple patterns.
*/
#define REGSTACK_INITIAL 2048 #define REGSTACK_INITIAL 2048
#define BACKPOS_INITIAL 64 #define BACKPOS_INITIAL 64
/* // Opcode notes:
* Opcode notes: //
* // BRANCH The set of branches constituting a single choice are hooked
* BRANCH The set of branches constituting a single choice are hooked // together with their "next" pointers, since precedence prevents
* together with their "next" pointers, since precedence prevents // anything being concatenated to any individual branch. The
* anything being concatenated to any individual branch. The // "next" pointer of the last BRANCH in a choice points to the
* "next" pointer of the last BRANCH in a choice points to the // thing following the whole choice. This is also where the
* thing following the whole choice. This is also where the // final "next" pointer of each individual branch points; each
* final "next" pointer of each individual branch points; each // branch starts with the operand node of a BRANCH node.
* branch starts with the operand node of a BRANCH node. //
* // BACK Normal "next" pointers all implicitly point forward; BACK
* BACK Normal "next" pointers all implicitly point forward; BACK // exists to make loop structures possible.
* exists to make loop structures possible. //
* // STAR,PLUS '=', and complex '*' and '+', are implemented as circular
* STAR,PLUS '=', and complex '*' and '+', are implemented as circular // BRANCH structures using BACK. Simple cases (one character
* BRANCH structures using BACK. Simple cases (one character // per match) are implemented with STAR and PLUS for speed
* per match) are implemented with STAR and PLUS for speed // and to minimize recursive plunges.
* and to minimize recursive plunges. //
* // BRACE_LIMITS This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX
* BRACE_LIMITS This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX // node, and defines the min and max limits to be used for that
* node, and defines the min and max limits to be used for that // node.
* node. //
* // MOPEN,MCLOSE ...are numbered at compile time.
* MOPEN,MCLOSE ...are numbered at compile time. // ZOPEN,ZCLOSE ...ditto
* ZOPEN,ZCLOSE ...ditto //
*/ //
//
/* // A node is one char of opcode followed by two chars of "next" pointer.
* A node is one char of opcode followed by two chars of "next" pointer. // "Next" pointers are stored as two 8-bit bytes, high order first. The
* "Next" pointers are stored as two 8-bit bytes, high order first. The // value is a positive offset from the opcode of the node containing it.
* value is a positive offset from the opcode of the node containing it. // An operand, if any, simply follows the node. (Note that much of the
* An operand, if any, simply follows the node. (Note that much of the // code generation knows about this implicit relationship.)
* code generation knows about this implicit relationship.) //
* // Using two bytes for the "next" pointer is vast overkill for most things,
* Using two bytes for the "next" pointer is vast overkill for most things, // but allows patterns to get big without disasters.
* but allows patterns to get big without disasters.
*/
#define OP(p) ((int)(*(p))) #define OP(p) ((int)(*(p)))
#define NEXT(p) (((*((p) + 1) & 0377) << 8) + (*((p) + 2) & 0377)) #define NEXT(p) (((*((p) + 1) & 0377) << 8) + (*((p) + 2) & 0377))
#define OPERAND(p) ((p) + 3) #define OPERAND(p) ((p) + 3)
@ -449,9 +426,7 @@ static int regnarrate = 0;
# include "regexp_bt.c.generated.h" # include "regexp_bt.c.generated.h"
#endif #endif
/* // Setup to parse the regexp. Used once to get the length and once to do it.
* Setup to parse the regexp. Used once to get the length and once to do it.
*/
static void regcomp_start(char_u *expr, int re_flags) // see vim_regcomp() static void regcomp_start(char_u *expr, int re_flags) // see vim_regcomp()
{ {
initchr(expr); initchr(expr);
@ -484,9 +459,7 @@ static bool use_multibytecode(int c)
|| utf_iscomposing(c)); || utf_iscomposing(c));
} }
/* // Emit (if appropriate) a byte of code
* Emit (if appropriate) a byte of code
*/
static void regc(int b) static void regc(int b)
{ {
if (regcode == JUST_CALC_SIZE) { if (regcode == JUST_CALC_SIZE) {
@ -496,9 +469,7 @@ static void regc(int b)
} }
} }
/* // Emit (if appropriate) a multi-byte character of code
* Emit (if appropriate) a multi-byte character of code
*/
static void regmbc(int c) static void regmbc(int c)
{ {
if (regcode == JUST_CALC_SIZE) { if (regcode == JUST_CALC_SIZE) {
@ -508,11 +479,9 @@ static void regmbc(int c)
} }
} }
/* // Produce the bytes for equivalence class "c".
* Produce the bytes for equivalence class "c". // Currently only handles latin1, latin9 and utf-8.
* Currently only handles latin1, latin9 and utf-8. // NOTE: When changing this function, also change nfa_emit_equi_class()
* NOTE: When changing this function, also change nfa_emit_equi_class()
*/
static void reg_equi_class(int c) static void reg_equi_class(int c)
{ {
{ {
@ -1481,10 +1450,8 @@ static void reg_equi_class(int c)
regmbc(c); regmbc(c);
} }
/* // Emit a node.
* Emit a node. // Return pointer to generated code.
* Return pointer to generated code.
*/
static char_u *regnode(int op) static char_u *regnode(int op)
{ {
char_u *ret; char_u *ret;
@ -1500,9 +1467,7 @@ static char_u *regnode(int op)
return ret; return ret;
} }
/* // Write a four-byte number at "p" and return pointer to the next char.
* Write a four-byte number at "p" and return pointer to the next char.
*/
static char_u *re_put_uint32(char_u *p, uint32_t val) static char_u *re_put_uint32(char_u *p, uint32_t val)
{ {
*p++ = (char_u)((val >> 24) & 0377); *p++ = (char_u)((val >> 24) & 0377);
@ -1512,11 +1477,9 @@ static char_u *re_put_uint32(char_u *p, uint32_t val)
return p; return p;
} }
/* // regnext - dig the "next" pointer out of a node
* regnext - dig the "next" pointer out of a node // Returns NULL when calculating size, when there is no next item and when
* Returns NULL when calculating size, when there is no next item and when // there is an error.
* there is an error.
*/
static char_u *regnext(char_u *p) static char_u *regnext(char_u *p)
FUNC_ATTR_NONNULL_ALL FUNC_ATTR_NONNULL_ALL
{ {
@ -1573,9 +1536,7 @@ static void regtail(char_u *p, char_u *val)
} }
} }
/* // Like regtail, on item after a BRANCH; nop if none.
* Like regtail, on item after a BRANCH; nop if none.
*/
static void regoptail(char_u *p, char_u *val) static void regoptail(char_u *p, char_u *val)
{ {
// When op is neither BRANCH nor BRACE_COMPLEX0-9, it is "operandless" // When op is neither BRANCH nor BRACE_COMPLEX0-9, it is "operandless"
@ -1587,11 +1548,9 @@ static void regoptail(char_u *p, char_u *val)
regtail(OPERAND(p), val); regtail(OPERAND(p), val);
} }
/* // Insert an operator in front of already-emitted operand
* Insert an operator in front of already-emitted operand //
* // Means relocating the operand.
* Means relocating the operand.
*/
static void reginsert(int op, char_u *opnd) static void reginsert(int op, char_u *opnd)
{ {
char_u *src; char_u *src;
@ -1615,10 +1574,8 @@ static void reginsert(int op, char_u *opnd)
*place = NUL; *place = NUL;
} }
/* // Insert an operator in front of already-emitted operand.
* Insert an operator in front of already-emitted operand. // Add a number to the operator.
* Add a number to the operator.
*/
static void reginsert_nr(int op, long val, char_u *opnd) static void reginsert_nr(int op, long val, char_u *opnd)
{ {
char_u *src; char_u *src;
@ -1644,12 +1601,10 @@ static void reginsert_nr(int op, long val, char_u *opnd)
re_put_uint32(place, (uint32_t)val); re_put_uint32(place, (uint32_t)val);
} }
/* // Insert an operator in front of already-emitted operand.
* Insert an operator in front of already-emitted operand. // The operator has the given limit values as operands. Also set next pointer.
* The operator has the given limit values as operands. Also set next pointer. //
* // Means relocating the operand.
* Means relocating the operand.
*/
static void reginsert_limits(int op, long minval, long maxval, char_u *opnd) static void reginsert_limits(int op, long minval, long maxval, char_u *opnd)
{ {
char_u *src; char_u *src;
@ -1704,13 +1659,11 @@ static int seen_endbrace(int refnum)
return true; return true;
} }
/* // Parse the lowest level.
* Parse the lowest level. //
* // Optimization: gobbles an entire sequence of ordinary characters so that
* Optimization: gobbles an entire sequence of ordinary characters so that // it can turn them into a single node, which is smaller to store and
* it can turn them into a single node, which is smaller to store and // faster to run. Don't do this when one_exactly is set.
* faster to run. Don't do this when one_exactly is set.
*/
static char_u *regatom(int *flagp) static char_u *regatom(int *flagp)
{ {
char_u *ret; char_u *ret;
@ -2289,8 +2242,7 @@ collection:
if (c_class != 0) { if (c_class != 0) {
// produce equivalence class // produce equivalence class
reg_equi_class(c_class); reg_equi_class(c_class);
} else if ((c_class = } else if ((c_class = get_coll_element(&regparse)) != 0) {
get_coll_element(&regparse)) != 0) {
// produce a collating element // produce a collating element
regmbc(c_class); regmbc(c_class);
} else { } else {
@ -2466,7 +2418,7 @@ do_multibyte:
for (len = 0; c != NUL && (len == 0 for (len = 0; c != NUL && (len == 0
|| (re_multi_type(peekchr()) == NOT_MULTI || (re_multi_type(peekchr()) == NOT_MULTI
&& !one_exactly && !one_exactly
&& !is_Magic(c))); ++len) { && !is_Magic(c))); len++) {
c = no_Magic(c); c = no_Magic(c);
{ {
regmbc(c); regmbc(c);
@ -2500,15 +2452,13 @@ do_multibyte:
return ret; return ret;
} }
/* // Parse something followed by possible [*+=].
* Parse something followed by possible [*+=]. //
* // Note that the branching code sequences used for = and the general cases
* Note that the branching code sequences used for = and the general cases // of * and + are somewhat optimized: they use the same NOTHING node as
* of * and + are somewhat optimized: they use the same NOTHING node as // both the endmarker for their branch list and the body of the last branch.
* both the endmarker for their branch list and the body of the last branch. // It might seem that this node could be dispensed with entirely, but the
* It might seem that this node could be dispensed with entirely, but the // endmarker role is not redundant.
* endmarker role is not redundant.
*/
static char_u *regpiece(int *flagp) static char_u *regpiece(int *flagp)
{ {
char_u *ret; char_u *ret;
@ -2644,10 +2594,8 @@ static char_u *regpiece(int *flagp)
return ret; return ret;
} }
/* // Parse one alternative of an | or & operator.
* Parse one alternative of an | or & operator. // Implements the concatenation operator.
* Implements the concatenation operator.
*/
static char_u *regconcat(int *flagp) static char_u *regconcat(int *flagp)
{ {
char_u *first = NULL; char_u *first = NULL;
@ -2722,10 +2670,8 @@ static char_u *regconcat(int *flagp)
return first; return first;
} }
/* // Parse one alternative of an | operator.
* Parse one alternative of an | operator. // Implements the & operator.
* Implements the & operator.
*/
static char_u *regbranch(int *flagp) static char_u *regbranch(int *flagp)
{ {
char_u *ret; char_u *ret;
@ -2874,27 +2820,25 @@ static char_u *reg(int paren, int *flagp)
return ret; return ret;
} }
/* // bt_regcomp() - compile a regular expression into internal code for the
* bt_regcomp() - compile a regular expression into internal code for the // traditional back track matcher.
* traditional back track matcher. // Returns the program in allocated space. Returns NULL for an error.
* Returns the program in allocated space. Returns NULL for an error. //
* // We can't allocate space until we know how big the compiled form will be,
* We can't allocate space until we know how big the compiled form will be, // but we can't compile it (and thus know how big it is) until we've got a
* but we can't compile it (and thus know how big it is) until we've got a // place to put the code. So we cheat: we compile it twice, once with code
* place to put the code. So we cheat: we compile it twice, once with code // generation turned off and size counting turned on, and once "for real".
* generation turned off and size counting turned on, and once "for real". // This also means that we don't allocate space until we are sure that the
* This also means that we don't allocate space until we are sure that the // thing really will compile successfully, and we never have to move the
* thing really will compile successfully, and we never have to move the // code and thus invalidate pointers into it. (Note that it has to be in
* code and thus invalidate pointers into it. (Note that it has to be in // one piece because free() must be able to free it all.)
* one piece because free() must be able to free it all.) //
* // Whether upper/lower case is to be ignored is decided when executing the
* Whether upper/lower case is to be ignored is decided when executing the // program, it does not matter here.
* program, it does not matter here. //
* // Beware that the optimization-preparation code in here knows about some
* Beware that the optimization-preparation code in here knows about some // of the structure of the compiled regexp.
* of the structure of the compiled regexp. // "re_flags": RE_MAGIC and/or RE_STRING.
* "re_flags": RE_MAGIC and/or RE_STRING.
*/
static regprog_T *bt_regcomp(char_u *expr, int re_flags) static regprog_T *bt_regcomp(char_u *expr, int re_flags)
{ {
char_u *scan; char_u *scan;
@ -2999,19 +2943,15 @@ static regprog_T *bt_regcomp(char_u *expr, int re_flags)
return (regprog_T *)r; return (regprog_T *)r;
} }
/* // Check if during the previous call to vim_regcomp the EOL item "$" has been
* Check if during the previous call to vim_regcomp the EOL item "$" has been // found. This is messy, but it works fine.
* found. This is messy, but it works fine.
*/
int vim_regcomp_had_eol(void) int vim_regcomp_had_eol(void)
{ {
return had_eol; return had_eol;
} }
/* // Get a number after a backslash that is inside [].
* Get a number after a backslash that is inside []. // When nothing is recognized return a backslash.
* When nothing is recognized return a backslash.
*/
static int coll_get_char(void) static int coll_get_char(void)
{ {
int64_t nr = -1; int64_t nr = -1;
@ -3037,9 +2977,7 @@ static int coll_get_char(void)
return (int)nr; return (int)nr;
} }
/* // Free a compiled regexp program, returned by bt_regcomp().
* Free a compiled regexp program, returned by bt_regcomp().
*/
static void bt_regfree(regprog_T *prog) static void bt_regfree(regprog_T *prog)
{ {
xfree(prog); xfree(prog);
@ -3047,11 +2985,9 @@ static void bt_regfree(regprog_T *prog)
#define ADVANCE_REGINPUT() MB_PTR_ADV(rex.input) #define ADVANCE_REGINPUT() MB_PTR_ADV(rex.input)
/* // The arguments from BRACE_LIMITS are stored here. They are actually local
* The arguments from BRACE_LIMITS are stored here. They are actually local // to regmatch(), but they are here to reduce the amount of stack space used
* to regmatch(), but they are here to reduce the amount of stack space used // (it can be called recursively many times).
* (it can be called recursively many times).
*/
static long bl_minval; static long bl_minval;
static long bl_maxval; static long bl_maxval;
@ -3108,13 +3044,11 @@ static bool reg_save_equal(const regsave_T *save)
else /* NOLINT */ \ else /* NOLINT */ \
*(pp) = (savep)->se_u.ptr; } *(pp) = (savep)->se_u.ptr; }
/* // Tentatively set the sub-expression start to the current position (after
* Tentatively set the sub-expression start to the current position (after // calling regmatch() they will have changed). Need to save the existing
* calling regmatch() they will have changed). Need to save the existing // values for when there is no match.
* values for when there is no match. // Use se_save() to use pointer (save_se_multi()) or position (save_se_one()),
* Use se_save() to use pointer (save_se_multi()) or position (save_se_one()), // depending on REG_MULTI.
* depending on REG_MULTI.
*/
static void save_se_multi(save_se_T *savep, lpos_T *posp) static void save_se_multi(save_se_T *savep, lpos_T *posp)
{ {
savep->se_u.pos = *posp; savep->se_u.pos = *posp;
@ -3494,10 +3428,8 @@ do_class:
return (int)count; return (int)count;
} }
/* // Push an item onto the regstack.
* Push an item onto the regstack. // Returns pointer to new item. Returns NULL when out of memory.
* Returns pointer to new item. Returns NULL when out of memory.
*/
static regitem_T *regstack_push(regstate_T state, char_u *scan) static regitem_T *regstack_push(regstate_T state, char_u *scan)
{ {
regitem_T *rp; regitem_T *rp;
@ -3516,9 +3448,7 @@ static regitem_T *regstack_push(regstate_T state, char_u *scan)
return rp; return rp;
} }
/* // Pop an item from the regstack.
* Pop an item from the regstack.
*/
static void regstack_pop(char_u **scan) static void regstack_pop(char_u **scan)
{ {
regitem_T *rp; regitem_T *rp;
@ -4643,7 +4573,7 @@ static bool regmatch(char_u *scan, proftime_T *tm, int *timed_out)
// Pop the state. Restore pointers when there is no match. // Pop the state. Restore pointers when there is no match.
if (status == RA_NOMATCH) { if (status == RA_NOMATCH) {
reg_restore(&rp->rs_un.regsave, &backpos); reg_restore(&rp->rs_un.regsave, &backpos);
--brace_count[rp->rs_no]; // decrement match count brace_count[rp->rs_no]--; // decrement match count
} }
regstack_pop(&scan); regstack_pop(&scan);
break; break;
@ -4653,7 +4583,7 @@ static bool regmatch(char_u *scan, proftime_T *tm, int *timed_out)
if (status == RA_NOMATCH) { if (status == RA_NOMATCH) {
// There was no match, but we did find enough matches. // There was no match, but we did find enough matches.
reg_restore(&rp->rs_un.regsave, &backpos); reg_restore(&rp->rs_un.regsave, &backpos);
--brace_count[rp->rs_no]; brace_count[rp->rs_no]--;
// continue with the items after "\{}" // continue with the items after "\{}"
status = RA_CONT; status = RA_CONT;
} }
@ -5247,9 +5177,7 @@ static long bt_regexec_multi(regmmatch_T *rmp, win_T *win, buf_T *buf, linenr_T
return bt_regexec_both(NULL, col, tm, timed_out); return bt_regexec_both(NULL, col, tm, timed_out);
} }
/* // Compare a number with the operand of RE_LNUM, RE_COL or RE_VCOL.
* Compare a number with the operand of RE_LNUM, RE_COL or RE_VCOL.
*/
static int re_num_cmp(uint32_t val, char_u *scan) static int re_num_cmp(uint32_t val, char_u *scan)
{ {
uint32_t n = (uint32_t)OPERAND_MIN(scan); uint32_t n = (uint32_t)OPERAND_MIN(scan);
@ -5265,9 +5193,7 @@ static int re_num_cmp(uint32_t val, char_u *scan)
#ifdef BT_REGEXP_DUMP #ifdef BT_REGEXP_DUMP
/* // regdump - dump a regexp onto stdout in vaguely comprehensible form
* regdump - dump a regexp onto stdout in vaguely comprehensible form
*/
static void regdump(char_u *pattern, bt_regprog_T *r) static void regdump(char_u *pattern, bt_regprog_T *r)
{ {
char_u *s; char_u *s;
@ -5353,9 +5279,7 @@ static void regdump(char_u *pattern, bt_regprog_T *r)
#ifdef REGEXP_DEBUG #ifdef REGEXP_DEBUG
/* // regprop - printable representation of opcode
* regprop - printable representation of opcode
*/
static char_u *regprop(char_u *op) static char_u *regprop(char_u *op)
{ {
char *p; char *p;

View File

@ -1,13 +1,11 @@
/* // NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE
* NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE //
* // This is NOT the original regular expression code as written by Henry
* This is NOT the original regular expression code as written by Henry // Spencer. This code has been modified specifically for use with Vim, and
* Spencer. This code has been modified specifically for use with Vim, and // should not be used apart from compiling Vim. If you want a good regular
* should not be used apart from compiling Vim. If you want a good regular // expression library, get the original code.
* expression library, get the original code. //
* // NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE
* NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE
*/
#ifndef NVIM_REGEXP_DEFS_H #ifndef NVIM_REGEXP_DEFS_H
#define NVIM_REGEXP_DEFS_H #define NVIM_REGEXP_DEFS_H
@ -17,18 +15,14 @@
#include "nvim/pos.h" #include "nvim/pos.h"
#include "nvim/types.h" #include "nvim/types.h"
/* // The number of sub-matches is limited to 10.
* The number of sub-matches is limited to 10. // The first one (index 0) is the whole match, referenced with "\0".
* The first one (index 0) is the whole match, referenced with "\0". // The second one (index 1) is the first sub-match, referenced with "\1".
* The second one (index 1) is the first sub-match, referenced with "\1". // This goes up to the tenth (index 9), referenced with "\9".
* This goes up to the tenth (index 9), referenced with "\9".
*/
#define NSUBEXP 10 #define NSUBEXP 10
/* // In the NFA engine: how many braces are allowed.
* In the NFA engine: how many braces are allowed. // TODO(RE): Use dynamic memory allocation instead of static, like here
* TODO(RE): Use dynamic memory allocation instead of static, like here
*/
#define NFA_MAX_BRACES 20 #define NFA_MAX_BRACES 20
// In the NFA engine: how many states are allowed. // In the NFA engine: how many states are allowed.
@ -61,11 +55,9 @@ typedef struct {
#include "nvim/buffer_defs.h" #include "nvim/buffer_defs.h"
/* // Structure returned by vim_regcomp() to pass on to vim_regexec().
* Structure returned by vim_regcomp() to pass on to vim_regexec(). // This is the general structure. For the actual matcher, two specific
* This is the general structure. For the actual matcher, two specific // structures are used. See code below.
* structures are used. See code below.
*/
struct regprog { struct regprog {
regengine_T *engine; regengine_T *engine;
unsigned regflags; unsigned regflags;
@ -74,11 +66,9 @@ struct regprog {
bool re_in_use; ///< prog is being executed bool re_in_use; ///< prog is being executed
}; };
/* // Structure used by the back track matcher.
* Structure used by the back track matcher. // These fields are only to be used in regexp.c!
* These fields are only to be used in regexp.c! // See regexp.c for an explanation.
* See regexp.c for an explanation.
*/
typedef struct { typedef struct {
// These four members implement regprog_T. // These four members implement regprog_T.
regengine_T *engine; regengine_T *engine;
@ -107,9 +97,7 @@ struct nfa_state {
int val; int val;
}; };
/* // Structure used by the NFA matcher.
* Structure used by the NFA matcher.
*/
typedef struct { typedef struct {
// These four members implement regprog_T. // These four members implement regprog_T.
regengine_T *engine; regengine_T *engine;
@ -133,11 +121,9 @@ typedef struct {
nfa_state_T state[1]; // actually longer.. nfa_state_T state[1]; // actually longer..
} nfa_regprog_T; } nfa_regprog_T;
/* // Structure to be used for single-line matching.
* Structure to be used for single-line matching. // Sub-match "no" starts at "startp[no]" and ends just before "endp[no]".
* Sub-match "no" starts at "startp[no]" and ends just before "endp[no]". // When there is no match, the pointer is NULL.
* When there is no match, the pointer is NULL.
*/
typedef struct { typedef struct {
regprog_T *regprog; regprog_T *regprog;
char *startp[NSUBEXP]; char *startp[NSUBEXP];
@ -145,11 +131,9 @@ typedef struct {
bool rm_ic; bool rm_ic;
} regmatch_T; } regmatch_T;
/* // Structure used to store external references: "\z\(\)" to "\z\1".
* Structure used to store external references: "\z\(\)" to "\z\1". // Use a reference count to avoid the need to copy this around. When it goes
* Use a reference count to avoid the need to copy this around. When it goes // from 1 to zero the matches need to be freed.
* from 1 to zero the matches need to be freed.
*/
struct reg_extmatch { struct reg_extmatch {
int16_t refcnt; int16_t refcnt;
char_u *matches[NSUBEXP]; char_u *matches[NSUBEXP];

View File

@ -1,11 +1,9 @@
// This is an open source non-commercial project. Dear PVS-Studio, please check // This is an open source non-commercial project. Dear PVS-Studio, please check
// it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com // it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com
/* // NFA regular expression implementation.
* NFA regular expression implementation. //
* // This file is included in "regexp.c".
* This file is included in "regexp.c".
*/
#include <assert.h> #include <assert.h>
#include <inttypes.h> #include <inttypes.h>
@ -383,10 +381,8 @@ static void nfa_regcomp_start(char_u *expr, int re_flags)
regcomp_start(expr, re_flags); regcomp_start(expr, re_flags);
} }
/* // Figure out if the NFA state list starts with an anchor, must match at start
* Figure out if the NFA state list starts with an anchor, must match at start // of the line.
* of the line.
*/
static int nfa_get_reganch(nfa_state_T *start, int depth) static int nfa_get_reganch(nfa_state_T *start, int depth)
{ {
nfa_state_T *p = start; nfa_state_T *p = start;
@ -441,10 +437,8 @@ static int nfa_get_reganch(nfa_state_T *start, int depth)
return 0; return 0;
} }
/* // Figure out if the NFA state list starts with a character which must match
* Figure out if the NFA state list starts with a character which must match // at start of the match.
* at start of the match.
*/
static int nfa_get_regstart(nfa_state_T *start, int depth) static int nfa_get_regstart(nfa_state_T *start, int depth)
{ {
nfa_state_T *p = start; nfa_state_T *p = start;
@ -521,11 +515,9 @@ static int nfa_get_regstart(nfa_state_T *start, int depth)
return 0; return 0;
} }
/* // Figure out if the NFA state list contains just literal text and nothing
* Figure out if the NFA state list contains just literal text and nothing // else. If so return a string in allocated memory with what must match after
* else. If so return a string in allocated memory with what must match after // regstart. Otherwise return NULL.
* regstart. Otherwise return NULL.
*/
static char_u *nfa_get_match_text(nfa_state_T *start) static char_u *nfa_get_match_text(nfa_state_T *start)
{ {
nfa_state_T *p = start; nfa_state_T *p = start;
@ -557,10 +549,8 @@ static char_u *nfa_get_match_text(nfa_state_T *start)
return ret; return ret;
} }
/* // Allocate more space for post_start. Called when
* Allocate more space for post_start. Called when // running above the estimated number of states.
* running above the estimated number of states.
*/
static void realloc_post_list(void) static void realloc_post_list(void)
{ {
// For weird patterns the number of states can be very high. Increasing by // For weird patterns the number of states can be very high. Increasing by
@ -572,16 +562,14 @@ static void realloc_post_list(void)
post_start = new_start; post_start = new_start;
} }
/* // Search between "start" and "end" and try to recognize a
* Search between "start" and "end" and try to recognize a // character class in expanded form. For example [0-9].
* character class in expanded form. For example [0-9]. // On success, return the id the character class to be emitted.
* On success, return the id the character class to be emitted. // On failure, return 0 (=FAIL)
* On failure, return 0 (=FAIL) // Start points to the first char of the range, while end should point
* Start points to the first char of the range, while end should point // to the closing brace.
* to the closing brace. // Keep in mind that 'ignorecase' applies at execution time, thus [a-z] may
* Keep in mind that 'ignorecase' applies at execution time, thus [a-z] may // need to be interpreted as [a-zA-Z].
* need to be interpreted as [a-zA-Z].
*/
static int nfa_recognize_char_class(char_u *start, char_u *end, int extra_newl) static int nfa_recognize_char_class(char_u *start, char_u *end, int extra_newl)
{ {
#define CLASS_not 0x80 #define CLASS_not 0x80
@ -700,14 +688,12 @@ static int nfa_recognize_char_class(char_u *start, char_u *end, int extra_newl)
return FAIL; return FAIL;
} }
/* // Produce the bytes for equivalence class "c".
* Produce the bytes for equivalence class "c". // Currently only handles latin1, latin9 and utf-8.
* Currently only handles latin1, latin9 and utf-8. // Emits bytes in postfix notation: 'a,b,NFA_OR,c,NFA_OR' is
* Emits bytes in postfix notation: 'a,b,NFA_OR,c,NFA_OR' is // equivalent to 'a OR b OR c'
* equivalent to 'a OR b OR c' //
* // NOTE! When changing this function, also update reg_equi_class()
* NOTE! When changing this function, also update reg_equi_class()
*/
static void nfa_emit_equi_class(int c) static void nfa_emit_equi_class(int c)
{ {
#define EMIT2(c) EMIT(c); EMIT(NFA_CONCAT); #define EMIT2(c) EMIT(c); EMIT(NFA_CONCAT);
@ -1778,26 +1764,22 @@ static void nfa_emit_equi_class(int c)
#undef EMIT2 #undef EMIT2
} }
/* // Code to parse regular expression.
* Code to parse regular expression. //
* // We try to reuse parsing functions in regexp.c to
* We try to reuse parsing functions in regexp.c to // minimize surprise and keep the syntax consistent.
* minimize surprise and keep the syntax consistent.
*/
/* // Parse the lowest level.
* Parse the lowest level. //
* // An atom can be one of a long list of items. Many atoms match one character
* An atom can be one of a long list of items. Many atoms match one character // in the text. It is often an ordinary character or a character class.
* in the text. It is often an ordinary character or a character class. // Braces can be used to make a pattern into an atom. The "\z(\)" construct
* Braces can be used to make a pattern into an atom. The "\z(\)" construct // is only for syntax highlighting.
* is only for syntax highlighting. //
* // atom ::= ordinary-atom
* atom ::= ordinary-atom // or \( pattern \)
* or \( pattern \) // or \%( pattern \)
* or \%( pattern \) // or \z( pattern \)
* or \z( pattern \)
*/
static int nfa_regatom(void) static int nfa_regatom(void)
{ {
int c; int c;
@ -1862,9 +1844,7 @@ static int nfa_regatom(void)
// "\_x" is character class plus newline // "\_x" is character class plus newline
FALLTHROUGH; FALLTHROUGH;
/* // Character classes.
* Character classes.
*/
case Magic('.'): case Magic('.'):
case Magic('i'): case Magic('i'):
case Magic('I'): case Magic('I'):
@ -2228,24 +2208,20 @@ static int nfa_regatom(void)
case Magic('['): case Magic('['):
collection: collection:
/* // [abc] uses NFA_START_COLL - NFA_END_COLL
* [abc] uses NFA_START_COLL - NFA_END_COLL // [^abc] uses NFA_START_NEG_COLL - NFA_END_NEG_COLL
* [^abc] uses NFA_START_NEG_COLL - NFA_END_NEG_COLL // Each character is produced as a regular state, using
* Each character is produced as a regular state, using // NFA_CONCAT to bind them together.
* NFA_CONCAT to bind them together. // Besides normal characters there can be:
* Besides normal characters there can be: // - character classes NFA_CLASS_*
* - character classes NFA_CLASS_* // - ranges, two characters followed by NFA_RANGE.
* - ranges, two characters followed by NFA_RANGE.
*/
p = (char_u *)regparse; p = (char_u *)regparse;
endp = skip_anyof((char *)p); endp = skip_anyof((char *)p);
if (*endp == ']') { if (*endp == ']') {
/* // Try to reverse engineer character classes. For example,
* Try to reverse engineer character classes. For example, // recognize that [0-9] stands for \d and [A-Za-z_] for \h,
* recognize that [0-9] stands for \d and [A-Za-z_] for \h, // and perform the necessary substitutions in the NFA.
* and perform the necessary substitutions in the NFA.
*/
int result = nfa_recognize_char_class((char_u *)regparse, endp, extra == NFA_ADD_NL); int result = nfa_recognize_char_class((char_u *)regparse, endp, extra == NFA_ADD_NL);
if (result != FAIL) { if (result != FAIL) {
if (result >= NFA_FIRST_NL && result <= NFA_LAST_NL) { if (result >= NFA_FIRST_NL && result <= NFA_LAST_NL) {
@ -2259,10 +2235,8 @@ collection:
MB_PTR_ADV(regparse); MB_PTR_ADV(regparse);
return OK; return OK;
} }
/* // Failed to recognize a character class. Use the simple
* Failed to recognize a character class. Use the simple // version that turns [abc] into 'a' OR 'b' OR 'c'
* version that turns [abc] into 'a' OR 'b' OR 'c'
*/
startc = -1; startc = -1;
negated = false; negated = false;
if (*regparse == '^') { // negated range if (*regparse == '^') { // negated range
@ -2554,16 +2528,14 @@ nfa_do_multibyte:
return OK; return OK;
} }
/* // Parse something followed by possible [*+=].
* Parse something followed by possible [*+=]. //
* // A piece is an atom, possibly followed by a multi, an indication of how many
* A piece is an atom, possibly followed by a multi, an indication of how many // times the atom can be matched. Example: "a*" matches any sequence of "a"
* times the atom can be matched. Example: "a*" matches any sequence of "a" // characters: "", "a", "aa", etc.
* characters: "", "a", "aa", etc. //
* // piece ::= atom
* piece ::= atom // or atom multi
* or atom multi
*/
static int nfa_regpiece(void) static int nfa_regpiece(void)
{ {
int i; int i;
@ -2601,17 +2573,15 @@ static int nfa_regpiece(void)
break; break;
case Magic('+'): case Magic('+'):
/* // Trick: Normally, (a*)\+ would match the whole input "aaa". The
* Trick: Normally, (a*)\+ would match the whole input "aaa". The // first and only submatch would be "aaa". But the backtracking
* first and only submatch would be "aaa". But the backtracking // engine interprets the plus as "try matching one more time", and
* engine interprets the plus as "try matching one more time", and // a* matches a second time at the end of the input, the empty
* a* matches a second time at the end of the input, the empty // string.
* string. // The submatch will be the empty string.
* The submatch will be the empty string. //
* // In order to be consistent with the old engine, we replace
* In order to be consistent with the old engine, we replace // <atom>+ with <atom><atom>*
* <atom>+ with <atom><atom>*
*/
restore_parse_state(&old_state); restore_parse_state(&old_state);
curchr = -1; curchr = -1;
if (nfa_regatom() == FAIL) { if (nfa_regatom() == FAIL) {
@ -2770,16 +2740,14 @@ static int nfa_regpiece(void)
return OK; return OK;
} }
/* // Parse one or more pieces, concatenated. It matches a match for the
* Parse one or more pieces, concatenated. It matches a match for the // first piece, followed by a match for the second piece, etc. Example:
* first piece, followed by a match for the second piece, etc. Example: // "f[0-9]b", first matches "f", then a digit and then "b".
* "f[0-9]b", first matches "f", then a digit and then "b". //
* // concat ::= piece
* concat ::= piece // or piece piece
* or piece piece // or piece piece piece
* or piece piece piece // etc.
* etc.
*/
static int nfa_regconcat(void) static int nfa_regconcat(void)
{ {
bool cont = true; bool cont = true;
@ -2843,18 +2811,16 @@ static int nfa_regconcat(void)
return OK; return OK;
} }
/* // Parse a branch, one or more concats, separated by "\&". It matches the
* Parse a branch, one or more concats, separated by "\&". It matches the // last concat, but only if all the preceding concats also match at the same
* last concat, but only if all the preceding concats also match at the same // position. Examples:
* position. Examples: // "foobeep\&..." matches "foo" in "foobeep".
* "foobeep\&..." matches "foo" in "foobeep". // ".*Peter\&.*Bob" matches in a line containing both "Peter" and "Bob"
* ".*Peter\&.*Bob" matches in a line containing both "Peter" and "Bob" //
* // branch ::= concat
* branch ::= concat // or concat \& concat
* or concat \& concat // or concat \& concat \& concat
* or concat \& concat \& concat // etc.
* etc.
*/
static int nfa_regbranch(void) static int nfa_regbranch(void)
{ {
int old_post_pos; int old_post_pos;
@ -3311,9 +3277,7 @@ static FILE *log_fd;
static char_u e_log_open_failed[] = static char_u e_log_open_failed[] =
N_("Could not open temporary log file for writing, displaying on stderr... "); N_("Could not open temporary log file for writing, displaying on stderr... ");
/* // Print the postfix notation of the current regexp.
* Print the postfix notation of the current regexp.
*/
static void nfa_postfix_dump(char_u *expr, int retval) static void nfa_postfix_dump(char_u *expr, int retval)
{ {
int *p; int *p;
@ -3341,9 +3305,7 @@ static void nfa_postfix_dump(char_u *expr, int retval)
} }
} }
/* // Print the NFA starting with a root node "state".
* Print the NFA starting with a root node "state".
*/
static void nfa_print_state(FILE *debugf, nfa_state_T *state) static void nfa_print_state(FILE *debugf, nfa_state_T *state)
{ {
garray_T indent; garray_T indent;
@ -3413,9 +3375,7 @@ static void nfa_print_state2(FILE *debugf, nfa_state_T *state, garray_T *indent)
ga_append(indent, NUL); ga_append(indent, NUL);
} }
/* // Print the NFA state machine.
* Print the NFA state machine.
*/
static void nfa_dump(nfa_regprog_T *prog) static void nfa_dump(nfa_regprog_T *prog)
{ {
FILE *debugf = fopen(NFA_REGEXP_DUMP_LOG, "a"); FILE *debugf = fopen(NFA_REGEXP_DUMP_LOG, "a");
@ -3437,12 +3397,10 @@ static void nfa_dump(nfa_regprog_T *prog)
fclose(debugf); fclose(debugf);
} }
} }
#endif /* REGEXP_DEBUG */ #endif // REGEXP_DEBUG
/* // Parse r.e. @expr and convert it into postfix form.
* Parse r.e. @expr and convert it into postfix form. // Return the postfix string on success, NULL otherwise.
* Return the postfix string on success, NULL otherwise.
*/
static int *re2post(void) static int *re2post(void)
{ {
if (nfa_reg(REG_NOPAREN) == FAIL) { if (nfa_reg(REG_NOPAREN) == FAIL) {
@ -3454,18 +3412,14 @@ static int *re2post(void)
// NB. Some of the code below is inspired by Russ's. // NB. Some of the code below is inspired by Russ's.
/* // Represents an NFA state plus zero or one or two arrows exiting.
* Represents an NFA state plus zero or one or two arrows exiting. // if c == MATCH, no arrows out; matching state.
* if c == MATCH, no arrows out; matching state. // If c == SPLIT, unlabeled arrows to out and out1 (if != NULL).
* If c == SPLIT, unlabeled arrows to out and out1 (if != NULL). // If c < 256, labeled arrow with character c to out.
* If c < 256, labeled arrow with character c to out.
*/
static nfa_state_T *state_ptr; // points to nfa_prog->state static nfa_state_T *state_ptr; // points to nfa_prog->state
/* // Allocate and initialize nfa_state_T.
* Allocate and initialize nfa_state_T.
*/
static nfa_state_T *alloc_state(int c, nfa_state_T *out, nfa_state_T *out1) static nfa_state_T *alloc_state(int c, nfa_state_T *out, nfa_state_T *out1)
{ {
nfa_state_T *s; nfa_state_T *s;
@ -3488,16 +3442,12 @@ static nfa_state_T *alloc_state(int c, nfa_state_T *out, nfa_state_T *out1)
return s; return s;
} }
/* // A partially built NFA without the matching state filled in.
* A partially built NFA without the matching state filled in. // Frag_T.start points at the start state.
* Frag_T.start points at the start state. // Frag_T.out is a list of places that need to be set to the
* Frag_T.out is a list of places that need to be set to the // next state for this fragment.
* next state for this fragment.
*/
/* // Initialize a Frag_T struct and return it.
* Initialize a Frag_T struct and return it.
*/
static Frag_T frag(nfa_state_T *start, Ptrlist *out) static Frag_T frag(nfa_state_T *start, Ptrlist *out)
{ {
Frag_T n; Frag_T n;
@ -3507,9 +3457,7 @@ static Frag_T frag(nfa_state_T *start, Ptrlist *out)
return n; return n;
} }
/* // Create singleton list containing just outp.
* Create singleton list containing just outp.
*/
static Ptrlist *list1(nfa_state_T **outp) static Ptrlist *list1(nfa_state_T **outp)
{ {
Ptrlist *l; Ptrlist *l;
@ -3519,9 +3467,7 @@ static Ptrlist *list1(nfa_state_T **outp)
return l; return l;
} }
/* // Patch the list of states at out to point to start.
* Patch the list of states at out to point to start.
*/
static void patch(Ptrlist *l, nfa_state_T *s) static void patch(Ptrlist *l, nfa_state_T *s)
{ {
Ptrlist *next; Ptrlist *next;
@ -3532,9 +3478,7 @@ static void patch(Ptrlist *l, nfa_state_T *s)
} }
} }
/* // Join the two lists l1 and l2, returning the combination.
* Join the two lists l1 and l2, returning the combination.
*/
static Ptrlist *append(Ptrlist *l1, Ptrlist *l2) static Ptrlist *append(Ptrlist *l1, Ptrlist *l2)
{ {
Ptrlist *oldl1; Ptrlist *oldl1;
@ -3547,9 +3491,7 @@ static Ptrlist *append(Ptrlist *l1, Ptrlist *l2)
return oldl1; return oldl1;
} }
/* // Stack used for transforming postfix form into NFA.
* Stack used for transforming postfix form into NFA.
*/
static Frag_T empty; static Frag_T empty;
static void st_error(int *postfix, int *end, int *p) static void st_error(int *postfix, int *end, int *p)
@ -3592,9 +3534,7 @@ static void st_error(int *postfix, int *end, int *p)
emsg(_("E874: (NFA) Could not pop the stack!")); emsg(_("E874: (NFA) Could not pop the stack!"));
} }
/* // Push an item onto the stack.
* Push an item onto the stack.
*/
static void st_push(Frag_T s, Frag_T **p, Frag_T *stack_end) static void st_push(Frag_T s, Frag_T **p, Frag_T *stack_end)
{ {
Frag_T *stackp = *p; Frag_T *stackp = *p;
@ -3606,9 +3546,7 @@ static void st_push(Frag_T s, Frag_T **p, Frag_T *stack_end)
*p = *p + 1; *p = *p + 1;
} }
/* // Pop an item from the stack.
* Pop an item from the stack.
*/
static Frag_T st_pop(Frag_T **p, Frag_T *stack) static Frag_T st_pop(Frag_T **p, Frag_T *stack)
{ {
Frag_T *stackp; Frag_T *stackp;
@ -3621,10 +3559,8 @@ static Frag_T st_pop(Frag_T **p, Frag_T *stack)
return **p; return **p;
} }
/* // Estimate the maximum byte length of anything matching "state".
* Estimate the maximum byte length of anything matching "state". // When unknown or unlimited return -1.
* When unknown or unlimited return -1.
*/
static int nfa_max_width(nfa_state_T *startstate, int depth) static int nfa_max_width(nfa_state_T *startstate, int depth)
{ {
int l, r; int l, r;
@ -3827,10 +3763,8 @@ static int nfa_max_width(nfa_state_T *startstate, int depth)
return -1; return -1;
} }
/* // Convert a postfix form into its equivalent NFA.
* Convert a postfix form into its equivalent NFA. // Return the NFA start state on success, NULL otherwise.
* Return the NFA start state on success, NULL otherwise.
*/
static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size)
{ {
int *p; int *p;
@ -3866,7 +3800,7 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size)
stack_end = stack + (nstate + 1); stack_end = stack + (nstate + 1);
} }
for (p = postfix; p < end; ++p) { for (p = postfix; p < end; p++) {
switch (*p) { switch (*p) {
case NFA_CONCAT: case NFA_CONCAT:
// Concatenation. // Concatenation.
@ -4350,15 +4284,13 @@ theend:
#undef PUSH #undef PUSH
} }
/* // After building the NFA program, inspect it to add optimization hints.
* After building the NFA program, inspect it to add optimization hints.
*/
static void nfa_postprocess(nfa_regprog_T *prog) static void nfa_postprocess(nfa_regprog_T *prog)
{ {
int i; int i;
int c; int c;
for (i = 0; i < prog->nstate; ++i) { for (i = 0; i < prog->nstate; i++) {
c = prog->state[i].c; c = prog->state[i].c;
if (c == NFA_START_INVISIBLE if (c == NFA_START_INVISIBLE
|| c == NFA_START_INVISIBLE_NEG || c == NFA_START_INVISIBLE_NEG
@ -4490,9 +4422,7 @@ static void clear_sub(regsub_T *sub)
sub->in_use = 0; sub->in_use = 0;
} }
/* // Copy the submatches from "from" to "to".
* Copy the submatches from "from" to "to".
*/
static void copy_sub(regsub_T *to, regsub_T *from) static void copy_sub(regsub_T *to, regsub_T *from)
{ {
to->in_use = from->in_use; to->in_use = from->in_use;
@ -4508,9 +4438,7 @@ static void copy_sub(regsub_T *to, regsub_T *from)
} }
} }
/* // Like copy_sub() but exclude the main match.
* Like copy_sub() but exclude the main match.
*/
static void copy_sub_off(regsub_T *to, regsub_T *from) static void copy_sub_off(regsub_T *to, regsub_T *from)
{ {
if (to->in_use < from->in_use) { if (to->in_use < from->in_use) {
@ -4528,9 +4456,7 @@ static void copy_sub_off(regsub_T *to, regsub_T *from)
} }
} }
/* // Like copy_sub() but only do the end of the main match if \ze is present.
* Like copy_sub() but only do the end of the main match if \ze is present.
*/
static void copy_ze_off(regsub_T *to, regsub_T *from) static void copy_ze_off(regsub_T *to, regsub_T *from)
{ {
if (rex.nfa_has_zend) { if (rex.nfa_has_zend) {
@ -4954,7 +4880,7 @@ static regsubs_T *addstate(nfa_list_T *l, nfa_state_T *state, regsubs_T *subs_ar
// When called from addstate_here() do insert before // When called from addstate_here() do insert before
// existing states. // existing states.
if (add_here) { if (add_here) {
for (k = 0; k < l->n && k < listindex; ++k) { for (k = 0; k < l->n && k < listindex; k++) {
if (l->t[k].state->id == state->id) { if (l->t[k].state->id == state->id) {
found = true; found = true;
break; break;
@ -5094,7 +5020,7 @@ skip_add:
save_in_use = -1; save_in_use = -1;
} else { } else {
save_in_use = sub->in_use; save_in_use = sub->in_use;
for (i = sub->in_use; i < subidx; ++i) { for (i = sub->in_use; i < subidx; i++) {
sub->list.multi[i].start_lnum = -1; sub->list.multi[i].start_lnum = -1;
sub->list.multi[i].end_lnum = -1; sub->list.multi[i].end_lnum = -1;
} }
@ -5115,7 +5041,7 @@ skip_add:
save_in_use = -1; save_in_use = -1;
} else { } else {
save_in_use = sub->in_use; save_in_use = sub->in_use;
for (i = sub->in_use; i < subidx; ++i) { for (i = sub->in_use; i < subidx; i++) {
sub->list.line[i].start = NULL; sub->list.line[i].start = NULL;
sub->list.line[i].end = NULL; sub->list.line[i].end = NULL;
} }
@ -5314,9 +5240,7 @@ static regsubs_T *addstate_here(nfa_list_T *l, nfa_state_T *state, regsubs_T *su
return r; return r;
} }
/* // Check character class "class" against current character c.
* Check character class "class" against current character c.
*/
static int check_char_class(int class, int c) static int check_char_class(int class, int c)
{ {
switch (class) { switch (class) {
@ -5502,11 +5426,9 @@ static int match_zref(int subidx, int *bytelen)
return false; return false;
} }
/* // Save list IDs for all NFA states of "prog" into "list".
* Save list IDs for all NFA states of "prog" into "list". // Also reset the IDs to zero.
* Also reset the IDs to zero. // Only used for the recursive value lastlist[1].
* Only used for the recursive value lastlist[1].
*/
static void nfa_save_listids(nfa_regprog_T *prog, int *list) static void nfa_save_listids(nfa_regprog_T *prog, int *list)
{ {
int i; int i;
@ -5521,9 +5443,7 @@ static void nfa_save_listids(nfa_regprog_T *prog, int *list)
} }
} }
/* // Restore list IDs from "list" to all NFA states.
* Restore list IDs from "list" to all NFA states.
*/
static void nfa_restore_listids(nfa_regprog_T *prog, int *list) static void nfa_restore_listids(nfa_regprog_T *prog, int *list)
{ {
int i; int i;
@ -5547,11 +5467,9 @@ static bool nfa_re_num_cmp(uintmax_t val, int op, uintmax_t pos)
return val == pos; return val == pos;
} }
/* // Recursively call nfa_regmatch()
* Recursively call nfa_regmatch() // "pim" is NULL or contains info about a Postponed Invisible Match (start
* "pim" is NULL or contains info about a Postponed Invisible Match (start // position).
* position).
*/
static int recursive_regmatch(nfa_state_T *state, nfa_pim_T *pim, nfa_regprog_T *prog, static int recursive_regmatch(nfa_state_T *state, nfa_pim_T *pim, nfa_regprog_T *prog,
regsubs_T *submatch, regsubs_T *m, int **listids, int *listids_len) regsubs_T *submatch, regsubs_T *m, int **listids, int *listids_len)
FUNC_ATTR_NONNULL_ARG(1, 3, 5, 6, 7) FUNC_ATTR_NONNULL_ARG(1, 3, 5, 6, 7)
@ -5691,12 +5609,10 @@ static int recursive_regmatch(nfa_state_T *state, nfa_pim_T *pim, nfa_regprog_T
return result; return result;
} }
/* // Estimate the chance of a match with "state" failing.
* Estimate the chance of a match with "state" failing. // empty match: 0
* empty match: 0 // NFA_ANY: 1
* NFA_ANY: 1 // specific character: 99
* specific character: 99
*/
static int failure_chance(nfa_state_T *state, int depth) static int failure_chance(nfa_state_T *state, int depth)
{ {
int c = state->c; int c = state->c;
@ -5851,9 +5767,7 @@ static int failure_chance(nfa_state_T *state, int depth)
return 50; return 50;
} }
/* // Skip until the char "c" we know a match must start with.
* Skip until the char "c" we know a match must start with.
*/
static int skip_to_start(int c, colnr_T *colp) static int skip_to_start(int c, colnr_T *colp)
{ {
const char_u *const s = cstrchr(rex.line + *colp, c); const char_u *const s = cstrchr(rex.line + *colp, c);
@ -5864,11 +5778,9 @@ static int skip_to_start(int c, colnr_T *colp)
return OK; return OK;
} }
/* // Check for a match with match_text.
* Check for a match with match_text. // Called after skip_to_start() has found regstart.
* Called after skip_to_start() has found regstart. // Returns zero for no match, 1 for a match.
* Returns zero for no match, 1 for a match.
*/
static long find_match_text(colnr_T startcol, int regstart, char_u *match_text) static long find_match_text(colnr_T startcol, int regstart, char_u *match_text)
{ {
#define PTR2LEN(x) utf_ptr2len(x) #define PTR2LEN(x) utf_ptr2len(x)
@ -6038,9 +5950,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, regsubs_T *subm
add_off = clen; \ add_off = clen; \
} }
/* // Run for each character.
* Run for each character.
*/
for (;;) { for (;;) {
int curc = utf_ptr2char((char *)rex.input); int curc = utf_ptr2char((char *)rex.input);
int clen = utfc_ptr2len((char *)rex.input); int clen = utfc_ptr2len((char *)rex.input);
@ -6086,9 +5996,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, regsubs_T *subm
#ifdef NFA_REGEXP_DEBUG_LOG #ifdef NFA_REGEXP_DEBUG_LOG
fprintf(debug, "\n-------------------\n"); fprintf(debug, "\n-------------------\n");
#endif #endif
/* // If the state lists are empty we can stop.
* If the state lists are empty we can stop.
*/
if (thislist->n == 0) { if (thislist->n == 0) {
break; break;
} }
@ -6131,10 +6039,8 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, regsubs_T *subm
} }
#endif #endif
/* // Handle the possible codes of the current state.
* Handle the possible codes of the current state. // The most important is NFA_MATCH.
* The most important is NFA_MATCH.
*/
add_state = NULL; add_state = NULL;
add_here = false; add_here = false;
add_count = 0; add_count = 0;
@ -7525,10 +7431,8 @@ theend:
return retval; return retval;
} }
/* // Compile a regular expression into internal code for the NFA matcher.
* Compile a regular expression into internal code for the NFA matcher. // Returns the program in allocated space. Returns NULL for an error.
* Returns the program in allocated space. Returns NULL for an error.
*/
static regprog_T *nfa_regcomp(char_u *expr, int re_flags) static regprog_T *nfa_regcomp(char_u *expr, int re_flags)
{ {
nfa_regprog_T *prog = NULL; nfa_regprog_T *prog = NULL;
@ -7554,11 +7458,9 @@ static regprog_T *nfa_regcomp(char_u *expr, int re_flags)
goto fail; // Cascaded (syntax?) error goto fail; // Cascaded (syntax?) error
} }
/* // In order to build the NFA, we parse the input regexp twice:
* In order to build the NFA, we parse the input regexp twice: // 1. first pass to count size (so we can allocate space)
* 1. first pass to count size (so we can allocate space) // 2. second to emit code
* 2. second to emit code
*/
#ifdef REGEXP_DEBUG #ifdef REGEXP_DEBUG
{ {
FILE *f = fopen(NFA_REGEXP_RUN_LOG, "a"); FILE *f = fopen(NFA_REGEXP_RUN_LOG, "a");
@ -7573,10 +7475,8 @@ static regprog_T *nfa_regcomp(char_u *expr, int re_flags)
} }
#endif #endif
/* // PASS 1
* PASS 1 // Count number of NFA states in "nstate". Do not build the NFA.
* Count number of NFA states in "nstate". Do not build the NFA.
*/
post2nfa(postfix, post_ptr, true); post2nfa(postfix, post_ptr, true);
// allocate the regprog with space for the compiled regexp // allocate the regprog with space for the compiled regexp
@ -7585,10 +7485,8 @@ static regprog_T *nfa_regcomp(char_u *expr, int re_flags)
state_ptr = prog->state; state_ptr = prog->state;
prog->re_in_use = false; prog->re_in_use = false;
/* // PASS 2
* PASS 2 // Build the NFA
* Build the NFA
*/
prog->start = post2nfa(postfix, post_ptr, false); prog->start = post2nfa(postfix, post_ptr, false);
if (prog->start == NULL) { if (prog->start == NULL) {
goto fail; goto fail;
@ -7632,9 +7530,7 @@ fail:
goto out; goto out;
} }
/* // Free a compiled regexp program, returned by nfa_regcomp().
* Free a compiled regexp program, returned by nfa_regcomp().
*/
static void nfa_regfree(regprog_T *prog) static void nfa_regfree(regprog_T *prog)
{ {
if (prog != NULL) { if (prog != NULL) {

View File

@ -2465,9 +2465,9 @@ static afffile_T *spell_read_aff(spellinfo_T *spin, char_u *fname)
aff_entry->ae_cond = (char_u *)getroom_save(spin, (char_u *)items[4]); aff_entry->ae_cond = (char_u *)getroom_save(spin, (char_u *)items[4]);
if (*items[0] == 'P') { if (*items[0] == 'P') {
sprintf((char *)buf, "^%s", items[4]); sprintf((char *)buf, "^%s", items[4]); // NOLINT(runtime/printf)
} else { } else {
sprintf((char *)buf, "%s$", items[4]); sprintf((char *)buf, "%s$", items[4]); // NOLINT(runtime/printf)
} }
aff_entry->ae_prog = vim_regcomp((char *)buf, RE_MAGIC + RE_STRING + RE_STRICT); aff_entry->ae_prog = vim_regcomp((char *)buf, RE_MAGIC + RE_STRING + RE_STRICT);
if (aff_entry->ae_prog == NULL) { if (aff_entry->ae_prog == NULL) {
@ -2514,8 +2514,7 @@ static afffile_T *spell_read_aff(spellinfo_T *spin, char_u *fname)
onecap_copy((char_u *)items[4], buf, true); onecap_copy((char_u *)items[4], buf, true);
aff_entry->ae_cond = (char_u *)getroom_save(spin, buf); aff_entry->ae_cond = (char_u *)getroom_save(spin, buf);
if (aff_entry->ae_cond != NULL) { if (aff_entry->ae_cond != NULL) {
sprintf((char *)buf, "^%s", sprintf((char *)buf, "^%s", aff_entry->ae_cond); // NOLINT(runtime/printf)
aff_entry->ae_cond);
vim_regfree(aff_entry->ae_prog); vim_regfree(aff_entry->ae_prog);
aff_entry->ae_prog = vim_regcomp((char *)buf, RE_MAGIC + RE_STRING); aff_entry->ae_prog = vim_regcomp((char *)buf, RE_MAGIC + RE_STRING);
} }
@ -3614,7 +3613,7 @@ static int store_aff_word(spellinfo_T *spin, char_u *word, char_u *afflist, afff
if (store_aff_word(spin, newword, ae->ae_flags, if (store_aff_word(spin, newword, ae->ae_flags,
affile, &affile->af_suff, xht, affile, &affile->af_suff, xht,
use_condit & (xht == NULL use_condit & (xht == NULL
? ~0 : ~CONDIT_SUF), ? ~0 : ~CONDIT_SUF),
use_flags, use_pfxlist, pfxlen) == FAIL) { use_flags, use_pfxlist, pfxlen) == FAIL) {
retval = FAIL; retval = FAIL;
} }

View File

@ -1035,9 +1035,7 @@ int vim_vsnprintf_typval(char *str, size_t str_m, const char *fmt, va_list ap, t
: va_arg(ap, long long)); // NOLINT (runtime/int) : va_arg(ap, long long)); // NOLINT (runtime/int)
break; break;
case 'z': case 'z':
arg = (tvs arg = (tvs ? (ptrdiff_t)tv_nr(tvs, &arg_idx) : va_arg(ap, ptrdiff_t));
? (ptrdiff_t)tv_nr(tvs, &arg_idx)
: va_arg(ap, ptrdiff_t));
break; break;
} }
if (arg > 0) { if (arg > 0) {
@ -1049,19 +1047,13 @@ int vim_vsnprintf_typval(char *str, size_t str_m, const char *fmt, va_list ap, t
// unsigned // unsigned
switch (length_modifier) { switch (length_modifier) {
case '\0': case '\0':
uarg = (unsigned int)(tvs uarg = (unsigned int)(tvs ? tv_nr(tvs, &arg_idx) : va_arg(ap, unsigned int));
? tv_nr(tvs, &arg_idx)
: va_arg(ap, unsigned int));
break; break;
case 'h': case 'h':
uarg = (uint16_t)(tvs uarg = (uint16_t)(tvs ? tv_nr(tvs, &arg_idx) : va_arg(ap, unsigned int));
? tv_nr(tvs, &arg_idx)
: va_arg(ap, unsigned int));
break; break;
case 'l': case 'l':
uarg = (tvs uarg = (tvs ? (unsigned long)tv_nr(tvs, &arg_idx) : va_arg(ap, unsigned long));
? (unsigned long)tv_nr(tvs, &arg_idx)
: va_arg(ap, unsigned long));
break; break;
case '2': case '2':
uarg = (uintmax_t)(unsigned long long)( // NOLINT (runtime/int) uarg = (uintmax_t)(unsigned long long)( // NOLINT (runtime/int)
@ -1071,9 +1063,7 @@ int vim_vsnprintf_typval(char *str, size_t str_m, const char *fmt, va_list ap, t
: va_arg(ap, unsigned long long)); // NOLINT (runtime/int) : va_arg(ap, unsigned long long)); // NOLINT (runtime/int)
break; break;
case 'z': case 'z':
uarg = (tvs uarg = (tvs ? (size_t)tv_nr(tvs, &arg_idx) : va_arg(ap, size_t));
? (size_t)tv_nr(tvs, &arg_idx)
: va_arg(ap, size_t));
break; break;
} }
arg_sign = (uarg != 0); arg_sign = (uarg != 0);

View File

@ -1655,13 +1655,12 @@ static int syn_current_attr(const bool syncing, const bool displaying, bool *con
&& (spp->sp_type == SPTYPE_MATCH && (spp->sp_type == SPTYPE_MATCH
|| spp->sp_type == SPTYPE_START) || spp->sp_type == SPTYPE_START)
&& (current_next_list != NULL && (current_next_list != NULL
? in_id_list(NULL, current_next_list, ? in_id_list(NULL, current_next_list, &spp->sp_syn, 0)
&spp->sp_syn, 0) : (cur_si == NULL
: (cur_si == NULL ? !(spp->sp_flags & HL_CONTAINED)
? !(spp->sp_flags & HL_CONTAINED) : in_id_list(cur_si,
: in_id_list(cur_si, cur_si->si_cont_list, &spp->sp_syn,
cur_si->si_cont_list, &spp->sp_syn, spp->sp_flags & HL_CONTAINED)))) {
spp->sp_flags & HL_CONTAINED)))) {
// If we already tried matching in this line, and // If we already tried matching in this line, and
// there isn't a match before next_match_col, skip // there isn't a match before next_match_col, skip
// this item. // this item.
@ -2788,9 +2787,9 @@ static keyentry_T *match_keyword(char *keyword, hashtab_T *ht, stateitem_T *cur_
if (current_next_list != 0 if (current_next_list != 0
? in_id_list(NULL, current_next_list, &kp->k_syn, 0) ? in_id_list(NULL, current_next_list, &kp->k_syn, 0)
: (cur_si == NULL : (cur_si == NULL
? !(kp->flags & HL_CONTAINED) ? !(kp->flags & HL_CONTAINED)
: in_id_list(cur_si, cur_si->si_cont_list, : in_id_list(cur_si, cur_si->si_cont_list,
&kp->k_syn, kp->flags & HL_CONTAINED))) { &kp->k_syn, kp->flags & HL_CONTAINED))) {
return kp; return kp;
} }
} }

View File

@ -628,8 +628,8 @@ LexExprToken viml_pexpr_next_token(ParserState *const pstate, const int flags)
GET_CCS(ret, pline); GET_CCS(ret, pline);
ret.data.cmp.inv = (schar == '<'); ret.data.cmp.inv = (schar == '<');
ret.data.cmp.type = ((ret.data.cmp.inv ^ haseqsign) ret.data.cmp.type = ((ret.data.cmp.inv ^ haseqsign)
? kExprCmpGreaterOrEqual ? kExprCmpGreaterOrEqual
: kExprCmpGreater); : kExprCmpGreater);
break; break;
} }
@ -1963,8 +1963,8 @@ ExprAST viml_pexpr_parse(ParserState *const pstate, const int flags)
|| ((*kv_Z(ast_stack, 1))->type != kExprNodeConcat || ((*kv_Z(ast_stack, 1))->type != kExprNodeConcat
&& ((*kv_Z(ast_stack, 1))->type && ((*kv_Z(ast_stack, 1))->type
!= kExprNodeConcatOrSubscript)))) != kExprNodeConcatOrSubscript))))
? kELFlagAllowFloat ? kELFlagAllowFloat
: 0)); : 0));
LexExprToken cur_token = viml_pexpr_next_token(pstate, LexExprToken cur_token = viml_pexpr_next_token(pstate,
want_node_to_lexer_flags[want_node] | want_node_to_lexer_flags[want_node] |
lexer_additional_flags); lexer_additional_flags);
@ -2031,9 +2031,9 @@ viml_pexpr_parse_process_token:
const bool node_is_key = ( const bool node_is_key = (
is_concat_or_subscript is_concat_or_subscript
&& (cur_token.type == kExprLexPlainIdentifier && (cur_token.type == kExprLexPlainIdentifier
? (!cur_token.data.var.autoload ? (!cur_token.data.var.autoload
&& cur_token.data.var.scope == kExprVarScopeMissing) && cur_token.data.var.scope == kExprVarScopeMissing)
: (cur_token.type == kExprLexNumber)) : (cur_token.type == kExprLexNumber))
&& prev_token.type != kExprLexSpacing); && prev_token.type != kExprLexSpacing);
if (is_concat_or_subscript && !node_is_key) { if (is_concat_or_subscript && !node_is_key) {
// Note: in Vim "d. a" (this is the reason behind `prev_token.type != // Note: in Vim "d. a" (this is the reason behind `prev_token.type !=
@ -2707,14 +2707,14 @@ viml_pexpr_parse_figure_brace_closing_error:
break; break;
case kExprLexPlainIdentifier: { case kExprLexPlainIdentifier: {
const ExprVarScope scope = (cur_token.type == kExprLexInvalid const ExprVarScope scope = (cur_token.type == kExprLexInvalid
? kExprVarScopeMissing ? kExprVarScopeMissing
: cur_token.data.var.scope); : cur_token.data.var.scope);
if (want_node == kENodeValue) { if (want_node == kENodeValue) {
want_node = kENodeOperator; want_node = kENodeOperator;
NEW_NODE_WITH_CUR_POS(cur_node, NEW_NODE_WITH_CUR_POS(cur_node,
(node_is_key (node_is_key
? kExprNodePlainKey ? kExprNodePlainKey
: kExprNodePlainIdentifier)); : kExprNodePlainIdentifier));
cur_node->data.var.scope = scope; cur_node->data.var.scope = scope;
const size_t scope_shift = (scope == kExprVarScopeMissing ? 0 : 2); const size_t scope_shift = (scope == kExprVarScopeMissing ? 0 : 2);
cur_node->data.var.ident = (pline.data + cur_token.start.col cur_node->data.var.ident = (pline.data + cur_token.start.col
@ -2732,8 +2732,8 @@ viml_pexpr_parse_figure_brace_closing_error:
scope_shift), scope_shift),
cur_token.len - scope_shift, cur_token.len - scope_shift,
(node_is_key (node_is_key
? HL(IdentifierKey) ? HL(IdentifierKey)
: HL(IdentifierName))); : HL(IdentifierName)));
} else { } else {
if (scope == kExprVarScopeMissing) { if (scope == kExprVarScopeMissing) {
// uncrustify:off // uncrustify:off
@ -2902,15 +2902,15 @@ viml_pexpr_parse_no_paren_closing_error: {}
// different error numbers: "E114: Missing quote" and // different error numbers: "E114: Missing quote" and
// "E115: Missing quote". // "E115: Missing quote".
ERROR_FROM_TOKEN_AND_MSG(cur_token, (is_double ERROR_FROM_TOKEN_AND_MSG(cur_token, (is_double
? _("E114: Missing double quote: %.*s") ? _("E114: Missing double quote: %.*s")
: _("E115: Missing single quote: %.*s"))); : _("E115: Missing single quote: %.*s")));
} }
if (want_node == kENodeOperator) { if (want_node == kENodeOperator) {
OP_MISSING; OP_MISSING;
} }
NEW_NODE_WITH_CUR_POS(cur_node, (is_double NEW_NODE_WITH_CUR_POS(cur_node, (is_double
? kExprNodeDoubleQuotedString ? kExprNodeDoubleQuotedString
: kExprNodeSingleQuotedString)); : kExprNodeSingleQuotedString));
*top_node_p = cur_node; *top_node_p = cur_node;
parse_quoted_string(pstate, cur_node, cur_token, &ast_stack, is_invalid); parse_quoted_string(pstate, cur_node, cur_token, &ast_stack, is_invalid);
want_node = kENodeOperator; want_node = kENodeOperator;