mirror of
https://github.com/neovim/neovim.git
synced 2025-02-25 18:55:25 -06:00
refactor: more clint (#20910)
This commit is contained in:
parent
e9c1cb71f8
commit
bdb98de2d1
@ -122,8 +122,8 @@ int eexe_mod_op(typval_T *const tv1, const typval_T *const tv2, const char *cons
|
||||
break;
|
||||
}
|
||||
const float_T f = (tv2->v_type == VAR_FLOAT
|
||||
? tv2->vval.v_float
|
||||
: (float_T)tv_get_number(tv2));
|
||||
? tv2->vval.v_float
|
||||
: (float_T)tv_get_number(tv2));
|
||||
switch (*op) {
|
||||
case '+':
|
||||
tv1->vval.v_float += f; break;
|
||||
|
@ -729,7 +729,7 @@ void ex_hardcopy(exarg_T *eap)
|
||||
}
|
||||
|
||||
assert(prtpos.bytes_printed <= SIZE_MAX / 100);
|
||||
sprintf((char *)IObuff, _("Printing page %d (%zu%%)"),
|
||||
sprintf((char *)IObuff, _("Printing page %d (%zu%%)"), // NOLINT(runtime/printf)
|
||||
page_count + 1 + side,
|
||||
prtpos.bytes_printed * 100 / bytes_to_print);
|
||||
if (!mch_print_begin_page((char_u *)IObuff)) {
|
||||
@ -750,8 +750,7 @@ void ex_hardcopy(exarg_T *eap)
|
||||
prtpos.file_line);
|
||||
}
|
||||
|
||||
for (page_line = 0; page_line < settings.lines_per_page;
|
||||
++page_line) {
|
||||
for (page_line = 0; page_line < settings.lines_per_page; page_line++) {
|
||||
prtpos.column = hardcopy_line(&settings,
|
||||
page_line, &prtpos);
|
||||
if (prtpos.column == 0) {
|
||||
@ -2440,8 +2439,7 @@ bool mch_print_begin(prt_settings_T *psettings)
|
||||
prt_dsc_font_resource("DocumentNeededResources", &prt_ps_courier_font);
|
||||
}
|
||||
if (prt_out_mbyte) {
|
||||
prt_dsc_font_resource((prt_use_courier ? NULL
|
||||
: "DocumentNeededResources"), &prt_ps_mb_font);
|
||||
prt_dsc_font_resource((prt_use_courier ? NULL : "DocumentNeededResources"), &prt_ps_mb_font);
|
||||
if (!prt_custom_cmap) {
|
||||
prt_dsc_resources(NULL, "cmap", prt_cmap);
|
||||
}
|
||||
@ -2990,7 +2988,7 @@ int mch_print_text_out(char_u *const textp, size_t len)
|
||||
ga_append(&prt_ps_buffer, '\\'); break;
|
||||
|
||||
default:
|
||||
sprintf((char *)ch_buff, "%03o", (unsigned int)ch);
|
||||
sprintf((char *)ch_buff, "%03o", (unsigned int)ch); // NOLINT(runtime/printf)
|
||||
ga_append(&prt_ps_buffer, (char)ch_buff[0]);
|
||||
ga_append(&prt_ps_buffer, (char)ch_buff[1]);
|
||||
ga_append(&prt_ps_buffer, (char)ch_buff[2]);
|
||||
|
@ -1550,7 +1550,7 @@ void show_utf8(void)
|
||||
}
|
||||
clen = utf_ptr2len((char *)line + i);
|
||||
}
|
||||
sprintf((char *)IObuff + rlen, "%02x ",
|
||||
sprintf((char *)IObuff + rlen, "%02x ", // NOLINT(runtime/printf)
|
||||
(line[i] == NL) ? NUL : line[i]); // NUL is stored as NL
|
||||
clen--;
|
||||
rlen += (int)strlen(IObuff + rlen);
|
||||
|
@ -2135,13 +2135,13 @@ static int ml_append_int(buf_T *buf, linenr_T lnum, char_u *line, colnr_T len, b
|
||||
buf->b_ml.ml_stack_top = stack_idx + 1; // truncate stack
|
||||
|
||||
if (lineadd) {
|
||||
--(buf->b_ml.ml_stack_top);
|
||||
(buf->b_ml.ml_stack_top)--;
|
||||
// fix line count for rest of blocks in the stack
|
||||
ml_lineadd(buf, lineadd);
|
||||
// fix stack itself
|
||||
buf->b_ml.ml_stack[buf->b_ml.ml_stack_top].ip_high +=
|
||||
lineadd;
|
||||
++(buf->b_ml.ml_stack_top);
|
||||
(buf->b_ml.ml_stack_top)++;
|
||||
}
|
||||
|
||||
// We are finished, break the loop here.
|
||||
@ -2428,7 +2428,7 @@ static int ml_delete_int(buf_T *buf, linenr_T lnum, bool message)
|
||||
buf->b_ml.ml_stack[buf->b_ml.ml_stack_top].ip_high +=
|
||||
buf->b_ml.ml_locked_lineadd;
|
||||
}
|
||||
++(buf->b_ml.ml_stack_top);
|
||||
(buf->b_ml.ml_stack_top)++;
|
||||
|
||||
break;
|
||||
}
|
||||
@ -2698,11 +2698,11 @@ static bhdr_T *ml_find_line(buf_T *buf, linenr_T lnum, int action)
|
||||
&& buf->b_ml.ml_locked_high >= lnum) {
|
||||
// remember to update pointer blocks and stack later
|
||||
if (action == ML_INSERT) {
|
||||
++(buf->b_ml.ml_locked_lineadd);
|
||||
++(buf->b_ml.ml_locked_high);
|
||||
(buf->b_ml.ml_locked_lineadd)++;
|
||||
(buf->b_ml.ml_locked_high)++;
|
||||
} else if (action == ML_DELETE) {
|
||||
--(buf->b_ml.ml_locked_lineadd);
|
||||
--(buf->b_ml.ml_locked_high);
|
||||
(buf->b_ml.ml_locked_lineadd)--;
|
||||
(buf->b_ml.ml_locked_high)--;
|
||||
}
|
||||
return buf->b_ml.ml_locked;
|
||||
}
|
||||
|
@ -95,9 +95,8 @@ bool msgpack_rpc_to_object(const msgpack_object *const obj, Object *const arg)
|
||||
dest = conv(((String) { \
|
||||
.size = obj->via.attr.size, \
|
||||
.data = (obj->via.attr.ptr == NULL || obj->via.attr.size == 0 \
|
||||
? xmemdupz("", 0) \
|
||||
: xmemdupz(obj->via.attr.ptr, obj->via.attr.size)), \
|
||||
})); \
|
||||
? xmemdupz("", 0) \
|
||||
: xmemdupz(obj->via.attr.ptr, obj->via.attr.size)), })); \
|
||||
break; \
|
||||
}
|
||||
STR_CASE(MSGPACK_OBJECT_STR, str, cur.mobj, *cur.aobj, STRING_OBJ)
|
||||
|
@ -2266,7 +2266,7 @@ bool find_decl(char_u *ptr, size_t len, bool locally, bool thisblock, int flags_
|
||||
// Put "\V" before the pattern to avoid that the special meaning of "."
|
||||
// and "~" causes trouble.
|
||||
assert(len <= INT_MAX);
|
||||
sprintf((char *)pat, vim_iswordp(ptr) ? "\\V\\<%.*s\\>" : "\\V%.*s",
|
||||
sprintf((char *)pat, vim_iswordp(ptr) ? "\\V\\<%.*s\\>" : "\\V%.*s", // NOLINT(runtime/printf)
|
||||
(int)len, ptr);
|
||||
old_pos = curwin->w_cursor;
|
||||
save_p_ws = p_ws;
|
||||
|
@ -1,9 +1,7 @@
|
||||
// This is an open source non-commercial project. Dear PVS-Studio, please check
|
||||
// it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com
|
||||
|
||||
/*
|
||||
* Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
|
||||
*/
|
||||
// Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
|
||||
|
||||
// By default: do not create debugging logs or files related to regular
|
||||
// expressions, even when compiling with -DDEBUG.
|
||||
@ -41,21 +39,17 @@
|
||||
# define BT_REGEXP_DEBUG_LOG_NAME "bt_regexp_debug.log"
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Magic characters have a special meaning, they don't match literally.
|
||||
* Magic characters are negative. This separates them from literal characters
|
||||
* (possibly multi-byte). Only ASCII characters can be Magic.
|
||||
*/
|
||||
// Magic characters have a special meaning, they don't match literally.
|
||||
// Magic characters are negative. This separates them from literal characters
|
||||
// (possibly multi-byte). Only ASCII characters can be Magic.
|
||||
#define Magic(x) ((int)(x) - 256)
|
||||
#define un_Magic(x) ((x) + 256)
|
||||
#define is_Magic(x) ((x) < 0)
|
||||
|
||||
/*
|
||||
* We should define ftpr as a pointer to a function returning a pointer to
|
||||
* a function returning a pointer to a function ...
|
||||
* This is impossible, so we declare a pointer to a function returning a
|
||||
* pointer to a function returning void. This should work for all compilers.
|
||||
*/
|
||||
// We should define ftpr as a pointer to a function returning a pointer to
|
||||
// a function returning a pointer to a function ...
|
||||
// This is impossible, so we declare a pointer to a function returning a
|
||||
// pointer to a function returning void. This should work for all compilers.
|
||||
typedef void (*(*fptr_T)(int *, int))(void);
|
||||
|
||||
static int no_Magic(int x)
|
||||
@ -143,28 +137,24 @@ static int re_multi_type(int c)
|
||||
|
||||
static char *reg_prev_sub = NULL;
|
||||
|
||||
/*
|
||||
* REGEXP_INRANGE contains all characters which are always special in a []
|
||||
* range after '\'.
|
||||
* REGEXP_ABBR contains all characters which act as abbreviations after '\'.
|
||||
* These are:
|
||||
* \n - New line (NL).
|
||||
* \r - Carriage Return (CR).
|
||||
* \t - Tab (TAB).
|
||||
* \e - Escape (ESC).
|
||||
* \b - Backspace (Ctrl_H).
|
||||
* \d - Character code in decimal, eg \d123
|
||||
* \o - Character code in octal, eg \o80
|
||||
* \x - Character code in hex, eg \x4a
|
||||
* \u - Multibyte character code, eg \u20ac
|
||||
* \U - Long multibyte character code, eg \U12345678
|
||||
*/
|
||||
// REGEXP_INRANGE contains all characters which are always special in a []
|
||||
// range after '\'.
|
||||
// REGEXP_ABBR contains all characters which act as abbreviations after '\'.
|
||||
// These are:
|
||||
// \n - New line (NL).
|
||||
// \r - Carriage Return (CR).
|
||||
// \t - Tab (TAB).
|
||||
// \e - Escape (ESC).
|
||||
// \b - Backspace (Ctrl_H).
|
||||
// \d - Character code in decimal, eg \d123
|
||||
// \o - Character code in octal, eg \o80
|
||||
// \x - Character code in hex, eg \x4a
|
||||
// \u - Multibyte character code, eg \u20ac
|
||||
// \U - Long multibyte character code, eg \U12345678
|
||||
static char REGEXP_INRANGE[] = "]^-n\\";
|
||||
static char REGEXP_ABBR[] = "nrtebdoxuU";
|
||||
|
||||
/*
|
||||
* Translate '\x' to its control character, except "\n", which is Magic.
|
||||
*/
|
||||
// Translate '\x' to its control character, except "\n", which is Magic.
|
||||
static int backslash_trans(int c)
|
||||
{
|
||||
switch (c) {
|
||||
@ -239,10 +229,8 @@ static int get_char_class(char **pp)
|
||||
return CLASS_NONE;
|
||||
}
|
||||
|
||||
/*
|
||||
* Specific version of character class functions.
|
||||
* Using a table to keep this fast.
|
||||
*/
|
||||
// Specific version of character class functions.
|
||||
// Using a table to keep this fast.
|
||||
static int16_t class_tab[256];
|
||||
|
||||
#define RI_DIGIT 0x01
|
||||
@ -325,9 +313,7 @@ static int reg_string; // matching with a string instead of a buffer
|
||||
// line
|
||||
static int reg_strict; // "[abc" is illegal
|
||||
|
||||
/*
|
||||
* META contains all characters that may be magic, except '^' and '$'.
|
||||
*/
|
||||
// META contains all characters that may be magic, except '^' and '$'.
|
||||
|
||||
// uncrustify:off
|
||||
|
||||
@ -391,11 +377,9 @@ int re_multiline(const regprog_T *prog)
|
||||
return prog->regflags & RF_HASNL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check for an equivalence class name "[=a=]". "pp" points to the '['.
|
||||
* Returns a character representing the class. Zero means that no item was
|
||||
* recognized. Otherwise "pp" is advanced to after the item.
|
||||
*/
|
||||
// Check for an equivalence class name "[=a=]". "pp" points to the '['.
|
||||
// Returns a character representing the class. Zero means that no item was
|
||||
// recognized. Otherwise "pp" is advanced to after the item.
|
||||
static int get_equi_class(char **pp)
|
||||
{
|
||||
int c;
|
||||
@ -413,12 +397,10 @@ static int get_equi_class(char **pp)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check for a collating element "[.a.]". "pp" points to the '['.
|
||||
* Returns a character. Zero means that no item was recognized. Otherwise
|
||||
* "pp" is advanced to after the item.
|
||||
* Currently only single characters are recognized!
|
||||
*/
|
||||
// Check for a collating element "[.a.]". "pp" points to the '['.
|
||||
// Returns a character. Zero means that no item was recognized. Otherwise
|
||||
// "pp" is advanced to after the item.
|
||||
// Currently only single characters are recognized!
|
||||
static int get_coll_element(char **pp)
|
||||
{
|
||||
int c;
|
||||
@ -562,9 +544,7 @@ static int prevchr_len; // byte length of previous char
|
||||
static int at_start; // True when on the first character
|
||||
static int prev_at_start; // True when on the second character
|
||||
|
||||
/*
|
||||
* Start parsing at "str".
|
||||
*/
|
||||
// Start parsing at "str".
|
||||
static void initchr(char_u *str)
|
||||
{
|
||||
regparse = (char *)str;
|
||||
@ -574,10 +554,8 @@ static void initchr(char_u *str)
|
||||
prev_at_start = false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Save the current parse state, so that it can be restored and parsing
|
||||
* starts in the same state again.
|
||||
*/
|
||||
// Save the current parse state, so that it can be restored and parsing
|
||||
// starts in the same state again.
|
||||
static void save_parse_state(parse_state_T *ps)
|
||||
{
|
||||
ps->regparse = (char_u *)regparse;
|
||||
@ -591,9 +569,7 @@ static void save_parse_state(parse_state_T *ps)
|
||||
ps->regnpar = regnpar;
|
||||
}
|
||||
|
||||
/*
|
||||
* Restore a previously saved parse state.
|
||||
*/
|
||||
// Restore a previously saved parse state.
|
||||
static void restore_parse_state(parse_state_T *ps)
|
||||
{
|
||||
regparse = (char *)ps->regparse;
|
||||
@ -607,9 +583,7 @@ static void restore_parse_state(parse_state_T *ps)
|
||||
regnpar = ps->regnpar;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get the next character without advancing.
|
||||
*/
|
||||
// Get the next character without advancing.
|
||||
static int peekchr(void)
|
||||
{
|
||||
static int after_slash = false;
|
||||
@ -736,9 +710,7 @@ static int peekchr(void)
|
||||
after_slash--;
|
||||
curchr = toggle_Magic(curchr);
|
||||
} else if (vim_strchr(REGEXP_ABBR, c)) {
|
||||
/*
|
||||
* Handle abbreviations, like "\t" for TAB -- webb
|
||||
*/
|
||||
// Handle abbreviations, like "\t" for TAB -- webb
|
||||
curchr = backslash_trans(c);
|
||||
} else if (reg_magic == MAGIC_NONE && (c == '$' || c == '^')) {
|
||||
curchr = toggle_Magic(c);
|
||||
@ -757,9 +729,7 @@ static int peekchr(void)
|
||||
return curchr;
|
||||
}
|
||||
|
||||
/*
|
||||
* Eat one lexed character. Do this in a way that we can undo it.
|
||||
*/
|
||||
// Eat one lexed character. Do this in a way that we can undo it.
|
||||
static void skipchr(void)
|
||||
{
|
||||
// peekchr() eats a backslash, do the same here
|
||||
@ -781,10 +751,8 @@ static void skipchr(void)
|
||||
nextchr = -1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Skip a character while keeping the value of prev_at_start for at_start.
|
||||
* prevchr and prevprevchr are also kept.
|
||||
*/
|
||||
// Skip a character while keeping the value of prev_at_start for at_start.
|
||||
// prevchr and prevprevchr are also kept.
|
||||
static void skipchr_keepstart(void)
|
||||
{
|
||||
int as = prev_at_start;
|
||||
@ -797,10 +765,8 @@ static void skipchr_keepstart(void)
|
||||
prevprevchr = prpr;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get the next character from the pattern. We know about magic and such, so
|
||||
* therefore we need a lexical analyzer.
|
||||
*/
|
||||
// Get the next character from the pattern. We know about magic and such, so
|
||||
// therefore we need a lexical analyzer.
|
||||
static int getchr(void)
|
||||
{
|
||||
int chr = peekchr();
|
||||
@ -809,9 +775,7 @@ static int getchr(void)
|
||||
return chr;
|
||||
}
|
||||
|
||||
/*
|
||||
* put character back. Works only once!
|
||||
*/
|
||||
// put character back. Works only once!
|
||||
static void ungetchr(void)
|
||||
{
|
||||
nextchr = curchr;
|
||||
@ -825,15 +789,13 @@ static void ungetchr(void)
|
||||
regparse -= prevchr_len;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get and return the value of the hex string at the current position.
|
||||
* Return -1 if there is no valid hex number.
|
||||
* The position is updated:
|
||||
* blahblah\%x20asdf
|
||||
* before-^ ^-after
|
||||
* The parameter controls the maximum number of input characters. This will be
|
||||
* 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
|
||||
*/
|
||||
// Get and return the value of the hex string at the current position.
|
||||
// Return -1 if there is no valid hex number.
|
||||
// The position is updated:
|
||||
// blahblah\%x20asdf
|
||||
// before-^ ^-after
|
||||
// The parameter controls the maximum number of input characters. This will be
|
||||
// 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
|
||||
static int64_t gethexchrs(int maxinputlen)
|
||||
{
|
||||
int64_t nr = 0;
|
||||
@ -856,10 +818,8 @@ static int64_t gethexchrs(int maxinputlen)
|
||||
return nr;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get and return the value of the decimal string immediately after the
|
||||
* current position. Return -1 for invalid. Consumes all digits.
|
||||
*/
|
||||
// Get and return the value of the decimal string immediately after the
|
||||
// current position. Return -1 for invalid. Consumes all digits.
|
||||
static int64_t getdecchrs(void)
|
||||
{
|
||||
int64_t nr = 0;
|
||||
@ -883,14 +843,12 @@ static int64_t getdecchrs(void)
|
||||
return nr;
|
||||
}
|
||||
|
||||
/*
|
||||
* get and return the value of the octal string immediately after the current
|
||||
* position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
|
||||
* numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
|
||||
* treat 8 or 9 as recognised characters. Position is updated:
|
||||
* blahblah\%o210asdf
|
||||
* before-^ ^-after
|
||||
*/
|
||||
// get and return the value of the octal string immediately after the current
|
||||
// position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
|
||||
// numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
|
||||
// treat 8 or 9 as recognised characters. Position is updated:
|
||||
// blahblah\%o210asdf
|
||||
// before-^ ^-after
|
||||
static int64_t getoctchrs(void)
|
||||
{
|
||||
int64_t nr = 0;
|
||||
@ -913,12 +871,10 @@ static int64_t getoctchrs(void)
|
||||
return nr;
|
||||
}
|
||||
|
||||
/*
|
||||
* read_limits - Read two integers to be taken as a minimum and maximum.
|
||||
* If the first character is '-', then the range is reversed.
|
||||
* Should end with 'end'. If minval is missing, zero is default, if maxval is
|
||||
* missing, a very big number is the default.
|
||||
*/
|
||||
// read_limits - Read two integers to be taken as a minimum and maximum.
|
||||
// If the first character is '-', then the range is reversed.
|
||||
// Should end with 'end'. If minval is missing, zero is default, if maxval is
|
||||
// missing, a very big number is the default.
|
||||
static int read_limits(long *minval, long *maxval)
|
||||
{
|
||||
int reverse = false;
|
||||
@ -950,10 +906,8 @@ static int read_limits(long *minval, long *maxval)
|
||||
EMSG2_RET_FAIL(_("E554: Syntax error in %s{...}"), reg_magic == MAGIC_ALL);
|
||||
}
|
||||
|
||||
/*
|
||||
* Reverse the range if there was a '-', or make sure it is in the right
|
||||
* order otherwise.
|
||||
*/
|
||||
// Reverse the range if there was a '-', or make sure it is in the right
|
||||
// order otherwise.
|
||||
if ((!reverse && *minval > *maxval) || (reverse && *minval < *maxval)) {
|
||||
tmp = *minval;
|
||||
*minval = *maxval;
|
||||
@ -963,13 +917,9 @@ static int read_limits(long *minval, long *maxval)
|
||||
return OK;
|
||||
}
|
||||
|
||||
/*
|
||||
* vim_regexec and friends
|
||||
*/
|
||||
// vim_regexec and friends
|
||||
|
||||
/*
|
||||
* Global work variables for vim_regexec().
|
||||
*/
|
||||
// Global work variables for vim_regexec().
|
||||
|
||||
// Sometimes need to save a copy of a line. Since alloc()/free() is very
|
||||
// slow, we keep one allocated piece of memory and only re-allocate it when
|
||||
@ -1052,9 +1002,7 @@ static bool reg_iswordc(int c)
|
||||
return vim_iswordc_buf(c, rex.reg_buf);
|
||||
}
|
||||
|
||||
/*
|
||||
* Get pointer to the line "lnum", which is relative to "reg_firstlnum".
|
||||
*/
|
||||
// Get pointer to the line "lnum", which is relative to "reg_firstlnum".
|
||||
static char_u *reg_getline(linenr_T lnum)
|
||||
{
|
||||
// when looking behind for a match/no-match lnum is negative. But we
|
||||
@ -1077,9 +1025,7 @@ static lpos_T reg_endzpos[NSUBEXP]; // idem, end pos
|
||||
// true if using multi-line regexp.
|
||||
#define REG_MULTI (rex.reg_match == NULL)
|
||||
|
||||
/*
|
||||
* Create a new extmatch and mark it as referenced once.
|
||||
*/
|
||||
// Create a new extmatch and mark it as referenced once.
|
||||
static reg_extmatch_T *make_extmatch(void)
|
||||
FUNC_ATTR_NONNULL_RET
|
||||
{
|
||||
@ -1088,9 +1034,7 @@ static reg_extmatch_T *make_extmatch(void)
|
||||
return em;
|
||||
}
|
||||
|
||||
/*
|
||||
* Add a reference to an extmatch.
|
||||
*/
|
||||
// Add a reference to an extmatch.
|
||||
reg_extmatch_T *ref_extmatch(reg_extmatch_T *em)
|
||||
{
|
||||
if (em != NULL) {
|
||||
@ -1099,10 +1043,8 @@ reg_extmatch_T *ref_extmatch(reg_extmatch_T *em)
|
||||
return em;
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove a reference to an extmatch. If there are no references left, free
|
||||
* the info.
|
||||
*/
|
||||
// Remove a reference to an extmatch. If there are no references left, free
|
||||
// the info.
|
||||
void unref_extmatch(reg_extmatch_T *em)
|
||||
{
|
||||
int i;
|
||||
@ -1201,10 +1143,8 @@ static bool reg_match_visual(void)
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check the regexp program for its magic number.
|
||||
* Return true if it's wrong.
|
||||
*/
|
||||
// Check the regexp program for its magic number.
|
||||
// Return true if it's wrong.
|
||||
static int prog_magic_wrong(void)
|
||||
{
|
||||
regprog_T *prog;
|
||||
@ -1222,11 +1162,9 @@ static int prog_magic_wrong(void)
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Cleanup the subexpressions, if this wasn't done yet.
|
||||
* This construction is used to clear the subexpressions only when they are
|
||||
* used (to increase speed).
|
||||
*/
|
||||
// Cleanup the subexpressions, if this wasn't done yet.
|
||||
// This construction is used to clear the subexpressions only when they are
|
||||
// used (to increase speed).
|
||||
static void cleanup_subexpr(void)
|
||||
{
|
||||
if (rex.need_clear_subexpr) {
|
||||
@ -1265,12 +1203,10 @@ static void reg_nextline(void)
|
||||
fast_breakcheck();
|
||||
}
|
||||
|
||||
/*
|
||||
* Check whether a backreference matches.
|
||||
* Returns RA_FAIL, RA_NOMATCH or RA_MATCH.
|
||||
* If "bytelen" is not NULL, it is set to the byte length of the match in the
|
||||
* last line.
|
||||
*/
|
||||
// Check whether a backreference matches.
|
||||
// Returns RA_FAIL, RA_NOMATCH or RA_MATCH.
|
||||
// If "bytelen" is not NULL, it is set to the byte length of the match in the
|
||||
// last line.
|
||||
static int match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T end_lnum,
|
||||
colnr_T end_col, int *bytelen)
|
||||
{
|
||||
@ -1449,9 +1385,9 @@ static int cstrncmp(char *s1, char *s2, int *n)
|
||||
c1 = mb_ptr2char_adv((const char_u **)&str1);
|
||||
c2 = mb_ptr2char_adv((const char_u **)&str2);
|
||||
|
||||
/* decompose the character if necessary, into 'base' characters
|
||||
* because I don't care about Arabic, I will hard-code the Hebrew
|
||||
* which I *do* care about! So sue me... */
|
||||
// decompose the character if necessary, into 'base' characters
|
||||
// because I don't care about Arabic, I will hard-code the Hebrew
|
||||
// which I *do* care about! So sue me...
|
||||
if (c1 != c2 && (!rex.reg_ic || utf_fold(c1) != utf_fold(c2))) {
|
||||
// decomposition necessary?
|
||||
mb_decompose(c1, &c11, &junk, &junk);
|
||||
@ -1566,7 +1502,7 @@ char *regtilde(char *source, int magic, bool preview)
|
||||
int len;
|
||||
int prevlen;
|
||||
|
||||
for (p = newsub; *p; ++p) {
|
||||
for (p = newsub; *p; p++) {
|
||||
if ((*p == '~' && magic) || (*p == '\\' && *(p + 1) == '~' && !magic)) {
|
||||
if (reg_prev_sub != NULL) {
|
||||
// length = len(newsub) - 1 + len(prev_sub) + 1
|
||||
@ -1871,12 +1807,11 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int des
|
||||
*s = CAR;
|
||||
} else if (*s == '\\' && s[1] != NUL) {
|
||||
s++;
|
||||
/* Change NL to CR here too, so that this works:
|
||||
* :s/abc\\\ndef/\="aaa\\\nbbb"/ on text:
|
||||
* abc\
|
||||
* def
|
||||
* Not when called from vim_regexec_nl().
|
||||
*/
|
||||
// Change NL to CR here too, so that this works:
|
||||
// :s/abc\\\ndef/\="aaa\\\nbbb"/ on text:
|
||||
// abc{backslash}
|
||||
// def
|
||||
// Not when called from vim_regexec_nl().
|
||||
if (*s == NL && !rsm.sm_line_lbr) {
|
||||
*s = CAR;
|
||||
}
|
||||
@ -2172,10 +2107,8 @@ char *reg_submatch(int no)
|
||||
if (rsm.sm_match == NULL) {
|
||||
ssize_t len;
|
||||
|
||||
/*
|
||||
* First round: compute the length and allocate memory.
|
||||
* Second round: copy the text.
|
||||
*/
|
||||
// First round: compute the length and allocate memory.
|
||||
// Second round: copy the text.
|
||||
for (round = 1; round <= 2; round++) {
|
||||
lnum = rsm.sm_mmatch->startpos[no].lnum;
|
||||
if (lnum < 0 || rsm.sm_mmatch->endpos[no].lnum < 0) {
|
||||
@ -2216,7 +2149,7 @@ char *reg_submatch(int no)
|
||||
len++;
|
||||
}
|
||||
if (round == 2) {
|
||||
STRNCPY(retval + len, reg_getline_submatch(lnum),
|
||||
STRNCPY(retval + len, reg_getline_submatch(lnum), // NOLINT(runtime/printf)
|
||||
rsm.sm_mmatch->endpos[no].col);
|
||||
}
|
||||
len += rsm.sm_mmatch->endpos[no].col;
|
||||
@ -2327,12 +2260,10 @@ static char_u regname[][30] = {
|
||||
};
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Compile a regular expression into internal code.
|
||||
* Returns the program in allocated memory.
|
||||
* Use vim_regfree() to free the memory.
|
||||
* Returns NULL for an error.
|
||||
*/
|
||||
// Compile a regular expression into internal code.
|
||||
// Returns the program in allocated memory.
|
||||
// Use vim_regfree() to free the memory.
|
||||
// Returns NULL for an error.
|
||||
regprog_T *vim_regcomp(char *expr_arg, int re_flags)
|
||||
{
|
||||
regprog_T *prog = NULL;
|
||||
@ -2413,9 +2344,7 @@ regprog_T *vim_regcomp(char *expr_arg, int re_flags)
|
||||
return prog;
|
||||
}
|
||||
|
||||
/*
|
||||
* Free a compiled regexp program, returned by vim_regcomp().
|
||||
*/
|
||||
// Free a compiled regexp program, returned by vim_regcomp().
|
||||
void vim_regfree(regprog_T *prog)
|
||||
{
|
||||
if (prog != NULL) {
|
||||
|
@ -1,137 +1,130 @@
|
||||
// This is an open source non-commercial project. Dear PVS-Studio, please check
|
||||
// it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com
|
||||
|
||||
/*
|
||||
*
|
||||
* Backtracking regular expression implementation.
|
||||
*
|
||||
* This file is included in "regexp.c".
|
||||
*
|
||||
* NOTICE:
|
||||
*
|
||||
* This is NOT the original regular expression code as written by Henry
|
||||
* Spencer. This code has been modified specifically for use with the VIM
|
||||
* editor, and should not be used separately from Vim. If you want a good
|
||||
* regular expression library, get the original code. The copyright notice
|
||||
* that follows is from the original.
|
||||
*
|
||||
* END NOTICE
|
||||
*
|
||||
* Copyright (c) 1986 by University of Toronto.
|
||||
* Written by Henry Spencer. Not derived from licensed software.
|
||||
*
|
||||
* Permission is granted to anyone to use this software for any
|
||||
* purpose on any computer system, and to redistribute it freely,
|
||||
* subject to the following restrictions:
|
||||
*
|
||||
* 1. The author is not responsible for the consequences of use of
|
||||
* this software, no matter how awful, even if they arise
|
||||
* from defects in it.
|
||||
*
|
||||
* 2. The origin of this software must not be misrepresented, either
|
||||
* by explicit claim or by omission.
|
||||
*
|
||||
* 3. Altered versions must be plainly marked as such, and must not
|
||||
* be misrepresented as being the original software.
|
||||
*
|
||||
* Beware that some of this code is subtly aware of the way operator
|
||||
* precedence is structured in regular expressions. Serious changes in
|
||||
* regular-expression syntax might require a total rethink.
|
||||
*
|
||||
* Changes have been made by Tony Andrews, Olaf 'Rhialto' Seibert, Robert
|
||||
* Webb, Ciaran McCreesh and Bram Moolenaar.
|
||||
* Named character class support added by Walter Briscoe (1998 Jul 01)
|
||||
*/
|
||||
// Backtracking regular expression implementation.
|
||||
//
|
||||
// This file is included in "regexp.c".
|
||||
//
|
||||
// NOTICE:
|
||||
//
|
||||
// This is NOT the original regular expression code as written by Henry
|
||||
// Spencer. This code has been modified specifically for use with the VIM
|
||||
// editor, and should not be used separately from Vim. If you want a good
|
||||
// regular expression library, get the original code. The copyright notice
|
||||
// that follows is from the original.
|
||||
//
|
||||
// END NOTICE
|
||||
//
|
||||
// Copyright (c) 1986 by University of Toronto.
|
||||
// Written by Henry Spencer. Not derived from licensed software.
|
||||
//
|
||||
// Permission is granted to anyone to use this software for any
|
||||
// purpose on any computer system, and to redistribute it freely,
|
||||
// subject to the following restrictions:
|
||||
//
|
||||
// 1. The author is not responsible for the consequences of use of
|
||||
// this software, no matter how awful, even if they arise
|
||||
// from defects in it.
|
||||
//
|
||||
// 2. The origin of this software must not be misrepresented, either
|
||||
// by explicit claim or by omission.
|
||||
//
|
||||
// 3. Altered versions must be plainly marked as such, and must not
|
||||
// be misrepresented as being the original software.
|
||||
//
|
||||
// Beware that some of this code is subtly aware of the way operator
|
||||
// precedence is structured in regular expressions. Serious changes in
|
||||
// regular-expression syntax might require a total rethink.
|
||||
//
|
||||
// Changes have been made by Tony Andrews, Olaf 'Rhialto' Seibert, Robert
|
||||
// Webb, Ciaran McCreesh and Bram Moolenaar.
|
||||
// Named character class support added by Walter Briscoe (1998 Jul 01)
|
||||
|
||||
/*
|
||||
* The "internal use only" fields in regexp_defs.h are present to pass info from
|
||||
* compile to execute that permits the execute phase to run lots faster on
|
||||
* simple cases. They are:
|
||||
*
|
||||
* regstart char that must begin a match; NUL if none obvious; Can be a
|
||||
* multi-byte character.
|
||||
* reganch is the match anchored (at beginning-of-line only)?
|
||||
* regmust string (pointer into program) that match must include, or NULL
|
||||
* regmlen length of regmust string
|
||||
* regflags RF_ values or'ed together
|
||||
*
|
||||
* Regstart and reganch permit very fast decisions on suitable starting points
|
||||
* for a match, cutting down the work a lot. Regmust permits fast rejection
|
||||
* of lines that cannot possibly match. The regmust tests are costly enough
|
||||
* that vim_regcomp() supplies a regmust only if the r.e. contains something
|
||||
* potentially expensive (at present, the only such thing detected is * or +
|
||||
* at the start of the r.e., which can involve a lot of backup). Regmlen is
|
||||
* supplied because the test in vim_regexec() needs it and vim_regcomp() is
|
||||
* computing it anyway.
|
||||
*/
|
||||
// The "internal use only" fields in regexp_defs.h are present to pass info from
|
||||
// compile to execute that permits the execute phase to run lots faster on
|
||||
// simple cases. They are:
|
||||
//
|
||||
// regstart char that must begin a match; NUL if none obvious; Can be a
|
||||
// multi-byte character.
|
||||
// reganch is the match anchored (at beginning-of-line only)?
|
||||
// regmust string (pointer into program) that match must include, or NULL
|
||||
// regmlen length of regmust string
|
||||
// regflags RF_ values or'ed together
|
||||
//
|
||||
// Regstart and reganch permit very fast decisions on suitable starting points
|
||||
// for a match, cutting down the work a lot. Regmust permits fast rejection
|
||||
// of lines that cannot possibly match. The regmust tests are costly enough
|
||||
// that vim_regcomp() supplies a regmust only if the r.e. contains something
|
||||
// potentially expensive (at present, the only such thing detected is * or +
|
||||
// at the start of the r.e., which can involve a lot of backup). Regmlen is
|
||||
// supplied because the test in vim_regexec() needs it and vim_regcomp() is
|
||||
// computing it anyway.
|
||||
|
||||
/*
|
||||
* Structure for regexp "program". This is essentially a linear encoding
|
||||
* of a nondeterministic finite-state machine (aka syntax charts or
|
||||
* "railroad normal form" in parsing technology). Each node is an opcode
|
||||
* plus a "next" pointer, possibly plus an operand. "Next" pointers of
|
||||
* all nodes except BRANCH and BRACES_COMPLEX implement concatenation; a "next"
|
||||
* pointer with a BRANCH on both ends of it is connecting two alternatives.
|
||||
* (Here we have one of the subtle syntax dependencies: an individual BRANCH
|
||||
* (as opposed to a collection of them) is never concatenated with anything
|
||||
* because of operator precedence). The "next" pointer of a BRACES_COMPLEX
|
||||
* node points to the node after the stuff to be repeated.
|
||||
* The operand of some types of node is a literal string; for others, it is a
|
||||
* node leading into a sub-FSM. In particular, the operand of a BRANCH node
|
||||
* is the first node of the branch.
|
||||
* (NB this is *not* a tree structure: the tail of the branch connects to the
|
||||
* thing following the set of BRANCHes.)
|
||||
*
|
||||
* pattern is coded like:
|
||||
*
|
||||
* +-----------------+
|
||||
* | V
|
||||
* <aa>\|<bb> BRANCH <aa> BRANCH <bb> --> END
|
||||
* | ^ | ^
|
||||
* +------+ +----------+
|
||||
*
|
||||
*
|
||||
* +------------------+
|
||||
* V |
|
||||
* <aa>* BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END
|
||||
* | | ^ ^
|
||||
* | +---------------+ |
|
||||
* +---------------------------------------------+
|
||||
*
|
||||
*
|
||||
* +----------------------+
|
||||
* V |
|
||||
* <aa>\+ BRANCH <aa> --> BRANCH --> BACK BRANCH --> NOTHING --> END
|
||||
* | | ^ ^
|
||||
* | +-----------+ |
|
||||
* +--------------------------------------------------+
|
||||
*
|
||||
*
|
||||
* +-------------------------+
|
||||
* V |
|
||||
* <aa>\{} BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK END
|
||||
* | | ^
|
||||
* | +----------------+
|
||||
* +-----------------------------------------------+
|
||||
*
|
||||
*
|
||||
* <aa>\@!<bb> BRANCH NOMATCH <aa> --> END <bb> --> END
|
||||
* | | ^ ^
|
||||
* | +----------------+ |
|
||||
* +--------------------------------+
|
||||
*
|
||||
* +---------+
|
||||
* | V
|
||||
* \z[abc] BRANCH BRANCH a BRANCH b BRANCH c BRANCH NOTHING --> END
|
||||
* | | | | ^ ^
|
||||
* | | | +-----+ |
|
||||
* | | +----------------+ |
|
||||
* | +---------------------------+ |
|
||||
* +------------------------------------------------------+
|
||||
*
|
||||
* They all start with a BRANCH for "\|" alternatives, even when there is only
|
||||
* one alternative.
|
||||
*/
|
||||
// Structure for regexp "program". This is essentially a linear encoding
|
||||
// of a nondeterministic finite-state machine (aka syntax charts or
|
||||
// "railroad normal form" in parsing technology). Each node is an opcode
|
||||
// plus a "next" pointer, possibly plus an operand. "Next" pointers of
|
||||
// all nodes except BRANCH and BRACE_COMPLEX implement concatenation; a "next"
|
||||
// pointer with a BRANCH on both ends of it is connecting two alternatives.
|
||||
// (Here we have one of the subtle syntax dependencies: an individual BRANCH
|
||||
// (as opposed to a collection of them) is never concatenated with anything
|
||||
// because of operator precedence). The "next" pointer of a BRACE_COMPLEX
|
||||
// node points to the node after the stuff to be repeated.
|
||||
// The operand of some types of node is a literal string; for others, it is a
|
||||
// node leading into a sub-FSM. In particular, the operand of a BRANCH node
|
||||
// is the first node of the branch.
|
||||
// (NB this is *not* a tree structure: the tail of the branch connects to the
|
||||
// thing following the set of BRANCHes.)
|
||||
//
|
||||
// pattern is coded like:
|
||||
//
|
||||
// +-----------------+
|
||||
// | V
|
||||
// <aa>\|<bb> BRANCH <aa> BRANCH <bb> --> END
|
||||
// | ^ | ^
|
||||
// +------+ +----------+
|
||||
//
|
||||
//
|
||||
// +------------------+
|
||||
// V |
|
||||
// <aa>* BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END
|
||||
// | | ^ ^
|
||||
// | +---------------+ |
|
||||
// +---------------------------------------------+
|
||||
//
|
||||
//
|
||||
// +----------------------+
|
||||
// V |
|
||||
// <aa>\+ BRANCH <aa> --> BRANCH --> BACK BRANCH --> NOTHING --> END
|
||||
// | | ^ ^
|
||||
// | +-----------+ |
|
||||
// +--------------------------------------------------+
|
||||
//
|
||||
//
|
||||
// +-------------------------+
|
||||
// V |
|
||||
// <aa>\{} BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK END
|
||||
// | | ^
|
||||
// | +----------------+
|
||||
// +-----------------------------------------------+
|
||||
//
|
||||
//
|
||||
// <aa>\@!<bb> BRANCH NOMATCH <aa> --> END <bb> --> END
|
||||
// | | ^ ^
|
||||
// | +----------------+ |
|
||||
// +--------------------------------+
|
||||
//
|
||||
// +---------+
|
||||
// | V
|
||||
// \z[abc] BRANCH BRANCH a BRANCH b BRANCH c BRANCH NOTHING --> END
|
||||
// | | | | ^ ^
|
||||
// | | | +-----+ |
|
||||
// | | +----------------+ |
|
||||
// | +---------------------------+ |
|
||||
// +------------------------------------------------------+
|
||||
//
|
||||
// They all start with a BRANCH for "\|" alternatives, even when there is only
|
||||
// one alternative.
|
||||
|
||||
#include <assert.h>
|
||||
#include <inttypes.h>
|
||||
@ -141,9 +134,7 @@
|
||||
#include "nvim/garray.h"
|
||||
#include "nvim/regexp.h"
|
||||
|
||||
/*
|
||||
* The opcodes are:
|
||||
*/
|
||||
// The opcodes are:
|
||||
|
||||
// definition number opnd? meaning
|
||||
#define END 0 // End of program or NOMATCH operand.
|
||||
@ -240,9 +231,7 @@
|
||||
#define RE_VISUAL 208 // Match Visual area
|
||||
#define RE_COMPOSING 209 // any composing characters
|
||||
|
||||
/*
|
||||
* Flags to be passed up and down.
|
||||
*/
|
||||
// Flags to be passed up and down.
|
||||
#define HASWIDTH 0x1 // Known never to match null string.
|
||||
#define SIMPLE 0x2 // Simple enough to be STAR/PLUS operand.
|
||||
#define SPSTART 0x4 // Starts with * or +.
|
||||
@ -273,10 +262,8 @@ static int classcodes[] = {
|
||||
UPPER, NUPPER
|
||||
};
|
||||
|
||||
/*
|
||||
* When regcode is set to this value, code is not emitted and size is computed
|
||||
* instead.
|
||||
*/
|
||||
// When regcode is set to this value, code is not emitted and size is computed
|
||||
// instead.
|
||||
#define JUST_CALC_SIZE ((char_u *)-1)
|
||||
|
||||
// Values for rs_state in regitem_T.
|
||||
@ -297,11 +284,9 @@ typedef enum regstate_E {
|
||||
RS_STAR_SHORT, // STAR/PLUS/BRACE_SIMPLE shortest match
|
||||
} regstate_T;
|
||||
|
||||
/*
|
||||
* Structure used to save the current input state, when it needs to be
|
||||
* restored after trying a match. Used by reg_save() and reg_restore().
|
||||
* Also stores the length of "backpos".
|
||||
*/
|
||||
// Structure used to save the current input state, when it needs to be
|
||||
// restored after trying a match. Used by reg_save() and reg_restore().
|
||||
// Also stores the length of "backpos".
|
||||
typedef struct {
|
||||
union {
|
||||
char_u *ptr; // rex.input pointer, for single-line regexp
|
||||
@ -327,12 +312,10 @@ typedef struct regbehind_S {
|
||||
save_se_T save_end[NSUBEXP];
|
||||
} regbehind_T;
|
||||
|
||||
/*
|
||||
* When there are alternatives a regstate_T is put on the regstack to remember
|
||||
* what we are doing.
|
||||
* Before it may be another type of item, depending on rs_state, to remember
|
||||
* more things.
|
||||
*/
|
||||
// When there are alternatives a regstate_T is put on the regstack to remember
|
||||
// what we are doing.
|
||||
// Before it may be another type of item, depending on rs_state, to remember
|
||||
// more things.
|
||||
typedef struct regitem_S {
|
||||
regstate_T rs_state; // what we are doing, one of RS_ above
|
||||
int16_t rs_no; // submatch nr or BEHIND/NOBEHIND
|
||||
@ -359,69 +342,63 @@ typedef struct backpos_S {
|
||||
regsave_T bp_pos; // last input position
|
||||
} backpos_T;
|
||||
|
||||
/*
|
||||
* "regstack" and "backpos" are used by regmatch(). They are kept over calls
|
||||
* to avoid invoking malloc() and free() often.
|
||||
* "regstack" is a stack with regitem_T items, sometimes preceded by regstar_T
|
||||
* or regbehind_T.
|
||||
* "backpos_T" is a table with backpos_T for BACK
|
||||
*/
|
||||
// "regstack" and "backpos" are used by regmatch(). They are kept over calls
|
||||
// to avoid invoking malloc() and free() often.
|
||||
// "regstack" is a stack with regitem_T items, sometimes preceded by regstar_T
|
||||
// or regbehind_T.
|
||||
// "backpos" is a table with backpos_T items for BACK
|
||||
static garray_T regstack = GA_EMPTY_INIT_VALUE;
|
||||
static garray_T backpos = GA_EMPTY_INIT_VALUE;
|
||||
|
||||
static regsave_T behind_pos;
|
||||
|
||||
/*
|
||||
* Both for regstack and backpos tables we use the following strategy of
|
||||
* allocation (to reduce malloc/free calls):
|
||||
* - Initial size is fairly small.
|
||||
* - When needed, the tables are grown bigger (8 times at first, double after
|
||||
* that).
|
||||
* - After executing the match we free the memory only if the array has grown.
|
||||
* Thus the memory is kept allocated when it's at the initial size.
|
||||
* This makes it fast while not keeping a lot of memory allocated.
|
||||
* A three times speed increase was observed when using many simple patterns.
|
||||
*/
|
||||
// Both for regstack and backpos tables we use the following strategy of
|
||||
// allocation (to reduce malloc/free calls):
|
||||
// - Initial size is fairly small.
|
||||
// - When needed, the tables are grown bigger (8 times at first, double after
|
||||
// that).
|
||||
// - After executing the match we free the memory only if the array has grown.
|
||||
// Thus the memory is kept allocated when it's at the initial size.
|
||||
// This makes it fast while not keeping a lot of memory allocated.
|
||||
// A three times speed increase was observed when using many simple patterns.
|
||||
#define REGSTACK_INITIAL 2048
|
||||
#define BACKPOS_INITIAL 64
|
||||
|
||||
/*
|
||||
* Opcode notes:
|
||||
*
|
||||
* BRANCH The set of branches constituting a single choice are hooked
|
||||
* together with their "next" pointers, since precedence prevents
|
||||
* anything being concatenated to any individual branch. The
|
||||
* "next" pointer of the last BRANCH in a choice points to the
|
||||
* thing following the whole choice. This is also where the
|
||||
* final "next" pointer of each individual branch points; each
|
||||
* branch starts with the operand node of a BRANCH node.
|
||||
*
|
||||
* BACK Normal "next" pointers all implicitly point forward; BACK
|
||||
* exists to make loop structures possible.
|
||||
*
|
||||
* STAR,PLUS '=', and complex '*' and '+', are implemented as circular
|
||||
* BRANCH structures using BACK. Simple cases (one character
|
||||
* per match) are implemented with STAR and PLUS for speed
|
||||
* and to minimize recursive plunges.
|
||||
*
|
||||
* BRACE_LIMITS This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX
|
||||
* node, and defines the min and max limits to be used for that
|
||||
* node.
|
||||
*
|
||||
* MOPEN,MCLOSE ...are numbered at compile time.
|
||||
* ZOPEN,ZCLOSE ...ditto
|
||||
*/
|
||||
|
||||
/*
|
||||
* A node is one char of opcode followed by two chars of "next" pointer.
|
||||
* "Next" pointers are stored as two 8-bit bytes, high order first. The
|
||||
* value is a positive offset from the opcode of the node containing it.
|
||||
* An operand, if any, simply follows the node. (Note that much of the
|
||||
* code generation knows about this implicit relationship.)
|
||||
*
|
||||
* Using two bytes for the "next" pointer is vast overkill for most things,
|
||||
* but allows patterns to get big without disasters.
|
||||
*/
|
||||
// Opcode notes:
|
||||
//
|
||||
// BRANCH The set of branches constituting a single choice are hooked
|
||||
// together with their "next" pointers, since precedence prevents
|
||||
// anything being concatenated to any individual branch. The
|
||||
// "next" pointer of the last BRANCH in a choice points to the
|
||||
// thing following the whole choice. This is also where the
|
||||
// final "next" pointer of each individual branch points; each
|
||||
// branch starts with the operand node of a BRANCH node.
|
||||
//
|
||||
// BACK Normal "next" pointers all implicitly point forward; BACK
|
||||
// exists to make loop structures possible.
|
||||
//
|
||||
// STAR,PLUS '=', and complex '*' and '+', are implemented as circular
|
||||
// BRANCH structures using BACK. Simple cases (one character
|
||||
// per match) are implemented with STAR and PLUS for speed
|
||||
// and to minimize recursive plunges.
|
||||
//
|
||||
// BRACE_LIMITS This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX
|
||||
// node, and defines the min and max limits to be used for that
|
||||
// node.
|
||||
//
|
||||
// MOPEN,MCLOSE ...are numbered at compile time.
|
||||
// ZOPEN,ZCLOSE ...ditto
|
||||
///
|
||||
//
|
||||
//
|
||||
// A node is one char of opcode followed by two chars of "next" pointer.
|
||||
// "Next" pointers are stored as two 8-bit bytes, high order first. The
|
||||
// value is a positive offset from the opcode of the node containing it.
|
||||
// An operand, if any, simply follows the node. (Note that much of the
|
||||
// code generation knows about this implicit relationship.)
|
||||
//
|
||||
// Using two bytes for the "next" pointer is vast overkill for most things,
|
||||
// but allows patterns to get big without disasters.
|
||||
#define OP(p) ((int)(*(p)))
|
||||
#define NEXT(p) (((*((p) + 1) & 0377) << 8) + (*((p) + 2) & 0377))
|
||||
#define OPERAND(p) ((p) + 3)
|
||||
@ -449,9 +426,7 @@ static int regnarrate = 0;
|
||||
# include "regexp_bt.c.generated.h"
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Setup to parse the regexp. Used once to get the length and once to do it.
|
||||
*/
|
||||
// Setup to parse the regexp. Used once to get the length and once to do it.
|
||||
static void regcomp_start(char_u *expr, int re_flags) // see vim_regcomp()
|
||||
{
|
||||
initchr(expr);
|
||||
@ -484,9 +459,7 @@ static bool use_multibytecode(int c)
|
||||
|| utf_iscomposing(c));
|
||||
}
|
||||
|
||||
/*
|
||||
* Emit (if appropriate) a byte of code
|
||||
*/
|
||||
// Emit (if appropriate) a byte of code
|
||||
static void regc(int b)
|
||||
{
|
||||
if (regcode == JUST_CALC_SIZE) {
|
||||
@ -496,9 +469,7 @@ static void regc(int b)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Emit (if appropriate) a multi-byte character of code
|
||||
*/
|
||||
// Emit (if appropriate) a multi-byte character of code
|
||||
static void regmbc(int c)
|
||||
{
|
||||
if (regcode == JUST_CALC_SIZE) {
|
||||
@ -508,11 +479,9 @@ static void regmbc(int c)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Produce the bytes for equivalence class "c".
|
||||
* Currently only handles latin1, latin9 and utf-8.
|
||||
* NOTE: When changing this function, also change nfa_emit_equi_class()
|
||||
*/
|
||||
// Produce the bytes for equivalence class "c".
|
||||
// Currently only handles latin1, latin9 and utf-8.
|
||||
// NOTE: When changing this function, also change nfa_emit_equi_class()
|
||||
static void reg_equi_class(int c)
|
||||
{
|
||||
{
|
||||
@ -1481,10 +1450,8 @@ static void reg_equi_class(int c)
|
||||
regmbc(c);
|
||||
}
|
||||
|
||||
/*
|
||||
* Emit a node.
|
||||
* Return pointer to generated code.
|
||||
*/
|
||||
// Emit a node.
|
||||
// Return pointer to generated code.
|
||||
static char_u *regnode(int op)
|
||||
{
|
||||
char_u *ret;
|
||||
@ -1500,9 +1467,7 @@ static char_u *regnode(int op)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Write a four bytes number at "p" and return pointer to the next char.
|
||||
*/
|
||||
// Write a four-byte number at "p" and return pointer to the next char.
|
||||
static char_u *re_put_uint32(char_u *p, uint32_t val)
|
||||
{
|
||||
*p++ = (char_u)((val >> 24) & 0377);
|
||||
@ -1512,11 +1477,9 @@ static char_u *re_put_uint32(char_u *p, uint32_t val)
|
||||
return p;
|
||||
}
|
||||
|
||||
/*
|
||||
* regnext - dig the "next" pointer out of a node
|
||||
* Returns NULL when calculating size, when there is no next item and when
|
||||
* there is an error.
|
||||
*/
|
||||
// regnext - dig the "next" pointer out of a node
|
||||
// Returns NULL when calculating size, when there is no next item and when
|
||||
// there is an error.
|
||||
static char_u *regnext(char_u *p)
|
||||
FUNC_ATTR_NONNULL_ALL
|
||||
{
|
||||
@ -1573,9 +1536,7 @@ static void regtail(char_u *p, char_u *val)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Like regtail, on item after a BRANCH; nop if none.
|
||||
*/
|
||||
// Like regtail, on item after a BRANCH; nop if none.
|
||||
static void regoptail(char_u *p, char_u *val)
|
||||
{
|
||||
// When op is neither BRANCH nor BRACE_COMPLEX0-9, it is "operandless"
|
||||
@ -1587,11 +1548,9 @@ static void regoptail(char_u *p, char_u *val)
|
||||
regtail(OPERAND(p), val);
|
||||
}
|
||||
|
||||
/*
|
||||
* Insert an operator in front of already-emitted operand
|
||||
*
|
||||
* Means relocating the operand.
|
||||
*/
|
||||
// Insert an operator in front of already-emitted operand
|
||||
//
|
||||
// Means relocating the operand.
|
||||
static void reginsert(int op, char_u *opnd)
|
||||
{
|
||||
char_u *src;
|
||||
@ -1615,10 +1574,8 @@ static void reginsert(int op, char_u *opnd)
|
||||
*place = NUL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Insert an operator in front of already-emitted operand.
|
||||
* Add a number to the operator.
|
||||
*/
|
||||
// Insert an operator in front of already-emitted operand.
|
||||
// Add a number to the operator.
|
||||
static void reginsert_nr(int op, long val, char_u *opnd)
|
||||
{
|
||||
char_u *src;
|
||||
@ -1644,12 +1601,10 @@ static void reginsert_nr(int op, long val, char_u *opnd)
|
||||
re_put_uint32(place, (uint32_t)val);
|
||||
}
|
||||
|
||||
/*
|
||||
* Insert an operator in front of already-emitted operand.
|
||||
* The operator has the given limit values as operands. Also set next pointer.
|
||||
*
|
||||
* Means relocating the operand.
|
||||
*/
|
||||
// Insert an operator in front of already-emitted operand.
|
||||
// The operator has the given limit values as operands. Also set next pointer.
|
||||
//
|
||||
// Means relocating the operand.
|
||||
static void reginsert_limits(int op, long minval, long maxval, char_u *opnd)
|
||||
{
|
||||
char_u *src;
|
||||
@ -1704,13 +1659,11 @@ static int seen_endbrace(int refnum)
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Parse the lowest level.
|
||||
*
|
||||
* Optimization: gobbles an entire sequence of ordinary characters so that
|
||||
* it can turn them into a single node, which is smaller to store and
|
||||
* faster to run. Don't do this when one_exactly is set.
|
||||
*/
|
||||
// Parse the lowest level.
|
||||
//
|
||||
// Optimization: gobbles an entire sequence of ordinary characters so that
|
||||
// it can turn them into a single node, which is smaller to store and
|
||||
// faster to run. Don't do this when one_exactly is set.
|
||||
static char_u *regatom(int *flagp)
|
||||
{
|
||||
char_u *ret;
|
||||
@ -2289,8 +2242,7 @@ collection:
|
||||
if (c_class != 0) {
|
||||
// produce equivalence class
|
||||
reg_equi_class(c_class);
|
||||
} else if ((c_class =
|
||||
get_coll_element(&regparse)) != 0) {
|
||||
} else if ((c_class = get_coll_element(&regparse)) != 0) {
|
||||
// produce a collating element
|
||||
regmbc(c_class);
|
||||
} else {
|
||||
@ -2466,7 +2418,7 @@ do_multibyte:
|
||||
for (len = 0; c != NUL && (len == 0
|
||||
|| (re_multi_type(peekchr()) == NOT_MULTI
|
||||
&& !one_exactly
|
||||
&& !is_Magic(c))); ++len) {
|
||||
&& !is_Magic(c))); len++) {
|
||||
c = no_Magic(c);
|
||||
{
|
||||
regmbc(c);
|
||||
@ -2500,15 +2452,13 @@ do_multibyte:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Parse something followed by possible [*+=].
|
||||
*
|
||||
* Note that the branching code sequences used for = and the general cases
|
||||
* of * and + are somewhat optimized: they use the same NOTHING node as
|
||||
* both the endmarker for their branch list and the body of the last branch.
|
||||
* It might seem that this node could be dispensed with entirely, but the
|
||||
* endmarker role is not redundant.
|
||||
*/
|
||||
// Parse something followed by possible [*+=].
|
||||
//
|
||||
// Note that the branching code sequences used for = and the general cases
|
||||
// of * and + are somewhat optimized: they use the same NOTHING node as
|
||||
// both the endmarker for their branch list and the body of the last branch.
|
||||
// It might seem that this node could be dispensed with entirely, but the
|
||||
// endmarker role is not redundant.
|
||||
static char_u *regpiece(int *flagp)
|
||||
{
|
||||
char_u *ret;
|
||||
@ -2644,10 +2594,8 @@ static char_u *regpiece(int *flagp)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Parse one alternative of an | or & operator.
|
||||
* Implements the concatenation operator.
|
||||
*/
|
||||
// Parse one alternative of an | or & operator.
|
||||
// Implements the concatenation operator.
|
||||
static char_u *regconcat(int *flagp)
|
||||
{
|
||||
char_u *first = NULL;
|
||||
@ -2722,10 +2670,8 @@ static char_u *regconcat(int *flagp)
|
||||
return first;
|
||||
}
|
||||
|
||||
/*
|
||||
* Parse one alternative of an | operator.
|
||||
* Implements the & operator.
|
||||
*/
|
||||
// Parse one alternative of an | operator.
|
||||
// Implements the & operator.
|
||||
static char_u *regbranch(int *flagp)
|
||||
{
|
||||
char_u *ret;
|
||||
@ -2874,27 +2820,25 @@ static char_u *reg(int paren, int *flagp)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* bt_regcomp() - compile a regular expression into internal code for the
|
||||
* traditional back track matcher.
|
||||
* Returns the program in allocated space. Returns NULL for an error.
|
||||
*
|
||||
* We can't allocate space until we know how big the compiled form will be,
|
||||
* but we can't compile it (and thus know how big it is) until we've got a
|
||||
* place to put the code. So we cheat: we compile it twice, once with code
|
||||
* generation turned off and size counting turned on, and once "for real".
|
||||
* This also means that we don't allocate space until we are sure that the
|
||||
* thing really will compile successfully, and we never have to move the
|
||||
* code and thus invalidate pointers into it. (Note that it has to be in
|
||||
* one piece because free() must be able to free it all.)
|
||||
*
|
||||
* Whether upper/lower case is to be ignored is decided when executing the
|
||||
* program, it does not matter here.
|
||||
*
|
||||
* Beware that the optimization-preparation code in here knows about some
|
||||
* of the structure of the compiled regexp.
|
||||
* "re_flags": RE_MAGIC and/or RE_STRING.
|
||||
*/
|
||||
// bt_regcomp() - compile a regular expression into internal code for the
|
||||
// traditional back track matcher.
|
||||
// Returns the program in allocated space. Returns NULL for an error.
|
||||
//
|
||||
// We can't allocate space until we know how big the compiled form will be,
|
||||
// but we can't compile it (and thus know how big it is) until we've got a
|
||||
// place to put the code. So we cheat: we compile it twice, once with code
|
||||
// generation turned off and size counting turned on, and once "for real".
|
||||
// This also means that we don't allocate space until we are sure that the
|
||||
// thing really will compile successfully, and we never have to move the
|
||||
// code and thus invalidate pointers into it. (Note that it has to be in
|
||||
// one piece because free() must be able to free it all.)
|
||||
//
|
||||
// Whether upper/lower case is to be ignored is decided when executing the
|
||||
// program, it does not matter here.
|
||||
//
|
||||
// Beware that the optimization-preparation code in here knows about some
|
||||
// of the structure of the compiled regexp.
|
||||
// "re_flags": RE_MAGIC and/or RE_STRING.
|
||||
static regprog_T *bt_regcomp(char_u *expr, int re_flags)
|
||||
{
|
||||
char_u *scan;
|
||||
@ -2999,19 +2943,15 @@ static regprog_T *bt_regcomp(char_u *expr, int re_flags)
|
||||
return (regprog_T *)r;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if during the previous call to vim_regcomp the EOL item "$" has been
|
||||
* found. This is messy, but it works fine.
|
||||
*/
|
||||
// Check if during the previous call to vim_regcomp the EOL item "$" has been
|
||||
// found. This is messy, but it works fine.
|
||||
int vim_regcomp_had_eol(void)
|
||||
{
|
||||
return had_eol;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get a number after a backslash that is inside [].
|
||||
* When nothing is recognized return a backslash.
|
||||
*/
|
||||
// Get a number after a backslash that is inside [].
|
||||
// When nothing is recognized return a backslash.
|
||||
static int coll_get_char(void)
|
||||
{
|
||||
int64_t nr = -1;
|
||||
@ -3037,9 +2977,7 @@ static int coll_get_char(void)
|
||||
return (int)nr;
|
||||
}
|
||||
|
||||
/*
|
||||
* Free a compiled regexp program, returned by bt_regcomp().
|
||||
*/
|
||||
// Free a compiled regexp program, returned by bt_regcomp().
|
||||
static void bt_regfree(regprog_T *prog)
|
||||
{
|
||||
xfree(prog);
|
||||
@ -3047,11 +2985,9 @@ static void bt_regfree(regprog_T *prog)
|
||||
|
||||
#define ADVANCE_REGINPUT() MB_PTR_ADV(rex.input)
|
||||
|
||||
/*
|
||||
* The arguments from BRACE_LIMITS are stored here. They are actually local
|
||||
* to regmatch(), but they are here to reduce the amount of stack space used
|
||||
* (it can be called recursively many times).
|
||||
*/
|
||||
// The arguments from BRACE_LIMITS are stored here. They are actually local
|
||||
// to regmatch(), but they are here to reduce the amount of stack space used
|
||||
// (it can be called recursively many times).
|
||||
static long bl_minval;
|
||||
static long bl_maxval;
|
||||
|
||||
@ -3108,13 +3044,11 @@ static bool reg_save_equal(const regsave_T *save)
|
||||
else /* NOLINT */ \
|
||||
*(pp) = (savep)->se_u.ptr; }
|
||||
|
||||
/*
|
||||
* Tentatively set the sub-expression start to the current position (after
|
||||
* calling regmatch() they will have changed). Need to save the existing
|
||||
* values for when there is no match.
|
||||
* Use se_save() to use pointer (save_se_multi()) or position (save_se_one()),
|
||||
* depending on REG_MULTI.
|
||||
*/
|
||||
// Tentatively set the sub-expression start to the current position (after
|
||||
// calling regmatch() they will have changed). Need to save the existing
|
||||
// values for when there is no match.
|
||||
// Use se_save() to use pointer (save_se_multi()) or position (save_se_one()),
|
||||
// depending on REG_MULTI.
|
||||
static void save_se_multi(save_se_T *savep, lpos_T *posp)
|
||||
{
|
||||
savep->se_u.pos = *posp;
|
||||
@ -3494,10 +3428,8 @@ do_class:
|
||||
return (int)count;
|
||||
}
|
||||
|
||||
/*
|
||||
* Push an item onto the regstack.
|
||||
* Returns pointer to new item. Returns NULL when out of memory.
|
||||
*/
|
||||
// Push an item onto the regstack.
|
||||
// Returns pointer to new item. Returns NULL when out of memory.
|
||||
static regitem_T *regstack_push(regstate_T state, char_u *scan)
|
||||
{
|
||||
regitem_T *rp;
|
||||
@ -3516,9 +3448,7 @@ static regitem_T *regstack_push(regstate_T state, char_u *scan)
|
||||
return rp;
|
||||
}
|
||||
|
||||
/*
|
||||
* Pop an item from the regstack.
|
||||
*/
|
||||
// Pop an item from the regstack.
|
||||
static void regstack_pop(char_u **scan)
|
||||
{
|
||||
regitem_T *rp;
|
||||
@ -4643,7 +4573,7 @@ static bool regmatch(char_u *scan, proftime_T *tm, int *timed_out)
|
||||
// Pop the state. Restore pointers when there is no match.
|
||||
if (status == RA_NOMATCH) {
|
||||
reg_restore(&rp->rs_un.regsave, &backpos);
|
||||
--brace_count[rp->rs_no]; // decrement match count
|
||||
brace_count[rp->rs_no]--; // decrement match count
|
||||
}
|
||||
regstack_pop(&scan);
|
||||
break;
|
||||
@ -4653,7 +4583,7 @@ static bool regmatch(char_u *scan, proftime_T *tm, int *timed_out)
|
||||
if (status == RA_NOMATCH) {
|
||||
// There was no match, but we did find enough matches.
|
||||
reg_restore(&rp->rs_un.regsave, &backpos);
|
||||
--brace_count[rp->rs_no];
|
||||
brace_count[rp->rs_no]--;
|
||||
// continue with the items after "\{}"
|
||||
status = RA_CONT;
|
||||
}
|
||||
@ -5247,9 +5177,7 @@ static long bt_regexec_multi(regmmatch_T *rmp, win_T *win, buf_T *buf, linenr_T
|
||||
return bt_regexec_both(NULL, col, tm, timed_out);
|
||||
}
|
||||
|
||||
/*
|
||||
* Compare a number with the operand of RE_LNUM, RE_COL or RE_VCOL.
|
||||
*/
|
||||
// Compare a number with the operand of RE_LNUM, RE_COL or RE_VCOL.
|
||||
static int re_num_cmp(uint32_t val, char_u *scan)
|
||||
{
|
||||
uint32_t n = (uint32_t)OPERAND_MIN(scan);
|
||||
@ -5265,9 +5193,7 @@ static int re_num_cmp(uint32_t val, char_u *scan)
|
||||
|
||||
#ifdef BT_REGEXP_DUMP
|
||||
|
||||
/*
|
||||
* regdump - dump a regexp onto stdout in vaguely comprehensible form
|
||||
*/
|
||||
// regdump - dump a regexp onto stdout in vaguely comprehensible form
|
||||
static void regdump(char_u *pattern, bt_regprog_T *r)
|
||||
{
|
||||
char_u *s;
|
||||
@ -5353,9 +5279,7 @@ static void regdump(char_u *pattern, bt_regprog_T *r)
|
||||
|
||||
#ifdef REGEXP_DEBUG
|
||||
|
||||
/*
|
||||
* regprop - printable representation of opcode
|
||||
*/
|
||||
// regprop - printable representation of opcode
|
||||
static char_u *regprop(char_u *op)
|
||||
{
|
||||
char *p;
|
||||
|
@ -1,13 +1,11 @@
|
||||
/*
|
||||
* NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE
|
||||
*
|
||||
* This is NOT the original regular expression code as written by Henry
|
||||
* Spencer. This code has been modified specifically for use with Vim, and
|
||||
* should not be used apart from compiling Vim. If you want a good regular
|
||||
* expression library, get the original code.
|
||||
*
|
||||
* NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE
|
||||
*/
|
||||
// NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE
|
||||
//
|
||||
// This is NOT the original regular expression code as written by Henry
|
||||
// Spencer. This code has been modified specifically for use with Vim, and
|
||||
// should not be used apart from compiling Vim. If you want a good regular
|
||||
// expression library, get the original code.
|
||||
//
|
||||
// NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE
|
||||
|
||||
#ifndef NVIM_REGEXP_DEFS_H
|
||||
#define NVIM_REGEXP_DEFS_H
|
||||
@ -17,18 +15,14 @@
|
||||
#include "nvim/pos.h"
|
||||
#include "nvim/types.h"
|
||||
|
||||
/*
|
||||
* The number of sub-matches is limited to 10.
|
||||
* The first one (index 0) is the whole match, referenced with "\0".
|
||||
* The second one (index 1) is the first sub-match, referenced with "\1".
|
||||
* This goes up to the tenth (index 9), referenced with "\9".
|
||||
*/
|
||||
// The number of sub-matches is limited to 10.
|
||||
// The first one (index 0) is the whole match, referenced with "\0".
|
||||
// The second one (index 1) is the first sub-match, referenced with "\1".
|
||||
// This goes up to the tenth (index 9), referenced with "\9".
|
||||
#define NSUBEXP 10
|
||||
|
||||
/*
|
||||
* In the NFA engine: how many braces are allowed.
|
||||
* TODO(RE): Use dynamic memory allocation instead of static, like here
|
||||
*/
|
||||
// In the NFA engine: how many braces are allowed.
|
||||
// TODO(RE): Use dynamic memory allocation instead of static, like here
|
||||
#define NFA_MAX_BRACES 20
|
||||
|
||||
// In the NFA engine: how many states are allowed.
|
||||
@ -61,11 +55,9 @@ typedef struct {
|
||||
|
||||
#include "nvim/buffer_defs.h"
|
||||
|
||||
/*
|
||||
* Structure returned by vim_regcomp() to pass on to vim_regexec().
|
||||
* This is the general structure. For the actual matcher, two specific
|
||||
* structures are used. See code below.
|
||||
*/
|
||||
// Structure returned by vim_regcomp() to pass on to vim_regexec().
|
||||
// This is the general structure. For the actual matcher, two specific
|
||||
// structures are used. See code below.
|
||||
struct regprog {
|
||||
regengine_T *engine;
|
||||
unsigned regflags;
|
||||
@ -74,11 +66,9 @@ struct regprog {
|
||||
bool re_in_use; ///< prog is being executed
|
||||
};
|
||||
|
||||
/*
|
||||
* Structure used by the back track matcher.
|
||||
* These fields are only to be used in regexp.c!
|
||||
* See regexp.c for an explanation.
|
||||
*/
|
||||
// Structure used by the back track matcher.
|
||||
// These fields are only to be used in regexp.c!
|
||||
// See regexp.c for an explanation.
|
||||
typedef struct {
|
||||
// These four members implement regprog_T.
|
||||
regengine_T *engine;
|
||||
@ -107,9 +97,7 @@ struct nfa_state {
|
||||
int val;
|
||||
};
|
||||
|
||||
/*
|
||||
* Structure used by the NFA matcher.
|
||||
*/
|
||||
// Structure used by the NFA matcher.
|
||||
typedef struct {
|
||||
// These four members implement regprog_T.
|
||||
regengine_T *engine;
|
||||
@ -133,11 +121,9 @@ typedef struct {
|
||||
nfa_state_T state[1]; // actually longer..
|
||||
} nfa_regprog_T;
|
||||
|
||||
/*
|
||||
* Structure to be used for single-line matching.
|
||||
* Sub-match "no" starts at "startp[no]" and ends just before "endp[no]".
|
||||
* When there is no match, the pointer is NULL.
|
||||
*/
|
||||
// Structure to be used for single-line matching.
|
||||
// Sub-match "no" starts at "startp[no]" and ends just before "endp[no]".
|
||||
// When there is no match, the pointer is NULL.
|
||||
typedef struct {
|
||||
regprog_T *regprog;
|
||||
char *startp[NSUBEXP];
|
||||
@ -145,11 +131,9 @@ typedef struct {
|
||||
bool rm_ic;
|
||||
} regmatch_T;
|
||||
|
||||
/*
|
||||
* Structure used to store external references: "\z\(\)" to "\z\1".
|
||||
* Use a reference count to avoid the need to copy this around. When it goes
|
||||
* from 1 to zero the matches need to be freed.
|
||||
*/
|
||||
// Structure used to store external references: "\z\(\)" to "\z\1".
|
||||
// Use a reference count to avoid the need to copy this around. When it goes
|
||||
// from 1 to zero the matches need to be freed.
|
||||
struct reg_extmatch {
|
||||
int16_t refcnt;
|
||||
char_u *matches[NSUBEXP];
|
||||
|
@ -1,11 +1,9 @@
|
||||
// This is an open source non-commercial project. Dear PVS-Studio, please check
|
||||
// it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com
|
||||
|
||||
/*
|
||||
* NFA regular expression implementation.
|
||||
*
|
||||
* This file is included in "regexp.c".
|
||||
*/
|
||||
// NFA regular expression implementation.
|
||||
//
|
||||
// This file is included in "regexp.c".
|
||||
|
||||
#include <assert.h>
|
||||
#include <inttypes.h>
|
||||
@ -383,10 +381,8 @@ static void nfa_regcomp_start(char_u *expr, int re_flags)
|
||||
regcomp_start(expr, re_flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* Figure out if the NFA state list starts with an anchor, must match at start
|
||||
* of the line.
|
||||
*/
|
||||
// Figure out if the NFA state list starts with an anchor, must match at start
|
||||
// of the line.
|
||||
static int nfa_get_reganch(nfa_state_T *start, int depth)
|
||||
{
|
||||
nfa_state_T *p = start;
|
||||
@ -441,10 +437,8 @@ static int nfa_get_reganch(nfa_state_T *start, int depth)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Figure out if the NFA state list starts with a character which must match
|
||||
* at start of the match.
|
||||
*/
|
||||
// Figure out if the NFA state list starts with a character which must match
|
||||
// at start of the match.
|
||||
static int nfa_get_regstart(nfa_state_T *start, int depth)
|
||||
{
|
||||
nfa_state_T *p = start;
|
||||
@ -521,11 +515,9 @@ static int nfa_get_regstart(nfa_state_T *start, int depth)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Figure out if the NFA state list contains just literal text and nothing
|
||||
* else. If so return a string in allocated memory with what must match after
|
||||
* regstart. Otherwise return NULL.
|
||||
*/
|
||||
// Figure out if the NFA state list contains just literal text and nothing
|
||||
// else. If so return a string in allocated memory with what must match after
|
||||
// regstart. Otherwise return NULL.
|
||||
static char_u *nfa_get_match_text(nfa_state_T *start)
|
||||
{
|
||||
nfa_state_T *p = start;
|
||||
@ -557,10 +549,8 @@ static char_u *nfa_get_match_text(nfa_state_T *start)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate more space for post_start. Called when
|
||||
* running above the estimated number of states.
|
||||
*/
|
||||
// Allocate more space for post_start. Called when
|
||||
// running above the estimated number of states.
|
||||
static void realloc_post_list(void)
|
||||
{
|
||||
// For weird patterns the number of states can be very high. Increasing by
|
||||
@ -572,16 +562,14 @@ static void realloc_post_list(void)
|
||||
post_start = new_start;
|
||||
}
|
||||
|
||||
/*
|
||||
* Search between "start" and "end" and try to recognize a
|
||||
* character class in expanded form. For example [0-9].
|
||||
* On success, return the id the character class to be emitted.
|
||||
* On failure, return 0 (=FAIL)
|
||||
* Start points to the first char of the range, while end should point
|
||||
* to the closing brace.
|
||||
* Keep in mind that 'ignorecase' applies at execution time, thus [a-z] may
|
||||
* need to be interpreted as [a-zA-Z].
|
||||
*/
|
||||
// Search between "start" and "end" and try to recognize a
|
||||
// character class in expanded form. For example [0-9].
|
||||
// On success, return the id of the character class to be emitted.
|
||||
// On failure, return 0 (=FAIL)
|
||||
// Start points to the first char of the range, while end should point
|
||||
// to the closing brace.
|
||||
// Keep in mind that 'ignorecase' applies at execution time, thus [a-z] may
|
||||
// need to be interpreted as [a-zA-Z].
|
||||
static int nfa_recognize_char_class(char_u *start, char_u *end, int extra_newl)
|
||||
{
|
||||
#define CLASS_not 0x80
|
||||
@ -700,14 +688,12 @@ static int nfa_recognize_char_class(char_u *start, char_u *end, int extra_newl)
|
||||
return FAIL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Produce the bytes for equivalence class "c".
|
||||
* Currently only handles latin1, latin9 and utf-8.
|
||||
* Emits bytes in postfix notation: 'a,b,NFA_OR,c,NFA_OR' is
|
||||
* equivalent to 'a OR b OR c'
|
||||
*
|
||||
* NOTE! When changing this function, also update reg_equi_class()
|
||||
*/
|
||||
// Produce the bytes for equivalence class "c".
|
||||
// Currently only handles latin1, latin9 and utf-8.
|
||||
// Emits bytes in postfix notation: 'a,b,NFA_OR,c,NFA_OR' is
|
||||
// equivalent to 'a OR b OR c'
|
||||
//
|
||||
// NOTE! When changing this function, also update reg_equi_class()
|
||||
static void nfa_emit_equi_class(int c)
|
||||
{
|
||||
#define EMIT2(c) EMIT(c); EMIT(NFA_CONCAT);
|
||||
@ -1778,26 +1764,22 @@ static void nfa_emit_equi_class(int c)
|
||||
#undef EMIT2
|
||||
}
|
||||
|
||||
/*
|
||||
* Code to parse regular expression.
|
||||
*
|
||||
* We try to reuse parsing functions in regexp.c to
|
||||
* minimize surprise and keep the syntax consistent.
|
||||
*/
|
||||
// Code to parse regular expression.
|
||||
//
|
||||
// We try to reuse parsing functions in regexp.c to
|
||||
// minimize surprise and keep the syntax consistent.
|
||||
|
||||
/*
|
||||
* Parse the lowest level.
|
||||
*
|
||||
* An atom can be one of a long list of items. Many atoms match one character
|
||||
* in the text. It is often an ordinary character or a character class.
|
||||
* Braces can be used to make a pattern into an atom. The "\z(\)" construct
|
||||
* is only for syntax highlighting.
|
||||
*
|
||||
* atom ::= ordinary-atom
|
||||
* or \( pattern \)
|
||||
* or \%( pattern \)
|
||||
* or \z( pattern \)
|
||||
*/
|
||||
// Parse the lowest level.
|
||||
//
|
||||
// An atom can be one of a long list of items. Many atoms match one character
|
||||
// in the text. It is often an ordinary character or a character class.
|
||||
// Braces can be used to make a pattern into an atom. The "\z(\)" construct
|
||||
// is only for syntax highlighting.
|
||||
//
|
||||
// atom ::= ordinary-atom
|
||||
// or \( pattern \)
|
||||
// or \%( pattern \)
|
||||
// or \z( pattern \)
|
||||
static int nfa_regatom(void)
|
||||
{
|
||||
int c;
|
||||
@ -1862,9 +1844,7 @@ static int nfa_regatom(void)
|
||||
// "\_x" is character class plus newline
|
||||
FALLTHROUGH;
|
||||
|
||||
/*
|
||||
* Character classes.
|
||||
*/
|
||||
// Character classes.
|
||||
case Magic('.'):
|
||||
case Magic('i'):
|
||||
case Magic('I'):
|
||||
@ -2228,24 +2208,20 @@ static int nfa_regatom(void)
|
||||
|
||||
case Magic('['):
|
||||
collection:
|
||||
/*
|
||||
* [abc] uses NFA_START_COLL - NFA_END_COLL
|
||||
* [^abc] uses NFA_START_NEG_COLL - NFA_END_NEG_COLL
|
||||
* Each character is produced as a regular state, using
|
||||
* NFA_CONCAT to bind them together.
|
||||
* Besides normal characters there can be:
|
||||
* - character classes NFA_CLASS_*
|
||||
* - ranges, two characters followed by NFA_RANGE.
|
||||
*/
|
||||
// [abc] uses NFA_START_COLL - NFA_END_COLL
|
||||
// [^abc] uses NFA_START_NEG_COLL - NFA_END_NEG_COLL
|
||||
// Each character is produced as a regular state, using
|
||||
// NFA_CONCAT to bind them together.
|
||||
// Besides normal characters there can be:
|
||||
// - character classes NFA_CLASS_*
|
||||
// - ranges, two characters followed by NFA_RANGE.
|
||||
|
||||
p = (char_u *)regparse;
|
||||
endp = skip_anyof((char *)p);
|
||||
if (*endp == ']') {
|
||||
/*
|
||||
* Try to reverse engineer character classes. For example,
|
||||
* recognize that [0-9] stands for \d and [A-Za-z_] for \h,
|
||||
* and perform the necessary substitutions in the NFA.
|
||||
*/
|
||||
// Try to reverse engineer character classes. For example,
|
||||
// recognize that [0-9] stands for \d and [A-Za-z_] for \h,
|
||||
// and perform the necessary substitutions in the NFA.
|
||||
int result = nfa_recognize_char_class((char_u *)regparse, endp, extra == NFA_ADD_NL);
|
||||
if (result != FAIL) {
|
||||
if (result >= NFA_FIRST_NL && result <= NFA_LAST_NL) {
|
||||
@ -2259,10 +2235,8 @@ collection:
|
||||
MB_PTR_ADV(regparse);
|
||||
return OK;
|
||||
}
|
||||
/*
|
||||
* Failed to recognize a character class. Use the simple
|
||||
* version that turns [abc] into 'a' OR 'b' OR 'c'
|
||||
*/
|
||||
// Failed to recognize a character class. Use the simple
|
||||
// version that turns [abc] into 'a' OR 'b' OR 'c'
|
||||
startc = -1;
|
||||
negated = false;
|
||||
if (*regparse == '^') { // negated range
|
||||
@ -2554,16 +2528,14 @@ nfa_do_multibyte:
|
||||
return OK;
|
||||
}
|
||||
|
||||
/*
|
||||
* Parse something followed by possible [*+=].
|
||||
*
|
||||
* A piece is an atom, possibly followed by a multi, an indication of how many
|
||||
* times the atom can be matched. Example: "a*" matches any sequence of "a"
|
||||
* characters: "", "a", "aa", etc.
|
||||
*
|
||||
* piece ::= atom
|
||||
* or atom multi
|
||||
*/
|
||||
// Parse something followed by possible [*+=].
|
||||
//
|
||||
// A piece is an atom, possibly followed by a multi, an indication of how many
|
||||
// times the atom can be matched. Example: "a*" matches any sequence of "a"
|
||||
// characters: "", "a", "aa", etc.
|
||||
//
|
||||
// piece ::= atom
|
||||
// or atom multi
|
||||
static int nfa_regpiece(void)
|
||||
{
|
||||
int i;
|
||||
@ -2601,17 +2573,15 @@ static int nfa_regpiece(void)
|
||||
break;
|
||||
|
||||
case Magic('+'):
|
||||
/*
|
||||
* Trick: Normally, (a*)\+ would match the whole input "aaa". The
|
||||
* first and only submatch would be "aaa". But the backtracking
|
||||
* engine interprets the plus as "try matching one more time", and
|
||||
* a* matches a second time at the end of the input, the empty
|
||||
* string.
|
||||
* The submatch will be the empty string.
|
||||
*
|
||||
* In order to be consistent with the old engine, we replace
|
||||
* <atom>+ with <atom><atom>*
|
||||
*/
|
||||
// Trick: Normally, (a*)\+ would match the whole input "aaa". The
|
||||
// first and only submatch would be "aaa". But the backtracking
|
||||
// engine interprets the plus as "try matching one more time", and
|
||||
// a* matches a second time at the end of the input, the empty
|
||||
// string.
|
||||
// The submatch will be the empty string.
|
||||
//
|
||||
// In order to be consistent with the old engine, we replace
|
||||
// <atom>+ with <atom><atom>*
|
||||
restore_parse_state(&old_state);
|
||||
curchr = -1;
|
||||
if (nfa_regatom() == FAIL) {
|
||||
@ -2770,16 +2740,14 @@ static int nfa_regpiece(void)
|
||||
return OK;
|
||||
}
|
||||
|
||||
/*
|
||||
* Parse one or more pieces, concatenated. It matches a match for the
|
||||
* first piece, followed by a match for the second piece, etc. Example:
|
||||
* "f[0-9]b", first matches "f", then a digit and then "b".
|
||||
*
|
||||
* concat ::= piece
|
||||
* or piece piece
|
||||
* or piece piece piece
|
||||
* etc.
|
||||
*/
|
||||
// Parse one or more pieces, concatenated. It matches a match for the
|
||||
// first piece, followed by a match for the second piece, etc. Example:
|
||||
// "f[0-9]b", first matches "f", then a digit and then "b".
|
||||
//
|
||||
// concat ::= piece
|
||||
// or piece piece
|
||||
// or piece piece piece
|
||||
// etc.
|
||||
static int nfa_regconcat(void)
|
||||
{
|
||||
bool cont = true;
|
||||
@ -2843,18 +2811,16 @@ static int nfa_regconcat(void)
|
||||
return OK;
|
||||
}
|
||||
|
||||
/*
|
||||
* Parse a branch, one or more concats, separated by "\&". It matches the
|
||||
* last concat, but only if all the preceding concats also match at the same
|
||||
* position. Examples:
|
||||
* "foobeep\&..." matches "foo" in "foobeep".
|
||||
* ".*Peter\&.*Bob" matches in a line containing both "Peter" and "Bob"
|
||||
*
|
||||
* branch ::= concat
|
||||
* or concat \& concat
|
||||
* or concat \& concat \& concat
|
||||
* etc.
|
||||
*/
|
||||
// Parse a branch, one or more concats, separated by "\&". It matches the
|
||||
// last concat, but only if all the preceding concats also match at the same
|
||||
// position. Examples:
|
||||
// "foobeep\&..." matches "foo" in "foobeep".
|
||||
// ".*Peter\&.*Bob" matches in a line containing both "Peter" and "Bob"
|
||||
//
|
||||
// branch ::= concat
|
||||
// or concat \& concat
|
||||
// or concat \& concat \& concat
|
||||
// etc.
|
||||
static int nfa_regbranch(void)
|
||||
{
|
||||
int old_post_pos;
|
||||
@ -3311,9 +3277,7 @@ static FILE *log_fd;
|
||||
static char_u e_log_open_failed[] =
|
||||
N_("Could not open temporary log file for writing, displaying on stderr... ");
|
||||
|
||||
/*
|
||||
* Print the postfix notation of the current regexp.
|
||||
*/
|
||||
// Print the postfix notation of the current regexp.
|
||||
static void nfa_postfix_dump(char_u *expr, int retval)
|
||||
{
|
||||
int *p;
|
||||
@ -3341,9 +3305,7 @@ static void nfa_postfix_dump(char_u *expr, int retval)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Print the NFA starting with a root node "state".
|
||||
*/
|
||||
// Print the NFA starting with a root node "state".
|
||||
static void nfa_print_state(FILE *debugf, nfa_state_T *state)
|
||||
{
|
||||
garray_T indent;
|
||||
@ -3413,9 +3375,7 @@ static void nfa_print_state2(FILE *debugf, nfa_state_T *state, garray_T *indent)
|
||||
ga_append(indent, NUL);
|
||||
}
|
||||
|
||||
/*
|
||||
* Print the NFA state machine.
|
||||
*/
|
||||
// Print the NFA state machine.
|
||||
static void nfa_dump(nfa_regprog_T *prog)
|
||||
{
|
||||
FILE *debugf = fopen(NFA_REGEXP_DUMP_LOG, "a");
|
||||
@ -3437,12 +3397,10 @@ static void nfa_dump(nfa_regprog_T *prog)
|
||||
fclose(debugf);
|
||||
}
|
||||
}
|
||||
#endif /* REGEXP_DEBUG */
|
||||
#endif // REGEXP_DEBUG
|
||||
|
||||
/*
|
||||
* Parse r.e. @expr and convert it into postfix form.
|
||||
* Return the postfix string on success, NULL otherwise.
|
||||
*/
|
||||
// Parse r.e. @expr and convert it into postfix form.
|
||||
// Return the postfix string on success, NULL otherwise.
|
||||
static int *re2post(void)
|
||||
{
|
||||
if (nfa_reg(REG_NOPAREN) == FAIL) {
|
||||
@ -3454,18 +3412,14 @@ static int *re2post(void)
|
||||
|
||||
// NB. Some of the code below is inspired by Russ's.
|
||||
|
||||
/*
|
||||
* Represents an NFA state plus zero or one or two arrows exiting.
|
||||
* if c == MATCH, no arrows out; matching state.
|
||||
* If c == SPLIT, unlabeled arrows to out and out1 (if != NULL).
|
||||
* If c < 256, labeled arrow with character c to out.
|
||||
*/
|
||||
// Represents an NFA state plus zero or one or two arrows exiting.
|
||||
// If c == MATCH, no arrows out; matching state.
|
||||
// If c == SPLIT, unlabeled arrows to out and out1 (if != NULL).
|
||||
// If c < 256, labeled arrow with character c to out.
|
||||
|
||||
static nfa_state_T *state_ptr; // points to nfa_prog->state
|
||||
|
||||
/*
|
||||
* Allocate and initialize nfa_state_T.
|
||||
*/
|
||||
// Allocate and initialize nfa_state_T.
|
||||
static nfa_state_T *alloc_state(int c, nfa_state_T *out, nfa_state_T *out1)
|
||||
{
|
||||
nfa_state_T *s;
|
||||
@ -3488,16 +3442,12 @@ static nfa_state_T *alloc_state(int c, nfa_state_T *out, nfa_state_T *out1)
|
||||
return s;
|
||||
}
|
||||
|
||||
/*
|
||||
* A partially built NFA without the matching state filled in.
|
||||
* Frag_T.start points at the start state.
|
||||
* Frag_T.out is a list of places that need to be set to the
|
||||
* next state for this fragment.
|
||||
*/
|
||||
// A partially built NFA without the matching state filled in.
|
||||
// Frag_T.start points at the start state.
|
||||
// Frag_T.out is a list of places that need to be set to the
|
||||
// next state for this fragment.
|
||||
|
||||
/*
|
||||
* Initialize a Frag_T struct and return it.
|
||||
*/
|
||||
// Initialize a Frag_T struct and return it.
|
||||
static Frag_T frag(nfa_state_T *start, Ptrlist *out)
|
||||
{
|
||||
Frag_T n;
|
||||
@ -3507,9 +3457,7 @@ static Frag_T frag(nfa_state_T *start, Ptrlist *out)
|
||||
return n;
|
||||
}
|
||||
|
||||
/*
|
||||
* Create singleton list containing just outp.
|
||||
*/
|
||||
// Create singleton list containing just outp.
|
||||
static Ptrlist *list1(nfa_state_T **outp)
|
||||
{
|
||||
Ptrlist *l;
|
||||
@ -3519,9 +3467,7 @@ static Ptrlist *list1(nfa_state_T **outp)
|
||||
return l;
|
||||
}
|
||||
|
||||
/*
|
||||
* Patch the list of states at out to point to start.
|
||||
*/
|
||||
// Patch the list of states at out to point to start.
|
||||
static void patch(Ptrlist *l, nfa_state_T *s)
|
||||
{
|
||||
Ptrlist *next;
|
||||
@ -3532,9 +3478,7 @@ static void patch(Ptrlist *l, nfa_state_T *s)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Join the two lists l1 and l2, returning the combination.
|
||||
*/
|
||||
// Join the two lists l1 and l2, returning the combination.
|
||||
static Ptrlist *append(Ptrlist *l1, Ptrlist *l2)
|
||||
{
|
||||
Ptrlist *oldl1;
|
||||
@ -3547,9 +3491,7 @@ static Ptrlist *append(Ptrlist *l1, Ptrlist *l2)
|
||||
return oldl1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Stack used for transforming postfix form into NFA.
|
||||
*/
|
||||
// Stack used for transforming postfix form into NFA.
|
||||
static Frag_T empty;
|
||||
|
||||
static void st_error(int *postfix, int *end, int *p)
|
||||
@ -3592,9 +3534,7 @@ static void st_error(int *postfix, int *end, int *p)
|
||||
emsg(_("E874: (NFA) Could not pop the stack!"));
|
||||
}
|
||||
|
||||
/*
|
||||
* Push an item onto the stack.
|
||||
*/
|
||||
// Push an item onto the stack.
|
||||
static void st_push(Frag_T s, Frag_T **p, Frag_T *stack_end)
|
||||
{
|
||||
Frag_T *stackp = *p;
|
||||
@ -3606,9 +3546,7 @@ static void st_push(Frag_T s, Frag_T **p, Frag_T *stack_end)
|
||||
*p = *p + 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Pop an item from the stack.
|
||||
*/
|
||||
// Pop an item from the stack.
|
||||
static Frag_T st_pop(Frag_T **p, Frag_T *stack)
|
||||
{
|
||||
Frag_T *stackp;
|
||||
@ -3621,10 +3559,8 @@ static Frag_T st_pop(Frag_T **p, Frag_T *stack)
|
||||
return **p;
|
||||
}
|
||||
|
||||
/*
|
||||
* Estimate the maximum byte length of anything matching "state".
|
||||
* When unknown or unlimited return -1.
|
||||
*/
|
||||
// Estimate the maximum byte length of anything matching "state".
|
||||
// When unknown or unlimited return -1.
|
||||
static int nfa_max_width(nfa_state_T *startstate, int depth)
|
||||
{
|
||||
int l, r;
|
||||
@ -3827,10 +3763,8 @@ static int nfa_max_width(nfa_state_T *startstate, int depth)
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Convert a postfix form into its equivalent NFA.
|
||||
* Return the NFA start state on success, NULL otherwise.
|
||||
*/
|
||||
// Convert a postfix form into its equivalent NFA.
|
||||
// Return the NFA start state on success, NULL otherwise.
|
||||
static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size)
|
||||
{
|
||||
int *p;
|
||||
@ -3866,7 +3800,7 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size)
|
||||
stack_end = stack + (nstate + 1);
|
||||
}
|
||||
|
||||
for (p = postfix; p < end; ++p) {
|
||||
for (p = postfix; p < end; p++) {
|
||||
switch (*p) {
|
||||
case NFA_CONCAT:
|
||||
// Concatenation.
|
||||
@ -4350,15 +4284,13 @@ theend:
|
||||
#undef PUSH
|
||||
}
|
||||
|
||||
/*
|
||||
* After building the NFA program, inspect it to add optimization hints.
|
||||
*/
|
||||
// After building the NFA program, inspect it to add optimization hints.
|
||||
static void nfa_postprocess(nfa_regprog_T *prog)
|
||||
{
|
||||
int i;
|
||||
int c;
|
||||
|
||||
for (i = 0; i < prog->nstate; ++i) {
|
||||
for (i = 0; i < prog->nstate; i++) {
|
||||
c = prog->state[i].c;
|
||||
if (c == NFA_START_INVISIBLE
|
||||
|| c == NFA_START_INVISIBLE_NEG
|
||||
@ -4490,9 +4422,7 @@ static void clear_sub(regsub_T *sub)
|
||||
sub->in_use = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Copy the submatches from "from" to "to".
|
||||
*/
|
||||
// Copy the submatches from "from" to "to".
|
||||
static void copy_sub(regsub_T *to, regsub_T *from)
|
||||
{
|
||||
to->in_use = from->in_use;
|
||||
@ -4508,9 +4438,7 @@ static void copy_sub(regsub_T *to, regsub_T *from)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Like copy_sub() but exclude the main match.
|
||||
*/
|
||||
// Like copy_sub() but exclude the main match.
|
||||
static void copy_sub_off(regsub_T *to, regsub_T *from)
|
||||
{
|
||||
if (to->in_use < from->in_use) {
|
||||
@ -4528,9 +4456,7 @@ static void copy_sub_off(regsub_T *to, regsub_T *from)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Like copy_sub() but only do the end of the main match if \ze is present.
|
||||
*/
|
||||
// Like copy_sub() but only do the end of the main match if \ze is present.
|
||||
static void copy_ze_off(regsub_T *to, regsub_T *from)
|
||||
{
|
||||
if (rex.nfa_has_zend) {
|
||||
@ -4954,7 +4880,7 @@ static regsubs_T *addstate(nfa_list_T *l, nfa_state_T *state, regsubs_T *subs_ar
|
||||
// When called from addstate_here() do insert before
|
||||
// existing states.
|
||||
if (add_here) {
|
||||
for (k = 0; k < l->n && k < listindex; ++k) {
|
||||
for (k = 0; k < l->n && k < listindex; k++) {
|
||||
if (l->t[k].state->id == state->id) {
|
||||
found = true;
|
||||
break;
|
||||
@ -5094,7 +5020,7 @@ skip_add:
|
||||
save_in_use = -1;
|
||||
} else {
|
||||
save_in_use = sub->in_use;
|
||||
for (i = sub->in_use; i < subidx; ++i) {
|
||||
for (i = sub->in_use; i < subidx; i++) {
|
||||
sub->list.multi[i].start_lnum = -1;
|
||||
sub->list.multi[i].end_lnum = -1;
|
||||
}
|
||||
@ -5115,7 +5041,7 @@ skip_add:
|
||||
save_in_use = -1;
|
||||
} else {
|
||||
save_in_use = sub->in_use;
|
||||
for (i = sub->in_use; i < subidx; ++i) {
|
||||
for (i = sub->in_use; i < subidx; i++) {
|
||||
sub->list.line[i].start = NULL;
|
||||
sub->list.line[i].end = NULL;
|
||||
}
|
||||
@ -5314,9 +5240,7 @@ static regsubs_T *addstate_here(nfa_list_T *l, nfa_state_T *state, regsubs_T *su
|
||||
return r;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check character class "class" against current character c.
|
||||
*/
|
||||
// Check character class "class" against current character c.
|
||||
static int check_char_class(int class, int c)
|
||||
{
|
||||
switch (class) {
|
||||
@ -5502,11 +5426,9 @@ static int match_zref(int subidx, int *bytelen)
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Save list IDs for all NFA states of "prog" into "list".
|
||||
* Also reset the IDs to zero.
|
||||
* Only used for the recursive value lastlist[1].
|
||||
*/
|
||||
// Save list IDs for all NFA states of "prog" into "list".
|
||||
// Also reset the IDs to zero.
|
||||
// Only used for the recursive value lastlist[1].
|
||||
static void nfa_save_listids(nfa_regprog_T *prog, int *list)
|
||||
{
|
||||
int i;
|
||||
@ -5521,9 +5443,7 @@ static void nfa_save_listids(nfa_regprog_T *prog, int *list)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Restore list IDs from "list" to all NFA states.
|
||||
*/
|
||||
// Restore list IDs from "list" to all NFA states.
|
||||
static void nfa_restore_listids(nfa_regprog_T *prog, int *list)
|
||||
{
|
||||
int i;
|
||||
@ -5547,11 +5467,9 @@ static bool nfa_re_num_cmp(uintmax_t val, int op, uintmax_t pos)
|
||||
return val == pos;
|
||||
}
|
||||
|
||||
/*
|
||||
* Recursively call nfa_regmatch()
|
||||
* "pim" is NULL or contains info about a Postponed Invisible Match (start
|
||||
* position).
|
||||
*/
|
||||
// Recursively call nfa_regmatch()
|
||||
// "pim" is NULL or contains info about a Postponed Invisible Match (start
|
||||
// position).
|
||||
static int recursive_regmatch(nfa_state_T *state, nfa_pim_T *pim, nfa_regprog_T *prog,
|
||||
regsubs_T *submatch, regsubs_T *m, int **listids, int *listids_len)
|
||||
FUNC_ATTR_NONNULL_ARG(1, 3, 5, 6, 7)
|
||||
@ -5691,12 +5609,10 @@ static int recursive_regmatch(nfa_state_T *state, nfa_pim_T *pim, nfa_regprog_T
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* Estimate the chance of a match with "state" failing.
|
||||
* empty match: 0
|
||||
* NFA_ANY: 1
|
||||
* specific character: 99
|
||||
*/
|
||||
// Estimate the chance of a match with "state" failing.
|
||||
// empty match: 0
|
||||
// NFA_ANY: 1
|
||||
// specific character: 99
|
||||
static int failure_chance(nfa_state_T *state, int depth)
|
||||
{
|
||||
int c = state->c;
|
||||
@ -5851,9 +5767,7 @@ static int failure_chance(nfa_state_T *state, int depth)
|
||||
return 50;
|
||||
}
|
||||
|
||||
/*
|
||||
* Skip until the char "c" we know a match must start with.
|
||||
*/
|
||||
// Skip until the char "c" we know a match must start with.
|
||||
static int skip_to_start(int c, colnr_T *colp)
|
||||
{
|
||||
const char_u *const s = cstrchr(rex.line + *colp, c);
|
||||
@ -5864,11 +5778,9 @@ static int skip_to_start(int c, colnr_T *colp)
|
||||
return OK;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check for a match with match_text.
|
||||
* Called after skip_to_start() has found regstart.
|
||||
* Returns zero for no match, 1 for a match.
|
||||
*/
|
||||
// Check for a match with match_text.
|
||||
// Called after skip_to_start() has found regstart.
|
||||
// Returns zero for no match, 1 for a match.
|
||||
static long find_match_text(colnr_T startcol, int regstart, char_u *match_text)
|
||||
{
|
||||
#define PTR2LEN(x) utf_ptr2len(x)
|
||||
@ -6038,9 +5950,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, regsubs_T *subm
|
||||
add_off = clen; \
|
||||
}
|
||||
|
||||
/*
|
||||
* Run for each character.
|
||||
*/
|
||||
// Run for each character.
|
||||
for (;;) {
|
||||
int curc = utf_ptr2char((char *)rex.input);
|
||||
int clen = utfc_ptr2len((char *)rex.input);
|
||||
@ -6086,9 +5996,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, regsubs_T *subm
|
||||
#ifdef NFA_REGEXP_DEBUG_LOG
|
||||
fprintf(debug, "\n-------------------\n");
|
||||
#endif
|
||||
/*
|
||||
* If the state lists are empty we can stop.
|
||||
*/
|
||||
// If the state lists are empty we can stop.
|
||||
if (thislist->n == 0) {
|
||||
break;
|
||||
}
|
||||
@ -6131,10 +6039,8 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, regsubs_T *subm
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Handle the possible codes of the current state.
|
||||
* The most important is NFA_MATCH.
|
||||
*/
|
||||
// Handle the possible codes of the current state.
|
||||
// The most important is NFA_MATCH.
|
||||
add_state = NULL;
|
||||
add_here = false;
|
||||
add_count = 0;
|
||||
@ -7525,10 +7431,8 @@ theend:
|
||||
return retval;
|
||||
}
|
||||
|
||||
/*
|
||||
* Compile a regular expression into internal code for the NFA matcher.
|
||||
* Returns the program in allocated space. Returns NULL for an error.
|
||||
*/
|
||||
// Compile a regular expression into internal code for the NFA matcher.
|
||||
// Returns the program in allocated space. Returns NULL for an error.
|
||||
static regprog_T *nfa_regcomp(char_u *expr, int re_flags)
|
||||
{
|
||||
nfa_regprog_T *prog = NULL;
|
||||
@ -7554,11 +7458,9 @@ static regprog_T *nfa_regcomp(char_u *expr, int re_flags)
|
||||
goto fail; // Cascaded (syntax?) error
|
||||
}
|
||||
|
||||
/*
|
||||
* In order to build the NFA, we parse the input regexp twice:
|
||||
* 1. first pass to count size (so we can allocate space)
|
||||
* 2. second to emit code
|
||||
*/
|
||||
// In order to build the NFA, we parse the input regexp twice:
|
||||
// 1. first pass to count size (so we can allocate space)
|
||||
// 2. second to emit code
|
||||
#ifdef REGEXP_DEBUG
|
||||
{
|
||||
FILE *f = fopen(NFA_REGEXP_RUN_LOG, "a");
|
||||
@ -7573,10 +7475,8 @@ static regprog_T *nfa_regcomp(char_u *expr, int re_flags)
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* PASS 1
|
||||
* Count number of NFA states in "nstate". Do not build the NFA.
|
||||
*/
|
||||
// PASS 1
|
||||
// Count number of NFA states in "nstate". Do not build the NFA.
|
||||
post2nfa(postfix, post_ptr, true);
|
||||
|
||||
// allocate the regprog with space for the compiled regexp
|
||||
@ -7585,10 +7485,8 @@ static regprog_T *nfa_regcomp(char_u *expr, int re_flags)
|
||||
state_ptr = prog->state;
|
||||
prog->re_in_use = false;
|
||||
|
||||
/*
|
||||
* PASS 2
|
||||
* Build the NFA
|
||||
*/
|
||||
// PASS 2
|
||||
// Build the NFA
|
||||
prog->start = post2nfa(postfix, post_ptr, false);
|
||||
if (prog->start == NULL) {
|
||||
goto fail;
|
||||
@ -7632,9 +7530,7 @@ fail:
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* Free a compiled regexp program, returned by nfa_regcomp().
|
||||
*/
|
||||
// Free a compiled regexp program, returned by nfa_regcomp().
|
||||
static void nfa_regfree(regprog_T *prog)
|
||||
{
|
||||
if (prog != NULL) {
|
||||
|
@ -2465,9 +2465,9 @@ static afffile_T *spell_read_aff(spellinfo_T *spin, char_u *fname)
|
||||
|
||||
aff_entry->ae_cond = (char_u *)getroom_save(spin, (char_u *)items[4]);
|
||||
if (*items[0] == 'P') {
|
||||
sprintf((char *)buf, "^%s", items[4]);
|
||||
sprintf((char *)buf, "^%s", items[4]); // NOLINT(runtime/printf)
|
||||
} else {
|
||||
sprintf((char *)buf, "%s$", items[4]);
|
||||
sprintf((char *)buf, "%s$", items[4]); // NOLINT(runtime/printf)
|
||||
}
|
||||
aff_entry->ae_prog = vim_regcomp((char *)buf, RE_MAGIC + RE_STRING + RE_STRICT);
|
||||
if (aff_entry->ae_prog == NULL) {
|
||||
@ -2514,8 +2514,7 @@ static afffile_T *spell_read_aff(spellinfo_T *spin, char_u *fname)
|
||||
onecap_copy((char_u *)items[4], buf, true);
|
||||
aff_entry->ae_cond = (char_u *)getroom_save(spin, buf);
|
||||
if (aff_entry->ae_cond != NULL) {
|
||||
sprintf((char *)buf, "^%s",
|
||||
aff_entry->ae_cond);
|
||||
sprintf((char *)buf, "^%s", aff_entry->ae_cond); // NOLINT(runtime/printf)
|
||||
vim_regfree(aff_entry->ae_prog);
|
||||
aff_entry->ae_prog = vim_regcomp((char *)buf, RE_MAGIC + RE_STRING);
|
||||
}
|
||||
@ -3614,7 +3613,7 @@ static int store_aff_word(spellinfo_T *spin, char_u *word, char_u *afflist, afff
|
||||
if (store_aff_word(spin, newword, ae->ae_flags,
|
||||
affile, &affile->af_suff, xht,
|
||||
use_condit & (xht == NULL
|
||||
? ~0 : ~CONDIT_SUF),
|
||||
? ~0 : ~CONDIT_SUF),
|
||||
use_flags, use_pfxlist, pfxlen) == FAIL) {
|
||||
retval = FAIL;
|
||||
}
|
||||
|
@ -1035,9 +1035,7 @@ int vim_vsnprintf_typval(char *str, size_t str_m, const char *fmt, va_list ap, t
|
||||
: va_arg(ap, long long)); // NOLINT (runtime/int)
|
||||
break;
|
||||
case 'z':
|
||||
arg = (tvs
|
||||
? (ptrdiff_t)tv_nr(tvs, &arg_idx)
|
||||
: va_arg(ap, ptrdiff_t));
|
||||
arg = (tvs ? (ptrdiff_t)tv_nr(tvs, &arg_idx) : va_arg(ap, ptrdiff_t));
|
||||
break;
|
||||
}
|
||||
if (arg > 0) {
|
||||
@ -1049,19 +1047,13 @@ int vim_vsnprintf_typval(char *str, size_t str_m, const char *fmt, va_list ap, t
|
||||
// unsigned
|
||||
switch (length_modifier) {
|
||||
case '\0':
|
||||
uarg = (unsigned int)(tvs
|
||||
? tv_nr(tvs, &arg_idx)
|
||||
: va_arg(ap, unsigned int));
|
||||
uarg = (unsigned int)(tvs ? tv_nr(tvs, &arg_idx) : va_arg(ap, unsigned int));
|
||||
break;
|
||||
case 'h':
|
||||
uarg = (uint16_t)(tvs
|
||||
? tv_nr(tvs, &arg_idx)
|
||||
: va_arg(ap, unsigned int));
|
||||
uarg = (uint16_t)(tvs ? tv_nr(tvs, &arg_idx) : va_arg(ap, unsigned int));
|
||||
break;
|
||||
case 'l':
|
||||
uarg = (tvs
|
||||
? (unsigned long)tv_nr(tvs, &arg_idx)
|
||||
: va_arg(ap, unsigned long));
|
||||
uarg = (tvs ? (unsigned long)tv_nr(tvs, &arg_idx) : va_arg(ap, unsigned long));
|
||||
break;
|
||||
case '2':
|
||||
uarg = (uintmax_t)(unsigned long long)( // NOLINT (runtime/int)
|
||||
@ -1071,9 +1063,7 @@ int vim_vsnprintf_typval(char *str, size_t str_m, const char *fmt, va_list ap, t
|
||||
: va_arg(ap, unsigned long long)); // NOLINT (runtime/int)
|
||||
break;
|
||||
case 'z':
|
||||
uarg = (tvs
|
||||
? (size_t)tv_nr(tvs, &arg_idx)
|
||||
: va_arg(ap, size_t));
|
||||
uarg = (tvs ? (size_t)tv_nr(tvs, &arg_idx) : va_arg(ap, size_t));
|
||||
break;
|
||||
}
|
||||
arg_sign = (uarg != 0);
|
||||
|
@ -1655,13 +1655,12 @@ static int syn_current_attr(const bool syncing, const bool displaying, bool *con
|
||||
&& (spp->sp_type == SPTYPE_MATCH
|
||||
|| spp->sp_type == SPTYPE_START)
|
||||
&& (current_next_list != NULL
|
||||
? in_id_list(NULL, current_next_list,
|
||||
&spp->sp_syn, 0)
|
||||
: (cur_si == NULL
|
||||
? !(spp->sp_flags & HL_CONTAINED)
|
||||
: in_id_list(cur_si,
|
||||
cur_si->si_cont_list, &spp->sp_syn,
|
||||
spp->sp_flags & HL_CONTAINED)))) {
|
||||
? in_id_list(NULL, current_next_list, &spp->sp_syn, 0)
|
||||
: (cur_si == NULL
|
||||
? !(spp->sp_flags & HL_CONTAINED)
|
||||
: in_id_list(cur_si,
|
||||
cur_si->si_cont_list, &spp->sp_syn,
|
||||
spp->sp_flags & HL_CONTAINED)))) {
|
||||
// If we already tried matching in this line, and
|
||||
// there isn't a match before next_match_col, skip
|
||||
// this item.
|
||||
@ -2788,9 +2787,9 @@ static keyentry_T *match_keyword(char *keyword, hashtab_T *ht, stateitem_T *cur_
|
||||
if (current_next_list != 0
|
||||
? in_id_list(NULL, current_next_list, &kp->k_syn, 0)
|
||||
: (cur_si == NULL
|
||||
? !(kp->flags & HL_CONTAINED)
|
||||
: in_id_list(cur_si, cur_si->si_cont_list,
|
||||
&kp->k_syn, kp->flags & HL_CONTAINED))) {
|
||||
? !(kp->flags & HL_CONTAINED)
|
||||
: in_id_list(cur_si, cur_si->si_cont_list,
|
||||
&kp->k_syn, kp->flags & HL_CONTAINED))) {
|
||||
return kp;
|
||||
}
|
||||
}
|
||||
|
@ -628,8 +628,8 @@ LexExprToken viml_pexpr_next_token(ParserState *const pstate, const int flags)
|
||||
GET_CCS(ret, pline);
|
||||
ret.data.cmp.inv = (schar == '<');
|
||||
ret.data.cmp.type = ((ret.data.cmp.inv ^ haseqsign)
|
||||
? kExprCmpGreaterOrEqual
|
||||
: kExprCmpGreater);
|
||||
? kExprCmpGreaterOrEqual
|
||||
: kExprCmpGreater);
|
||||
break;
|
||||
}
|
||||
|
||||
@ -1963,8 +1963,8 @@ ExprAST viml_pexpr_parse(ParserState *const pstate, const int flags)
|
||||
|| ((*kv_Z(ast_stack, 1))->type != kExprNodeConcat
|
||||
&& ((*kv_Z(ast_stack, 1))->type
|
||||
!= kExprNodeConcatOrSubscript))))
|
||||
? kELFlagAllowFloat
|
||||
: 0));
|
||||
? kELFlagAllowFloat
|
||||
: 0));
|
||||
LexExprToken cur_token = viml_pexpr_next_token(pstate,
|
||||
want_node_to_lexer_flags[want_node] |
|
||||
lexer_additional_flags);
|
||||
@ -2031,9 +2031,9 @@ viml_pexpr_parse_process_token:
|
||||
const bool node_is_key = (
|
||||
is_concat_or_subscript
|
||||
&& (cur_token.type == kExprLexPlainIdentifier
|
||||
? (!cur_token.data.var.autoload
|
||||
&& cur_token.data.var.scope == kExprVarScopeMissing)
|
||||
: (cur_token.type == kExprLexNumber))
|
||||
? (!cur_token.data.var.autoload
|
||||
&& cur_token.data.var.scope == kExprVarScopeMissing)
|
||||
: (cur_token.type == kExprLexNumber))
|
||||
&& prev_token.type != kExprLexSpacing);
|
||||
if (is_concat_or_subscript && !node_is_key) {
|
||||
// Note: in Vim "d. a" (this is the reason behind `prev_token.type !=
|
||||
@ -2707,14 +2707,14 @@ viml_pexpr_parse_figure_brace_closing_error:
|
||||
break;
|
||||
case kExprLexPlainIdentifier: {
|
||||
const ExprVarScope scope = (cur_token.type == kExprLexInvalid
|
||||
? kExprVarScopeMissing
|
||||
: cur_token.data.var.scope);
|
||||
? kExprVarScopeMissing
|
||||
: cur_token.data.var.scope);
|
||||
if (want_node == kENodeValue) {
|
||||
want_node = kENodeOperator;
|
||||
NEW_NODE_WITH_CUR_POS(cur_node,
|
||||
(node_is_key
|
||||
? kExprNodePlainKey
|
||||
: kExprNodePlainIdentifier));
|
||||
? kExprNodePlainKey
|
||||
: kExprNodePlainIdentifier));
|
||||
cur_node->data.var.scope = scope;
|
||||
const size_t scope_shift = (scope == kExprVarScopeMissing ? 0 : 2);
|
||||
cur_node->data.var.ident = (pline.data + cur_token.start.col
|
||||
@ -2732,8 +2732,8 @@ viml_pexpr_parse_figure_brace_closing_error:
|
||||
scope_shift),
|
||||
cur_token.len - scope_shift,
|
||||
(node_is_key
|
||||
? HL(IdentifierKey)
|
||||
: HL(IdentifierName)));
|
||||
? HL(IdentifierKey)
|
||||
: HL(IdentifierName)));
|
||||
} else {
|
||||
if (scope == kExprVarScopeMissing) {
|
||||
// uncrustify:off
|
||||
@ -2902,15 +2902,15 @@ viml_pexpr_parse_no_paren_closing_error: {}
|
||||
// different error numbers: "E114: Missing quote" and
|
||||
// "E115: Missing quote".
|
||||
ERROR_FROM_TOKEN_AND_MSG(cur_token, (is_double
|
||||
? _("E114: Missing double quote: %.*s")
|
||||
: _("E115: Missing single quote: %.*s")));
|
||||
? _("E114: Missing double quote: %.*s")
|
||||
: _("E115: Missing single quote: %.*s")));
|
||||
}
|
||||
if (want_node == kENodeOperator) {
|
||||
OP_MISSING;
|
||||
}
|
||||
NEW_NODE_WITH_CUR_POS(cur_node, (is_double
|
||||
? kExprNodeDoubleQuotedString
|
||||
: kExprNodeSingleQuotedString));
|
||||
? kExprNodeDoubleQuotedString
|
||||
: kExprNodeSingleQuotedString));
|
||||
*top_node_p = cur_node;
|
||||
parse_quoted_string(pstate, cur_node, cur_token, &ast_stack, is_invalid);
|
||||
want_node = kENodeOperator;
|
||||
|
Loading…
Reference in New Issue
Block a user