charset: Avoid overflow in vim_str2nr

This commit is contained in:
ZyX 2017-10-16 00:07:32 +03:00
parent 2cb95bd937
commit 1a3635304b
4 changed files with 53 additions and 80 deletions

View File

@ -1620,13 +1620,16 @@ bool vim_isblankline(char_u *lbuf)
/// @param maxlen Max length of string to check. /// @param maxlen Max length of string to check.
void vim_str2nr(const char_u *const start, int *const prep, int *const len, void vim_str2nr(const char_u *const start, int *const prep, int *const len,
const int what, varnumber_T *const nptr, const int what, varnumber_T *const nptr,
uvarnumber_T *const unptr, const int maxlen) uvarnumber_T *const unptr, int maxlen)
FUNC_ATTR_NONNULL_ARG(1) FUNC_ATTR_NONNULL_ARG(1)
{ {
const char_u *ptr = start; const char *ptr = (const char *)start;
if (maxlen == 0) {
maxlen = (int)strlen(ptr);
}
const char *const e = ptr + maxlen;
int pre = 0; // default is decimal int pre = 0; // default is decimal
bool negative = false; bool negative = false;
uvarnumber_T un = 0;
if (ptr[0] == '-') { if (ptr[0] == '-') {
negative = true; negative = true;
@ -1635,19 +1638,19 @@ void vim_str2nr(const char_u *const start, int *const prep, int *const len,
// Recognize hex, octal and bin. // Recognize hex, octal and bin.
if ((what & (STR2NR_HEX|STR2NR_OCT|STR2NR_BIN)) if ((what & (STR2NR_HEX|STR2NR_OCT|STR2NR_BIN))
&& (maxlen == 0 || maxlen > 1) && maxlen > 1
&& (ptr[0] == '0') && (ptr[1] != '8') && (ptr[1] != '9')) { && ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9') {
pre = ptr[1]; pre = ptr[1];
if ((what & STR2NR_HEX) if ((what & STR2NR_HEX)
&& (maxlen == 0 || maxlen > 2) && maxlen > 2
&& ((pre == 'X') || (pre == 'x')) && (pre == 'X' || pre == 'x')
&& ascii_isxdigit(ptr[2])) { && ascii_isxdigit(ptr[2])) {
// hexadecimal // hexadecimal
ptr += 2; ptr += 2;
} else if ((what & STR2NR_BIN) } else if ((what & STR2NR_BIN)
&& (maxlen == 0 || maxlen > 2) && maxlen > 2
&& ((pre == 'B') || (pre == 'b')) && (pre == 'B' || pre == 'b')
&& ascii_isbdigit(ptr[2])) { && ascii_isbdigit(ptr[2])) {
// binary // binary
ptr += 2; ptr += 2;
@ -1657,32 +1660,26 @@ void vim_str2nr(const char_u *const start, int *const prep, int *const len,
if (what & STR2NR_OCT) { if (what & STR2NR_OCT) {
// Don't interpret "0", "08" or "0129" as octal. // Don't interpret "0", "08" or "0129" as octal.
for (int n = 1; ascii_isdigit(ptr[n]); ++n) { for (int i = 1; i < maxlen && ascii_isdigit(ptr[i]); i++) {
if (ptr[n] > '7') { if (ptr[i] > '7') {
// can't be octal // can't be octal
pre = 0; pre = 0;
break; break;
} }
if (ptr[n] >= '0') { if (ptr[i] >= '0') {
// assume octal // assume octal
pre = '0'; pre = '0';
} }
if (n == maxlen) {
break;
}
} }
} }
} }
} }
// Do the string-to-numeric conversion "manually" to avoid sscanf quirks. // Do the string-to-numeric conversion "manually" to avoid sscanf quirks.
int n = 1; uvarnumber_T un = 0;
if (pre == 'B' || pre == 'b' || what == (STR2NR_BIN|STR2NR_FORCE)) { if (pre == 'B' || pre == 'b' || what == (STR2NR_BIN|STR2NR_FORCE)) {
// bin // bin
if (pre != 0) { while (ptr < e && '0' <= *ptr && *ptr <= '1') {
n += 2; // skip over "0b"
}
while ('0' <= *ptr && *ptr <= '1') {
// avoid ubsan error for overflow // avoid ubsan error for overflow
if (un < UVARNUMBER_MAX / 2) { if (un < UVARNUMBER_MAX / 2) {
un = 2 * un + (uvarnumber_T)(*ptr - '0'); un = 2 * un + (uvarnumber_T)(*ptr - '0');
@ -1690,13 +1687,10 @@ void vim_str2nr(const char_u *const start, int *const prep, int *const len,
un = UVARNUMBER_MAX; un = UVARNUMBER_MAX;
} }
ptr++; ptr++;
if (n++ == maxlen) {
break;
}
} }
} else if (pre == '0' || what == (STR2NR_OCT|STR2NR_FORCE)) { } else if (pre == '0' || what == (STR2NR_OCT|STR2NR_FORCE)) {
// octal // octal
while ('0' <= *ptr && *ptr <= '7') { while (ptr < e && '0' <= *ptr && *ptr <= '7') {
// avoid ubsan error for overflow // avoid ubsan error for overflow
if (un < UVARNUMBER_MAX / 8) { if (un < UVARNUMBER_MAX / 8) {
un = 8 * un + (uvarnumber_T)(*ptr - '0'); un = 8 * un + (uvarnumber_T)(*ptr - '0');
@ -1704,16 +1698,10 @@ void vim_str2nr(const char_u *const start, int *const prep, int *const len,
un = UVARNUMBER_MAX; un = UVARNUMBER_MAX;
} }
ptr++; ptr++;
if (n++ == maxlen) {
break;
}
} }
} else if (pre == 'X' || pre == 'x' || what == (STR2NR_HEX|STR2NR_FORCE)) { } else if (pre == 'X' || pre == 'x' || what == (STR2NR_HEX|STR2NR_FORCE)) {
// hex // hex
if (pre != 0) { while (ptr < e && ascii_isxdigit(*ptr)) {
n += 2; // skip over "0x"
}
while (ascii_isxdigit(*ptr)) {
// avoid ubsan error for overflow // avoid ubsan error for overflow
if (un < UVARNUMBER_MAX / 16) { if (un < UVARNUMBER_MAX / 16) {
un = 16 * un + (uvarnumber_T)hex2nr(*ptr); un = 16 * un + (uvarnumber_T)hex2nr(*ptr);
@ -1721,13 +1709,10 @@ void vim_str2nr(const char_u *const start, int *const prep, int *const len,
un = UVARNUMBER_MAX; un = UVARNUMBER_MAX;
} }
ptr++; ptr++;
if (n++ == maxlen) {
break;
}
} }
} else { } else {
// decimal // decimal
while (ascii_isdigit(*ptr)) { while (ptr < e && ascii_isdigit(*ptr)) {
// avoid ubsan error for overflow // avoid ubsan error for overflow
if (un < UVARNUMBER_MAX / 10) { if (un < UVARNUMBER_MAX / 10) {
un = 10 * un + (uvarnumber_T)(*ptr - '0'); un = 10 * un + (uvarnumber_T)(*ptr - '0');
@ -1735,9 +1720,6 @@ void vim_str2nr(const char_u *const start, int *const prep, int *const len,
un = UVARNUMBER_MAX; un = UVARNUMBER_MAX;
} }
ptr++; ptr++;
if (n++ == maxlen) {
break;
}
} }
} }
@ -1746,7 +1728,7 @@ void vim_str2nr(const char_u *const start, int *const prep, int *const len,
} }
if (len != NULL) { if (len != NULL) {
*len = (int)(ptr - start); *len = (int)(ptr - (const char *)start);
} }
if (nptr != NULL) { if (nptr != NULL) {

View File

@ -25,12 +25,15 @@ int hex2nr(int c)
void vim_str2nr(const char_u *const start, int *const prep, int *const len, void vim_str2nr(const char_u *const start, int *const prep, int *const len,
const int what, varnumber_T *const nptr, const int what, varnumber_T *const nptr,
uvarnumber_T *const unptr, const int maxlen) uvarnumber_T *const unptr, int maxlen)
{ {
const char_u *ptr = start; const char *ptr = (const char *)start;
if (maxlen == 0) {
maxlen = (int)strlen(ptr);
}
const char *const e = ptr + maxlen;
int pre = 0; // default is decimal int pre = 0; // default is decimal
bool negative = false; bool negative = false;
uvarnumber_T un = 0;
if (ptr[0] == '-') { if (ptr[0] == '-') {
negative = true; negative = true;
@ -39,19 +42,19 @@ void vim_str2nr(const char_u *const start, int *const prep, int *const len,
// Recognize hex, octal and bin. // Recognize hex, octal and bin.
if ((what & (STR2NR_HEX|STR2NR_OCT|STR2NR_BIN)) if ((what & (STR2NR_HEX|STR2NR_OCT|STR2NR_BIN))
&& (maxlen == 0 || maxlen > 1) && maxlen > 1
&& (ptr[0] == '0') && (ptr[1] != '8') && (ptr[1] != '9')) { && ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9') {
pre = ptr[1]; pre = ptr[1];
if ((what & STR2NR_HEX) if ((what & STR2NR_HEX)
&& (maxlen == 0 || maxlen > 2) && maxlen > 2
&& ((pre == 'X') || (pre == 'x')) && (pre == 'X' || pre == 'x')
&& ascii_isxdigit(ptr[2])) { && ascii_isxdigit(ptr[2])) {
// hexadecimal // hexadecimal
ptr += 2; ptr += 2;
} else if ((what & STR2NR_BIN) } else if ((what & STR2NR_BIN)
&& (maxlen == 0 || maxlen > 2) && maxlen > 2
&& ((pre == 'B') || (pre == 'b')) && (pre == 'B' || pre == 'b')
&& ascii_isbdigit(ptr[2])) { && ascii_isbdigit(ptr[2])) {
// binary // binary
ptr += 2; ptr += 2;
@ -61,32 +64,26 @@ void vim_str2nr(const char_u *const start, int *const prep, int *const len,
if (what & STR2NR_OCT) { if (what & STR2NR_OCT) {
// Don't interpret "0", "08" or "0129" as octal. // Don't interpret "0", "08" or "0129" as octal.
for (int n = 1; ascii_isdigit(ptr[n]); ++n) { for (int i = 1; i < maxlen && ascii_isdigit(ptr[i]); i++) {
if (ptr[n] > '7') { if (ptr[i] > '7') {
// can't be octal // can't be octal
pre = 0; pre = 0;
break; break;
} }
if (ptr[n] >= '0') { if (ptr[i] >= '0') {
// assume octal // assume octal
pre = '0'; pre = '0';
} }
if (n == maxlen) {
break;
}
} }
} }
} }
} }
// Do the string-to-numeric conversion "manually" to avoid sscanf quirks. // Do the string-to-numeric conversion "manually" to avoid sscanf quirks.
int n = 1; uvarnumber_T un = 0;
if (pre == 'B' || pre == 'b' || what == (STR2NR_BIN|STR2NR_FORCE)) { if (pre == 'B' || pre == 'b' || what == (STR2NR_BIN|STR2NR_FORCE)) {
// bin // bin
if (pre != 0) { while (ptr < e && '0' <= *ptr && *ptr <= '1') {
n += 2; // skip over "0b"
}
while ('0' <= *ptr && *ptr <= '1') {
// avoid ubsan error for overflow // avoid ubsan error for overflow
if (un < UVARNUMBER_MAX / 2) { if (un < UVARNUMBER_MAX / 2) {
un = 2 * un + (uvarnumber_T)(*ptr - '0'); un = 2 * un + (uvarnumber_T)(*ptr - '0');
@ -94,13 +91,10 @@ void vim_str2nr(const char_u *const start, int *const prep, int *const len,
un = UVARNUMBER_MAX; un = UVARNUMBER_MAX;
} }
ptr++; ptr++;
if (n++ == maxlen) {
break;
}
} }
} else if (pre == '0' || what == (STR2NR_OCT|STR2NR_FORCE)) { } else if (pre == '0' || what == (STR2NR_OCT|STR2NR_FORCE)) {
// octal // octal
while ('0' <= *ptr && *ptr <= '7') { while (ptr < e && '0' <= *ptr && *ptr <= '7') {
// avoid ubsan error for overflow // avoid ubsan error for overflow
if (un < UVARNUMBER_MAX / 8) { if (un < UVARNUMBER_MAX / 8) {
un = 8 * un + (uvarnumber_T)(*ptr - '0'); un = 8 * un + (uvarnumber_T)(*ptr - '0');
@ -108,16 +102,10 @@ void vim_str2nr(const char_u *const start, int *const prep, int *const len,
un = UVARNUMBER_MAX; un = UVARNUMBER_MAX;
} }
ptr++; ptr++;
if (n++ == maxlen) {
break;
}
} }
} else if (pre == 'X' || pre == 'x' || what == (STR2NR_HEX|STR2NR_FORCE)) { } else if (pre == 'X' || pre == 'x' || what == (STR2NR_HEX|STR2NR_FORCE)) {
// hex // hex
if (pre != 0) { while (ptr < e && ascii_isxdigit(*ptr)) {
n += 2; // skip over "0x"
}
while (ascii_isxdigit(*ptr)) {
// avoid ubsan error for overflow // avoid ubsan error for overflow
if (un < UVARNUMBER_MAX / 16) { if (un < UVARNUMBER_MAX / 16) {
un = 16 * un + (uvarnumber_T)hex2nr(*ptr); un = 16 * un + (uvarnumber_T)hex2nr(*ptr);
@ -125,13 +113,10 @@ void vim_str2nr(const char_u *const start, int *const prep, int *const len,
un = UVARNUMBER_MAX; un = UVARNUMBER_MAX;
} }
ptr++; ptr++;
if (n++ == maxlen) {
break;
}
} }
} else { } else {
// decimal // decimal
while (ascii_isdigit(*ptr)) { while (ptr < e && ascii_isdigit(*ptr)) {
// avoid ubsan error for overflow // avoid ubsan error for overflow
if (un < UVARNUMBER_MAX / 10) { if (un < UVARNUMBER_MAX / 10) {
un = 10 * un + (uvarnumber_T)(*ptr - '0'); un = 10 * un + (uvarnumber_T)(*ptr - '0');
@ -139,9 +124,6 @@ void vim_str2nr(const char_u *const start, int *const prep, int *const len,
un = UVARNUMBER_MAX; un = UVARNUMBER_MAX;
} }
ptr++; ptr++;
if (n++ == maxlen) {
break;
}
} }
} }
@ -150,7 +132,7 @@ void vim_str2nr(const char_u *const start, int *const prep, int *const len,
} }
if (len != NULL) { if (len != NULL) {
*len = (int)(ptr - start); *len = (int)(ptr - (const char *)start);
} }
if (nptr != NULL) { if (nptr != NULL) {

View File

@ -292,6 +292,9 @@ describe('Expressions lexer', function()
simple_test({'0b102'}, 'Number', 4, {data={is_float=false, base=2, val=2}, str='0b10'}) simple_test({'0b102'}, 'Number', 4, {data={is_float=false, base=2, val=2}, str='0b10'})
simple_test({'10F'}, 'Number', 2, {data={is_float=false, base=10, val=10}, str='10'}) simple_test({'10F'}, 'Number', 2, {data={is_float=false, base=10, val=10}, str='10'})
simple_test({'0x0123456789ABCDEFG'}, 'Number', 18, {data={is_float=false, base=16, val=81985529216486895}, str='0x0123456789ABCDEF'}) simple_test({'0x0123456789ABCDEFG'}, 'Number', 18, {data={is_float=false, base=16, val=81985529216486895}, str='0x0123456789ABCDEF'})
simple_test({{data='00', size=2}}, 'Number', 2, {data={is_float=false, base=8, val=0}, str='00'})
simple_test({{data='009', size=2}}, 'Number', 2, {data={is_float=false, base=8, val=0}, str='00'})
simple_test({{data='01', size=1}}, 'Number', 1, {data={is_float=false, base=10, val=0}, str='0'})
end end
local function regular_scope_tests() local function regular_scope_tests()

View File

@ -6867,13 +6867,19 @@ describe('Expressions parser', function()
}, },
}, { }, {
}) })
check_parsing('0', 0, { check_parsing({data='01', size=1}, 0, {
-- 0
ast = { ast = {
'Integer(val=0):0:0:0', 'Integer(val=0):0:0:0',
}, },
}, { }, {
hl('Number', '0'), hl('Number', '0'),
}) })
check_parsing({data='001', size=2}, 0, {
ast = {
'Integer(val=0):0:0:00',
},
}, {
hl('Number', '00'),
})
end) end)
end) end)