vim-patch:9.0.1908: undefined behaviour upper/lower function ptrs (#25238)

Problem:  undefined behaviour upper/lower function ptrs
Solution: Fix UBSAN error in regexp and simplify upper/lowercase
          modifier code

The implementation of \u / \U / \l / \L modifiers in the substitute
command relies on remembering the state by setting function pointers on
func_all/func_one in the code. The code signature of `fptr_T` is
supposed to return void* (due to C function signatures not being able to
return itself due to type recursion), and the definition of the
functions (e.g. to_Upper) didn't follow this rule, and so the code tries
to cast functions of different signatures, resulting in undefined
behavior error under UBSAN in Clang 17. See vim/vim#12745.

We could just fix `do_Upper`/etc to just return void*, which would fix
the problem. However, these functions actually do not need to return
anything at all. It used to be the case that there was only one pointer
"func" to store the pointer, which is why the function needs to either
return itself or NULL to indicate whether it's a one time or ongoing
modification. However, c2c355df6f094cdb9e599fd395a78c14486ec697
(7.3.873) already made that obsolete by introducing `func_one` and
`func_all` to store one-time and ongoing operations separately, so these
functions don't actually need to return anything anymore because it's
implicit whether it's a one-time or ongoing operation. Simplify the code
to reflect that.

closes: vim/vim#13117

d25021cf03

Co-authored-by: Yee Cheng Chin <ychin.git@gmail.com>
This commit is contained in:
zeertzjq 2023-09-19 06:41:59 +08:00 committed by GitHub
parent 46402c16c0
commit bbde37fd64
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -59,11 +59,7 @@
#define un_Magic(x) ((x) + 256)
#define is_Magic(x) ((x) < 0)
// We should define ftpr as a pointer to a function returning a pointer to
// a function returning a pointer to a function ...
// This is impossible, so we declare a pointer to a function returning a
// pointer to a function returning void. This should work for all compilers.
typedef void (*(*fptr_T)(int *, int))(void);
typedef void (*fptr_T)(int *, int);
static int no_Magic(int x)
{
@ -1494,34 +1490,14 @@ static inline char *cstrchr(const char *const s, const int c)
// regsub stuff //
////////////////////////////////////////////////////////////////
// This stuff below really confuses cc on an SGI -- webb
static fptr_T do_upper(int *d, int c)
static void do_upper(int *d, int c)
{
*d = mb_toupper(c);
return (fptr_T)NULL;
}
static fptr_T do_Upper(int *d, int c)
{
*d = mb_toupper(c);
return (fptr_T)do_Upper;
}
static fptr_T do_lower(int *d, int c)
static void do_lower(int *d, int c)
{
*d = mb_tolower(c);
return (fptr_T)NULL;
}
static fptr_T do_Lower(int *d, int c)
{
*d = mb_tolower(c);
return (fptr_T)do_Lower;
}
/// regtilde(): Replace tildes in the pattern by the old pattern.
@ -1886,16 +1862,16 @@ static int vim_regsub_both(char *source, typval_T *expr, char *dest, int destlen
} else if (vim_strchr("uUlLeE", (uint8_t)(*src))) {
switch (*src++) {
case 'u':
func_one = (fptr_T)do_upper;
func_one = do_upper;
continue;
case 'U':
func_all = (fptr_T)do_Upper;
func_all = do_upper;
continue;
case 'l':
func_one = (fptr_T)do_lower;
func_one = do_lower;
continue;
case 'L':
func_all = (fptr_T)do_Lower;
func_all = do_lower;
continue;
case 'e':
case 'E':
@ -1954,11 +1930,13 @@ static int vim_regsub_both(char *source, typval_T *expr, char *dest, int destlen
} else {
c = utf_ptr2char(src - 1);
}
// Write to buffer, if copy is set.
if (func_one != NULL) {
func_one = (fptr_T)(func_one(&cc, c));
func_one(&cc, c);
func_one = NULL;
} else if (func_all != NULL) {
func_all = (fptr_T)(func_all(&cc, c));
func_all(&cc, c);
} else {
// just copy
cc = c;
@ -2061,11 +2039,10 @@ static int vim_regsub_both(char *source, typval_T *expr, char *dest, int destlen
c = utf_ptr2char(s);
if (func_one != (fptr_T)NULL) {
// Turbo C complains without the typecast
func_one = (fptr_T)(func_one(&cc, c));
func_one(&cc, c);
func_one = NULL;
} else if (func_all != (fptr_T)NULL) {
// Turbo C complains without the typecast
func_all = (fptr_T)(func_all(&cc, c));
func_all(&cc, c);
} else { // just copy
cc = c;
}