From 3ce064bbba8246a9212bb56f53f2090258b22559 Mon Sep 17 00:00:00 2001 From: Martijn Dekker Date: Mon, 11 Jul 2022 00:35:10 +0200 Subject: [PATCH] lex.c: endword(): fix out-of-bounds index to state table The lexer uses 256-byte state tables (see data/lexstates.c), one byte per possible value for the (unsigned) char type. But the sp variable used as an index to a state table in loops like this... while((n = state[*sp++]) == 0) ; ...is a char*, a pointer to a char. The C standard does not define if the char type is signed or not (!). On clang and gcc, it is signed. That means that, whenever a single-byte, high-bit (> 127) character is encountered, the value wraps around to negative, and a read occurs outside of the actual state table, causing potentially incorrect behaviour or a crash. src/cmd/ksh93/sh/lex.c: - endword(): Make sp and three related variables explicitly unsigned char pointers. This requires a bunch of annoying typecasts to stop compilers complaining; so be it. - To avoid even more typecasts, make stack_shift() follow suit. - Reorder variable declarations for legibility. --- NEWS | 3 +++ src/cmd/ksh93/sh/lex.c | 36 ++++++++++++++++++------------------ 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/NEWS b/NEWS index 85fe5a77f..cef2c2a17 100644 --- a/NEWS +++ b/NEWS @@ -7,6 +7,9 @@ Any uppercase BUG_* names are modernish shell bug IDs. - Fixed a potential crash on retrieving an empty line from the command history. +- Fixed a potential crash in the lexical analyser on processing single-byte + characters with the highest bit set. + 2022-07-09: - Fixed a bug that broke '[[ ... 
]]' test expressions for the command diff --git a/src/cmd/ksh93/sh/lex.c b/src/cmd/ksh93/sh/lex.c index bce4b3061..3062d1abe 100644 --- a/src/cmd/ksh93/sh/lex.c +++ b/src/cmd/ksh93/sh/lex.c @@ -2106,15 +2106,15 @@ noreturn void sh_syntax(Lex_t *lp) UNREACHABLE(); } -static char *stack_shift(register char *sp,char *dp) +static unsigned char *stack_shift(unsigned char *sp, unsigned char *dp) { - register char *ep; - register int offset = stktell(sh.stk); - register int left = offset-(sp-stkptr(sh.stk,0)); - register int shift = (dp+1-sp); + unsigned char *ep; + int offset = stktell(sh.stk); + int left = offset - (sp - (unsigned char*)stkptr(sh.stk, 0)); + int shift = (dp+1-sp); offset += shift; stkseek(sh.stk,offset); - sp = stkptr(sh.stk,offset); + sp = (unsigned char*)stkptr(sh.stk,offset); ep = sp - shift; while(left--) *--sp = *--ep; @@ -2131,15 +2131,12 @@ static char *stack_shift(register char *sp,char *dp) */ static struct argnod *endword(int mode) { - register const char *state = sh_lexstates[ST_NESTED]; - register int n; - register char *sp,*dp; - register int inquote=0, inlit=0; /* set within quoted strings */ - struct argnod* argp=0; - char *ep=0, *xp=0; - int bracket=0; + const char *const state = sh_lexstates[ST_NESTED]; + unsigned char *sp, *dp, *ep=0, *xp=0; /* must be unsigned: pointed-to values used as index to 256-byte state table */ + int inquote=0, inlit=0; /* set within quoted strings */ + int n, bracket=0; sfputc(sh.stk,0); - sp = stkptr(sh.stk,ARGVAL); + sp = (unsigned char*)stkptr(sh.stk,ARGVAL); if(mbwide()) { do @@ -2178,13 +2175,16 @@ static struct argnod *endword(int mode) switch(n) { case S_EOF: - stkseek(sh.stk,dp-stkptr(sh.stk,0)); + { + struct argnod* argp=0; + stkseek(sh.stk,dp - (unsigned char*)stkptr(sh.stk,0)); if(mode<=0) { argp = (struct argnod*)stkfreeze(sh.stk,0); argp->argflag = ARG_RAW|ARG_QUOTED; } return(argp); + } case S_LIT: if(!(inquote&1)) { @@ -2195,8 +2195,8 @@ static struct argnod *endword(int mode) if(ep) { *dp = 0; 
- stresc(ep); - dp = ep+ strlen(ep); + stresc((char*)ep); + dp = ep + strlen((char*)ep); } ep = 0; } @@ -2300,7 +2300,7 @@ static struct argnod *endword(int mode) { inquote >>= 1; if(xp) - dp = sh_checkid(xp,dp); + dp = (unsigned char*)sh_checkid((char*)xp,(char*)dp); xp = 0; if(--bracket<=0 && mode<0) inquote = 1;