From 9e2a8c69255aef8f17c9a8589d7a3d18693d6e1b Mon Sep 17 00:00:00 2001 From: Martijn Dekker Date: Tue, 8 Mar 2022 20:20:57 +0100 Subject: [PATCH] posix mode: disable effect of repeating whitespace char in $IFS ksh has a little-known field splitting feature that conflicts with POSIX: if a single-byte whitespace character (cf. isspace(3)) is repeated in $IFS, then field splitting is done as if that character wasn't a whitespace character. An example with the tab character: $ (IFS=$'\t'; val=$'\tone\t\ttwo\t'; set -- $val; echo $#) 2 $ (IFS=$'\t\t'; val=$'\tone\t\ttwo\t'; set -- $val; echo $#) 4 The latter being the same as, for example $ (IFS=':'; val=':one::two:'; set -- $val; echo $#) 4 However, this is incompatible with the POSIX spec and with every other shell except zsh, in which repeating a character in IFS does not have any effect. So the POSIX mode must disable this. src/cmd/ksh93/include/defs.h, src/cmd/ksh93/sh/init.c: - Add sh_invalidate_ifs() function that invalidates the IFS state table by setting the ifsnp discipline struct member to NULL, which will cause the next get_ifs() call to regenerate it. - get_ifs(): Treat a repeated char as S_DELIM even if whitespace, unless --posix is on. src/cmd/ksh93/sh/args.c: - sh_argopts(): Call sh_invalidate_ifs() when enabling or disabling the POSIX option. This is needed to make the change in field splitting behaviour take immediate effect instead of taking effect at the next assignment to IFS. --- NEWS | 3 +++ src/cmd/ksh93/include/defs.h | 1 + src/cmd/ksh93/sh.1 | 17 +++++++++++++---- src/cmd/ksh93/sh/args.c | 2 ++ src/cmd/ksh93/sh/init.c | 15 ++++++++++++++- 5 files changed, 33 insertions(+), 5 deletions(-) diff --git a/NEWS b/NEWS index 6c6651e78..eea6d32b4 100644 --- a/NEWS +++ b/NEWS @@ -7,6 +7,9 @@ Any uppercase BUG_* names are modernish shell bug IDs. - The 'enum' command can now create more than one type per invocation. 
+- The POSIX compatibility mode has been amended to disable the special handling + of a repeated $IFS whitespace character as non-whitespace. + 2022-02-23: - When reading input from the keyboard, ksh now turns off nonblocking I/O diff --git a/src/cmd/ksh93/include/defs.h b/src/cmd/ksh93/include/defs.h index 6e3395c76..5158be0ab 100644 --- a/src/cmd/ksh93/include/defs.h +++ b/src/cmd/ksh93/include/defs.h @@ -127,6 +127,7 @@ extern Sfdouble_t sh_arith(const char*); extern void *sh_arithcomp(char*); extern pid_t sh_fork(int,int*); extern pid_t _sh_fork(pid_t, int ,int*); +extern void sh_invalidate_ifs(void); extern char *sh_mactrim(char*,int); extern int sh_macexpand(struct argnod*,struct argnod**,int); extern int sh_macfun(const char*,int); diff --git a/src/cmd/ksh93/sh.1 b/src/cmd/ksh93/sh.1 index b4742c1fa..469dbca1d 100644 --- a/src/cmd/ksh93/sh.1 +++ b/src/cmd/ksh93/sh.1 @@ -2033,7 +2033,7 @@ Each single occurrence of an .SM .B IFS -character in the string to be split, +character in the string to be split that is not in the \f2isspace\^\fP character class, and any adjacent characters in .SM @@ -2043,13 +2043,16 @@ One or more characters in .SM .B IFS -that belong to the \f2isspace\^\fP character class, +that belong to the \f2isspace\^\fP character class delimit a field. 
In addition, if the same \f2isspace\^\fP character appears consecutively inside -.BR IFS , +.B IFS +and the +.B posix +shell option is not on, this character is treated as if it were not in the \f2isspace\^\fP -class, so that if +class - for example, if .BR IFS consists of two .B tab @@ -7685,6 +7688,12 @@ In addition, while on, the \fBposix\fR option disables exporting variable type attributes to the environment for other ksh processes to import; .IP \[bu] +disables the special handling of repeated +.I isspace +class characters in the +.B IFS +variable; +.IP \[bu] causes file descriptors > 2 to be left open when invoking another program; .IP \[bu] disables the \fB&>\fR redirection shorthand; diff --git a/src/cmd/ksh93/sh/args.c b/src/cmd/ksh93/sh/args.c index 8fcb5c515..5e8488553 100644 --- a/src/cmd/ksh93/sh/args.c +++ b/src/cmd/ksh93/sh/args.c @@ -245,6 +245,7 @@ int sh_argopts(int argc,register char *argv[]) off_option(&newflags,SH_BRACEEXPAND); #endif on_option(&newflags,SH_LETOCTAL); + sh_invalidate_ifs(); } on_option(&newflags,o); off_option(&sh.offoptions,o); @@ -262,6 +263,7 @@ int sh_argopts(int argc,register char *argv[]) on_option(&newflags,SH_BRACEEXPAND); #endif off_option(&newflags,SH_LETOCTAL); + sh_invalidate_ifs(); } if(o==SH_XTRACE) trace = 0; diff --git a/src/cmd/ksh93/sh/init.c b/src/cmd/ksh93/sh/init.c index 840abe282..f029b6c7d 100644 --- a/src/cmd/ksh93/sh/init.c +++ b/src/cmd/ksh93/sh/init.c @@ -549,6 +549,18 @@ static void put_ifs(register Namval_t* np,const char *val,int flags,Namfun_t *fp } } +/* Invalidate IFS state table */ +void sh_invalidate_ifs(void) +{ + Namval_t *np = sh_scoped(IFSNOD); + if(np) + { + struct ifs *ip = (struct ifs*)np->nvfun; + if(ip) + ip->ifsnp = 0; + } +} + /* * This is the lookup function for IFS * It keeps the sh.ifstable up to date @@ -574,7 +586,8 @@ static char* get_ifs(register Namval_t* np, Namfun_t *fp) continue; } n = S_DELIM; - if(c== *cp) + /* Treat a repeated char as S_DELIM even if whitespace, 
unless --posix is on */ + if(c==*cp && !sh_isoption(SH_POSIX)) cp++; else if(c=='\n') n = S_NL;