When trimming variables in subevalvar, process multi-byte characters as one unit instead of their constituent bytes. Signed-off-by: Herbert Xu <herbert@xxxxxxxxxxxxxxxxxxx> --- src/expand.c | 192 ++++++++++++++++++++++++++++++++++--------------- src/expand.h | 1 + src/mystring.c | 2 +- src/parser.h | 1 + 4 files changed, 136 insertions(+), 60 deletions(-) diff --git a/src/expand.c b/src/expand.c index ad186b0..60a51b1 100644 --- a/src/expand.c +++ b/src/expand.c @@ -32,27 +32,27 @@ * SUCH DAMAGE. */ -#include <sys/types.h> -#include <sys/time.h> -#include <sys/stat.h> +#include <ctype.h> #include <dirent.h> -#include <unistd.h> -#ifdef HAVE_GETPWNAM -#include <pwd.h> -#endif -#include <stdlib.h> -#include <stdio.h> -#include <inttypes.h> -#include <limits.h> -#include <string.h> #ifdef HAVE_FNMATCH #include <fnmatch.h> #endif #ifdef HAVE_GLOB #include <glob.h> #endif -#include <ctype.h> +#include <inttypes.h> +#include <limits.h> +#ifdef HAVE_GETPWNAM +#include <pwd.h> +#endif +#include <string.h> #include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/time.h> +#include <sys/stat.h> +#include <unistd.h> #include <wchar.h> /* @@ -550,8 +550,10 @@ static char *scanleft(char *startp, char *endp, char *rmesc, char *rmescend, loc = startp; loc2 = rmesc; do { - int match; const char *s = loc2; + unsigned ml; + int match; + c = *loc2; if (zero) { *loc2 = '\0'; @@ -560,12 +562,26 @@ static char *scanleft(char *startp, char *endp, char *rmesc, char *rmescend, match = pmatch(str, s); *loc2 = c; if (match) - return loc; - if (quotes && *loc == (char)CTLESC) + return quotes ? loc : loc2; + + if (!c) + break; + + if (*loc != (char)CTLMBCHAR) { + if (*loc == (char)CTLESC) + loc++; loc++; - loc++; - loc2++; - } while (c); + loc2++; + continue; + } + + if (*++loc == (char)CTLESC) + loc++; + + ml = (unsigned char)*loc; + loc += ml + 3; + loc2 += ml; + } while (1); return 0; } @@ -573,14 +589,16 @@ static char *scanleft(char *startp, char *endp, char *rmesc, char *rmescend, static char *scanright(char *startp, char *endp, char *rmesc, char *rmescend, char *str, int quotes, int zero ) { - int esc = 0; + size_t esc = 0; char *loc; char *loc2; for (loc = endp, loc2 = rmescend; loc >= startp; loc2--) { - int match; - char c = *loc2; const char *s = loc2; + char c = *loc2; + unsigned ml; + int match; + if (zero) { *loc2 = '\0'; s = rmesc; @@ -588,17 +606,23 @@ static char *scanright(char *startp, char *endp, char *rmesc, char *rmescend, match = pmatch(str, s); *loc2 = c; if (match) - return loc; + return quotes ? loc : loc2; loc--; - if (quotes) { - if (--esc < 0) { - esc = esclen(startp, loc); - } - if (esc % 2) { - esc--; - loc--; - } + if (!esc--) + esc = esclen(startp, loc); + if (esc % 2) { + esc--; + loc--; + continue; } + if (*loc != (char)CTLMBCHAR) + continue; + + ml = (unsigned char)*--loc; + loc -= ml + 2; + if (*loc == (char)CTLESC) + loc--; + loc2 -= ml - 1; } return 0; } @@ -652,14 +676,11 @@ static char *subevalvar(char *start, char *str, int strloc, int startloc, nstrloc = str - (char *)stackblock(); } - rmesc = startp; - if (quotes) { - rmesc = _rmescapes(startp, RMESCAPE_ALLOC | RMESCAPE_GROW); - if (rmesc != startp) - rmescend = expdest; - startp = stackblock() + startloc; - str = stackblock() + nstrloc; - } + rmesc = _rmescapes(startp, RMESCAPE_ALLOC | RMESCAPE_GROW); + if (rmesc != startp) + rmescend = expdest; + startp = stackblock() + startloc; + str = stackblock() + nstrloc; rmescend--; /* zero = subtype == VSTRIMLEFT || subtype == VSTRIMLEFTMAX */ @@ -669,16 +690,29 @@ static char *subevalvar(char *start, char *str, int strloc, int startloc, endp = stackblock() + strloc - 1; loc = scan(startp, endp, rmesc, rmescend, str, quotes, zero); - if (loc) { - if (zero) { - memmove(startp, loc, endp - loc); - loc = startp + (endp - loc); + if (!loc) { + if (quotes) { + rmesc = startp; + rmescend = endp; } - *loc = '\0'; - } else - loc = endp; + } else if (!quotes) { + if (zero) + rmesc = loc; + else + rmescend = loc; + } else if (zero) { + rmesc = loc; + rmescend = endp; + } else { + rmesc = startp; + rmescend = loc; + } + + memmove(startp, rmesc, rmescend - rmesc); + loc = startp + (rmescend - rmesc); out: + *loc = '\0'; amount = loc - expdest; STADJUST(amount, expdest); @@ -704,6 +738,7 @@ evalvar(char *p, int flag) ssize_t varlen; int discard; int quoted; + int mbchar; varflags = *p++ & ~VSBIT; subtype = varflags & VSTYPE; @@ -713,8 +748,18 @@ evalvar(char *p, int flag) startloc = expdest - (char *)stackblock(); p = strchr(p, '=') + 1; + mbchar = 0; + switch (subtype) { + case VSTRIMLEFT: + case VSTRIMLEFTMAX: + case VSTRIMRIGHT: + case VSTRIMRIGHTMAX: + mbchar = EXP_MBCHAR; + break; + } + again: - varlen = varvalue(var, varflags, flag, quoted); + varlen = varvalue(var, varflags, flag | mbchar, quoted); if (varflags & VSNUL) varlen--; @@ -801,7 +846,7 @@ static char *chtodest(int c, int flags, char *out) { const char *syntax = flags & EXP_QUOTED ? DQSYNTAX : BASESYNTAX; - if ((flags & QUOTES_ESC) && + if ((flags & (QUOTES_ESC | EXP_MBCHAR)) && ((syntax[c] == CCTL) || (flags & EXP_QUOTED && syntax[c] == CBACK))) USTPUTC(CTLESC, out); @@ -823,9 +868,13 @@ static size_t memtodest(const char *p, size_t len, int flags) if (unlikely(!len)) return 0; - q = makestrspace(len * 2, expdest); + /* CTLMBCHAR, 2, c, c, 2, CTLMBCHAR */ + q = makestrspace(len * 3, expdest); do { + mbstate_t mbs = {}; + size_t ml; + c = (signed char)*p++; if (c) @@ -833,19 +882,30 @@ static size_t memtodest(const char *p, size_t len, int flags) else if (!(flags & EXP_KEEPNUL)) continue; - if (c < 0) { - mbstate_t mbs = {}; + if (c >= 0) + goto copy; - p--; - do { - q = chtodest(c, flags, q); - } while (mbrlen(p++, 1, &mbs) == -2 && - (c = *p, --len)); - if (!len) - break; - continue; + ml = mbrlen(p - 1, len, &mbs); + if (ml == -1 || ml == -2 || ml < 2 || ml > MB_LEN_MAX) + goto copy; + + if ((flags & (QUOTES_ESC | EXP_MBCHAR))) { + USTPUTC(CTLMBCHAR, q); + USTPUTC(ml, q); } + q = mempcpy(q, p - 1, ml); + + if ((flags & (QUOTES_ESC | EXP_MBCHAR))) { + USTPUTC(ml, q); + USTPUTC(CTLMBCHAR, q); + } + + p += ml - 1; + len -= ml - 1; + continue; + +copy: q = chtodest(c, flags, q); } while (--len); @@ -1720,6 +1780,8 @@ _rmescapes(char *str, int flag) inquotes = 0; notescaped = globbing; while (*p) { + unsigned ml; + if (*p == (char)CTLQUOTEMARK) { p++; inquotes ^= globbing; @@ -1743,6 +1805,18 @@ add_escape: } } notescaped = globbing; + + if (*p != (char)CTLMBCHAR) + goto copy; + + if (*++p == (char)CTLESC) + p++; + + ml = (unsigned char)*p++; + q = mempcpy(q, p, ml); + p += ml + 2; + continue; + copy: *q++ = *p++; } diff --git a/src/expand.h b/src/expand.h index 49a18f9..e5a990e 100644 --- a/src/expand.h +++ b/src/expand.h @@ -60,6 +60,7 @@ struct arglist { #define EXP_QUOTED 0x100 /* expand word in double quotes */ #define EXP_KEEPNUL 0x200 /* do not skip NUL characters */ #define EXP_DISCARD 0x400 /* discard result of expansion */ +#define EXP_MBCHAR 0x800 /* mark multi-byte characters */ struct jmploc; diff --git a/src/mystring.c b/src/mystring.c index 5eace6c..77b457c 100644 --- a/src/mystring.c +++ b/src/mystring.c @@ -67,7 +67,7 @@ const char cqchars[] = { #ifdef HAVE_FNMATCH '^', #endif - CTLESC, CTLQUOTEMARK, 0 + CTLESC, CTLMBCHAR, CTLQUOTEMARK, 0 }; const char illnum[] = "Illegal number: %s"; const char homestr[] = "HOME"; diff --git a/src/parser.h b/src/parser.h index 433573d..14bfc4f 100644 --- a/src/parser.h +++ b/src/parser.h @@ -44,6 +44,7 @@ union node; #define CTLVAR -126 /* variable defn */ #define CTLENDVAR -125 #define CTLBACKQ -124 +#define CTLMBCHAR -123 #define CTLARI -122 /* arithmetic expression */ #define CTLENDARI -121 #define CTLQUOTEMARK -120 -- 2.39.2