When trimming variables in subevalvar, process multi-byte characters as one unit instead of their constituent bytes. Signed-off-by: Herbert Xu <herbert@xxxxxxxxxxxxxxxxxxx> --- src/expand.c | 196 ++++++++++++++++++++++++++++++++++--------------- src/expand.h | 1 + src/mystring.c | 2 +- src/parser.h | 1 + 4 files changed, 138 insertions(+), 62 deletions(-) diff --git a/src/expand.c b/src/expand.c index 5260d16..b627c7a 100644 --- a/src/expand.c +++ b/src/expand.c @@ -32,27 +32,27 @@ * SUCH DAMAGE. */ -#include <sys/types.h> -#include <sys/time.h> -#include <sys/stat.h> +#include <ctype.h> #include <dirent.h> -#include <unistd.h> -#ifdef HAVE_GETPWNAM -#include <pwd.h> -#endif -#include <stdlib.h> -#include <stdio.h> -#include <inttypes.h> -#include <limits.h> -#include <string.h> #ifdef HAVE_FNMATCH #include <fnmatch.h> #endif #ifdef HAVE_GLOB #include <glob.h> #endif -#include <ctype.h> +#include <inttypes.h> +#include <limits.h> +#ifdef HAVE_GETPWNAM +#include <pwd.h> +#endif +#include <string.h> #include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/time.h> +#include <sys/stat.h> +#include <unistd.h> #include <wchar.h> /* @@ -543,8 +543,10 @@ static char *scanleft(char *startp, char *endp, char *rmesc, char *rmescend, loc = startp; loc2 = rmesc; do { - int match; const char *s = loc2; + unsigned ml; + int match; + c = *loc2; if (zero) { *loc2 = '\0'; @@ -553,12 +555,26 @@ static char *scanleft(char *startp, char *endp, char *rmesc, char *rmescend, match = pmatch(str, s); *loc2 = c; if (match) - return loc; - if (quotes && *loc == (char)CTLESC) + return quotes ? loc : loc2; + + if (!c) + break; + + if (*loc != (char)CTLMBCHAR) { + if (*loc == (char)CTLESC) + loc++; loc++; - loc++; - loc2++; - } while (c); + loc2++; + continue; + } + + if (*++loc == (char)CTLESC) + loc++; + + ml = (unsigned char)*loc; + loc += ml + 3; + loc2 += ml; + } while (1); return 0; } @@ -566,14 +582,16 @@ static char *scanleft(char *startp, char *endp, char *rmesc, char *rmescend, static char *scanright(char *startp, char *endp, char *rmesc, char *rmescend, char *str, int quotes, int zero ) { - int esc = 0; + size_t esc = 0; char *loc; char *loc2; for (loc = endp, loc2 = rmescend; loc >= startp; loc2--) { - int match; - char c = *loc2; const char *s = loc2; + char c = *loc2; + unsigned ml; + int match; + if (zero) { *loc2 = '\0'; s = rmesc; @@ -581,17 +599,23 @@ static char *scanright(char *startp, char *endp, char *rmesc, char *rmescend, match = pmatch(str, s); *loc2 = c; if (match) - return loc; + return quotes ? loc : loc2; loc--; - if (quotes) { - if (--esc < 0) { - esc = esclen(startp, loc); - } - if (esc % 2) { - esc--; - loc--; - } + if (!esc--) + esc = esclen(startp, loc); + if (esc % 2) { + esc--; + loc--; + continue; } + if (*loc != (char)CTLMBCHAR) + continue; + + ml = (unsigned char)*--loc; + loc -= ml + 2; + if (*loc == (char)CTLESC) + loc--; + loc2 -= ml - 1; } return 0; } @@ -645,14 +669,11 @@ static char *subevalvar(char *start, char *str, int strloc, int startloc, nstrloc = str - (char *)stackblock(); } - rmesc = startp; - if (quotes) { - rmesc = _rmescapes(startp, RMESCAPE_ALLOC | RMESCAPE_GROW); - if (rmesc != startp) - rmescend = expdest; - startp = stackblock() + startloc; - str = stackblock() + nstrloc; - } + rmesc = _rmescapes(startp, RMESCAPE_ALLOC | RMESCAPE_GROW); + if (rmesc != startp) + rmescend = expdest; + startp = stackblock() + startloc; + str = stackblock() + nstrloc; rmescend--; /* zero = subtype == VSTRIMLEFT || subtype == VSTRIMLEFTMAX */ @@ -662,16 +683,29 @@ static char *subevalvar(char *start, char *str, int strloc, int startloc, endp = stackblock() + strloc - 1; loc = scan(startp, endp, rmesc, rmescend, str, quotes, zero); - if (loc) { - if (zero) { - memmove(startp, loc, endp - loc); - loc = startp + (endp - loc); + if (!loc) { + if (quotes) { + rmesc = startp; + rmescend = endp; } - *loc = '\0'; - } else - loc = endp; + } else if (!quotes) { + if (zero) + rmesc = loc; + else + rmescend = loc; + } else if (zero) { + rmesc = loc; + rmescend = endp; + } else { + rmesc = startp; + rmescend = loc; + } + + memmove(startp, rmesc, rmescend - rmesc); + loc = startp + (rmescend - rmesc); out: + *loc = '\0'; amount = loc - expdest; STADJUST(amount, expdest); @@ -697,6 +731,7 @@ evalvar(char *p, int flag) ssize_t varlen; int discard; int quoted; + int mbchar; varflags = *p++ & ~VSBIT; subtype = varflags & VSTYPE; @@ -706,8 +741,18 @@ evalvar(char *p, int flag) startloc = expdest - (char *)stackblock(); p = strchr(p, '=') + 1; + mbchar = 0; + switch (subtype) { + case VSTRIMLEFT: + case VSTRIMLEFTMAX: + case VSTRIMRIGHT: + case VSTRIMRIGHTMAX: + mbchar = EXP_MBCHAR; + break; + } + again: - varlen = varvalue(var, varflags, flag, quoted); + varlen = varvalue(var, varflags, flag | mbchar, quoted); if (varflags & VSNUL) varlen--; @@ -813,14 +858,31 @@ static struct mbpair mbtodest(const char *p, char *q, const char *syntax, size_t ml; ml = mbrlen(--p, len, &mbs); - if (ml == -2 || ml == -1 || ml < 2) + if (ml == -2 || ml == -1 || ml < 2) { + q = chtodest((signed char)*p, syntax, q); ml = 1; + goto out; + } len = ml; do { q = chtodest((signed char)*p++, syntax, q); } while (--len); + goto out; + if (syntax[CTLMBCHAR] == CCTL) { + USTPUTC(CTLMBCHAR, q); + USTPUTC(ml, q); + } + + q = mempcpy(q, p, ml); + + if (syntax[CTLMBCHAR] == CCTL) { + USTPUTC(ml, q); + USTPUTC(CTLMBCHAR, q); + } + +out: mbp.ml = ml - 1; mbp.ql = q - q0; return mbp; @@ -839,12 +901,14 @@ static size_t memtodest(const char *p, size_t len, int flags) if (unlikely(!len)) return 0; - q = makestrspace(len * 2, expdest); + /* CTLMBCHAR, 2, c, c, 2, CTLMBCHAR */ + q = makestrspace(len * 3, expdest); -#if QUOTES_ESC != 0x11 || EXP_QUOTED != 0x100 -#error QUOTES_ESC != 0x11 || EXP_QUOTED != 0x100 +#if QUOTES_ESC != 0x11 || EXP_MBCHAR != 0x20 || EXP_QUOTED != 0x100 +#error QUOTES_ESC != 0x11 || EXP_MBCHAR != 0x20 || EXP_QUOTED != 0x100 #endif - if (likely(!(flags & (flags >> 4 | flags >> 8) & QUOTES_ESC))) { + if (likely(!(flags & (flags >> 3 | flags >> 4 | flags >> 8) & + (QUOTES_ESC | EXP_MBCHAR)))) { while (len >= 8) { uint64_t x = *(uint64_t *)(p + count); @@ -861,7 +925,8 @@ static size_t memtodest(const char *p, size_t len, int flags) q += count; p += count; - syntax = flags & QUOTES_ESC ? BASESYNTAX : is_type; + syntax = flags & (QUOTES_ESC | EXP_MBCHAR) ? + BASESYNTAX : is_type; } else syntax = SQSYNTAX; @@ -1753,17 +1818,25 @@ _rmescapes(char *str, int flag) inquotes = 0; notescaped = globbing; while (*p) { + unsigned ml; + int newnesc = globbing; + if (*p == (char)CTLQUOTEMARK) { p++; inquotes ^= globbing; continue; - } - if (*p == '\\') { + } else if (*p == '\\') { /* naked back slash */ - notescaped ^= globbing; - goto copy; - } - if (*p == (char)CTLESC) { + newnesc ^= notescaped; + } else if (*p == (char)CTLMBCHAR) { + if (*++p == (char)CTLESC) + p++; + + ml = (unsigned char)*p++; + q = mempcpy(q, p, ml); + p += ml + 2; + goto setnesc; + } else if (*p == (char)CTLESC) { p++; if (notescaped) *q++ = '\\'; @@ -1772,9 +1845,10 @@ _rmescapes(char *str, int flag) *q++ = '\\'; } } - notescaped = globbing; -copy: + *q++ = *p++; +setnesc: + notescaped = newnesc; } *q = '\0'; if (flag & RMESCAPE_GROW) { diff --git a/src/expand.h b/src/expand.h index 49a18f9..a78564f 100644 --- a/src/expand.h +++ b/src/expand.h @@ -55,6 +55,7 @@ struct arglist { #define EXP_VARTILDE 0x4 /* expand tildes in an assignment */ #define EXP_REDIR 0x8 /* file glob for a redirection (1 match only) */ #define EXP_CASE 0x10 /* keeps quotes around for CASE pattern */ +#define EXP_MBCHAR 0x20 /* mark multi-byte characters */ #define EXP_VARTILDE2 0x40 /* expand tildes after colons only */ #define EXP_WORD 0x80 /* expand word in parameter expansion */ #define EXP_QUOTED 0x100 /* expand word in double quotes */ diff --git a/src/mystring.c b/src/mystring.c index 978bbb5..afaa508 100644 --- a/src/mystring.c +++ b/src/mystring.c @@ -64,7 +64,7 @@ const char dolatstr[] = { CTLQUOTEMARK, CTLVAR, VSNORMAL | VSBIT, '@', '=', CTLQUOTEMARK, '\0' }; const char cqchars[] = { '\\', - CTLESC, CTLQUOTEMARK, 0 + CTLESC, CTLMBCHAR, CTLQUOTEMARK, 0 }; const char illnum[] = "Illegal number: %s"; const char homestr[] = "HOME"; diff --git a/src/parser.h b/src/parser.h index 433573d..14bfc4f 100644 --- a/src/parser.h +++ b/src/parser.h @@ -44,6 +44,7 @@ union node; #define CTLVAR -126 /* variable defn */ #define CTLENDVAR -125 #define CTLBACKQ -124 +#define CTLMBCHAR -123 #define CTLARI -122 /* arithmetic expression */ #define CTLENDARI -121 #define CTLQUOTEMARK -120 -- 2.39.2