Add CTLMBCHAR support to pmatch. POSIX equivalence classes and collating symbols are not supported. Enable CTLMBCHAR generation in mbtodest. Signed-off-by: Herbert Xu <herbert@xxxxxxxxxxxxxxxxxxx> --- src/eval.c | 3 +- src/expand.c | 351 +++++++++++++++++++++++++++++++-------------------- 2 files changed, 217 insertions(+), 137 deletions(-) diff --git a/src/eval.c b/src/eval.c index d169eb8..32f1e64 100644 --- a/src/eval.c +++ b/src/eval.c @@ -451,7 +451,8 @@ evalcase(union node *n, int flags) lineno -= funcline - 1; arglist.lastp = &arglist.list; - expandarg(n->ncase.expr, &arglist, EXP_TILDE); + expandarg(n->ncase.expr, &arglist, FNMATCH_IS_ENABLED ? EXP_TILDE : + EXP_TILDE | EXP_MBCHAR); for (cp = n->ncase.cases ; cp && evalskip == 0 ; cp = cp->nclist.next) { for (patp = cp->nclist.pattern ; patp ; patp = patp->narg.next) { if (casematch(patp, arglist.list->text)) { diff --git a/src/expand.c b/src/expand.c index 8f30e46..a3b81d5 100644 --- a/src/expand.c +++ b/src/expand.c @@ -85,7 +85,6 @@ #define RMESCAPE_GLOB 0x2 /* Add backslashes for glob */ #define RMESCAPE_GROW 0x8 /* Grow strings instead of stalloc */ #define RMESCAPE_HEAP 0x10 /* Malloc strings instead of stalloc */ -#define RMESCAPE_EMETA 0x20 /* Remove backslashes too */ /* Add CTLESC when necessary. 
*/ #define QUOTES_ESC (EXP_FULL | EXP_CASE) @@ -141,7 +140,7 @@ STATIC struct strlist *expsort(struct strlist *); STATIC struct strlist *msort(struct strlist *, int); STATIC void addfname(char *); STATIC int patmatch(char *, const char *); -STATIC int pmatch(const char *, const char *); +STATIC int pmatch(char *, const char *); static size_t cvtnum(intmax_t num, int flags); STATIC size_t esclen(const char *, const char *); STATIC void varunset(const char *, const char *, const char *, int) @@ -156,6 +155,11 @@ STATIC void varunset(const char *, const char *, const char *, int) STATIC inline char * preglob(const char *pattern, int flag) { + if (FNMATCH_IS_ENABLED) { + if (!flag) + flag = RMESCAPE_GROW; + flag |= RMESCAPE_ALLOC; + } flag |= RMESCAPE_GLOB; return _rmescapes((char *)pattern, flag); } @@ -582,28 +586,31 @@ static char *scanleft(char *startp, char *endp, char *rmesc, char *rmescend, loc = startp; loc2 = rmesc; do { - const char *s = loc2; + char *s = FNMATCH_IS_ENABLED ? loc2 : loc; unsigned mb; unsigned ml; int match; - c = *loc2; + c = *s; if (zero) { - *loc2 = '\0'; - s = rmesc; + *s = '\0'; + s = FNMATCH_IS_ENABLED ? rmesc : startp; } match = pmatch(str, s); - *loc2 = c; + *(FNMATCH_IS_ENABLED ? loc2 : loc) = c; if (match) - return quotes ? loc : loc2; + return FNMATCH_IS_ENABLED && quotes ? loc : loc2; if (!c) break; mb = mbnext(loc); loc += (mb & 0xff) + (mb >> 8); - ml = (mb >> 8) > 3 ? (mb >> 8) - 2 : 1; - loc2 += ml; + if (unlikely(FNMATCH_IS_ENABLED || !quotes)) { + ml = (mb >> 8) > 3 ? (mb >> 8) - 2 : 1; + loc2 += ml; + } else + loc2 = loc; } while (1); return 0; } @@ -616,21 +623,23 @@ static char *scanright(char *startp, char *endp, char *rmesc, char *rmescend, char *loc; char *loc2; - for (loc = endp, loc2 = rmescend; loc >= startp; loc2--) { - const char *s = loc2; - char c = *loc2; + for (loc = endp, loc2 = rmescend;; + FNMATCH_IS_ENABLED ? loc2-- : (loc2 = loc)) { + char *s = FNMATCH_IS_ENABLED ? 
loc2 : loc; + char c = *s; unsigned ml; int match; if (zero) { - *loc2 = '\0'; + *s = '\0'; s = rmesc; } match = pmatch(str, s); - *loc2 = c; + *(FNMATCH_IS_ENABLED ? loc2 : loc) = c; if (match) - return quotes ? loc : loc2; - loc--; + return FNMATCH_IS_ENABLED && quotes ? loc : loc2; + if (--loc < startp) + break; if (!esc--) esc = esclen(startp, loc); if (esc % 2) { @@ -645,7 +654,8 @@ static char *scanright(char *startp, char *endp, char *rmesc, char *rmescend, loc -= ml + 2; if (*loc == (char)CTLESC) loc--; - loc2 -= ml - 1; + if (FNMATCH_IS_ENABLED) + loc2 -= ml - 1; } return 0; } @@ -691,19 +701,21 @@ static char *subevalvar(char *start, char *str, int strloc, int startloc, #endif rmescend = stackblock() + strloc; - str = preglob(rmescend, FNMATCH_IS_ENABLED ? - RMESCAPE_ALLOC | RMESCAPE_GROW : 0); + str = preglob(rmescend, 0); if (FNMATCH_IS_ENABLED) { startp = stackblock() + startloc; rmescend = stackblock() + strloc; nstrloc = str - (char *)stackblock(); } - rmesc = _rmescapes(startp, RMESCAPE_ALLOC | RMESCAPE_GROW); - if (rmesc != startp) - rmescend = expdest; - startp = stackblock() + startloc; - str = stackblock() + nstrloc; + rmesc = startp; + if (FNMATCH_IS_ENABLED || !quotes) { + rmesc = _rmescapes(startp, RMESCAPE_ALLOC | RMESCAPE_GROW); + if (rmesc != startp) + rmescend = expdest; + startp = stackblock() + startloc; + str = stackblock() + nstrloc; + } rmescend--; /* zero = subtype == VSTRIMLEFT || subtype == VSTRIMLEFTMAX */ @@ -894,12 +906,6 @@ static struct mbpair mbtodest(const char *p, char *q, const char *syntax, goto out; } - len = ml; - do { - q = chtodest((signed char)*p++, syntax, q); - } while (--len); - goto out; - if (syntax[CTLMBCHAR] == CCTL) { USTPUTC(CTLMBCHAR, q); USTPUTC(ml, q); @@ -1470,7 +1476,7 @@ static void expandmeta_glob(struct strlist *str) #endif INTOFF; - p = preglob(str->text, RMESCAPE_ALLOC | RMESCAPE_HEAP); + p = preglob(str->text, RMESCAPE_HEAP); i = glob64(p, GLOB_ALTDIRFUNC | GLOB_NOMAGIC, 0, &pglob); if (p != 
str->text) ckfree(p); @@ -1541,13 +1547,15 @@ expandmeta(struct strlist *str) savelastp = exparg.lastp; INTOFF; - p = str->text; + p = preglob(str->text, RMESCAPE_ALLOC | RMESCAPE_HEAP); len = strlen(p); expdir_max = len + PATH_MAX; expdir = ckmalloc(expdir_max); expmeta(p, len, 0); ckfree(expdir); + if (p != str->text) + ckfree(p); INTON; if (exparg.lastp == savelastp) { /* @@ -1568,9 +1576,21 @@ nometa: } } -static void expmeta_rmescapes(char *enddir, char *name) +static char *expmeta_rmescapes(char *enddir, const char *name) { - preglob(strcpy(enddir, name), RMESCAPE_EMETA); + const char *p; + + if (!FNMATCH_IS_ENABLED) + return strchrnul(rmescapes(strcpy(enddir, name)), 0); + + p = name; + do { + if (*p == '\\' && p[1]) + p++; + *enddir++ = *p; + } while (*p++); + + return enddir - 1; } static int skipesc(char *p) @@ -1585,8 +1605,7 @@ static int skipesc(char *p) esc = mb & 0xff; if (!esc && p[esc] == '\\' && p[esc + 1]) { - while (p[++esc] == (char)CTLQUOTEMARK) - ; + esc++; mb = mbnext(p + esc); esc += mb & 0xff; @@ -1655,9 +1674,8 @@ expmeta(char *name, unsigned name_len, unsigned expdir_len) if (name < start) { c = *start; *start = 0; - expmeta_rmescapes(enddir, name); + enddir = expmeta_rmescapes(enddir, name); *start = c; - enddir += strlen(enddir); } *enddir = 0; cp = expdir; @@ -1673,16 +1691,25 @@ expmeta(char *name, unsigned name_len, unsigned expdir_len) } name_len -= endname - name; matchdot = 0; - pat = preglob(start, RMESCAPE_ALLOC | RMESCAPE_HEAP); + pat = start; + if (FNMATCH_IS_ENABLED) + pat = preglob(pat, RMESCAPE_HEAP); p = pat; - if (*p == '\\') + if (*p == (FNMATCH_IS_ENABLED ? '\\' : (char)CTLESC)) p++; if (*p == '.') matchdot++; while (! int_pending() && (dp = readdir64(dirp)) != NULL) { if (dp->d_name[0] == '.' && ! 
matchdot) continue; - if (pmatch(pat, dp->d_name)) { + p = dp->d_name; + if (!FNMATCH_IS_ENABLED) { + STARTSTACKSTR(expdest); + strtodest(p, EXP_MBCHAR); + *expdest = 0; + p = stackblock(); + } + if (pmatch(pat, p)) { if (!c) { scopy(dp->d_name, enddir); addfname(expdir); @@ -1706,7 +1733,7 @@ expmeta(char *name, unsigned name_len, unsigned expdir_len) } } } - if (pat != start) + if (FNMATCH_IS_ENABLED && pat != start) ckfree(pat); closedir(dirp); if (c) @@ -1797,52 +1824,48 @@ msort(struct strlist *list, int len) STATIC inline int patmatch(char *pattern, const char *string) { - return pmatch(preglob(pattern, FNMATCH_IS_ENABLED ? - RMESCAPE_ALLOC | RMESCAPE_GROW : 0), - string); + return pmatch(preglob(pattern, 0), string); } -STATIC int ccmatch(const char *p, int chr, const char **r) +static __attribute__((noinline)) int ccmatch(char *p, const char *mbc, int ml, + char **r) { - static const struct class { - char name[10]; - int (*fn)(int); - } classes[] = { - { .name = ":alnum:]", .fn = isalnum }, - { .name = ":cntrl:]", .fn = iscntrl }, - { .name = ":lower:]", .fn = islower }, - { .name = ":space:]", .fn = isspace }, - { .name = ":alpha:]", .fn = isalpha }, - { .name = ":digit:]", .fn = isdigit }, - { .name = ":print:]", .fn = isprint }, - { .name = ":upper:]", .fn = isupper }, - { .name = ":blank:]", .fn = isblank }, - { .name = ":graph:]", .fn = isgraph }, - { .name = ":punct:]", .fn = ispunct }, - { .name = ":xdigit:]", .fn = isxdigit }, - }; - const struct class *class, *end; - - end = classes + sizeof(classes) / sizeof(classes[0]); - for (class = classes; class < end; class++) { - const char *q; - - q = prefix(p, class->name); - if (!q) - continue; - *r = q; - return class->fn(chr); - } + mbstate_t mbst = {}; + wctype_t type; + wchar_t wc; + char *q; *r = 0; - return 0; + + if (*p++ != ':') + return 0; + + q = strstr(p, ":]"); + if (!q) + return 0; + + *q = 0; + type = wctype(p); + *q = ':'; + + if (!type) + return 0; + + *r = q + 2; + + if (mbrtowc(&wc, 
mbc, ml, &mbst) != ml) + return 0; + + return iswctype(wc, type); } -STATIC int -pmatch(const char *pattern, const char *string) +static int pmatch(char *pattern, const char *string) { - const char *p, *q; + char stop[] = { 0, CTLESC, CTLMBCHAR }; + const char *q; + unsigned mb; + char *p; char c; if (FNMATCH_IS_ENABLED) @@ -1851,36 +1874,43 @@ pmatch(const char *pattern, const char *string) p = pattern; q = string; for (;;) { - switch (c = *p++) { + switch ((signed char)(c = *p++)) { case '\0': goto breakloop; - case '\\': - if (*p) { - c = *p++; - } - goto dft; - case '?': - if (*q++ == '\0') - return 0; + case CTLESC: + c = *p++; break; + case '?': + if (*q == '\0') + return 0; + mb = mbnext(q); + q += (mb >> 8) + (mb & 0xff); + continue; case '*': c = *p; while (c == '*') c = *++p; - if (c != '\\' && c != '?' && c != '*' && c != '[') { - while (*q != c) { - if (*q == '\0') + stop[0] = CTLESC; + if (c != '?' && c != '*' && c != '[') + stop[0] = c; + for (;;) { + if (!stop[0]) + q = nullstr; + else if (stop[0] != (char)CTLESC) { + q = strpbrk(q, stop); + if (!q) return 0; - q++; } - } - do { if (pmatch(p, q)) return 1; - } while (*q++ != '\0'); + if (!*q) + break; + mb = mbnext(q); + q += (mb >> 8) + (mb & 0xff); + } return 0; case '[': { - const char *startp; + char *startp; int invert, found; char chr; @@ -1891,48 +1921,85 @@ pmatch(const char *pattern, const char *string) p++; } found = 0; + mb = mbnext(q); + q += mb & 0xff; + mb >>= 8; chr = *q; if (chr == '\0') return 0; c = *p++; do { + unsigned mbp = 0; + const char *mbs = &c; + if (!c) { p = startp; c = '['; goto dft; } if (c == '[') { - const char *r; + char *r; - found |= !!ccmatch(p, chr, &r); + found |= !!ccmatch(p, q, mb > 1 ? 
+ mb - 2 : mb, + &r); if (r) { p = r; continue; } - } else if (c == '\\') + } else if (c == (char)CTLESC) c = *p++; + else if (c == (char)CTLMBCHAR) { + mbp = mbnext(--p); + p += mbp & 0xff; + mbs = p; + mbp >>= 8; + p += mbp; + } if (*p == '-' && p[1] != ']') { p++; - if (*p == '\\') + if (*p == (char)CTLESC) p++; - if (chr >= c && chr <= *p) + else if (*p == CTLMBCHAR) { + mbp = mbnext(p); + p += mbp & 0xff; + p += mbp >> 8; + continue; + } + if (!(mbp | (mb - 1)) && + chr >= c && chr <= *p) found = 1; p++; - } else { - if (chr == c) - found = 1; - } + } else if (!memcmp(mbs, q, mb)) + found = 1; } while ((c = *p++) != ']'); if (found == invert) return 0; - q++; - break; + q += mb; + continue; } -dft: default: - if (*q++ != c) + case CTLMBCHAR: + mb = mbnext(--p); + p += mb & 0xff; + mb = mbnext(q); + q += mb & 0xff; + mb >>= 8; + + if (memcmp(p - 1, q - 1, mb + 1)) return 0; - break; + + p += mb; + q += mb; + continue; } +dft: + mb = mbnext(q); + if ((mb >> 8) > 1) + return 0; + q += mb & 0xff; + if (*q != c) + return 0; + q += mb >> 8; } breakloop: if (*q != '\0') @@ -1953,7 +2020,6 @@ _rmescapes(char *str, int flag) int notescaped; int globbing; int inquotes; - int expmeta; p = strpbrk(str, cqchars); if (!p) { @@ -1962,7 +2028,6 @@ _rmescapes(char *str, int flag) q = p; r = str; globbing = flag & RMESCAPE_GLOB; - expmeta = (flag & RMESCAPE_EMETA) ? 
RMESCAPE_GLOB : 0; if (flag & RMESCAPE_ALLOC) { size_t len = p - str; @@ -1992,50 +2057,64 @@ _rmescapes(char *str, int flag) inquotes = 0; notescaped = globbing; while (*p) { + int c = (signed char)*p; + int newnesc = globbing; unsigned mb; unsigned ml; - int newnesc = globbing; - if (*p == (char)CTLQUOTEMARK) { + if (c == CTLQUOTEMARK) { p++; inquotes ^= globbing; continue; - } else if (*p == '\\') { + } else if (c == '\\') { /* naked back slash */ newnesc ^= notescaped; /* naked backslashes can only occur outside quotes */ inquotes = 0; - if (expmeta & ~newnesc) { - p++; - goto setnesc; + if (!FNMATCH_IS_ENABLED && notescaped) + c = CTLESC; + } else if (c == CTLESC) { + if ((notescaped ^ inquotes) & inquotes) { + if (FNMATCH_IS_ENABLED) + *q++ = '\\'; + else + q[-1] = '\\'; } - } else if (*p == (char)CTLMBCHAR) { + if (globbing) + *q++ = FNMATCH_IS_ENABLED ? '\\' : CTLESC; + + c = *++p; + } else if (c == CTLMBCHAR) { + unsigned tail = 2; + + if (!FNMATCH_IS_ENABLED && (globbing ^ notescaped)) + q--; + mb = mbnext(p); ml = mb >> 8; - ml -= 2; - p += mb & 0xff; - q = mempcpy(q, p, ml); - p += ml + 2; - goto setnesc; - } else if (*p == (char)CTLESC) { - p++; - if (expmeta) - ; - else if (notescaped) - *q++ = '\\'; - else if (inquotes) { - *q++ = '\\'; - *q++ = '\\'; + if (!globbing || FNMATCH_IS_ENABLED) { + p += mb & 0xff; + ml -= 2; + } else { + ml += mb & 0xff; + tail = 0; } + + q = mempcpy(q, p, ml); + p += ml + tail; + goto setnesc; } - *q++ = *p++; + *q++ = c; + p++; setnesc: notescaped = newnesc; } + if (!FNMATCH_IS_ENABLED && (globbing ^ notescaped)) + q[-1] = '\\'; *q = '\0'; - if (flag & RMESCAPE_GROW) { + if (flag & (RMESCAPE_ALLOC | RMESCAPE_GROW)) { expdest = r; STADJUST(q - r + 1, expdest); } -- 2.39.2