On Wed, Mar 07, 2018 at 08:25:07PM +0000, Martijn Dekker wrote: > > This version introduces a parsing bug: > > $ src/dash -c 'x=0; x=$((${x}+1))' > src/dash: 1: Syntax error: Unterminated quoted string > > It is triggered by the ${x} (with braces) within an arithmetic expression. Thanks for testing! Indeed, I wasn't careful enough when changing the syntax to exclude the simple VSNORMAL cases. This patch should fix this problem as well as the one Harald identified: ---8<--- Without a stack of syntaxes we cannot correctly parse these two cases together: "${a#'$$'}" "${a#"${b-'$$'}"}" A recursive parser also helps in some other corner cases such as nested arithmetic expansion with parentheses. This patch adds a syntax stack allocated from the stack using alloca. As a side-effect this allows us to remove the naked backslashes for patterns within double-quotes, which means that EXP_QPAT also has to go. This patch also removes any backslashes that precede right braces when they are present within a parameter expansion context. The idea of a recursive parser is based on a patch by Harald van Dijk. Signed-off-by: Herbert Xu <herbert@xxxxxxxxxxxxxxxxxxx> diff --git a/src/expand.c b/src/expand.c index 2a50830..903e250 100644 --- a/src/expand.c +++ b/src/expand.c @@ -83,7 +83,7 @@ #define RMESCAPE_HEAP 0x10 /* Malloc strings instead of stalloc */ /* Add CTLESC when necessary. */ -#define QUOTES_ESC (EXP_FULL | EXP_CASE | EXP_QPAT) +#define QUOTES_ESC (EXP_FULL | EXP_CASE) /* Do not skip NUL characters. */ #define QUOTES_KEEPNUL EXP_TILDE @@ -333,16 +333,6 @@ addquote: case CTLESC: startloc++; length++; - - /* - * Quoted parameter expansion pattern: remove quote - * unless inside inner quotes or we have a literal - * backslash. 
- */ - if (((flag | inquotes) & (EXP_QPAT | EXP_QUOTED)) == - EXP_QPAT && *p != '\\') - break; - goto addquote; case CTLVAR: p = evalvar(p, flag | inquotes); @@ -651,8 +641,7 @@ subevalvar(char *p, char *str, int strloc, int subtype, int startloc, int varfla char *(*scan)(char *, char *, char *, char *, int , int); argstr(p, EXP_TILDE | (subtype != VSASSIGN && subtype != VSQUESTION ? - (flag & (EXP_QUOTED | EXP_QPAT) ? - EXP_QPAT : EXP_CASE) : 0)); + EXP_CASE : 0)); STPUTC('\0', expdest); argbackq = saveargbackq; startp = stackblock() + startloc; @@ -1644,7 +1633,6 @@ char * _rmescapes(char *str, int flag) { char *p, *q, *r; - unsigned inquotes; int notescaped; int globbing; @@ -1674,24 +1662,23 @@ _rmescapes(char *str, int flag) q = mempcpy(q, str, len); } } - inquotes = 0; globbing = flag & RMESCAPE_GLOB; notescaped = globbing; while (*p) { if (*p == (char)CTLQUOTEMARK) { - inquotes = ~inquotes; p++; notescaped = globbing; continue; } + if (*p == '\\') { + /* naked back slash */ + notescaped = 0; + goto copy; + } if (*p == (char)CTLESC) { p++; if (notescaped) *q++ = '\\'; - } else if (*p == '\\' && !inquotes) { - /* naked back slash */ - notescaped = 0; - goto copy; } notescaped = globbing; copy: diff --git a/src/expand.h b/src/expand.h index 26dc5b4..90f5328 100644 --- a/src/expand.h +++ b/src/expand.h @@ -55,7 +55,6 @@ struct arglist { #define EXP_VARTILDE 0x4 /* expand tildes in an assignment */ #define EXP_REDIR 0x8 /* file glob for a redirection (1 match only) */ #define EXP_CASE 0x10 /* keeps quotes around for CASE pattern */ -#define EXP_QPAT 0x20 /* pattern in quoted parameter expansion */ #define EXP_VARTILDE2 0x40 /* expand tildes after colons only */ #define EXP_WORD 0x80 /* expand word in parameter expansion */ #define EXP_QUOTED 0x100 /* expand word in double quotes */ diff --git a/src/parser.c b/src/parser.c index 382658e..f329c69 100644 --- a/src/parser.c +++ b/src/parser.c @@ -80,6 +80,18 @@ struct heredoc { int striptabs; /* if set, strip leading 
tabs */ }; +struct synstack { + const char *syntax; + struct synstack *prev; + struct synstack *next; + int innerdq; + int varpushed; + int dblquote; + int varnest; /* levels of variables expansion */ + int parenlevel; /* levels of parens in arithmetic */ + int dqvarnest; /* levels of variables expansion within double quotes */ +}; + struct heredoc *heredoclist; /* list of here documents to read */ @@ -847,6 +859,21 @@ static int pgetc_eatbnl(void) return c; } +static void synstack_push(struct synstack **stack, struct synstack *next, + const char *syntax) +{ + memset(next, 0, sizeof(*next)); + next->syntax = syntax; + next->next = *stack; + (*stack)->prev = next; + *stack = next; +} + +static void synstack_pop(struct synstack **stack) +{ + *stack = (*stack)->next; +} + /* @@ -876,24 +903,15 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs) size_t len; struct nodelist *bqlist; int quotef; - int dblquote; - int varnest; /* levels of variables expansion */ - int arinest; /* levels of arithmetic expansion */ - int parenlevel; /* levels of parens in arithmetic */ - int dqvarnest; /* levels of variables expansion within double quotes */ int oldstyle; - /* syntax before arithmetic */ - char const *uninitialized_var(prevsyntax); + /* syntax stack */ + struct synstack synbase = { .syntax = syntax }; + struct synstack *synstack = &synbase; - dblquote = 0; if (syntax == DQSYNTAX) - dblquote = 1; + synstack->dblquote = 1; quotef = 0; bqlist = NULL; - varnest = 0; - arinest = 0; - parenlevel = 0; - dqvarnest = 0; STARTSTACKSTR(out); loop: { /* for each line, until end of word */ @@ -901,7 +919,7 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs) if (c == '\034' && doprompt && attyset() && ! 
equal(termval(), "emacs")) { attyline(); - if (syntax == BASESYNTAX) + if (synstack->syntax == BASESYNTAX) return readtoken(); c = pgetc(); goto loop; @@ -910,9 +928,9 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs) CHECKEND(); /* set c to PEOF if at end of here document */ for (;;) { /* until end of line or end of word */ CHECKSTRSPACE(4, out); /* permit 4 calls to USTPUTC */ - switch(syntax[c]) { + switch(synstack->syntax[c]) { case CNL: /* '\n' */ - if (syntax == BASESYNTAX) + if (synstack->syntax == BASESYNTAX) goto endword; /* exit outer loop */ USTPUTC(c, out); nlprompt(); @@ -922,7 +940,7 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs) USTPUTC(c, out); break; case CCTL: - if (eofmark == NULL || dblquote) + if (eofmark == NULL || synstack->dblquote) USTPUTC(CTLESC, out); USTPUTC(c, out); break; @@ -937,13 +955,17 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs) nlprompt(); } else { if ( - dblquote && + synstack->dblquote && c != '\\' && c != '`' && c != '$' && ( c != '"' || eofmark != NULL + ) && ( + c != '}' || + !synstack->varnest ) ) { + USTPUTC(CTLESC, out); USTPUTC('\\', out); } USTPUTC(CTLESC, out); @@ -952,55 +974,64 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs) } break; case CSQUOTE: - syntax = SQSYNTAX; + synstack->syntax = SQSYNTAX; quotemark: if (eofmark == NULL) { USTPUTC(CTLQUOTEMARK, out); } break; case CDQUOTE: - syntax = DQSYNTAX; - dblquote = 1; + synstack->syntax = DQSYNTAX; + synstack->dblquote = 1; +toggledq: + if (synstack->varnest) + synstack->innerdq ^= 1; goto quotemark; case CENDQUOTE: - if (eofmark && !varnest) + if (eofmark && !synstack->varnest) { USTPUTC(c, out); - else { - if (dqvarnest == 0) { - syntax = BASESYNTAX; - dblquote = 0; - } - quotef++; - goto quotemark; + break; } - break; + + if (synstack->dqvarnest == 0) { + synstack->syntax = BASESYNTAX; + synstack->dblquote = 0; + } + + quotef++; + + if (c == '"') + 
goto toggledq; + + goto quotemark; case CVAR: /* '$' */ PARSESUB(); /* parse substitution */ break; case CENDVAR: /* '}' */ - if (varnest > 0) { - varnest--; - if (dqvarnest > 0) { - dqvarnest--; - } + if (!synstack->innerdq && + synstack->varnest > 0) { + if (!--synstack->varnest && + synstack->varpushed) + synstack_pop(&synstack); + else if (synstack->dqvarnest > 0) + synstack->dqvarnest--; USTPUTC(CTLENDVAR, out); } else { USTPUTC(c, out); } break; case CLP: /* '(' in arithmetic */ - parenlevel++; + synstack->parenlevel++; USTPUTC(c, out); break; case CRP: /* ')' in arithmetic */ - if (parenlevel > 0) { + if (synstack->parenlevel > 0) { USTPUTC(c, out); - --parenlevel; + --synstack->parenlevel; } else { if (pgetc() == ')') { USTPUTC(CTLENDARI, out); - if (!--arinest) - syntax = prevsyntax; + synstack_pop(&synstack); } else { /* * unbalanced parens @@ -1019,7 +1050,7 @@ quotemark: case CIGN: break; default: - if (varnest == 0) + if (synstack->varnest == 0) goto endword; /* exit outer loop */ if (c != PEOA) { USTPUTC(c, out); @@ -1029,11 +1060,11 @@ quotemark: } } endword: - if (syntax == ARISYNTAX) + if (synstack->syntax == ARISYNTAX) synerror("Missing '))'"); - if (syntax != BASESYNTAX && eofmark == NULL) + if (synstack->syntax != BASESYNTAX && eofmark == NULL) synerror("Unterminated quoted string"); - if (varnest != 0) { + if (synstack->varnest != 0) { /* { */ synerror("Missing '}'"); } @@ -1210,6 +1241,8 @@ parsesub: { PARSEBACKQNEW(); } } else { + const char *newsyn = synstack->syntax; + USTPUTC(CTLVAR, out); typeloc = out - (char *)stackblock(); STADJUST(1, out); @@ -1260,6 +1293,8 @@ varname: } if (subtype == 0) { + int cc = c; + switch (c) { case ':': subtype = VSNUL; @@ -1273,27 +1308,40 @@ varname: break; case '%': case '#': - { - int cc = c; - subtype = c == '#' ? VSTRIMLEFT : - VSTRIMRIGHT; - c = pgetc_eatbnl(); - if (c == cc) - subtype++; - else - pungetc(); - break; - } + subtype = c == '#' ? 
VSTRIMLEFT : + VSTRIMRIGHT; + c = pgetc_eatbnl(); + if (c == cc) + subtype++; + else + pungetc(); + + newsyn = BASESYNTAX; + break; } } else { badsub: pungetc(); } + + if (newsyn == ARISYNTAX && subtype > VSNORMAL) + newsyn = DQSYNTAX; + + if (newsyn != synstack->syntax) { + synstack_push(&synstack, + synstack->prev ?: + alloca(sizeof(*synstack)), + newsyn); + + synstack->varpushed++; + synstack->dblquote = newsyn != BASESYNTAX; + } + *((char *)stackblock() + typeloc) = subtype; if (subtype != VSNORMAL) { - varnest++; - if (dblquote) - dqvarnest++; + synstack->varnest++; + if (synstack->dblquote) + synstack->dqvarnest++; } STPUTC('=', out); } @@ -1352,7 +1400,7 @@ parsebackq: { continue; } if (pc != '\\' && pc != '`' && pc != '$' - && (!dblquote || pc != '"')) + && (!synstack->dblquote || pc != '"')) STPUTC('\\', pout); if (pc > PEOA) { break; @@ -1428,10 +1476,10 @@ done: */ parsearith: { - if (++arinest == 1) { - prevsyntax = syntax; - syntax = ARISYNTAX; - } + synstack_push(&synstack, + synstack->prev ?: alloca(sizeof(*synstack)), + ARISYNTAX); + synstack->dblquote = 1; USTPUTC(CTLARI, out); goto parsearith_return; } -- Email: Herbert Xu <herbert@xxxxxxxxxxxxxxxxxxx> Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt -- To unsubscribe from this list: send the line "unsubscribe dash" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html