Hello. Vladimir N. Oleynik wrote:
$ a=abc $ echo ${#a#a} dash-0.5.11.3 produces: 3a; bash produces: ${#a#a}: bad substitution
Ok. I rewrote the variable substitution parser. It now: * strictly detects bad substitutions * supports indirection: ${!var} * cleanly skips the unsupported syntax ${v/p/x} ${v//p/x} ${v:offs:l} ${v@trans} ${!prev*} ${!prev@} and marks it as "bad substitution". My variant of parser.c is ready to parse this syntax, but expand.c is too knotty for me to implement this syntax there. * also includes a small simplification of the expansion of $- :) --w vodz
--- expand.c.orig 2021-02-08 23:23:54.000000000 +0400 +++ expand.c 2021-02-10 12:58:11.508816065 +0400 @@ -854,6 +854,9 @@ (flags & EXP_DISCARD); ssize_t len = 0; char c; + char str_opts[NOPTS + 1]; + int len_for_num = max_int_length(sizeof(int)); + char str_num[len_for_num]; if (!subtype) { if (discard) @@ -866,6 +869,7 @@ flags &= discard ? ~QUOTES_ESC : ~0; sep = (flags & EXP_FULL) << CHAR_BIT; +again: switch (*name) { case '$': num = rootpid; @@ -881,18 +885,16 @@ if (num == 0) return -1; numvar: - len = cvtnum(num, flags); - break; + fmtstr(p = str_num, len_for_num, "%d", num); + goto value; case '-': - p = makestrspace(NOPTS, expdest); for (i = NOPTS - 1; i >= 0; i--) { - if (optlist[i] && optletters[i]) { - USTPUTC(optletters[i], p); - len++; - } + if (optlist[i] && optletters[i]) + str_opts[len++] = optletters[i]; } - expdest = p; - break; + str_opts[len] = 0; + p = str_opts; + goto value; case '@': if (quoted && sep) goto param; @@ -945,9 +947,37 @@ goto value; default: p = lookupvar(name); -value: if (!p) return -1; +value: + if (subtype == VSINDIRECT) { + char *v; + int ok = 0, alpha = 0; + + for (v = p; *v; v++) { + if (is_name(*v)) { + if (alpha < 0) + goto errv; + else + alpha = 1; + } else if (isdigit(*v)) { + if (!alpha) alpha = -1; + else if (alpha == -2) goto errv; + } else if (ok == 0 && is_special(*v)) { + alpha = -2; + } else { + errv: + ok = 0; break; + } + ok = 1; + } + + if (!ok) + sh_error("Bad variable name for indirect"); + name = p; + subtype = 0; + goto again; + } len = strtodest(p, flags); break; --- parser.h.orig 2021-02-10 01:47:34.000000000 +0400 +++ parser.h 2021-02-10 13:13:59.276818541 +0400 @@ -48,20 +48,25 @@ #define CTL_LAST -120 /* last 'special' character */ /* variable substitution byte (follows CTLVAR) */ -#define VSTYPE 0x0f /* type of variable substitution */ -#define VSNUL 0x10 /* colon--treat the empty string as unset */ - +#define VSTYPE 0x0f /* type of variable substitution */ +#define VSNUL 0x10 /* colon+VSTYPE -- 
treat the empty string as unset, + or unrealized ${var:offset[:lenght]} */ +#define VSBADSUB 0 /* bad substitunion */ /* values of VSTYPE field */ -#define VSNORMAL 0x1 /* normal variable: $var or ${var} */ -#define VSMINUS 0x2 /* ${var-text} */ -#define VSPLUS 0x3 /* ${var+text} */ -#define VSQUESTION 0x4 /* ${var?message} */ -#define VSASSIGN 0x5 /* ${var=text} */ -#define VSTRIMRIGHT 0x6 /* ${var%pattern} */ -#define VSTRIMRIGHTMAX 0x7 /* ${var%%pattern} */ -#define VSTRIMLEFT 0x8 /* ${var#pattern} */ -#define VSTRIMLEFTMAX 0x9 /* ${var##pattern} */ -#define VSLENGTH 0xa /* ${#var} */ +#define VSNORMAL 0x1 /* normal variable: $var or ${var} */ +#define VSMINUS 0x2 /* ${var-text} */ +#define VSPLUS 0x3 /* ${var+text} */ +#define VSQUESTION 0x4 /* ${var?message} */ +#define VSASSIGN 0x5 /* ${var=text} */ +#define VSTRIMRIGHT 0x6 /* ${var%pattern} */ +#define VSTRIMRIGHTMAX 0x7 /* ${var%%pattern} */ +#define VSTRIMLEFT 0x8 /* ${var#pattern} */ +#define VSTRIMLEFTMAX 0x9 /* ${var##pattern} */ +#define VSLENGTH 0xa /* ${#var} */ +#define VSINDIRECT 0xb /* ${!var} */ +#define VSPATERN 0xc /* ${var/pattern/string} unrealized */ +#define VSPATERNALL 0xd /* ${var//pattern/string} unrealized */ +#define VSTRANS 0xe /* ${var@transformation} unrealized */ /* values of checkkwd variable */ #define CHKALIAS 0x1 --- parser.c.orig 2021-02-10 01:47:50.000000000 +0400 +++ parser.c 2021-02-10 12:47:27.609814383 +0400 @@ -1235,8 +1235,9 @@ parsesub: { int subtype; int typeloc; - char *p; - static const char types[] = "}-+?="; + const char *newsyn; + int lenght; + int alpha; c = pgetc_eatbnl(); if ( @@ -1246,6 +1247,7 @@ ) { USTPUTC('$', out); pungetc(); + goto parsesub_return; } else if (c == '(') { /* $(command) or $((arith)) */ if (pgetc_eatbnl() == '(') { PARSEARITH(); @@ -1253,113 +1255,176 @@ pungetc(); PARSEBACKQNEW(); } - } else { - const char *newsyn = synstack->syntax; + goto parsesub_return; + } + + newsyn = synstack->syntax; + USTPUTC(CTLVAR, out); + typeloc = out - 
(char *)stackblock(); + STADJUST(1, out); - USTPUTC(CTLVAR, out); - typeloc = out - (char *)stackblock(); - STADJUST(1, out); + if (c != '{') { subtype = VSNORMAL; - if (likely(c == '{')) { - c = pgetc_eatbnl(); - subtype = 0; - } -varname: - if (is_name(c)) { - do { - STPUTC(c, out); - c = pgetc_eatbnl(); - } while (is_in_name(c)); - } else if (is_digit(c)) { + if(is_special(c)) { + /* $[!#$*-0123456789?@] */ + STPUTC(c, out); + } else { + /* found simple $var */ do { STPUTC(c, out); c = pgetc_eatbnl(); - } while (!subtype && is_digit(c)); - } else if (c != '}') { - int cc = c; + } while(is_in_name(c)); + pungetc (); + } + goto eov; + } + subtype = lenght = alpha = 0; + c = pgetc_eatbnl(); + if (c == '#') { + /* very special */ + c = pgetc_eatbnl(); + if (is_in_name(c) || (is_special(c) && c != '#')) { + /* lenght($var) */ + subtype = VSLENGTH; + goto varc; + } + if (c != '#') { + /* $# and may be mods */ + var_is_num_arg: + pungetc (); + c = '#'; + goto var_spec; + } + /* ${##... */ + c = pgetc_eatbnl(); + if (c == '}') { + /* ${##} as lenght($#) */ + subtype = VSLENGTH; + goto var_is_num_arg; + } + /* ${##(#)[#]*...} as (max)stripleft($#, [#]*...) */ + pungetc (); + STPUTC('#', out); + c = '#'; + goto var_num_sub; + } else if (c == '!') { + c = pgetc_eatbnl(); + if ((is_special(c) && c != '@' && c != '*') || is_in_name(c)) { + /* ${!var} */ + subtype = VSINDIRECT; + goto varc; + } else { + /* $! and may be mods */ + pungetc (); + c = '!'; + goto var_spec; + } + } + while (1) { + if (c == '}') { + if (!lenght) + subtype = VSBADSUB; /* bad: ${} */ + if (subtype) + pungetc (); + else + subtype = VSNORMAL; + goto eov; + } + varc: + if (is_name(c)) { + if (alpha < 0) + subtype = VSBADSUB; /* bad: ${ [!#]? [0-9]*|specvar and _alpha* */ + else + alpha = 1; + } else if (isdigit(c)) { + if (!alpha) alpha = -1; /* ${ [!#]? [0-9]* and _alpha* -> bad */ + else if (alpha == -2) subtype = VSBADSUB; /* bad: ${ [!#]? 
and specvar and [0-9]* */ + } else if(!lenght && is_special(c)) { + var_spec: + alpha = -2; /* ${ [!#]? and specvar and is_in_name* -> bad */ + } else + break; + STPUTC(c, out); + lenght = 1; + c = pgetc_eatbnl(); + } + if (!subtype) { + var_num_sub: + lenght = 0; + switch (c) { + case '#': subtype = VSTRIMLEFT; lenght = 2; break; + case '%': subtype = VSTRIMRIGHT; lenght = 2; break; + case '/': subtype = VSPATERN; lenght = 2; break; + case '@': subtype = VSTRANS; lenght = 1; break; + } + if (lenght) { + if (lenght == 2) { + if (c == pgetc_eatbnl()) + subtype |= 1; + else + pungetc (); + } + newsyn = BASESYNTAX; + goto eov; + } + if (c == ':') { c = pgetc_eatbnl(); - - if (!subtype && cc == '#') { - subtype = VSLENGTH; - - if (c == '_' || isalnum(c)) - goto varname; - - cc = c; - c = pgetc_eatbnl(); - if (cc == '}' || c != '}') { - pungetc(); - subtype = 0; - c = cc; - cc = '#'; - } + if (c == '}') { + /* ${var:} bad: empty expr syntax eq 0, + but for ${@}/${*} default is 1 + its a conflict, simple - set as error */ + subtype = VSBADSUB; + pungetc (); + goto eov; } - - if (!is_special(cc)) { - if (subtype == VSLENGTH) - subtype = 0; - goto badsub; + subtype = VSNUL; + } + { + static const char types[] = "-+?="; + const char *p = strchr(types, c); + + if (p != NULL) { + subtype |= VSMINUS + (p - types); + goto eov; } + } - USTPUTC(cc, out); - } else - goto badsub; - - if (subtype == 0) { - int cc = c; - - switch (c) { - case ':': - subtype = VSNUL; - c = pgetc_eatbnl(); - /*FALLTHROUGH*/ - default: - p = strchr(types, c); - if (p == NULL) - break; - subtype |= p - types + VSNORMAL; - break; - case '%': - case '#': - subtype = c == '#' ? 
VSTRIMLEFT : - VSTRIMRIGHT; - c = pgetc_eatbnl(); - if (c == cc) - subtype++; - else - pungetc(); - - newsyn = BASESYNTAX; - break; - } + pungetc (); + if (subtype == VSNUL) { + newsyn = ARISYNTAX; /* ${var:offset[:lenght]} */ } else { -badsub: - pungetc(); + /* unknown char for subtype==VSNORMAL */ + subtype = VSBADSUB; } + } else { + /* expected '}' but unknown char for subtype==lenght|indirect */ + subtype = VSBADSUB; + pungetc (); + } - if (newsyn == ARISYNTAX) - newsyn = DQSYNTAX; + eov: + if (subtype == VSBADSUB) + newsyn = BASESYNTAX; - if ((newsyn != synstack->syntax || synstack->innerdq) && - subtype != VSNORMAL) { - synstack_push(&synstack, - synstack->prev ?: - alloca(sizeof(*synstack)), - newsyn); - - synstack->varpushed++; - synstack->dblquote = newsyn != BASESYNTAX; - } - - *((char *)stackblock() + typeloc) = subtype; - if (subtype != VSNORMAL) { - synstack->varnest++; - if (synstack->dblquote) - synstack->dqvarnest++; - } - STPUTC('=', out); + if ((newsyn != synstack->syntax || synstack->innerdq) && subtype != VSNORMAL) { + synstack_push(&synstack, synstack->prev ?: alloca(sizeof(*synstack)), newsyn); + + synstack->varpushed++; + synstack->dblquote = newsyn != BASESYNTAX; + } + + /* unrealized -> to VSBADSUB */ + if (subtype == VSNUL || subtype == VSPATERN || subtype == VSPATERNALL || subtype == VSTRANS) { + subtype = VSBADSUB; + } + *((char *)stackblock() + typeloc) = subtype; + if (subtype != VSNORMAL) { + synstack->varnest++; + if (synstack->dblquote) + synstack->dqvarnest++; } + STPUTC('=', out); goto parsesub_return; }