On 02/12/2015 23:37, Gioele Barabucci wrote:
Hello,
I am forwarding a bug [1] reported by a Debian user: `read` does not
ignore trailing spaces. The current version of dash is affected by
this bug.
A simple test from the original reporter:
$ dash -c 'echo " a b " | { read v ; echo "<$v>" ; }'
<a b >
$ bash -c 'echo " a b " | { read v ; echo "<$v>" ; }'
<a b>
Other shells like posh and mksh behave like bash.
This is indeed a bug based on the current specification. In the past,
the specification was unclear and the dash interpretation was a
legitimate one, but currently it explicitly spells out that trailing IFS
whitespace gets ignored.
However, unless I'm misreading the spec, mksh and bash don't seem to
implement it properly either: only trailing IFS whitespace is supposed
to be dropped. IFS non-whitespace is supposed to remain, even at the end
of the input. mksh and bash remove it, posh and zsh leave it in:
$ for shell in bash mksh posh zsh; do printf %s: "$shell"; $shell -c
'IFS=,; echo a, | { read v; echo "<$v>"; }'; done
bash:<a>
mksh:<a>
posh:<a,>
zsh:<a,>
As far as I can tell, the posh/zsh behaviour is the correct behaviour,
but I'm not convinced yet my interpretation is correct.
Attached is a not fully tested proof of concept to implement the
posh/zsh behaviour in dash by extending ifsbreakup() to allow specifying
a maximum number of arguments instead of fixing it up in
readcmd_handle_line(). It returns <a b> in your test, and <a,> in mine.
Feedback welcome.
Cheers,
Harald van Dijk
This error is reproducible with dash 0.5.7 and with the current master
git master branch, commit 2e5842258bd5b252ffdaa630db09c9a19a9717ca.
[1] https://bugs.debian.org/794965
--
Gioele Barabucci <gioele@xxxxxxxxx>
diff --git a/src/expand.c b/src/expand.c
index b2d710d..6afd562 100644
--- a/src/expand.c
+++ b/src/expand.c
@@ -203,7 +203,7 @@ expandarg(union node *arg, struct arglist *arglist, int flag)
* TODO - EXP_REDIR
*/
if (flag & EXP_FULL) {
- ifsbreakup(p, &exparg);
+ ifsbreakup(p, 0, &exparg);
*exparg.lastp = NULL;
exparg.lastp = &exparg.list;
expandmeta(exparg.list, flag);
@@ -1016,9 +1016,11 @@ recordregion(int start, int end, int nulonly)
* Break the argument string into pieces based upon IFS and add the
* strings to the argument list. The regions of the string to be
* searched for IFS characters have been stored by recordregion.
+ * If maxargs is non-zero, at most maxargs arguments will be created, by
+ * joining together the last arguments.
*/
void
-ifsbreakup(char *string, struct arglist *arglist)
+ifsbreakup(char *string, int maxargs, struct arglist *arglist)
{
struct ifsregion *ifsp;
struct strlist *sp;
@@ -1054,12 +1056,36 @@ ifsbreakup(char *string, struct arglist *arglist)
start = p;
continue;
}
+ /* If only reading one more argument, combine any field terminators,
+ * except for trailing IFS whitespace. */
+ if (maxargs == 1) {
+ q = p;
+ p++;
+ if (ifsspc) {
+ /* Ignore IFS whitespace at end */
+ for (;;) {
+ if (p >= string + ifsp->endoff) {
+ *q = '\0';
+ goto add;
+ }
+ if (*p == (char)CTLESC)
+ p++;
+ ifsspc = strchr(ifs, *p) && strchr(defifs, *p);
+ p++;
+ if (!ifsspc) {
+ break;
+ }
+ }
+ }
+ continue;
+ }
*q = '\0';
sp = (struct strlist *)stalloc(sizeof *sp);
sp->text = start;
*arglist->lastp = sp;
arglist->lastp = &sp->next;
p++;
+ if (maxargs) maxargs--;
if (!nulonly) {
for (;;) {
if (p >= string + ifsp->endoff) {
diff --git a/src/expand.h b/src/expand.h
index 6a90f67..26dc5b4 100644
--- a/src/expand.h
+++ b/src/expand.h
@@ -69,7 +69,7 @@ char *_rmescapes(char *, int);
int casematch(union node *, char *);
void recordregion(int, int, int);
void removerecordregions(int);
-void ifsbreakup(char *, struct arglist *);
+void ifsbreakup(char *, int, struct arglist *);
void ifsfree(void);
/* From arith.y */
diff --git a/src/miscbltin.c b/src/miscbltin.c
index b596fd2..39b9c47 100644
--- a/src/miscbltin.c
+++ b/src/miscbltin.c
@@ -67,28 +67,21 @@
* less fields than variables -> remaining variables unset.
*
* @param line complete line of input
+ * @param ac argument count
* @param ap argument (variable) list
* @param len length of line including trailing '\0'
*/
static void
-readcmd_handle_line(char *s, char **ap)
+readcmd_handle_line(char *s, int ac, char **ap)
{
struct arglist arglist;
struct strlist *sl;
- char *backup;
- char *line;
- /* ifsbreakup will fiddle with stack region... */
- line = stackblock();
s = grabstackstr(s);
- /* need a copy, so that delimiters aren't lost
- * in case there are more fields than variables */
- backup = sstrdup(line);
-
arglist.lastp = &arglist.list;
- ifsbreakup(s, &arglist);
+ ifsbreakup(s, ac, &arglist);
*arglist.lastp = NULL;
ifsfree();
@@ -104,21 +97,6 @@ readcmd_handle_line(char *s, char **ap)
return;
}
- /* remaining fields present, but no variables left. */
- if (!ap[1] && sl->next) {
- size_t offset;
- char *remainder;
-
- /* FIXME little bit hacky, assuming that ifsbreakup
- * will not modify the length of the string */
- offset = sl->text - s;
- remainder = backup + offset;
- rmescapes(remainder);
- setvar(*ap, remainder, 0);
-
- return;
- }
-
/* set variable to field */
rmescapes(sl->text);
setvar(*ap, sl->text, 0);
@@ -211,7 +189,7 @@ start:
out:
recordregion(startloc, p - (char *)stackblock(), 0);
STACKSTRNUL(p);
- readcmd_handle_line(p + 1, ap);
+ readcmd_handle_line(p + 1, argc - (ap - argv), ap);
return status;
}