In order to parse multi-byte characters which may be up to MB_LEN_MAX bytes long, allow enough calls to pungetc to undo a single multi-byte character. Also add a function pungetn to do multiple pungetc calls in a row. Signed-off-by: Herbert Xu <herbert@xxxxxxxxxxxxxxxxxxx> --- src/input.c | 58 ++++++++++++++++++++++++++++++++++------------------- src/input.h | 11 +++++----- 2 files changed, 42 insertions(+), 27 deletions(-) diff --git a/src/input.c b/src/input.c index fb9858f..c7805ad 100644 --- a/src/input.c +++ b/src/input.c @@ -56,7 +56,7 @@ #include "main.h" #include "myhistedit.h" -#define IBUFSIZ (BUFSIZ + 1) +#define IBUFSIZ (BUFSIZ + PUNGETC_MAX + 1) MKINIT struct parsefile basepf; /* top level input file */ @@ -83,13 +83,16 @@ INIT { } RESET { + int c; + /* clear input buffer */ popallfiles(); - basepf.unget = 0; - while (basepf.lastc[0] != '\n' && - basepf.lastc[0] != PEOF && - !int_pending()) - pgetc(); + + c = PEOF; + if (basepf.nextc - basebuf > basepf.unget) + c = basepf.nextc[-basepf.unget]; + while (c != '\n' && c != PEOF && !int_pending()) + c = pgetc(); } FORKRESET { @@ -131,17 +134,20 @@ static int __pgetc(void) { int c; - if (parsefile->unget) - return parsefile->lastc[--parsefile->unget]; + if (parsefile->unget) { + long unget = -(long)(unsigned)parsefile->unget--; + + if (parsefile->nleft < 0) + return preadbuffer(); + + return parsefile->nextc[unget]; + } if (--parsefile->nleft >= 0) c = (signed char)*parsefile->nextc++; else c = preadbuffer(); - parsefile->lastc[1] = parsefile->lastc[0]; - parsefile->lastc[0] = c; - return c; } @@ -176,9 +182,16 @@ static int stdin_clear_nonblock(void) static int preadfd(void) { + char *buf = parsefile->buf; + int unget; int nr; - char *buf = parsefile->buf; - parsefile->nextc = buf; + + unget = parsefile->nextc - buf; + if (unget > PUNGETC_MAX) + unget = PUNGETC_MAX; + + memmove(buf, parsefile->nextc - unget, unget); + parsefile->nextc = buf += unget; retry: #ifndef SMALL @@ -196,8 +209,8 @@ retry: nr = 0; else { nr = el_len; - if (nr > IBUFSIZ - 1) - nr = IBUFSIZ - 1; + if (nr > BUFSIZ) + nr = BUFSIZ; memcpy(buf, rl_cp, nr); if (nr != el_len) { el_len -= nr; @@ -209,9 +222,9 @@ retry: } else #endif if (parsefile->fd) - nr = read(parsefile->fd, buf, IBUFSIZ - 1); + nr = read(parsefile->fd, buf, BUFSIZ); else { - unsigned len = IBUFSIZ - 1; + unsigned len = BUFSIZ; nr = 0; @@ -348,6 +361,11 @@ done: return (signed char)*parsefile->nextc++; } +void pungetn(int n) +{ + parsefile->unget += n; +} + /* * Undo a call to pgetc. Only two characters may be pushed back. * PEOF may be pushed back. @@ -356,7 +374,7 @@ done: void pungetc(void) { - parsefile->unget++; + pungetn(1); } /* @@ -383,7 +401,6 @@ pushstring(char *s, void *ap) sp->prevnleft = parsefile->nleft; sp->unget = parsefile->unget; sp->spfree = parsefile->spfree; - memcpy(sp->lastc, parsefile->lastc, sizeof(sp->lastc)); sp->ap = (struct alias *)ap; if (ap) { ((struct alias *)ap)->flag |= ALIASINUSE; @@ -413,7 +430,6 @@ static void popstring(void) parsefile->nextc = sp->prevstring; parsefile->nleft = sp->prevnleft; parsefile->unget = sp->unget; - memcpy(parsefile->lastc, sp->lastc, sizeof(sp->lastc)); /*dprintf("*** calling popstring: restoring to '%s'\n", parsenextc);*/ parsefile->strpush = sp->prev; parsefile->spfree = sp; @@ -457,7 +473,7 @@ setinputfd(int fd, int push) } parsefile->fd = fd; if (parsefile->buf == NULL) - parsefile->buf = ckmalloc(IBUFSIZ); + parsefile->nextc = parsefile->buf = ckmalloc(IBUFSIZ); input_set_lleft(parsefile, parsefile->nleft = 0); plinno = 1; } diff --git a/src/input.h b/src/input.h index 1ff5773..5b4a045 100644 --- a/src/input.h +++ b/src/input.h @@ -34,12 +34,16 @@ * @(#)input.h 8.2 (Berkeley) 5/4/95 */ +#include <limits.h> + #ifdef SMALL #define IS_DEFINED_SMALL 1 #else #define IS_DEFINED_SMALL 0 #endif +#define PUNGETC_MAX (MB_LEN_MAX > 16 ? MB_LEN_MAX : 16) + /* PEOF (the end of file marker) is defined in syntax.h */ enum { @@ -59,9 +63,6 @@ struct strpush { /* Delay freeing so we can stop nested aliases. */ struct strpush *spfree; - /* Remember last two characters for pungetc. */ - int lastc[2]; - /* Number of outstanding calls to pungetc. */ int unget; }; @@ -87,9 +88,6 @@ struct parsefile { /* Delay freeing so we can stop nested aliases. */ struct strpush *spfree; - /* Remember last two characters for pungetc. */ - int lastc[2]; - /* Number of outstanding calls to pungetc. */ int unget; }; @@ -106,6 +104,7 @@ extern struct parsefile *parsefile; int pgetc(void); int pgetc2(void); void pungetc(void); +void pungetn(int); void pushstring(char *, void *); int setinputfile(const char *, int); void setinputstring(char *); -- 2.39.2