[PATCH v2 1/3] write: correctly handle wide characters

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Do this by replacing fputc_careful() (notice that the description said
it's locale-aware ‒ it very much is /not/), with a fputs_careful() which
does the same thing, but if it were to output a byte in the \123 format,
first it checks whether this byte starts a valid multibyte character.

If it does, and that character is printable, write it verbatim.
This means that
  echo 'foo åäö ąęćźżń bar' | write nabijaczleweli pts/4
instead of
  foo \303\245\303\244\303\266
  \304\205\304\231\304\207\305\272\305\274\305\204 bar
yields
  foo åäö ąęćźżń bar
or, more realistically, from a message I got earlier today,
  Filip powiedzia\305\202 \305\274e zap\305\202aci jutro
becomes
  Filip powiedział że zapłaci jutro

Invalid/non-printable sequences get processed as before.

Line reading in write must become getline() to avoid dealing with
partial characters: for example on input consisting solely of
ąęćźżń, where every {1} is an instance, the output would be
  {42}ąęć\305\272żń{84}ąęćź\305\274ń{84}ąęćźż\305\204{39}
with just fixed-512 fgets()

Bug-Debian: https://bugs.debian.org/826596
---
 include/carefulputc.h | 60 +++++++++++++++++++++++++++++++------------
 login-utils/last.c    |  4 +--
 term-utils/write.c    | 25 +++++-------------
 3 files changed, 52 insertions(+), 37 deletions(-)

diff --git a/include/carefulputc.h b/include/carefulputc.h
index 8860b1234..740add68e 100644
--- a/include/carefulputc.h
+++ b/include/carefulputc.h
@@ -1,31 +1,59 @@
 #ifndef UTIL_LINUX_CAREFULPUTC_H
 #define UTIL_LINUX_CAREFULPUTC_H
 
-/*
- * A putc() for use in write and wall (that sometimes are sgid tty).
- * It avoids control characters in our locale, and also ASCII control
- * characters.   Note that the locale of the recipient is unknown.
-*/
 #include <stdio.h>
 #include <string.h>
 #include <ctype.h>
+#ifdef HAVE_WIDECHAR
+#include <wctype.h>
+#endif
+#include <stdbool.h>
 
 #include "cctype.h"
 
-static inline int fputc_careful(int c, FILE *fp, const char fail)
+/*
+ * A puts() for use in write and wall (that sometimes are sgid tty).
+ * It avoids control and invalid characters.
+ * The locale of the recipient is nominally unknown,
+ * but it's a solid bet that the encoding is compatible with the author's.
+ */
+static inline int fputs_careful(const char * s, FILE *fp, const char ctrl, bool cr_lf)
 {
-	int ret;
+	int ret = 0;
 
-	if (isprint(c) || c == '\a' || c == '\t' || c == '\r' || c == '\n')
-		ret = putc(c, fp);
-	else if (!c_isascii(c))
-		ret = fprintf(fp, "\\%3o", (unsigned char)c);
-	else {
-		ret = putc(fail, fp);
-		if (ret != EOF)
-			ret = putc(c ^ 0x40, fp);
+	for (size_t slen = strlen(s); *s; ++s, --slen) {
+		if (*s == '\n')
+			ret = fputs(cr_lf ? "\r\n" : "\n", fp);
+		else if (isprint(*s) || *s == '\a' || *s == '\t' || *s == '\r')
+			ret = putc(*s, fp);
+		else if (!c_isascii(*s)) {
+#ifdef HAVE_WIDECHAR
+			wchar_t w;
+			size_t clen = mbtowc(&w, s, slen);
+			switch(clen) {
+				case (size_t)-2:  // incomplete
+				case (size_t)-1:  // EILSEQ
+					mbtowc(NULL, NULL, 0);
+				nonprint:
+					ret = fprintf(fp, "\\%3hho", *s);
+					break;
+				default:
+					if(!iswprint(w))
+						goto nonprint;
+					ret = fwrite(s, 1, clen, fp);
+					s += clen - 1;
+					slen -= clen - 1;
+					break;
+			}
+#else
+			ret = fprintf(fp, "\\%3hho", *s);
+#endif
+		} else
+			ret = fputs((char[]){ ctrl, *s ^ 0x40, '\0' }, fp);
+		if (ret < 0)
+			return EOF;
 	}
-	return (ret < 0) ? EOF : 0;
+	return 0;
 }
 
 static inline void fputs_quoted_case(const char *data, FILE *out, int dir)
diff --git a/login-utils/last.c b/login-utils/last.c
index d3eeed4b6..1b45dbf24 100644
--- a/login-utils/last.c
+++ b/login-utils/last.c
@@ -392,7 +392,6 @@ static int list(const struct last_control *ctl, struct utmpx *p, time_t logout_t
 	char		final[512];
 	char		utline[sizeof(p->ut_line) + 1];
 	char		domain[256];
-	char		*s;
 	int		mins, hours, days;
 	int		r, len;
 	struct last_timefmt *fmt;
@@ -548,8 +547,7 @@ static int list(const struct last_control *ctl, struct utmpx *p, time_t logout_t
 	/*
 	 *	Print out "final" string safely.
 	 */
-	for (s = final; *s; s++)
-		fputc_careful(*s, stdout, '*');
+	fputs_careful(final, stdout, '*', false);
 
 	if (len < 0 || (size_t)len >= sizeof(final))
 		putchar('\n');
diff --git a/term-utils/write.c b/term-utils/write.c
index 8b86e9a9d..b485e28fd 100644
--- a/term-utils/write.c
+++ b/term-utils/write.c
@@ -223,21 +223,6 @@ static void signal_handler(int signo)
 	signal_received = signo;
 }
 
-/*
- * write_line - like fputs(), but makes control characters visible and
- *     turns \n into \r\n.
- */
-static void write_line(char *s)
-{
-	while (*s) {
-		const int c = *s++;
-
-		if ((c == '\n' && fputc_careful('\r', stdout, '^') == EOF)
-		    || fputc_careful(c, stdout, '^') == EOF)
-			err(EXIT_FAILURE, _("carefulputc failed"));
-	}
-}
-
 /*
  * do_write - actually make the connection
  */
@@ -247,7 +232,8 @@ static void do_write(const struct write_control *ctl)
 	struct passwd *pwd;
 	time_t now;
 	struct tm *tm;
-	char *host, line[512];
+	char *host, *line = NULL;
+	size_t linelen = 0;
 	struct sigaction sigact;
 
 	/* Determine our login name(s) before the we reopen() stdout */
@@ -286,11 +272,14 @@ static void do_write(const struct write_control *ctl)
 	free(host);
 	printf("\r\n");
 
-	while (fgets(line, sizeof(line), stdin) != NULL) {
+	while (getline(&line, &linelen, stdin) >= 0) {
 		if (signal_received)
 			break;
-		write_line(line);
+
+		if (fputs_careful(line, stdout, '^', true) == EOF)
+			err(EXIT_FAILURE, _("carefulputc failed"));
 	}
+	free(line);
 	printf("EOF\r\n");
 }
 
-- 
2.30.2

Attachment: signature.asc
Description: PGP signature


[Index of Archives]     [Netdev]     [Ethernet Bridging]     [Linux Wireless]     [Kernel Newbies]     [Security]     [Linux for Hams]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux RAID]     [Linux Admin]     [Samba]

  Powered by Linux