[PATCH] mailsplit and mailinfo: gracefully handle NUL characters

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The function fgets() has a big problem with NUL characters: it reads
them, but the caller cannot tell whether a given NUL came from the file
stream or is the terminator that fgets() appended at the end of the line.

So implement a custom read_line() function.

Noticed by Tommy Thorn.

Signed-off-by: Johannes Schindelin <johannes.schindelin@xxxxxx>
---

	Sorry for the binary patch: the file t5100/nul contains NUL
	characters, obviously.

	BTW I do not know how much using fgetc() instead of fgets()
	slows things down, but I expect both to be equally fast
	because they are both buffered, right?

 builtin-mailinfo.c  |   24 +++++++++++++-----------
 builtin-mailsplit.c |   27 +++++++++++++++++++++++----
 builtin.h           |    1 +
 t/t5100-mailinfo.sh |    9 +++++++++
 t/t5100/nul         |  Bin 0 -> 91 bytes
 5 files changed, 46 insertions(+), 15 deletions(-)
 create mode 100644 t/t5100/nul

diff --git a/builtin-mailinfo.c b/builtin-mailinfo.c
index 11f154b..f0c4209 100644
--- a/builtin-mailinfo.c
+++ b/builtin-mailinfo.c
@@ -641,7 +641,7 @@ static void decode_transfer_encoding(char *line, unsigned linesize)
 	}
 }
 
-static int handle_filter(char *line, unsigned linesize);
+static int handle_filter(char *line, unsigned linesize, int linelen);
 
 static int find_boundary(void)
 {
@@ -669,7 +669,7 @@ again:
 					"can't recover\n");
 			exit(1);
 		}
-		handle_filter(newline, sizeof(newline));
+		handle_filter(newline, sizeof(newline), strlen(newline));
 
 		/* skip to the next boundary */
 		if (!find_boundary())
@@ -759,14 +759,14 @@ static int handle_commit_msg(char *line, unsigned linesize)
 	return 0;
 }
 
-static int handle_patch(char *line)
+static int handle_patch(char *line, int len)
 {
-	fputs(line, patchfile);
+	fwrite(line, 1, len, patchfile);
 	patch_lines++;
 	return 0;
 }
 
-static int handle_filter(char *line, unsigned linesize)
+static int handle_filter(char *line, unsigned linesize, int linelen)
 {
 	static int filter = 0;
 
@@ -779,7 +779,7 @@ static int handle_filter(char *line, unsigned linesize)
 			break;
 		filter++;
 	case 1:
-		if (!handle_patch(line))
+		if (!handle_patch(line, linelen))
 			break;
 		filter++;
 	default:
@@ -794,6 +794,7 @@ static void handle_body(void)
 	int rc = 0;
 	static char newline[2000];
 	static char *np = newline;
+	int len = strlen(line);
 
 	/* Skip up to the first boundary */
 	if (content_top->boundary) {
@@ -807,7 +808,8 @@ static void handle_body(void)
 			/* flush any leftover */
 			if ((transfer_encoding == TE_BASE64)  &&
 			    (np != newline)) {
-				handle_filter(newline, sizeof(newline));
+				handle_filter(newline, sizeof(newline),
+						strlen(newline));
 			}
 			if (!handle_boundary())
 				return;
@@ -824,7 +826,7 @@ static void handle_body(void)
 
 			/* binary data most likely doesn't have newlines */
 			if (message_type != TYPE_TEXT) {
-				rc = handle_filter(line, sizeof(newline));
+				rc = handle_filter(line, sizeof(line), len);
 				break;
 			}
 
@@ -841,7 +843,7 @@ static void handle_body(void)
 					/* should be sitting on a new line */
 					*(++np) = 0;
 					op++;
-					rc = handle_filter(newline, sizeof(newline));
+					rc = handle_filter(newline, sizeof(newline), np - newline);
 					np = newline;
 				}
 			} while (*op != 0);
@@ -851,12 +853,12 @@ static void handle_body(void)
 			break;
 		}
 		default:
-			rc = handle_filter(line, sizeof(newline));
+			rc = handle_filter(line, sizeof(line), len);
 		}
 		if (rc)
 			/* nothing left to filter */
 			break;
-	} while (fgets(line, sizeof(line), fin));
+	} while ((len = read_line_with_nul(line, sizeof(line), fin)));
 
 	return;
 }
diff --git a/builtin-mailsplit.c b/builtin-mailsplit.c
index 46b27cd..021dc16 100644
--- a/builtin-mailsplit.c
+++ b/builtin-mailsplit.c
@@ -45,6 +45,25 @@ static int is_from_line(const char *line, int len)
 /* Could be as small as 64, enough to hold a Unix "From " line. */
 static char buf[4096];
 
+/* We cannot use fgets() because our lines can contain NULs */
+int read_line_with_nul(char *buf, int size, FILE *in)
+{
+	int len = 0, c;
+
+	for (;;) {
+		c = fgetc(in);
+		buf[len++] = c;
+		if (c == EOF || c == '\n' || len + 1 >= size)
+			break;
+	}
+
+	if (c == EOF)
+		len--;
+	buf[len] = '\0';
+
+	return len;
+}
+
 /* Called with the first line (potentially partial)
  * already in buf[] -- normally that should begin with
  * the Unix "From " line.  Write it into the specified
@@ -70,19 +89,19 @@ static int split_one(FILE *mbox, const char *name, int allow_bare)
 	 * "From " and having something that looks like a date format.
 	 */
 	for (;;) {
-		int is_partial = (buf[len-1] != '\n');
+		int is_partial = len && buf[len-1] != '\n';
 
-		if (fputs(buf, output) == EOF)
+		if (fwrite(buf, 1, len, output) != len)
 			die("cannot write output");
 
-		if (fgets(buf, sizeof(buf), mbox) == NULL) {
+		len = read_line_with_nul(buf, sizeof(buf), mbox);
+		if (len == 0) {
 			if (feof(mbox)) {
 				status = 1;
 				break;
 			}
 			die("cannot read mbox");
 		}
-		len = strlen(buf);
 		if (!is_partial && !is_bare && is_from_line(buf, len))
 			break; /* done with one message */
 	}
diff --git a/builtin.h b/builtin.h
index c630d5b..d0a0ead 100644
--- a/builtin.h
+++ b/builtin.h
@@ -9,6 +9,7 @@ extern const char git_usage_string[];
 extern void list_common_cmds_help(void);
 extern void help_unknown_cmd(const char *cmd);
 extern void prune_packed_objects(int);
+extern int read_line_with_nul(char *buf, int size, FILE *file);
 
 extern int cmd_add(int argc, const char **argv, const char *prefix);
 extern int cmd_annotate(int argc, const char **argv, const char *prefix);
diff --git a/t/t5100-mailinfo.sh b/t/t5100-mailinfo.sh
index d6c55c1..5a4610b 100755
--- a/t/t5100-mailinfo.sh
+++ b/t/t5100-mailinfo.sh
@@ -25,4 +25,13 @@ do
 		diff ../t5100/info$mail info$mail"
 done
 
+test_expect_success 'respect NULs' '
+
+	git mailsplit -d3 -o. ../t5100/nul &&
+	cmp ../t5100/nul 001 &&
+	(cat 001 | git mailinfo msg patch) &&
+	test 4 = $(wc -l < patch)
+
+'
+
 test_done
diff --git a/t/t5100/nul b/t/t5100/nul
new file mode 100644
index 0000000000000000000000000000000000000000..3d40691787b855cc0133514a19052492eb853d21
GIT binary patch
literal 91
zcmW;6y$ygM5C%|6a#MT@Tm%~v2e7kZ0t`Q)fHOej_C}MJcXX*}a!Gh_N`s3x>;_}@
mA68>55i?ULDS<hc3BM!}T;HU$lNvE*_bo@vIHuA{lcE=x<rtIz

literal 0
HcmV?d00001

-- 
1.5.5.1.425.g5f464.dirty

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Announce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux