[RFC/PATCH 1/3] mailinfo: refactor commit message processing

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Within the processing of the commit message, check for a scissors line
or a patchbreak line first (before checking for in-body headers) so that
a subsequent patch modifying the processing of in-body headers would not
cause a scissors line or patchbreak line to be misidentified.

If a line could be both an in-body header and a scissors line (for
example, "From: -- >8 --"), this is considered a fatal error
(previously, it would be interpreted as an in-body header).  (It is not
possible for a line to be both an in-body header and a patchbreak line,
since both require different prefixes.)

The following enumeration shows that processing is the same except (as
described above) the in-body header + scissors line case.

o in-body header (check_header OK)
  o passes UTF-8 conversion
    o [described above] is scissors line
    o [not possible] is patchbreak line
    o [not possible] is blank line
    o is none of the above - processed as header
  o fails UTF-8 conversion - processed as header
o not in-body header
  o passes UTF-8 conversion
    o is scissors line - processed as scissors
    o is patchbreak line - processed as patchbreak
    o is blank line - ignored if in header_stage
    o is none of the above - log message
  o fails UTF-8 conversion - input error

As for the result left in "line" (after the invocation of
handle_commit_msg), it is unused (by its caller, handle_filter, and by
handle_filter's callers, handle_boundary and handle_body) unless this
line is a patchbreak line, in which case handle_patch is subsequently
called (in handle_filter) on "line". In this case, "line" must have
passed UTF-8 conversion both before and after this patch, so the result
is still the same overall.

Signed-off-by: Jonathan Tan <jonathantanmy@xxxxxxxxxx>
---
 mailinfo.c | 145 ++++++++++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 115 insertions(+), 30 deletions(-)

diff --git a/mailinfo.c b/mailinfo.c
index e19abe3..23a56c2 100644
--- a/mailinfo.c
+++ b/mailinfo.c
@@ -340,23 +340,56 @@ static struct strbuf *decode_b_segment(const struct strbuf *b_seg)
 	return out;
 }
 
-static int convert_to_utf8(struct mailinfo *mi,
-			   struct strbuf *line, const char *charset)
+/*
+ * Attempts to convert line into UTF-8, storing the result in line.
+ *
+ * This differs from convert_to_utf8 in that conversion non-success is not
+ * considered an error case - mi->input_error is not set, and no error message
+ * is printed.
+ *
+ * If the conversion is unnecessary, returns 0 and stores NULL in old_buf (if
+ * old_buf is not NULL).
+ *
+ * If the conversion is successful, returns 0 and stores the unconverted string
+ * in old_buf and old_len (if they are respectively not NULL).
+ *
+ * If the conversion is unsuccessful, returns -1.
+ */
+static int try_convert_to_utf8(const struct mailinfo *mi, struct strbuf *line,
+			       const char *charset, char **old_buf,
+			       size_t *old_len)
 {
-	char *out;
+	char *utf8;
 
-	if (!mi->metainfo_charset || !charset || !*charset)
+	if (!mi->metainfo_charset || !charset || !*charset ||
+	    same_encoding(mi->metainfo_charset, charset)) {
+		if (old_buf)
+			*old_buf = NULL;
 		return 0;
+	}
 
-	if (same_encoding(mi->metainfo_charset, charset))
+	utf8 = reencode_string(line->buf, mi->metainfo_charset, charset);
+	if (utf8) {
+		char *temp = strbuf_detach(line, old_len);
+		if (old_buf)
+			*old_buf = temp;
+		strbuf_attach(line, utf8, strlen(utf8), strlen(utf8));
 		return 0;
-	out = reencode_string(line->buf, mi->metainfo_charset, charset);
-	if (!out) {
+	}
+	return -1;
+}
+
+/*
+ * Converts line into UTF-8, setting mi->input_error to -1 upon failure.
+ */
+static int convert_to_utf8(struct mailinfo *mi,
+			   struct strbuf *line, const char *charset)
+{
+	if (try_convert_to_utf8(mi, line, charset, NULL, NULL)) {
 		mi->input_error = -1;
 		return error("cannot convert from %s to %s",
 			     charset, mi->metainfo_charset);
 	}
-	strbuf_attach(line, out, strlen(out), strlen(out));
 	return 0;
 }
 
@@ -515,6 +548,13 @@ static int check_header(struct mailinfo *mi,
 	return ret;
 }
 
+static int check_header_raw(struct mailinfo *mi,
+			    char *buf, size_t len,
+			    struct strbuf *hdr_data[], int overwrite) {
+	const struct strbuf sb = {0, len, buf};
+	return check_header(mi, &sb, hdr_data, overwrite);
+}
+
 static void decode_transfer_encoding(struct mailinfo *mi, struct strbuf *line)
 {
 	struct strbuf *ret;
@@ -623,32 +663,48 @@ static int is_scissors_line(const struct strbuf *line)
 		gap * 2 < perforation);
 }
 
-static int handle_commit_msg(struct mailinfo *mi, struct strbuf *line)
+static int resembles_rfc2822_header(const struct strbuf *line)
 {
-	assert(!mi->filter_stage);
+	char *c;
 
-	if (mi->header_stage) {
-		if (!line->len || (line->len == 1 && line->buf[0] == '\n'))
+	if (!isalpha(line->buf[0]))
+		return 0;
+
+	for (c = line->buf + 1; *c != 0; c++) {
+		if (*c == ':')
+			return 1;
+		else if (*c != '-' && !isalpha(*c))
 			return 0;
 	}
+	return 0;
+}
 
-	if (mi->use_inbody_headers && mi->header_stage) {
-		mi->header_stage = check_header(mi, line, mi->s_hdr_data, 0);
-		if (mi->header_stage)
-			return 0;
-	} else
-		/* Only trim the first (blank) line of the commit message
-		 * when ignoring in-body headers.
-		 */
-		mi->header_stage = 0;
+static int handle_commit_msg(struct mailinfo *mi, struct strbuf *line)
+{
+	int ret = 0;
+	int utf8_result;
+	char *old_buf;
+	size_t old_len;
+
+	assert(!mi->filter_stage);
 
-	/* normalize the log message to UTF-8. */
-	if (convert_to_utf8(mi, line, mi->charset.buf))
-		return 0; /* mi->input_error already set */
+	/*
+	 * Obtain UTF8 for scissors line and patchbreak checks, but retain the
+	 * undecoded line in case we need to process it as an in-body header.
+	 */
+	utf8_result = try_convert_to_utf8(mi, line, mi->charset.buf, &old_buf,
+					  &old_len);
 
-	if (mi->use_scissors && is_scissors_line(line)) {
+	if (!utf8_result && mi->use_scissors && is_scissors_line(line)) {
 		int i;
 
+		if (resembles_rfc2822_header(line))
+			/*
+			 * Explicitly reject scissor lines that resemble a RFC
+			 * 2822 header, to avoid being prone to error.
+			 */
+			die("scissors line resembles RFC 2822 header");
+
 		strbuf_setlen(&mi->log_message, 0);
 		mi->header_stage = 1;
 
@@ -661,18 +717,47 @@ static int handle_commit_msg(struct mailinfo *mi, struct strbuf *line)
 				strbuf_release(mi->s_hdr_data[i]);
 			mi->s_hdr_data[i] = NULL;
 		}
-		return 0;
+		goto handle_commit_msg_out;
 	}
-
-	if (patchbreak(line)) {
+	if (!utf8_result && patchbreak(line)) {
 		if (mi->message_id)
 			strbuf_addf(&mi->log_message,
 				    "Message-Id: %s\n", mi->message_id);
-		return 1;
+		ret = 1;
+		goto handle_commit_msg_out;
 	}
 
+	if (mi->header_stage) {
+		char *buf = old_buf ? old_buf : line->buf;
+		if (buf[0] == 0 || (buf[0] == '\n' && buf[1] == 0))
+			goto handle_commit_msg_out;
+	}
+
+	if (mi->use_inbody_headers && mi->header_stage) {
+		char *buf = old_buf ? old_buf : line->buf;
+		size_t len = old_buf ? old_len : line->len;
+		mi->header_stage = check_header_raw(mi, buf, len,
+						    mi->s_hdr_data, 0);
+		if (mi->header_stage)
+			goto handle_commit_msg_out;
+	} else
+		/* Only trim the first (blank) line of the commit message
+		 * when ignoring in-body headers.
+		 */
+		mi->header_stage = 0;
+
+	/* If adding as a log message, conversion to UTF-8 is required. */
+	if (utf8_result) {
+		mi->input_error = -1;
+		error("cannot convert from %s to %s",
+		      mi->charset.buf, mi->metainfo_charset);
+		goto handle_commit_msg_out;
+	}
 	strbuf_addbuf(&mi->log_message, line);
-	return 0;
+
+handle_commit_msg_out:
+	free(old_buf);
+	return ret;
 }
 
 static void handle_patch(struct mailinfo *mi, const struct strbuf *line)
-- 
2.10.0.rc2.20.g5b18e70




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]