Within the processing of the commit message, check for a scissors line or a patchbreak line first (before checking for in-body headers) so that a subsequent patch modifying the processing of in-body headers would not cause a scissors line or patchbreak line to be misidentified. If a line could be both an in-body header and a scissors line (for example, "From: -- >8 --"), this is considered a fatal error (previously, it would be interpreted as an in-body header). (It is not possible for a line to be both an in-body header and a patchbreak line, since both require different prefixes.) The following enumeration shows that processing is the same except (as described above) the in-body header + scissors line case. o in-body header (check_header OK) o passes UTF-8 conversion o [described above] is scissors line o [not possible] is patchbreak line o [not possible] is blank line o is none of the above - processed as header o fails UTF-8 conversion - processed as header o not in-body header o passes UTF-8 conversion o is scissors line - processed as scissors o is patchbreak line - processed as patchbreak o is blank line - ignored if in header_stage o is none of the above - log message o fails UTF-8 conversion - input error As for the result left in "line" (after the invocation of handle_commit_msg), it is unused (by its caller, handle_filter, and by handle_filter's callers, handle_boundary and handle_body) unless this line is a patchbreak line, in which case handle_patch is subsequently called (in handle_filter) on "line". In this case, "line" must have passed UTF-8 conversion both before and after this patch, so the result is still the same overall. Signed-off-by: Jonathan Tan <jonathantanmy@xxxxxxxxxx> --- mailinfo.c | 145 ++++++++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 115 insertions(+), 30 deletions(-) diff --git a/mailinfo.c b/mailinfo.c index e19abe3..23a56c2 100644 --- a/mailinfo.c +++ b/mailinfo.c @@ -340,23 +340,56 @@ static struct strbuf *decode_b_segment(const struct strbuf *b_seg) return out; } -static int convert_to_utf8(struct mailinfo *mi, - struct strbuf *line, const char *charset) +/* + * Attempts to convert line into UTF-8, storing the result in line. + * + * This differs from convert_to_utf8 in that conversion non-success is not + * considered an error case - mi->input_error is not set, and no error message + * is printed. + * + * If the conversion is unnecessary, returns 0 and stores NULL in old_buf (if + * old_buf is not NULL). + * + * If the conversion is successful, returns 0 and stores the unconverted string + * in old_buf and old_len (if they are respectively not NULL). + * + * If the conversion is unsuccessful, returns -1. + */ +static int try_convert_to_utf8(const struct mailinfo *mi, struct strbuf *line, + const char *charset, char **old_buf, + size_t *old_len) { - char *out; + char *utf8; - if (!mi->metainfo_charset || !charset || !*charset) + if (!mi->metainfo_charset || !charset || !*charset || + same_encoding(mi->metainfo_charset, charset)) { + if (old_buf) + *old_buf = NULL; return 0; + } - if (same_encoding(mi->metainfo_charset, charset)) + utf8 = reencode_string(line->buf, mi->metainfo_charset, charset); + if (utf8) { + char *temp = strbuf_detach(line, old_len); + if (old_buf) + *old_buf = temp; + strbuf_attach(line, utf8, strlen(utf8), strlen(utf8)); return 0; - out = reencode_string(line->buf, mi->metainfo_charset, charset); - if (!out) { + } + return -1; +} + +/* + * Converts line into UTF-8, setting mi->input_error to -1 upon failure. + */ +static int convert_to_utf8(struct mailinfo *mi, + struct strbuf *line, const char *charset) +{ + if (try_convert_to_utf8(mi, line, charset, NULL, NULL)) { mi->input_error = -1; return error("cannot convert from %s to %s", charset, mi->metainfo_charset); } - strbuf_attach(line, out, strlen(out), strlen(out)); return 0; } @@ -515,6 +548,13 @@ static int check_header(struct mailinfo *mi, return ret; } +static int check_header_raw(struct mailinfo *mi, + char *buf, size_t len, + struct strbuf *hdr_data[], int overwrite) { + const struct strbuf sb = {0, len, buf}; + return check_header(mi, &sb, hdr_data, overwrite); +} + static void decode_transfer_encoding(struct mailinfo *mi, struct strbuf *line) { struct strbuf *ret; @@ -623,32 +663,48 @@ static int is_scissors_line(const struct strbuf *line) gap * 2 < perforation); } -static int handle_commit_msg(struct mailinfo *mi, struct strbuf *line) +static int resembles_rfc2822_header(const struct strbuf *line) { - assert(!mi->filter_stage); + char *c; - if (mi->header_stage) { - if (!line->len || (line->len == 1 && line->buf[0] == '\n')) + if (!isalpha(line->buf[0])) + return 0; + + for (c = line->buf + 1; *c != 0; c++) { + if (*c == ':') + return 1; + else if (*c != '-' && !isalpha(*c)) return 0; } + return 0; +} - if (mi->use_inbody_headers && mi->header_stage) { - mi->header_stage = check_header(mi, line, mi->s_hdr_data, 0); - if (mi->header_stage) - return 0; - } else - /* Only trim the first (blank) line of the commit message - * when ignoring in-body headers. - */ - mi->header_stage = 0; +static int handle_commit_msg(struct mailinfo *mi, struct strbuf *line) +{ + int ret = 0; + int utf8_result; + char *old_buf; + size_t old_len; + + assert(!mi->filter_stage); - /* normalize the log message to UTF-8. */ - if (convert_to_utf8(mi, line, mi->charset.buf)) - return 0; /* mi->input_error already set */ + /* + * Obtain UTF8 for scissors line and patchbreak checks, but retain the + * undecoded line in case we need to process it as an in-body header. + */ + utf8_result = try_convert_to_utf8(mi, line, mi->charset.buf, &old_buf, + &old_len); - if (mi->use_scissors && is_scissors_line(line)) { + if (!utf8_result && mi->use_scissors && is_scissors_line(line)) { int i; + if (resembles_rfc2822_header(line)) + /* + * Explicitly reject scissor lines that resemble a RFC + * 2822 header, to avoid being prone to error. + */ + die("scissors line resembles RFC 2822 header"); + strbuf_setlen(&mi->log_message, 0); mi->header_stage = 1; @@ -661,18 +717,47 @@ static int handle_commit_msg(struct mailinfo *mi, struct strbuf *line) strbuf_release(mi->s_hdr_data[i]); mi->s_hdr_data[i] = NULL; } - return 0; + goto handle_commit_msg_out; } - - if (patchbreak(line)) { + if (!utf8_result && patchbreak(line)) { if (mi->message_id) strbuf_addf(&mi->log_message, "Message-Id: %s\n", mi->message_id); - return 1; + ret = 1; + goto handle_commit_msg_out; } + if (mi->header_stage) { + char *buf = old_buf ? old_buf : line->buf; + if (buf[0] == 0 || (buf[0] == '\n' && buf[1] == 0)) + goto handle_commit_msg_out; + } + + if (mi->use_inbody_headers && mi->header_stage) { + char *buf = old_buf ? old_buf : line->buf; + size_t len = old_buf ? old_len : line->len; + mi->header_stage = check_header_raw(mi, buf, len, + mi->s_hdr_data, 0); + if (mi->header_stage) + goto handle_commit_msg_out; + } else + /* Only trim the first (blank) line of the commit message + * when ignoring in-body headers. + */ + mi->header_stage = 0; + + /* If adding as a log message, conversion to UTF-8 is required. */ + if (utf8_result) { + mi->input_error = -1; + error("cannot convert from %s to %s", + mi->charset.buf, mi->metainfo_charset); + goto handle_commit_msg_out; + } strbuf_addbuf(&mi->log_message, line); - return 0; + +handle_commit_msg_out: + free(old_buf); + return ret; } static void handle_patch(struct mailinfo *mi, const struct strbuf *line) -- 2.10.0.rc2.20.g5b18e70