From: Jan H. Schönherr <schnhrr@xxxxxxxxxxxxxxx> According to RFC 2047 and RFC 822, rfc2047 encoded words and and rfc822 quoted strings do not mix. Be more strict about rfc2047 encoded words in addresses, so that it is a bit more conform to RFC 2047. (Especially, my own name gets correctly decoded as Jan H. Schönherr (without quotes) and not as "Jan H. Schönherr" (with quotes).) Signed-off-by: Jan H. Schönherr <schnhrr@xxxxxxxxxxxxxxx> --- pretty.c | 80 ++++++++++++++++++++++++++++++++++++++----------- t/t4014-format-patch.sh | 11 +++++-- 2 Dateien geändert, 71 Zeilen hinzugefügt(+), 20 Zeilen entfernt(-) diff --git a/pretty.c b/pretty.c index ee76219..f3a7383 100644 --- a/pretty.c +++ b/pretty.c @@ -231,7 +231,7 @@ static int is_rfc822_special(char ch) } } -static int has_rfc822_specials(const char *s, int len) +static int needs_rfc822_quoting(const char *s, int len) { int i; for (i = 0; i < len; i++) @@ -272,7 +272,12 @@ static void add_rfc822_quoted(struct strbuf *out, const char *s, int len) strbuf_addch(out, '"'); } -static int is_rfc2047_special(char ch) +enum rfc2047_type { + RFC2047_SUBJECT, + RFC2047_ADDRESS, +}; + +static int is_rfc2047_special(char ch, enum rfc2047_type type) { /* * We encode ' ' using '=20' even though rfc2047 @@ -283,33 +288,62 @@ static int is_rfc2047_special(char ch) if (ch == ' ' || ch == '\n') return 1; - return (non_ascii(ch) || (ch == '=') || (ch == '?') || (ch == '_')); + if (non_ascii(ch) || (ch == '=') || (ch == '?') || (ch == '_')) + return 1; + + if (type != RFC2047_ADDRESS) + return 0; + + /* + * rfc2047, section 5.3: + * + * As a replacement for a 'word' entity within a 'phrase', for example, + * one that precedes an address in a From, To, or Cc header. The ABNF + * definition for 'phrase' from RFC 822 thus becomes: + * + * phrase = 1*( encoded-word / word ) + * + * In this case the set of characters that may be used in a "Q"-encoded + * 'encoded-word' is restricted to: <upper and lower case ASCII + * letters, decimal digits, "!", "*", "+", "-", "/", "=", and "_" + * (underscore, ASCII 95.)>. An 'encoded-word' that appears within a + * 'phrase' MUST be separated from any adjacent 'word', 'text' or + * 'special' by 'linear-white-space'. + */ + + /* '=' and '_' are special cases and have been checked above */ + return !(isalnum(ch) || ch == '!' || ch == '*' || ch == '+' || ch == '-' || ch == '/'); } -static void add_rfc2047(struct strbuf *sb, const char *line, int len, - const char *encoding) +static int needs_rfc2047_encoding(const char *line, int len, + enum rfc2047_type type) { - static const int max_length = 76; /* per rfc2047 */ int i; - int line_len = last_line_length(sb); for (i = 0; i < len; i++) { int ch = line[i]; if (non_ascii(ch) || ch == '\n') - goto needquote; + return 1; if ((i + 1 < len) && (ch == '=' && line[i+1] == '?')) - goto needquote; + return 1; } - strbuf_add_wrapped_bytes(sb, line, len, -line_len, 1, 78+1); - return; -needquote: + return 0; +} + +static void add_rfc2047(struct strbuf *sb, const char *line, int len, + const char *encoding, enum rfc2047_type type) +{ + static const int max_length = 76; /* per rfc2047 */ + int i; + int line_len = last_line_length(sb); + strbuf_grow(sb, len * 3 + strlen(encoding) + 100); strbuf_addf(sb, "=?%s?q?", encoding); line_len += strlen(encoding) + 5; /* 5 for =??q? */ for (i = 0; i < len; i++) { unsigned ch = line[i] & 0xFF; - int is_special = is_rfc2047_special(ch); + int is_special = is_rfc2047_special(ch, type); if (line_len + 2 + (is_special ? 3 : 1) > max_length) { strbuf_addf(sb, "?=\n =?%s?q?", encoding); @@ -355,13 +389,18 @@ void pp_user_info(const struct pretty_print_context *pp, name_tail--; display_name_length = name_tail - line; strbuf_addstr(sb, "From: "); - if (!has_rfc822_specials(line, display_name_length)) { - add_rfc2047(sb, line, display_name_length, encoding); - } else { + if (needs_rfc2047_encoding(line, display_name_length, RFC2047_ADDRESS)) { + add_rfc2047(sb, line, display_name_length, + encoding, RFC2047_ADDRESS); + } else if (needs_rfc822_quoting(line, display_name_length)) { struct strbuf quoted = STRBUF_INIT; add_rfc822_quoted("ed, line, display_name_length); - add_rfc2047(sb, quoted.buf, quoted.len, encoding); + strbuf_add_wrapped_bytes(sb, quoted.buf, quoted.len, + -6, 1, 78+1); strbuf_release("ed); + } else { + strbuf_add_wrapped_bytes(sb, line, display_name_length, + -6, 1, 78+1); } if (namelen - display_name_length + last_line_length(sb) > 78) { strbuf_addch(sb, '\n'); @@ -1292,7 +1331,12 @@ void pp_title_line(const struct pretty_print_context *pp, strbuf_grow(sb, title.len + 1024); if (pp->subject) { strbuf_addstr(sb, pp->subject); - add_rfc2047(sb, title.buf, title.len, encoding); + if (needs_rfc2047_encoding(title.buf, title.len, RFC2047_SUBJECT)) + add_rfc2047(sb, title.buf, title.len, encoding, + RFC2047_SUBJECT); + else + strbuf_add_wrapped_bytes(sb, title.buf, title.len, + -last_line_length(sb), 1, 78+1); } else { strbuf_addbuf(sb, &title); } diff --git a/t/t4014-format-patch.sh b/t/t4014-format-patch.sh index 1d5636d..1a3b6e8 100755 --- a/t/t4014-format-patch.sh +++ b/t/t4014-format-patch.sh @@ -830,9 +830,16 @@ test_expect_success 'format-patch quotes double-quote in headers' ' ' cat >expect <<'EOF' -From: =?UTF-8?q?"F=C3=B6o=20B.=20Bar"?= <author@xxxxxxxxxxx> +From: =?UTF-8?q?F=C3=B6o=20Bar?= <author@xxxxxxxxxxx> EOF -test_expect_success 'rfc2047-encoded headers also double-quote 822 specials' ' +test_expect_success 'format-patch uses rfc2047-encoded headers when necessary' ' + check_author "Föo Bar" +' + +cat >expect <<'EOF' +From: =?UTF-8?q?F=C3=B6o=20B=2E=20Bar?= <author@xxxxxxxxxxx> +EOF +test_expect_success 'rfc2047-encoded headers leave no rfc822 specials' ' check_author "Föo B. Bar" ' -- 1.7.12 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html