git-send-email passes on an 8bit mail as-is even if it does not declare a content-type. Because the user can edit email between format-patch and send-email, such invalid mails are unfortunately not very hard to come by. Make git-send-email stop and ask about the encoding to use if it encounters any such mail. Also provide a configuration setting to permanently configure an encoding. Signed-off-by: Thomas Rast <trast@xxxxxxxxxxxxxxx> --- Junio C Hamano wrote: > > * tr/send-email-8bit (2010-06-12) 1 commit > > If I am not misreading the patch, it does not seem to stop the scanning of > the "C-T-E:" header at the end of header; it should. You're right. I fixed that in this version. [I'm leaving for holidays tomorrow, so if there are more problems with this patch (who knows) you'll have to hold it in pu for a while.] Documentation/git-send-email.txt | 9 ++++ git-send-email.perl | 60 +++++++++++++++++++++++++++++ t/t9001-send-email.sh | 77 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 146 insertions(+), 0 deletions(-) diff --git a/Documentation/git-send-email.txt b/Documentation/git-send-email.txt index 3dfdc7c..e70d9bf 100644 --- a/Documentation/git-send-email.txt +++ b/Documentation/git-send-email.txt @@ -101,6 +101,15 @@ See the CONFIGURATION section for 'sendemail.multiedit'. + The --to option must be repeated for each user you want on the to list. +--8bit-encoding=<encoding>:: + When encountering a non-ASCII message or subject that does not + declare its encoding, add headers/quoting to indicate it is + encoded in <encoding>. Default is the value of the + 'sendemail.assume8bitEncoding'; if that is unspecified, this + will be prompted for if any non-ASCII files are encountered. ++ +Note that no attempts whatsoever are made to validate the encoding. + Sending ~~~~~~~ diff --git a/git-send-email.perl b/git-send-email.perl index ce569a9..0db39b0 100755 --- a/git-send-email.perl +++ b/git-send-email.perl @@ -54,6 +54,7 @@ git send-email [options] <file | directory | rev-list options > --in-reply-to <str> * Email "In-Reply-To:" --annotate * Review each patch that will be sent in an editor. --compose * Open an editor for introduction. + --8bit-encoding <str> * Encoding to assume 8bit mails if undeclared Sending: --envelope-sender <str> * Email envelope sender. @@ -193,6 +194,7 @@ my ($smtp_server, $smtp_server_port, $smtp_authuser, $smtp_encryption); my ($identity, $aliasfiletype, @alias_files, @smtp_host_parts); my ($validate, $confirm); my (@suppress_cc); +my ($auto_8bit_encoding); my ($debug_net_smtp) = 0; # Net::SMTP, see send_message() @@ -223,6 +225,7 @@ my %config_settings = ( "multiedit" => \$multiedit, "confirm" => \$confirm, "from" => \$sender, + "assume8bitencoding" => \$auto_8bit_encoding, ); # Help users prepare for 1.7.0 @@ -298,6 +301,7 @@ my $rc = GetOptions("sender|from=s" => \$sender, "thread!" => \$thread, "validate!" => \$validate, "format-patch!" => \$format_patch, + "8bit-encoding=s" => \$auto_8bit_encoding, ); unless ($rc) { @@ -670,6 +674,35 @@ sub ask { return undef; } +my %broken_encoding; + +sub file_declares_8bit_cte($) { + my $fn = shift; + open (my $fh, '<', $fn); + while (my $line = <$fh>) { + last if ($line =~ /^$/); + return 1 if ($line =~ /^Content-Transfer-Encoding: .*8bit.*$/); + } + close $fh; + return 0; +} + +foreach my $f (@files) { + next unless (body_or_subject_has_nonascii($f) + && !file_declares_8bit_cte($f)); + $broken_encoding{$f} = 1; +} + +if (!defined $auto_8bit_encoding && scalar %broken_encoding) { + print "The following files are 8bit, but do not declare " . + "a Content-Transfer-Encoding.\n"; + foreach my $f (sort keys %broken_encoding) { + print " $f\n"; + } + $auto_8bit_encoding = ask("Which 8bit encoding should I declare [UTF-8]? ", + default => "UTF-8"); +} + my $prompting = 0; if (!defined $sender) { $sender = $repoauthor || $repocommitter || ''; @@ -1225,6 +1258,18 @@ foreach my $t (@files) { or die "(cc-cmd) failed to close pipe to '$cc_cmd'"; } + if ($broken_encoding{$t} && !$has_content_type) { + $has_content_type = 1; + push @xh, "MIME-Version: 1.0", + "Content-Type: text/plain; charset=$auto_8bit_encoding", + "Content-Transfer-Encoding: 8bit"; + $body_encoding = $auto_8bit_encoding; + } + + if ($broken_encoding{$t} && !is_rfc2047_quoted($subject)) { + $subject = quote_rfc2047($subject, $auto_8bit_encoding); + } + if (defined $author and $author ne $sender) { $message = "From: $author\n\n$message"; if (defined $author_encoding) { @@ -1237,6 +1282,7 @@ foreach my $t (@files) { } } else { + $has_content_type = 1; push @xh, 'MIME-Version: 1.0', "Content-Type: text/plain; charset=$author_encoding", @@ -1314,3 +1360,17 @@ sub file_has_nonascii { } return 0; } + +sub body_or_subject_has_nonascii { + my $fn = shift; + open(my $fh, '<', $fn) + or die "unable to open $fn: $!\n"; + while (my $line = <$fh>) { + last if $line =~ /^$/; + return 1 if $line =~ /^Subject.*[^[:ascii:]]/; + } + while (my $line = <$fh>) { + return 1 if $line =~ /[^[:ascii:]]/; + } + return 0; +} diff --git a/t/t9001-send-email.sh b/t/t9001-send-email.sh index 640b3d2..0b8a591 100755 --- a/t/t9001-send-email.sh +++ b/t/t9001-send-email.sh @@ -918,4 +918,81 @@ test_expect_success '--no-bcc overrides sendemail.bcc' ' ! grep "RCPT TO:<other@xxxxxx>" stdout ' +cat >email-using-8bit <<EOF +From fe6ecc66ece37198fe5db91fa2fc41d9f4fe5cc4 Mon Sep 17 00:00:00 2001 +Message-Id: <bogus-message-id@xxxxxxxxxxx> +From: author@xxxxxxxxxxx +Date: Sat, 12 Jun 2010 15:53:58 +0200 +Subject: subject goes here + +Dieser deutsche Text enthält einen Umlaut! +EOF + +cat >content-type-decl <<EOF +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit +EOF + +test_expect_success 'asks about and fixes 8bit encodings' ' + clean_fake_sendmail && + echo | + git send-email --from=author@xxxxxxxxxxx --to=nobody@xxxxxxxxxxx \ + --smtp-server="$(pwd)/fake.sendmail" \ + email-using-8bit >stdout && + grep "do not declare a Content-Transfer-Encoding" stdout && + grep email-using-8bit stdout && + grep "Which 8bit encoding" stdout && + grep "Content\\|MIME" msgtxt1 >actual && + test_cmp actual content-type-decl +' + +test_expect_success 'sendemail.8bitEncoding works' ' + clean_fake_sendmail && + git config sendemail.assume8bitEncoding UTF-8 && + echo bogus | + git send-email --from=author@xxxxxxxxxxx --to=nobody@xxxxxxxxxxx \ + --smtp-server="$(pwd)/fake.sendmail" \ + email-using-8bit >stdout && + grep "Content\\|MIME" msgtxt1 >actual && + test_cmp actual content-type-decl +' + +test_expect_success '--8bit-encoding overrides sendemail.8bitEncoding' ' + clean_fake_sendmail && + git config sendemail.assume8bitEncoding "bogus too" && + echo bogus | + git send-email --from=author@xxxxxxxxxxx --to=nobody@xxxxxxxxxxx \ + --smtp-server="$(pwd)/fake.sendmail" \ + --8bit-encoding=UTF-8 \ + email-using-8bit >stdout && + grep "Content\\|MIME" msgtxt1 >actual && + test_cmp actual content-type-decl +' + +cat >email-using-8bit <<EOF +From fe6ecc66ece37198fe5db91fa2fc41d9f4fe5cc4 Mon Sep 17 00:00:00 2001 +Message-Id: <bogus-message-id@xxxxxxxxxxx> +From: author@xxxxxxxxxxx +Date: Sat, 12 Jun 2010 15:53:58 +0200 +Subject: Dieser Betreff enthält auch einen Umlaut! + +Nothing to see here. +EOF + +cat >expected <<EOF +Subject: =?UTF-8?q?Dieser=20Betreff=20enth=C3=A4lt=20auch=20einen=20Umlaut!?= +EOF + +test_expect_success '--8bit-encoding also treats subject' ' + clean_fake_sendmail && + echo bogus | + git send-email --from=author@xxxxxxxxxxx --to=nobody@xxxxxxxxxxx \ + --smtp-server="$(pwd)/fake.sendmail" \ + --8bit-encoding=UTF-8 \ + email-using-8bit >stdout && + grep "Subject" msgtxt1 >actual && + test_cmp expected actual +' + test_done -- 1.7.1.635.g9a994 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html