[PATCH] send-email: extract email-parsing code into a subroutine

Nathan Payre <nathan.payre@xxxxxxxxxxxxxxxxx> · Thu, 14 Dec 2017 12:12:43 +0100

The existing code interleaves the regular expressions that parse the
email headers with the code that acts on them. Extract the parsing
code into a new subroutine "parse_header_line()". This improves
readability and makes parse_header_line() reusable in other places.

"parse_header_line()" and "filter_body()" could also be used to
refactor the part of the code which parses the header in order to
prepare the email and send it.

Signed-off-by: Nathan Payre <nathan.payre@xxxxxxxxxxxxxxxxx>
Signed-off-by: Matthieu Moy <matthieu.moy@xxxxxxxxxxxxx>
Signed-off-by: Timothee Albertin <timothee.albertin@xxxxxxxxxxxxxxxxx>
Signed-off-by: Daniel Bensoussan <daniel.bensoussan--bohm@xxxxxxxxxxxxxxxxx>
Signed-off-by: Junio C Hamano <gitster@xxxxxxxxx>
Thanks-to: Ævar Arnfjörð Bjarmason <avarab@xxxxxxxxx>
---

>> "PAYRE NATHAN p1508475" <nathan.payre@xxxxxxxxxxxxxxxxx> wrote:
>>> +     my %parsed_email;
>>> +     $parsed_email{'body'} = '';
>>> +     while (my $line = <$c>) {
>>> +             next if $line =~ m/^GIT:/;
>>> +             parse_header_line($line, \%parsed_email);
>>> +             if ($line =~ /^$/) {
>>> +                     $parsed_email{'body'} = filter_body($c);
>>>               }
>>> -             print $c2 $_;
>>
>> I didn't notice this at first, but you're modifying the behavior here:
>> the old code used to print to $c2 anything that didn't match any of
>> the if/else if branches.
>>
>> To keep this behavior, you need to keep all these extra headers in
>> $parsed_email (you do, in this version) and print them after taking
>> care of all the known headers (AFAICT, you don't).
>
> This case is not that easy to correct because:
> - It's could weigh the code.
> - The refactoring may not be legitimate anymore.
> 
> I've found two way to resolve this:
> .1) After every if($parsed_email{'key'}) remove the corresponding key
> and just before closing $c2 create a new loop which add all the
> remaining parts.
>
> .2) Making a mix between the old and new code. Some parts of
> my patch can improve the old code (like the removing of
> $need_8bit_cte) then it will be kept and the while loop will be
> similar the old code
>
> I think that the first version will look like better than the second
> one, easy to read, but it will change the order of the email header.

This is how I see the first choice of the two I've proposed in my last
email.

 git-send-email.perl | 116
 +++++++++++++++++++++++++++++++++++----------------- 1 file changed,
 78 insertions(+), 38 deletions(-)

diff --git a/git-send-email.perl b/git-send-email.perl
index 2208dcc21..f942fc2a5 100755
--- a/git-send-email.perl
+++ b/git-send-email.perl
@@ -703,57 +703,71 @@ EOT3
 		do_edit($compose_filename);
 	}
 
-	open my $c2, ">", $compose_filename . ".final"
-		or die sprintf(__("Failed to open %s.final: %s"), $compose_filename, $!);
-
 	open $c, "<", $compose_filename
 		or die sprintf(__("Failed to open %s: %s"), $compose_filename, $!);
 
-	my $need_8bit_cte = file_has_nonascii($compose_filename);
-	my $in_body = 0;
-	my $summary_empty = 1;
 	if (!defined $compose_encoding) {
 		$compose_encoding = "UTF-8";
 	}
-	while(<$c>) {
-		next if m/^GIT:/;
-		if ($in_body) {
-			$summary_empty = 0 unless (/^\n$/);
-		} elsif (/^\n$/) {
-			$in_body = 1;
-			if ($need_8bit_cte) {
-				print $c2 "MIME-Version: 1.0\n",
-					 "Content-Type: text/plain; ",
-					   "charset=$compose_encoding\n",
-					 "Content-Transfer-Encoding: 8bit\n";
-			}
-		} elsif (/^MIME-Version:/i) {
-			$need_8bit_cte = 0;
-		} elsif (/^Subject:\s*(.+)\s*$/i) {
-			$initial_subject = $1;
-			my $subject = $initial_subject;
-			$_ = "Subject: " .
-				quote_subject($subject, $compose_encoding) .
-				"\n";
-		} elsif (/^In-Reply-To:\s*(.+)\s*$/i) {
-			$initial_reply_to = $1;
-			next;
-		} elsif (/^From:\s*(.+)\s*$/i) {
-			$sender = $1;
-			next;
-		} elsif (/^(?:To|Cc|Bcc):/i) {
-			print __("To/Cc/Bcc fields are not interpreted yet, they have been ignored\n");
-			next;
+
+	my %parsed_email;
+	while (my $line = <$c>) {
+		next if $line =~ m/^GIT:/;
+		parse_header_line($line, \%parsed_email);
+		if ($line =~ /^$/) {
+			$parsed_email{'body'} = filter_body($c);
 		}
-		print $c2 $_;
 	}
+
 	close $c;
-	close $c2;
 
-	if ($summary_empty) {
+	open my $c2, ">", $compose_filename . ".final"
+	or die sprintf(__("Failed to open %s.final: %s"), $compose_filename, $!);
+
+
+	if ($parsed_email{'From'}) {
+		$sender = delete($parsed_email{'From'});
+	}
+	if ($parsed_email{'In-Reply-To'}) {
+		$initial_reply_to = delete($parsed_email{'In-Reply-To'});
+	}
+	if ($parsed_email{'Subject'}) {
+		$initial_subject = delete($parsed_email{'Subject'});
+		print $c2 "Subject: " .
+			quote_subject($initial_subject, $compose_encoding) .
+			"\n";
+	}
+
+	if ($parsed_email{'MIME-Version'}) {
+		# The user supplied MIME headers: pass through, unmodified, only
+		# the ones actually present (no stray ';', no undefined values).
+		foreach my $key (qw(MIME-Version Content-Type Content-Transfer-Encoding)) {
+			my $value = delete($parsed_email{$key});
+			print $c2 "$key: $value\n" if defined $value;
+		}
+	} elsif (file_has_nonascii($compose_filename)) {
+		my $content_type = (delete($parsed_email{'Content-Type'}) or
+			"text/plain; charset=$compose_encoding");
+		print $c2 "MIME-Version: 1.0\n",
+			"Content-Type: $content_type\n",
+			"Content-Transfer-Encoding: 8bit\n";
+	}
+
+	foreach my $key (sort keys %parsed_email) {	# sorted: stable header order
+		next if $key eq 'body' || ref($parsed_email{$key});	# body goes last; To/Cc/Bcc lists stay ignored
+		print $c2 "$key: $parsed_email{$key}\n";	# stored values carry no newline
+	}
+
+	if (defined $parsed_email{'body'} && $parsed_email{'body'} =~ /[^\n]/) {
+		print $c2 "\n$parsed_email{'body'}\n";
+		delete($parsed_email{'body'});
+	} else {	# no body, or nothing but empty lines: treat the summary as empty
 		print __("Summary email is empty, skipping it\n");
 		$compose = -1;
 	}
+
+	close $c2;
+
 } elsif ($annotate) {
 	do_edit(@files);
 }
@@ -792,6 +806,32 @@ sub ask {
 	return;
 }
 
+sub parse_header_line {	# ($lines, \%parsed): record each "Name: value" line of $lines in %parsed
+	my ($lines, $parsed_line) = @_;
+	# Header names are case-insensitive: store known ones as callers spell them.
+	my %canonical = map { (lc($_) => $_) } qw(From Subject In-Reply-To
+		MIME-Version Content-Type Content-Transfer-Encoding);
+	foreach (split(/\n/, $lines)) {
+		if (/^(To|Cc|Bcc):\s*(.*\S)\s*$/i) {	# address lists become array refs
+			$parsed_line->{ucfirst lc $1} = [ parse_address_line($2) ];
+		} elsif (/^([^:]+):\s*(.*\S)\s*$/) {	# anything else is kept as a trimmed string
+			$parsed_line->{$canonical{lc $1} || $1} = $2;
+		}
+	}
+}
+
+sub filter_body {
+	# Return the rest of the handle's content, minus lines starting "GIT:".
+	my ($fh) = @_;
+	my @kept;
+	while (my $text = <$fh>) {
+		next if $text =~ m/^GIT:/;
+		push @kept, $text;
+	}
+	return join("", @kept);
+}
+
+
 my %broken_encoding;
 
 sub file_declares_8bit_cte {
-- 
2.15.1