Re: [PATCH] send-email: extract email-parsing code into a subroutine

Nathan PAYRE <second.payre@xxxxxxxxx> · Mon, 4 Dec 2017 00:41:04 +0100

I've tested your code, and after few changes it's works perfectly!
The code looks better now.
Thanks a lot for your review.

2017-12-03 23:00 GMT+01:00 Ævar Arnfjörð Bjarmason <avarab@xxxxxxxxx>:
>
> On Sat, Dec 02 2017, Payre Nathan jotted:
>
>> From: Nathan Payre <second.payre@xxxxxxxxx>
>>
>> The existing code mixes parsing of email header with regular
>> expression and actual code. Extract the parsing code into a new
>> subroutine 'parse_header_line()'. This improves the code readability
>> and make parse_header_line reusable in other place.
>>
>> Signed-off-by: Nathan Payre <nathan.payre@xxxxxxxxxxxxxxxxx>
>> Signed-off-by: Matthieu Moy <matthieu.moy@xxxxxxxxxxxxx>
>> Signed-off-by: Timothee Albertin <timothee.albertin@xxxxxxxxxxxxxxxxx>
>> Signed-off-by: Daniel Bensoussan <daniel.bensoussan--bohm@xxxxxxxxxxxxxxxxx>
>> ---
>>
>> This patch is a first step to implement a new feature.
>> See new feature discussion here: https://public-inbox.org/git/20171030223444.5052-1-nathan.payre@xxxxxxxxxxxxxxxxx/
>>
>>  git-send-email.perl | 106 +++++++++++++++++++++++++++++++++++++++-------------
>>  1 file changed, 80 insertions(+), 26 deletions(-)
>>
>> diff --git a/git-send-email.perl b/git-send-email.perl
>> index 2208dcc21..98c2e461c 100755
>> --- a/git-send-email.perl
>> +++ b/git-send-email.perl
>> @@ -715,41 +715,64 @@ EOT3
>>       if (!defined $compose_encoding) {
>>               $compose_encoding = "UTF-8";
>>       }
>> -     while(<$c>) {
>> +
>> +     my %parsed_email;
>> +     while (<$c>) {
>>               next if m/^GIT:/;
>> -             if ($in_body) {
>> -                     $summary_empty = 0 unless (/^\n$/);
>> -             } elsif (/^\n$/) {
>> -                     $in_body = 1;
>> -                     if ($need_8bit_cte) {
>> +             parse_header_line($_, \%parsed_email);
>> +             if (/^\n$/i) {
>> +                     while (my $row = <$c>) {
>> +                             if (!($row =~ m/^GIT:/)) {
>> +                                     $parsed_email{'body'} = $parsed_email{'body'} . $row;
>> +                             }
>> +                     }
>> +             }
>> +     }
>> +     if ($parsed_email{'from'}) {
>> +             $sender = $parsed_email{'from'};
>> +     }
>> +     if ($parsed_email{'in_reply_to'}) {
>> +             $initial_reply_to = $parsed_email{'in_reply_to'};
>> +     }
>> +     if ($parsed_email{'subject'}) {
>> +             $initial_subject = $parsed_email{'subject'};
>> +             print $c2 "Subject: " .
>> +                     quote_subject($parsed_email{'subject'}, $compose_encoding) .
>> +                     "\n";
>> +     }
>> +     if ($parsed_email{'mime-version'}) {
>> +             $need_8bit_cte = 0;
>> +     }
>> +     if ($need_8bit_cte) {
>> +             if ($parsed_email{'content-type'}) {
>> +                             print $c2 "MIME-Version: 1.0\n",
>> +                                      "Content-Type: $parsed_email{'content-type'};",
>> +                                      "Content-Transfer-Encoding: 8bit\n";
>> +                     } else {
>>                               print $c2 "MIME-Version: 1.0\n",
>>                                        "Content-Type: text/plain; ",
>> -                                        "charset=$compose_encoding\n",
>> +                                      "charset=$compose_encoding\n",
>>                                        "Content-Transfer-Encoding: 8bit\n";
>>                       }
>> -             } elsif (/^MIME-Version:/i) {
>> -                     $need_8bit_cte = 0;
>> -             } elsif (/^Subject:\s*(.+)\s*$/i) {
>> -                     $initial_subject = $1;
>> -                     my $subject = $initial_subject;
>> -                     $_ = "Subject: " .
>> -                             quote_subject($subject, $compose_encoding) .
>> -                             "\n";
>> -             } elsif (/^In-Reply-To:\s*(.+)\s*$/i) {
>> -                     $initial_reply_to = $1;
>> -                     next;
>> -             } elsif (/^From:\s*(.+)\s*$/i) {
>> -                     $sender = $1;
>> -                     next;
>> -             } elsif (/^(?:To|Cc|Bcc):/i) {
>> -                     print __("To/Cc/Bcc fields are not interpreted yet, they have been ignored\n");
>> -                     next;
>> -             }
>> -             print $c2 $_;
>>       }
>> +     if ($parsed_email{'body'}) {
>> +             $summary_empty = 0;
>> +             print $c2 "\n$parsed_email{'body'}\n";
>> +     }
>> +
>>       close $c;
>>       close $c2;
>>
>> +     open $c2, "<", $compose_filename . ".final"
>> +             or die sprintf(__("Failed to open %s.final: %s"), $compose_filename, $!);
>> +
>> +     print "affichage : \n";
>> +     while (<$c2>) {
>> +             print $_;
>> +     }
>> +
>> +     close $c2;
>> +
>>       if ($summary_empty) {
>>               print __("Summary email is empty, skipping it\n");
>>               $compose = -1;
>> @@ -792,6 +815,37 @@ sub ask {
>>       return;
>>  }
>>
>> +sub parse_header_line {
>> +     my $lines = shift;
>> +     my $parsed_line = shift;
>> +
>> +     foreach (split(/\n/, $lines)) {
>> +             if (/^From:\s*(.+)$/i) {
>> +                     $parsed_line->{'from'} = $1;
>> +             } elsif (/^To:\s*(.+)$/i) {
>> +                     $parsed_line->{'to'} = [ parse_address_line($1) ];
>> +             } elsif (/^Cc:\s*(.+)$/i) {
>> +                     $parsed_line->{'cc'} = [ parse_address_line($1) ];
>> +             } elsif (/^Bcc:\s*(.+)$/i) {
>> +                     $parsed_line->{'bcc'} = [ parse_address_line($1) ];
>> +             } elsif (/^Subject:\s*(.+)\s*$/i) {
>> +                     $parsed_line->{'subject'} = $1;
>> +             } elsif (/^Date: (.*)/i) {
>> +                     $parsed_line->{'date'} = $1;
>> +             } elsif (/^In-Reply-To:\s*(.+)\s*$/i) {
>> +                     $parsed_line->{'in_reply_to'} = $1;
>> +             } elsif (/^Message-ID: (.*)$/i) {
>> +                     $parsed_line->{'message_id'} = $1;
>> +             } elsif (/^MIME-Version:$/i) {
>> +                     $parsed_line->{'mime-version'} = $1;
>> +             } elsif (/^Content-Type:\s+(.*)\s*$/i) {
>> +                     $parsed_line->{'content-type'} = $1;
>> +             } elsif (/^References:\s+(.*)/i) {
>> +                     $parsed_line->{'references'} = $1;
>> +             }
>> +     }
>> +}
>> +
>>  my %broken_encoding;
>>
>>  sub file_declares_8bit_cte {
>
> I haven't read the patches that follow. Completely untested, But just a
> diff on top I came up with while reading this:
>
> Rationale:
>
>  * Once you start passing $_ to functions you should probably just give
>    it a name.
>
>  * !($x =~ m//) you can just write as $x !~ m//
>
>  * There's a lot of copy/paste programming in parse_header_line() and an
>    inconsistency between you seeing A-Header and turning it into either
>    a_header or a-header. If you just stick with a-header and use dash
>    you end up with just two cases.
>
>    The resulting line is quite long, so it's worth doing:
>
>    my $header_parsed   = join "|", qw(To CC ...);
>    my $header_unparsed = join "|", qw(From Subject Message-ID ...);
>    [...]
>    if ($str =~ /^($header_unparsed)
>
>
> diff --git a/git-send-email.perl b/git-send-email.perl
> index 98c2e461cf..3696cad456 100755
> --- a/git-send-email.perl
> +++ b/git-send-email.perl
> @@ -717,12 +717,12 @@ EOT3
>         }
>
>         my %parsed_email;
> -       while (<$c>) {
> -               next if m/^GIT:/;
> -               parse_header_line($_, \%parsed_email);
> -               if (/^\n$/i) {
> +       while (my $line = <$c>) {
> +               next if $line =~ m/^GIT:/;
> +               parse_header_line($line, \%parsed_email);
> +               if ($line =~ /^\n$/i) {
>                         while (my $row = <$c>) {
> -                               if (!($row =~ m/^GIT:/)) {
> +                               if ($row !~ m/^GIT:/) {
>                                         $parsed_email{'body'} = $parsed_email{'body'} . $row;
>                                 }
>                         }
> @@ -731,7 +731,7 @@ EOT3
>         if ($parsed_email{'from'}) {
>                 $sender = $parsed_email{'from'};
>         }
> -       if ($parsed_email{'in_reply_to'}) {
> +       if ($parsed_email{'in-reply-to'}) {
>                 $initial_reply_to = $parsed_email{'in_reply_to'};
>         }
>         if ($parsed_email{'subject'}) {
> @@ -820,28 +820,10 @@ sub parse_header_line {
>         my $parsed_line = shift;
>
>         foreach (split(/\n/, $lines)) {
> -               if (/^From:\s*(.+)$/i) {
> -                       $parsed_line->{'from'} = $1;
> -               } elsif (/^To:\s*(.+)$/i) {
> -                       $parsed_line->{'to'} = [ parse_address_line($1) ];
> -               } elsif (/^Cc:\s*(.+)$/i) {
> -                       $parsed_line->{'cc'} = [ parse_address_line($1) ];
> -               } elsif (/^Bcc:\s*(.+)$/i) {
> -                       $parsed_line->{'bcc'} = [ parse_address_line($1) ];
> -               } elsif (/^Subject:\s*(.+)\s*$/i) {
> -                       $parsed_line->{'subject'} = $1;
> -               } elsif (/^Date: (.*)/i) {
> -                       $parsed_line->{'date'} = $1;
> -               } elsif (/^In-Reply-To:\s*(.+)\s*$/i) {
> -                       $parsed_line->{'in_reply_to'} = $1;
> -               } elsif (/^Message-ID: (.*)$/i) {
> -                       $parsed_line->{'message_id'} = $1;
> -               } elsif (/^MIME-Version:$/i) {
> -                       $parsed_line->{'mime-version'} = $1;
> -               } elsif (/^Content-Type:\s+(.*)\s*$/i) {
> -                       $parsed_line->{'content-type'} = $1;
> -               } elsif (/^References:\s+(.*)/i) {
> -                       $parsed_line->{'references'} = $1;
> +               if (/^(To|Cc|Bcc):\s*(.+)$/i) {
> +                       $parsed_line->{lc $1} = [ parse_address_line($2) ];
> +               } elsif (/^(From|Subject|Date|In-Reply-To|Message-ID|MIME-Version|Content-Type|References):\s*(.+)\s*$/i) {
> +                       $parsed_line->{lc $1} = $2;
>                 }
>         }
>  }