The patch titled scripts/get_maintainer.pl: add --file-emails, find embedded email addresses has been added to the -mm tree. Its filename is scripts-get_maintainerpl-add-file-emails-find-embedded-email-addresses.patch Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/SubmitChecklist when testing your code *** See http://userweb.kernel.org/~akpm/stuff/added-to-mm.txt to find out what to do about this The current -mm tree may be found at http://userweb.kernel.org/~akpm/mmotm/ ------------------------------------------------------ Subject: scripts/get_maintainer.pl: add --file-emails, find embedded email addresses From: Joe Perches <joe@xxxxxxxxxxx> Add an imperfect option to search a source file for email addresses. New option: --file-emails or --fe (default is disabled) Email addresses in files are freeform text and are nearly impossible to parse. Still, might as well try to do a somewhat acceptable job of finding them. This code will find all addresses that are in the form (addr@xxxxxxxxxx) and <addr@xxxxxxxxxx>. Addresses without parentheses or angle brackets are ignored. Some variants that are shown correctly: John Smith <jksmith@xxxxxxxxxx> Random J. Developer <rjd@xxxxxxx> Random J. Developer (rjd@xxxxxxx) A variant that is shown correctly: Written by First Last (funny-addr@xxxxxxxxxxxxxxx) is shown as: First Last <funny-addr@xxxxxxxxxxxxxxx> Variants that are shown incorrectly: J. Random Developer <jrd@xxxxxxxxxx> Some Really Long Name <srln@xxxxxxx> are returned as: Random Developer <jrd@xxxxxxxxxx> Long Name <srln@xxxxxxx> Some variants that are ignored: Some Really Long Name srrln@xxxxxxx name@xxxxxxxxxx (Developer Name) The code assumes that the 2 words preceding a found email address are names. 
If the 1st of the 2 words is a single letter and period, then another word is used, assuming (First, Initial, Last). --roles and --rolestats show "(in file)" for matches. For instance: Without -file-emails: $ ./scripts/get_maintainer.pl -f -nogit -roles net/core/netpoll.c David S. Miller <davem@xxxxxxxxxxxxx> (maintainer:NETWORKING [GENERAL]) linux-kernel@xxxxxxxxxxxxxxx (open list) With -fe: $ ./scripts/get_maintainer.pl -f -fe -nogit -roles net/core/netpoll.c David S. Miller <davem@xxxxxxxxxxxxx> (maintainer:NETWORKING [GENERAL]) Matt Mackall <mpm@xxxxxxxxxxx> (in file) Ingo Molnar <mingo@xxxxxxxxxx> (in file) linux-kernel@xxxxxxxxxxxxxxx (open list) netdev@xxxxxxxxxxxxxxx (open list:NETWORKING [GENERAL]) The number of email addresses in the file is not limited. Neither is the number of returned email addresses. Signed-off-by: Joe Perches <joe@xxxxxxxxxxx> Cc: Matt Mackall <mpm@xxxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- scripts/get_maintainer.pl | 62 +++++++++++++++++++++++++++++++----- 1 file changed, 55 insertions(+), 7 deletions(-) diff -puN scripts/get_maintainer.pl~scripts-get_maintainerpl-add-file-emails-find-embedded-email-addresses scripts/get_maintainer.pl --- a/scripts/get_maintainer.pl~scripts-get_maintainerpl-add-file-emails-find-embedded-email-addresses +++ a/scripts/get_maintainer.pl @@ -41,6 +41,7 @@ my $web = 0; my $subsystem = 0; my $status = 0; my $keywords = 1; +my $file_emails = 0; my $from_filename = 0; my $pattern_depth = 0; my $version = 0; @@ -120,6 +121,7 @@ if (!GetOptions( 'web!' => \$web, 'pattern-depth=i' => \$pattern_depth, 'k|keywords!' => \$keywords, + 'fe|file-emails!' 
=> \$file_emails, 'f|file' => \$from_filename, 'v|version' => \$version, 'h|help' => \$help, @@ -232,6 +234,7 @@ if ($email_remove_duplicates) { my @files = (); my @range = (); my @keyword_tvi = (); +my @file_emails = (); foreach my $file (@ARGV) { ##if $file is a directory and it lacks a trailing slash, add one @@ -242,15 +245,21 @@ foreach my $file (@ARGV) { } if ($from_filename) { push(@files, $file); - if (-f $file && $keywords) { + if (-f $file && ($keywords || $file_emails)) { open(FILE, "<$file") or die "$P: Can't open ${file}\n"; my $text = do { local($/) ; <FILE> }; - foreach my $line (keys %keyword_hash) { - if ($text =~ m/$keyword_hash{$line}/x) { - push(@keyword_tvi, $line); + close(FILE); + if ($keywords) { + foreach my $line (keys %keyword_hash) { + if ($text =~ m/$keyword_hash{$line}/x) { + push(@keyword_tvi, $line); + } } } - close(FILE); + if ($file_emails) { + my @poss_addr = $text =~ m$[A-Za-z�-ÿ\"\' \,\.\+-]*\s*[\(\<][A-Za-z0-9_\.\+-]+\@[A-Za-z0-9\.-]+[\)\>]$g; + push(@file_emails, clean_file_emails(@poss_addr)); + } } } else { my $file_cnt = @files; @@ -285,6 +294,8 @@ foreach my $file (@ARGV) { } } +@file_emails = uniq(@file_emails); + my @email_to = (); my @list_to = (); my @scm = (); @@ -377,6 +388,14 @@ if ($email) { } } } + + foreach my $email (@file_emails) { + my ($name, $address) = parse_email($email); + + my $tmp_email = format_email($name, $address, $email_usename); + push_email_address($tmp_email, ''); + add_role($tmp_email, 'in file'); + } } if ($email || $email_list) { @@ -453,6 +472,7 @@ MAINTAINER field selection options: --remove-duplicates => minimize duplicate email names/addresses --roles => show roles (status:subsystem, git-signer, list, etc...) 
--rolestats => show roles and statistics (commits/total_commits, %) + --file-emails => add email addresses found in -f file (default: 0 (off)) --scm => print SCM tree(s) if any --status => print status if any --subsystem => print subsystem name if any @@ -811,7 +831,9 @@ sub add_role { foreach my $entry (@email_to) { if ($email_remove_duplicates) { my ($entry_name, $entry_address) = parse_email($entry->[0]); - if ($name eq $entry_name || $address eq $entry_address) { + if (($name eq $entry_name || $address eq $entry_address) + && ($role eq "" || !($entry->[1] =~ m/$role/)) + ) { if ($entry->[1] eq "") { $entry->[1] = "$role"; } else { @@ -819,7 +841,9 @@ sub add_role { } } } else { - if ($email eq $entry->[0]) { + if ($email eq $entry->[0] + && ($role eq "" || !($entry->[1] =~ m/$role/)) + ) { if ($entry->[1] eq "") { $entry->[1] = "$role"; } else { @@ -1099,6 +1123,30 @@ sub sort_and_uniq { return @parms; } +sub clean_file_emails { + my (@file_emails) = @_; + my @fmt_emails = (); + + foreach my $email (@file_emails) { + $email =~ s/\(/\</g; + $email =~ s/\)/\>/g; + my ($name, $address) = parse_email($email); + my @nw = split(/[^A-Za-z�-ÿ\"\'\,\.\+-]/, $name); + if (@nw > 2) { + if ((length($nw[@nw - 2]) == 2) && + substr($nw[@nw - 2], 1) eq ".") { + $name = "$nw[@nw - 3] $nw[@nw - 2] $nw[@nw - 1]"; + } else { + $name = "$nw[@nw - 2] $nw[@nw - 1]"; + } + } + my $fmt_email = format_email($name, $address, $email_usename); + push(@fmt_emails, $fmt_email); + } + return @fmt_emails; +} + + sub merge_email { my @lines; my %saw; _ Patches currently in -mm which might be from joe@xxxxxxxxxxx are origin.patch linux-next.patch drivers-scsi-correct-the-size-argument-to-kmalloc.patch scripts-get_maintainerpl-add-file-emails-find-embedded-email-addresses.patch scripts-checkpatchpl-add-warn-on-sizeof.patch checkpatchpl-allow-80-char-lines-for-logging-functions-not-just-printk.patch -- To unsubscribe from this list: send the line "unsubscribe mm-commits" in the body of a message 
to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html