#!/usr/bin/perl -w
#
# git_blame_stats.perl - generate authorship statistics from a git repository.
#
# Updated version of the script, as posted to the git mailing list by Brandon:
# the git-blame calls can now be done in parallel.  Use -t <threads>.
#
# Usage: git_blame_stats.perl [--help] [-cflstvx] <rev> [paths...]
#
#   -c      print count of all lines in all files at the end
#   -f      produce file centric output (overrides -l and -s)
#   -l      produce longer format
#   -s      produce short format, line count and author only
#   -ls     Both -l and -s produce an even longer long format
#   -t n    set number of threads to use
#   -x re   exclude files matching regex
#   -v      be more verbose
#
# The central data structure is a two-level hash, referred to as $authors:
#   $authors->{<author name>}->{<file name>} = <number of lines attributed>

use lib (split(/:/, $ENV{GITPERLLIB} || '/path/to/git/lib/perl5/site_perl/5.10.0'));

use strict;
use threads;
use Thread::Queue;
use File::Basename ();
use Getopt::Std;
use List::Util qw(sum);
use Git;

# -z: NUL-terminated, unquoted path names, so that unusual file names survive
# the round trip from ls-tree to blame.
my @LSTREE_OPTS = ('-r', '--name-only', '-z');
my @BLAME_OPTS  = ('-C', '-C', '-w', '--incremental');

$Getopt::Std::STANDARD_HELP_VERSION = 1;
$main::VERSION = 1.0;

# Print the one-line usage summary.
#   $fh (optional): handle to print to; when omitted the currently selected
#                   output handle is used.
sub usage {
    my $fh   = shift;
    my $name = File::Basename::basename($0);
    my @msg  = ('Usage: ', $name, ' [--help] [-cflstvx] <rev> [paths...]', "\n");

    if (defined $fh) {
        print {$fh} @msg;
    }
    else {
        print @msg;
    }
    return;
}

# Called by Getopt::Std for --help; the first argument is the output handle.
sub main::HELP_MESSAGE {
    my $fh = shift;

    usage($fh);

    my @lines = (
        '',
        'Generate authorship statistics from a git repository.',
        '',
        'OPTIONS',
        ' -c print count of all lines in all files at the end',
        ' -f produce file centric output (overrides -l and -s)',
        ' -l produce longer format',
        ' -s produce short format, line count and author only',
        ' -ls Both -l and -s produce an even longer long format',
        ' -t n set number of threads to use',
        ' -x re exclude files matching regex',
        ' -v be more verbose',
        ' --help this text',
        '',
    );
    print {$fh} map { "$_\n" } @lines;
    return;
}

# Read one entry of `git blame --incremental` output from $fh.
#
# Returns a hash (as a list) holding sha1, sourceline, resultline and lines
# from the entry header, plus every "key value" line up to and including the
# "filename" line.  Returns the empty list at end of input or when the header
# line does not have four fields.
sub parse_blame_entry {
    my $fh = shift;

    my $header = <$fh>;
    return () unless defined $header;
    chomp $header;

    my ($sha1, $sourceline, $resultline, $num_lines) = split ' ', $header;
    return () unless defined $num_lines;

    my %entry = (
        sha1       => $sha1,
        sourceline => $sourceline,
        resultline => $resultline,
        lines      => $num_lines,
    );

    while (defined(my $line = <$fh>)) {
        chomp $line;
        my ($key, $val) = split ' ', $line, 2;
        next unless defined $key;    # ignore blank lines
        $entry{$key} = $val;
        last if $line =~ m/^filename /;    # "filename" terminates an entry
    }

    return %entry;
}

# Run git blame on $filename at revision $ref and add the per-author line
# counts to $authors->{<author>}->{$filename}.
#
#   $repo     - Git repository object
#   $ref      - revision to blame
#   $filename - path inside the repository
#   $authors  - hash ref updated in place
sub blame_file {
    my ($repo, $ref, $filename, $authors) = @_;

    my ($fh, $ctx) = $repo->command_output_pipe('blame', @BLAME_OPTS,
                                                 $ref, '--', $filename);

    # sha1 => reference to that commit author's counter for this file.  The
    # author is looked up only the first time a commit is seen and the cached
    # counter is reused for every later entry of the same commit.
    my %counter_for;
    while (my %entry = parse_blame_entry($fh)) {
        my $sha1 = $entry{'sha1'};

        if (!exists $counter_for{$sha1}) {
            my $author = $entry{'author'};
            $authors->{$author}->{$filename} = 0
                unless exists $authors->{$author}->{$filename};
            $counter_for{$sha1} = \$authors->{$author}->{$filename};
        }

        ${ $counter_for{$sha1} } += $entry{'lines'};
    }

    $repo->command_close_pipe($fh, $ctx);
    return;
}

# Returns the total number of lines over all authors and files.
sub count_total_lines {
    my $authors = shift;

    return sum(0, map { values %{$_} } values %{$authors});
}

# Returns hash
#   key:   author name
#   value: authored lines
sub count_author_lines {
    my $authors = shift;

    my %alines;
    foreach my $author (keys %{$authors}) {
        $alines{$author} = sum(0, values %{ $authors->{$author} });
    }

    return %alines;
}

# Returns hash
#   key:   filename
#   value: lines in file
sub count_file_lines {
    my $authors = shift;

    my %flines;
    foreach my $files (values %{$authors}) {
        foreach my $file (keys %{$files}) {
            $flines{$file} += $files->{$file};
        }
    }

    return %flines;
}

# Returns the keys of the given hash ref ordered by descending numeric value;
# equal values are ordered by key so that the output is reproducible.
sub _keys_by_count_desc {
    my $count_of = shift;

    my @ordered = sort { $count_of->{$b} <=> $count_of->{$a} or $a cmp $b }
                  keys %{$count_of};
    return @ordered;
}

# Short format
#   lines author
sub print_short {
    my $authors = shift;

    my %alines = count_author_lines($authors);

    foreach my $author (_keys_by_count_desc(\%alines)) {
        printf "%6d %s\n", $alines{$author}, $author;
    }
    return;
}

# Long format
#   author (lines):
#     file_lines filename
#     file_lines filename
sub print_long {
    my $authors = shift;

    my %alines = count_author_lines($authors);

    foreach my $author (_keys_by_count_desc(\%alines)) {
        my $files = $authors->{$author};
        print $author, ' (', $alines{$author}, '):', "\n";
        foreach my $file (_keys_by_count_desc($files)) {
            printf " %10d %s\n", $files->{$file}, $file;
        }
    }
    return;
}

# Longer format
#   author (lines, % of all lines):
#     file_lines (% of author lines) filename
#     file_lines (% of author lines) filename
sub print_longer {
    my $authors = shift;

    my %alines      = count_author_lines($authors);
    my $total_lines = count_total_lines($authors);

    foreach my $author (_keys_by_count_desc(\%alines)) {
        my $files = $authors->{$author};
        printf "%s (%d, %.2f%%):\n", $author, $alines{$author},
            100. * $alines{$author} / $total_lines;
        foreach my $file (_keys_by_count_desc($files)) {
            printf " %10d (%5.2f%%) %s\n",
                $files->{$file},
                100. * $files->{$file} / $alines{$author},
                $file;
        }
    }
    return;
}

# Longer format
#   author (# lines in X files, % of all lines, % of all files):
#     lines (% of file) of file_lines (% of author lines) filename
#     lines (% of file) of file_lines (% of author lines) filename
sub print_with_file_percentage {
    my $authors = shift;

    my %alines      = count_author_lines($authors);
    my %flines      = count_file_lines($authors);
    my $total_lines = count_total_lines($authors);
    my $total_files = scalar(keys %flines);

    foreach my $author (_keys_by_count_desc(\%alines)) {
        my $files      = $authors->{$author};
        my $file_count = scalar(keys %{$files});

        printf "%s (%d lines in %d files, " .
               "%.2f%% of all lines, %.2f%% of all files):\n",
            $author, $alines{$author}, $file_count,
            100. * $alines{$author} / $total_lines,
            100. * $file_count / $total_files;

        foreach my $file (_keys_by_count_desc($files)) {
            printf " %10d (%6.2f%%) of %6d (%6.2f%%) %s\n",
                $files->{$file},
                100. * $files->{$file} / $flines{$file},
                $flines{$file},
                100. * $files->{$file} / $alines{$author},
                $file;
        }
    }
    return;
}

# File perspective format
#   filename (lines):
#     lines author
#     lines author
sub print_with_file_perspective {
    my $authors = shift;

    my %flines = count_file_lines($authors);

    foreach my $file (sort keys %flines) {
        # author => lines, restricted to the authors who touched this file
        my %lines_by = map  { $_ => $authors->{$_}->{$file} }
                       grep { exists $authors->{$_}->{$file} }
                       keys %{$authors};

        print $file, ' (', $flines{$file}, '):', "\n";
        foreach my $author (_keys_by_count_desc(\%lines_by)) {
            printf " %10d %s\n", $lines_by{$author}, $author;
        }
    }
    return;
}

# Thread entry point: blame every file taken from $queue until an undef
# sentinel is dequeued, then return this thread's author hash (as a list).
sub blame_worker {
    my ($repo, $rev, $queue, $verbose) = @_;

    my $tid = threads->tid();
    my %authors;

    # Test for definedness, not truth: a file named "0" is a valid entry and
    # must not be mistaken for the undef end-of-work sentinel.
    while (defined(my $file = $queue->dequeue())) {
        print STDERR "[$tid]Processing file: $file\n" if $verbose;

        # A blame failure on one path must not kill the thread, since that
        # would throw away everything this thread has counted so far.  Counts
        # gathered for the failing file before the error are kept.
        eval { blame_file($repo, $rev, $file, \%authors); 1 }
            or warn "[$tid]Failed to blame $file: $@";
    }

    return %authors;
}

# Add the counts in $from into $into (both author => file => lines hash refs).
sub merge_authors {
    my ($into, $from) = @_;

    foreach my $author (keys %{$from}) {
        foreach my $filename (keys %{ $from->{$author} }) {
            $into->{$author}->{$filename} += $from->{$author}->{$filename};
        }
    }
    return;
}


my $verbose       = 0;
my $output_format = 0;
my $show_total    = 0;
my $exclude_re;
my $nthreads      = 1;

# Output routines indexed by $output_format.
my @PRINTERS = (
    \&print_long,                     # 0: default
    \&print_short,                    # 1: -s
    \&print_longer,                   # 2: -l
    \&print_with_file_percentage,     # 3: -l -s
    \&print_with_file_perspective,    # 4: -f
);

our ($opt_c, $opt_f, $opt_l, $opt_s, $opt_t, $opt_v, $opt_x);
getopts('cflst:vx:') or die 'Invalid options specified';

$show_total = 1 if $opt_c;
$verbose    = 1 if $opt_v;

$output_format = $opt_f             ? 4
               : ($opt_l && $opt_s) ? 3
               : $opt_l             ? 2
               : $opt_s             ? 1
               :                      0;

if (defined $opt_t) {
    $nthreads = $opt_t;
    if ($nthreads !~ /\A\d+\z/) {
        die 'Error: argument to -t must be integer >= 0';
    }
    if ($nthreads == 0) {
        # -t 0: use one thread per CPU when Sys::CPU is available,
        # otherwise fall back to a single thread.
        my $cpus = eval {
            require Sys::CPU;
            Sys::CPU::cpu_count();
        };
        $nthreads = $cpus ? $cpus : 1;
    }
}

if (defined $opt_x) {
    # Compile once, up front, so that a bad pattern is reported immediately.
    $exclude_re = qr/$opt_x/;
}

unless (@ARGV) {
    usage(\*STDERR);
    exit 1;
}

my %authors;
my @thr;
my $repo = Git->repository();

# Spawn ls-tree now, so it can fail before creating the threads
my ($fh, $ctx) = $repo->command_output_pipe('ls-tree', @LSTREE_OPTS,
                                             '--', @ARGV);

print STDERR 'Using ', $nthreads, ' thread(s).', "\n" if $verbose;

my $DataQueue = Thread::Queue->new();

# start the threads; list context is requested explicitly because each
# worker returns its author hash as a list
for my $i (0 .. $nthreads - 1) {
    $thr[$i] = threads->create({ 'context' => 'list' }, \&blame_worker,
                               $repo, $ARGV[0], $DataQueue, $verbose);
}

# now queue up the files
{
    local $/ = "\0";    # ls-tree -z terminates each path with NUL

    while (defined(my $path = <$fh>)) {
        chomp $path;

        if (defined $exclude_re && $path =~ $exclude_re) {
            print STDERR "Skipping file: $path\n" if $verbose;
            next;
        }
        print STDERR "Queuing file: $path\n" if $verbose;

        $DataQueue->enqueue($path);
    }
}
$repo->command_close_pipe($fh, $ctx);

# queue up an undef entry for each thread
$DataQueue->enqueue(undef) for 1 .. $nthreads;

# merge the author hash from each thread
foreach my $thread (@thr) {
    my %th_authors = $thread->join;
    merge_authors(\%authors, \%th_authors);
}

$PRINTERS[$output_format]->(\%authors);

printf "%6d total lines\n", count_total_lines(\%authors) if $show_total;

exit;