On Thu, Jun 18, 2015 at 04:45:05PM -0400, Jeff King wrote: > Still, I think this is probably a minority case, and it may be > outweighed by the improvements. The "real" solution is to consider the > hunk as a whole and do an LCS diff on it, which would show that yes, > it's worth highlighting both of those spots, as they are a small > percentage of the total hunk. I've been meaning to play with this for years, so I took the opportunity to spend a little time on it. :) Below is a (slightly hacky) patch I came up with. It seems to work, and produces really great output in some cases. For instance, in 99a2cfb, it produces (I put highlighted bits in angle brackets): - <hash>cpy(peeled, <sha1>); + <oid>cpy(<&>peeled, <oid>); It also produces nonsense like: - <un>s<ign>ed <char >peeled<[20]>; + s<truct obj>e<ct_i>d peeled; but I think that is simply because my splitting function is terrible (it splits each byte, whereas we'd probably want to use whitespace and punctuation, or something content-specific). --- diff --git a/contrib/diff-highlight/diff-highlight b/contrib/diff-highlight/diff-highlight index ffefc31..7165518 100755 --- a/contrib/diff-highlight/diff-highlight +++ b/contrib/diff-highlight/diff-highlight @@ -3,6 +3,7 @@ use 5.008; use warnings FATAL => 'all'; use strict; +use Algorithm::Diff; # Highlight by reversing foreground and background. You could do # other things like bold or underline if you prefer. @@ -88,131 +89,54 @@ sub show_hunk { return; } - # If we have mismatched numbers of lines on each side, we could try to - # be clever and match up similar lines. But for now we are simple and - # stupid, and only handle multi-line hunks that remove and add the same - # number of lines. - if (@$a != @$b) { - print @$a, @$b; - return; - } - - my @queue; - for (my $i = 0; $i < @$a; $i++) { - my ($rm, $add) = highlight_pair($a->[$i], $b->[$i]); - print $rm; - push @queue, $add; - } - print @queue; -} - -sub highlight_pair { - my @a = split_line(shift); - my @b = split_line(shift); + my ($prefix_a, $suffix_a, @hunk_a) = split_hunk(@$a); + my ($prefix_b, $suffix_b, @hunk_b) = split_hunk(@$b); - # Find common prefix, taking care to skip any ansi - # color codes. - my $seen_plusminus; - my ($pa, $pb) = (0, 0); - while ($pa < @a && $pb < @b) { - if ($a[$pa] =~ /$COLOR/) { - $pa++; - } - elsif ($b[$pb] =~ /$COLOR/) { - $pb++; - } - elsif ($a[$pa] eq $b[$pb]) { - $pa++; - $pb++; - } - elsif (!$seen_plusminus && $a[$pa] eq '-' && $b[$pb] eq '+') { - $seen_plusminus = 1; - $pa++; - $pb++; - } - else { - last; - } - } + my $diff = Algorithm::Diff->new(\@hunk_a, \@hunk_b); + my (@out_a, @out_b); + while ($diff->Next()) { + my $bits = $diff->Diff(); - # Find common suffix, ignoring colors. - my ($sa, $sb) = ($#a, $#b); - while ($sa >= $pa && $sb >= $pb) { - if ($a[$sa] =~ /$COLOR/) { - $sa--; - } - elsif ($b[$sb] =~ /$COLOR/) { - $sb--; - } - elsif ($a[$sa] eq $b[$sb]) { - $sa--; - $sb--; - } - else { - last; - } - } + push @out_a, $OLD_HIGHLIGHT[1] if $bits & 1; + push @out_a, $diff->Items(1); + push @out_a, $OLD_HIGHLIGHT[2] if $bits & 1; - if (is_pair_interesting(\@a, $pa, $sa, \@b, $pb, $sb)) { - return highlight_line(\@a, $pa, $sa, \@OLD_HIGHLIGHT), - highlight_line(\@b, $pb, $sb, \@NEW_HIGHLIGHT); - } - else { - return join('', @a), - join('', @b); + push @out_b, $NEW_HIGHLIGHT[1] if $bits & 2; + push @out_b, $diff->Items(2); + push @out_b, $NEW_HIGHLIGHT[2] if $bits & 2; } -} -sub split_line { - local $_ = shift; - return utf8::decode($_) ? - map { utf8::encode($_); $_ } - map { /$COLOR/ ? $_ : (split //) } - split /($COLOR+)/ : - map { /$COLOR/ ? $_ : (split //) } - split /($COLOR+)/; + output_split_hunk($prefix_a, $suffix_a, @out_a); + output_split_hunk($prefix_b, $suffix_b, @out_b); } -sub highlight_line { - my ($line, $prefix, $suffix, $theme) = @_; - - my $start = join('', @{$line}[0..($prefix-1)]); - my $mid = join('', @{$line}[$prefix..$suffix]); - my $end = join('', @{$line}[($suffix+1)..$#$line]); - - # If we have a "normal" color specified, then take over the whole line. - # Otherwise, we try to just manipulate the highlighted bits. - if (defined $theme->[0]) { - s/$COLOR//g for ($start, $mid, $end); - chomp $end; - return join('', - $theme->[0], $start, $RESET, - $theme->[1], $mid, $RESET, - $theme->[0], $end, $RESET, - "\n" - ); - } else { - return join('', - $start, - $theme->[1], $mid, $theme->[2], - $end - ); +# Return the individual diff-able items of the hunk, but with any +# diff or color prefix/suffix for each line split out (we assume that the +# prefix/suffix for each line will be the same). +sub split_hunk { + my ($prefix, $suffix, @r); + foreach my $line (@_) { + $line =~ /^($COLOR*[+-]$COLOR*)(.*)($COLOR*)/ + or die "eh, this is supposed to match everything!"; + + # overwrite the old values; we assume they're all the same + # anyway + $prefix = $1; + $suffix = $3; + + # do a straight character split. This almost certainly isn't + # ideal, but it's a good starting point. We should at the very + # least be utf8-aware, and probably use color-words regexes. + push @r, split(//, $2), "\n"; } + return ($prefix, $suffix, @r); } -# Pairs are interesting to highlight only if we are going to end up -# highlighting a subset (i.e., not the whole line). Otherwise, the highlighting -# is just useless noise. We can detect this by finding either a matching prefix -# or suffix (disregarding boring bits like whitespace and colorization). -sub is_pair_interesting { - my ($a, $pa, $sa, $b, $pb, $sb) = @_; - my $prefix_a = join('', @$a[0..($pa-1)]); - my $prefix_b = join('', @$b[0..($pb-1)]); - my $suffix_a = join('', @$a[($sa+1)..$#$a]); - my $suffix_b = join('', @$b[($sb+1)..$#$b]); - - return $prefix_a !~ /^$COLOR*-$BORING*$/ || - $prefix_b !~ /^$COLOR*\+$BORING*$/ || - $suffix_a !~ /^$BORING*$/ || - $suffix_b !~ /^$BORING*$/; +sub output_split_hunk { + my $prefix = shift; + my $suffix = shift; + my $str = join('', @_); + $str =~ s/^/$prefix/mg; + $str =~ s/$/$suffix/mg; + print $str; } -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html