From: Yi EungJun <eungjun.yi@xxxxxxxxxxxxx> A highlighted string might be broken if the common subsequence covers only some of the bytes of a multibyte character. For example, if the old string is "진" and the new string is "지", then we expect the diff to be rendered as follows: -진 +지 but it is actually rendered as follows: -<EC><A7><84> +<EC><A7><80> Fix the bug by splitting the string into multibyte characters instead of bytes. --- contrib/diff-highlight/diff-highlight | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/contrib/diff-highlight/diff-highlight b/contrib/diff-highlight/diff-highlight index 08c88bb..2662c1a 100755 --- a/contrib/diff-highlight/diff-highlight +++ b/contrib/diff-highlight/diff-highlight @@ -2,6 +2,9 @@ use warnings FATAL => 'all'; use strict; +use File::Basename; +use File::Spec::Functions qw( catdir ); +use String::Multibyte; # Highlight by reversing foreground and background. You could do # other things like bold or underline if you prefer. @@ -24,6 +27,8 @@ my @removed; my @added; my $in_hunk; +my $mbcs = get_mbcs(); + # Some scripts may not realize that SIGPIPE is being ignored when launching the # pager--for instance scripts written in Python. $SIG{PIPE} = 'DEFAULT'; @@ -164,8 +169,8 @@ sub highlight_pair { sub split_line { local $_ = shift; - return map { /$COLOR/ ? $_ : (split //) } - split /($COLOR*)/; + return map { /$COLOR/ ? $_ : ($mbcs ? $mbcs->strsplit('', $_) : split //) } + split /($COLOR)/; } sub highlight_line { @@ -211,3 +216,19 @@ sub is_pair_interesting { $suffix_a !~ /^$BORING*$/ || $suffix_b !~ /^$BORING*$/; } + +# Returns an instance of String::Multibyte based on the charset defined by +# i18n.commitencoding or UTF-8, or undef if String::Multibyte doesn't support +# the charset. 
+sub get_mbcs {
+	my $dir = catdir(dirname($INC{'String/Multibyte.pm'}), 'Multibyte');
+	opendir my $dh, $dir or return;
+	my @mbcs_charsets = grep s/[.]pm\z//, readdir $dh;
+	closedir $dh; # not close(): $dh was opened with opendir
+	my $expected_charset = `git config i18n.commitencoding` || "UTF-8";
+	chomp $expected_charset; # git prints a newline after the value
+	$expected_charset =~ s/-//g; # Perl module names cannot contain '-'
+	my ($charset) = grep { lc($_) eq lc($expected_charset) } @mbcs_charsets;
+	defined $charset or return; # no String::Multibyte module for this charset
+	return eval { String::Multibyte->new($charset) }; # undef if new() dies
+}
-- 2.3.2.209.gd67f9d5.dirty -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html