On Friday 27 April 2007 22:29:03 you wrote: > Ismail Dönmez <ismail@xxxxxxxxxxxxx> writes: > >> Which means that there is something else going on. Your change > >> may fix what you observed (I do not doubt that it fixed what you > >> observed for you), but without understanding what really is > >> going on (iow, why it is a fix, when the documentation clearly > >> indicates they should be equivalent and it should not fix > >> anything), we cannot tell what *ELSE* we are breaking with this > >> change. > > > > That might be a bug in Encode itself indeed, I will dig a bit more. > > Thanks. > > Thanks. OK, I found out the reason: decode() tries to decode data that is already UTF-8 and fails. This is from Encode.pm: sub decode_utf8($;$) { my ( $str, $check ) = @_; return $str if is_utf8($str); <--- Checks whether $str is already UTF-8 if ($check) { return decode( "utf8", $str, $check ); <--- Otherwise does what gitweb does [...] So my patch is indeed correct. I am attaching it again for reference. Can it please be applied? Regards, ismail
--- gitweb/gitweb.perl 2007-04-24 16:53:00.000000000 +0300 +++ gitweb/gitweb.perl 2007-04-24 16:54:22.000000000 +0300 @@ -566,12 +566,6 @@ return $input; } -# very thin wrapper for decode("utf8", $str, Encode::FB_DEFAULT); -sub to_utf8 { - my $str = shift; - return decode("utf8", $str, Encode::FB_DEFAULT); -} - # quote unsafe chars, but keep the slash, even when it's not # correct, but quoted slashes look too horrible in bookmarks sub esc_param { @@ -596,7 +590,7 @@ my $str = shift; my %opts = @_; - $str = to_utf8($str); + $str = decode_utf8($str); $str = $cgi->escapeHTML($str); if ($opts{'-nbsp'}) { $str =~ s/ / /g; @@ -610,7 +604,7 @@ my $str = shift; my %opts = @_; - $str = to_utf8($str); + $str = decode_utf8($str); $str = $cgi->escapeHTML($str); if ($opts{'-nbsp'}) { $str =~ s/ / /g; @@ -893,7 +887,7 @@ if (length($short) < length($long)) { return $cgi->a({-href => $href, -class => "list subject", - -title => to_utf8($long)}, + -title => decode_utf8($long)}, esc_html($short) . $extra); } else { return $cgi->a({-href => $href, -class => "list subject"}, @@ -1110,7 +1104,7 @@ if (check_export_ok("$projectroot/$path")) { my $pr = { path => $path, - owner => to_utf8($owner), + owner => decode_utf8($owner), }; push @list, $pr } @@ -1139,7 +1133,7 @@ $pr = unescape($pr); $ow = unescape($ow); if ($pr eq $project) { - $owner = to_utf8($ow); + $owner = decode_utf8($ow); last; } } @@ -1613,7 +1607,7 @@ } my $owner = $gcos; $owner =~ s/[,;].*$//; - return to_utf8($owner); + return decode_utf8($owner); } ## ...................................................................... @@ -1696,7 +1690,7 @@ my $title = "$site_name"; if (defined $project) { - $title .= " - " . to_utf8($project); + $title .= " - " . 
decode_utf8($project); if (defined $action) { $title .= "/$action"; if (defined $file_name) { @@ -1969,7 +1963,7 @@ print "<div class=\"page_path\">"; print $cgi->a({-href => href(action=>"tree", hash_base=>$hb), - -title => 'tree root'}, to_utf8("[$project]")); + -title => 'tree root'}, decode_utf8("[$project]")); print " / "; if (defined $name) { my @dirname = split '/', $name; @@ -2584,7 +2578,7 @@ ($pr->{'age'}, $pr->{'age_string'}) = @aa; if (!defined $pr->{'descr'}) { my $descr = git_get_project_description($pr->{'path'}) || ""; - $pr->{'descr_long'} = to_utf8($descr); + $pr->{'descr_long'} = decode_utf8($descr); $pr->{'descr'} = chop_str($descr, 25, 5); } if (!defined $pr->{'owner'}) { @@ -3616,7 +3610,7 @@ $hash = git_get_head_hash($project); } - my $filename = to_utf8(basename($project)) . "-$hash.tar.$suffix"; + my $filename = decode_utf8(basename($project)) . "-$hash.tar.$suffix"; print $cgi->header( -type => "application/$ctype",