[PATCH/RFC 1/n] gitweb: Better git-unquoting and gitweb-quoting of pathnames

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Extend the unquote subroutine, which unquotes the quoted and escaped
filenames that git may return, to deal not only with octal char sequence
quoting, but also with quoted ordinary characters, including '\"' and
'\\' (which are the quoted forms of '"' and '\' respectively), and with
C escape sequences, including '\t' for TAB and '\n' for LF.

Add the esc_path subroutine for gitweb quoting and HTML escaping of
filenames (currently it does the equivalent of ls' --hide-control-chars,
which means showing undisplayable characters, including '\n' and '\t',
as a '?' (question mark) character).  Convert gitweb to use esc_path
instead of plain esc_html to print pathnames.

Signed-off-by: Jakub Narebski <jnareb@xxxxxxxxx>
---
By the way, testing this patch uncovered some errors in gitweb, some
related to files with strange names, some unrelated. I'll address them
in further patches. They are:

  1. Using m/..\t(.+)$/; to catch filename instead of m/..\t(.+)$/s;

  2. Lack of '--' after the $hash/$hash_base parameter, which gives an
     error if there exist a branch (ref) and a file (or directory) with
     the same name

The current implementation of esc_path is meant as preliminary: if you
have a better idea for quoting names in gitweb, please tell us, or better,
send code/patches.


 gitweb/gitweb.perl |   68 +++++++++++++++++++++++++++++++++++++--------------
 1 files changed, 49 insertions(+), 19 deletions(-)

diff --git a/gitweb/gitweb.perl b/gitweb/gitweb.perl
index ec46b80..a15e916 100755
--- a/gitweb/gitweb.perl
+++ b/gitweb/gitweb.perl
@@ -563,12 +563,42 @@ sub esc_html {
 	return $str;
 }
 
+# HTML-escape a pathname, then mask every control character with '?'
+sub esc_path {
+	my ($path) = @_;
+	my $escaped = esc_html($path);
+	$escaped =~ s/[[:cntrl:]]/?/g; # like --hide-control-chars in ls (\a \b \e \f \n \r \t are all [:cntrl:])
+	return $escaped;
+}
+
 # git may return quoted and escaped filenames
 sub unquote {
 	my $str = shift;
+	# unq: turn the text following one backslash into the character it stands for
+	sub unq {
+		my $seq = shift;
+		my %es = ( # C escape letters and the characters they denote
+			't' => "\t", # tab            (HT, TAB)
+			'n' => "\n", # newline        (NL)
+			'r' => "\r", # return         (CR)
+			'f' => "\f", # form feed      (FF)
+			'b' => "\b", # backspace      (BS)
+			'a' => "\a", # alarm (bell)   (BEL)
+			#'e' => "\e", # escape        (ESC)
+			'v' => "\013", # vertical tab (VT); octal 011 would be TAB
+		);
+
+		# octal char sequence
+		return chr(oct($seq))  if ($seq =~ m/^[0-7]{1,3}$/);
+		# C escape sequence (this includes '\n' (LF) and '\t' (TAB)); only keys present in %es
+		return $es{$seq}       if (exists $es{$seq});
+		# quoted ordinary character (this includes '\\' and '\"')
+		return $seq;
+	}
+
 	if ($str =~ m/^"(.*)"$/) {
 		$str = $1;
-		$str =~ s/\\([0-7]{1,3})/chr(oct($1))/eg;
+		$str =~ s/\\([^0-7]|[0-7]{1,3})/unq($1)/eg;
 	}
 	return $str;
 }
@@ -1435,7 +1465,7 @@ sub git_header_html {
 		if (defined $action) {
 			$title .= "/$action";
 			if (defined $file_name) {
-				$title .= " - " . esc_html($file_name);
+				$title .= " - " . esc_path($file_name);
 				if ($action eq "tree" && $file_name !~ m|/$|) {
 					$title .= "/";
 				}
@@ -1706,20 +1736,20 @@ sub git_print_page_path {
 			$fullname .= ($fullname ? '/' : '') . $dir;
 			print $cgi->a({-href => href(action=>"tree", file_name=>$fullname,
 			                             hash_base=>$hb),
-			              -title => $fullname}, esc_html($dir));
+			              -title => $fullname}, esc_path($dir));
 			print " / ";
 		}
 		if (defined $type && $type eq 'blob') {
 			print $cgi->a({-href => href(action=>"blob_plain", file_name=>$file_name,
 			                             hash_base=>$hb),
-			              -title => $name}, esc_html($basename));
+			              -title => $name}, esc_path($basename));
 		} elsif (defined $type && $type eq 'tree') {
 			print $cgi->a({-href => href(action=>"tree", file_name=>$file_name,
 			                             hash_base=>$hb),
-			              -title => $name}, esc_html($basename));
+			              -title => $name}, esc_path($basename));
 			print " / ";
 		} else {
-			print esc_html($basename);
+			print esc_path($basename);
 		}
 	}
 	print "<br/></div>\n";
@@ -1791,7 +1821,7 @@ sub git_print_tree_entry {
 		print "<td class=\"list\">" .
 			$cgi->a({-href => href(action=>"blob", hash=>$t->{'hash'},
 			                       file_name=>"$basedir$t->{'name'}", %base_key),
-			        -class => "list"}, esc_html($t->{'name'})) . "</td>\n";
+			        -class => "list"}, esc_path($t->{'name'})) . "</td>\n";
 		print "<td class=\"link\">";
 		print $cgi->a({-href => href(action=>"blob", hash=>$t->{'hash'},
 					     file_name=>"$basedir$t->{'name'}", %base_key)},
@@ -1818,7 +1848,7 @@ sub git_print_tree_entry {
 		print "<td class=\"list\">";
 		print $cgi->a({-href => href(action=>"tree", hash=>$t->{'hash'},
 		                             file_name=>"$basedir$t->{'name'}", %base_key)},
-		              esc_html($t->{'name'}));
+		              esc_path($t->{'name'}));
 		print "</td>\n";
 		print "<td class=\"link\">";
 		print $cgi->a({-href => href(action=>"tree", hash=>$t->{'hash'},
@@ -1883,7 +1913,7 @@ sub git_difftree_body {
 			print "<td>";
 			print $cgi->a({-href => href(action=>"blob", hash=>$diff{'to_id'},
 			                             hash_base=>$hash, file_name=>$diff{'file'}),
-			              -class => "list"}, esc_html($diff{'file'}));
+			              -class => "list"}, esc_path($diff{'file'}));
 			print "</td>\n";
 			print "<td>$mode_chng</td>\n";
 			print "<td class=\"link\">";
@@ -1899,7 +1929,7 @@ sub git_difftree_body {
 			print "<td>";
 			print $cgi->a({-href => href(action=>"blob", hash=>$diff{'from_id'},
 			                             hash_base=>$parent, file_name=>$diff{'file'}),
-			               -class => "list"}, esc_html($diff{'file'}));
+			               -class => "list"}, esc_path($diff{'file'}));
 			print "</td>\n";
 			print "<td>$mode_chng</td>\n";
 			print "<td class=\"link\">";
@@ -1939,7 +1969,7 @@ sub git_difftree_body {
 			print "<td>";
 			print $cgi->a({-href => href(action=>"blob", hash=>$diff{'to_id'},
 			                             hash_base=>$hash, file_name=>$diff{'file'}),
-			              -class => "list"}, esc_html($diff{'file'}));
+			              -class => "list"}, esc_path($diff{'file'}));
 			print "</td>\n";
 			print "<td>$mode_chnge</td>\n";
 			print "<td class=\"link\">";
@@ -1979,11 +2009,11 @@ sub git_difftree_body {
 			print "<td>" .
 			      $cgi->a({-href => href(action=>"blob", hash_base=>$hash,
 			                             hash=>$diff{'to_id'}, file_name=>$diff{'to_file'}),
-			              -class => "list"}, esc_html($diff{'to_file'})) . "</td>\n" .
+			              -class => "list"}, esc_path($diff{'to_file'})) . "</td>\n" .
 			      "<td><span class=\"file_status $nstatus\">[$nstatus from " .
 			      $cgi->a({-href => href(action=>"blob", hash_base=>$parent,
 			                             hash=>$diff{'from_id'}, file_name=>$diff{'from_file'}),
-			              -class => "list"}, esc_html($diff{'from_file'})) .
+			              -class => "list"}, esc_path($diff{'from_file'})) .
 			      " with " . (int $diff{'similarity'}) . "% similarity$mode_chng]</span></td>\n" .
 			      "<td class=\"link\">";
 			if ($diff{'to_id'} ne $diff{'from_id'}) {
@@ -2113,7 +2143,7 @@ sub git_patchset_body {
 			$file  ||= $diffinfo->{'file'};
 			$file = $cgi->a({-href => href(action=>"blob", hash_base=>$hash_parent,
 			                               hash=>$diffinfo->{'from_id'}, file_name=>$file),
-			                -class => "list"}, esc_html($file));
+			                -class => "list"}, esc_path($file));
 			$patch_line =~ s|a/.*$|a/$file|g;
 			print "<div class=\"diff from_file\">$patch_line</div>\n";
 
@@ -2125,7 +2155,7 @@ sub git_patchset_body {
 			$file  ||= $diffinfo->{'file'};
 			$file = $cgi->a({-href => href(action=>"blob", hash_base=>$hash,
 			                               hash=>$diffinfo->{'to_id'}, file_name=>$file),
-			                -class => "list"}, esc_html($file));
+			                -class => "list"}, esc_path($file));
 			$patch_line =~ s|b/.*|b/$file|g;
 			print "<div class=\"diff to_file\">$patch_line</div>\n";
 
@@ -3373,8 +3403,8 @@ sub git_blobdiff {
 
 	} else {
 		while (my $line = <$fd>) {
-			$line =~ s!a/($hash|$hash_parent)!'a/'.esc_html($diffinfo{'from_file'})!eg;
-			$line =~ s!b/($hash|$hash_parent)!'b/'.esc_html($diffinfo{'to_file'})!eg;
+			$line =~ s!a/($hash|$hash_parent)!'a/'.esc_path($diffinfo{'from_file'})!eg;
+			$line =~ s!b/($hash|$hash_parent)!'b/'.esc_path($diffinfo{'to_file'})!eg;
 
 			print $line;
 
@@ -3729,7 +3759,7 @@ sub git_search {
 						print $cgi->a({-href => href(action=>"blob", hash_base=>$co{'id'},
 						                             hash=>$set{'id'}, file_name=>$set{'file'}),
 						              -class => "list"},
-						              "<span class=\"match\">" . esc_html($set{'file'}) . "</span>") .
+						              "<span class=\"match\">" . esc_path($set{'file'}) . "</span>") .
 						      "<br/>\n";
 					}
 					print "</td>\n" .
@@ -3863,7 +3893,7 @@ XML
 			if (!($line =~ m/^:([0-7]{6}) ([0-7]{6}) ([0-9a-fA-F]{40}) ([0-9a-fA-F]{40}) (.)([0-9]{0,3})\t(.*)$/)) {
 				next;
 			}
-			my $file = esc_html(unquote($7));
+			my $file = esc_path(unquote($7));
 			$file = to_utf8($file);
 			print "$file<br/>\n";
 		}
-- 
1.4.3.3

-
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]