[PATCH] gitweb.cgi: Use File::MMagic; "a=blob" action knows the blob/file type

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Use File::MMagic to determine the MIME type of a blob/file.
The variable $magic_mime_file holds the location of the
"magic.mime" file, usually "/usr/share/file/magic.mime".
If it is not defined, the magic numbers stored internally
in the File::MMagic module are used.

The "blob" action now knows the file type: if the file type
is not "text/*", then "blob" falls back to "blob_plain",
i.e. the file is sent raw for the browser to interpret.
If the file type is "text/*", then "blob" keeps the
current "cat -n"-like output, from which you can click
"plain" to get the "blob_plain" output.

Signed-off-by: Luben Tuikov <ltuikov@xxxxxxxxx>
---
 gitweb/gitweb.cgi |  140 +++++++++++++++++++++--------------------------------
 1 files changed, 56 insertions(+), 84 deletions(-)
diff --git a/gitweb/gitweb.cgi b/gitweb/gitweb.cgi
index cce0753..6798990 100755
--- a/gitweb/gitweb.cgi
+++ b/gitweb/gitweb.cgi
@@ -14,6 +14,8 @@ use CGI::Util qw(unescape);
 use CGI::Carp qw(fatalsToBrowser);
 use Encode;
 use Fcntl ':mode';
+use File::MMagic;
+use FileHandle;
 binmode STDOUT, ':utf8';
 
 our $cgi = new CGI;
@@ -54,9 +56,15 @@ #our $projects_list = $projectroot;
 our $projects_list = "index/index.aux";
 
 # default blob_plain mimetype and default charset for text/plain blob
-our $default_blob_plain_mimetype = 'text/plain';
+our $default_blob_mimetype = 'text/plain';
 our $default_text_plain_charset  = undef;
 
+# magic_mime_file: if defined this file will be used by File::MMagic
+# to guess the file type, else the magic numbers stored internally
+# in File::MMagic will be used.  Either relative or absolute name
+# can be given.  E.g. "/usr/share/file/magic.mime".
+our $magic_mime_file = undef;
+
 # file to use for guessing MIME types before trying /etc/mime.types
 # (relative to the current git repository)
 our $mimetypes_file = undef;
@@ -1455,11 +1463,58 @@ sub git_get_hash_by_path {
 	}
 }
 
+#
+# File::MMagic 1.27 has a bug: reading from a piped filehandle (e.g.
+# STDIN, or "-|") always yields 'text/plain', whereas reading from a
+# file on a file system (as checktype_filename() does) determines the
+# file type properly.  So the blob is first written to a temporary
+# file and checktype_filename() is run on that file.
+#
+sub get_blob_mimetype {
+	my $blob_file = "$git_temp/blob-$hash";
+	if (! -r $blob_file) {
+	    open my $fd_in, "-|", "$gitbin/git-cat-file blob $hash" or return $default_blob_mimetype;
+	    open my $fd_out, "> $blob_file";
+	    my @file = <$fd_in>;
+	    print $fd_out @file;
+	    close $fd_out;
+	    close $fd_in;
+	}
+	my $mm = $magic_mime_file ? File::MMagic->new($magic_mime_file) : new File::MMagic;
+	my $mime = $mm->checktype_filename($blob_file);
+	return $mime;
+}
+
+sub git_blob_plain {
+	open my $fd, "-|", "$gitbin/git-cat-file blob $hash" or return;
+	my $mimetype = get_blob_mimetype();
+
+	# save as filename, even when no $file_name is given
+	my $save_as = "$hash";
+	if (defined $file_name) {
+		$save_as = $file_name;
+	} elsif ($mimetype =~ m/^text\//) {
+		$save_as .= '.txt';
+	}
+
+	print $cgi->header(-type => "$mimetype", '-content-disposition' => "inline; filename=\"$save_as\"");
+	undef $/;
+	binmode STDOUT, ':raw';
+	print <$fd>;
+	binmode STDOUT, ':utf8'; # as set at the beginning of gitweb.cgi
+	$/ = "\n";
+	close $fd;
+}
+
 sub git_blob {
 	if (!defined $hash && defined $file_name) {
 		my $base = $hash_base || git_read_head($project);
 		$hash = git_get_hash_by_path($base, $file_name, "blob") || die_error(undef, "Error lookup file.");
 	}
+	my $mimetype = get_blob_mimetype();
+	if ($mimetype !~ m/^text\//) {
+		return git_blob_plain();
+	}
 	my $have_blame = git_get_project_config_bool ('blame');
 	open my $fd, "-|", "$gitbin/git-cat-file blob $hash" or die_error(undef, "Open failed.");
 	git_header_html();
@@ -1510,89 +1565,6 @@ sub git_blob {
 	git_footer_html();
 }
 
-sub mimetype_guess_file {
-	my $filename = shift;
-	my $mimemap = shift;
-	-r $mimemap or return undef;
-
-	my %mimemap;
-	open(MIME, $mimemap) or return undef;
-	while (<MIME>) {
-		my ($mime, $exts) = split(/\t+/);
-		my @exts = split(/\s+/, $exts);
-		foreach my $ext (@exts) {
-			$mimemap{$ext} = $mime;
-		}
-	}
-	close(MIME);
-
-	$filename =~ /\.(.*?)$/;
-	return $mimemap{$1};
-}
-
-sub mimetype_guess {
-	my $filename = shift;
-	my $mime;
-	$filename =~ /\./ or return undef;
-
-	if ($mimetypes_file) {
-		my $file = $mimetypes_file;
-		#$file =~ m#^/# or $file = "$projectroot/$path/$file";
-		$mime = mimetype_guess_file($filename, $file);
-	}
-	$mime ||= mimetype_guess_file($filename, '/etc/mime.types');
-	return $mime;
-}
-
-sub git_blob_plain_mimetype {
-	my $fd = shift;
-	my $filename = shift;
-
-	# just in case
-	return $default_blob_plain_mimetype unless $fd;
-
-	if ($filename) {
-		my $mime = mimetype_guess($filename);
-		$mime and return $mime;
-	}
-
-	if (-T $fd) {
-		return 'text/plain' .
-		       ($default_text_plain_charset ? '; charset='.$default_text_plain_charset : '');
-	} elsif (! $filename) {
-		return 'application/octet-stream';
-	} elsif ($filename =~ m/\.png$/i) {
-		return 'image/png';
-	} elsif ($filename =~ m/\.gif$/i) {
-		return 'image/gif';
-	} elsif ($filename =~ m/\.jpe?g$/i) {
-		return 'image/jpeg';
-	} else {
-		return 'application/octet-stream';
-	}
-}
-
-sub git_blob_plain {
-	open my $fd, "-|", "$gitbin/git-cat-file blob $hash" or return;
-	my $type = git_blob_plain_mimetype($fd, $file_name);
-
-	# save as filename, even when no $file_name is given
-	my $save_as = "$hash";
-	if (defined $file_name) {
-		$save_as = $file_name;
-	} elsif ($type =~ m/^text\//) {
-		$save_as .= '.txt';
-	}
-
-	print $cgi->header(-type => "$type", '-content-disposition' => "inline; filename=\"$save_as\"");
-	undef $/;
-	binmode STDOUT, ':raw';
-	print <$fd>;
-	binmode STDOUT, ':utf8'; # as set at the beginning of gitweb.cgi
-	$/ = "\n";
-	close $fd;
-}
-
 sub git_tree {
 	if (!defined $hash) {
 		$hash = git_read_head($project);
-- 
1.4.1.g2f3c


[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Announce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]