Use File::MMagic to determine the MIME type of a blob/file.
The variable magic_mime_file holds the location of the
"magic.mime" file, usually "/usr/share/file/magic.mime".
If not defined, the magic numbers internally stored in the
File::MMagic module are used.
Action "blob" now checks the file type: if the file type is
not "text/*", then action "blob" falls back to "blob_plain",
i.e. the file is sent raw for the browser to interpret.
If the file type is "text/*", then "blob" keeps the
current "cat -n"-like output, from which you can click
"plain" to get the "blob_plain" output.
Signed-off-by: Luben Tuikov <ltuikov@xxxxxxxxx>
---
gitweb/gitweb.cgi | 140 +++++++++++++++++++++--------------------------------
1 files changed, 56 insertions(+), 84 deletions(-)
diff --git a/gitweb/gitweb.cgi b/gitweb/gitweb.cgi
index cce0753..6798990 100755
--- a/gitweb/gitweb.cgi
+++ b/gitweb/gitweb.cgi
@@ -14,6 +14,8 @@ use CGI::Util qw(unescape);
use CGI::Carp qw(fatalsToBrowser);
use Encode;
use Fcntl ':mode';
+use File::MMagic;
+use FileHandle;
binmode STDOUT, ':utf8';
our $cgi = new CGI;
@@ -54,9 +56,15 @@ #our $projects_list = $projectroot;
our $projects_list = "index/index.aux";
# default blob_plain mimetype and default charset for text/plain blob
-our $default_blob_plain_mimetype = 'text/plain';
+our $default_blob_mimetype = 'text/plain';
our $default_text_plain_charset = undef;
+# magic_mime_file: if defined this file will be used by File::MMagic
+# to guess the file type, else the magic numbers stored internally
+# in File::MMagic will be used. Either relative or absolute name
+# can be given. E.g. "/usr/share/file/magic.mime".
+our $magic_mime_file = undef;
+
# file to use for guessing MIME types before trying /etc/mime.types
# (relative to the current git repository)
our $mimetypes_file = undef;
@@ -1455,11 +1463,58 @@ sub git_get_hash_by_path {
}
}
+#
+# get_blob_mimetype: guess the MIME type of the blob named by the
+# global $hash.
+#
+# Strangely enough the File::MMagic package, version 1.27, has a bug
+# whereby reading from a piped filehandle (e.g. STDIN, or "-|") always
+# returns 'text/plain', but reading from a file on a file system (as is
+# the case for the checktype_filename() method) properly determines the
+# file type.  The blob is therefore spooled to "$git_temp/blob-$hash"
+# first; an already readable spool file is reused as is.
+#
+# Returns the MIME type string, or $default_blob_mimetype when the blob
+# cannot be read or spooled, or when no usable type was detected.
+#
+sub get_blob_mimetype {
+	my $blob_file = "$git_temp/blob-$hash";
+	if (! -r $blob_file) {
+		# List-form pipe open: $hash is never seen by a shell.
+		open my $fd_in, "-|", "$gitbin/git-cat-file", "blob", $hash
+			or return $default_blob_mimetype;
+
+		# Spool under a per-process name and rename into place, so that
+		# a concurrent request never picks up a half-written blob file.
+		my $tmp_file = "$blob_file.$$";
+		my $fd_out;
+		if (!open($fd_out, ">", $tmp_file)) {
+			close $fd_in;
+			return $default_blob_mimetype;
+		}
+
+		# Blobs are arbitrary bytes: copy them unmodified, in chunks.
+		binmode $fd_in;
+		binmode $fd_out;
+		my $ok = 1;
+		my $buf;
+		while (read($fd_in, $buf, 65536)) {
+			unless (print {$fd_out} $buf) {
+				$ok = 0;
+				last;
+			}
+		}
+		# Buffered write errors and a failing git-cat-file both surface
+		# at close time; do not keep the spool file in either case.
+		close($fd_out) or $ok = 0;
+		close($fd_in) or $ok = 0;
+		if (!$ok || !rename($tmp_file, $blob_file)) {
+			unlink $tmp_file;
+			return $default_blob_mimetype;
+		}
+	}
+	my $mm = $magic_mime_file ? File::MMagic->new($magic_mime_file) : File::MMagic->new;
+	my $mime = $mm->checktype_filename($blob_file);
+	# NOTE(review): File::MMagic appears to report unreadable and empty
+	# files as "x-system/..." pseudo-types -- confirm against the installed
+	# version.  Such a value is not usable as a Content-Type.
+	return $default_blob_mimetype if !defined $mime || $mime =~ m{^x-system/};
+	return $mime;
+}
+
+#
+# git_blob_plain: send the blob named by the global $hash to the browser
+# raw, with the Content-Type guessed by get_blob_mimetype().
+#
+# The suggested file name is $file_name when defined, otherwise the hash
+# (with ".txt" appended for text types).  Returns without printing
+# anything if git-cat-file cannot be started.
+#
+sub git_blob_plain {
+	# List-form pipe open: $hash is never seen by a shell.
+	open my $fd, "-|", "$gitbin/git-cat-file", "blob", $hash or return;
+	my $mimetype = get_blob_mimetype();
+
+	# save as filename, even when no $file_name is given
+	my $save_as = "$hash";
+	if (defined $file_name) {
+		$save_as = $file_name;
+	} elsif ($mimetype =~ m/^text\//) {
+		$save_as .= '.txt';
+	}
+
+	# Apply the configured default charset to plain text; without this
+	# the $default_text_plain_charset setting would have no effect here.
+	my $content_type = $mimetype;
+	if ($default_text_plain_charset && $mimetype eq 'text/plain') {
+		$content_type .= '; charset=' . $default_text_plain_charset;
+	}
+
+	print $cgi->header(-type => $content_type, '-content-disposition' => "inline; filename=\"$save_as\"");
+	binmode STDOUT, ':raw';
+	{
+		# Slurp mode limited to this block, so that $/ is restored
+		# for the rest of the script whatever happens in here.
+		local $/;
+		print <$fd>;
+	}
+	binmode STDOUT, ':utf8'; # as set at the beginning of gitweb.cgi
+	close $fd;
+}
+
sub git_blob {
if (!defined $hash && defined $file_name) {
my $base = $hash_base || git_read_head($project);
$hash = git_get_hash_by_path($base, $file_name, "blob") || die_error(undef, "Error lookup file.");
}
+ my $mimetype = get_blob_mimetype();
+ if ($mimetype !~ m/^text\//) {
+ return git_blob_plain();
+ }
my $have_blame = git_get_project_config_bool ('blame');
open my $fd, "-|", "$gitbin/git-cat-file blob $hash" or die_error(undef, "Open failed.");
git_header_html();
@@ -1510,89 +1565,6 @@ sub git_blob {
git_footer_html();
}
-sub mimetype_guess_file {
- my $filename = shift;
- my $mimemap = shift;
- -r $mimemap or return undef;
-
- my %mimemap;
- open(MIME, $mimemap) or return undef;
- while (<MIME>) {
- my ($mime, $exts) = split(/\t+/);
- my @exts = split(/\s+/, $exts);
- foreach my $ext (@exts) {
- $mimemap{$ext} = $mime;
- }
- }
- close(MIME);
-
- $filename =~ /\.(.*?)$/;
- return $mimemap{$1};
-}
-
-sub mimetype_guess {
- my $filename = shift;
- my $mime;
- $filename =~ /\./ or return undef;
-
- if ($mimetypes_file) {
- my $file = $mimetypes_file;
- #$file =~ m#^/# or $file = "$projectroot/$path/$file";
- $mime = mimetype_guess_file($filename, $file);
- }
- $mime ||= mimetype_guess_file($filename, '/etc/mime.types');
- return $mime;
-}
-
-sub git_blob_plain_mimetype {
- my $fd = shift;
- my $filename = shift;
-
- # just in case
- return $default_blob_plain_mimetype unless $fd;
-
- if ($filename) {
- my $mime = mimetype_guess($filename);
- $mime and return $mime;
- }
-
- if (-T $fd) {
- return 'text/plain' .
- ($default_text_plain_charset ? '; charset='.$default_text_plain_charset : '');
- } elsif (! $filename) {
- return 'application/octet-stream';
- } elsif ($filename =~ m/\.png$/i) {
- return 'image/png';
- } elsif ($filename =~ m/\.gif$/i) {
- return 'image/gif';
- } elsif ($filename =~ m/\.jpe?g$/i) {
- return 'image/jpeg';
- } else {
- return 'application/octet-stream';
- }
-}
-
-sub git_blob_plain {
- open my $fd, "-|", "$gitbin/git-cat-file blob $hash" or return;
- my $type = git_blob_plain_mimetype($fd, $file_name);
-
- # save as filename, even when no $file_name is given
- my $save_as = "$hash";
- if (defined $file_name) {
- $save_as = $file_name;
- } elsif ($type =~ m/^text\//) {
- $save_as .= '.txt';
- }
-
- print $cgi->header(-type => "$type", '-content-disposition' => "inline; filename=\"$save_as\"");
- undef $/;
- binmode STDOUT, ':raw';
- print <$fd>;
- binmode STDOUT, ':utf8'; # as set at the beginning of gitweb.cgi
- $/ = "\n";
- close $fd;
-}
-
sub git_tree {
if (!defined $hash) {
$hash = git_read_head($project);
--
1.4.1.g2f3c