From: Pavan Kumar Sunkara <pavan.sss1991@xxxxxxxxx> Create a Gitweb::Util module, which is meant to contain internal utilities used by gitweb. Currently it includes all the quoting/unquoting and escaping subroutines that are used by gitweb. Update gitweb/Makefile to install the Gitweb::Util module alongside gitweb. Signed-off-by: Pavan Kumar Sunkara <pavan.sss1991@xxxxxxxxx> Signed-off-by: Jakub Narebski <jnareb@xxxxxxxxx> --- This patch serves two purposes. First, it serves as a test that the earlier "gitweb: Prepare for splitting gitweb" patch actually works correctly. Second, it might be a good starting point for splitting gitweb. Refactoring well-defined parts into separate modules (Perl packages) could be a better, easier way than trying to come up with a good separation (split) into modules upfront. Such a leisurely approach to splitting gitweb has more chance of being accepted. Perhaps if such an approach had been proposed for GSoC 2010, the "gitweb write" project wouldn't have failed its midterm evaluations... This module was taken out of the unfinished GSoC 2010 project with Pavan Kumar Sunkara as a student: git://repo.or.cz/git/gsoc2010-gitweb.git The module was renamed from Gitweb::Escape to Gitweb::Util. Currently the contents are the same, but that might change. The code was updated to a more modern codebase; since then esc_path_info and esc_attr were added to gitweb - both of those are now in Gitweb::Util. Some changes were also required, and conflicts had to be resolved, due to the fact that creating Gitweb::Util (formerly Gitweb::Escape) is no longer in the middle of a larger patch series. In particular, the lack of Gitweb::Config means that $fallback_encoding needed to be added to the Gitweb::Util module. While at it, do not export the quot_cec and quot_upr helper subroutines by default, but mark them exportable nevertheless. 
gitweb/Makefile | 3 + gitweb/gitweb.perl | 140 +----------------------------------- gitweb/lib/Gitweb/Util.pm | 177 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 183 insertions(+), 137 deletions(-) create mode 100644 gitweb/lib/Gitweb/Util.pm diff --git a/gitweb/Makefile b/gitweb/Makefile index abe9db8..9a4053b 100644 --- a/gitweb/Makefile +++ b/gitweb/Makefile @@ -114,6 +114,9 @@ endif GITWEB_FILES += static/git-logo.png static/git-favicon.png +# Modules: Gitweb::* +GITWEB_MODULES += Gitweb/Util.pm + GITWEB_REPLACE = \ -e 's|++GIT_VERSION++|$(GIT_VERSION)|g' \ -e 's|++GIT_BINDIR++|$(bindir)|g' \ diff --git a/gitweb/gitweb.perl b/gitweb/gitweb.perl index ea8ab56..e8d8589 100755 --- a/gitweb/gitweb.perl +++ b/gitweb/gitweb.perl @@ -23,11 +23,13 @@ use lib "++GITWEBLIBDIR++"; use CGI qw(:standard :escapeHTML -nosticky); use CGI::Util qw(unescape); use CGI::Carp qw(fatalsToBrowser set_message); -use Encode; use Fcntl ':mode'; use File::Find qw(); use File::Basename qw(basename); use Time::HiRes qw(gettimeofday tv_interval); + +use Gitweb::Util; + binmode STDOUT, ':utf8'; our $t0 = [ gettimeofday() ]; @@ -1382,128 +1384,6 @@ sub validate_refname { return $input; } -# decode sequences of octets in utf8 into Perl's internal form, -# which is utf-8 with utf8 flag set if needed. 
gitweb writes out -# in utf-8 thanks to "binmode STDOUT, ':utf8'" at beginning -sub to_utf8 { - my $str = shift; - return undef unless defined $str; - if (utf8::valid($str)) { - utf8::decode($str); - return $str; - } else { - return decode($fallback_encoding, $str, Encode::FB_DEFAULT); - } -} - -# quote unsafe chars, but keep the slash, even when it's not -# correct, but quoted slashes look too horrible in bookmarks -sub esc_param { - my $str = shift; - return undef unless defined $str; - $str =~ s/([^A-Za-z0-9\-_.~()\/:@ ]+)/CGI::escape($1)/eg; - $str =~ s/ /\+/g; - return $str; -} - -# the quoting rules for path_info fragment are slightly different -sub esc_path_info { - my $str = shift; - return undef unless defined $str; - - # path_info doesn't treat '+' as space (specially), but '?' must be escaped - $str =~ s/([^A-Za-z0-9\-_.~();\/;:@&= +]+)/CGI::escape($1)/eg; - - return $str; -} - -# quote unsafe chars in whole URL, so some characters cannot be quoted -sub esc_url { - my $str = shift; - return undef unless defined $str; - $str =~ s/([^A-Za-z0-9\-_.~();\/;?:@&= ]+)/CGI::escape($1)/eg; - $str =~ s/ /\+/g; - return $str; -} - -# quote unsafe characters in HTML attributes -sub esc_attr { - - # for XHTML conformance escaping '"' to '"' is not enough - return esc_html(@_); -} - -# replace invalid utf8 character with SUBSTITUTION sequence -sub esc_html { - my $str = shift; - my %opts = @_; - - return undef unless defined $str; - - $str = to_utf8($str); - $str = $cgi->escapeHTML($str); - if ($opts{'-nbsp'}) { - $str =~ s/ / /g; - } - $str =~ s|([[:cntrl:]])|(($1 ne "\t") ? 
quot_cec($1) : $1)|eg; - return $str; -} - -# quote control characters and escape filename to HTML -sub esc_path { - my $str = shift; - my %opts = @_; - - return undef unless defined $str; - - $str = to_utf8($str); - $str = $cgi->escapeHTML($str); - if ($opts{'-nbsp'}) { - $str =~ s/ / /g; - } - $str =~ s|([[:cntrl:]])|quot_cec($1)|eg; - return $str; -} - -# Make control characters "printable", using character escape codes (CEC) -sub quot_cec { - my $cntrl = shift; - my %opts = @_; - my %es = ( # character escape codes, aka escape sequences - "\t" => '\t', # tab (HT) - "\n" => '\n', # line feed (LF) - "\r" => '\r', # carrige return (CR) - "\f" => '\f', # form feed (FF) - "\b" => '\b', # backspace (BS) - "\a" => '\a', # alarm (bell) (BEL) - "\e" => '\e', # escape (ESC) - "\013" => '\v', # vertical tab (VT) - "\000" => '\0', # nul character (NUL) - ); - my $chr = ( (exists $es{$cntrl}) - ? $es{$cntrl} - : sprintf('\%2x', ord($cntrl)) ); - if ($opts{-nohtml}) { - return $chr; - } else { - return "<span class=\"cntrl\">$chr</span>"; - } -} - -# Alternatively use unicode control pictures codepoints, -# Unicode "printable representation" (PR) -sub quot_upr { - my $cntrl = shift; - my %opts = @_; - - my $chr = sprintf('&#%04d;', 0x2400+ord($cntrl)); - if ($opts{-nohtml}) { - return $chr; - } else { - return "<span class=\"cntrl\">$chr</span>"; - } -} - # git may return quoted and escaped filenames sub unquote { my $str = shift; @@ -1540,20 +1420,6 @@ sub unquote { return $str; } -# escape tabs (convert tabs to spaces) -sub untabify { - my $line = shift; - - while ((my $pos = index($line, "\t")) != -1) { - if (my $count = (8 - ($pos % 8))) { - my $spaces = ' ' x $count; - $line =~ s/\t/$spaces/; - } - } - - return $line; -} - sub project_in_list { my $project = shift; my @list = git_get_projects_list(); diff --git a/gitweb/lib/Gitweb/Util.pm b/gitweb/lib/Gitweb/Util.pm new file mode 100644 index 0000000..a213d3f --- /dev/null +++ b/gitweb/lib/Gitweb/Util.pm @@ -0,0 +1,177 
@@ +# Gitweb::Util -- Internal utilities used by gitweb (git web interface) +# +# This module is licensed under the GPLv2 + +package Gitweb::Util; + +use strict; +use warnings; +use Exporter qw(import); + +our @EXPORT = qw(to_utf8 + esc_param esc_path_info esc_url + esc_html esc_path esc_attr + untabify + $fallback_encoding); +our @EXPORT_OK = qw(quot_cec quot_upr); + +use Encode; +use CGI; + +# ...................................................................... +# Perl encoding (utf-8) + +# decode sequences of octets in utf8 into Perl's internal form, +# which is utf-8 with utf8 flag set if needed. gitweb writes out +# in utf-8 thanks to "binmode STDOUT, ':utf8'" at beginning of gitweb.perl +our $fallback_encoding = 'latin1'; +sub to_utf8 { + my $str = shift; + return undef unless defined $str; + if (utf8::valid($str)) { + utf8::decode($str); + return $str; + } else { + return decode($fallback_encoding, $str, Encode::FB_DEFAULT); + } +} + +# ...................................................................... +# CGI encoding + +# quote unsafe chars, but keep the slash, even when it's not +# correct, but quoted slashes look too horrible in bookmarks +sub esc_param { + my $str = shift; + return undef unless defined $str; + + $str =~ s/([^A-Za-z0-9\-_.~()\/:@ ]+)/CGI::escape($1)/eg; + $str =~ s/ /\+/g; + + return $str; +} + +# the quoting rules for path_info fragment are slightly different +sub esc_path_info { + my $str = shift; + return undef unless defined $str; + + # path_info doesn't treat '+' as space (specially), but '?' must be escaped + $str =~ s/([^A-Za-z0-9\-_.~();\/;:@&= +]+)/CGI::escape($1)/eg; + + return $str; +} + +# quote unsafe chars in whole URL, so some characters cannot be quoted +sub esc_url { + my $str = shift; + return undef unless defined $str; + + $str =~ s/([^A-Za-z0-9\-_.~();\/;?:@&= ]+)/CGI::escape($1)/eg; + $str =~ s/ /\+/g; + + return $str; +} + +# ...................................................................... 
+# (X)HTML escaping + +# replace invalid utf8 character with SUBSTITUTION sequence +sub esc_html { + my $str = shift; + my %opts = @_; + + return undef unless defined $str; + + $str = to_utf8($str); + $str = CGI::escapeHTML($str); + if ($opts{'-nbsp'}) { + $str =~ s/ / /g; + } + $str =~ s|([[:cntrl:]])|(($1 ne "\t") ? quot_cec($1) : $1)|eg; + return $str; +} + +# quote unsafe characters in HTML attributes +sub esc_attr { + + # for XHTML conformance escaping '"' to '"' is not enough + return esc_html(@_); +} + +# quote control characters and escape filename to HTML +sub esc_path { + my $str = shift; + my %opts = @_; + + return undef unless defined $str; + + $str = to_utf8($str); + $str = CGI::escapeHTML($str); + if ($opts{'-nbsp'}) { + $str =~ s/ / /g; + } + $str =~ s|([[:cntrl:]])|quot_cec($1)|eg; + return $str; +} + +# ...................................................................... +# Other + +# escape tabs (convert tabs to spaces) +sub untabify { + my $line = shift; + + while ((my $pos = index($line, "\t")) != -1) { + if (my $count = (8 - ($pos % 8))) { + my $spaces = ' ' x $count; + $line =~ s/\t/$spaces/; + } + } + + return $line; +} + +# ---------------------------------------------------------------------- +# Showing "unprintable" characters (utility functions) + +# Make control characters "printable", using character escape codes (CEC) +sub quot_cec { + my $cntrl = shift; + my %opts = @_; + my %es = ( # character escape codes, aka escape sequences + "\t" => '\t', # tab (HT) + "\n" => '\n', # line feed (LF) + "\r" => '\r', # carrige return (CR) + "\f" => '\f', # form feed (FF) + "\b" => '\b', # backspace (BS) + "\a" => '\a', # alarm (bell) (BEL) + "\e" => '\e', # escape (ESC) + "\013" => '\v', # vertical tab (VT) + "\000" => '\0', # nul character (NUL) + ); + my $chr = ( (exists $es{$cntrl}) + ? 
$es{$cntrl} + : sprintf('\%2x', ord($cntrl)) ); + if ($opts{-nohtml}) { + return $chr; + } else { + return "<span class=\"cntrl\">$chr</span>"; + } +} + +# Alternatively use unicode control pictures codepoints, +# Unicode "printable representation" (PR) +sub quot_upr { + my $cntrl = shift; + my %opts = @_; + + my $chr = sprintf('&#%04d;', 0x2400+ord($cntrl)); + if ($opts{-nohtml}) { + return $chr; + } else { + return "<span class=\"cntrl\">$chr</span>"; + } +} + +1; -- 1.7.3 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html