CGI.pm does by default slight escaping (simple_escape from CGI::Util) of all _attribute_ values (depending on the value of autoEscape(), by default on). This escaping, at least in CGI.pm version 3.10 (most current version at CPAN is 3.43), is minimal: only '"', '&', '<' and '>' are escaped using named HTML entity references (", &, < and > respectively). simple_escape does not do escaping of control characters such as ^X which are invalid in XHTML (in strict mode). Note that IIRC escaping '<' and '>' in attributes is not strictly necessary. New 'esc_attr' subroutine can be used to do more complete escaping of attribute values, including dealing with control characters (forbidden in XHTML in strict validating mode); currently it replaces all control characters by '?' question mark. For esc_attr() to work correctly you have to turn off CGI.pm autoescaping to avoid double escaping. While at it chop_and_esc_str now uses esc_attr instead of simply replacing control characters with '?' by itself. The problem is that when autoescaping is turned off we would have to esc_attr all attributes containing user input (this includes result of href() subroutine), or risk either double escaping, or not escaping some fragment. Alternate solution include: * stripping of control characters before input is passed as attribute value, as it was done in chop_and_esc_str; this would include at minimum format_subject_html * replacing either make_attributes from CGI.pm, or simple_escape subrotuine it uses, making it escape characters better Noticed-by: Paul Gortmaker <paul.gortmaker@xxxxxxxxxxxxx> Signed-off-by: Jakub Narebski <jnareb@xxxxxxxxx> http://thread.gmane.org/gmane.comp.version-control.git/116755/focus=117539 --- This is abandoned version. Still... perhaps esc_attr would be of use to somebody... Also as negative example in archives. gitweb/gitweb.perl | 39 +++++++++++++++++++++++++++++++++------ 1 files changed, 33 insertions(+), 6 deletions(-) diff --git a/gitweb/gitweb.perl b/gitweb/gitweb.perl index 05702e4..e505f2f 100755 --- a/gitweb/gitweb.perl +++ b/gitweb/gitweb.perl @@ -1027,7 +1027,7 @@ sub esc_param { return $str; } -# quote unsafe chars in whole URL, so some charactrs cannot be quoted +# quote unsafe chars in whole URL, so some characters cannot be quoted sub esc_url { my $str = shift; $str =~ s/([^A-Za-z0-9\-_.~();\/;?:@&=])/sprintf("%%%02X", ord($1))/eg; @@ -1036,6 +1036,26 @@ sub esc_url { return $str; } +# quote and escape tag attribute values; autoEscape has to be turned off +# otherwise you would get double escaping for '&' ("escape" character) +sub esc_attr { + my $str = shift; + return $str unless defined $str; + + my %ent = ( # named HTML entities + '"' => '"', + '&' => '&', + # those are not strictly necessary + '<' => '<', + '>' => '>', + ); + $str = to_utf8($str); + $str =~ s|([\"&<>])|$ent{$1}|eg; + $str =~ s|([[:cntrl:]])|?|eg; # alternative would be use quot_xxx($1) + + return $str; +} + # replace invalid utf8 character with SUBSTITUTION sequence sub esc_html { my $str = shift; @@ -1236,8 +1256,10 @@ sub chop_and_escape_str { if ($chopped eq $str) { return esc_html($chopped); } else { - $str =~ s/([[:cntrl:]])/?/g; - return $cgi->span({-title=>$str}, esc_html($chopped)); + my $autoescape = $cgi->autoEscape(undef); + my $result = $cgi->span({-title=>esc_attr($str)}, esc_html($chopped)); + $cgi->autoEscape($autoescape); # restore original value + return $result; } } @@ -1458,14 +1480,19 @@ sub format_subject_html { my ($long, $short, $href, $extra) = @_; $extra = '' unless defined($extra); + my $ret = ''; if (length($short) < length($long)) { - return $cgi->a({-href => $href, -class => "list subject", - -title => to_utf8($long)}, + my $autoescape = $cgi->autoEscape(undef); + # or just replace s/([[:cntrl:]])/?/g in -title + $ret = $cgi->a({-href => $href, -class => "list subject", + -title => esc_attr($long)}, esc_html($short) . $extra); + $cgi->autoEscape($autoescape); # restore original value } else { - return $cgi->a({-href => $href, -class => "list subject"}, + $ret = $cgi->a({-href => $href, -class => "list subject"}, esc_html($long) . $extra); } + return $ret; } # format git diff header line, i.e. "diff --(git|combined|cc) ..." -- 1.6.3.1 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html