[RFC PATCH 11/9] [PoC] gitweb/lib - HTTP-aware output caching

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This commit adds new option, -http_output, to cache_output()
subroutine from GitwebCache::CacheOutput module.  When this subroutine
is called as cache_output(..., -http_output => 1), it assumes that
cached output is HTTP response, consisting of HTTP headers separated
by CR LF pair from the HTTP body (contents of the page).  It adds then
Expires and Cache-Control: max-age headers if they do not exist based
on current cache entry expiration time, and Content-Length header
based on the size of cache entry file.

New subtest in t9512 includes basic tests for this feature.

Enable it in gitweb, via $cache_options{'cache_output'} hashref.

Signed-off-by: Jakub Narebski <jnareb@xxxxxxxxx>
---
This patch is intended as proof of concept about making output caching
in gitweb make use of the fact that we cache HTTP response.  In this
patch the "smarts" (the "HTTP awareness") was added to the part of code
responsible by sending response to client.  Alternate solution would
be to add such "smarts" to saving captured output to cache file, or even
to caching engine itself.  Each of those solutions has its advantages
and disadvantages.

J.H., among others this patch is meant to illustrate that you don't need
treat output of 'snapshot' and 'blob_plain' views in a special way; you
can add Content-Length header in a action-agnostic way.

This patch replaces controversial "[RFC PATCH v7 11/9] [PoC] gitweb/lib
- tee, i.e. print and capture during cache entry generation" for
simplicity, though it is fairly independent, and probably would apply
without problems after it.


NOTE: This is only RFC, and while it passes t9512, I haven't done extensive
testing with it.

 gitweb/gitweb.perl                             |    3 ++
 gitweb/lib/GitwebCache/CacheOutput.pm          |   32 ++++++++++++++++++++++++
 gitweb/lib/GitwebCache/FileCacheWithLocking.pm |   24 ++++++++++++++++++
 t/t9512/test_cache_output.pl                   |   25 ++++++++++++++++++
 4 files changed, 84 insertions(+), 0 deletions(-)

diff --git a/gitweb/gitweb.perl b/gitweb/gitweb.perl
index e3f02b0..f3c14a9 100755
--- a/gitweb/gitweb.perl
+++ b/gitweb/gitweb.perl
@@ -360,6 +360,9 @@ our %cache_options = (
 		# pages will be cached if were generated in detached process).
 		# Default is undef.
 		'-cache_errors' => undef,
+		# Mark that we are caching HTTP response, and that we want extra treatment,
+		# i.e. automatic adding of Expires/Cache-Control and Content-Length headers
+		'-http_output' => 1,
 	},
 );
 # You define site-wide options for "Generating..." page (if enabled) here
diff --git a/gitweb/lib/GitwebCache/CacheOutput.pm b/gitweb/lib/GitwebCache/CacheOutput.pm
index 188d4ab..e9884ca 100644
--- a/gitweb/lib/GitwebCache/CacheOutput.pm
+++ b/gitweb/lib/GitwebCache/CacheOutput.pm
@@ -19,6 +19,7 @@ use warnings;
 
 use File::Copy qw();
 use Symbol qw(qualify_to_ref);
+use CGI::Util qw(expires);
 
 use Exporter qw(import);
 our @EXPORT      = qw(cache_output);
@@ -69,6 +70,37 @@ sub cache_output {
 		$filename ||= $capture_filename;
 	}
 
+	if ($opts{'-http_output'}) {
+		# we need filehandle; filename is not enough
+		open $fh, '<', $filename unless defined $fh;
+
+		# get HTTP headers first
+		my (@headers, %norm_headers);
+		while (my $line = <$fh>) {
+			last if $line eq "\r\n";
+			push @headers, $line;
+			if ($line =~ /^([^:]+:)\s+(.*)$/) {
+				(my $header = lc($1)) =~ s/_/-/;
+				$norm_headers{$header} = $2;
+			}
+		}
+		print join('', @headers);
+
+		# extra headers
+		if (!exists $norm_headers{lc('Expires')} &&
+		    !exists $norm_headers{lc('Cache-Control')}) {
+			my $expires_in = $cache->expires_in($key);
+			print "Expires: " . expires($expires_in, 'http')."\r\n".
+			      "Cache-Control: max-age=$expires_in\r\n";
+		}
+		if (!exists $norm_headers{lc('Content-Length')}) {
+			my $length = (-s $fh) - (tell $fh);
+			print "Content-Length: $length\r\n" if $length;
+		}
+
+		print "\r\n"; # separates headers from body
+	}
+
 	if (defined $fh || defined $filename) {
 		# set binmode only if $fh is defined (is a filehandle)
 		# File::Copy::copy opens files given by filename in binary mode
diff --git a/gitweb/lib/GitwebCache/FileCacheWithLocking.pm b/gitweb/lib/GitwebCache/FileCacheWithLocking.pm
index 291526e..813331a 100644
--- a/gitweb/lib/GitwebCache/FileCacheWithLocking.pm
+++ b/gitweb/lib/GitwebCache/FileCacheWithLocking.pm
@@ -457,6 +457,30 @@ sub is_valid {
 	return (($now - $mtime) < $expires_in);
 }
 
+# $cache->expires_in($key)
+#
+# Returns number of seconds an entry would be valid, or undef
+# if cache entry for given $key does not exists.
+sub expires_in {
+	my ($self, $key) = @_;
+
+	my $path = $self->path_to_key($key);
+
+	# does file exists in cache?
+	return undef unless -f $path;
+	# get its modification time
+	my $mtime = (stat(_))[9] # _ to reuse stat structure used in -f test
+		or $self->_handle_error("Couldn't stat file '$path' for key '$key': $!");
+
+	my $expires_in = $self->get_expires_in();
+
+	my $now = time();
+	print STDERR __PACKAGE__."now=$now; mtime=$mtime; ".
+		"expires_in=$expires_in; diff=".($now - $mtime)."\n";
+
+	return $expires_in == 0 ? 0 : ($self->get_expires_in() - ($now - $mtime));
+}
+
 # Getting and setting
 
 # ($fh, $filename) = $cache->compute_fh($key, $code);
diff --git a/t/t9512/test_cache_output.pl b/t/t9512/test_cache_output.pl
index 758848c..3492dcf 100755
--- a/t/t9512/test_cache_output.pl
+++ b/t/t9512/test_cache_output.pl
@@ -6,6 +6,8 @@ use strict;
 
 use Test::More;
 
+use CGI qw(:standard);
+
 # test source version
 use lib $ENV{GITWEBLIBDIR} || "$ENV{GIT_BUILD_DIR}/gitweb/lib";
 
@@ -158,5 +160,28 @@ subtest 'errors are cached with -cache_errors => 1' => sub {
 };
 
 
+# caching HTTP output
+subtest 'HTTP output' => sub {
+	$cache->remove($key);
+	$cache->set_expires_in(60);
+
+	my $header =
+		header(-status=>'200 OK', -type=>'text/plain', -charset => 'utf-8');
+	my $data = "1234567890";
+	$action_output = $header.$data;
+	$test_data = capture_output_of_cache_output(\&action, '-http_output' => 1);
+
+	$header =~ s/\r\n$//;
+	my $length = do { use bytes; length($data); };
+	like($test_data, qr/^\Q$header\E/, 'http: starts with provided http header');
+	like($test_data, qr/\Q$data\E$/,   'http: ends with body (payload)');
+	like($test_data, qr/^Expires: /m,  'http: some "Expires:" header added');
+	like($test_data, qr/^Cache-Control: max-age=\d+\r\n/m,
+	                                   'http: "Cache-Control:" with max-age added');
+	like($test_data, qr/^Content-Length: $length\r\n/m,
+	                                   'http: "Content-Length:" header with correct value'); 
+};
+
+
 done_testing();
 __END__
-- 
1.7.3

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]