[RFC] Possible optimization for gitweb

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



While looking at the gitweb source yesterday, I noticed a number of
similar expensive workflows used by a number of actions (summary,
shortlog, log, rss, atom, and history).

The current workflows are:
	get ~100 sha1's using rev-list
	foreach sha1
		get/parse 1 commit using rev-list
		output commit

The new workflows I'm proposing would be:
	get/parse ~100 commit's using rev-list
	foreach commit
		output commit

The following simplified commands gives an idea of the git only overhead
between these two workflows.

time \
for r in `git-rev-list --max-count=100 HEAD --` ; \
do git-rev-list --header --parents --max-count=1 $r -- ; \
done > /dev/null

real    0m0.490s
user    0m0.224s
sys     0m0.228s

time \
git-rev-list --header --parents --max-count=100 HEAD -- > /dev/null

real    0m0.058s
user    0m0.008s
sys     0m0.004s

There would seems to be a benefit from making the proposed change to
these workflows, when run on my machine against a clone of Linus's tree.

One issue with this change is that, gitweb is page orientated.  Page 0
shows the first 100 items from a given hash, page 1 uses the same given
hash but show 100 to 199 items, etc.  Using 'git-rev-list --header
--parents' and then throwing away most of the result is very wasteful.

So I'm suggesting we add a new option to git-rev-list which will only
start show results once its has iterated past a given number of items.
Using a caret or tilde doesn't seem to return the same result.

I've attached a discussion patch which adds a new option --start-count
to git-rev-list and changed the summary and showlog actions of gitweb to
use this new option.

I'm sure there are many improvements to this patch, comments?

Robert

-----


diff --git a/gitweb/gitweb.perl b/gitweb/gitweb.perl
index 4059894..a1e0ccc 100755
--- a/gitweb/gitweb.perl
+++ b/gitweb/gitweb.perl
@@ -1260,6 +1260,30 @@ sub parse_tag {
 	return %tag
 }
 
+sub parse_commits {
+	my $commit_id = shift;
+	my $start_count = shift;
+	my $max_count = shift;
+
+	my @cos;
+	my @commit_lines;
+
+	local $/ = "\0";
+	open my $fd, "-|", git_cmd(), "rev-list",
+		"--header", "--parents", "--start-count=$start_count", "--max-count=$max_count",
+		$commit_id, "--"
+		or return;
+	while (my $commit = <$fd>) {
+		@commit_lines = split '\n', $commit;
+		pop @commit_lines;
+		my %co = parse_commit(undef, \@commit_lines);
+		push @cos, \%co;
+	}
+	close $fd or return;
+
+	return @cos;
+}
+
 sub parse_commit {
 	my $commit_id = shift;
 	my $commit_text = shift;
@@ -2633,29 +2657,29 @@ sub git_project_list_body {
 
 sub git_shortlog_body {
 	# uses global variable $project
-	my ($revlist, $from, $to, $refs, $extra) = @_;
+	my ($commitlist, $from, $to, $refs, $extra) = @_;
 
 	$from = 0 unless defined $from;
-	$to = $#{$revlist} if (!defined $to || $#{$revlist} < $to);
+	$to = $#{$commitlist} if (!defined $to || $#{$commitlist} < $to);
 
 	print "<table class=\"shortlog\" cellspacing=\"0\">\n";
 	my $alternate = 1;
 	for (my $i = $from; $i <= $to; $i++) {
-		my $commit = $revlist->[$i];
+		my $co = $commitlist->[$i];
+		my $commit = $co->{'id'};
 		#my $ref = defined $refs ? format_ref_marker($refs, $commit) : '';
 		my $ref = format_ref_marker($refs, $commit);
-		my %co = parse_commit($commit);
 		if ($alternate) {
 			print "<tr class=\"dark\">\n";
 		} else {
 			print "<tr class=\"light\">\n";
 		}
 		$alternate ^= 1;
-		# git_summary() used print "<td><i>$co{'age_string'}</i></td>\n" .
-		print "<td title=\"$co{'age_string_age'}\"><i>$co{'age_string_date'}</i></td>\n" .
-		      "<td><i>" . esc_html(chop_str($co{'author_name'}, 10)) . "</i></td>\n" .
+		# git_summary() used print "<td><i>$co->{'age_string'}</i></td>\n" .
+		print "<td title=\"$co->{'age_string_age'}\"><i>$co->{'age_string_date'}</i></td>\n" .
+		      "<td><i>" . esc_html(chop_str($co->{'author_name'}, 10)) . "</i></td>\n" .
 		      "<td>";
-		print format_subject_html($co{'title'}, $co{'title_short'},
+		print format_subject_html($co->{'title'}, $co->{'title_short'},
 		                          href(action=>"commit", hash=>$commit), $ref);
 		print "</td>\n" .
 		      "<td class=\"link\">" .
@@ -2952,13 +2976,9 @@ sub git_summary {
 		}
 	}
 
-	open my $fd, "-|", git_cmd(), "rev-list", "--max-count=17",
-		git_get_head_hash($project), "--"
-		or die_error(undef, "Open git-rev-list failed");
-	my @revlist = map { chomp; $_ } <$fd>;
-	close $fd;
+	my @commitlist = parse_commits($head, 0, 17);
 	git_print_header_div('shortlog');
-	git_shortlog_body(\@revlist, 0, 15, $refs,
+	git_shortlog_body(\@commitlist, 0, 15, $refs,
 	                  $cgi->a({-href => href(action=>"shortlog")}, "..."));
 
 	if (@taglist) {
@@ -4313,15 +4333,12 @@ sub git_shortlog {
 	}
 	my $refs = git_get_references();
 
-	my $limit = sprintf("--max-count=%i", (100 * ($page+1)));
-	open my $fd, "-|", git_cmd(), "rev-list", $limit, $hash, "--"
-		or die_error(undef, "Open git-rev-list failed");
-	my @revlist = map { chomp; $_ } <$fd>;
-	close $fd;
+	my $max_count = (100 * ($page+1));
+	my @commitlist = parse_commits($hash, (100 * $page), $max_count);
 
-	my $paging_nav = format_paging_nav('shortlog', $hash, $head, $page, $#revlist);
+	my $paging_nav = format_paging_nav('shortlog', $hash, $head, $page, $max_count);
 	my $next_link = '';
-	if ($#revlist >= (100 * ($page+1)-1)) {
+	if ($max_count >= 100) {
 		$next_link =
 			$cgi->a({-href => href(action=>"shortlog", hash=>$hash, page=>$page+1),
 			         -title => "Alt-n"}, "next");
@@ -4332,7 +4349,7 @@ sub git_shortlog {
 	git_print_page_nav('shortlog','', $hash,$hash,$hash, $paging_nav);
 	git_print_header_div('summary', $project);
 
-	git_shortlog_body(\@revlist, ($page * 100), $#revlist, $refs, $next_link);
+	git_shortlog_body(\@commitlist, 0, $#commitlist, $refs, $next_link);
 
 	git_footer_html();
 }
diff --git a/list-objects.c b/list-objects.c
index f1fa21c..d96c8bf 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -108,8 +108,12 @@ void traverse_commit_list(struct rev_info *revs,
 	struct object_array objects = { 0, 0, NULL };
 
 	while ((commit = get_revision(revs)) != NULL) {
-		process_tree(revs, commit->tree, &objects, NULL, "");
-		show_commit(commit);
+		if (revs->start_count <= 0) {
+			process_tree(revs, commit->tree, &objects, NULL, "");
+			show_commit(commit);
+		} else {
+			revs->start_count--;
+		}
 	}
 	for (i = 0; i < revs->pending.nr; i++) {
 		struct object_array_entry *pending = revs->pending.objects + i;
diff --git a/revision.c b/revision.c
index 993bb66..3e3d929 100644
--- a/revision.c
+++ b/revision.c
@@ -524,6 +524,7 @@ void init_revisions(struct rev_info *revs, const char *prefix)
 	revs->prefix = prefix;
 	revs->max_age = -1;
 	revs->min_age = -1;
+	revs->start_count = -1;
 	revs->max_count = -1;
 
 	revs->prune_fn = NULL;
@@ -756,6 +757,10 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, const ch
 		const char *arg = argv[i];
 		if (*arg == '-') {
 			int opts;
+			if (!strncmp(arg, "--start-count=", 14)) {
+				revs->start_count = atoi(arg + 14);
+				continue;
+			}
 			if (!strncmp(arg, "--max-count=", 12)) {
 				revs->max_count = atoi(arg + 12);
 				continue;
diff --git a/revision.h b/revision.h
index 3adab95..c2dce8c 100644
--- a/revision.h
+++ b/revision.h
@@ -75,6 +75,7 @@ struct rev_info {
 	struct grep_opt	*grep_filter;
 
 	/* special limits */
+	int start_count;
 	int max_count;
 	unsigned long max_age;
 	unsigned long min_age;
-
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]