While looking at the gitweb source yesterday, I noticed a number of similar expensive workflows used by a number of actions (summary, shortlog, log, rss, atom, and history). The current workflows are: get ~100 sha1's using rev-list foreach sha1 get/parse 1 commit using rev-list output commit The new workflows I'm proposing would be: get/parse ~100 commit's using rev-list foreach commit output commit The following simplified commands gives an idea of the git only overhead between these two workflows. time \ for r in `git-rev-list --max-count=100 HEAD --` ; \ do git-rev-list --header --parents --max-count=1 $r -- ; \ done > /dev/null real 0m0.490s user 0m0.224s sys 0m0.228s time \ git-rev-list --header --parents --max-count=100 HEAD -- > /dev/null real 0m0.058s user 0m0.008s sys 0m0.004s There would seems to be a benefit from making the proposed change to these workflows, when run on my machine against a clone of Linus's tree. One issue with this change is that, gitweb is page orientated. Page 0 shows the first 100 items from a given hash, page 1 uses the same given hash but show 100 to 199 items, etc. Using 'git-rev-list --header --parents' and then throwing away most of the result is very wasteful. So I'm suggesting we add a new option to git-rev-list which will only start show results once its has iterated past a given number of items. Using a caret or tilde doesn't seem to return the same result. I've attached a discussion patch which adds a new option --start-count to git-rev-list and changed the summary and showlog actions of gitweb to use this new option. I'm sure there are many improvements to this patch, comments? Robert ----- diff --git a/gitweb/gitweb.perl b/gitweb/gitweb.perl index 4059894..a1e0ccc 100755 --- a/gitweb/gitweb.perl +++ b/gitweb/gitweb.perl @@ -1260,6 +1260,30 @@ sub parse_tag { return %tag } +sub parse_commits { + my $commit_id = shift; + my $start_count = shift; + my $max_count = shift; + + my @cos; + my @commit_lines; + + local $/ = "\0"; + open my $fd, "-|", git_cmd(), "rev-list", + "--header", "--parents", "--start-count=$start_count", "--max-count=$max_count", + $commit_id, "--" + or return; + while (my $commit = <$fd>) { + @commit_lines = split '\n', $commit; + pop @commit_lines; + my %co = parse_commit(undef, \@commit_lines); + push @cos, \%co; + } + close $fd or return; + + return @cos; +} + sub parse_commit { my $commit_id = shift; my $commit_text = shift; @@ -2633,29 +2657,29 @@ sub git_project_list_body { sub git_shortlog_body { # uses global variable $project - my ($revlist, $from, $to, $refs, $extra) = @_; + my ($commitlist, $from, $to, $refs, $extra) = @_; $from = 0 unless defined $from; - $to = $#{$revlist} if (!defined $to || $#{$revlist} < $to); + $to = $#{$commitlist} if (!defined $to || $#{$commitlist} < $to); print "<table class=\"shortlog\" cellspacing=\"0\">\n"; my $alternate = 1; for (my $i = $from; $i <= $to; $i++) { - my $commit = $revlist->[$i]; + my $co = $commitlist->[$i]; + my $commit = $co->{'id'}; #my $ref = defined $refs ? format_ref_marker($refs, $commit) : ''; my $ref = format_ref_marker($refs, $commit); - my %co = parse_commit($commit); if ($alternate) { print "<tr class=\"dark\">\n"; } else { print "<tr class=\"light\">\n"; } $alternate ^= 1; - # git_summary() used print "<td><i>$co{'age_string'}</i></td>\n" . - print "<td title=\"$co{'age_string_age'}\"><i>$co{'age_string_date'}</i></td>\n" . - "<td><i>" . esc_html(chop_str($co{'author_name'}, 10)) . "</i></td>\n" . + # git_summary() used print "<td><i>$co->{'age_string'}</i></td>\n" . + print "<td title=\"$co->{'age_string_age'}\"><i>$co->{'age_string_date'}</i></td>\n" . + "<td><i>" . esc_html(chop_str($co->{'author_name'}, 10)) . "</i></td>\n" . "<td>"; - print format_subject_html($co{'title'}, $co{'title_short'}, + print format_subject_html($co->{'title'}, $co->{'title_short'}, href(action=>"commit", hash=>$commit), $ref); print "</td>\n" . "<td class=\"link\">" . @@ -2952,13 +2976,9 @@ sub git_summary { } } - open my $fd, "-|", git_cmd(), "rev-list", "--max-count=17", - git_get_head_hash($project), "--" - or die_error(undef, "Open git-rev-list failed"); - my @revlist = map { chomp; $_ } <$fd>; - close $fd; + my @commitlist = parse_commits($head, 0, 17); git_print_header_div('shortlog'); - git_shortlog_body(\@revlist, 0, 15, $refs, + git_shortlog_body(\@commitlist, 0, 15, $refs, $cgi->a({-href => href(action=>"shortlog")}, "...")); if (@taglist) { @@ -4313,15 +4333,12 @@ sub git_shortlog { } my $refs = git_get_references(); - my $limit = sprintf("--max-count=%i", (100 * ($page+1))); - open my $fd, "-|", git_cmd(), "rev-list", $limit, $hash, "--" - or die_error(undef, "Open git-rev-list failed"); - my @revlist = map { chomp; $_ } <$fd>; - close $fd; + my $max_count = (100 * ($page+1)); + my @commitlist = parse_commits($hash, (100 * $page), $max_count); - my $paging_nav = format_paging_nav('shortlog', $hash, $head, $page, $#revlist); + my $paging_nav = format_paging_nav('shortlog', $hash, $head, $page, $max_count); my $next_link = ''; - if ($#revlist >= (100 * ($page+1)-1)) { + if ($max_count >= 100) { $next_link = $cgi->a({-href => href(action=>"shortlog", hash=>$hash, page=>$page+1), -title => "Alt-n"}, "next"); @@ -4332,7 +4349,7 @@ sub git_shortlog { git_print_page_nav('shortlog','', $hash,$hash,$hash, $paging_nav); git_print_header_div('summary', $project); - git_shortlog_body(\@revlist, ($page * 100), $#revlist, $refs, $next_link); + git_shortlog_body(\@commitlist, 0, $#commitlist, $refs, $next_link); git_footer_html(); } diff --git a/list-objects.c b/list-objects.c index f1fa21c..d96c8bf 100644 --- a/list-objects.c +++ b/list-objects.c @@ -108,8 +108,12 @@ void traverse_commit_list(struct rev_info *revs, struct object_array objects = { 0, 0, NULL }; while ((commit = get_revision(revs)) != NULL) { - process_tree(revs, commit->tree, &objects, NULL, ""); - show_commit(commit); + if (revs->start_count <= 0) { + process_tree(revs, commit->tree, &objects, NULL, ""); + show_commit(commit); + } else { + revs->start_count--; + } } for (i = 0; i < revs->pending.nr; i++) { struct object_array_entry *pending = revs->pending.objects + i; diff --git a/revision.c b/revision.c index 993bb66..3e3d929 100644 --- a/revision.c +++ b/revision.c @@ -524,6 +524,7 @@ void init_revisions(struct rev_info *revs, const char *prefix) revs->prefix = prefix; revs->max_age = -1; revs->min_age = -1; + revs->start_count = -1; revs->max_count = -1; revs->prune_fn = NULL; @@ -756,6 +757,10 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, const ch const char *arg = argv[i]; if (*arg == '-') { int opts; + if (!strncmp(arg, "--start-count=", 14)) { + revs->start_count = atoi(arg + 14); + continue; + } if (!strncmp(arg, "--max-count=", 12)) { revs->max_count = atoi(arg + 12); continue; diff --git a/revision.h b/revision.h index 3adab95..c2dce8c 100644 --- a/revision.h +++ b/revision.h @@ -75,6 +75,7 @@ struct rev_info { struct grep_opt *grep_filter; /* special limits */ + int start_count; int max_count; unsigned long max_age; unsigned long min_age; - To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html