Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx> writes: > (In fact, I would also suggest we drop or try to fix BISECT_NAMES support, > while at it - it never really worked, and iirc it was partly exactly > *because* of the end-condition not being handled right). I am not opposed to dropping bisect-names. I was having fun with this patch, which made an experiment to bisect between v1.0.0 and v1.5.0 in git.git repository by alternately saying good and bad go from 3.68 seconds to 1.26 seconds. Bisecting the kernel between v2.6.18 and v2.6.20 with the same test script takes 21.84 seconds vs 4.22 seconds. The idea is that count_distance() is expensive and we would want to avoid it when we can. When a commit has only one relevant parent commit, then the number of commits that commit can reach is exactly one more than the number of commits that the parent can reach, so we can avoid running count_distance() on commits that are on straight single strand of pearls. On the other hand, for a merge commit, because the commits reachable from one parent can be reachable from another parent, you cannot just add the parents' up (plus one for yourself). So the patch runs count_distance() on merges (and uses util field to remember them), and then builds the rest by going backwards, finding each commit whose distance is not yet known but the distance of its (sole) parent is known, add one to that, until we count distance for everybody. Another small optimization is whenever we find a half-way (that is, a commit whose distance is half of the number of all commits), we say that is good enough. --- Here is the test script that I ran /usr/bin/time on to benchmark. It takes three params: path to git-rev-list, a good commit (i.e. older one), and a bad commit. The script alternately says "bisect good" and "bisect bad" and finishes when it narrows down the suspects to one. Even with the "half is good enough" optimization in place, somehow I am getting the same bisect sequence to narrow down between v2.6.18 and v2.6.20 (the kernel) and v1.0.0 and v1.5.0 (git). -- >8 -- test script -- >8 -- #!/bin/sh cmd=$1 good=$(git rev-parse --verify "$2") bad=$(git rev-parse --verify "$3") flip=good run_one () { bisect=$($cmd --bisect $bad --not $good) ifgood=$(git-rev-list $bad --not $good $bisect | wc -l) ifbad=$(git-rev-list $bisect --not $good | wc -l) echo "bad $bad" echo "good $good" echo "ifgood $ifgood" echo "ifbad $ifbad" echo if test "$bisect" = "$bad" then false else case "$flip" in good) good="$good $bisect" flip=bad ;; bad) bad="$bisect" flip=good ;; esac true fi } while run_one do : done -- >8 -- end of test script -- >8 -- builtin-rev-list.c | 169 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 files changed, 167 insertions(+), 2 deletions(-) diff --git a/builtin-rev-list.c b/builtin-rev-list.c index c2db5a5..c43d88d 100644 --- a/builtin-rev-list.c +++ b/builtin-rev-list.c @@ -203,6 +203,167 @@ static struct commit_list *find_bisection(struct commit_list *list) return best; } +static inline int commit_interesting(struct commit_list *elem) +{ + unsigned flags = elem->item->object.flags; + if (flags & UNINTERESTING) + return 0; + return (!revs.prune_fn || (flags & TREECHANGE)); +} + +static inline int weight(struct commit_list *elem) +{ + return *((int*)(elem->item->util)); +} + +static inline void weight_set(struct commit_list *elem, int weight) +{ + *((int*)(elem->item->util)) = weight; +} + +static int count_interesting_parents(struct commit_list *elem) +{ + int cnt = 0; + if (!elem->item->parents) + return cnt; + for (elem = elem->item->parents; elem; elem = elem->next) { + if (commit_interesting(elem)) + cnt++; + } + return cnt; +} + +static struct commit_list *find_bisection_2(struct commit_list *list) +{ + int n, nr, last_counted, counted, distance; + struct commit_list *p, *best; + int *weights; + + for (nr = 0, p = list; p; p = p->next) { + if (commit_interesting(p)) + nr++; + } + weights = xcalloc(nr, sizeof(int*)); + counted = 0; + + for (n = 0, p = list; p; p = p->next) { + if (!commit_interesting(p)) + continue; + if (commit_interesting(p)) { + /* + * positive weight is the number of interesting + * commits it can reach, including itself. + * weight = 0 means it has one parent and + * its distance is unknown. + * weight < 0 means it has more than one + * parent and its distance is unknown. + */ + p->item->util = &weights[n++]; + switch (count_interesting_parents(p)) { + case 0: + weight_set(p, 1); + counted++; + break; + case 1: + weight_set(p, 0); + break; + default: + weight_set(p, -1); + break; + } + } + } + + /* + * If you have only one parent in the resulting set + * then you can reach one commit more than that parent + * can reach. So we do not have to run the expensive + * count_distance() for single strand of pearls. + * + * However, if you have more than one parents, you cannot + * just add their distance and one for yourself, since + * they usually reach the same ancestor and you would + * end up counting them twice that way. + * + * So we will first count distance of merges the usual + * way, and then fill the blanks using cheaper algorithm. + */ + for (p = list; p; p = p->next) { + if (!commit_interesting(p)) + continue; + n = weight(p); + if (0 <= n) + continue; + distance = count_distance(p); + clear_distance(p); + weight_set(p, distance); + distance = weight(p) * 2; + if (nr == distance || (nr+1) == distance) { + p->next = NULL; + free(weights); + return p; + } + counted++; + } + + while (counted < nr) { + do { + last_counted = counted; + for (p = list; p; p = p->next) { + struct commit_list *q; + + if (!commit_interesting(p) || 0 < weight(p)) + continue; + if (weight(p) < 0) + die("OOPS"); + for (q = p->item->parents; + q; + q = q->next) + if (commit_interesting(q) && + 0 < weight(q)) + break; + if (!q) + continue; + weight_set(p, weight(q)+1); + counted++; + + /* + * Do we happen to have found one that + * has the desired half-weight? + */ + distance = weight(p) * 2; + if (nr == distance || (nr+1) == distance) { + p->next = NULL; + free(weights); + return p; + } + } + } while (counted != last_counted); + + } + + /* Then find the best one */ + counted = 0; + best = list; + for (p = list; p; p = p->next) { + if (!commit_interesting(p)) + continue; + distance = weight(p); + if (nr - distance < distance) + distance = nr - distance; + if (distance > counted) { + best = p; + counted = distance; + } + } + if (best) { + best->next = NULL; + } + free(weights); + return best; +} + + static void read_revisions_from_stdin(struct rev_info *revs) { char line[1000]; @@ -285,8 +446,12 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix) if (revs.tree_objects) mark_edges_uninteresting(revs.commits, &revs, show_edge); - if (bisect_list) - revs.commits = find_bisection(revs.commits); + if (bisect_list) { + if (!revs.prune_fn) + revs.commits = find_bisection_2(revs.commits); + else + revs.commits = find_bisection(revs.commits); + } traverse_commit_list(&revs, show_commit, show_object); - To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html