[PATCH 2/2] git-rev-list --bisect: optimization

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This improves the performance of revision bisection.

The idea is to avoid rather expensive count_distance() function,
which counts the number of commits that are reachable from any
given commit (including itself) in the set.  When a commit has
only one relevant parent commit, the number of commits the
commit can reach is exactly the number of commits that the
parent can reach plus one; instead of running count_distance()
on commits that are on straight single strand of pearls, we can
just add one to the parents' count.

On the other hand, for a merge commit, because the commits
reachable from one parent can be reachable from another parent,
you cannot just add the parents' counts up plus one for the
commit itself; that would overcount ancestors that are reachable
from more than one parents.

The algorithm used in the patch runs count_distance() on merge
commits, and uses the util field of commit objects to remember
them.  After that, the number of commits reachable from each of
the remaining commits is counted by finding a commit whose count
is not yet known but the count for its (sole) parent is known,
and adding one to the parent's count, until we assign numbers to
everybody.

Another small optimization is whenever we find a half-way (that
is, a commit that can reach exactly half of the commits in the
set), we stop giving counts to remaining commits.

The performance to bisect between v1.0.0 and v1.5.0 in git.git
repository was improved by saying good and bad in turns from
3.68 seconds down to 1.26 seconds.  Bisecting the kernel between
v2.6.18 and v2.6.20 was sped up from 21.84 seconds down to 4.22
seconds.

Signed-off-by: Junio C Hamano <junkio@xxxxxxx>
---

 * This is a rebase on top of updated Johannes's patch, which is
   [1/2] of this series.

   I am not enabling this for path limited case, so it is still
   in a WIP state.  I am not sure if parent rewriting is doing
   the right thing (if so, then we should be able to assume that
   "interesting" commits are already made contiguous and we can
   keep giving the number one higher than the parent to commits
   on a single-strand-of-pearls like this patch does).

 builtin-rev-list.c |  162 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 160 insertions(+), 2 deletions(-)

diff --git a/builtin-rev-list.c b/builtin-rev-list.c
index 723e4d4..111ba60 100644
--- a/builtin-rev-list.c
+++ b/builtin-rev-list.c
@@ -207,6 +207,160 @@ static struct commit_list *find_bisection(struct commit_list *list,
 	return best;
 }
 
+static inline int commit_interesting(struct commit_list *elem)
+{
+	unsigned flags = elem->item->object.flags;
+	if (flags & UNINTERESTING)
+		return 0;
+	return (!revs.prune_fn || (flags & TREECHANGE));
+}
+
+static inline int weight(struct commit_list *elem)
+{
+	return *((int*)(elem->item->util));
+}
+
+static inline void weight_set(struct commit_list *elem, int weight)
+{
+	*((int*)(elem->item->util)) = weight;
+}
+
+static int count_interesting_parents(struct commit_list *elem)
+{
+	int cnt = 0;
+	if (!elem->item->parents)
+		return cnt;
+	for (elem = elem->item->parents; elem; elem = elem->next) {
+		if (commit_interesting(elem))
+			cnt++;
+	}
+	return cnt;
+}
+
+static struct commit_list *find_bisection_2(struct commit_list *list,
+					    int *reaches, int *all)
+{
+	int n, nr, counted, distance;
+	struct commit_list *p, *best;
+	int *weights;
+
+	for (nr = 0, p = list; p; p = p->next) {
+		if (commit_interesting(p))
+			nr++;
+	}
+	*all = nr;
+	weights = xcalloc(nr, sizeof(int*));
+	counted = 0;
+
+	for (n = 0, p = list; p; p = p->next) {
+		if (!commit_interesting(p))
+			continue;
+		if (commit_interesting(p)) {
+			/*
+			 * positive weight is the number of interesting
+			 * commits it can reach, including itself.
+			 * weight = 0 means it has one parent and
+			 * its distance is unknown.
+			 * weight < 0 means it has more than one
+			 * parent and its distance is unknown.
+			 */
+			p->item->util = &weights[n++];
+			switch (count_interesting_parents(p)) {
+			case 0:
+				weight_set(p, 1);
+				counted++;
+				break;
+			case 1:
+				weight_set(p, 0);
+				break;
+			default:
+				weight_set(p, -1);
+				break;
+			}
+		}
+	}
+
+	/*
+	 * If you have only one parent in the resulting set
+	 * then you can reach one commit more than that parent
+	 * can reach.  So we do not have to run the expensive
+	 * count_distance() for single strand of pearls.
+	 *
+	 * However, if you have more than one parents, you cannot
+	 * just add their distance and one for yourself, since
+	 * they usually reach the same ancestor and you would
+	 * end up counting them twice that way.
+	 *
+	 * So we will first count distance of merges the usual
+	 * way, and then fill the blanks using cheaper algorithm.
+	 */
+	for (p = list; p; p = p->next) {
+		if (!commit_interesting(p))
+			continue;
+		n = weight(p);
+		if (0 <= n)
+			continue;
+		distance = count_distance(p);
+		clear_distance(p);
+		weight_set(p, distance);
+
+		/* Is it happen to be at exactly half-way? */
+		distance *= 2;
+		if (nr == distance || (nr+1) == distance) {
+			p->next = NULL;
+			*reaches = weight(p);
+			free(weights);
+			return p;
+		}
+		counted++;
+	}
+
+	while (counted < nr) {
+		for (p = list; p; p = p->next) {
+			struct commit_list *q;
+
+			if (!commit_interesting(p) || 0 < weight(p))
+				continue;
+			for (q = p->item->parents; q; q = q->next)
+				if (commit_interesting(q) && 0 < weight(q))
+					break;
+			if (!q)
+				continue;
+			weight_set(p, weight(q)+1);
+			counted++;
+
+			/* Is it happen to be at exactly half-way? */
+			distance = weight(p) * 2;
+			if (nr == distance || (nr+1) == distance) {
+				p->next = NULL;
+				*reaches = weight(p);
+				free(weights);
+				return p;
+			}
+		}
+	}
+
+	/* Then find the best one */
+	counted = 0;
+	best = list;
+	for (p = list; p; p = p->next) {
+		if (!commit_interesting(p))
+			continue;
+		distance = weight(p);
+		if (nr - distance < distance)
+			distance = nr - distance;
+		if (distance > counted) {
+			best = p;
+			counted = distance;
+			*reaches = weight(p);
+		}
+	}
+	if (best)
+		best->next = NULL;
+	free(weights);
+	return best;
+}
+
 static void read_revisions_from_stdin(struct rev_info *revs)
 {
 	char line[1000];
@@ -298,8 +452,12 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix)
 	if (bisect_list) {
 		int reaches = reaches, all = all;
 
-		revs.commits = find_bisection(revs.commits,
-					      &reaches, &all);
+		if (!revs.prune_fn)
+			revs.commits = find_bisection_2(revs.commits,
+							&reaches, &all);
+		else
+			revs.commits = find_bisection(revs.commits,
+						      &reaches, &all);
 		if (bisect_show_vars) {
 			int cnt;
 			if (!revs.commits)
-- 
1.5.1.rc1.610.g3ba7


-
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]