[PATCH] process tree diffs during "rev-list --objects"

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, May 01, 2013 at 04:49:47PM -0400, Jeff King wrote:

> Another avenue I'd like to explore is actually doing a tree-diff from
> the last processed commit, since we should need to examine only the
> changed entries. I suspect it won't be a big benefit, though, because
> even though the tree diff can happen in O(# of entries), we are trying
> to beat doing an O(1) hash for each entry. So it's the same algorithmic
> complexity, and it is hard to beat a few hashcmp() calls. We'll see.

As I suspected, this is not a win:

Test                               origin            this tree
--------------------------------------------------------------------------
0001.1: rev-list --all             0.40(0.37+0.02)   0.40(0.38+0.01) +0.0%
0001.2: rev-list --all --objects   2.22(2.16+0.05)   2.41(2.16+0.24) +8.6%

I've included the patch below for reference. It really does work as
planned; the number of calls to lookup_object for doing "rev-list --all
--objects" on git.git is reduced from ~15M to ~2.6M. But the benefit is
eaten up by the extra tree-processing time. For fun, here's an excerpt
from the "perf diff" between the two versions:

            +10.51%  git                 [.] update_tree_entry
             +8.97%  git                 [.] lookup_object
             +7.81%  git                 [.] process_tree
             +5.20%  libc-2.13.so        [.] __memcmp_sse4_1
             +3.77%  libc-2.13.so        [.] __strlen_sse42
             +2.69%  git                 [.] base_name_compare
             [....]
             -7.90%  git                 [.] tree_entry
            -39.19%  git                 [.] lookup_object

Shawn had suggested trying to store the tree deltas in such a way that
these tree comparisons could be done more cheaply. That might tip things
in favor of the tree-diff approach.

-Peff

---
diff --git a/list-objects.c b/list-objects.c
index 3dd4a96..b87a049 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -59,8 +59,57 @@ static void process_tree(struct rev_info *revs,
 	/* Nothing to do */
 }
 
+static struct tree *skip_processed_entries(struct tree_desc *cur,
+					   struct tree_desc *old)
+{
+	while (cur->size && old->size) {
+		int cmp = base_name_compare(cur->entry.path,
+					    tree_entry_len(&cur->entry),
+					    0, /* ignore mode */
+					    old->entry.path,
+					    tree_entry_len(&old->entry),
+					    0);
+		if (cmp > 0) {
+			/*
+			 * Old tree has something we do not; ignore it, as it
+			 * was already processed.
+			 */
+			update_tree_entry(old);
+		}
+		else if (cmp == 0) {
+			/*
+			 * We have the same path; if the sha1s match, then we
+			 * have already processed it and can ignore. Otherwise,
+			 * return for processing, but also provide the old
+			 * tree so that we can recurse.
+			 */
+			if (!hashcmp(cur->entry.sha1, old->entry.sha1)) {
+				update_tree_entry(cur);
+				update_tree_entry(old);
+			}
+			else {
+				struct object *o = lookup_object(old->entry.sha1);
+				update_tree_entry(old);
+				if (o && o->type == OBJ_TREE)
+					return (struct tree *)o;
+				else
+					return NULL;
+			}
+		}
+		else {
+			/*
+			 * We have an entry the old one does not; we must look
+			 * at it (and there is no matching old tree to report).
+			 */
+			return NULL;
+		}
+	}
+	return NULL;
+}
+
 static void process_tree(struct rev_info *revs,
 			 struct tree *tree,
+			 struct tree *old_tree,
 			 show_object_fn show,
 			 struct name_path *path,
 			 struct strbuf *base,
@@ -68,7 +117,7 @@ static void process_tree(struct rev_info *revs,
 			 void *cb_data)
 {
 	struct object *obj = &tree->object;
-	struct tree_desc desc;
+	struct tree_desc desc, old_desc;
 	struct name_entry entry;
 	struct name_path me;
 	enum interesting match = revs->diffopt.pathspec.nr == 0 ?
@@ -97,7 +146,18 @@ static void process_tree(struct rev_info *revs,
 
 	init_tree_desc(&desc, tree->buffer, tree->size);
 
-	while (tree_entry(&desc, &entry)) {
+	if (old_tree)
+		init_tree_desc(&old_desc, old_tree->buffer, old_tree->size);
+	else
+		init_tree_desc(&old_desc, NULL, 0);
+
+	while (desc.size) {
+		struct tree *old_tree;
+
+		old_tree = skip_processed_entries(&desc, &old_desc);
+		if (!tree_entry(&desc, &entry))
+			break;
+
 		if (match != all_entries_interesting) {
 			match = tree_entry_interesting(&entry, base, 0,
 						       &revs->diffopt.pathspec);
@@ -109,7 +169,7 @@ static void process_tree(struct rev_info *revs,
 
 		if (S_ISDIR(entry.mode))
 			process_tree(revs,
-				     lookup_tree(entry.sha1),
+				     lookup_tree(entry.sha1), old_tree,
 				     show, &me, base, entry.path,
 				     cb_data);
 		else if (S_ISGITLINK(entry.mode))
@@ -123,8 +183,6 @@ static void process_tree(struct rev_info *revs,
 				     cb_data);
 	}
 	strbuf_setlen(base, baselen);
-	free(tree->buffer);
-	tree->buffer = NULL;
 }
 
 static void mark_edge_parents_uninteresting(struct commit *commit,
@@ -173,6 +231,7 @@ void traverse_commit_list(struct rev_info *revs,
 	int i;
 	struct commit *commit;
 	struct strbuf base;
+	struct tree *last_root_tree = NULL;
 
 	strbuf_init(&base, PATH_MAX);
 	while ((commit = get_revision(revs)) != NULL) {
@@ -196,8 +255,9 @@ void traverse_commit_list(struct rev_info *revs,
 			continue;
 		}
 		if (obj->type == OBJ_TREE) {
-			process_tree(revs, (struct tree *)obj, show_object,
-				     NULL, &base, name, data);
+			process_tree(revs, (struct tree *)obj, last_root_tree,
+				     show_object, NULL, &base, name, data);
+			last_root_tree = (struct tree *)obj;
 			continue;
 		}
 		if (obj->type == OBJ_BLOB) {
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]