[PATCH 4/5] Optimize "diff-index --cached" using cache-tree

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



When running "diff-index --cached" after making a change to only a small
portion of the index, there is no point unpacking unchanged subtrees into
the index recursively, only to find that all entries match anyway.  Tweak
unpack_trees() logic that is used to read in the tree object to catch the
case where the tree entry we are looking at matches the index as a whole
by looking at the cache-tree.

As an exercise, after modifying a few paths in the kernel tree, here are
a few numbers on my Athlon 64X2 3800+:

    (without patch, hot cache)
    $ /usr/bin/time git diff --cached --raw
    :100644 100644 b57e1f5... e69de29... M  Makefile
    :100644 000000 8c86b72... 0000000... D  arch/x86/Makefile
    :000000 100644 0000000... e69de29... A  arche
    0.07user 0.02system 0:00.09elapsed 102%CPU (0avgtext+0avgdata 0maxresident)k
    0inputs+0outputs (0major+9407minor)pagefaults 0swaps

    (with patch, hot cache)
    $ /usr/bin/time ../git.git/git-diff --cached --raw
    :100644 100644 b57e1f5... e69de29... M  Makefile
    :100644 000000 8c86b72... 0000000... D  arch/x86/Makefile
    :000000 100644 0000000... e69de29... A  arche
    0.02user 0.00system 0:00.02elapsed 103%CPU (0avgtext+0avgdata 0maxresident)k
    0inputs+0outputs (0major+2446minor)pagefaults 0swaps

Cold cache numbers are very impressive, but it does not matter very much
in practice:

    (without patch, cold cache)
    $ su root sh -c 'echo 3 >/proc/sys/vm/drop_caches'
    $ /usr/bin/time git diff --cached --raw
    :100644 100644 b57e1f5... e69de29... M  Makefile
    :100644 000000 8c86b72... 0000000... D  arch/x86/Makefile
    :000000 100644 0000000... e69de29... A  arche
    0.06user 0.17system 0:10.26elapsed 2%CPU (0avgtext+0avgdata 0maxresident)k
    247032inputs+0outputs (1172major+8237minor)pagefaults 0swaps

    (with patch, cold cache)
    $ su root sh -c 'echo 3 >/proc/sys/vm/drop_caches'
    $ /usr/bin/time ../git.git/git-diff --cached --raw
    :100644 100644 b57e1f5... e69de29... M  Makefile
    :100644 000000 8c86b72... 0000000... D  arch/x86/Makefile
    :000000 100644 0000000... e69de29... A  arche
    0.02user 0.01system 0:01.01elapsed 3%CPU (0avgtext+0avgdata 0maxresident)k
    18440inputs+0outputs (79major+2369minor)pagefaults 0swaps

This of course helps "git status" as well.

    (without patch, hot cache)
    $ /usr/bin/time ../git.git/git-status >/dev/null
    0.17user 0.18system 0:00.35elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
    0inputs+5336outputs (0major+10970minor)pagefaults 0swaps

    (with patch, hot cache)
    $ /usr/bin/time ../git.git/git-status >/dev/null
    0.10user 0.16system 0:00.27elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k
    0inputs+5336outputs (0major+3921minor)pagefaults 0swaps

Signed-off-by: Junio C Hamano <gitster@xxxxxxxxx>
---
 cache-tree.c   |   32 ++++++++++++++++++++++++++++++++
 cache-tree.h   |    3 +++
 diff-lib.c     |    2 ++
 unpack-trees.c |   17 +++++++++++++++++
 unpack-trees.h |    1 +
 5 files changed, 55 insertions(+), 0 deletions(-)

diff --git a/cache-tree.c b/cache-tree.c
index 5481e43..16a65df 100644
--- a/cache-tree.c
+++ b/cache-tree.c
@@ -631,3 +631,35 @@ void prime_cache_tree(struct cache_tree **it, struct tree *tree)
 	*it = cache_tree();
 	prime_cache_tree_rec(*it, tree);
 }
+
+/*
+ * find the cache_tree that corresponds to the current level without
+ * exploding the full path into textual form.  The root of the
+ * cache tree is given as "root", and our current level is "info".
+ * (1) When at root level, info->prev is NULL, so it is "root" itself.
+ * (2) Otherwise, find the cache_tree that corresponds to one level
+ *     above us, and find ourselves in there.
+ */
+static struct cache_tree *find_cache_tree_from_traversal(struct cache_tree *root,
+							 struct traverse_info *info)
+{
+	struct cache_tree *our_parent;
+
+	if (!info->prev)
+		return root;
+	our_parent = find_cache_tree_from_traversal(root, info->prev);
+	return cache_tree_find(our_parent, info->name.path);
+}
+
+int cache_tree_matches_traversal(struct cache_tree *root,
+				 struct name_entry *ent,
+				 struct traverse_info *info)
+{
+	struct cache_tree *it;
+
+	it = find_cache_tree_from_traversal(root, info);
+	it = cache_tree_find(it, ent->path);
+	if (it && it->entry_count > 0 && !hashcmp(ent->sha1, it->sha1))
+		return it->entry_count;
+	return 0;
+}
diff --git a/cache-tree.h b/cache-tree.h
index eadcad8..3df641f 100644
--- a/cache-tree.h
+++ b/cache-tree.h
@@ -2,6 +2,7 @@
 #define CACHE_TREE_H
 
 #include "tree.h"
+#include "tree-walk.h"
 
 struct cache_tree;
 struct cache_tree_sub {
@@ -42,4 +43,6 @@ int cache_tree_update(struct cache_tree *, struct cache_entry **, int, int, int)
 int write_cache_as_tree(unsigned char *sha1, int flags, const char *prefix);
 void prime_cache_tree(struct cache_tree **, struct tree *);
 
+extern int cache_tree_matches_traversal(struct cache_tree *, struct name_entry *ent, struct traverse_info *info);
+
 #endif
diff --git a/diff-lib.c b/diff-lib.c
index a310fb2..1cb97af 100644
--- a/diff-lib.c
+++ b/diff-lib.c
@@ -446,6 +446,7 @@ int run_diff_index(struct rev_info *revs, int cached)
 	memset(&opts, 0, sizeof(opts));
 	opts.head_idx = 1;
 	opts.index_only = cached;
+	opts.diff_index_cached = cached;
 	opts.merge = 1;
 	opts.fn = oneway_diff;
 	opts.unpack_data = revs;
@@ -502,6 +503,7 @@ int do_diff_cache(const unsigned char *tree_sha1, struct diff_options *opt)
 	memset(&opts, 0, sizeof(opts));
 	opts.head_idx = 1;
 	opts.index_only = 1;
+	opts.diff_index_cached = 1;
 	opts.merge = 1;
 	opts.fn = oneway_diff;
 	opts.unpack_data = &revs;
diff --git a/unpack-trees.c b/unpack-trees.c
index aaacaf1..8eb3ddb 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -326,6 +326,23 @@ static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, str
 			if (src[0])
 				conflicts |= 1;
 		}
+
+		/* special case: "diff-index --cached" looking at a tree */
+		if (o->diff_index_cached &&
+		    n == 1 && dirmask == 1 && S_ISDIR(names->mode)) {
+			int matches;
+			matches = cache_tree_matches_traversal(o->src_index->cache_tree,
+							       names, info);
+			/*
+			 * Everything under the name matches.  Adjust o->pos to
+			 * skip the entire hierarchy.
+			 */
+			if (matches) {
+				o->pos += matches;
+				return mask;
+			}
+		}
+
 		if (traverse_trees_recursive(n, dirmask, conflicts,
 					     names, info) < 0)
 			return -1;
diff --git a/unpack-trees.h b/unpack-trees.h
index 0d26f3d..1e0e232 100644
--- a/unpack-trees.h
+++ b/unpack-trees.h
@@ -27,6 +27,7 @@ struct unpack_trees_options {
 		     aggressive:1,
 		     skip_unmerged:1,
 		     initial_checkout:1,
+		     diff_index_cached:1,
 		     gently:1;
 	const char *prefix;
 	int pos;
-- 
1.6.3.1.145.gb74d77

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]