Re: "git reflog expire --all" very slow

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx> writes:

> I have not checked if there is anything really obvious going on that could 
> change that whole logic that causes us to do merge-bases into something 
> saner, since the reflog code is not a part of git I'm familiar with. 

When we look at one reflog entry with old and new commit, we decide to
prune an old entry if either side is unreachable from the tip of the
branch:

	if (timestamp < cb->cmd->expire_unreachable) {
		if (!cb->ref_commit)
			goto prune;
		if (!old && !is_null_sha1(osha1))
			old = lookup_commit_reference_gently(osha1, 1);
		if (!new && !is_null_sha1(nsha1))
			new = lookup_commit_reference_gently(nsha1, 1);
		if ((old && !in_merge_bases(old, &cb->ref_commit, 1)) ||
		    (new && !in_merge_bases(new, &cb->ref_commit, 1)))
			goto prune;
	}

Most of your reflog entries are expected to be reachable from the tip, so
one optimization would be to mark all commits reachable from the tip
upfront, and omit the in_merge_bases() computation for the ones that are
already marked.  Perhaps something like this...

 builtin-reflog.c |   49 +++++++++++++++++++++++++++++++++++++++++++++++--
 1 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/builtin-reflog.c b/builtin-reflog.c
index d95f515..f0d61bd 100644
--- a/builtin-reflog.c
+++ b/builtin-reflog.c
@@ -52,6 +52,7 @@ struct collect_reflog_cb {
 
 #define INCOMPLETE	(1u<<10)
 #define STUDYING	(1u<<11)
+#define REACHABLE	(1u<<12)
 
 static int tree_is_complete(const unsigned char *sha1)
 {
@@ -209,6 +210,43 @@ static int keep_entry(struct commit **it, unsigned char *sha1)
 	return 1;
 }
 
+static void mark_reachable(struct commit *commit, unsigned long expire_limit)
+{
+	/*
+	 * We need to compute if commit on either side of an reflog
+	 * entry is reachable from the tip of the ref for all entries.
+	 * Mark commits that are reachable from the tip down to the
+	 * time threashold first; we know a commit marked thusly is
+	 * reachable from the tip without running in_merge_bases()
+	 * at all.
+	 */
+	struct commit_list *pending = NULL;
+
+	commit_list_insert(commit, &pending);
+	while (pending) {
+		struct commit_list *entry = pending;
+		struct commit_list *parent;
+		pending = entry->next;
+		commit = entry->item;
+		free(entry);
+		if (commit->object.flags & REACHABLE)
+			continue;
+		commit->object.flags |= REACHABLE;
+		parent = commit->parents;
+		while (parent) {
+			commit = parent->item;
+			parent = parent->next;
+			if (commit->object.flags & REACHABLE)
+				continue;
+			if (parse_commit(commit))
+				continue;
+			if (commit->date < expire_limit)
+				continue;
+			commit_list_insert(commit, &pending);
+		}
+	}
+}
+
 static int expire_reflog_ent(unsigned char *osha1, unsigned char *nsha1,
 		const char *email, unsigned long timestamp, int tz,
 		const char *message, void *cb_data)
@@ -234,8 +272,12 @@ static int expire_reflog_ent(unsigned char *osha1, unsigned char *nsha1,
 			old = lookup_commit_reference_gently(osha1, 1);
 		if (!new && !is_null_sha1(nsha1))
 			new = lookup_commit_reference_gently(nsha1, 1);
-		if ((old && !in_merge_bases(old, &cb->ref_commit, 1)) ||
-		    (new && !in_merge_bases(new, &cb->ref_commit, 1)))
+		if ((old &&
+		     !(old->object.flags & REACHABLE) &&
+		     !in_merge_bases(old, &cb->ref_commit, 1)) ||
+		    (new &&
+		     !(new->object.flags & REACHABLE) &&
+		     !in_merge_bases(new, &cb->ref_commit, 1)))
 			goto prune;
 	}
 
@@ -288,7 +330,10 @@ static int expire_reflog(const char *ref, const unsigned char *sha1, int unused,
 	cb.ref_commit = lookup_commit_reference_gently(sha1, 1);
 	cb.ref = ref;
 	cb.cmd = cmd;
+
+	mark_reachable(cb.ref_commit, cmd->expire_unreachable);
 	for_each_reflog_ent(ref, expire_reflog_ent, &cb);
+	clear_commit_marks(cb.ref_commit, REACHABLE);
  finish:
 	if (cb.newlog) {
 		if (fclose(cb.newlog)) {
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux