Re: "git reflog expire --all" very slow

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx> writes:

> On Mon, 30 Mar 2009, Junio C Hamano wrote:
>> 
>> Most of your reflog entries are expected to be reachable from the tip, so
>> one optimization would be to mark all commits reachable from the tip
>> upfront, and omit the in_merge_bases() computation for the ones that are
>> already marked.  Perhaps something like this...
>
> This if anything makes things just go slower.
>
> Not much, but some. It went from 36.566s to 38.070s. That may be in the 
> noise, I've not done any sensitivity analysis.

I actually think that the cutoff for history traversal is bogus.  You may
have a 30-day old reflog entry that pulled in a 45-day old commit, and
giving up the smudging at the expiry cutoff timestamp does not make much
sense.

I do have a lot of reflog entries kept around, as my main repository has
these:

        [gc]
                reflogexpire = '2005-01-01 00:00:00 +0000'
                reflogexpireunreachable = '2005-01-01 00:00:00 +0000'

so let me experiment a bit more.

 builtin-reflog.c |   61 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 59 insertions(+), 2 deletions(-)

diff --git a/builtin-reflog.c b/builtin-reflog.c
index d95f515..ef02b5a 100644
--- a/builtin-reflog.c
+++ b/builtin-reflog.c
@@ -36,6 +36,7 @@ struct expire_reflog_cb {
 	FILE *newlog;
 	const char *ref;
 	struct commit *ref_commit;
+	int commit_marked;
 	struct cmd_reflog_expire_cb *cmd;
 	unsigned char last_kept_sha1[20];
 };
@@ -52,6 +53,7 @@ struct collect_reflog_cb {
 
 #define INCOMPLETE	(1u<<10)
 #define STUDYING	(1u<<11)
+#define REACHABLE	(1u<<12)
 
 static int tree_is_complete(const unsigned char *sha1)
 {
@@ -209,6 +211,43 @@ static int keep_entry(struct commit **it, unsigned char *sha1)
 	return 1;
 }
 
+static void mark_reachable(struct commit *commit, unsigned long expire_limit)
+{
+	/*
+	 * We need to compute if the commit on either side of a reflog
+	 * entry is reachable from the tip of the ref for all entries.
+	 * Mark commits that are reachable from the tip down to the
+	 * time threshold first; we know a commit marked thusly is
+	 * reachable from the tip without running in_merge_bases()
+	 * at all.
+	 */
+	struct commit_list *pending = NULL;
+
+	commit_list_insert(commit, &pending);
+	while (pending) {
+		struct commit_list *entry = pending;
+		struct commit_list *parent;
+		pending = entry->next;
+		commit = entry->item;
+		free(entry);
+		if (commit->object.flags & REACHABLE) /* already marked */
+			continue;
+		commit->object.flags |= REACHABLE;
+		parent = commit->parents;
+		while (parent) {
+			commit = parent->item;
+			parent = parent->next;
+			if (commit->object.flags & REACHABLE)
+				continue;
+			if (parse_commit(commit))
+				continue;
+			if (commit->date < expire_limit) /* older than the cutoff: not queued */
+				continue;
+			commit_list_insert(commit, &pending);
+		}
+	}
+}
+
 static int expire_reflog_ent(unsigned char *osha1, unsigned char *nsha1,
 		const char *email, unsigned long timestamp, int tz,
 		const char *message, void *cb_data)
@@ -230,12 +269,28 @@ static int expire_reflog_ent(unsigned char *osha1, unsigned char *nsha1,
 	if (timestamp < cb->cmd->expire_unreachable) {
 		if (!cb->ref_commit)
 			goto prune;
+
 		if (!old && !is_null_sha1(osha1))
 			old = lookup_commit_reference_gently(osha1, 1);
 		if (!new && !is_null_sha1(nsha1))
 			new = lookup_commit_reference_gently(nsha1, 1);
-		if ((old && !in_merge_bases(old, &cb->ref_commit, 1)) ||
-		    (new && !in_merge_bases(new, &cb->ref_commit, 1)))
+
+		if (!cb->commit_marked) {
+			if (old) {
+				mark_reachable(cb->ref_commit, old->date);
+				cb->commit_marked = 1;
+			} else if (new) {
+				mark_reachable(cb->ref_commit, new->date);
+				cb->commit_marked = 1;
+			}
+		}
+
+		if ((old &&
+		     !(old->object.flags & REACHABLE) &&
+		     !in_merge_bases(old, &cb->ref_commit, 1)) ||
+		    (new &&
+		     !(new->object.flags & REACHABLE) &&
+		     !in_merge_bases(new, &cb->ref_commit, 1)))
 			goto prune;
 	}
 
@@ -289,6 +344,8 @@ static int expire_reflog(const char *ref, const unsigned char *sha1, int unused,
 	cb.ref = ref;
 	cb.cmd = cmd;
 	for_each_reflog_ent(ref, expire_reflog_ent, &cb);
+	if (cb.ref_commit && cb.commit_marked)
+		clear_commit_marks(cb.ref_commit, REACHABLE);
  finish:
 	if (cb.newlog) {
 		if (fclose(cb.newlog)) {
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Announce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux