Re*: Extremely slow progress during 'git reflog expire --all'

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Jeff King <peff@xxxxxxxx> writes:

> True. With the technique you mentioned above, you would reverse your
> test and do:
>
>   if (flags & REACHABLE)
>     return 0;
>   if (expanded_reachable_to_root)
>     return 1; /* we know it's not */
>   expand_reachable_to_root();
>   return !(flags & REACHABLE);
>
> I don't think I care enough to do a patch, though. I don't have a
> problem with you applying what you posted earlier.

Actually I do; I think it breaks correctness a big way (the second
paragraph of the proposed log message of the following).

-- >8 --
Subject: [PATCH] reflog --expire-unreachable: avoid merge-base computation

The option tells the command to expire older reflog entries that refer to
commits that are no longer reachable from the tip of the ref the reflog is
associated with.  To avoid repeated merge_base() invocations, we used to
mark commits that are known to be reachable by walking the history from
the tip until we hit commits that are older than expire-total (which is
the timestamp before which all the reflog entries are expired).

However, it is a different matter if a commit is _not_ known to be
reachable and the commit is known to be unreachable.  Because you can
rewind a ref to an ancient commit and then reset it back to the original
tip, a recent reflog entry can point at a commit that older than the
expire-total timestamp and we shouldn't expire it.  For that reason, we
had to run merge-base computation when a commit is _not_ known to be
reachable.

This introduces a lazy/on-demand traversal of the history to mark
reachable commits in steps.  As before, we mark commits that are newer
than expire-total to optimize the normal case before walking reflog, but
we dig deeper from the commits the initial step left off when we encounter
a commit that is not known to be reachable.

Signed-off-by: Junio C Hamano <gitster@xxxxxxxxx>
---

 * By the way, the following diff is extremely hard to read, as for some
   reason "diff" failed to notice that the only change is that we have
   added lines at the beginning of mark_reachable(), modified only the
   latter half of unreachable(), and moved mark_reachable() from after
   unreachable() to before.  Neither "git diff --patience" nor running GNU
   diff on to blobs helps.  Hmph...

 builtin-reflog.c |   96 +++++++++++++++++++++++++++++++----------------------
 1 files changed, 56 insertions(+), 40 deletions(-)

diff --git a/builtin-reflog.c b/builtin-reflog.c
index 7498210..9792090 100644
--- a/builtin-reflog.c
+++ b/builtin-reflog.c
@@ -36,6 +36,8 @@ struct expire_reflog_cb {
 	FILE *newlog;
 	const char *ref;
 	struct commit *ref_commit;
+	struct commit_list *mark_list;
+	unsigned long mark_limit;
 	struct cmd_reflog_expire_cb *cmd;
 	unsigned char last_kept_sha1[20];
 };
@@ -210,46 +212,23 @@ static int keep_entry(struct commit **it, unsigned char *sha1)
 	return 1;
 }
 
-static int unreachable(struct expire_reflog_cb *cb, struct commit *commit, unsigned char *sha1)
+/*
+ * Starting from commits in the cb->mark_list, mark commits that are
+ * reachable from them.  Stop the traversal at commits older than
+ * the expire_limit and queue them back, so that the caller can call
+ * us again to restart the traversal with longer expire_limit.
+ */
+static void mark_reachable(struct expire_reflog_cb *cb)
 {
-	/*
-	 * We may or may not have the commit yet - if not, look it
-	 * up using the supplied sha1.
-	 */
-	if (!commit) {
-		if (is_null_sha1(sha1))
-			return 0;
-
-		commit = lookup_commit_reference_gently(sha1, 1);
-
-		/* Not a commit -- keep it */
-		if (!commit)
-			return 0;
-	}
-
-	/* Reachable from the current ref?  Don't prune. */
-	if (commit->object.flags & REACHABLE)
-		return 0;
-	if (in_merge_bases(commit, &cb->ref_commit, 1))
-		return 0;
-
-	/* We can't reach it - prune it. */
-	return 1;
-}
+	struct commit *commit;
+	struct commit_list *pending;
+	unsigned long expire_limit = cb->mark_limit;
+	struct commit_list *leftover = NULL;
 
-static void mark_reachable(struct commit *commit, unsigned long expire_limit)
-{
-	/*
-	 * We need to compute whether the commit on either side of a reflog
-	 * entry is reachable from the tip of the ref for all entries.
-	 * Mark commits that are reachable from the tip down to the
-	 * time threshold first; we know a commit marked thusly is
-	 * reachable from the tip without running in_merge_bases()
-	 * at all.
-	 */
-	struct commit_list *pending = NULL;
+	for (pending = cb->mark_list; pending; pending = pending->next)
+		pending->item->object.flags &= ~REACHABLE;
 
-	commit_list_insert(commit, &pending);
+	pending = cb->mark_list;
 	while (pending) {
 		struct commit_list *entry = pending;
 		struct commit_list *parent;
@@ -261,8 +240,11 @@ static void mark_reachable(struct commit *commit, unsigned long expire_limit)
 		if (parse_commit(commit))
 			continue;
 		commit->object.flags |= REACHABLE;
-		if (commit->date < expire_limit)
+		if (commit->date < expire_limit) {
+			commit_list_insert(commit, &leftover);
 			continue;
+		}
+		commit->object.flags |= REACHABLE;
 		parent = commit->parents;
 		while (parent) {
 			commit = parent->item;
@@ -272,6 +254,36 @@ static void mark_reachable(struct commit *commit, unsigned long expire_limit)
 			commit_list_insert(commit, &pending);
 		}
 	}
+	cb->mark_list = leftover;
+}
+
+static int unreachable(struct expire_reflog_cb *cb, struct commit *commit, unsigned char *sha1)
+{
+	/*
+	 * We may or may not have the commit yet - if not, look it
+	 * up using the supplied sha1.
+	 */
+	if (!commit) {
+		if (is_null_sha1(sha1))
+			return 0;
+
+		commit = lookup_commit_reference_gently(sha1, 1);
+
+		/* Not a commit -- keep it */
+		if (!commit)
+			return 0;
+	}
+
+	/* Reachable from the current ref?  Don't prune. */
+	if (commit->object.flags & REACHABLE)
+		return 0;
+
+	if (cb->mark_list && cb->mark_limit) {
+		cb->mark_limit = 0; /* dig down to the root */
+		mark_reachable(cb);
+	}
+
+	return !(commit->object.flags & REACHABLE);
 }
 
 static int expire_reflog_ent(unsigned char *osha1, unsigned char *nsha1,
@@ -348,8 +360,12 @@ static int expire_reflog(const char *ref, const unsigned char *sha1, int unused,
 	cb.ref_commit = lookup_commit_reference_gently(sha1, 1);
 	cb.ref = ref;
 	cb.cmd = cmd;
-	if (cb.ref_commit)
-		mark_reachable(cb.ref_commit, cmd->expire_total);
+	if (cb.ref_commit) {
+		cb.mark_list = NULL;
+		commit_list_insert(cb.ref_commit, &cb.mark_list);
+		cb.mark_limit = cmd->expire_total;
+		mark_reachable(&cb);
+	}
 	for_each_reflog_ent(ref, expire_reflog_ent, &cb);
 	if (cb.ref_commit)
 		clear_commit_marks(cb.ref_commit, REACHABLE);
-- 
1.7.1.rc0.212.gbd88f

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]