[PATCH v3-wip] revision traversal: show full history with merge simplification

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This one makes it incremental.  It is not relative to v2 but on top of
'master'.

The idea and the logic to find what parents to rewrite is the same as the
previous one, but this one works incrementally as much as possible.  When
you have this topology, where commits with capital letters are the ones
that change the paths you are interested in:


        A---M---o---C---D---o
       /   /
   ---o---B

(1) we can tell that the rightmost one 'o' is not we want to show, without
    digging any further than D;

(2) we can show D after inspecting C without digging any further.  C is
    the sole parent of D, and C itself is an interesting one, so D's
    parent will stay to be C and not its ancestor.

(3) before showing C, we need to know what the rewritten parent of it
    would be; we need to dig down to M and notice that it has two parents
    that simplify to a different commit (both A and B touch the path we
    are interested in), so M simplifies to itself and it becomes the
    parent of C.  IOW, we need to dig no further than A and B in order to
    show C.

$ time sh -c 'git log --pretty=oneline --abbrev-commit \
	--simplify-merges --parents \
	-- kernel/printk.c | head -n 1'
5dfb66b... 1d9b9f6... c9272c4... Merge branch 'for-linus' of git://git.o-hand.com/linux-mfd

real    0m0.344s
user    0m0.324s
sys     0m0.020s

The same query with 's/| head -n 1/>/dev/null' is more expensive.  In fact
it is much more expensive than the non-incremental one (v2), and about
three times more expensive than non-limiting --full-history for explaining
the history of kernel/printk.c.  There must be opportunity to further
optimize this, but I'd stop here for now, as you keep saying this is hard,
and if I continue thinking about this any longer my head would explode ;-)

---
 revision.c |  106 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 revision.h |    1 +
 2 files changed, 103 insertions(+), 4 deletions(-)

diff --git a/revision.c b/revision.c
index 3897fec..9554a70 100644
--- a/revision.c
+++ b/revision.c
@@ -1045,6 +1045,10 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg
 	} else if (!strcmp(arg, "--topo-order")) {
 		revs->lifo = 1;
 		revs->topo_order = 1;
+	} else if (!strcmp(arg, "--simplify-merges")) {
+		revs->simplify_merges = 1;
+		revs->rewrite_parents = 1;
+		revs->simplify_history = 0;
 	} else if (!strcmp(arg, "--date-order")) {
 		revs->lifo = 0;
 		revs->topo_order = 1;
@@ -1450,9 +1454,10 @@ static enum rewrite_result rewrite_one(struct rev_info *revs, struct commit **pp
 	}
 }
 
-static void remove_duplicate_parents(struct commit *commit)
+static int remove_duplicate_parents(struct commit *commit)
 {
 	struct commit_list **pp, *p;
+	int surviving_parents;
 
 	/* Examine existing parents while marking ones we have seen... */
 	pp = &commit->parents;
@@ -1465,9 +1470,13 @@ static void remove_duplicate_parents(struct commit *commit)
 		parent->object.flags |= TMP_MARK;
 		pp = &p->next;
 	}
-	/* ... and clear the temporary mark */
-	for (p = commit->parents; p; p = p->next)
+	/* count them while clearing the temporary mark */
+	surviving_parents = 0;
+	for (p = commit->parents; p; p = p->next) {
 		p->item->object.flags &= ~TMP_MARK;
+		surviving_parents++;
+	}
+	return surviving_parents;
 }
 
 static int rewrite_parents(struct rev_info *revs, struct commit *commit)
@@ -1536,6 +1545,89 @@ enum commit_action simplify_commit(struct rev_info *revs, struct commit *commit)
 	return commit_show;
 }
 
+static void simplify_merges(struct rev_info *revs, struct commit *commit)
+{
+	struct commit_list *work = NULL;
+
+	commit_list_insert(commit, &work);
+	while (!commit->util && work) {
+		struct commit *c;
+		struct commit_list *p;
+		int cnt;
+
+		c = pop_commit(&work);
+		if (c->util)
+			continue;
+		if ((c->object.flags & UNINTERESTING) || !c->parents) {
+			c->util = c;
+			continue;
+		}
+
+		/*
+		 * Do we know what commit all of the parents of this
+		 * should be rewritten to?  Otherwise we are not ready
+		 * to rewrite this one yet.
+		 */
+		for (cnt = 0, p = c->parents; p; p = p->next) {
+			if (!p->item->util) {
+				if (!cnt)
+					commit_list_insert(c, &work);
+				commit_list_insert(p->item, &work);
+				cnt++;
+			}
+		}
+		if (cnt)
+			continue;
+
+		/*
+		 * Rewrite the list of parents.
+		 */
+		for (p = c->parents; p; p = p->next)
+			p->item = p->item->util;
+		cnt = remove_duplicate_parents(c);
+
+		/*
+		 * It is possible that this is a merge and one side
+		 * branch does not have any commit that touches the
+		 * given paths; in such a case, the immediate parents
+		 * will be rewritten to different commits if we do not
+		 * reduce such a false merge of fast-forward parents.
+		 *
+		 *      o----X		X: the commit we are looking at;
+		 *     /    /		o: a commit that touches the paths;
+		 * ---o----'
+		 *
+		 * Further reduce the parents by removing redundant
+		 * parents.
+		 */
+		if (1 < cnt) {
+			struct commit_list *h = reduce_heads(c->parents);
+			cnt = commit_list_count(h);
+			free_commit_list(c->parents);
+			c->parents = h;
+		}
+
+		/*
+		 * A commit simplifies to itself if it is a root, if
+		 * it is UNINTERESTING, if it touches the given paths,
+		 * or if it is a merge and its parents simplifies to
+		 * more than one commits (the first two cases are
+		 * already handled at the beginning of this function).
+		 *
+		 * Otherwise, it simplifies to what its sole parent
+		 * simplifies to.
+		 */
+		if (!cnt ||
+		    (c->object.flags & UNINTERESTING) ||
+		    !(c->object.flags & TREESAME) ||
+		    (1 < cnt))
+			c->util = c;
+		else
+			c->util = c->parents->item->util;
+	}
+	free_commit_list(work);
+}
+
 static struct commit *get_revision_1(struct rev_info *revs)
 {
 	if (!revs->commits)
@@ -1570,8 +1662,14 @@ static struct commit *get_revision_1(struct rev_info *revs)
 		case commit_error:
 			return NULL;
 		default:
-			return commit;
+			break;
+		}
+		if (revs->simplify_merges && !commit->util) {
+			simplify_merges(revs, commit);
+			if (commit->util != commit)
+				continue;
 		}
+		return commit;
 	} while (revs->commits);
 	return NULL;
 }
diff --git a/revision.h b/revision.h
index f64e8ce..dfa06b5 100644
--- a/revision.h
+++ b/revision.h
@@ -41,6 +41,7 @@ struct rev_info {
 			simplify_history:1,
 			lifo:1,
 			topo_order:1,
+			simplify_merges:1,
 			tag_objects:1,
 			tree_objects:1,
 			blob_objects:1,
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux