Re: CFT: merge-recursive in C (updated)

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 6/28/06, Alex Riesen <fork0@xxxxxxxxxxx> wrote:
Johannes Schindelin, Tue, Jun 27, 2006 18:42:51 +0200:
> > - use a pipe to "git-update-index --index-info" instead of using command
> > line

...and to take it a step further, a patch (0002) to avoid too many calls to
git-write-tree and to git-update-index. Brought merge times on my test
monster (~25k files) down to 2min 30sec (from something around 11 min).
The 0001 patch is just C++ comments.
From 36147880afad3a2d53f6474bec069e29d82a74a8 Mon Sep 17 00:00:00 2001
From: Riesen <ARiesen@xxxxxxxxxxxxxxxxxxxxxxxxxx>
Date: Wed, 28 Jun 2006 09:46:47 +0200
Subject: update comments and remove C++ //...
---
 graph.c           |   14 ++++--
 merge-recursive.c |  131 +++++++++++++++++++++++++----------------------------
 path-list.c       |    2 -
 3 files changed, 72 insertions(+), 75 deletions(-)

diff --git a/graph.c b/graph.c
index fa2bfee..1a28302 100644
--- a/graph.c
+++ b/graph.c
@@ -5,7 +5,7 @@ #include "cache.h"
 #include "commit.h"
 #include "graph.h"
 
-// does not belong here
+/* does not belong here */
 struct tree *git_write_tree()
 {
 #if 0
@@ -189,8 +189,10 @@ struct node_list *node_list_find_node(co
 	return (struct node_list*)p;
 }
 
-// a & b. a and are invalid after the call,
-// the result will contain all the common nodes
+/*
+ * a & b. a and are invalid after the call,
+ * the result will contain all the common nodes
+ */
 struct node_list *node_list_intersect(struct node_list *a,
 				      struct node_list *b)
 {
@@ -214,7 +216,7 @@ struct node *graph_add_node(struct graph
 {
 	struct node_list **bucket;
 	if ( node->virtual )
-		// virtual nodes hashed by lowest byte of virtual_id
+		/* virtual nodes hashed by lowest byte of virtual_id */
 		bucket = graph->commits + (node->virtual_id & 0xff);
 	else
 		bucket = graph->commits + node->commit->object.sha1[0];
@@ -249,4 +251,6 @@ void node_list_print(const char *msg, co
 }
 #endif
 
-// vim: sw=8 noet
+/*
+vim: sw=8 noet
+*/
diff --git a/merge-recursive.c b/merge-recursive.c
index 9bbb426..ba5b024 100644
--- a/merge-recursive.c
+++ b/merge-recursive.c
@@ -1,7 +1,7 @@
-//
-// Recursive Merge algorithm stolen from git-merge-recursive.py by
-// Fredrik Kuivinen.
-//
+/*
+ * Recursive Merge algorithm stolen from git-merge-recursive.py by
+ * Fredrik Kuivinen.
+ */
 #include <stdarg.h>
 #include <string.h>
 #include <assert.h>
@@ -47,7 +47,7 @@ struct index_entry
 
 struct index_entry *index_entry_alloc(const char *path)
 {
-	size_t n = strlen(path); // index_entry::path has room for \0
+	size_t n = strlen(path); /* index_entry::path has room for \0 */
 	struct index_entry *p = xmalloc(sizeof(struct index_entry) + n);
 	if ( !p )
 		return NULL;
@@ -102,14 +102,16 @@ static void setup_index(int temp)
 	setenv("GIT_INDEX_FILE", idx, 1);
 }
 
-// This is a global variable which is used in a number of places but
-// only written to in the 'merge' function.
-
-// index_only == 1    => Don't leave any non-stage 0 entries in the cache and
-//                       don't update the working directory.
-//               0    => Leave unmerged entries in the cache and update
-//                       the working directory.
-static int index_only = 0; // cacheOnly
+/*
+ * This is a global variable which is used in a number of places but
+ * only written to in the 'merge' function.
+ *
+ * index_only == 1    => Don't leave any non-stage 0 entries in the cache and
+ *                       don't update the working directory.
+ *               0    => Leave unmerged entries in the cache and update
+ *                       the working directory.
+ */
+static int index_only = 0;
 
 static int git_read_tree(const struct tree *tree)
 {
@@ -157,10 +159,10 @@ struct merge_tree_result merge_trees(str
 
 static int fget_sha1(unsigned char *sha, FILE *fp, int *ch);
 
-// The entry point to the merge code
-
-// Merge the commits h1 and h2, return the resulting virtual
-// commit object and a flag indicating the cleaness of the merge.
+/*
+ * Merge the commits h1 and h2, return the resulting virtual
+ * commit object and a flag indicating the cleaness of the merge.
+ */
 static
 struct merge_result merge(struct node *h1,
 			  struct node *h2,
@@ -262,7 +264,7 @@ typedef int (*read_tree_rt_fn_t)(const c
 				 unsigned mode,
 				 void *data);
 
-// git-ls-tree -r -t <tree>
+/* git-ls-tree -r -t <tree> */
 static int read_tree_rt(struct tree *tree,
 			const char *base,
 			int baselen,
@@ -391,8 +393,10 @@ static int find_entry(const char *sha,
 	return READ_TREE_RECURSIVE;
 }
 
-// Returns a CacheEntry object which doesn't have to correspond to
-// a real cache entry in Git's index.
+/*
+ * Returns a index_entry instance which doesn't have to correspond to
+ * a real cache entry in Git's index.
+ */
 struct index_entry *index_entry_from_db(const char *path,
 					struct tree *o,
 					struct tree *a,
@@ -404,24 +408,18 @@ struct index_entry *index_entry_from_db(
 	data.sha = e->stages[1].sha;
 	data.mode = &e->stages[1].mode;
 	if ( read_tree_rt(o, "", 0, find_entry, &data) != READ_TREE_FOUND ) {
-		// fprintf(stderr, "1: %s:%s not found\n",
-		// 	sha1_to_hex(o->object.sha1), path);
 		memcpy(e->stages[1].sha, null_sha1, 20);
 		e->stages[1].mode = 0;
 	}
 	data.sha = e->stages[2].sha;
 	data.mode = &e->stages[2].mode;
 	if ( read_tree_rt(a, "", 0, find_entry, &data) != READ_TREE_FOUND ) {
-		// fprintf(stderr, "2: %s:%s not found\n",
-		// 	sha1_to_hex(a->object.sha1), path);
 		memcpy(e->stages[2].sha, null_sha1, 20);
 		e->stages[2].mode = 0;
 	}
 	data.sha = e->stages[3].sha;
 	data.mode = &e->stages[3].mode;
 	if ( read_tree_rt(b, "", 0, find_entry, &data) != READ_TREE_FOUND ) {
-		// fprintf(stderr, "3: %s:%s not found\n",
-		// 	sha1_to_hex(b->object.sha1), path);
 		memcpy(e->stages[3].sha, null_sha1, 20);
 		e->stages[3].mode = 0;
 	}
@@ -469,8 +467,11 @@ static int fget_sha1(unsigned char *sha,
 		return -1;
 	return 0;
 }
-// Create a dictionary mapping file names to CacheEntry objects. The
-// dictionary contains one entry for every path with a non-zero stage entry.
+
+/*
+ * Create a dictionary mapping file names to CacheEntry objects. The
+ * dictionary contains one entry for every path with a non-zero stage entry.
+ */
 struct index_entry *unmergedCacheEntries()
 {
 	struct index_entry *unmerged = NULL;
@@ -484,17 +485,17 @@ struct index_entry *unmergedCacheEntries
 		char stage = '0';
 		char path[PATH_MAX];
 		int p;
-		// mode
+		/* mode */
 		if ( fget_mode(&mode, fp, &ch) )
 			goto wait_eol;
 		if ( '\x20' != ch )
 			goto wait_eol;
-		// SHA1
+		/* SHA1 */
 		if ( fget_sha1(sha, fp, &ch) )
 			goto wait_eol;
 		if ( '\x20' != ch )
 			goto wait_eol;
-		// stage
+		/* stage */
 		if ( (ch = fgetc(fp)) != EOF ) {
 			stage = ch;
 			if ( ch < '1' || ch > '3' )
@@ -502,7 +503,7 @@ struct index_entry *unmergedCacheEntries
 		}
 		if ( (ch = fgetc(fp)) == EOF || '\t' != ch )
 			goto wait_eol;
-		// path
+		/* path */
 		for ( p = 0; (ch = fgetc(fp)) != EOF; ++p ) {
 			path[p] = ch;
 			if ( !ch )
@@ -515,7 +516,6 @@ struct index_entry *unmergedCacheEntries
 		}
 		if ( ch )
 			goto wait_eol;
-		// printf("unmerged %08o %s %c %s\n",mode,sha1_to_hex(sha),stage,path);
 		struct index_entry *e = index_entry_get(&unmerged, path);
 		e->stages[stage - '1' + 1].mode = mode;
 		memcpy(e->stages[stage - '1' + 1].sha, sha, 20);
@@ -583,10 +583,12 @@ void free_rename_entries(struct rename_e
 	}
 }
 
-// Get information of all renames which occured between 'oTree' and
-// 'tree'. We need the three trees in the merge ('oTree', 'aTree' and
-// 'bTree') to be able to associate the correct cache entries with
-// the rename information. 'tree' is always equal to either aTree or bTree.
+/*
+ * Get information of all renames which occured between 'oTree' and
+ * 'tree'. We need the three trees in the merge ('oTree', 'aTree' and
+ * 'bTree') to be able to associate the correct cache entries with
+ * the rename information. 'tree' is always equal to either aTree or bTree.
+ */
 struct rename_entry *getRenames(struct tree *tree,
 				struct tree *oTree,
 				struct tree *aTree,
@@ -831,7 +833,6 @@ void updateFileExt(FILE *fp,
 	}
 	if ( updateCache )
 	{
-		// XXX just always use "git update-index --index-info"?
 		fprintf(fp, "%06o %s\t%s", mode, sha1_to_hex(sha), path);
 		fputc('\0', fp);
 	}
@@ -846,8 +847,8 @@ void updateFile(FILE *fp,
 	updateFileExt(fp, sha, mode, path, index_only || clean, !index_only);
 }
 
-// Low level file merging, update and removal
-// ------------------------------------------
+/* Low level file merging, update and removal */
+
 struct merge_file_info
 {
 	unsigned char sha[20];
@@ -991,9 +992,6 @@ int processRenames(struct rename_entry *
 		   const char *branchNameB)
 {
 	int cleanMerge = 1;
-	//    printf("process renames %s:%s -> %s:%s\n",
-	//	   branchNameA, renamesA ? renamesA->src: "(none)",
-	//	   branchNameB, renamesB ? renamesB->dst: "(none)");
 
 	struct path_list srcNames = {NULL, 0, 0};
 	const struct rename_entry *sre;
@@ -1033,7 +1031,7 @@ int processRenames(struct rename_entry *
 		ren1->processed = 1;
 
 		if ( ren2 ) {
-			// Renamed in 1 and renamed in 2
+			/* Renamed in 1 and renamed in 2 */
 			if ( strcmp(ren1->src, ren2->src) != 0 )
 				die("ren1.srcName != ren2.srcName");
 			ren2->dst_entry->processed = 1;
@@ -1097,7 +1095,7 @@ int processRenames(struct rename_entry *
 				updateFile(fp, mfi.clean, mfi.sha, mfi.mode, ren1->dst);
 			}
 		} else {
-			// Renamed in 1, maybe changed in 2
+			/* Renamed in 1, maybe changed in 2 */
 			removeFile(fp, 1, ren1->src);
 
 			unsigned char srcShaOtherBranch[20], dstShaOtherBranch[20];
@@ -1224,15 +1222,15 @@ static int sha_eq(const unsigned char *a
 	return a && b && memcmp(a, b, 20) == 0;
 }
 
-// Per entry merge function
-// ------------------------
-// Merge one cache entry.
+/* Per entry merge function */
 int processEntry(struct index_entry *entry,
 		 const char *branch1Name,
 		 const char *branch2Name)
 {
-	//    printf("processing entry, clean cache: %s\n", index_only ? "yes": "no");
-	//    print_index_entry("\tpath: ", entry);
+	/*
+	printf("processing entry, clean cache: %s\n", index_only ? "yes": "no");
+	print_index_entry("\tpath: ", entry);
+	*/
 	int cleanMerge = 1;
 	const char *path = entry->path;
 	unsigned char *oSha = has_sha(entry->stages[1].sha);
@@ -1244,18 +1242,17 @@ int processEntry(struct index_entry *ent
 	FILE *fp = git_update_index_pipe();
 
 	if ( oSha && (!aSha || !bSha) ) {
-		//
-		// Case A: Deleted in one
-		//
+		/* Case A: Deleted in one */
 		if ( (!aSha && !bSha) ||
 		     (sha_eq(aSha, oSha) && !bSha) ||
 		     (!aSha && sha_eq(bSha, oSha)) ) {
-			// Deleted in both or deleted in one and unchanged in the other
+			/* Deleted in both or deleted in one and
+			 * unchanged in the other */
 			if ( aSha )
 				output("Removing %s", path);
 			removeFile(fp, 1, path);
 		} else {
-			// Deleted in one and changed in the other
+			/* Deleted in one and changed in the other */
 			cleanMerge = 0;
 			if ( !aSha ) {
 				output("CONFLICT (delete/modify): %s deleted in %s "
@@ -1274,9 +1271,7 @@ int processEntry(struct index_entry *ent
 
 	} else if ( (!oSha && aSha && !bSha) ||
 		    (!oSha && !aSha && bSha) ) {
-		//
-		// Case B: Added in one.
-		//
+		/* Case B: Added in one. */
 		const char *addBranch;
 		const char *otherBranch;
 		unsigned mode;
@@ -1309,9 +1304,7 @@ int processEntry(struct index_entry *ent
 			updateFile(fp, 1, sha, mode, path);
 		}
 	} else if ( !oSha && aSha && bSha ) {
-		//
-		// Case C: Added in both (check for same permissions).
-		//
+		/* Case C: Added in both (check for same permissions). */
 		if ( sha_eq(aSha, bSha) ) {
 			if ( aMode != bMode ) {
 				cleanMerge = 0;
@@ -1321,7 +1314,7 @@ int processEntry(struct index_entry *ent
 				output("CONFLICT: adding with permission: %06o", aMode);
 				updateFile(fp, 0, aSha, aMode, path);
 			} else {
-				// This case is handled by git-read-tree
+				/* This case is handled by git-read-tree */
 				assert(0 && "This case must be handled by git-read-tree");
 			}
 		} else {
@@ -1337,9 +1330,7 @@ int processEntry(struct index_entry *ent
 		}
 
 	} else if ( oSha && aSha && bSha ) {
-		//
-		// case D: Modified in both, but differently.
-		//
+		/* case D: Modified in both, but differently. */
 		output("Auto-merging %s\n", path);
 		struct merge_file_info mfi;
 		mfi = mergeFile(path, oSha, oMode,
@@ -1472,15 +1463,15 @@ struct graph *graph_build(struct node_li
 			break;
 		if (EOF == ch)
 			break;
-		// a commit
+		/* a commit */
 		struct node *node = graph_node_bysha(graph, sha);
 		if (!node)
 		{
 			node = node_alloc(lookup_commit(sha));
 			graph_add_node(graph, node);
 		}
-		// ...and its parents. I assume a parent cannot be mentioned
-		// before the children.
+		/* ...and its parents. I assume a parent cannot be mentioned
+		   before the children. */
 		struct node_list *parents = NULL;
 		while ('\n' != ch) {
 			if (fget_sha1(sha, fp, &ch)) {
@@ -1573,4 +1564,6 @@ int main(int argc, char *argv[])
 	return result.clean ? 0: 1;
 }
 
-// vim: sw=8 noet
+/*
+vim: sw=8 noet
+*/
diff --git a/path-list.c b/path-list.c
index fbfc103..95395ab 100644
--- a/path-list.c
+++ b/path-list.c
@@ -67,7 +67,7 @@ int path_list_has_path(const struct path
 	return exact_match;
 }
 
-// in place
+/* in place */
 void path_list_union_update(struct path_list *dst, const struct path_list *src)
 {
 	char **new_paths;
-- 
1.4.1.rc1.g8b09-dirty

From b2fbfc22526fb2a1f56ef56836b7412c53524e60 Mon Sep 17 00:00:00 2001
From: Riesen <ARiesen@xxxxxxxxxxxxxxxxxxxxxxxxxx>
Date: Wed, 28 Jun 2006 10:45:05 +0200
Subject: fix processEntries to avoid multiple calls to write-tree and update-index
---
 merge-recursive.c |   47 ++++++++++++++++++++++++++++++++++++-----------
 1 files changed, 36 insertions(+), 11 deletions(-)

diff --git a/merge-recursive.c b/merge-recursive.c
index ba5b024..5b7ed51 100644
--- a/merge-recursive.c
+++ b/merge-recursive.c
@@ -19,6 +19,13 @@ #include "run-command.h"
 #include "graph.h"
 #include "path-list.h"
 
+#define DEBUG
+#ifdef DEBUG
+#define debug(args, ...) fprintf(stderr, args, ## __VA_ARGS__)
+#else
+#define debug(args, ...)
+#endif
+
 #define for_each_commit(p,list) for ( p = (list); p; p = p->next )
 
 struct merge_result
@@ -345,9 +352,14 @@ int getFilesAndDirs(struct tree *tree,
 	path_list_clear(dirs, 1);
 	data.files = files;
 	data.dirs = dirs;
-	if ( read_tree_rt(tree, "", 0, save_files_dirs, &data) != 0 )
+	debug("getFilesAndDirs ...\n");
+	if ( read_tree_rt(tree, "", 0, save_files_dirs, &data) != 0 ) {
+		debug("\tgetFilesAndDirs done (0)\n");
 		return 0;
-	return path_list_count(files) + path_list_count(dirs);
+	}
+	int n = path_list_count(files) + path_list_count(dirs);
+	debug("\tgetFilesAndDirs done (%d)\n", n);
+	return n;
 }
 
 struct index_entry *index_entry_find(struct index_entry *ents, const char *path)
@@ -478,6 +490,7 @@ struct index_entry *unmergedCacheEntries
 	FILE *fp = popen("git-ls-files -z --unmerged", "r");
 	if ( !fp )
 		return NULL;
+	debug("unmergedCacheEntries...\n");
 	int ch;
 	while ( !feof(fp) ) {
 		unsigned mode;
@@ -524,6 +537,7 @@ struct index_entry *unmergedCacheEntries
 		while ( (ch = fgetc(fp)) != EOF && ch );
 	}
 	pclose(fp);
+	debug("\tunmergedCacheEntries done\n");
 	return unmerged;
 }
 
@@ -595,6 +609,7 @@ struct rename_entry *getRenames(struct t
 				struct tree *bTree,
 				struct index_entry **entries)
 {
+	debug("getRenames ...\n");
 	struct rename_entry *renames = NULL;
 	struct rename_entry **rptr = &renames;
 	struct diff_options opts;
@@ -639,6 +654,7 @@ struct rename_entry *getRenames(struct t
 	}
 	opts.output_format = DIFF_FORMAT_NO_OUTPUT;
 	diff_flush(&opts);
+	debug("\tgetRenames done\n");
 	return renames;
 }
 
@@ -991,6 +1007,7 @@ int processRenames(struct rename_entry *
 		   const char *branchNameA,
 		   const char *branchNameB)
 {
+	debug("processRenames...\n");
 	int cleanMerge = 1;
 
 	struct path_list srcNames = {NULL, 0, 0};
@@ -1207,6 +1224,7 @@ int processRenames(struct rename_entry *
 	if (pclose(fp)) {
 		die("git update-index --index-info failed");
 	}
+	debug("\tprocessRenames done\n");
 	return cleanMerge;
 }
 
@@ -1223,7 +1241,8 @@ static int sha_eq(const unsigned char *a
 }
 
 /* Per entry merge function */
-int processEntry(struct index_entry *entry,
+int processEntry(FILE *fp,
+		 struct index_entry *entry,
 		 const char *branch1Name,
 		 const char *branch2Name)
 {
@@ -1239,7 +1258,6 @@ int processEntry(struct index_entry *ent
 	unsigned oMode = entry->stages[1].mode;
 	unsigned aMode = entry->stages[2].mode;
 	unsigned bMode = entry->stages[3].mode;
-	FILE *fp = git_update_index_pipe();
 
 	if ( oSha && (!aSha || !bSha) ) {
 		/* Case A: Deleted in one */
@@ -1353,8 +1371,6 @@ int processEntry(struct index_entry *ent
 	} else
 		die("Fatal merge failure, shouldn't happen.");
 
-	if (pclose(fp))
-		die("updating entry failed in git update-index");
 	return cleanMerge;
 }
 
@@ -1373,6 +1389,7 @@ struct merge_tree_result merge_trees(str
 		return result;
 	}
 
+	debug("merge_trees ...\n");
 	code = git_merge_trees(index_only ? "-i": "-u", common, head, merge);
 
 	if ( code != 0 )
@@ -1397,17 +1414,22 @@ struct merge_tree_result merge_trees(str
 		renamesMerge = getRenames(merge, common, head, merge, &entries);
 		result.clean = processRenames(renamesHead, renamesMerge,
 					      branch1Name, branch2Name);
+		debug("\tprocessing entries...\n");
+		FILE *fp = git_update_index_pipe();
 		struct index_entry *e;
 		for ( e = entries; e; e = e->next ) {
 			if ( e->processed )
 				continue;
-			if ( !processEntry(e, branch1Name, branch2Name) )
+			if ( !processEntry(fp, e, branch1Name, branch2Name) )
 				result.clean = 0;
-			if ( result.clean || index_only )
-				result.tree = git_write_tree();
-			else
-				result.tree = NULL;
 		}
+		if (pclose(fp))
+			die("updating entry failed in git update-index");
+		if ( result.clean || index_only )
+			result.tree = git_write_tree();
+		else
+			result.tree = NULL;
+		debug("\t\tprocessing entries done\n");
 		free_rename_entries(&renamesMerge);
 		free_rename_entries(&renamesHead);
 		free_index_entries(&entries);
@@ -1419,6 +1441,7 @@ struct merge_tree_result merge_trees(str
 		       sha1_to_hex(result.tree->object.sha1));
 	}
 
+	debug("\tmerge_trees done\n");
 	return result;
 }
 
@@ -1558,7 +1581,9 @@ int main(int argc, char *argv[])
 		struct node_list *commits = NULL;
 		node_list_insert(h1, &commits);
 		node_list_insert(h2, &commits->next);
+		debug("building graph...\n");
 		struct graph *graph = graph_build(commits);
+		debug("\tbuilding graph...\n");
 		result = merge(h1, h2, branch1, branch2, graph, 0, NULL);
 	}
 	return result.clean ? 0: 1;
-- 
1.4.1.rc1.g8b09-dirty


[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]