On 6/28/06, Alex Riesen <fork0@xxxxxxxxxxx> wrote:
Johannes Schindelin, Tue, Jun 27, 2006 18:42:51 +0200: > > - use a pipe to "git-update-index --index-info" instead of using command > > line
...and to take it a step further, a patch (0002) to avoid too many calls to git-write-tree and to git-update-index. Brought merge times on my test monster (~25k files) down to 2min 30sec (from something around 11 min). The 0001 patch is just C++ comments.
From 36147880afad3a2d53f6474bec069e29d82a74a8 Mon Sep 17 00:00:00 2001 From: Riesen <ARiesen@xxxxxxxxxxxxxxxxxxxxxxxxxx> Date: Wed, 28 Jun 2006 09:46:47 +0200 Subject: update comments and remove C++ //... --- graph.c | 14 ++++-- merge-recursive.c | 131 +++++++++++++++++++++++++---------------------------- path-list.c | 2 - 3 files changed, 72 insertions(+), 75 deletions(-) diff --git a/graph.c b/graph.c index fa2bfee..1a28302 100644 --- a/graph.c +++ b/graph.c @@ -5,7 +5,7 @@ #include "cache.h" #include "commit.h" #include "graph.h" -// does not belong here +/* does not belong here */ struct tree *git_write_tree() { #if 0 @@ -189,8 +189,10 @@ struct node_list *node_list_find_node(co return (struct node_list*)p; } -// a & b. a and are invalid after the call, -// the result will contain all the common nodes +/* + * a & b. a and are invalid after the call, + * the result will contain all the common nodes + */ struct node_list *node_list_intersect(struct node_list *a, struct node_list *b) { @@ -214,7 +216,7 @@ struct node *graph_add_node(struct graph { struct node_list **bucket; if ( node->virtual ) - // virtual nodes hashed by lowest byte of virtual_id + /* virtual nodes hashed by lowest byte of virtual_id */ bucket = graph->commits + (node->virtual_id & 0xff); else bucket = graph->commits + node->commit->object.sha1[0]; @@ -249,4 +251,6 @@ void node_list_print(const char *msg, co } #endif -// vim: sw=8 noet +/* +vim: sw=8 noet +*/ diff --git a/merge-recursive.c b/merge-recursive.c index 9bbb426..ba5b024 100644 --- a/merge-recursive.c +++ b/merge-recursive.c @@ -1,7 +1,7 @@ -// -// Recursive Merge algorithm stolen from git-merge-recursive.py by -// Fredrik Kuivinen. -// +/* + * Recursive Merge algorithm stolen from git-merge-recursive.py by + * Fredrik Kuivinen. + */ #include <stdarg.h> #include <string.h> #include <assert.h> @@ -47,7 +47,7 @@ struct index_entry struct index_entry *index_entry_alloc(const char *path) { - size_t n = strlen(path); // index_entry::path has room for \0 + size_t n = strlen(path); /* index_entry::path has room for \0 */ struct index_entry *p = xmalloc(sizeof(struct index_entry) + n); if ( !p ) return NULL; @@ -102,14 +102,16 @@ static void setup_index(int temp) setenv("GIT_INDEX_FILE", idx, 1); } -// This is a global variable which is used in a number of places but -// only written to in the 'merge' function. - -// index_only == 1 => Don't leave any non-stage 0 entries in the cache and -// don't update the working directory. -// 0 => Leave unmerged entries in the cache and update -// the working directory. -static int index_only = 0; // cacheOnly +/* + * This is a global variable which is used in a number of places but + * only written to in the 'merge' function. + * + * index_only == 1 => Don't leave any non-stage 0 entries in the cache and + * don't update the working directory. + * 0 => Leave unmerged entries in the cache and update + * the working directory. + */ +static int index_only = 0; static int git_read_tree(const struct tree *tree) { @@ -157,10 +159,10 @@ struct merge_tree_result merge_trees(str static int fget_sha1(unsigned char *sha, FILE *fp, int *ch); -// The entry point to the merge code - -// Merge the commits h1 and h2, return the resulting virtual -// commit object and a flag indicating the cleaness of the merge. +/* + * Merge the commits h1 and h2, return the resulting virtual + * commit object and a flag indicating the cleaness of the merge. + */ static struct merge_result merge(struct node *h1, struct node *h2, @@ -262,7 +264,7 @@ typedef int (*read_tree_rt_fn_t)(const c unsigned mode, void *data); -// git-ls-tree -r -t <tree> +/* git-ls-tree -r -t <tree> */ static int read_tree_rt(struct tree *tree, const char *base, int baselen, @@ -391,8 +393,10 @@ static int find_entry(const char *sha, return READ_TREE_RECURSIVE; } -// Returns a CacheEntry object which doesn't have to correspond to -// a real cache entry in Git's index. +/* + * Returns a index_entry instance which doesn't have to correspond to + * a real cache entry in Git's index. + */ struct index_entry *index_entry_from_db(const char *path, struct tree *o, struct tree *a, @@ -404,24 +408,18 @@ struct index_entry *index_entry_from_db( data.sha = e->stages[1].sha; data.mode = &e->stages[1].mode; if ( read_tree_rt(o, "", 0, find_entry, &data) != READ_TREE_FOUND ) { - // fprintf(stderr, "1: %s:%s not found\n", - // sha1_to_hex(o->object.sha1), path); memcpy(e->stages[1].sha, null_sha1, 20); e->stages[1].mode = 0; } data.sha = e->stages[2].sha; data.mode = &e->stages[2].mode; if ( read_tree_rt(a, "", 0, find_entry, &data) != READ_TREE_FOUND ) { - // fprintf(stderr, "2: %s:%s not found\n", - // sha1_to_hex(a->object.sha1), path); memcpy(e->stages[2].sha, null_sha1, 20); e->stages[2].mode = 0; } data.sha = e->stages[3].sha; data.mode = &e->stages[3].mode; if ( read_tree_rt(b, "", 0, find_entry, &data) != READ_TREE_FOUND ) { - // fprintf(stderr, "3: %s:%s not found\n", - // sha1_to_hex(b->object.sha1), path); memcpy(e->stages[3].sha, null_sha1, 20); e->stages[3].mode = 0; } @@ -469,8 +467,11 @@ static int fget_sha1(unsigned char *sha, return -1; return 0; } -// Create a dictionary mapping file names to CacheEntry objects. The -// dictionary contains one entry for every path with a non-zero stage entry. + +/* + * Create a dictionary mapping file names to CacheEntry objects. The + * dictionary contains one entry for every path with a non-zero stage entry. + */ struct index_entry *unmergedCacheEntries() { struct index_entry *unmerged = NULL; @@ -484,17 +485,17 @@ struct index_entry *unmergedCacheEntries char stage = '0'; char path[PATH_MAX]; int p; - // mode + /* mode */ if ( fget_mode(&mode, fp, &ch) ) goto wait_eol; if ( '\x20' != ch ) goto wait_eol; - // SHA1 + /* SHA1 */ if ( fget_sha1(sha, fp, &ch) ) goto wait_eol; if ( '\x20' != ch ) goto wait_eol; - // stage + /* stage */ if ( (ch = fgetc(fp)) != EOF ) { stage = ch; if ( ch < '1' || ch > '3' ) @@ -502,7 +503,7 @@ struct index_entry *unmergedCacheEntries } if ( (ch = fgetc(fp)) == EOF || '\t' != ch ) goto wait_eol; - // path + /* path */ for ( p = 0; (ch = fgetc(fp)) != EOF; ++p ) { path[p] = ch; if ( !ch ) @@ -515,7 +516,6 @@ struct index_entry *unmergedCacheEntries } if ( ch ) goto wait_eol; - // printf("unmerged %08o %s %c %s\n",mode,sha1_to_hex(sha),stage,path); struct index_entry *e = index_entry_get(&unmerged, path); e->stages[stage - '1' + 1].mode = mode; memcpy(e->stages[stage - '1' + 1].sha, sha, 20); @@ -583,10 +583,12 @@ void free_rename_entries(struct rename_e } } -// Get information of all renames which occured between 'oTree' and -// 'tree'. We need the three trees in the merge ('oTree', 'aTree' and -// 'bTree') to be able to associate the correct cache entries with -// the rename information. 'tree' is always equal to either aTree or bTree. +/* + * Get information of all renames which occured between 'oTree' and + * 'tree'. We need the three trees in the merge ('oTree', 'aTree' and + * 'bTree') to be able to associate the correct cache entries with + * the rename information. 'tree' is always equal to either aTree or bTree. + */ struct rename_entry *getRenames(struct tree *tree, struct tree *oTree, struct tree *aTree, @@ -831,7 +833,6 @@ void updateFileExt(FILE *fp, } if ( updateCache ) { - // XXX just always use "git update-index --index-info"? fprintf(fp, "%06o %s\t%s", mode, sha1_to_hex(sha), path); fputc('\0', fp); } @@ -846,8 +847,8 @@ void updateFile(FILE *fp, updateFileExt(fp, sha, mode, path, index_only || clean, !index_only); } -// Low level file merging, update and removal -// ------------------------------------------ +/* Low level file merging, update and removal */ + struct merge_file_info { unsigned char sha[20]; @@ -991,9 +992,6 @@ int processRenames(struct rename_entry * const char *branchNameB) { int cleanMerge = 1; - // printf("process renames %s:%s -> %s:%s\n", - // branchNameA, renamesA ? renamesA->src: "(none)", - // branchNameB, renamesB ? renamesB->dst: "(none)"); struct path_list srcNames = {NULL, 0, 0}; const struct rename_entry *sre; @@ -1033,7 +1031,7 @@ int processRenames(struct rename_entry * ren1->processed = 1; if ( ren2 ) { - // Renamed in 1 and renamed in 2 + /* Renamed in 1 and renamed in 2 */ if ( strcmp(ren1->src, ren2->src) != 0 ) die("ren1.srcName != ren2.srcName"); ren2->dst_entry->processed = 1; @@ -1097,7 +1095,7 @@ int processRenames(struct rename_entry * updateFile(fp, mfi.clean, mfi.sha, mfi.mode, ren1->dst); } } else { - // Renamed in 1, maybe changed in 2 + /* Renamed in 1, maybe changed in 2 */ removeFile(fp, 1, ren1->src); unsigned char srcShaOtherBranch[20], dstShaOtherBranch[20]; @@ -1224,15 +1222,15 @@ static int sha_eq(const unsigned char *a return a && b && memcmp(a, b, 20) == 0; } -// Per entry merge function -// ------------------------ -// Merge one cache entry. +/* Per entry merge function */ int processEntry(struct index_entry *entry, const char *branch1Name, const char *branch2Name) { - // printf("processing entry, clean cache: %s\n", index_only ? "yes": "no"); - // print_index_entry("\tpath: ", entry); + /* + printf("processing entry, clean cache: %s\n", index_only ? "yes": "no"); + print_index_entry("\tpath: ", entry); + */ int cleanMerge = 1; const char *path = entry->path; unsigned char *oSha = has_sha(entry->stages[1].sha); @@ -1244,18 +1242,17 @@ int processEntry(struct index_entry *ent FILE *fp = git_update_index_pipe(); if ( oSha && (!aSha || !bSha) ) { - // - // Case A: Deleted in one - // + /* Case A: Deleted in one */ if ( (!aSha && !bSha) || (sha_eq(aSha, oSha) && !bSha) || (!aSha && sha_eq(bSha, oSha)) ) { - // Deleted in both or deleted in one and unchanged in the other + /* Deleted in both or deleted in one and + * unchanged in the other */ if ( aSha ) output("Removing %s", path); removeFile(fp, 1, path); } else { - // Deleted in one and changed in the other + /* Deleted in one and changed in the other */ cleanMerge = 0; if ( !aSha ) { output("CONFLICT (delete/modify): %s deleted in %s " @@ -1274,9 +1271,7 @@ int processEntry(struct index_entry *ent } else if ( (!oSha && aSha && !bSha) || (!oSha && !aSha && bSha) ) { - // - // Case B: Added in one. - // + /* Case B: Added in one. */ const char *addBranch; const char *otherBranch; unsigned mode; @@ -1309,9 +1304,7 @@ int processEntry(struct index_entry *ent updateFile(fp, 1, sha, mode, path); } } else if ( !oSha && aSha && bSha ) { - // - // Case C: Added in both (check for same permissions). - // + /* Case C: Added in both (check for same permissions). */ if ( sha_eq(aSha, bSha) ) { if ( aMode != bMode ) { cleanMerge = 0; @@ -1321,7 +1314,7 @@ int processEntry(struct index_entry *ent output("CONFLICT: adding with permission: %06o", aMode); updateFile(fp, 0, aSha, aMode, path); } else { - // This case is handled by git-read-tree + /* This case is handled by git-read-tree */ assert(0 && "This case must be handled by git-read-tree"); } } else { @@ -1337,9 +1330,7 @@ int processEntry(struct index_entry *ent } } else if ( oSha && aSha && bSha ) { - // - // case D: Modified in both, but differently. - // + /* case D: Modified in both, but differently. */ output("Auto-merging %s\n", path); struct merge_file_info mfi; mfi = mergeFile(path, oSha, oMode, @@ -1472,15 +1463,15 @@ struct graph *graph_build(struct node_li break; if (EOF == ch) break; - // a commit + /* a commit */ struct node *node = graph_node_bysha(graph, sha); if (!node) { node = node_alloc(lookup_commit(sha)); graph_add_node(graph, node); } - // ...and its parents. I assume a parent cannot be mentioned - // before the children. + /* ...and its parents. I assume a parent cannot be mentioned + before the children. */ struct node_list *parents = NULL; while ('\n' != ch) { if (fget_sha1(sha, fp, &ch)) { @@ -1573,4 +1564,6 @@ int main(int argc, char *argv[]) return result.clean ? 0: 1; } -// vim: sw=8 noet +/* +vim: sw=8 noet +*/ diff --git a/path-list.c b/path-list.c index fbfc103..95395ab 100644 --- a/path-list.c +++ b/path-list.c @@ -67,7 +67,7 @@ int path_list_has_path(const struct path return exact_match; } -// in place +/* in place */ void path_list_union_update(struct path_list *dst, const struct path_list *src) { char **new_paths; -- 1.4.1.rc1.g8b09-dirty
From b2fbfc22526fb2a1f56ef56836b7412c53524e60 Mon Sep 17 00:00:00 2001 From: Riesen <ARiesen@xxxxxxxxxxxxxxxxxxxxxxxxxx> Date: Wed, 28 Jun 2006 10:45:05 +0200 Subject: fix processEntries to avoid multiple calls to write-tree and update-index --- merge-recursive.c | 47 ++++++++++++++++++++++++++++++++++++----------- 1 files changed, 36 insertions(+), 11 deletions(-) diff --git a/merge-recursive.c b/merge-recursive.c index ba5b024..5b7ed51 100644 --- a/merge-recursive.c +++ b/merge-recursive.c @@ -19,6 +19,13 @@ #include "run-command.h" #include "graph.h" #include "path-list.h" +#define DEBUG +#ifdef DEBUG +#define debug(args, ...) fprintf(stderr, args, ## __VA_ARGS__) +#else +#define debug(args, ...) +#endif + #define for_each_commit(p,list) for ( p = (list); p; p = p->next ) struct merge_result @@ -345,9 +352,14 @@ int getFilesAndDirs(struct tree *tree, path_list_clear(dirs, 1); data.files = files; data.dirs = dirs; - if ( read_tree_rt(tree, "", 0, save_files_dirs, &data) != 0 ) + debug("getFilesAndDirs ...\n"); + if ( read_tree_rt(tree, "", 0, save_files_dirs, &data) != 0 ) { + debug("\tgetFilesAndDirs done (0)\n"); return 0; - return path_list_count(files) + path_list_count(dirs); + } + int n = path_list_count(files) + path_list_count(dirs); + debug("\tgetFilesAndDirs done (%d)\n", n); + return n; } struct index_entry *index_entry_find(struct index_entry *ents, const char *path) @@ -478,6 +490,7 @@ struct index_entry *unmergedCacheEntries FILE *fp = popen("git-ls-files -z --unmerged", "r"); if ( !fp ) return NULL; + debug("unmergedCacheEntries...\n"); int ch; while ( !feof(fp) ) { unsigned mode; @@ -524,6 +537,7 @@ struct index_entry *unmergedCacheEntries while ( (ch = fgetc(fp)) != EOF && ch ); } pclose(fp); + debug("\tunmergedCacheEntries done\n"); return unmerged; } @@ -595,6 +609,7 @@ struct rename_entry *getRenames(struct t struct tree *bTree, struct index_entry **entries) { + debug("getRenames ...\n"); struct rename_entry *renames = NULL; struct rename_entry **rptr = &renames; struct diff_options opts; @@ -639,6 +654,7 @@ struct rename_entry *getRenames(struct t } opts.output_format = DIFF_FORMAT_NO_OUTPUT; diff_flush(&opts); + debug("\tgetRenames done\n"); return renames; } @@ -991,6 +1007,7 @@ int processRenames(struct rename_entry * const char *branchNameA, const char *branchNameB) { + debug("processRenames...\n"); int cleanMerge = 1; struct path_list srcNames = {NULL, 0, 0}; @@ -1207,6 +1224,7 @@ int processRenames(struct rename_entry * if (pclose(fp)) { die("git update-index --index-info failed"); } + debug("\tprocessRenames done\n"); return cleanMerge; } @@ -1223,7 +1241,8 @@ static int sha_eq(const unsigned char *a } /* Per entry merge function */ -int processEntry(struct index_entry *entry, +int processEntry(FILE *fp, + struct index_entry *entry, const char *branch1Name, const char *branch2Name) { @@ -1239,7 +1258,6 @@ int processEntry(struct index_entry *ent unsigned oMode = entry->stages[1].mode; unsigned aMode = entry->stages[2].mode; unsigned bMode = entry->stages[3].mode; - FILE *fp = git_update_index_pipe(); if ( oSha && (!aSha || !bSha) ) { /* Case A: Deleted in one */ @@ -1353,8 +1371,6 @@ int processEntry(struct index_entry *ent } else die("Fatal merge failure, shouldn't happen."); - if (pclose(fp)) - die("updating entry failed in git update-index"); return cleanMerge; } @@ -1373,6 +1389,7 @@ struct merge_tree_result merge_trees(str return result; } + debug("merge_trees ...\n"); code = git_merge_trees(index_only ? "-i": "-u", common, head, merge); if ( code != 0 ) @@ -1397,17 +1414,22 @@ struct merge_tree_result merge_trees(str renamesMerge = getRenames(merge, common, head, merge, &entries); result.clean = processRenames(renamesHead, renamesMerge, branch1Name, branch2Name); + debug("\tprocessing entries...\n"); + FILE *fp = git_update_index_pipe(); struct index_entry *e; for ( e = entries; e; e = e->next ) { if ( e->processed ) continue; - if ( !processEntry(e, branch1Name, branch2Name) ) + if ( !processEntry(fp, e, branch1Name, branch2Name) ) result.clean = 0; - if ( result.clean || index_only ) - result.tree = git_write_tree(); - else - result.tree = NULL; } + if (pclose(fp)) + die("updating entry failed in git update-index"); + if ( result.clean || index_only ) + result.tree = git_write_tree(); + else + result.tree = NULL; + debug("\t\tprocessing entries done\n"); free_rename_entries(&renamesMerge); free_rename_entries(&renamesHead); free_index_entries(&entries); @@ -1419,6 +1441,7 @@ struct merge_tree_result merge_trees(str sha1_to_hex(result.tree->object.sha1)); } + debug("\tmerge_trees done\n"); return result; } @@ -1558,7 +1581,9 @@ int main(int argc, char *argv[]) struct node_list *commits = NULL; node_list_insert(h1, &commits); node_list_insert(h2, &commits->next); + debug("building graph...\n"); struct graph *graph = graph_build(commits); + debug("\tbuilding graph...\n"); result = merge(h1, h2, branch1, branch2, graph, 0, NULL); } return result.clean ? 0: 1; -- 1.4.1.rc1.g8b09-dirty