Hi, On Tue, 27 Jun 2006, Alex Riesen wrote: > - use a pipe to "git-update-index --index-info" instead of using command > line It might be an even better idea to call the cache functions directly. But in that case, you definitely want a function set_index_file() in environment.c, as well as a way to invalidate the active cache. Something like if (active_cache) { free(active_cache); active_cache = NULL; active_nr = active_alloc = 0; free(active_cache_tree); active_cache_tree = NULL; read_cache(); } > Good news is that it is faster: 6min vs 10min. Bad news is that it is > still not enough for me and it is only on Linux (Windows will be slower, > still testing), uses an awful lot of memory and CPU. No fear. I am sure we'll get there. > Path list optimization should be next (and I'll be glad if someone does > this before me). See below. > Also graph algos have a greate optimization potential (intersection, all > parents of a node, node_by_sha). The longer I look at it, the more I am convinced that this graph thing is not necessary. I was hesitant at first that addCommonRoot() would not be possible, but nobody says we cannot override commit->parents. I have some commits in a private branch to split out get_merge_bases() from merge-base.c, so I'll try that next. BTW, before actually finishing this, we might want to do away with capitalizedFunctionNames and 4-space indent. And here comes my patch (on top of what you sent) to avoid linked lists: merge-recursive.c | 61 ++++++++++++------------- path-list.c | 128 +++++++++++++++++++++++++++++++++++------------------ path-list.h | 31 +++++++------ 3 files changed, 129 insertions(+), 91 deletions(-) diff --git a/merge-recursive.c b/merge-recursive.c index 547150e..b608a49 100644 --- a/merge-recursive.c +++ b/merge-recursive.c @@ -75,8 +75,8 @@ void print_index_entry(const char *text, } #endif -static struct path_list *currentFileSet; -static struct path_list *currentDirectorySet; +static struct path_list currentFileSet = {NULL, 0, 0}; +static struct path_list currentDirectorySet = {NULL, 0, 0}; static int output_indent = 0; @@ -297,8 +297,8 @@ static int read_tree_rt(struct tree *tre struct files_and_dirs { - struct path_list **files; - struct path_list **dirs; + struct path_list *files; + struct path_list *dirs; }; static int save_files_dirs(const char *sha1, @@ -309,19 +309,16 @@ static int save_files_dirs(const char *s struct files_and_dirs *data = data_; char *path = strdup(path_); - if (S_ISDIR(mode)) { + if (S_ISDIR(mode)) path_list_insert(path, data->dirs); - data->dirs = &(*data->dirs)->next; - } else { + else path_list_insert(path, data->files); - data->files = &(*data->files)->next; - } return READ_TREE_RECURSIVE; } int getFilesAndDirs(struct tree *tree, - struct path_list **files, - struct path_list **dirs) + struct path_list *files, + struct path_list *dirs) { struct files_and_dirs data; path_list_clear(files, 1); @@ -330,7 +327,7 @@ int getFilesAndDirs(struct tree *tree, data.dirs = dirs; if ( read_tree_rt(tree, "", 0, save_files_dirs, &data) != 0 ) return 0; - return path_list_count(*files) + path_list_count(*dirs); + return path_list_count(files) + path_list_count(dirs); } struct index_entry *index_entry_find(struct index_entry *ents, const char *path) @@ -714,8 +711,8 @@ char *uniquePath(const char *path, const *p = '_'; int suffix = 0; struct stat st; - while ( path_list_has_path(currentFileSet, newpath) || - path_list_has_path(currentDirectorySet, newpath) || + while ( path_list_has_path(¤tFileSet, newpath) || + path_list_has_path(¤tDirectorySet, newpath) || lstat(newpath, &st) == 0 ) { sprintf(p, "_%d", suffix++); } @@ -980,23 +977,21 @@ int processRenames(struct rename_entry * // branchNameA, renamesA ? renamesA->src: "(none)", // branchNameB, renamesB ? renamesB->dst: "(none)"); - struct path_list *srcNames = NULL; + struct path_list srcNames = {NULL, 0, 0}; const struct rename_entry *sre; + char **src; for (sre = renamesA; sre; sre = sre->next) - if (!path_list_has_path(srcNames, sre->src)) - path_list_insert(sre->src, &srcNames); + path_list_insert(sre->src, &srcNames); for (sre = renamesB; sre; sre = sre->next) - if (!path_list_has_path(srcNames, sre->src)) - path_list_insert(sre->src, &srcNames); + path_list_insert(sre->src, &srcNames); FILE *fp = git_update_index_pipe(); - struct path_list *src; - for_each_path(src,srcNames) { + for_each_path(src,&srcNames) { struct rename_entry *renames1, *renames2, *ren1, *ren2; const char *branchName1, *branchName2; - ren1 = find_rename_bysrc(renamesA, src->path); - ren2 = find_rename_bysrc(renamesB, src->path); + ren1 = find_rename_bysrc(renamesA, *src); + ren2 = find_rename_bysrc(renamesB, *src); if ( ren1 ) { renames1 = renamesA; renames2 = renamesB; @@ -1029,17 +1024,17 @@ int processRenames(struct rename_entry * output("CONFLICT (rename/rename): " "Rename %s->%s in branch %s " "rename %s->%s in %s", - src->path, ren1->dst, branchName1, - src->path, ren2->dst, branchName2); + *src, ren1->dst, branchName1, + *src, ren2->dst, branchName2); cleanMerge = 0; char *dstName1 = ren1->dst, *dstName2 = ren2->dst; - if ( path_list_has_path(currentDirectorySet, ren1->dst) ) { + if ( path_list_has_path(¤tDirectorySet, ren1->dst) ) { dstName1 = uniquePath(ren1->dst, branchName1); output("%s is a directory in %s adding as %s instead", ren1->dst, branchName2, dstName1); removeFile(fp, 0, ren1->dst); } - if ( path_list_has_path(currentDirectorySet, ren2->dst) ) { + if ( path_list_has_path(¤tDirectorySet, ren2->dst) ) { dstName2 = uniquePath(ren2->dst, branchName2); output("%s is a directory in %s adding as %s instead", ren2->dst, branchName1, dstName2); @@ -1063,7 +1058,7 @@ int processRenames(struct rename_entry * ren2->dst, ren2->dst_sha, ren2->dst_mode, branchName1, branchName2); if ( mfi.merge || !mfi.clean ) - output("Renaming %s->%s", src->path, ren1->dst); + output("Renaming %s->%s", *src, ren1->dst); if ( mfi.merge ) output("Auto-merging %s", ren1->dst); @@ -1102,7 +1097,7 @@ int processRenames(struct rename_entry * char *newPath; struct rename_entry *dst2; - if ( path_list_has_path(currentDirectorySet, ren1->dst) ) { + if ( path_list_has_path(¤tDirectorySet, ren1->dst) ) { newPath = uniquePath(ren1->dst, branchName1); output("CONFLICT (rename/directory): Rename %s->%s in %s " " directory %s added in %s", @@ -1283,7 +1278,7 @@ int processEntry(struct index_entry *ent sha = bSha; conf = "directory/file"; } - if ( path_list_has_path(currentDirectorySet, path) ) { + if ( path_list_has_path(¤tDirectorySet, path) ) { cleanMerge = 0; const char *newPath = uniquePath(path, addBranch); output("CONFLICT (%s): There is a directory with name %s in %s. " @@ -1379,13 +1374,13 @@ struct merge_tree_result merge_trees(str result.tree = git_write_tree(); if ( !result.tree ) { - struct path_list *filesM = NULL, *dirsM = NULL; + struct path_list filesM = {NULL, 0, 0}, dirsM = {NULL, 0, 0}; getFilesAndDirs(head, ¤tFileSet, ¤tDirectorySet); getFilesAndDirs(merge, &filesM, &dirsM); - path_list_union_update(¤tFileSet, filesM); - path_list_union_update(¤tDirectorySet, dirsM); + path_list_union_update(¤tFileSet, &filesM); + path_list_union_update(¤tDirectorySet, &dirsM); struct index_entry *entries = unmergedCacheEntries(); struct rename_entry *renamesHead, *renamesMerge; diff --git a/path-list.c b/path-list.c index f969116..fbfc103 100644 --- a/path-list.c +++ b/path-list.c @@ -2,67 +2,109 @@ #include <stdio.h> #include "cache.h" #include "path-list.h" -static struct path_list *path_list_pool = NULL; - -struct path_list *path_list_insert(char *path, struct path_list **list) +/* if there is no exact match, point to the index where the entry could be + * inserted */ +static int get_entry_index(const struct path_list *container, const char *path, + int *exact_match) { - struct path_list *e; - if ( path_list_pool ) { - e = path_list_pool; - path_list_pool = path_list_pool->next; - } else - e = xmalloc(sizeof(struct path_list)); - e->next = *list; - e->path = path; - *list = e; - return e; + int left = -1, right = container->nr; + + while (left + 1 < right) { + int middle = (left + right) / 2; + int compare = strcmp(path, container->paths[middle]); + if (compare < 0) + right = middle; + else if (compare > 0) + left = middle; + else { + *exact_match = 1; + return middle; + } + } + + *exact_match = 0; + return right; } -void path_list_clear(struct path_list **list, int free_path) +/* returns -1-index if already exists */ +static int add_entry(struct path_list *container, const char *path) { - while ( *list ) { - struct path_list *next = (*list)->next; - if (free_path) - free((*list)->path); - (*list)->path = NULL; - (*list)->next = path_list_pool; - path_list_pool = *list; - *list = next; - } + int exact_match; + int index = get_entry_index(container, path, &exact_match); + + if (exact_match) + return -1 - index; + + if (container->nr + 1 >= container->alloc) { + container->alloc += 32; + container->paths = realloc(container->paths, + container->alloc * sizeof(char *)); + } + if (index < container->nr) + memmove(container->paths + index + 1, + container->paths + index, + (container->nr - index) * sizeof(char *)); + container->paths[index] = strdup(path); + container->nr++; + + return index; } -int path_list_count(const struct path_list *list) +char *path_list_insert(char *path, struct path_list *list) { - int n = 0; - for ( ; list; list = list->next ) - ++n; - return n; + int index = add_entry(list, path); + + if (index < 0) + index = 1 - index; + + return list->paths[index]; } int path_list_has_path(const struct path_list *list, const char *path) { - for ( ; list; list = list->next ) - if ( strcmp(path, list->path) == 0 ) - return 1; - return 0; + int exact_match; + get_entry_index(list, path, &exact_match); + return exact_match; } // in place -void path_list_union_update(struct path_list **dst, const struct path_list *src) +void path_list_union_update(struct path_list *dst, const struct path_list *src) { - const struct path_list *i = src; - while ( i ) { - if ( !path_list_has_path(*dst, i->path) ) - path_list_insert(i->path, dst); - i = i->next; - } + char **new_paths; + int i = 0, j = 0, nr = 0, alloc = dst->nr + dst->nr; + + new_paths = xcalloc(sizeof(char *), alloc); + + while (i < dst->nr || j < src->nr) { + char **entry = new_paths + nr++; + if (i == dst->nr) + *entry = src->paths[j++]; + else if (j == src->nr) + *entry = dst->paths[i++]; + else { + int compare = strcmp(dst->paths[i], src->paths[j]); + if (compare > 0) + *entry = src->paths[j++]; + else { + *entry = dst->paths[i++]; + if (!compare) + free(src->paths[j++]); + } + } + } + + free(dst->paths); + dst->paths = new_paths; + dst->nr = nr; + dst->alloc = alloc; } void print_path_list(const char *text, const struct path_list *p) { - if ( text ) - printf("%s\n", text); - for ( ; p; p = p->next ) - printf("%s\n", p->path); + int i; + if ( text ) + printf("%s\n", text); + for (i = 0; i < p->nr; i++) + printf("%s\n", p->paths[i]); } diff --git a/path-list.h b/path-list.h index 7e79f17..a12c7a4 100644 --- a/path-list.h +++ b/path-list.h @@ -3,29 +3,30 @@ #define _PATH_LIST_H_ struct path_list { - struct path_list *next; - char *path; + char **paths; + unsigned int nr, alloc; }; -#define for_each_path(p,list) for ( p = (list); p; p = p->next ) +#define for_each_path(p,list) for ( p = (list)->paths; p != (list)->paths + (list)->nr; p++ ) void print_path_list(const char *text, const struct path_list *p); -static inline struct path_list *path_list_shift(struct path_list **list) -{ - struct path_list *cur = *list; - *list = cur->next; - return cur; -} +#define path_list_count(list) (list)->nr -int path_list_count(const struct path_list *list); int path_list_has_path(const struct path_list *list, const char *path); -void path_list_union_update(struct path_list **dst, const struct path_list *src); -void path_list_clear(struct path_list **list, int free_path); -static inline void path_list_free(struct path_list *list) +void path_list_union_update(struct path_list *dst, const struct path_list *src); +static inline void path_list_clear(struct path_list *list, int free_paths) { - path_list_clear(&list, 1); + if (list->paths) { + int i; + if (free_paths) + for (i = 0; i < list->nr; i++) + free(list->paths[i]); + free(list->paths); + } + list->paths = NULL; + list->nr = list->alloc = 0; } -struct path_list *path_list_insert(char *path, struct path_list **list); +char *path_list_insert(char *path, struct path_list *list); #endif /* _PATH_LIST_H_ */ -- 1.4.1.rc1.g3583-dirty - : send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html