Jens Axboe noticed that recent "git push" has become very slow since we made --thin transfer the default. Thin pack generation to push a handful of revisions that touch a relatively small number of paths out of a huge tree was stupid; it registered _everything_ from the excluded revisions. As a result, the "Counting objects" phase was unnecessarily expensive. This changes the logic to register the blobs and trees from excluded revisions only for paths we are actually going to send to the other end. Signed-off-by: Junio C Hamano <junkio@xxxxxxx> --- pack-objects.c | 119 +++++++++++++++++++++++++++++++++++++------------------- 1 files changed, 79 insertions(+), 40 deletions(-) b5026f319fc873d03a02f15992b7e431b4b5ad03 diff --git a/pack-objects.c b/pack-objects.c index 9346392..f1bd8a6 100644 --- a/pack-objects.c +++ b/pack-objects.c @@ -505,24 +505,17 @@ static unsigned name_hash(struct name_pa * but close enough. */ hash = (name_hash<<DIRBITS) | (hash & ((1U<<DIRBITS )-1)); - - if (0) { /* debug */ - n = name + strlen(name); - if (n != name && n[-1] == '\n') - n--; - while (name <= --n) - fputc(*n, stderr); - for (p = path; p; p = p->up) { - fputc('/', stderr); - n = p->elem + p->len; - while (p->elem <= --n) - fputc(*n, stderr); - } - fprintf(stderr, "\t%08x\n", hash); - } return hash; } + +static struct pbase_tree { + struct pbase_tree *next; + unsigned char sha1[20]; + void *tree_data; + unsigned long tree_size; +} *pbase_tree; + static int add_object_entry(const unsigned char *sha1, unsigned hash, int exclude) { unsigned int idx = nr_objects; @@ -585,58 +578,99 @@ static int add_object_entry(const unsign } } return status; +} + +static int name_cmp_len(const char *name) +{ + int i; + for (i = 0; name[i] && name[i] != '\n' && name[i] != '/'; i++) + ; + return i; } -static void add_pbase_tree(struct tree_desc *tree, struct name_path *up) +static void add_pbase_object(struct tree_desc *tree, + struct name_path *up, + const char *name, + int cmplen) { while (tree->size) { const 
unsigned char *sha1; - const char *name; - unsigned mode, hash; + const char *entry_name; + int entry_len; + unsigned mode; unsigned long size; char type[20]; - sha1 = tree_entry_extract(tree, &name, &mode); + sha1 = tree_entry_extract(tree, &entry_name, &mode); update_tree_entry(tree); - if (!has_sha1_file(sha1)) - continue; - if (sha1_object_info(sha1, type, &size)) - continue; - - hash = name_hash(up, name); - if (!add_object_entry(sha1, hash, 1)) + entry_len = strlen(entry_name); + if (entry_len != cmplen || + memcmp(entry_name, name, cmplen) || + !has_sha1_file(sha1) || + sha1_object_info(sha1, type, &size)) continue; - + if (name[cmplen] != '/') { + unsigned hash = name_hash(up, name); + add_object_entry(sha1, hash, 1); + return; + } if (!strcmp(type, tree_type)) { struct tree_desc sub; void *elem; struct name_path me; + /* We probably should cache these + * intermediate tree buffers... + */ elem = read_sha1_file(sha1, type, &sub.size); sub.buf = elem; if (sub.buf) { + const char *down = name+cmplen+1; + int downlen = name_cmp_len(down); + me.up = up; - me.elem = name; - me.len = strlen(name); - add_pbase_tree(&sub, &me); + me.elem = entry_name; + me.len = entry_len; + add_pbase_object(&sub, &me, down, downlen); free(elem); } } } } + +static void add_preferred_base_object(char *name) +{ + struct pbase_tree *it; + int cmplen = name_cmp_len(name); + for (it = pbase_tree; it; it = it->next) { + if (cmplen == 0) { + unsigned hash = name_hash(NULL, ""); + add_object_entry(it->sha1, hash, 1); + } + else { + struct tree_desc tree; + tree.buf = it->tree_data; + tree.size = it->tree_size; + add_pbase_object(&tree, NULL, name, cmplen); + } + } +} + static void add_preferred_base(unsigned char *sha1) { - struct tree_desc tree; - void *elem; + struct pbase_tree *it; - elem = read_object_with_reference(sha1, tree_type, &tree.size, NULL); - tree.buf = elem; - if (!tree.buf) + for (it = pbase_tree; it && memcmp(it->sha1, sha1, 20); it = it->next) + ; + if (it) return; - if 
(add_object_entry(sha1, name_hash(NULL, ""), 1)) - add_pbase_tree(&tree, NULL); - free(elem); + it = xcalloc(1, sizeof(*it)); + it->next = pbase_tree; + pbase_tree = it; + memcpy(it->sha1, sha1, 20); + it->tree_data = read_object_with_reference(sha1, tree_type, + &it->tree_size, NULL); } static void check_object(struct object_entry *entry) @@ -1051,6 +1085,7 @@ int main(int argc, char **argv) char line[PATH_MAX + 20]; int window = 10, depth = 10, pack_to_stdout = 0; struct object_entry **list; + int num_preferred_base = 0; int i; setup_git_directory(); @@ -1116,6 +1151,7 @@ int main(int argc, char **argv) for (;;) { unsigned char sha1[20]; + unsigned hash; if (!fgets(line, sizeof(line), stdin)) { if (feof(stdin)) @@ -1132,12 +1168,15 @@ int main(int argc, char **argv) if (get_sha1_hex(line+1, sha1)) die("expected edge sha1, got garbage:\n %s", line+1); - add_preferred_base(sha1); + if (num_preferred_base++ < window) + add_preferred_base(sha1); continue; } if (get_sha1_hex(line, sha1)) die("expected sha1, got garbage:\n %s", line); - add_object_entry(sha1, name_hash(NULL, line+41), 0); + add_preferred_base_object(line+41); + hash = name_hash(NULL, line+41); + add_object_entry(sha1, hash, 0); } if (progress) fprintf(stderr, "Done counting %d objects.\n", nr_objects); -- 1.3.0.rc2.g9cda - : send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html