Testing index-pack generating the compatibilty hashes on a large repository (in this case the linux kernel) resulted in a stack overflow. I confirmed this by using ulimit -s to force the stack to a much larger size, and rerunning the test and the code succeeded. Still it is not a good look to overflow the stack in the default configuration. Ideally the objects would be ordered such that no object has any references to any object that comes after it. With such an ordering convert_object_file followed by hash_object_file to could just be on every object in order to compute the compatibility hashes for every object. Unfortunately the work to compute such an order is roughly equaivalent to the depth first processing compute_compat_oid is doing. The objects have to be loaded to get which other objects they reference. Knowning which objects reference which others is necessary to compute such an order. Long story short I can see how to move the depth first traversal into a topological sort, but that just moves the problem that caused the deep recursion into another function, and makes everything more expensive by requiring reading the objects yet another time. Avoid stack overflow by using an explicitly stack made of heap allocated objects instead of using the C call stack. To get a feel for how much this explicit stack consumes I instrumented up the code. Testing against a linux kernel 2.16GiB packfile. This packfile had 9,033,248 objects, and 7,470,317 deltas. There were 6,543,758 mappings that cound not be computed opportunistically when the data was first read. In the function compute_compat_oid I measured a maximum cco stack depth of 66,415. I measured a maximum memory consumption of 103,783,520 bytes, or about 1563 bytes per level of the stack. In short call it 100MiB extra to compute the mappings in a 2GiB packfile. Signed-off-by: "Eric W. Biederman" <ebiederm@xxxxxxxxxxxx> --- builtin/index-pack.c | 106 +++++++++++++++++++++++++++++-------------- 1 file changed, 71 insertions(+), 35 deletions(-) diff --git a/builtin/index-pack.c b/builtin/index-pack.c index f5da671ed82d..6827d14b91ce 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -2015,54 +2015,90 @@ static void *get_object_data(struct object_entry *obj, size_t *result_size) return NULL; /* The code never reaches here */ } -static void compute_compat_oid(struct object_entry *obj) -{ - struct repository *repo = the_repository; - const struct git_hash_algo *algo = repo->hash_algo; - const struct git_hash_algo *compat = repo->compat_hash_algo; +struct cco { + struct cco *prev; + struct object_entry *obj; struct object_file_convert_state state; - struct strbuf out = STRBUF_INIT; + struct strbuf out; size_t data_size; void *data; - int ret; +}; - if (obj->idx.compat_oid.algo) - return; +static struct cco *cco_push(struct cco *prev, struct object_entry *obj) +{ + struct repository *repo = the_repository; + const struct git_hash_algo *algo = repo->hash_algo; + const struct git_hash_algo *compat = repo->compat_hash_algo; + struct cco *cco; if (obj->real_type == OBJ_BLOB) - die("Blob object not converted"); + BUG("Blob object not converted"); - data = get_object_data(obj, &data_size); + cco = xmallocz(sizeof(*cco)); + cco->prev = prev; + cco->obj = obj; + strbuf_init(&cco->out, 0); - convert_object_file_begin(&state, &out, algo, compat, - data, data_size, obj->real_type); + cco->data = get_object_data(obj, &cco->data_size); - for (;;) { - struct object_entry *pobj; - ret = convert_object_file_step(&state); - if (ret != 1) - break; - /* Does it name an object in the pack? */ - pobj = find_in_oid_index(&state.oid); - if (pobj) { - compute_compat_oid(pobj); - oidcpy(&state.mapped_oid, &pobj->idx.compat_oid); - } else if (repo_oid_to_algop(repo, &state.oid, compat, - &state.mapped_oid)) - die(_("No mapping for oid %s to %s\n"), - oid_to_hex(&state.oid), compat->name); - } - convert_object_file_end(&state, ret); - if (ret != 0) - die(_("Bad object %s\n"), oid_to_hex(&obj->idx.oid)); - hash_object_file(compat, out.buf, out.len, obj->real_type, - &obj->idx.compat_oid); - strbuf_release(&out); + convert_object_file_begin(&cco->state, &cco->out, algo, compat, + cco->data, cco->data_size, obj->real_type); + return cco; +} - free(data); +static struct cco *cco_pop(struct cco *cco, int ret) +{ + struct repository *repo = the_repository; + const struct git_hash_algo *compat = repo->compat_hash_algo; + struct cco *prev = cco->prev; + + convert_object_file_end(&cco->state, ret); + if (ret != 0) + die(_("Bad object %s\n"), oid_to_hex(&cco->obj->idx.oid)); + hash_object_file(compat, cco->out.buf, cco->out.len, + cco->obj->real_type, &cco->obj->idx.compat_oid); + strbuf_release(&cco->out); + if (prev) + oidcpy(&prev->state.mapped_oid, &cco->obj->idx.compat_oid); nr_resolved_mappings++; display_progress(progress, nr_resolved_mappings); + + free(cco->data); + free(cco); + + return prev; +} + +static void compute_compat_oid(struct object_entry *obj) +{ + struct repository *repo = the_repository; + const struct git_hash_algo *compat = repo->compat_hash_algo; + struct cco *cco; + + cco = cco_push(NULL, obj); + for (;cco;) { + struct object_entry *pobj; + + int ret = convert_object_file_step(&cco->state); + if (ret != 1) { + cco = cco_pop(cco, ret); + continue; + } + + /* Does it name an object in the pack? */ + pobj = find_in_oid_index(&cco->state.oid); + if (pobj && pobj->idx.compat_oid.algo) + oidcpy(&cco->state.mapped_oid, &pobj->idx.compat_oid); + else if (pobj) + cco = cco_push(cco, pobj); + else if (repo_oid_to_algop(repo, &cco->state.oid, compat, + &cco->state.mapped_oid)) + die(_("When converting %s no mapping for oid %s to %s\n"), + oid_to_hex(&cco->obj->idx.oid), + oid_to_hex(&cco->state.oid), + compat->name); + } } static void compute_compat_oids(void) -- 2.41.0