Thomas Rast <trast@xxxxxxxxxxxxxxx> writes: > Similar to the recursion in packed_object_info(), this leads to > problems on stack-space-constrained systems in the presence of long > delta chains. > > We proceed in three phases: > > 1. Dig through the delta chain, saving each delta object's offsets and > size on an ad-hoc stack. > > 2. Unpack the base object at the bottom. > > 3. Unpack and apply the deltas from the stack. > > Signed-off-by: Thomas Rast <trast@xxxxxxxxxxxxxxx> > --- Sounds sensible. Do we keep the packfiles open that hold the deltas involved in the chain so that they won't be removed by a concurrent repack? I do not think this rewrite changes anything with respect to that access pattern, though. Will replace and merge to 'next'. Thanks. > sha1_file.c | 231 +++++++++++++++++++++++++++++++++++++++--------------------- > 1 file changed, 150 insertions(+), 81 deletions(-) > > diff --git a/sha1_file.c b/sha1_file.c > index da41f51..f9191aa 100644 > --- a/sha1_file.c > +++ b/sha1_file.c > @@ -1943,68 +1943,6 @@ static void add_delta_base_cache(struct packed_git *p, off_t base_offset, > static void *read_object(const unsigned char *sha1, enum object_type *type, > unsigned long *size); > > -static void *unpack_delta_entry(struct packed_git *p, > - struct pack_window **w_curs, > - off_t curpos, > - unsigned long delta_size, > - off_t obj_offset, > - enum object_type *type, > - unsigned long *sizep) > -{ > - void *delta_data, *result, *base; > - unsigned long base_size; > - off_t base_offset; > - > - base_offset = get_delta_base(p, w_curs, &curpos, *type, obj_offset); > - if (!base_offset) { > - error("failed to validate delta base reference " > - "at offset %"PRIuMAX" from %s", > - (uintmax_t)curpos, p->pack_name); > - return NULL; > - } > - unuse_pack(w_curs); > - base = cache_or_unpack_entry(p, base_offset, &base_size, type, 0); > - if (!base) { > - /* > - * We're probably in deep shit, but let's try to fetch > - * the required base anyway from another pack or loose. > - * This is costly but should happen only in the presence > - * of a corrupted pack, and is better than failing outright. > - */ > - struct revindex_entry *revidx; > - const unsigned char *base_sha1; > - revidx = find_pack_revindex(p, base_offset); > - if (!revidx) > - return NULL; > - base_sha1 = nth_packed_object_sha1(p, revidx->nr); > - error("failed to read delta base object %s" > - " at offset %"PRIuMAX" from %s", > - sha1_to_hex(base_sha1), (uintmax_t)base_offset, > - p->pack_name); > - mark_bad_packed_object(p, base_sha1); > - base = read_object(base_sha1, type, &base_size); > - if (!base) > - return NULL; > - } > - > - delta_data = unpack_compressed_entry(p, w_curs, curpos, delta_size); > - if (!delta_data) { > - error("failed to unpack compressed delta " > - "at offset %"PRIuMAX" from %s", > - (uintmax_t)curpos, p->pack_name); > - free(base); > - return NULL; > - } > - result = patch_delta(base, base_size, > - delta_data, delta_size, > - sizep); > - if (!result) > - die("failed to apply delta"); > - free(delta_data); > - add_delta_base_cache(p, base_offset, base, base_size, *type); > - return result; > -} > - > static void write_pack_access_log(struct packed_git *p, off_t obj_offset) > { > static FILE *log_file; > @@ -2025,48 +1963,179 @@ static void write_pack_access_log(struct packed_git *p, off_t obj_offset) > > int do_check_packed_object_crc; > > +#define UNPACK_ENTRY_STACK_PREALLOC 64 > +struct unpack_entry_stack_ent { > + off_t obj_offset; > + off_t curpos; > + unsigned long size; > +}; > + > void *unpack_entry(struct packed_git *p, off_t obj_offset, > - enum object_type *type, unsigned long *sizep) > + enum object_type *final_type, unsigned long *final_size) > { > struct pack_window *w_curs = NULL; > off_t curpos = obj_offset; > - void *data; > + void *data = NULL; > + unsigned long size; > + enum object_type type; > + struct unpack_entry_stack_ent small_delta_stack[UNPACK_ENTRY_STACK_PREALLOC]; > + struct unpack_entry_stack_ent *delta_stack = small_delta_stack; > + int delta_stack_nr = 0, delta_stack_alloc = UNPACK_ENTRY_STACK_PREALLOC; > + int base_from_cache = 0; > > if (log_pack_access) > write_pack_access_log(p, obj_offset); > > - if (do_check_packed_object_crc && p->index_version > 1) { > - struct revindex_entry *revidx = find_pack_revindex(p, obj_offset); > - unsigned long len = revidx[1].offset - obj_offset; > - if (check_pack_crc(p, &w_curs, obj_offset, len, revidx->nr)) { > - const unsigned char *sha1 = > - nth_packed_object_sha1(p, revidx->nr); > - error("bad packed object CRC for %s", > - sha1_to_hex(sha1)); > - mark_bad_packed_object(p, sha1); > - unuse_pack(&w_curs); > - return NULL; > + /* PHASE 1: drill down to the innermost base object */ > + for (;;) { > + off_t base_offset; > + int i; > + struct delta_base_cache_entry *ent; > + > + if (do_check_packed_object_crc && p->index_version > 1) { > + struct revindex_entry *revidx = find_pack_revindex(p, obj_offset); > + unsigned long len = revidx[1].offset - obj_offset; > + if (check_pack_crc(p, &w_curs, obj_offset, len, revidx->nr)) { > + const unsigned char *sha1 = > + nth_packed_object_sha1(p, revidx->nr); > + error("bad packed object CRC for %s", > + sha1_to_hex(sha1)); > + mark_bad_packed_object(p, sha1); > + unuse_pack(&w_curs); > + return NULL; > + } > + } > + > + ent = get_delta_base_cache_entry(p, curpos); > + if (eq_delta_base_cache_entry(ent, p, curpos)) { > + type = ent->type; > + data = ent->data; > + size = ent->size; > + clear_delta_base_cache_entry(ent); > + base_from_cache = 1; > + break; > + } > + > + type = unpack_object_header(p, &w_curs, &curpos, &size); > + if (type != OBJ_OFS_DELTA && type != OBJ_REF_DELTA) > + break; > + > + base_offset = get_delta_base(p, &w_curs, &curpos, type, obj_offset); > + if (!base_offset) { > + error("failed to validate delta base reference " > + "at offset %"PRIuMAX" from %s", > + (uintmax_t)curpos, p->pack_name); > + /* bail to phase 2, in hopes of recovery */ > + data = NULL; > + break; > } > + > + /* push object, proceed to base */ > + if (delta_stack_nr >= delta_stack_alloc > + && delta_stack == small_delta_stack) { > + delta_stack_alloc = alloc_nr(delta_stack_nr); > + delta_stack = xmalloc(sizeof(*delta_stack)*delta_stack_alloc); > + memcpy(delta_stack, small_delta_stack, > + sizeof(*delta_stack)*delta_stack_nr); > + } else { > + ALLOC_GROW(delta_stack, delta_stack_nr+1, delta_stack_alloc); > + } > + i = delta_stack_nr++; > + delta_stack[i].obj_offset = obj_offset; > + delta_stack[i].curpos = curpos; > + delta_stack[i].size = size; > + > + curpos = obj_offset = base_offset; > } > > - *type = unpack_object_header(p, &w_curs, &curpos, sizep); > - switch (*type) { > + /* PHASE 2: handle the base */ > + switch (type) { > case OBJ_OFS_DELTA: > case OBJ_REF_DELTA: > - data = unpack_delta_entry(p, &w_curs, curpos, *sizep, > - obj_offset, type, sizep); > + if (data) > + die("BUG in unpack_entry: left loop at a valid delta"); > break; > case OBJ_COMMIT: > case OBJ_TREE: > case OBJ_BLOB: > case OBJ_TAG: > - data = unpack_compressed_entry(p, &w_curs, curpos, *sizep); > + if (!base_from_cache) > + data = unpack_compressed_entry(p, &w_curs, curpos, size); > break; > default: > data = NULL; > error("unknown object type %i at offset %"PRIuMAX" in %s", > - *type, (uintmax_t)obj_offset, p->pack_name); > + type, (uintmax_t)obj_offset, p->pack_name); > } > + > + /* PHASE 3: apply deltas in order */ > + > + /* invariants: > + * 'data' holds the base data, or NULL if there was corruption > + */ > + while (delta_stack_nr) { > + void *delta_data; > + void *base = data; > + unsigned long delta_size, base_size = size; > + int i; > + > + data = NULL; > + > + if (base) > + add_delta_base_cache(p, obj_offset, base, base_size, type); > + > + if (!base) { > + /* > + * We're probably in deep shit, but let's try to fetch > + * the required base anyway from another pack or loose. > + * This is costly but should happen only in the presence > + * of a corrupted pack, and is better than failing outright. > + */ > + struct revindex_entry *revidx; > + const unsigned char *base_sha1; > + revidx = find_pack_revindex(p, obj_offset); > + if (revidx) { > + base_sha1 = nth_packed_object_sha1(p, revidx->nr); > + error("failed to read delta base object %s" > + " at offset %"PRIuMAX" from %s", > + sha1_to_hex(base_sha1), (uintmax_t)obj_offset, > + p->pack_name); > + mark_bad_packed_object(p, base_sha1); > + base = read_object(base_sha1, &type, &base_size); > + } > + } > + > + i = --delta_stack_nr; > + obj_offset = delta_stack[i].obj_offset; > + curpos = delta_stack[i].curpos; > + delta_size = delta_stack[i].size; > + > + if (!base) > + continue; > + > + delta_data = unpack_compressed_entry(p, &w_curs, curpos, delta_size); > + > + if (!delta_data) { > + error("failed to unpack compressed delta " > + "at offset %"PRIuMAX" from %s", > + (uintmax_t)curpos, p->pack_name); > + free(base); > + data = NULL; > + continue; > + } > + > + data = patch_delta(base, base_size, > + delta_data, delta_size, > + &size); > + if (!data) > + die("failed to apply delta"); > + > + free (delta_data); > + } > + > + *final_type = type; > + *final_size = size; > + > unuse_pack(&w_curs); > return data; > } -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html