Hi, Here is a revision of the first of two series to prepare for and introduce an on-disk alternative for storing the reverse index. In this revision, I addressed feedback from Junio, Peff, and Stolee. A range-diff is included below, but the main changes are: - Error messages are improved to include the pack and offset when applicable. - Variable names were made clearer (e.g., n -> index_pos). - Comments were added in pack-revindex.h to introduce relevant terminology, and which methods convert between what orderings. - int-sized lower- and upper-bounds were converted to be unsigned. I believe that this revision should be ready for queueing. I'll send a v2 of the corresponding latter series shortly. Thanks in advance for your review. Taylor Blau (20): pack-revindex: introduce a new API write_reuse_object(): convert to new revindex API write_reused_pack_one(): convert to new revindex API write_reused_pack_verbatim(): convert to new revindex API check_object(): convert to new revindex API bitmap_position_packfile(): convert to new revindex API show_objects_for_type(): convert to new revindex API get_size_by_pos(): convert to new revindex API try_partial_reuse(): convert to new revindex API rebuild_existing_bitmaps(): convert to new revindex API get_delta_base_oid(): convert to new revindex API retry_bad_packed_offset(): convert to new revindex API packed_object_info(): convert to new revindex API unpack_entry(): convert to new revindex API for_each_object_in_pack(): convert to new revindex API builtin/gc.c: guess the size of the revindex pack-revindex: remove unused 'find_pack_revindex()' pack-revindex: remove unused 'find_revindex_position()' pack-revindex: hide the definition of 'revindex_entry' pack-revindex.c: avoid direct revindex access in 'offset_to_pack_pos()' builtin/gc.c | 2 +- builtin/pack-objects.c | 37 +++++++++++++++++--------- pack-bitmap.c | 44 +++++++++++++++---------------- pack-revindex.c | 51 ++++++++++++++++++++++------------- pack-revindex.h | 60 +++++++++++++++++++++++++++++++++++++----- packfile.c | 54 ++++++++++++++++++++++++------------- 6 files changed, 168 insertions(+), 80 deletions(-) Range-diff against v1: 1: fa6b830908 < -: ---------- pack-revindex: introduce a new API -: ---------- > 1: e1aa89244a pack-revindex: introduce a new API 2: 00668523e1 ! 2: 0fca7d5812 write_reuse_object(): convert to new revindex API @@ builtin/pack-objects.c: static off_t write_reuse_object(struct hashfile *f, stru - revidx = find_pack_revindex(p, offset); - datalen = revidx[1].offset - offset; + if (offset_to_pack_pos(p, offset, &pos) < 0) -+ die(_("write_reuse_object: could not locate %s"), -+ oid_to_hex(&entry->idx.oid)); ++ die(_("write_reuse_object: could not locate %s, expected at " ++ "offset %"PRIuMAX" in pack %s"), ++ oid_to_hex(&entry->idx.oid), (uintmax_t)offset, ++ p->pack_name); + datalen = pack_pos_to_offset(p, pos + 1) - offset; if (!pack_to_stdout && p->index_version > 1 && - check_pack_crc(p, &w_curs, offset, datalen, revidx->nr)) { 3: 81ab11e18c ! 3: 7676822a54 write_reused_pack_one(): convert to new revindex API @@ builtin/pack-objects.c: static void write_reused_pack_one(size_t pos, struct has struct object_id base_oid; + if (offset_to_pack_pos(reuse_packfile, base_offset, &base_pos) < 0) -+ die(_("expected object at offset %"PRIuMAX), -+ (uintmax_t)base_offset); ++ die(_("expected object at offset %"PRIuMAX" " ++ "in pack %s"), ++ (uintmax_t)base_offset, ++ reuse_packfile->pack_name); + nth_packed_object_id(&base_oid, reuse_packfile, - reuse_packfile->revindex[base_pos].nr); 4: 14b35d01a0 = 4: dd7133fdb7 write_reused_pack_verbatim(): convert to new revindex API 5: c47e77a30e = 5: 8e93ca3886 check_object(): convert to new revindex API 6: 3b170663dd = 6: 084bbf2145 bitmap_position_packfile(): convert to new revindex API 7: bc67bb462a ! 7: 68794e9484 show_objects_for_type(): convert to new revindex API @@ pack-bitmap.c: static void show_objects_for_type( struct object_id oid; - struct revindex_entry *entry; - uint32_t hash = 0; -+ uint32_t hash = 0, n; ++ uint32_t hash = 0, index_pos; + off_t ofs; if ((word >> offset) == 0) @@ pack-bitmap.c: static void show_objects_for_type( - entry = &bitmap_git->pack->revindex[pos + offset]; - nth_packed_object_id(&oid, bitmap_git->pack, entry->nr); -+ n = pack_pos_to_index(bitmap_git->pack, pos + offset); ++ index_pos = pack_pos_to_index(bitmap_git->pack, pos + offset); + ofs = pack_pos_to_offset(bitmap_git->pack, pos + offset); -+ nth_packed_object_id(&oid, bitmap_git->pack, n); ++ nth_packed_object_id(&oid, bitmap_git->pack, index_pos); if (bitmap_git->hashes) - hash = get_be32(bitmap_git->hashes + entry->nr); -+ hash = get_be32(bitmap_git->hashes + n); ++ hash = get_be32(bitmap_git->hashes + index_pos); - show_reach(&oid, object_type, 0, hash, bitmap_git->pack, entry->offset); + show_reach(&oid, object_type, 0, hash, bitmap_git->pack, ofs); 8: 541fe679f3 = 8: 31ac6f5703 get_size_by_pos(): convert to new revindex API 9: 54f4ad329f ! 9: acd80069a2 try_partial_reuse(): convert to new revindex API @@ Commit message 'pack_pos_to_offset()' instead (the caller here does not care about the index position of the object at position 'pos'). - Somewhat confusingly, the subsequent call to unpack_object_header() - takes a pointer to &offset and then updates it with a new value. But, - try_partial_reuse() cares about the offset of both the base's header and - contents. The existing code made a copy of the offset field, and only - addresses and manipulates one of them. - - Instead, store the return of pack_pos_to_offset twice: once in header - and another in offset. Header will be left untouched, but offset will be - addressed and modified by unpack_object_header(). + Note that we cannot just use the existing "offset" variable to store the + value we get from pack_pos_to_offset(). It is incremented by + unpack_object_header(), but we later need the original value. Since + we'll no longer have revindex->offset to read it from, we'll store that + in a separate variable ("header" since it points to the entry's header + bytes). Signed-off-by: Taylor Blau <me@xxxxxxxxxxxx> 10: 97eaa7b2d6 = 10: 569acdca7f rebuild_existing_bitmaps(): convert to new revindex API 11: e00c434ab2 = 11: 9881637724 get_delta_base_oid(): convert to new revindex API 12: aae01d7029 = 12: df8bb571a5 retry_bad_packed_offset(): convert to new revindex API 13: eab7ab1f35 ! 13: 41b2e00947 packed_object_info(): convert to new revindex API @@ packfile.c: int packed_object_info(struct repository *r, struct packed_git *p, - *oi->disk_sizep = revidx[1].offset - obj_offset; + uint32_t pos; + if (offset_to_pack_pos(p, obj_offset, &pos) < 0) { ++ error("could not find object at offset %"PRIuMAX" " ++ "in pack %s", (uintmax_t)obj_offset, p->pack_name); + type = OBJ_BAD; + goto out; + } 14: 13c49ed40c ! 14: 8ad49d231f unpack_entry(): convert to new revindex API @@ Commit message Remove direct manipulation of the 'struct revindex_entry' type as well as calls to the deprecated API in 'packfile.c:unpack_entry()'. Usual clean-up is performed (replacing '->nr' with calls to - 'pack_pos_to_index()' and so on). Add an additional check to make - sure that 'obj_offset()' points at a valid object. + 'pack_pos_to_index()' and so on). + + Add an additional check to make sure that 'obj_offset()' points at a + valid object. In the case this check is violated, we cannot call + 'mark_bad_packed_object()' because we don't know the OID. At the top of + the call stack is do_oid_object_info_extended() (via + packed_object_info()), which does mark the object. Signed-off-by: Taylor Blau <me@xxxxxxxxxxxx> @@ packfile.c: void *unpack_entry(struct repository *r, struct packed_git *p, off_t - struct revindex_entry *revidx = find_pack_revindex(p, obj_offset); - off_t len = revidx[1].offset - obj_offset; - if (check_pack_crc(p, &w_curs, obj_offset, len, revidx->nr)) { -+ uint32_t pos, nr; ++ uint32_t pack_pos, index_pos; + off_t len; + -+ if (offset_to_pack_pos(p, obj_offset, &pos) < 0) { ++ if (offset_to_pack_pos(p, obj_offset, &pack_pos) < 0) { ++ error("could not find object at offset %"PRIuMAX" in pack %s", ++ (uintmax_t)obj_offset, p->pack_name); + data = NULL; + goto out; + } + -+ len = pack_pos_to_offset(p, pos + 1) - obj_offset; -+ nr = pack_pos_to_index(p, pos); -+ if (check_pack_crc(p, &w_curs, obj_offset, len, nr)) { ++ len = pack_pos_to_offset(p, pack_pos + 1) - obj_offset; ++ index_pos = pack_pos_to_index(p, pack_pos); ++ if (check_pack_crc(p, &w_curs, obj_offset, len, index_pos)) { struct object_id oid; - nth_packed_object_id(&oid, p, revidx->nr); -+ nth_packed_object_id(&oid, p, nr); ++ nth_packed_object_id(&oid, p, index_pos); error("bad packed object CRC for %s", oid_to_hex(&oid)); mark_bad_packed_object(p, oid.hash); 15: a3249986f9 = 15: e757476351 for_each_object_in_pack(): convert to new revindex API 16: 7c17db7a7d = 16: a500311e33 builtin/gc.c: guess the size of the revindex 17: c4c88bcc3d ! 17: 67d14da04a pack-revindex: remove unused 'find_pack_revindex()' @@ pack-revindex.h: struct revindex_entry { -struct revindex_entry *find_pack_revindex(struct packed_git *p, off_t ofs); - - int offset_to_pack_pos(struct packed_git *p, off_t ofs, uint32_t *pos); - uint32_t pack_pos_to_index(struct packed_git *p, uint32_t pos); - off_t pack_pos_to_offset(struct packed_git *p, uint32_t pos); + /* + * offset_to_pack_pos converts an object offset to a pack position. This + * function returns zero on success, and a negative number otherwise. The 18: d60411d524 ! 18: 3b5c92be68 pack-revindex: remove unused 'find_revindex_position()' @@ Commit message until _after_ 'load_pack_revindex()' (which loads the index as a side-effect) has been called. + Another small fix that is included is converting the upper- and + lower-bounds to be unsigned's instead of ints. This dates back to + 92e5c77c37 (revindex: export new APIs, 2013-10-24)--ironically, the last + time we introduced new APIs here--but this unifies the types. + Signed-off-by: Taylor Blau <me@xxxxxxxxxxxx> ## pack-revindex.c ## @@ pack-revindex.c: int load_pack_revindex(struct packed_git *p) -int find_revindex_position(struct packed_git *p, off_t ofs) +int offset_to_pack_pos(struct packed_git *p, off_t ofs, uint32_t *pos) { - int lo = 0; +- int lo = 0; - int hi = p->num_objects + 1; - const struct revindex_entry *revindex = p->revindex; -+ int hi; ++ unsigned lo, hi; + const struct revindex_entry *revindex; + + if (load_pack_revindex(p) < 0) + return -1; + ++ lo = 0; + hi = p->num_objects + 1; + revindex = p->revindex; @@ pack-revindex.c: int find_revindex_position(struct packed_git *p, off_t ofs) - - ret = find_revindex_position(p, ofs); - if (ret < 0) -- return -1; +- return ret; - *pos = ret; - return 0; -} @@ pack-revindex.c: int find_revindex_position(struct packed_git *p, off_t ofs) ## pack-revindex.h ## @@ pack-revindex.h: struct revindex_entry { - }; - + * given pack, returning zero on success and a negative value otherwise. + */ int load_pack_revindex(struct packed_git *p); -int find_revindex_position(struct packed_git *p, off_t ofs); - int offset_to_pack_pos(struct packed_git *p, off_t ofs, uint32_t *pos); - uint32_t pack_pos_to_index(struct packed_git *p, uint32_t pos); + /* + * offset_to_pack_pos converts an object offset to a pack position. This 19: 7c0e4acc84 ! 19: cabafce4a1 pack-revindex: hide the definition of 'revindex_entry' @@ pack-revindex.h - unsigned int nr; -}; - - int load_pack_revindex(struct packed_git *p); - - int offset_to_pack_pos(struct packed_git *p, off_t ofs, uint32_t *pos); + /* + * load_pack_revindex populates the revindex's internal data-structures for the + * given pack, returning zero on success and a negative value otherwise. 20: eada1ffcfa ! 20: 8400ff6c96 pack-revindex.c: avoid direct revindex access in 'offset_to_pack_pos()' @@ Commit message Signed-off-by: Taylor Blau <me@xxxxxxxxxxxx> ## pack-revindex.c ## -@@ pack-revindex.c: int offset_to_pack_pos(struct packed_git *p, off_t ofs, uint32_t *pos) +@@ pack-revindex.c: int load_pack_revindex(struct packed_git *p) + int offset_to_pack_pos(struct packed_git *p, off_t ofs, uint32_t *pos) { - int lo = 0; - int hi; + unsigned lo, hi; - const struct revindex_entry *revindex; if (load_pack_revindex(p) < 0) return -1; + lo = 0; hi = p->num_objects + 1; - revindex = p->revindex; -- 2.30.0.138.g6d7191ea01