From: Jeff King <peff@xxxxxxxx> In a recent patch we added a function 'find_kept_pack_entry()' to look for an object only among kept packs. While this function avoids doing any lookup work in non-kept packs, it is still linear in the number of packs, since we have to traverse the linked list of packs once per object. Let's cache a reduced version of that list to save us time. Note that this cache will last the lifetime of the program. We could invalidate it on reprepare_packed_git(), but there's not much point in being rigorous here: - we might already fail to notice new .keep packs showing up after the program starts. We only reprepare_packed_git() when we fail to find an object. But adding a new pack won't cause that to happen. Somebody repacking could add a new pack and delete an old one, but most of the time we'd have a descriptor or mmap open to the old pack anyway, so we might not even notice. - in pack-objects we already cache the .keep state at startup, since 56dfeb6263 (pack-objects: compute local/ignore_pack_keep early, 2016-07-29). So this is just extending that concept further. - we don't have to worry about any packed_git being removed; we always keep the old structs around, even after reprepare_packed_git() Here are p5303 results (as always, measured against the kernel): Test HEAD^ HEAD ---------------------------------------------------------------------------------------------- 5303.5: repack (1) 57.44(54.71+10.78) 57.06(54.29+10.96) -0.7% 5303.6: repack with --stdin-packs (1) 0.01(0.00+0.01) 0.01(0.01+0.00) +0.0% 5303.10: repack (50) 71.32(88.38+4.90) 71.47(88.60+5.04) +0.2% 5303.11: repack with --stdin-packs (50) 3.43(11.81+0.22) 3.49(12.21+0.26) +1.7% 5303.15: repack (1000) 215.59(493.75+14.62) 217.41(495.36+14.85) +0.8% 5303.16: repack with --stdin-packs (1000) 131.44(314.24+8.11) 126.75(309.88+8.09) -3.6% Signed-off-by: Jeff King <peff@xxxxxxxx> Signed-off-by: Taylor Blau <me@xxxxxxxxxxxx> --- builtin/pack-objects.c | 6 +-- object-store.h | 10 ++++ packfile.c | 103 +++++++++++++++++++++++------------------ packfile.h | 4 -- revision.c | 8 ++-- 5 files changed, 76 insertions(+), 55 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index fbd7b54d70..b2ba5aa14f 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1225,9 +1225,9 @@ static int want_found_object(const struct object_id *oid, int exclude, */ unsigned flags = 0; if (ignore_packed_keep_on_disk) - flags |= ON_DISK_KEEP_PACKS; + flags |= CACHE_ON_DISK_KEEP_PACKS; if (ignore_packed_keep_in_core) - flags |= IN_CORE_KEEP_PACKS; + flags |= CACHE_IN_CORE_KEEP_PACKS; if (ignore_packed_keep_on_disk && p->pack_keep) return 0; @@ -3089,7 +3089,7 @@ static void read_packs_list_from_stdin(void) * an optimization during delta selection. */ revs.no_kept_objects = 1; - revs.keep_pack_cache_flags |= IN_CORE_KEEP_PACKS; + revs.keep_pack_cache_flags |= CACHE_IN_CORE_KEEP_PACKS; revs.blob_objects = 1; revs.tree_objects = 1; revs.tag_objects = 1; diff --git a/object-store.h b/object-store.h index c4fc9dd74e..4cbe8eae3c 100644 --- a/object-store.h +++ b/object-store.h @@ -105,6 +105,14 @@ static inline int pack_map_entry_cmp(const void *unused_cmp_data, return strcmp(pg1->pack_name, key ? key : pg2->pack_name); } +#define CACHE_ON_DISK_KEEP_PACKS 1 +#define CACHE_IN_CORE_KEEP_PACKS 2 + +struct kept_pack_cache { + struct packed_git **packs; + unsigned flags; +}; + struct raw_object_store { /* * Set of all object directories; the main directory is first (and @@ -150,6 +158,8 @@ struct raw_object_store { /* A most-recently-used ordered version of the packed_git list. */ struct list_head packed_git_mru; + struct kept_pack_cache *kept_pack_cache; + /* * A map of packfiles to packed_git structs for tracking which * packs have been loaded already. diff --git a/packfile.c b/packfile.c index 5f35cfe788..2a139c907b 100644 --- a/packfile.c +++ b/packfile.c @@ -2031,10 +2031,7 @@ static int fill_pack_entry(const struct object_id *oid, return 1; } -static int find_one_pack_entry(struct repository *r, - const struct object_id *oid, - struct pack_entry *e, - int kept_only) +int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e) { struct list_head *pos; struct multi_pack_index *m; @@ -2044,49 +2041,64 @@ static int find_one_pack_entry(struct repository *r, return 0; for (m = r->objects->multi_pack_index; m; m = m->next) { - if (!fill_midx_entry(r, oid, e, m)) - continue; - - if (!kept_only) - return 1; - - if (((kept_only & ON_DISK_KEEP_PACKS) && e->p->pack_keep) || - ((kept_only & IN_CORE_KEEP_PACKS) && e->p->pack_keep_in_core)) + if (fill_midx_entry(r, oid, e, m)) return 1; } list_for_each(pos, &r->objects->packed_git_mru) { struct packed_git *p = list_entry(pos, struct packed_git, mru); - if (p->multi_pack_index && !kept_only) { - /* - * If this pack is covered by the MIDX, we'd have found - * the object already in the loop above if it was here, - * so don't bother looking. - * - * The exception is if we are looking only at kept - * packs. An object can be present in two packs covered - * by the MIDX, one kept and one not-kept. And as the - * MIDX points to only one copy of each object, it might - * have returned only the non-kept version above. We - * have to check again to be thorough. - */ - continue; - } - if (!kept_only || - (((kept_only & ON_DISK_KEEP_PACKS) && p->pack_keep) || - ((kept_only & IN_CORE_KEEP_PACKS) && p->pack_keep_in_core))) { - if (fill_pack_entry(oid, e, p)) { - list_move(&p->mru, &r->objects->packed_git_mru); - return 1; - } + if (!p->multi_pack_index && fill_pack_entry(oid, e, p)) { + list_move(&p->mru, &r->objects->packed_git_mru); + return 1; } } return 0; } -int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e) +static void maybe_invalidate_kept_pack_cache(struct repository *r, + unsigned flags) { - return find_one_pack_entry(r, oid, e, 0); + if (!r->objects->kept_pack_cache) + return; + if (r->objects->kept_pack_cache->flags == flags) + return; + free(r->objects->kept_pack_cache->packs); + FREE_AND_NULL(r->objects->kept_pack_cache); +} + +static struct packed_git **kept_pack_cache(struct repository *r, unsigned flags) +{ + maybe_invalidate_kept_pack_cache(r, flags); + + if (!r->objects->kept_pack_cache) { + struct packed_git **packs = NULL; + size_t nr = 0, alloc = 0; + struct packed_git *p; + + /* + * We want "all" packs here, because we need to cover ones that + * are used by a midx, as well. We need to look in every one of + * them (instead of the midx itself) to cover duplicates. It's + * possible that an object is found in two packs that the midx + * covers, one kept and one not kept, but the midx returns only + * the non-kept version. + */ + for (p = get_all_packs(r); p; p = p->next) { + if ((p->pack_keep && (flags & CACHE_ON_DISK_KEEP_PACKS)) || + (p->pack_keep_in_core && (flags & CACHE_IN_CORE_KEEP_PACKS))) { + ALLOC_GROW(packs, nr + 1, alloc); + packs[nr++] = p; + } + } + ALLOC_GROW(packs, nr + 1, alloc); + packs[nr] = NULL; + + r->objects->kept_pack_cache = xmalloc(sizeof(*r->objects->kept_pack_cache)); + r->objects->kept_pack_cache->packs = packs; + r->objects->kept_pack_cache->flags = flags; + } + + return r->objects->kept_pack_cache->packs; } int find_kept_pack_entry(struct repository *r, @@ -2094,13 +2106,15 @@ int find_kept_pack_entry(struct repository *r, unsigned flags, struct pack_entry *e) { - /* - * Load all packs, including midx packs, since our "kept" strategy - * relies on that. We're relying on the side effect of it setting up - * r->objects->packed_git, which is a little ugly. - */ - get_all_packs(r); - return find_one_pack_entry(r, oid, e, flags); + struct packed_git **cache; + + for (cache = kept_pack_cache(r, flags); *cache; cache++) { + struct packed_git *p = *cache; + if (fill_pack_entry(oid, e, p)) + return 1; + } + + return 0; } int has_object_pack(const struct object_id *oid) @@ -2109,7 +2123,8 @@ int has_object_pack(const struct object_id *oid) return find_pack_entry(the_repository, oid, &e); } -int has_object_kept_pack(const struct object_id *oid, unsigned flags) +int has_object_kept_pack(const struct object_id *oid, + unsigned flags) { struct pack_entry e; return find_kept_pack_entry(the_repository, oid, flags, &e); diff --git a/packfile.h b/packfile.h index 624327f64d..eb56db2a7b 100644 --- a/packfile.h +++ b/packfile.h @@ -161,10 +161,6 @@ int packed_object_info(struct repository *r, void mark_bad_packed_object(struct packed_git *p, const unsigned char *sha1); const struct packed_git *has_packed_and_bad(struct repository *r, const unsigned char *sha1); -#define ON_DISK_KEEP_PACKS 1 -#define IN_CORE_KEEP_PACKS 2 -#define ALL_KEEP_PACKS (ON_DISK_KEEP_PACKS | IN_CORE_KEEP_PACKS) - /* * Iff a pack file in the given repository contains the object named by sha1, * return true and store its location to e. diff --git a/revision.c b/revision.c index 4c5adb90b1..41c0478705 100644 --- a/revision.c +++ b/revision.c @@ -2338,14 +2338,14 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg die(_("--unpacked=<packfile> no longer supported")); } else if (!strcmp(arg, "--no-kept-objects")) { revs->no_kept_objects = 1; - revs->keep_pack_cache_flags |= IN_CORE_KEEP_PACKS; - revs->keep_pack_cache_flags |= ON_DISK_KEEP_PACKS; + revs->keep_pack_cache_flags |= CACHE_IN_CORE_KEEP_PACKS; + revs->keep_pack_cache_flags |= CACHE_ON_DISK_KEEP_PACKS; } else if (skip_prefix(arg, "--no-kept-objects=", &optarg)) { revs->no_kept_objects = 1; if (!strcmp(optarg, "in-core")) - revs->keep_pack_cache_flags |= IN_CORE_KEEP_PACKS; + revs->keep_pack_cache_flags |= CACHE_IN_CORE_KEEP_PACKS; if (!strcmp(optarg, "on-disk")) - revs->keep_pack_cache_flags |= ON_DISK_KEEP_PACKS; + revs->keep_pack_cache_flags |= CACHE_ON_DISK_KEEP_PACKS; } else if (!strcmp(arg, "-r")) { revs->diff = 1; revs->diffopt.flags.recursive = 1; -- 2.30.0.533.g2f8b6b552f.dirty