From: Jeff King <peff@xxxxxxxx>

Now that we have find_kept_pack_entry(), we don't have to manually keep
hunting through every pack to find a possible "kept" duplicate of the
object. This should be faster, assuming only a portion of your total
packs are actually kept.

Note that we have to re-order the logic a bit here; we can deal with the
"kept" situation completely, and then just fall back to the "--local"
question. It might be worth having a similar optimized function to look
at only local packs.

Here are the results from p5303 (measurements again taken on the
kernel):

  Test                                       HEAD^                   HEAD
  -----------------------------------------------------------------------------------------------
  5303.5: repack (1)                         57.42(54.88+10.64)      57.44(54.71+10.78)      +0.0%
  5303.6: repack with --stdin-packs (1)      0.01(0.01+0.00)         0.01(0.00+0.01)         +0.0%
  5303.10: repack (50)                       71.26(88.24+4.96)       71.32(88.38+4.90)       +0.1%
  5303.11: repack with --stdin-packs (50)    3.49(11.82+0.28)        3.43(11.81+0.22)        -1.7%
  5303.15: repack (1000)                     215.64(491.33+14.80)    215.59(493.75+14.62)    -0.0%
  5303.16: repack with --stdin-packs (1000)  198.79(380.51+7.97)     131.44(314.24+8.11)     -33.9%

So our --stdin-packs case with many packs is now finally faster than the
non-keep case (because it gets the speed benefit of looking at fewer
objects, but not as big a penalty for looking at many packs).

Signed-off-by: Jeff King <peff@xxxxxxxx> Signed-off-by: Taylor Blau <me@xxxxxxxxxxxx> --- builtin/pack-objects.c | 125 ++++++++++++++++++++++++----------------- 1 file changed, 73 insertions(+), 52 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 6d19eb000a..fbd7b54d70 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1188,7 +1188,8 @@ static int have_duplicate_entry(const struct object_id *oid, return 1; } -static int want_found_object(int exclude, struct packed_git *p) +static int want_found_object(const struct object_id *oid, int exclude, + struct packed_git *p) { if (exclude) return 1; @@ -1209,22 +1210,73 @@ static int want_found_object(int exclude, struct packed_git *p) * Otherwise, we signal "-1" at the end to tell the caller that we do * not know either way, and it needs to check more packs. */ - if (!ignore_packed_keep_on_disk && - !ignore_packed_keep_in_core && - (!local || !have_non_local_packs)) + + /* + * Handle .keep first, as we have a fast(er) path there. + */ + if (ignore_packed_keep_on_disk || ignore_packed_keep_in_core) { + /* + * Set the flags for the kept-pack cache to be the ones we want + * to ignore. + * + * That is, if we are ignoring objects in on-disk keep packs, + * then we want to search through the on-disk keep and ignore + * the in-core ones. + */ + unsigned flags = 0; + if (ignore_packed_keep_on_disk) + flags |= ON_DISK_KEEP_PACKS; + if (ignore_packed_keep_in_core) + flags |= IN_CORE_KEEP_PACKS; + + if (ignore_packed_keep_on_disk && p->pack_keep) + return 0; + if (ignore_packed_keep_in_core && p->pack_keep_in_core) + return 0; + if (has_object_kept_pack(oid, flags)) + return 0; + } + + /* + * At this point we know definitively that either we don't care about + * keep-packs, or the object is not in one. Keep checking other + * conditions... 
+ */ + + if (!local || !have_non_local_packs) return 1; - if (local && !p->pack_local) return 0; - if (p->pack_local && - ((ignore_packed_keep_on_disk && p->pack_keep) || - (ignore_packed_keep_in_core && p->pack_keep_in_core))) - return 0; /* we don't know yet; keep looking for more packs */ return -1; } +static int want_object_in_pack_one(struct packed_git *p, + const struct object_id *oid, + int exclude, + struct packed_git **found_pack, + off_t *found_offset) +{ + off_t offset; + + if (p == *found_pack) + offset = *found_offset; + else + offset = find_pack_entry_one(oid->hash, p); + + if (offset) { + if (!*found_pack) { + if (!is_pack_valid(p)) + return -1; + *found_offset = offset; + *found_pack = p; + } + return want_found_object(oid, exclude, p); + } + return -1; +} + /* * Check whether we want the object in the pack (e.g., we do not want * objects found in non-local stores if the "--local" option was used). @@ -1252,7 +1304,7 @@ static int want_object_in_pack(const struct object_id *oid, * are present we will determine the answer right now. 
*/ if (*found_pack) { - want = want_found_object(exclude, *found_pack); + want = want_found_object(oid, exclude, *found_pack); if (want != -1) return want; } @@ -1260,53 +1312,22 @@ static int want_object_in_pack(const struct object_id *oid, for (m = get_multi_pack_index(the_repository); m; m = m->next) { struct pack_entry e; if (fill_midx_entry(the_repository, oid, &e, m)) { - struct packed_git *p = e.p; - off_t offset; - - if (p == *found_pack) - offset = *found_offset; - else - offset = find_pack_entry_one(oid->hash, p); - - if (offset) { - if (!*found_pack) { - if (!is_pack_valid(p)) - continue; - *found_offset = offset; - *found_pack = p; - } - want = want_found_object(exclude, p); - if (want != -1) - return want; - } - } - } - - list_for_each(pos, get_packed_git_mru(the_repository)) { - struct packed_git *p = list_entry(pos, struct packed_git, mru); - off_t offset; - - if (p == *found_pack) - offset = *found_offset; - else - offset = find_pack_entry_one(oid->hash, p); - - if (offset) { - if (!*found_pack) { - if (!is_pack_valid(p)) - continue; - *found_offset = offset; - *found_pack = p; - } - want = want_found_object(exclude, p); - if (!exclude && want > 0) - list_move(&p->mru, - get_packed_git_mru(the_repository)); + want = want_object_in_pack_one(e.p, oid, exclude, found_pack, found_offset); if (want != -1) return want; } } + list_for_each(pos, get_packed_git_mru(the_repository)) { + struct packed_git *p = list_entry(pos, struct packed_git, mru); + want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset); + if (!exclude && want > 0) + list_move(&p->mru, + get_packed_git_mru(the_repository)); + if (want != -1) + return want; + } + if (uri_protocols.nr) { struct configured_exclusion *ex = oidmap_get(&configured_exclusions, oid); -- 2.30.0.533.g2f8b6b552f.dirty