Now that both the pack-bitmap and pack-objects code are prepared to handle marking and using objects from multiple disjoint packs for verbatim reuse, allow marking objects from all disjoint packs as eligible for reuse. Within the `reuse_partial_packfile_from_bitmap()` function, we no longer only mark the pack whose first object is at bit position zero for reuse, and instead mark any pack which is flagged as disjoint by the MIDX as a reuse candidate. If no such packs exist (i.e because we are reading a MIDX written before the "DISP" chunk was introduced), then treat the preferred pack as disjoint for the purposes of reuse. This is a safe assumption to make since all duplicate objects are resolved in favor of the preferred pack. Provide a handful of test cases in a new script (t5332) exercising interesting behavior for multi-pack reuse to ensure that we performed all of the previous steps correctly. Signed-off-by: Taylor Blau <me@xxxxxxxxxxxx> --- Documentation/config/pack.txt | 6 +- builtin/pack-objects.c | 6 +- pack-bitmap.c | 73 +++++++++--- pack-bitmap.h | 3 +- t/t5332-multi-pack-reuse.sh | 219 ++++++++++++++++++++++++++++++++++ 5 files changed, 290 insertions(+), 17 deletions(-) create mode 100755 t/t5332-multi-pack-reuse.sh diff --git a/Documentation/config/pack.txt b/Documentation/config/pack.txt index fe100d0fb7..9fe48d41c9 100644 --- a/Documentation/config/pack.txt +++ b/Documentation/config/pack.txt @@ -30,7 +30,11 @@ to linkgit:git-repack[1]. pack.allowPackReuse:: When true or "single", and when reachability bitmaps are enabled, pack-objects will try to send parts of the bitmapped packfile - verbatim. This can reduce memory and CPU usage to serve fetches, + verbatim. When "multi", and when a multi-pack reachability bitmap is + available, pack-objects will try to send parts of all packs marked as + disjoint by the MIDX. If only a single pack bitmap is available, and + `pack.allowPackReuse` is set to "multi", reuse parts of just the + bitmapped packfile. This can reduce memory and CPU usage to serve fetches, but might result in sending a slightly larger pack. Defaults to true. diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 4853e91251..43b77bff7c 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -233,6 +233,7 @@ static int use_bitmap_index = -1; static enum { NO_PACK_REUSE = 0, SINGLE_PACK_REUSE, + MULTI_PACK_REUSE, } allow_pack_reuse = SINGLE_PACK_REUSE; static enum { WRITE_BITMAP_FALSE = 0, @@ -3253,6 +3254,8 @@ static int git_pack_config(const char *k, const char *v, if (res < 0) { if (!strcasecmp(v, "single")) allow_pack_reuse = SINGLE_PACK_REUSE; + else if (!strcasecmp(v, "multi")) + allow_pack_reuse = MULTI_PACK_REUSE; else die(_("invalid pack.allowPackReuse value: '%s'"), v); } else if (res) { @@ -4032,7 +4035,8 @@ static int get_object_list_from_bitmap(struct rev_info *revs) reuse_partial_packfile_from_bitmap(bitmap_git, &reuse_packfiles, &reuse_packfiles_nr, - &reuse_packfile_bitmap); + &reuse_packfile_bitmap, + allow_pack_reuse == MULTI_PACK_REUSE); if (reuse_packfiles) { reuse_packfile_objects = bitmap_popcount(reuse_packfile_bitmap); diff --git a/pack-bitmap.c b/pack-bitmap.c index be53fc6da5..561690c679 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -2061,10 +2061,19 @@ static void reuse_partial_packfile_from_bitmap_1(struct bitmap_index *bitmap_git unuse_pack(&w_curs); } +static void make_disjoint_pack(struct bitmapped_pack *out, struct packed_git *p) +{ + out->p = p; + out->bitmap_pos = 0; + out->bitmap_nr = p->num_objects; + out->disjoint = 1; +} + void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, struct bitmapped_pack **packs_out, size_t *packs_nr_out, - struct bitmap **reuse_out) + struct bitmap **reuse_out, + int multi_pack_reuse) { struct repository *r = the_repository; struct bitmapped_pack *packs = NULL; @@ -2088,24 +2097,62 @@ void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, free(packs); return; } - if (!pack.bitmap_nr) - continue; /* no objects from this pack */ - if (pack.bitmap_pos) - continue; /* not preferred pack */ + + if (!pack.disjoint) + continue; + + if (!multi_pack_reuse && pack.bitmap_pos) { + /* + * If we're only reusing a single pack, skip + * over any packs which are not positioned at + * the beginning of the MIDX bitmap. + * + * This is consistent with the existing + * single-pack reuse behavior, which only reuses + * parts of the MIDX's preferred pack. + */ + continue; + } ALLOC_GROW(packs, packs_nr + 1, packs_alloc); memcpy(&packs[packs_nr++], &pack, sizeof(pack)); objects_nr += pack.p->num_objects; + + if (!multi_pack_reuse) + break; + } + + if (!packs_nr) { + /* + * Old MIDXs (i.e. those written before the "DISP" chunk + * existed) will not have any packs marked as disjoint. + * + * But we still want to perform pack reuse with the + * special "preferred pack" as before. To do this, form + * the singleton set containing just the preferred pack, + * which is trivially disjoint with itself. + * + * Moreover, the MIDX is guaranteed to resolve duplicate + * objects in favor of the copy in the preferred pack + * (if one exists). Thus, we can safely perform pack + * reuse on this pack. + */ + uint32_t preferred_pack_pos; + struct packed_git *preferred_pack; + + preferred_pack_pos = midx_preferred_pack(bitmap_git); + preferred_pack = bitmap_git->midx->packs[preferred_pack_pos]; + + ALLOC_GROW(packs, packs_nr + 1, packs_alloc); + + make_disjoint_pack(&packs[packs_nr], preferred_pack); + objects_nr = packs[packs_nr++].p->num_objects; } } else { ALLOC_GROW(packs, packs_nr + 1, packs_alloc); - packs[packs_nr].p = bitmap_git->pack; - packs[packs_nr].bitmap_pos = 0; - packs[packs_nr].bitmap_nr = bitmap_git->pack->num_objects; - packs[packs_nr].disjoint = 1; - + make_disjoint_pack(&packs[packs_nr], bitmap_git->pack); objects_nr = packs[packs_nr++].p->num_objects; } @@ -2114,10 +2161,8 @@ void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, word_alloc++; reuse = bitmap_word_alloc(word_alloc); - if (packs_nr != 1) - BUG("pack reuse not yet implemented for multiple packs"); - - reuse_partial_packfile_from_bitmap_1(bitmap_git, packs, reuse); + for (i = 0; i < packs_nr; i++) + reuse_partial_packfile_from_bitmap_1(bitmap_git, &packs[i], reuse); if (!bitmap_popcount(reuse)) { free(packs); diff --git a/pack-bitmap.h b/pack-bitmap.h index 901a3b86ed..8bb316ce52 100644 --- a/pack-bitmap.h +++ b/pack-bitmap.h @@ -81,7 +81,8 @@ uint32_t midx_preferred_pack(struct bitmap_index *bitmap_git); void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, struct bitmapped_pack **packs_out, size_t *packs_nr_out, - struct bitmap **reuse_out); + struct bitmap **reuse_out, + int multi_pack_reuse); int rebuild_existing_bitmaps(struct bitmap_index *, struct packing_data *mapping, kh_oid_map_t *reused_bitmaps, int show_progress); void free_bitmap_index(struct bitmap_index *); diff --git a/t/t5332-multi-pack-reuse.sh b/t/t5332-multi-pack-reuse.sh new file mode 100755 index 0000000000..a9bd3870e6 --- /dev/null +++ b/t/t5332-multi-pack-reuse.sh @@ -0,0 +1,219 @@ +#!/bin/sh + +test_description='pack-objects multi-pack reuse' + +. ./test-lib.sh +. "$TEST_DIRECTORY"/lib-bitmap.sh +. "$TEST_DIRECTORY"/lib-disjoint.sh + +objdir=.git/objects +packdir=$objdir/pack + +all_packs () { + find $packdir -type f -name "*.idx" | sed -e 's/^.*\/\([^\/]\)/\1/g' +} + +all_disjoint () { + all_packs | sed -e 's/^/+/g' +} + +test_pack_reused () { + test_trace2_data pack-objects pack-reused "$1" +} + +test_packs_reused () { + test_trace2_data pack-objects packs-reused "$1" +} + + +# pack_position <object> </path/to/pack.idx +pack_position () { + git show-index >objects && + grep "$1" objects | cut -d" " -f1 +} + +test_expect_success 'setup' ' + git config pack.allowPackReuse multi +' + +test_expect_success 'preferred pack is reused without packs marked disjoint' ' + test_commit A && + test_commit B && + + A="$(echo A | git pack-objects --unpacked --delta-base-offset $packdir/pack)" && + B="$(echo B | git pack-objects --unpacked --delta-base-offset $packdir/pack)" && + + git prune-packed && + + git multi-pack-index write --bitmap && + + test_must_not_be_disjoint "pack-$A.pack" && + test_must_not_be_disjoint "pack-$B.pack" && + + : >trace2.txt && + GIT_TRACE2_EVENT="$PWD/trace2.txt" \ + git pack-objects --stdout --revs --all >/dev/null && + + test_pack_reused 3 <trace2.txt && + test_packs_reused 1 <trace2.txt +' + +test_expect_success 'reuse all objects from subset of disjoint packs' ' + test_commit C && + + C="$(echo C | git pack-objects --unpacked --delta-base-offset $packdir/pack)" && + + git prune-packed && + + cat >in <<-EOF && + pack-$A.idx + +pack-$B.idx + +pack-$C.idx + EOF + git multi-pack-index write --bitmap --stdin-packs <in && + + test_must_not_be_disjoint "pack-$A.pack" && + test_must_be_disjoint "pack-$B.pack" && + test_must_be_disjoint "pack-$C.pack" && + + : >trace2.txt && + GIT_TRACE2_EVENT="$PWD/trace2.txt" \ + git pack-objects --stdout --revs --all >/dev/null && + + test_pack_reused 6 <trace2.txt && + test_packs_reused 2 <trace2.txt +' + +test_expect_success 'reuse all objects from all disjoint packs' ' + rm -fr $packdir/multi-pack-index* && + + all_disjoint >in && + git multi-pack-index write --bitmap --stdin-packs <in && + + test_must_be_disjoint "pack-$A.pack" && + test_must_be_disjoint "pack-$B.pack" && + test_must_be_disjoint "pack-$C.pack" && + + : >trace2.txt && + GIT_TRACE2_EVENT="$PWD/trace2.txt" \ + git pack-objects --stdout --revs --all >/dev/null && + + test_pack_reused 9 <trace2.txt && + test_packs_reused 3 <trace2.txt +' + +test_expect_success 'reuse objects from first disjoint pack with middle gap' ' + test_commit D && + test_commit E && + test_commit F && + + # Set "pack.window" to zero to ensure that we do not create any + # deltas, which could alter the amount of pack reuse we perform + # (if, for e.g., we are not sending one or more bases). + D="$(git -c pack.window=0 pack-objects --all --unpacked $packdir/pack)" && + + d_pos="$(pack_position $(git rev-parse D) <$packdir/pack-$D.idx)" && + e_pos="$(pack_position $(git rev-parse E) <$packdir/pack-$D.idx)" && + f_pos="$(pack_position $(git rev-parse F) <$packdir/pack-$D.idx)" && + + # commits F, E, and D, should appear in that order at the + # beginning of the pack + test $f_pos -lt $e_pos && + test $e_pos -lt $d_pos && + + # Ensure that the pack we are constructing sorts ahead of any + # other packs in lexical/bitmap order by choosing it as the + # preferred pack. + all_disjoint >in && + git multi-pack-index write --bitmap --preferred-pack="pack-$D.idx" \ + --stdin-packs <in && + + test_must_be_disjoint pack-$A.pack && + test_must_be_disjoint pack-$B.pack && + test_must_be_disjoint pack-$C.pack && + test_must_be_disjoint pack-$D.pack && + + cat >in <<-EOF && + $(git rev-parse E) + ^$(git rev-parse D) + EOF + + : >trace2.txt && + GIT_TRACE2_EVENT="$PWD/trace2.txt" \ + git pack-objects --stdout --delta-base-offset --revs <in >/dev/null && + + test_pack_reused 3 <trace2.txt && + test_packs_reused 1 <trace2.txt +' + +test_expect_success 'reuse objects from middle disjoint pack with middle gap' ' + rm -fr $packdir/multi-pack-index* && + + # Ensure that the pack we are constructing sort into any + # position *but* the first one, by choosing a different pack as + # the preferred one. + all_disjoint >in && + git multi-pack-index write --bitmap --preferred-pack="pack-$A.idx" \ + --stdin-packs <in && + + cat >in <<-EOF && + $(git rev-parse E) + ^$(git rev-parse D) + EOF + + : >trace2.txt && + GIT_TRACE2_EVENT="$PWD/trace2.txt" \ + git pack-objects --stdout --delta-base-offset --revs <in >/dev/null && + + test_pack_reused 3 <trace2.txt && + test_packs_reused 1 <trace2.txt +' + +test_expect_success 'omit delta with uninteresting base' ' + git repack -adk && + + test_seq 32 >f && + git add f && + test_tick && + git commit -m "delta" && + delta="$(git rev-parse HEAD)" && + + test_seq 64 >f && + test_tick && + git commit -a -m "base" && + base="$(git rev-parse HEAD)" && + + test_commit other && + + git repack -d && + + have_delta "$(git rev-parse $delta:f)" "$(git rev-parse $base:f)" && + + all_disjoint >in && + git multi-pack-index write --bitmap --stdin-packs <in && + + cat >in <<-EOF && + $(git rev-parse other) + ^$base + EOF + + : >trace2.txt && + GIT_TRACE2_EVENT="$PWD/trace2.txt" \ + git pack-objects --stdout --delta-base-offset --revs <in >/dev/null && + + # Even though all packs are marked disjoint, we can only reuse + # the 3 objects corresponding to "other" from the latest pack. + # + # This is because even though we want "delta", we do not want + # "base", meaning that we have to inflate the delta/base-pair + # corresponding to the blob in commit "delta", which bypasses + # the pack-reuse mechanism. + # + # The remaining objects from the other pack are similarly not + # reused because their objects are on the uninteresting side of + # the query. + test_pack_reused 3 <trace2.txt && + test_packs_reused 1 <trace2.txt +' + +test_done -- 2.43.0.24.g980b318f98