On Tue, Oct 8, 2024 at 1:14 AM Han Young <hanyang.tony@xxxxxxxxxxxxx> wrote: > > In a partial repository, creating a local commit and then fetching > causes the following state to occur: > > commit tree blob > C3 ---- T3 -- B3 (fetched from remote, in promisor pack) > | > C2 ---- T2 -- B2 (created locally, in non-promisor pack) > | > C1 ---- T1 -- B1 (fetched from remote, in promisor pack) > > During garbage collection, parents of promisor objects are marked as > UNINTERESTING and are subsequently garbage collected. In this case, C2 > would be deleted and attempts to access that commit would result in "bad > object" errors (originally reported here[1]). > > For partial repos, repacking is done in two steps. We first repack all the > objects in promisor packfile, then repack all the non-promisor objects. > It turns out C2, T2 and B2 are not repacked in either steps, ended up deleted. > We can avoid this by packing everything into a promisor packfile, if the repo > is partial cloned. > > [1] https://lore.kernel.org/git/20240802073143.56731-1-hanyang.tony@xxxxxxxxxxxxx/ > > Helped-by: Calvin Wan <calvinwan@xxxxxxxxxx> > Signed-off-by: Han Young <hanyang.tony@xxxxxxxxxxxxx> > --- > builtin/repack.c | 257 ++++++++++++++++++++++++++--------------------- > 1 file changed, 143 insertions(+), 114 deletions(-) > > diff --git a/builtin/repack.c b/builtin/repack.c > index cb4420f085..e9e18a31fe 100644 > --- a/builtin/repack.c > +++ b/builtin/repack.c > @@ -321,6 +321,23 @@ static int write_oid(const struct object_id *oid, > return 0; > } > > +static int write_loose_oid(const struct object_id *oid, > + const char *path UNUSED, > + void *data) > +{ > + struct child_process *cmd = data; > + > + if (cmd->in == -1) { > + if (start_command(cmd)) > + die(_("could not start pack-objects to repack promisor objects")); > + } > + > + if (write_in_full(cmd->in, oid_to_hex(oid), the_hash_algo->hexsz) < 0 || > + write_in_full(cmd->in, "\n", 1) < 0) > + die(_("failed to feed promisor objects to pack-objects")); > + return 0; > +} > + > static struct { > const char *name; > unsigned optional:1; > @@ -370,12 +387,15 @@ static int has_pack_ext(const struct generated_pack_data *data, > BUG("unknown pack extension: '%s'", ext); > } > > -static void repack_promisor_objects(const struct pack_objects_args *args, > - struct string_list *names) > +static int repack_promisor_objects(const struct pack_objects_args *args, > + struct string_list *names, > + struct string_list *list, > + int pack_all) > { > struct child_process cmd = CHILD_PROCESS_INIT; > FILE *out; > struct strbuf line = STRBUF_INIT; > + struct string_list_item *item; > > prepare_pack_objects(&cmd, args, packtmp); > cmd.in = -1; > @@ -387,13 +407,19 @@ static void repack_promisor_objects(const struct pack_objects_args *args, > * {type -> existing pack order} ordering when computing deltas instead > * of a {type -> size} ordering, which may produce better deltas. > */ > - for_each_packed_object(write_oid, &cmd, > - FOR_EACH_OBJECT_PROMISOR_ONLY); > + if (pack_all) > + for_each_packed_object(write_oid, &cmd, 0); > + else > + for_each_string_list_item(item, list) { > + pack_mark_retained(item); > + } > + > + for_each_loose_object(write_loose_oid, &cmd, 0); > > if (cmd.in == -1) { > /* No packed objects; cmd was never started */ > child_process_clear(&cmd); > - return; > + return 0; > } > > close(cmd.in); > @@ -431,6 +457,7 @@ static void repack_promisor_objects(const struct pack_objects_args *args, > if (finish_command(&cmd)) > die(_("could not finish pack-objects to repack promisor objects")); > strbuf_release(&line); > + return 0; > } > > struct pack_geometry { > @@ -1312,8 +1339,7 @@ int cmd_repack(int argc, > strvec_push(&cmd.args, "--reflog"); > strvec_push(&cmd.args, "--indexed-objects"); > } > - if (repo_has_promisor_remote(the_repository)) > - strvec_push(&cmd.args, "--exclude-promisor-objects"); > + > if (!write_midx) { > if (write_bitmaps > 0) > strvec_push(&cmd.args, "--write-bitmap-index"); > @@ -1323,125 +1349,128 @@ int cmd_repack(int argc, > if (use_delta_islands) > strvec_push(&cmd.args, "--delta-islands"); > > - if (pack_everything & ALL_INTO_ONE) { > - repack_promisor_objects(&po_args, &names); > - > - if (has_existing_non_kept_packs(&existing) && > - delete_redundant && > - !(pack_everything & PACK_CRUFT)) { > - for_each_string_list_item(item, &names) { > - strvec_pushf(&cmd.args, "--keep-pack=%s-%s.pack", > - packtmp_name, item->string); > - } > - if (unpack_unreachable) { > - strvec_pushf(&cmd.args, > - "--unpack-unreachable=%s", > - unpack_unreachable); > - } else if (pack_everything & LOOSEN_UNREACHABLE) { > - strvec_push(&cmd.args, > - "--unpack-unreachable"); > - } else if (keep_unreachable) { > - strvec_push(&cmd.args, "--keep-unreachable"); > - strvec_push(&cmd.args, "--pack-loose-unreachable"); > + if (repo_has_promisor_remote(the_repository)) { > + ret = repack_promisor_objects(&po_args, &names, > + &existing.non_kept_packs, pack_everything & ALL_INTO_ONE); Using a goto jump would be easier to both read your patch and remove the need to indent this entire code block.