Re: [PATCH v2 1/3] repack: pack everything into packfile

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Oct 8, 2024 at 1:14 AM Han Young <hanyang.tony@xxxxxxxxxxxxx> wrote:
>
> In a partial repository, creating a local commit and then fetching
> causes the following state to occur:
>
> commit  tree  blob
>  C3 ---- T3 -- B3 (fetched from remote, in promisor pack)
>  |
>  C2 ---- T2 -- B2 (created locally, in non-promisor pack)
>  |
>  C1 ---- T1 -- B1 (fetched from remote, in promisor pack)
>
> During garbage collection, parents of promisor objects are marked as
> UNINTERESTING and are subsequently garbage collected. In this case, C2
> would be deleted and attempts to access that commit would result in "bad
> object" errors (originally reported here[1]).
>
> For partial repos, repacking is done in two steps. We first repack all the
> objects in promisor packfile, then repack all the non-promisor objects.
> It turns out C2, T2 and B2 are not repacked in either steps, ended up deleted.
> We can avoid this by packing everything into a promisor packfile, if the repo
> is partial cloned.
>
> [1] https://lore.kernel.org/git/20240802073143.56731-1-hanyang.tony@xxxxxxxxxxxxx/
>
> Helped-by: Calvin Wan <calvinwan@xxxxxxxxxx>
> Signed-off-by: Han Young <hanyang.tony@xxxxxxxxxxxxx>
> ---
>  builtin/repack.c | 257 ++++++++++++++++++++++++++---------------------
>  1 file changed, 143 insertions(+), 114 deletions(-)
>
> diff --git a/builtin/repack.c b/builtin/repack.c
> index cb4420f085..e9e18a31fe 100644
> --- a/builtin/repack.c
> +++ b/builtin/repack.c
> @@ -321,6 +321,23 @@ static int write_oid(const struct object_id *oid,
>         return 0;
>  }
>
> +static int write_loose_oid(const struct object_id *oid,
> +                                const char *path UNUSED,
> +                                void *data)
> +{
> +       struct child_process *cmd = data;
> +
> +       if (cmd->in == -1) {
> +               if (start_command(cmd))
> +                       die(_("could not start pack-objects to repack promisor objects"));
> +       }
> +
> +       if (write_in_full(cmd->in, oid_to_hex(oid), the_hash_algo->hexsz) < 0 ||
> +           write_in_full(cmd->in, "\n", 1) < 0)
> +               die(_("failed to feed promisor objects to pack-objects"));
> +       return 0;
> +}
> +
>  static struct {
>         const char *name;
>         unsigned optional:1;
> @@ -370,12 +387,15 @@ static int has_pack_ext(const struct generated_pack_data *data,
>         BUG("unknown pack extension: '%s'", ext);
>  }
>
> -static void repack_promisor_objects(const struct pack_objects_args *args,
> -                                   struct string_list *names)
> +static int repack_promisor_objects(const struct pack_objects_args *args,
> +                                   struct string_list *names,
> +                                   struct string_list *list,
> +                                   int pack_all)
>  {
>         struct child_process cmd = CHILD_PROCESS_INIT;
>         FILE *out;
>         struct strbuf line = STRBUF_INIT;
> +       struct string_list_item *item;
>
>         prepare_pack_objects(&cmd, args, packtmp);
>         cmd.in = -1;
> @@ -387,13 +407,19 @@ static void repack_promisor_objects(const struct pack_objects_args *args,
>          * {type -> existing pack order} ordering when computing deltas instead
>          * of a {type -> size} ordering, which may produce better deltas.
>          */
> -       for_each_packed_object(write_oid, &cmd,
> -                              FOR_EACH_OBJECT_PROMISOR_ONLY);
> +       if (pack_all)
> +               for_each_packed_object(write_oid, &cmd, 0);
> +       else
> +               for_each_string_list_item(item, list) {
> +                       pack_mark_retained(item);
> +               }
> +
> +       for_each_loose_object(write_loose_oid, &cmd, 0);
>
>         if (cmd.in == -1) {
>                 /* No packed objects; cmd was never started */
>                 child_process_clear(&cmd);
> -               return;
> +               return 0;
>         }
>
>         close(cmd.in);
> @@ -431,6 +457,7 @@ static void repack_promisor_objects(const struct pack_objects_args *args,
>         if (finish_command(&cmd))
>                 die(_("could not finish pack-objects to repack promisor objects"));
>         strbuf_release(&line);
> +       return 0;
>  }
>
>  struct pack_geometry {
> @@ -1312,8 +1339,7 @@ int cmd_repack(int argc,
>                 strvec_push(&cmd.args, "--reflog");
>                 strvec_push(&cmd.args, "--indexed-objects");
>         }
> -       if (repo_has_promisor_remote(the_repository))
> -               strvec_push(&cmd.args, "--exclude-promisor-objects");
> +
>         if (!write_midx) {
>                 if (write_bitmaps > 0)
>                         strvec_push(&cmd.args, "--write-bitmap-index");
> @@ -1323,125 +1349,128 @@ int cmd_repack(int argc,
>         if (use_delta_islands)
>                 strvec_push(&cmd.args, "--delta-islands");
>
> -       if (pack_everything & ALL_INTO_ONE) {
> -               repack_promisor_objects(&po_args, &names);
> -
> -               if (has_existing_non_kept_packs(&existing) &&
> -                   delete_redundant &&
> -                   !(pack_everything & PACK_CRUFT)) {
> -                       for_each_string_list_item(item, &names) {
> -                               strvec_pushf(&cmd.args, "--keep-pack=%s-%s.pack",
> -                                            packtmp_name, item->string);
> -                       }
> -                       if (unpack_unreachable) {
> -                               strvec_pushf(&cmd.args,
> -                                            "--unpack-unreachable=%s",
> -                                            unpack_unreachable);
> -                       } else if (pack_everything & LOOSEN_UNREACHABLE) {
> -                               strvec_push(&cmd.args,
> -                                           "--unpack-unreachable");
> -                       } else if (keep_unreachable) {
> -                               strvec_push(&cmd.args, "--keep-unreachable");
> -                               strvec_push(&cmd.args, "--pack-loose-unreachable");
> +       if (repo_has_promisor_remote(the_repository)) {
> +               ret = repack_promisor_objects(&po_args, &names,
> +                       &existing.non_kept_packs, pack_everything & ALL_INTO_ONE);

Using a goto jump would be easier to both read your patch and
remove the need to indent this entire code block.





[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux