Re: [PATCH v2 3/3] builtin/repack.c: implement support for `--max-cruft-size`

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, Oct 02, 2023 at 08:44:32PM -0400, Taylor Blau wrote:
[snip]
> diff --git a/Documentation/git-gc.txt b/Documentation/git-gc.txt
> index 90806fd26a..fa0541b416 100644
> --- a/Documentation/git-gc.txt
> +++ b/Documentation/git-gc.txt
> @@ -59,6 +59,13 @@ be performed as well.
>  	cruft pack instead of storing them as loose objects. `--cruft`
>  	is on by default.
>  
> +--max-cruft-size=<n>::
> +	When packing unreachable objects into a cruft pack, limit the
> +	size of new cruft packs to be at most `<n>`. Overrides any

We should probably mention the unit here, which is bytes.

[snip]
> @@ -806,6 +846,72 @@ static void remove_redundant_bitmaps(struct string_list *include,
>  	strbuf_release(&path);
>  }
>  
> +static int existing_cruft_pack_cmp(const void *va, const void *vb)
> +{
> +	struct packed_git *a = *(struct packed_git **)va;
> +	struct packed_git *b = *(struct packed_git **)vb;
> +
> +	if (a->pack_size < b->pack_size)
> +		return -1;
> +	if (a->pack_size > b->pack_size)
> +		return 1;
> +	return 0;
> +}
> +
> +static void collapse_small_cruft_packs(FILE *in, size_t max_size,
> +				       struct existing_packs *existing)
> +{
> +	struct packed_git **existing_cruft, *p;
> +	struct strbuf buf = STRBUF_INIT;
> +	size_t total_size = 0;
> +	size_t existing_cruft_nr = 0;
> +	size_t i;
> +
> +	ALLOC_ARRAY(existing_cruft, existing->cruft_packs.nr);
> +
> +	for (p = get_all_packs(the_repository); p; p = p->next) {
> +		if (!(p->is_cruft && p->pack_local))
> +			continue;
> +
> +		strbuf_reset(&buf);
> +		strbuf_addstr(&buf, pack_basename(p));
> +		strbuf_strip_suffix(&buf, ".pack");
> +
> +		if (!string_list_has_string(&existing->cruft_packs, buf.buf))
> +			continue;
> +
> +		if (existing_cruft_nr >= existing->cruft_packs.nr)
> +			BUG("too many cruft packs (found %"PRIuMAX", but knew "
> +			    "of %"PRIuMAX")",
> +			    (uintmax_t)existing_cruft_nr + 1,
> +			    (uintmax_t)existing->cruft_packs.nr);
> +		existing_cruft[existing_cruft_nr++] = p;
> +	}
> +
> +	QSORT(existing_cruft, existing_cruft_nr, existing_cruft_pack_cmp);
> +
> +	for (i = 0; i < existing_cruft_nr; i++) {
> +		size_t proposed;
> +
> +		p = existing_cruft[i];
> +		proposed = st_add(total_size, p->pack_size);
> +
> +		if (proposed <= max_size) {
> +			total_size = proposed;
> +			fprintf(in, "-%s\n", pack_basename(p));
> +		} else {
> +			retain_cruft_pack(existing, p);
> +			fprintf(in, "%s\n", pack_basename(p));
> +		}
> +	}
> +
> +	for (i = 0; i < existing->non_kept_packs.nr; i++)
> +		fprintf(in, "-%s.pack\n",
> +			existing->non_kept_packs.items[i].string);

As far as I can see, the non-kept packs are passed to
git-pack-objects(1) in both cases: when we collapse small cruft packs
and when we don't. Is there any particular reason why we handle them
separately in each code path instead of merging that logic? Is the
ordering of packfiles important here?

> +	strbuf_release(&buf);
> +}
> +
>  static int write_cruft_pack(const struct pack_objects_args *args,
>  			    const char *destination,
>  			    const char *pack_prefix,
> @@ -853,10 +959,14 @@ static int write_cruft_pack(const struct pack_objects_args *args,
>  	in = xfdopen(cmd.in, "w");
>  	for_each_string_list_item(item, names)
>  		fprintf(in, "%s-%s.pack\n", pack_prefix, item->string);
> -	for_each_string_list_item(item, &existing->non_kept_packs)
> -		fprintf(in, "-%s.pack\n", item->string);
> -	for_each_string_list_item(item, &existing->cruft_packs)
> -		fprintf(in, "-%s.pack\n", item->string);
> +	if (args->max_pack_size && !cruft_expiration) {
> +		collapse_small_cruft_packs(in, args->max_pack_size, existing);
> +	} else {
> +		for_each_string_list_item(item, &existing->non_kept_packs)
> +			fprintf(in, "-%s.pack\n", item->string);
> +		for_each_string_list_item(item, &existing->cruft_packs)
> +			fprintf(in, "-%s.pack\n", item->string);
> +	}
>  	for_each_string_list_item(item, &existing->kept_packs)
>  		fprintf(in, "%s.pack\n", item->string);
>  	fclose(in);

Patrick

Attachment: signature.asc
Description: PGP signature


[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Announce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux