Re: [PATCH 1/3] index-pack: dedup first during outgoing link check

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 2024.12.02 12:18, Jonathan Tan wrote:
> Commit c08589efdc (index-pack: repack local links into promisor packs,
> 2024-11-01) fixed a bug with what was believed to be a negligible
> decrease in performance [1] [2]. But at $DAYJOB, with at least one repo,
> it was found that the decrease in performance was very significant.
> 
> Looking at the patch, whenever we parse an object in the packfile to
> be indexed, we check the targets of all its outgoing links for its
> existence. However, this could be optimized by first collecting all such
> targets into an oidset (thus deduplicating them) before checking. Teach
> Git to do that.
> 
> On a certain fetch from the aforementioned repo, this improved
> performance from approximately 7 hours to 24m47.815s. This number will
> be further reduced in a subsequent patch.
> 
> [1] https://lore.kernel.org/git/CAG1j3zGiNMbri8rZNaF0w+yP+6OdMz0T8+8_Wgd1R_p1HzVasg@xxxxxxxxxxxxxx/
> [2] https://lore.kernel.org/git/20241105212849.3759572-1-jonathantanmy@xxxxxxxxxx/
> 
> Signed-off-by: Jonathan Tan <jonathantanmy@xxxxxxxxxx>
> ---
>  builtin/index-pack.c | 44 ++++++++++++++++++++++----------------------
>  1 file changed, 22 insertions(+), 22 deletions(-)
> 
> diff --git a/builtin/index-pack.c b/builtin/index-pack.c
> index 95babdc5ea..8e7d14c17e 100644
> --- a/builtin/index-pack.c
> +++ b/builtin/index-pack.c
> @@ -155,11 +155,11 @@ static int input_fd, output_fd;
>  static const char *curr_pack;
>  
>  /*
> - * local_links is guarded by read_mutex, and record_local_links is read-only in
> - * a thread.
> + * outgoing_links is guarded by read_mutex, and record_outgoing_links is
> + * read-only in a thread.
>   */
> -static struct oidset local_links = OIDSET_INIT;
> -static int record_local_links;
> +static struct oidset outgoing_links = OIDSET_INIT;
> +static int record_outgoing_links;
>  
>  static struct thread_local_data *thread_data;
>  static int nr_dispatched;

We're renaming the oidset and flag because our purpose is now more
general. OK.


> @@ -812,18 +812,12 @@ static int check_collison(struct object_entry *entry)
>  	return 0;
>  }
>  
> -static void record_if_local_object(const struct object_id *oid)
> +static void record_outgoing_link(const struct object_id *oid)
>  {
> -	struct object_info info = OBJECT_INFO_INIT;
> -	if (oid_object_info_extended(the_repository, oid, &info, 0))
> -		/* Missing; assume it is a promisor object */
> -		return;
> -	if (info.whence == OI_PACKED && info.u.packed.pack->pack_promisor)
> -		return;
> -	oidset_insert(&local_links, oid);
> +	oidset_insert(&outgoing_links, oid);
>  }

We're now unconditionally recording linked objects, and this logic has
been moved below. Looks good.


> -static void do_record_local_links(struct object *obj)
> +static void do_record_outgoing_links(struct object *obj)
>  {
>  	if (obj->type == OBJ_TREE) {
>  		struct tree *tree = (struct tree *)obj;
> @@ -837,16 +831,16 @@ static void do_record_local_links(struct object *obj)
>  			 */
>  			return;
>  		while (tree_entry_gently(&desc, &entry))
> -			record_if_local_object(&entry.oid);
> +			record_outgoing_link(&entry.oid);
>  	} else if (obj->type == OBJ_COMMIT) {
>  		struct commit *commit = (struct commit *) obj;
>  		struct commit_list *parents = commit->parents;
>  
>  		for (; parents; parents = parents->next)
> -			record_if_local_object(&parents->item->object.oid);
> +			record_outgoing_link(&parents->item->object.oid);
>  	} else if (obj->type == OBJ_TAG) {
>  		struct tag *tag = (struct tag *) obj;
> -		record_if_local_object(get_tagged_oid(tag));
> +		record_outgoing_link(get_tagged_oid(tag));
>  	}
>  }
>  
> @@ -896,7 +890,7 @@ static void sha1_object(const void *data, struct object_entry *obj_entry,
>  		free(has_data);
>  	}
>  
> -	if (strict || do_fsck_object || record_local_links) {
> +	if (strict || do_fsck_object || record_outgoing_links) {
>  		read_lock();
>  		if (type == OBJ_BLOB) {
>  			struct blob *blob = lookup_blob(the_repository, oid);
> @@ -928,8 +922,8 @@ static void sha1_object(const void *data, struct object_entry *obj_entry,
>  				die(_("fsck error in packed object"));
>  			if (strict && fsck_walk(obj, NULL, &fsck_options))
>  				die(_("Not all child objects of %s are reachable"), oid_to_hex(&obj->oid));
> -			if (record_local_links)
> -				do_record_local_links(obj);
> +			if (record_outgoing_links)
> +				do_record_outgoing_links(obj);
>  
>  			if (obj->type == OBJ_TREE) {
>  				struct tree *item = (struct tree *) obj;
> @@ -1781,7 +1775,7 @@ static void repack_local_links(void)
>  	struct object_id *oid;
>  	char *base_name;
>  
> -	if (!oidset_size(&local_links))
> +	if (!oidset_size(&outgoing_links))
>  		return;
>  
>  	base_name = mkpathdup("%s/pack/pack", repo_get_object_directory(the_repository));
> @@ -1795,8 +1789,14 @@ static void repack_local_links(void)
>  	if (start_command(&cmd))
>  		die(_("could not start pack-objects to repack local links"));
>  
> -	oidset_iter_init(&local_links, &iter);
> +	oidset_iter_init(&outgoing_links, &iter);
>  	while ((oid = oidset_iter_next(&iter))) {
> +		struct object_info info = OBJECT_INFO_INIT;
> +		if (oid_object_info_extended(the_repository, oid, &info, 0))
> +			/* Missing; assume it is a promisor object */
> +			continue;
> +		if (info.whence == OI_PACKED && info.u.packed.pack->pack_promisor)
> +			continue;
>  		if (write_in_full(cmd.in, oid_to_hex(oid), the_hash_algo->hexsz) < 0 ||
>  		    write_in_full(cmd.in, "\n", 1) < 0)
>  			die(_("failed to feed local object to pack-objects"));

We've moved our logic to skip promisor objects here, after potential
objects have been recorded to the oidset and thus deduped. Now we
`continue` the loop to skip promisor objects, whereas before we had an
early return to avoid recording them in the first place. Seems
straightforward.

> @@ -1899,7 +1899,7 @@ int cmd_index_pack(int argc,
>  			} else if (skip_to_optional_arg(arg, "--keep", &keep_msg)) {
>  				; /* nothing to do */
>  			} else if (skip_to_optional_arg(arg, "--promisor", &promisor_msg)) {
> -				record_local_links = 1;
> +				record_outgoing_links = 1;
>  			} else if (starts_with(arg, "--threads=")) {
>  				char *end;
>  				nr_threads = strtoul(arg+10, &end, 0);
> -- 
> 2.47.0.338.g60cca15819-goog
> 
> 




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux