Re: [PATCH v4] remote: allow specifying refs to prefetch

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, Oct 04, 2024 at 08:21:32PM +0000, Shubham Kanodia via GitGitGadget wrote:

I'm coming rather late to the party and simply want to review this so
that the thread gets revived. So my context may be lacking, please
forgive me if I am reopening things that were already discussed.

> diff --git a/Documentation/config/remote.txt b/Documentation/config/remote.txt
> index 8efc53e836d..186f439ed7b 100644
> --- a/Documentation/config/remote.txt
> +++ b/Documentation/config/remote.txt
> @@ -33,6 +33,13 @@ remote.<name>.fetch::
>  	The default set of "refspec" for linkgit:git-fetch[1]. See
>  	linkgit:git-fetch[1].
>  
> +remote.<name>.prefetchref::
> +	Specify the refs to be prefetched when fetching from this
> +	remote. The value is a space-separated list of ref patterns
> +	(e.g., "refs/heads/main !refs/heads/develop*"). This can be
> +	used to optimize fetch operations by specifying exactly which
> +	refs should be prefetched.

I'm a bit surprised that we use a space-separated list here instead of
accepting a multi-valued config like we do for "remote.<name>.fetch".
Shouldn't we use the format here to make things more consistent?

>  remote.<name>.push::
>  	The default set of "refspec" for linkgit:git-push[1]. See
>  	linkgit:git-push[1].
> diff --git a/builtin/fetch.c b/builtin/fetch.c
> index b2b5aee5bf2..74603cfabe0 100644
> --- a/builtin/fetch.c
> +++ b/builtin/fetch.c
> @@ -485,6 +485,32 @@ static void filter_prefetch_refspec(struct refspec *rs)
>  	}
>  }
>  
> +static int pattern_matches_ref(const char *pattern, const char *refname)
> +{
> +	if (strchr(pattern, '*'))
> +		return match_refspec_name_with_pattern(pattern, refname, NULL, NULL) != 0;
> +	return strcmp(pattern, refname) == 0;
> +}
> +
> +static int matches_prefetch_refs(const char *refname, const struct string_list *prefetch_refs)
> +{
> +	int has_positive = 0, matched_positive = 0, matched_negative = 0;
> +
> +	for (int i = 0; i < prefetch_refs->nr; i++) {
> +		const char *pattern = prefetch_refs->items[i].string;
> +		int is_negative = (*pattern == '!');
> +		if (is_negative) pattern++;
> +		else has_positive = 1;
> +
> +		if (pattern_matches_ref(pattern, refname)) {
> +			if (is_negative) matched_negative = 1;
> +			else matched_positive = 1;
> +		}
> +	}
> +
> +	return has_positive ? (matched_positive && !matched_negative) : !matched_negative;
> +}

This is essentially open-coding a bunch of logic around how we parse
refspecs. I'd propose to instead use the APIs we already have in this
area, namely those in "refspec.h".

>  static struct ref *get_ref_map(struct remote *remote,
>  			       const struct ref *remote_refs,
>  			       struct refspec *rs,
> @@ -501,7 +527,11 @@ static struct ref *get_ref_map(struct remote *remote,
>  	struct hashmap existing_refs;
>  	int existing_refs_populated = 0;
>  
> +	struct ref *prefetch_filtered_ref_map = NULL, **ref_map_tail = &prefetch_filtered_ref_map;
> +	struct ref *next;
> +

We don't typically have empty lines between variable declarations.

>  	filter_prefetch_refspec(rs);
> +
>  	if (remote)
>  		filter_prefetch_refspec(&remote->fetch);
>  
> @@ -610,6 +640,29 @@ static struct ref *get_ref_map(struct remote *remote,
>  	else
>  		ref_map = apply_negative_refspecs(ref_map, &remote->fetch);
>  
> +	/**
> +	 * Filter out advertised refs that we don't want to fetch during
> +	 * prefetch if a prefetchref config is set
> +	 */

Our comments typically start with `/*`, not `/**`.

> +	if (prefetch && remote->prefetch_refs.nr) {
> +		prefetch_filtered_ref_map = NULL;
> +		ref_map_tail = &prefetch_filtered_ref_map;

As far as I can see, both of these variables have already been
initialized beforehand.

> +		for (rm = ref_map; rm; rm = next) {
> +			next = rm->next;
> +			rm->next = NULL;
> +
> +			if (matches_prefetch_refs(rm->name, &remote->prefetch_refs)) {
> +				*ref_map_tail = rm;
> +				ref_map_tail = &rm->next;
> +			} else {
> +				free_one_ref(rm);
> +			}
> +		}
> +		ref_map = prefetch_filtered_ref_map;
> +	}
> +
>  	ref_map = ref_remove_duplicates(ref_map);
>  
>  	for (rm = ref_map; rm; rm = rm->next) {
> diff --git a/remote.c b/remote.c
> index 8f3dee13186..6752c73370f 100644
> --- a/remote.c
> +++ b/remote.c
> @@ -141,6 +141,7 @@ static struct remote *make_remote(struct remote_state *remote_state,
>  	ret->prune = -1;  /* unspecified */
>  	ret->prune_tags = -1;  /* unspecified */
>  	ret->name = xstrndup(name, len);
> +	string_list_init_dup(&ret->prefetch_refs);
>  	refspec_init(&ret->push, REFSPEC_PUSH);
>  	refspec_init(&ret->fetch, REFSPEC_FETCH);
>  
> @@ -166,6 +167,7 @@ static void remote_clear(struct remote *remote)
>  	free((char *)remote->uploadpack);
>  	FREE_AND_NULL(remote->http_proxy);
>  	FREE_AND_NULL(remote->http_proxy_authmethod);
> +	string_list_clear(&remote->prefetch_refs, 0);
>  }
>  
>  static void add_merge(struct branch *branch, const char *name)
> @@ -456,6 +458,12 @@ static int handle_config(const char *key, const char *value,
>  		remote->prune = git_config_bool(key, value);
>  	else if (!strcmp(subkey, "prunetags"))
>  		remote->prune_tags = git_config_bool(key, value);
> +	else if (!strcmp(subkey, "prefetchref")) {
> +		if (!value)
> +			return config_error_nonbool(key);
> +		string_list_split(&remote->prefetch_refs, value, ' ', -1);
> +		return 0;
> +	}
>  	else if (!strcmp(subkey, "url")) {
>  		if (!value)
>  			return config_error_nonbool(key);
> @@ -868,7 +876,7 @@ struct strvec *push_url_of_remote(struct remote *remote)
>  	return remote->pushurl.nr ? &remote->pushurl : &remote->url;
>  }
>  
> -static int match_name_with_pattern(const char *key, const char *name,
> +int match_refspec_name_with_pattern(const char *key, const char *name,
>  				   const char *value, char **result)
>  {
>  	const char *kstar = strchr(key, '*');

Is this rename really necessary? It is not mentioned in the commit
message, so this is surprising to me. If it really was necessary it
should be split out into a separate commit that also explains why you
think that this is a good idea.

> diff --git a/remote.h b/remote.h
> index b901b56746d..9ffef206f23 100644
> --- a/remote.h
> +++ b/remote.h
> @@ -5,6 +5,7 @@
>  #include "hashmap.h"
>  #include "refspec.h"
>  #include "strvec.h"
> +#include "string-list.h"
>  
>  struct option;
>  struct transport_ls_refs_options;
> @@ -77,6 +78,8 @@ struct remote {
>  
>  	struct refspec fetch;
>  
> +	struct string_list prefetch_refs;
> +
>  	/*
>  	 * The setting for whether to fetch tags (as a separate rule from the
>  	 * configured refspecs);
> @@ -207,6 +210,9 @@ int count_refspec_match(const char *, struct ref *refs, struct ref **matched_ref
>  
>  int check_ref_type(const struct ref *ref, int flags);
>  
> +int match_refspec_name_with_pattern(const char *key, const char *name,
> +					const char *value, char **result);

I think instead of exposing this function we should rather expose
`refspec_match()`, which is at a higher level and knows to handle the
cases for us where the refspec is a pattern and when it's not.

Patrick




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux