On Fri, Oct 04, 2024 at 08:21:32PM +0000, Shubham Kanodia via GitGitGadget wrote: I'm coming rather late to the party and simply want to review this so that the thread gets revived. So my context may be lacking, please forgive me if I am reopening things that were already discussed. > diff --git a/Documentation/config/remote.txt b/Documentation/config/remote.txt > index 8efc53e836d..186f439ed7b 100644 > --- a/Documentation/config/remote.txt > +++ b/Documentation/config/remote.txt > @@ -33,6 +33,13 @@ remote.<name>.fetch:: > The default set of "refspec" for linkgit:git-fetch[1]. See > linkgit:git-fetch[1]. > > +remote.<name>.prefetchref:: > + Specify the refs to be prefetched when fetching from this > + remote. The value is a space-separated list of ref patterns > + (e.g., "refs/heads/main !refs/heads/develop*"). This can be > + used to optimize fetch operations by specifying exactly which > + refs should be prefetched. I'm a bit surprised that we use a space-separated list here instead of accepting a multi-valued config like we do for "remote.<name>.fetch". Shouldn't we use the format here to make things more consistent? > remote.<name>.push:: > The default set of "refspec" for linkgit:git-push[1]. See > linkgit:git-push[1]. > diff --git a/builtin/fetch.c b/builtin/fetch.c > index b2b5aee5bf2..74603cfabe0 100644 > --- a/builtin/fetch.c > +++ b/builtin/fetch.c > @@ -485,6 +485,32 @@ static void filter_prefetch_refspec(struct refspec *rs) > } > } > > +static int pattern_matches_ref(const char *pattern, const char *refname) > +{ > + if (strchr(pattern, '*')) > + return match_refspec_name_with_pattern(pattern, refname, NULL, NULL) != 0; > + return strcmp(pattern, refname) == 0; > +} > + > +static int matches_prefetch_refs(const char *refname, const struct string_list *prefetch_refs) > +{ > + int has_positive = 0, matched_positive = 0, matched_negative = 0; > + > + for (int i = 0; i < prefetch_refs->nr; i++) { > + const char *pattern = prefetch_refs->items[i].string; > + int is_negative = (*pattern == '!'); > + if (is_negative) pattern++; > + else has_positive = 1; > + > + if (pattern_matches_ref(pattern, refname)) { > + if (is_negative) matched_negative = 1; > + else matched_positive = 1; > + } > + } > + > + return has_positive ? (matched_positive && !matched_negative) : !matched_negative; > +} This is essentially open-coding a bunch of logic around how we parse refspecs. I'd propose to instead use the APIs we already have in this area, namely those in "refspec.h". > static struct ref *get_ref_map(struct remote *remote, > const struct ref *remote_refs, > struct refspec *rs, > @@ -501,7 +527,11 @@ static struct ref *get_ref_map(struct remote *remote, > struct hashmap existing_refs; > int existing_refs_populated = 0; > > + struct ref *prefetch_filtered_ref_map = NULL, **ref_map_tail = &prefetch_filtered_ref_map; > + struct ref *next; > + We don't typically have empty lines between variable declarations. > filter_prefetch_refspec(rs); > + > if (remote) > filter_prefetch_refspec(&remote->fetch); > > @@ -610,6 +640,29 @@ static struct ref *get_ref_map(struct remote *remote, > else > ref_map = apply_negative_refspecs(ref_map, &remote->fetch); > > + /** > + * Filter out advertised refs that we don't want to fetch during > + * prefetch if a prefetchref config is set > + */ Our comments typically start with `/*`, not `/**`. > + if (prefetch && remote->prefetch_refs.nr) { > + prefetch_filtered_ref_map = NULL; > + ref_map_tail = &prefetch_filtered_ref_map; As far as I can see, both of these variables have already been initialized beforehand. > + for (rm = ref_map; rm; rm = next) { > + next = rm->next; > + rm->next = NULL; > + > + if (matches_prefetch_refs(rm->name, &remote->prefetch_refs)) { > + *ref_map_tail = rm; > + ref_map_tail = &rm->next; > + } else { > + free_one_ref(rm); > + } > + } > + ref_map = prefetch_filtered_ref_map; > + } > + > ref_map = ref_remove_duplicates(ref_map); > > for (rm = ref_map; rm; rm = rm->next) { > diff --git a/remote.c b/remote.c > index 8f3dee13186..6752c73370f 100644 > --- a/remote.c > +++ b/remote.c > @@ -141,6 +141,7 @@ static struct remote *make_remote(struct remote_state *remote_state, > ret->prune = -1; /* unspecified */ > ret->prune_tags = -1; /* unspecified */ > ret->name = xstrndup(name, len); > + string_list_init_dup(&ret->prefetch_refs); > refspec_init(&ret->push, REFSPEC_PUSH); > refspec_init(&ret->fetch, REFSPEC_FETCH); > > @@ -166,6 +167,7 @@ static void remote_clear(struct remote *remote) > free((char *)remote->uploadpack); > FREE_AND_NULL(remote->http_proxy); > FREE_AND_NULL(remote->http_proxy_authmethod); > + string_list_clear(&remote->prefetch_refs, 0); > } > > static void add_merge(struct branch *branch, const char *name) > @@ -456,6 +458,12 @@ static int handle_config(const char *key, const char *value, > remote->prune = git_config_bool(key, value); > else if (!strcmp(subkey, "prunetags")) > remote->prune_tags = git_config_bool(key, value); > + else if (!strcmp(subkey, "prefetchref")) { > + if (!value) > + return config_error_nonbool(key); > + string_list_split(&remote->prefetch_refs, value, ' ', -1); > + return 0; > + } > else if (!strcmp(subkey, "url")) { > if (!value) > return config_error_nonbool(key); > @@ -868,7 +876,7 @@ struct strvec *push_url_of_remote(struct remote *remote) > return remote->pushurl.nr ? &remote->pushurl : &remote->url; > } > > -static int match_name_with_pattern(const char *key, const char *name, > +int match_refspec_name_with_pattern(const char *key, const char *name, > const char *value, char **result) > { > const char *kstar = strchr(key, '*'); Is this rename really necessary? It is not mentioned in the commit message, so this is surprising to me. If it really was necessary it should be split out into a separate commit that also explains why you think that this is a good idea. > diff --git a/remote.h b/remote.h > index b901b56746d..9ffef206f23 100644 > --- a/remote.h > +++ b/remote.h > @@ -5,6 +5,7 @@ > #include "hashmap.h" > #include "refspec.h" > #include "strvec.h" > +#include "string-list.h" > > struct option; > struct transport_ls_refs_options; > @@ -77,6 +78,8 @@ struct remote { > > struct refspec fetch; > > + struct string_list prefetch_refs; > + > /* > * The setting for whether to fetch tags (as a separate rule from the > * configured refspecs); > @@ -207,6 +210,9 @@ int count_refspec_match(const char *, struct ref *refs, struct ref **matched_ref > > int check_ref_type(const struct ref *ref, int flags); > > +int match_refspec_name_with_pattern(const char *key, const char *name, > + const char *value, char **result); I think instead of exposing this function we should rather expose `refspec_match()`, which is at a higher level and knows to handle the cases for us where the refspec is a pattern and when it's not. Patrick