Re: [PATCH 00/17] Clean up how fetch_pack() handles the heads list

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 08/23/2012 10:31 PM, Jeff King wrote:
> On Thu, Aug 23, 2012 at 03:56:48PM -0400, Jeff King wrote:
> 
>> This code blames back to:
>>
>>  commit 4bcb310c2539b66d535e87508d1b7a90fe29c083
>>  Author: Alexandre Julliard <julliard@xxxxxxxxxx>
>>  Date:   Fri Nov 24 16:00:13 2006 +0100
>>
>>     fetch-pack: Do not fetch tags for shallow clones.
>>
>>     A better fix may be to only fetch tags that point to commits that we
>>     are downloading, but git-clone doesn't have support for following
>>     tags. This will happen automatically on the next git-fetch though.
>>
>> So it is about making sure that "git clone --depth=1" does not
>> accidentally pull a single commit from v1.0, v1.1, v1.2, and so forth,
>> losing the purpose of using --depth in the first place. These days it is
>> largely irrelevant, since this code path is not followed by clone, and
>> clone will automatically restrict its list of fetched refs to a single
>> branch if --depth is used.
> 
> I think part of the confusion of this code is that inside the loop over
> the refs it is sometimes checking aspects of the ref, and sometimes
> checking invariants of the loop (like args.fetch_all). Splitting it into
> separate loops makes it easier to see what is going on, like the patch
> below (on top of Michael's series).
> 
> I'm not sure if it ends up being more readable, since the generic "cut
> down this linked list" function has to operate through callbacks with a
> void pointer. On the other hand, that function could also be used
> elsewhere.
> 
> diff --git a/builtin/fetch-pack.c b/builtin/fetch-pack.c
> index 90683ca..877cf38 100644
> --- a/builtin/fetch-pack.c
> +++ b/builtin/fetch-pack.c
> @@ -521,51 +521,80 @@ static void mark_recent_complete_commits(unsigned long cutoff)
>  	}
>  }
>  
> -static void filter_refs(struct ref **refs, int *nr_heads, char **heads)
> +static void filter_refs_callback(struct ref **refs,
> +				 int (*want)(struct ref *, void *),
> +				 void *data)
>  {
> -	struct ref *newlist = NULL;
> -	struct ref **newtail = &newlist;
> +	struct ref **tail = refs;
>  	struct ref *ref, *next;
> -	int match_pos = 0, unmatched = 0;
>  
>  	for (ref = *refs; ref; ref = next) {
> -		int keep_ref = 0;
>  		next = ref->next;
> -		if (!memcmp(ref->name, "refs/", 5) &&
> -		    check_refname_format(ref->name, 0))
> -			; /* trash */
> -		else if (args.fetch_all &&
> -			   (!args.depth || prefixcmp(ref->name, "refs/tags/")))
> -			keep_ref = 1;
> -		else
> -			while (match_pos < *nr_heads) {
> -				int cmp = strcmp(ref->name, heads[match_pos]);
> -				if (cmp < 0) { /* definitely do not have it */
> -					break;
> -				} else if (cmp == 0) { /* definitely have it */
> -					free(heads[match_pos++]);
> -					keep_ref = 1;
> -					break;
> -				} else { /* might have it; keep looking */
> -					heads[unmatched++] = heads[match_pos++];
> -				}
> -			}
> -
> -		if (keep_ref) {
> -			*newtail = ref;
> -			ref->next = NULL;
> -			newtail = &ref->next;
> -		} else {
> +		if (want(ref, data))
> +			tail = &ref->next;
> +		else {
>  			free(ref);
> +			*tail = next;
>  		}
>  	}
> +}
>  
> -	/* copy any remaining unmatched heads: */
> -	while (match_pos < *nr_heads)
> -		heads[unmatched++] = heads[match_pos++];
> -	*nr_heads = unmatched;
> +static int ref_name_is_ok(struct ref *ref, void *data)
> +{
> +	return memcmp(ref->name, "refs/", 5) ||
> +		!check_refname_format(ref->name, 0);
> +}
> +
> +static int ref_ok_for_shallow(struct ref *ref, void *data)
> +{
> +	return prefixcmp(ref->name, "refs/tags/");
> +}
>  
> -	*refs = newlist;
> +struct filter_by_name_data {
> +	char **heads;
> +	int nr_heads;
> +	int match_pos;
> +	int unmatched;
> +};
> +
> +static int want_ref_name(struct ref *ref, void *data)
> +{
> +	struct filter_by_name_data *f = data;
> +
> +	while (f->match_pos < f->nr_heads) {
> +		int cmp = strcmp(ref->name, f->heads[f->match_pos]);
> +		if (cmp < 0) /* definitely do not have it */
> +			return 0;
> +		else if (cmp == 0) { /* definitely have it */
> +			free(f->heads[f->match_pos++]);
> +			return 1;
> +		} else /* might have it; keep looking */
> +			f->heads[f->unmatched++] = f->heads[f->match_pos++];
> +	}
> +	return 0;
> +}
> +
> +static void filter_refs(struct ref **refs, int *nr_heads, char **heads)
> +{
> +	struct filter_by_name_data f;
> +
> +	filter_refs_callback(refs, ref_name_is_ok, NULL);
> +
> +	if (args.fetch_all) {
> +		if (args.depth)
> +			filter_refs_callback(refs, ref_ok_for_shallow, NULL);
> +		return;
> +	}
> +
> +	memset(&f, 0, sizeof(f));
> +	f.heads = heads;
> +	f.nr_heads = *nr_heads;
> +	filter_refs_callback(refs, want_ref_name, &f);
> +
> +	/* copy any remaining unmatched heads: */
> +	while (f.match_pos < f.nr_heads)
> +		heads[f.unmatched++] = heads[f.match_pos++];
> +	*nr_heads = f.unmatched;
>  }
>  
>  static void mark_alternate_complete(const struct ref *ref, void *unused)
> 

I need a sanity check here.  I don't see that it is forbidden to call
"git fetch-pack --all --depth=N" and also specify some explicit
references.  (This usage would make it possible for the user to do a
shallow clone while also retrieving some specific tags.)

However, if I try to do this I get (before your change)

    $ git fetch-pack -v --all --depth=1 $URL refs/heads/master
    Server supports multi_ack_detailed
    Server supports side-band-64k
    Server supports ofs-delta
    want 9c9f2f4453a7260d0d60926c73811f025be98ded (HEAD)
    want 9c9f2f4453a7260d0d60926c73811f025be98ded (refs/heads/master)
    done
    remote: Counting objects: 6, done.
    remote: Compressing objects: 100% (4/4), done.
    remote: Total 6 (delta 0), reused 0 (delta 0)
    Unpacking objects: 100% (6/6), done.
    error: no such remote ref refs/heads/master
    9c9f2f4453a7260d0d60926c73811f025be98ded HEAD
    9c9f2f4453a7260d0d60926c73811f025be98ded refs/heads/master

Note the error message "no such remote ref refs/heads/master", which is
incorrect.  This happens because when the (fetch_all && (!depth ||
ref_non_tag(ref))) branch of the if statement succeeds, then the
look-in-heads-list branch is never executed, and therefore the requested
reference is never marked as having been found among the remote references.

On the other hand, if I try the same with an (existing) tag, before your
change I get

    $ git fetch-pack -v --all --depth=1 $URL refs/tags/c
    Server supports multi_ack_detailed
    Server supports side-band-64k
    Server supports ofs-delta
    fatal: The remote end hung up unexpectedly

Now this is obviously broken in git-fetch-pack, because when fetch_all
is specified, return_refs is never initialized, but the
look-in-heads-list branch of the if is executed nevertheless.

It is not hard to fix these problems in the old code: (1)
unconditionally initialize return_refs, and (2) exchange the
look-in-heads-list handling and the fetch_all handling.


After your change the first problem (spurious error when asking for a
head) remains unchanged.  The second problem is changed from a crash to
a misbehavior:

    $ git fetch-pack -v --all --depth=1 $URL refs/tags/c
    Initialized empty Git repository in /home/mhagger/tmp/cl/c/.git/
    Server supports multi_ack_detailed
    Server supports side-band-64k
    Server supports ofs-delta
    want 9c9f2f4453a7260d0d60926c73811f025be98ded (HEAD)
    want 9c9f2f4453a7260d0d60926c73811f025be98ded (refs/heads/master)
    done
    remote: Counting objects: 6, done.
    remote: Compressing objects: 100% (4/4), done.
    remote: Total 6 (delta 0), reused 0 (delta 0)
    Unpacking objects: 100% (6/6), done.
    error: no such remote ref refs/tags/c
    9c9f2f4453a7260d0d60926c73811f025be98ded HEAD
    9c9f2f4453a7260d0d60926c73811f025be98ded refs/heads/master

Note that "refs/tags/c" is not obtained from the remote even though it
was specifically requested.  The reason that your patch makes the crash
disappear is that when fetch_all is true, then the look-in-heads-list
branch is not executed at all -- but this is also why "refs/tags/c" is
not obtained.

So, assuming that we want to support

    git fetch-pack --all --depth=N $URL refs/tags/TAG

correctly, then your filter_refs_callback() refactoring won't work, at
least not the way that you have written it.  I will continue working on
this.

Michael

-- 
Michael Haggerty
mhagger@xxxxxxxxxxxx
http://softwareswirl.blogspot.com/
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]