Re: [PATCH v2 14/16] refs/iterator: implement seeking for `packed-ref` iterators

shejialuo <shejialuo@xxxxxxxxx> · Mon, 24 Feb 2025 23:09:32 +0800

On Wed, Feb 19, 2025 at 02:23:41PM +0100, Patrick Steinhardt wrote:
> Implement seeking of `packed-ref` iterators. The implementation is again
> straight forward, except that we cannot continue to use the prefix
> iterator as we would otherwise not be able to reseek the iterator
> anymore in case one first asks for an empty and then for a non-empty
> prefix. Instead, we open-code the logic to in `advance()`.
> 
> Signed-off-by: Patrick Steinhardt <ps@xxxxxx>
> ---
>  refs/packed-backend.c | 62 +++++++++++++++++++++++++++++++++------------------
>  1 file changed, 40 insertions(+), 22 deletions(-)
> 
> diff --git a/refs/packed-backend.c b/refs/packed-backend.c
> index 38a1956d1a8..71a38acfedc 100644
> --- a/refs/packed-backend.c
> +++ b/refs/packed-backend.c
> @@ -819,6 +819,8 @@ struct packed_ref_iterator {
>  
>  	struct snapshot *snapshot;
>  
> +	char *prefix;
> +
>  	/* The current position in the snapshot's buffer: */
>  	const char *pos;
>  
> @@ -841,11 +843,9 @@ struct packed_ref_iterator {
>  };
>  
>  /*
> - * Move the iterator to the next record in the snapshot, without
> - * respect for whether the record is actually required by the current
> - * iteration. Adjust the fields in `iter` and return `ITER_OK` or
> - * `ITER_DONE`. This function does not free the iterator in the case
> - * of `ITER_DONE`.
> + * Move the iterator to the next record in the snapshot. Adjust the fields in
> + * `iter` and return `ITER_OK` or `ITER_DONE`. This function does not free the
> + * iterator in the case of `ITER_DONE`.
>   */
>  static int next_record(struct packed_ref_iterator *iter)
>  {
> @@ -942,6 +942,9 @@ static int packed_ref_iterator_advance(struct ref_iterator *ref_iterator)
>  	int ok;
>  
>  	while ((ok = next_record(iter)) == ITER_OK) {
> +		const char *refname = iter->base.refname;
> +		const char *prefix = iter->prefix;
> +
>  		if (iter->flags & DO_FOR_EACH_PER_WORKTREE_ONLY &&
>  		    !is_per_worktree_ref(iter->base.refname))
>  			continue;
> @@ -951,12 +954,41 @@ static int packed_ref_iterator_advance(struct ref_iterator *ref_iterator)
>  					    &iter->oid, iter->flags))
>  			continue;
>  
> +		while (prefix && *prefix) {
> +			if (*refname < *prefix)
> +				BUG("packed-refs backend yielded reference preceding its prefix");
> +			else if (*refname > *prefix)
> +				return ITER_DONE;
> +			prefix++;
> +			refname++;
> +		}

Although I cannot understand the code, I want to ask a question here, we
we need to do this in `advance`? Should we check this for
`packed_ref_iterator_seek` or in the `next_record` function?

Before we introduce `seek`, we don't need this logic. I somehow think we
should do this in `packed_ref_iterator_seek`.

> +
>  		return ITER_OK;
>  	}
>  
>  	return ok;
>  }
>  
> +static int packed_ref_iterator_seek(struct ref_iterator *ref_iterator,
> +				    const char *prefix)
> +{
> +	struct packed_ref_iterator *iter =
> +		(struct packed_ref_iterator *)ref_iterator;
> +	const char *start;
> +
> +	if (prefix && *prefix)
> +		start = find_reference_location(iter->snapshot, prefix, 0);
> +	else
> +		start = iter->snapshot->start;
> +
> +	free(iter->prefix);
> +	iter->prefix = xstrdup_or_null(prefix);
> +	iter->pos = start;
> +	iter->eof = iter->snapshot->eof;
> +
> +	return 0;
> +}
> +
>  static int packed_ref_iterator_peel(struct ref_iterator *ref_iterator,
>  				   struct object_id *peeled)
>  {
> @@ -979,11 +1011,13 @@ static void packed_ref_iterator_release(struct ref_iterator *ref_iterator)
>  		(struct packed_ref_iterator *)ref_iterator;
>  	strbuf_release(&iter->refname_buf);
>  	free(iter->jump);
> +	free(iter->prefix);
>  	release_snapshot(iter->snapshot);
>  }
>  
>  static struct ref_iterator_vtable packed_ref_iterator_vtable = {
>  	.advance = packed_ref_iterator_advance,
> +	.seek = packed_ref_iterator_seek,
>  	.peel = packed_ref_iterator_peel,
>  	.release = packed_ref_iterator_release,
>  };
> @@ -1097,7 +1131,6 @@ static struct ref_iterator *packed_ref_iterator_begin(
>  {
>  	struct packed_ref_store *refs;
>  	struct snapshot *snapshot;
> -	const char *start;
>  	struct packed_ref_iterator *iter;
>  	struct ref_iterator *ref_iterator;
>  	unsigned int required_flags = REF_STORE_READ;
> @@ -1113,14 +1146,6 @@ static struct ref_iterator *packed_ref_iterator_begin(
>  	 */
>  	snapshot = get_snapshot(refs);
>  
> -	if (prefix && *prefix)
> -		start = find_reference_location(snapshot, prefix, 0);
> -	else
> -		start = snapshot->start;
> -
> -	if (start == snapshot->eof)
> -		return empty_ref_iterator_begin();
> -

So, we don't return empty ref iterator. This is the same motivation like
the previous patch.

>  	CALLOC_ARRAY(iter, 1);
>  	ref_iterator = &iter->base;
>  	base_ref_iterator_init(ref_iterator, &packed_ref_iterator_vtable);
> @@ -1130,19 +1155,12 @@ static struct ref_iterator *packed_ref_iterator_begin(
>  
>  	iter->snapshot = snapshot;
>  	acquire_snapshot(snapshot);
> -
> -	iter->pos = start;
> -	iter->eof = snapshot->eof;
>  	strbuf_init(&iter->refname_buf, 0);
> -
>  	iter->base.oid = &iter->oid;
> -
>  	iter->repo = ref_store->repo;
>  	iter->flags = flags;
>  
> -	if (prefix && *prefix)
> -		/* Stop iteration after we've gone *past* prefix: */
> -		ref_iterator = prefix_ref_iterator_begin(ref_iterator, prefix, 0);
> +	packed_ref_iterator_seek(&iter->base, prefix);

Why don't we check the return value here? Actually, in the previous
patch, `cache_ref_iterator_seek` will always return 0, but you still
check. I have thought that you just want to be more defensive.

Thanks,
Jialuo