Re: [PATCH 7/7] Accelerate rename_dst setup

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Sun, Dec 06, 2020 at 02:54:36AM +0000, Elijah Newren via GitGitGadget wrote:
> diff --git a/diffcore-rename.c b/diffcore-rename.c
> index 816d2fbac44..fb3c2817c6b 100644
> --- a/diffcore-rename.c
> +++ b/diffcore-rename.c
> @@ -5,67 +5,64 @@
>  #include "cache.h"
>  #include "diff.h"
>  #include "diffcore.h"
> -#include "object-store.h"
>  #include "hashmap.h"
> +#include "object-store.h"

Shuffling around this object-store.h include looks like noise to me, but
maybe there's a good reason for doing it.

>  #include "progress.h"
>  #include "promisor-remote.h"
> +#include "strmap.h"
>
>  /* Table of rename/copy destinations */
>
>  static struct diff_rename_dst {
> -	struct diff_filespec *two;
> -	struct diff_filepair *pair;
> +	struct diff_filepair *p;
> +	struct diff_filespec *filespec_to_free;
> +	int is_rename; /* false -> just a create; true -> rename or copy */
>  } *rename_dst;
>  static int rename_dst_nr, rename_dst_alloc;
> +/* Mapping from break source pathname to break destination index */
> +static struct strintmap *break_idx = NULL;
>
> -static int find_rename_dst(struct diff_filespec *two)
> -{
> -	int first, last;
> -
> -	first = 0;
> -	last = rename_dst_nr;
> -	while (last > first) {
> -		int next = first + ((last - first) >> 1);
> -		struct diff_rename_dst *dst = &(rename_dst[next]);
> -		int cmp = strcmp(two->path, dst->two->path);
> -		if (!cmp)
> -			return next;
> -		if (cmp < 0) {
> -			last = next;
> -			continue;
> -		}
> -		first = next+1;
> -	}
> -	return -first - 1;
> -}
> -
> -static struct diff_rename_dst *locate_rename_dst(struct diff_filespec *two)
> +static struct diff_rename_dst *locate_rename_dst(struct diff_filepair *p)
>  {
> -	int ofs = find_rename_dst(two);
> -	return ofs < 0 ? NULL : &rename_dst[ofs];
> +	/* Lookup by p->ONE->path */
> +	int idx = break_idx ? strintmap_get(break_idx, p->one->path) : -1;

I had to lookup the behavior of strintmap_get() to realize that it
returns the map's "default value" to figure out why this double
ternary was necessary, but I think that it is.

Ideally, if break_idx is non-NULL, then we ought to be able to immediately
return NULL, but it's possible that break_idx is non-NULL and simply
doesn't contain p->one->path, in which case we also want to return NULL.

So, I think this is as clear as it can be.

> +	return (idx == -1) ? NULL : &rename_dst[idx];
>  }
>
>  /*
>   * Returns 0 on success, -1 if we found a duplicate.
>   */
> -static int add_rename_dst(struct diff_filespec *two)
> +static int add_rename_dst(struct diff_filepair *p)
>  {
> -	int first = find_rename_dst(two);
> -
> -	if (first >= 0)
> +	/*
> +	 * See t4058; trees might have duplicate entries.  I think
> +	 * trees with duplicate entries should be ignored and we
> +	 * should just leave rename detection on; while the results
> +	 * may be slightly harder to understand, that's merely a
> +	 * result of the underlying tree making no sense.  But I
> +	 * believe everything still works fine, the results do still
> +	 * make sense, and the extra overhead of doing this checking
> +	 * for a few historical weird trees from long ago seems like
> +	 * the dog wagging the tail to me.
> +	 *
> +	 * However: I don't feel like fighting that battle right now.
> +	 * For now, to keep the regression test passing, we have to
> +	 * detect it.  Since the diff machinery passes these to us in
> +	 * adjacent pairs, we just need to check to see if our name
> +	 * matches the previous one.
> +	 *
> +	 * TODO: Dispense with this test, rip out the test in t4058, and
> +	 * lobby folks for the change.
> +	 */
> +	if (rename_dst_nr > 0 &&
> +	    !strcmp(rename_dst[rename_dst_nr-1].p->two->path, p->two->path))
>  		return -1;
> -	first = -first - 1;
>
> -	/* insert to make it at "first" */
>  	ALLOC_GROW(rename_dst, rename_dst_nr + 1, rename_dst_alloc);
> +	rename_dst[rename_dst_nr].p = p;
> +	rename_dst[rename_dst_nr].filespec_to_free = NULL;
> +	rename_dst[rename_dst_nr].is_rename = 0;
>  	rename_dst_nr++;
> -	if (first < rename_dst_nr)
> -		MOVE_ARRAY(rename_dst + first + 1, rename_dst + first,
> -			   rename_dst_nr - first - 1);
> -	rename_dst[first].two = alloc_filespec(two->path);
> -	fill_filespec(rename_dst[first].two, &two->oid, two->oid_valid,
> -		      two->mode);
> -	rename_dst[first].pair = NULL;
>  	return 0;

Very nice, this is much simpler than what was here before.

> @@ -78,6 +75,14 @@ static int rename_src_nr, rename_src_alloc;
>
>  static void register_rename_src(struct diff_filepair *p)
>  {
> +	if (p->broken_pair) {
> +		if (!break_idx) {
> +			break_idx = xmalloc(sizeof(*break_idx));
> +			strintmap_init(break_idx, -1);
> +		}
> +		strintmap_set(break_idx, p->one->path, rename_dst_nr);
> +	}
> +

Makes sense.

> @@ -664,8 +653,9 @@ void diffcore_rename(struct diff_options *options)
>  			 */
>  			if (DIFF_PAIR_BROKEN(p)) {
>  				/* broken delete */
> -				struct diff_rename_dst *dst = locate_rename_dst(p->one);
> -				if (dst && dst->pair)
> +				struct diff_rename_dst *dst = locate_rename_dst(p);
> +				assert(dst);

You're not hurting anything, but I'm not convinced that this assert() is
buying you anything that the line immediately below it isn't already
doing.

The rest of the patch looks good to me.

Thanks,
Taylor



[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux