Re: [PATCHv3 20/20] diff.c: color moved lines differently

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, 18 May 2017 12:37:46 -0700
Stefan Beller <sbeller@xxxxxxxxxx> wrote:

[snip]

> Instead this provides a dynamic programming greedy algorithm that

Not sure if this is called "dynamic programming".

> finds the largest moved hunk and then switches color to the
> alternative color for the next hunk. By doing this any permutation is
> recognized and displayed. That implies that there is no dedicated
> boundary or inside-hunk color, but instead we'll have just two colors
> alternating for hunks.

[snip]

I would title this "color moved blocks differently" to emphasize that we
are treating the moves in terms of blocks, not just lines.

The first part of the commit message could probably be written more
concisely, like the following:

  When a patch consists mostly of moving blocks of code around, it can
  be quite tedious to ensure that the blocks are moved verbatim, and not
  undesirably modified in the move. To that end, color blocks that are
  moved within the same patch differently. For example (OM, del, add,
  and NM are different colors):

    [OM]  -void sensitive_stuff(void)
    [OM]  -{
    [OM]  -        if (!is_authorized_user())
    [OM]  -                die("unauthorized");
    [OM]  -        sensitive_stuff(spanning,
    [OM]  -                        multiple,
    [OM]  -                        lines);
    [OM]  -}

           void another_function()
           {
    [del] -        printf("foo");
    [add] +        printf("bar");
           }

    [NM]  +void sensitive_stuff(void)
    [NM]  +{
    [NM]  +        if (!is_authorized_user())
    [NM]  +                die("unauthorized");
    [NM]  +        sensitive_stuff(spanning,
    [NM]  +                        multiple,
    [NM]  +                        lines);
    [NM]  +}

  Adjacent blocks are colored differently. For example, in this
  potentially malicious patch, the swapping of blocks can be spotted:

    [OM]  -void sensitive_stuff(void)
    [OM]  -{
    [OMA] -        if (!is_authorized_user())
    [OMA] -                die("unauthorized");
    [OM]  -        sensitive_stuff(spanning,
    [OM]  -                        multiple,
    [OM]  -                        lines);
    [OMA] -}

           void another_function()
           {
    [del] -        printf("foo");
    [add] +        printf("bar");
           }

    [NM]  +void sensitive_stuff(void)
    [NM]  +{
    [NMA] +        sensitive_stuff(spanning,
    [NMA] +                        multiple,
    [NMA] +                        lines);
    [NM]  +        if (!is_authorized_user())
    [NM]  +                die("unauthorized");
    [NMA] +}

Having said that, thanks - this version is much more like what I would
expect.

> +static int buffered_patch_line_cmp_no_ws(const struct buffered_patch_line *a,
> +					 const struct buffered_patch_line *b,
> +					 const void *keydata)
> +{
> +	int ret;
> +	struct strbuf sba = STRBUF_INIT;
> +	struct strbuf sbb = STRBUF_INIT;
> +
> +	get_ws_cleaned_string(a, &sba);
> +	get_ws_cleaned_string(b, &sbb);
> +	ret = sba.len != sbb.len || strncmp(sba.buf, sbb.buf, sba.len);
> +	strbuf_release(&sba);
> +	strbuf_release(&sbb);
> +	return ret;
> +}
> +
> +static int buffered_patch_line_cmp(const struct buffered_patch_line *a,
> +				   const struct buffered_patch_line *b,
> +				   const void *keydata)
> +{
> +	return a->len != b->len || strncmp(a->line, b->line, a->len);
> +}

Instead of having 2 versions of all the comparison functions, could the
ws-ness be passed as the keydata?

> +static unsigned get_line_hash(struct buffered_patch_line *line, unsigned ignore_ws)
> +{
> +	static struct strbuf sb = STRBUF_INIT;
> +
> +	if (ignore_ws) {
> +		strbuf_reset(&sb);
> +		get_ws_cleaned_string(line, &sb);

Memory leak here, I think.

> +		return memhash(sb.buf, sb.len);
> +	} else {
> +		return memhash(line->line, line->len);
> +	}
> +}

[snip]

> +static void add_lines_to_move_detection(struct diff_options *o)
> +{
> +	struct moved_entry *prev_line;

gcc says (rightly) that this must be initialized.

> +
> +	int n;
> +	for (n = 0; n < o->line_buffer_nr; n++) {
> +		int sign = 0;
> +		struct hashmap *hm;
> +		struct moved_entry *key;
> +
> +		switch (o->line_buffer[n].sign) {
> +		case '+':
> +			sign = '+';
> +			hm = o->added_lines;
> +			break;
> +		case '-':
> +			sign = '-';
> +			hm = o->deleted_lines;
> +			break;
> +		case ' ':
> +		default:
> +			prev_line = NULL;
> +			continue;
> +		}
> +
> +		key = prepare_entry(o, n);
> +		if (prev_line &&
> +		    prev_line->line->sign == sign)
> +			prev_line->next_line = key;
> +
> +		hashmap_add(hm, key);
> +		prev_line = key;
> +	}
> +}
> +
> +static void mark_color_as_moved(struct diff_options *o)
> +{
> +	struct moved_entry **pmb = NULL; /* potentially moved blocks */
> +	int pmb_nr = 0, pmb_alloc = 0;
> +	int alt_flag = 0;

Probably call this "use_alt_color" or something similar.

> +	int n;
> +
> +	for (n = 0; n < o->line_buffer_nr; n++) {
> +		struct hashmap *hm = NULL;
> +		struct moved_entry *key;
> +		struct moved_entry *match = NULL;
> +		struct buffered_patch_line *l = &o->line_buffer[n];
> +		int i, lp, rp;
> +
> +		switch (l->sign) {
> +		case '+':
> +			hm = o->deleted_lines;
> +			break;
> +		case '-':
> +			hm = o->added_lines;
> +			break;
> +		default:
> +			alt_flag = 0; /* reset to standard, no-alt move color */
> +			pmb_nr = 0; /* no running sets */
> +			continue;
> +		}
> +
> +		/* Check for any match to color it as a move. */
> +		key = prepare_entry(o, n);
> +		match = hashmap_get(hm, key, o);
> +		free(key);
> +		if (!match)
> +			continue;
> +
> +		/* Check any potential block runs, advance each or nullify */
> +		for (i = 0; i < pmb_nr; i++) {
> +			struct moved_entry *p = pmb[i];
> +			struct moved_entry *pnext = (p && p->next_line) ?
> +					p->next_line : NULL;
> +			if (pnext &&
> +			    !buffered_patch_line_cmp(pnext->line, l, o)) {
> +				pmb[i] = p->next_line;
> +			} else {
> +				pmb[i] = NULL;
> +			}

Memory leak of pmb[i] somewhere here?

> +		}
> +
> +		/* Shrink the set to the remaining runs */
> +		for (lp = 0, rp = pmb_nr - 1; lp <= rp;) {
> +			while (lp < pmb_nr && pmb[lp])
> +				lp ++;
> +			/* lp points at the first NULL now */
> +
> +			while (rp > -1 && !pmb[rp])
> +				rp--;
> +			/* rp points at the last non-NULL */
> +
> +			if (lp < pmb_nr && rp > -1 && lp < rp) {
> +				pmb[lp] = pmb[rp];
> +				pmb[rp] = NULL;
> +				rp--;
> +				lp++;
> +			}
> +		}
> +
> +		if (rp > -1) {
> +			/* Remember the number of running sets */
> +			pmb_nr = rp + 1;
> +		} else {
> +			/* Toggle color */
> +			alt_flag = (alt_flag + 1) % 2;
> +
> +			/* Build up a new set */
> +			pmb_nr = 0;
> +			for (; match; match = hashmap_get_next(hm, match)) {
> +				ALLOC_GROW(pmb, pmb_nr + 1, pmb_alloc);
> +				pmb[pmb_nr++] = match;
> +			}
> +		}
> +
> +		switch (l->sign) {
> +		case '+':
> +			l->set = diff_get_color_opt(o, DIFF_FILE_NEW_MOVED + alt_flag);
> +			break;
> +		case '-':
> +			l->set = diff_get_color_opt(o, DIFF_FILE_OLD_MOVED + alt_flag);
> +			break;
> +		default:
> +			die("BUG: we should have continued earlier?");
> +		}
> +	}
> +	free(pmb);
> +}

[snip]

> @@ -4874,6 +5114,11 @@ static void diff_flush_patch_all_file_pairs(struct diff_options *o)
>  
>  	if (o->use_buffer) {
> +		if (o->color_moved) {

Can you just declare the two hashmaps here, so that we do not need to
put them in o? They don't seem to be used outside this block anyway.

> +			add_lines_to_move_detection(o);
> +			mark_color_as_moved(o);
> +		}
> +
>  		for (i = 0; i < o->line_buffer_nr; i++)
>  			emit_buffered_patch_line(o, &o->line_buffer[i]); 

[snip]

> diff --git a/t/t4015-diff-whitespace.sh b/t/t4015-diff-whitespace.sh
> index 289806d0c7..232d9ad55e 100755
> --- a/t/t4015-diff-whitespace.sh
> +++ b/t/t4015-diff-whitespace.sh

As for the tests, also add a test checking the interaction with
whitespace highlighting, and a test showing that diff errors out if we
ask for both move coloring and word-by-word diffing.



[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]