Re: [PATCH v2 7/8] diff: add ability to insert additional headers for paths

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Sat, Dec 25, 2021 at 07:59:18AM +0000, Elijah Newren via GitGitGadget wrote:
> From: Elijah Newren <newren@xxxxxxxxx>
> 
> When additional headers are provided, we need to
>   * add diff_filepairs to diff_queued_diff for each paths in the
>     additional headers map which, unless that path is part of
>     another diff_filepair already found in diff_queued_diff
>   * format the headers (colorization, line_prefix for --graph)
>   * make sure the various codepaths that attempt to return early
>     if there are "no changes" take into account the headers that
>     need to be shown.
> 
> Signed-off-by: Elijah Newren <newren@xxxxxxxxx>
> ---
>  diff.c     | 116 +++++++++++++++++++++++++++++++++++++++++++++++++++--
>  diff.h     |   3 +-
>  log-tree.c |   2 +-
>  3 files changed, 115 insertions(+), 6 deletions(-)
> 
> diff --git a/diff.c b/diff.c
> index 861282db1c3..aaa6a19f158 100644
> --- a/diff.c
> +++ b/diff.c
> @@ -27,6 +27,7 @@
>  #include "help.h"
>  #include "promisor-remote.h"
>  #include "dir.h"
> +#include "strmap.h"
>  
>  #ifdef NO_FAST_WORKING_DIRECTORY
>  #define FAST_WORKING_DIRECTORY 0
> @@ -3406,6 +3407,31 @@ struct userdiff_driver *get_textconv(struct repository *r,
>  	return userdiff_get_textconv(r, one->driver);
>  }
>  
> +static struct strbuf *additional_headers(struct diff_options *o,
> +					 const char *path)
> +{
> +	if (!o->additional_path_headers)
> +		return NULL;
> +	return strmap_get(o->additional_path_headers, path);
> +}
> +
> +static void add_formatted_headers(struct strbuf *msg,
> +				  struct strbuf *more_headers,
> +				  const char *line_prefix,
> +				  const char *meta,
> +				  const char *reset)
> +{
> +	char *next, *newline;
> +
> +	for (next = more_headers->buf; *next; next = newline) {
> +		newline = strchrnul(next, '\n');
> +		strbuf_addf(msg, "%s%s%.*s%s\n", line_prefix, meta,
> +			    (int)(newline - next), next, reset);
> +		if (*newline)
> +			newline++;
> +	}
> +}
> +
>  static void builtin_diff(const char *name_a,
>  			 const char *name_b,
>  			 struct diff_filespec *one,
> @@ -3464,6 +3490,17 @@ static void builtin_diff(const char *name_a,
>  	b_two = quote_two(b_prefix, name_b + (*name_b == '/'));
>  	lbl[0] = DIFF_FILE_VALID(one) ? a_one : "/dev/null";
>  	lbl[1] = DIFF_FILE_VALID(two) ? b_two : "/dev/null";
> +	if (!DIFF_FILE_VALID(one) && !DIFF_FILE_VALID(two)) {
> +		/*
> +		 * We should only reach this point for pairs from
> +		 * create_filepairs_for_header_only_notifications().  For
> +		 * these, we should avoid the "/dev/null" special casing
> +		 * above, meaning we avoid showing such pairs as either
> +		 * "new file" or "deleted file" below.
> +		 */
> +		lbl[0] = a_one;
> +		lbl[1] = b_two;
> +	}

not so familiar with this logic, but I saw that without this change, the
rename/rename conflict test fails. Is this because we add a file pair under
the original name (that's been renamed on both sides). I wonder if we
can sketch such a case in the comment.

>  	strbuf_addf(&header, "%s%sdiff --git %s %s%s\n", line_prefix, meta, a_one, b_two, reset);
>  	if (lbl[0][0] == '/') {
>  		/* /dev/null */
> @@ -4328,6 +4365,7 @@ static void fill_metainfo(struct strbuf *msg,
>  	const char *set = diff_get_color(use_color, DIFF_METAINFO);
>  	const char *reset = diff_get_color(use_color, DIFF_RESET);
>  	const char *line_prefix = diff_line_prefix(o);
> +	struct strbuf *more_headers = NULL;
>  
>  	*must_show_header = 1;
>  	strbuf_init(msg, PATH_MAX * 2 + 300);
> @@ -4364,6 +4402,11 @@ static void fill_metainfo(struct strbuf *msg,
>  	default:
>  		*must_show_header = 0;
>  	}
> +	if ((more_headers = additional_headers(o, name))) {
> +		add_formatted_headers(msg, more_headers,
> +				      line_prefix, set, reset);
> +		*must_show_header = 1;
> +	}
>  	if (one && two && !oideq(&one->oid, &two->oid)) {
>  		const unsigned hexsz = the_hash_algo->hexsz;
>  		int abbrev = o->abbrev ? o->abbrev : DEFAULT_ABBREV;
> @@ -5852,12 +5895,22 @@ int diff_unmodified_pair(struct diff_filepair *p)
>  
>  static void diff_flush_patch(struct diff_filepair *p, struct diff_options *o)
>  {
> -	if (diff_unmodified_pair(p))
> +	/*
> +	 * Check if we can return early without showing a diff.  Note that
> +	 * diff_filepair only stores {oid, path, mode, is_valid}
> +	 * information for each path, and thus diff_unmodified_pair() only
> +	 * considers those bits of info.  However, we do not want pairs
> +	 * created by create_filepairs_for_header_only_notifications() to
> +	 * be ignored, so return early if both p is unmodified AND
> +	 * p->one->path is not in additional headers.
> +	 */
> +	if (diff_unmodified_pair(p) && !additional_headers(o, p->one->path))
>  		return;
>  
> +	/* Actually, we can also return early to avoid showing tree diffs */
>  	if ((DIFF_FILE_VALID(p->one) && S_ISDIR(p->one->mode)) ||
>  	    (DIFF_FILE_VALID(p->two) && S_ISDIR(p->two->mode)))
> -		return; /* no tree diffs in patch format */
> +		return;
>  
>  	run_diff(p, o);
>  }
> @@ -5888,10 +5941,14 @@ static void diff_flush_checkdiff(struct diff_filepair *p,
>  	run_checkdiff(p, o);
>  }
>  
> -int diff_queue_is_empty(void)
> +int diff_queue_is_empty(struct diff_options *o)
>  {
>  	struct diff_queue_struct *q = &diff_queued_diff;
>  	int i;
> +
> +	if (o->additional_path_headers &&
> +	    !strmap_empty(o->additional_path_headers))
> +		return 0;
>  	for (i = 0; i < q->nr; i++)
>  		if (!diff_unmodified_pair(q->queue[i]))
>  			return 0;
> @@ -6325,6 +6382,54 @@ void diff_warn_rename_limit(const char *varname, int needed, int degraded_cc)
>  		warning(_(rename_limit_advice), varname, needed);
>  }
>  
> +static void create_filepairs_for_header_only_notifications(struct diff_options *o)
> +{
> +	struct strset present;
> +	struct diff_queue_struct *q = &diff_queued_diff;
> +	struct hashmap_iter iter;
> +	struct strmap_entry *e;
> +	int i;
> +
> +	strset_init_with_options(&present, /*pool*/ NULL, /*strdup*/ 0);
> +
> +	/*
> +	 * Find out which paths exist in diff_queued_diff, preferring
> +	 * one->path for any pair that has multiple paths.

Why do we prefer one->path?

> +	 */
> +	for (i = 0; i < q->nr; i++) {
> +		struct diff_filepair *p = q->queue[i];
> +		char *path = p->one->path ? p->one->path : p->two->path;
> +
> +		if (strmap_contains(o->additional_path_headers, path))
> +			strset_add(&present, path);
> +	}
> +
> +	/*
> +	 * Loop over paths in additional_path_headers; for each NOT already
> +	 * in diff_queued_diff, create a synthetic filepair and insert that
> +	 * into diff_queued_diff.
> +	 */
> +	strmap_for_each_entry(o->additional_path_headers, &iter, e) {
> +		if (!strset_contains(&present, e->key)) {
> +			struct diff_filespec *one, *two;
> +			struct diff_filepair *p;
> +
> +			one = alloc_filespec(e->key);
> +			two = alloc_filespec(e->key);
> +			fill_filespec(one, null_oid(), 0, 0);
> +			fill_filespec(two, null_oid(), 0, 0);
> +			p = diff_queue(q, one, two);
> +			p->status = DIFF_STATUS_MODIFIED;
> +		}
> +	}

All these string hash-maps are not really typical for a C program. I'm sure
they are the best choice for an advanced merge algorithm but they are not
really necessary for computing/printing a diff. It feels like this is an
implementation detail from merge-ort that's leaking into other components.

What we want to do is

	for file_pair in additional_headers:
		if not already_queued(file_pair):
			queue(file_pair)

to do that, you use a temporary has-set ("present") that records everything
that's already queued (already_queued() is a lookup in that set).

Let's assume both the queue and additional_headers are sorted arrays.
Then we could efficiently merge them (like a merge-sort algorithm)
without ever allocating a temporary hash map.

I haven't checked if this is practical (better wait for feedback).
We'd probably need to convert the strmap additional_path_headers into an
array and sort it (I guess our hash map does not guarantee any ordering?)

> +
> +	/* Re-sort the filepairs */
> +	diffcore_fix_diff_index();
> +
> +	/* Cleanup */
> +	strset_clear(&present);

Not a strong opinion, but I'd probably drop this comment

> +}
> +
>  static void diff_flush_patch_all_file_pairs(struct diff_options *o)
>  {
>  	int i;
> @@ -6337,6 +6442,9 @@ static void diff_flush_patch_all_file_pairs(struct diff_options *o)
>  	if (o->color_moved)
>  		o->emitted_symbols = &esm;
>  
> +	if (o->additional_path_headers)
> +		create_filepairs_for_header_only_notifications(o);
> +
>  	for (i = 0; i < q->nr; i++) {
>  		struct diff_filepair *p = q->queue[i];
>  		if (check_pair_status(p))
> @@ -6413,7 +6521,7 @@ void diff_flush(struct diff_options *options)
>  	 * Order: raw, stat, summary, patch
>  	 * or:    name/name-status/checkdiff (other bits clear)
>  	 */
> -	if (!q->nr)
> +	if (!q->nr && !options->additional_path_headers)
>  		goto free_queue;
>  
>  	if (output_format & (DIFF_FORMAT_RAW |
> diff --git a/diff.h b/diff.h
> index 8ba85c5e605..06a0a67afda 100644
> --- a/diff.h
> +++ b/diff.h
> @@ -395,6 +395,7 @@ struct diff_options {
>  
>  	struct repository *repo;
>  	struct option *parseopts;
> +	struct strmap *additional_path_headers;
>  
>  	int no_free;
>  };
> @@ -593,7 +594,7 @@ void diffcore_fix_diff_index(void);
>  "                show all files diff when -S is used and hit is found.\n" \
>  "  -a  --text    treat all files as text.\n"
>  
> -int diff_queue_is_empty(void);
> +int diff_queue_is_empty(struct diff_options*);
>  void diff_flush(struct diff_options*);
>  void diff_free(struct diff_options*);
>  void diff_warn_rename_limit(const char *varname, int needed, int degraded_cc);
> diff --git a/log-tree.c b/log-tree.c
> index d4655b63d75..33c28f537a6 100644
> --- a/log-tree.c
> +++ b/log-tree.c
> @@ -850,7 +850,7 @@ int log_tree_diff_flush(struct rev_info *opt)
>  	opt->shown_dashes = 0;
>  	diffcore_std(&opt->diffopt);
>  
> -	if (diff_queue_is_empty()) {
> +	if (diff_queue_is_empty(&opt->diffopt)) {
>  		int saved_fmt = opt->diffopt.output_format;
>  		opt->diffopt.output_format = DIFF_FORMAT_NO_OUTPUT;
>  		diff_flush(&opt->diffopt);
> -- 
> gitgitgadget
> 



[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux