On Sat, Dec 25, 2021 at 07:59:18AM +0000, Elijah Newren via GitGitGadget wrote: > From: Elijah Newren <newren@xxxxxxxxx> > > When additional headers are provided, we need to > * add diff_filepairs to diff_queued_diff for each paths in the > additional headers map which, unless that path is part of > another diff_filepair already found in diff_queued_diff > * format the headers (colorization, line_prefix for --graph) > * make sure the various codepaths that attempt to return early > if there are "no changes" take into account the headers that > need to be shown. > > Signed-off-by: Elijah Newren <newren@xxxxxxxxx> > --- > diff.c | 116 +++++++++++++++++++++++++++++++++++++++++++++++++++-- > diff.h | 3 +- > log-tree.c | 2 +- > 3 files changed, 115 insertions(+), 6 deletions(-) > > diff --git a/diff.c b/diff.c > index 861282db1c3..aaa6a19f158 100644 > --- a/diff.c > +++ b/diff.c > @@ -27,6 +27,7 @@ > #include "help.h" > #include "promisor-remote.h" > #include "dir.h" > +#include "strmap.h" > > #ifdef NO_FAST_WORKING_DIRECTORY > #define FAST_WORKING_DIRECTORY 0 > @@ -3406,6 +3407,31 @@ struct userdiff_driver *get_textconv(struct repository *r, > return userdiff_get_textconv(r, one->driver); > } > > +static struct strbuf *additional_headers(struct diff_options *o, > + const char *path) > +{ > + if (!o->additional_path_headers) > + return NULL; > + return strmap_get(o->additional_path_headers, path); > +} > + > +static void add_formatted_headers(struct strbuf *msg, > + struct strbuf *more_headers, > + const char *line_prefix, > + const char *meta, > + const char *reset) > +{ > + char *next, *newline; > + > + for (next = more_headers->buf; *next; next = newline) { > + newline = strchrnul(next, '\n'); > + strbuf_addf(msg, "%s%s%.*s%s\n", line_prefix, meta, > + (int)(newline - next), next, reset); > + if (*newline) > + newline++; > + } > +} > + > static void builtin_diff(const char *name_a, > const char *name_b, > struct diff_filespec *one, > @@ -3464,6 +3490,17 @@ static void builtin_diff(const char *name_a, > b_two = quote_two(b_prefix, name_b + (*name_b == '/')); > lbl[0] = DIFF_FILE_VALID(one) ? a_one : "/dev/null"; > lbl[1] = DIFF_FILE_VALID(two) ? b_two : "/dev/null"; > + if (!DIFF_FILE_VALID(one) && !DIFF_FILE_VALID(two)) { > + /* > + * We should only reach this point for pairs from > + * create_filepairs_for_header_only_notifications(). For > + * these, we should avoid the "/dev/null" special casing > + * above, meaning we avoid showing such pairs as either > + * "new file" or "deleted file" below. > + */ > + lbl[0] = a_one; > + lbl[1] = b_two; > + } not so familiar with this logic, but I saw that without this change, the rename/rename conflict test fails. Is this because we add a file pair under the original name (that's been renamed on both sides). I wonder if we can sketch such a case in the comment. > strbuf_addf(&header, "%s%sdiff --git %s %s%s\n", line_prefix, meta, a_one, b_two, reset); > if (lbl[0][0] == '/') { > /* /dev/null */ > @@ -4328,6 +4365,7 @@ static void fill_metainfo(struct strbuf *msg, > const char *set = diff_get_color(use_color, DIFF_METAINFO); > const char *reset = diff_get_color(use_color, DIFF_RESET); > const char *line_prefix = diff_line_prefix(o); > + struct strbuf *more_headers = NULL; > > *must_show_header = 1; > strbuf_init(msg, PATH_MAX * 2 + 300); > @@ -4364,6 +4402,11 @@ static void fill_metainfo(struct strbuf *msg, > default: > *must_show_header = 0; > } > + if ((more_headers = additional_headers(o, name))) { > + add_formatted_headers(msg, more_headers, > + line_prefix, set, reset); > + *must_show_header = 1; > + } > if (one && two && !oideq(&one->oid, &two->oid)) { > const unsigned hexsz = the_hash_algo->hexsz; > int abbrev = o->abbrev ? o->abbrev : DEFAULT_ABBREV; > @@ -5852,12 +5895,22 @@ int diff_unmodified_pair(struct diff_filepair *p) > > static void diff_flush_patch(struct diff_filepair *p, struct diff_options *o) > { > - if (diff_unmodified_pair(p)) > + /* > + * Check if we can return early without showing a diff. Note that > + * diff_filepair only stores {oid, path, mode, is_valid} > + * information for each path, and thus diff_unmodified_pair() only > + * considers those bits of info. However, we do not want pairs > + * created by create_filepairs_for_header_only_notifications() to > + * be ignored, so return early if both p is unmodified AND > + * p->one->path is not in additional headers. > + */ > + if (diff_unmodified_pair(p) && !additional_headers(o, p->one->path)) > return; > > + /* Actually, we can also return early to avoid showing tree diffs */ > if ((DIFF_FILE_VALID(p->one) && S_ISDIR(p->one->mode)) || > (DIFF_FILE_VALID(p->two) && S_ISDIR(p->two->mode))) > - return; /* no tree diffs in patch format */ > + return; > > run_diff(p, o); > } > @@ -5888,10 +5941,14 @@ static void diff_flush_checkdiff(struct diff_filepair *p, > run_checkdiff(p, o); > } > > -int diff_queue_is_empty(void) > +int diff_queue_is_empty(struct diff_options *o) > { > struct diff_queue_struct *q = &diff_queued_diff; > int i; > + > + if (o->additional_path_headers && > + !strmap_empty(o->additional_path_headers)) > + return 0; > for (i = 0; i < q->nr; i++) > if (!diff_unmodified_pair(q->queue[i])) > return 0; > @@ -6325,6 +6382,54 @@ void diff_warn_rename_limit(const char *varname, int needed, int degraded_cc) > warning(_(rename_limit_advice), varname, needed); > } > > +static void create_filepairs_for_header_only_notifications(struct diff_options *o) > +{ > + struct strset present; > + struct diff_queue_struct *q = &diff_queued_diff; > + struct hashmap_iter iter; > + struct strmap_entry *e; > + int i; > + > + strset_init_with_options(&present, /*pool*/ NULL, /*strdup*/ 0); > + > + /* > + * Find out which paths exist in diff_queued_diff, preferring > + * one->path for any pair that has multiple paths. Why do we prefer one->path? > + */ > + for (i = 0; i < q->nr; i++) { > + struct diff_filepair *p = q->queue[i]; > + char *path = p->one->path ? p->one->path : p->two->path; > + > + if (strmap_contains(o->additional_path_headers, path)) > + strset_add(&present, path); > + } > + > + /* > + * Loop over paths in additional_path_headers; for each NOT already > + * in diff_queued_diff, create a synthetic filepair and insert that > + * into diff_queued_diff. > + */ > + strmap_for_each_entry(o->additional_path_headers, &iter, e) { > + if (!strset_contains(&present, e->key)) { > + struct diff_filespec *one, *two; > + struct diff_filepair *p; > + > + one = alloc_filespec(e->key); > + two = alloc_filespec(e->key); > + fill_filespec(one, null_oid(), 0, 0); > + fill_filespec(two, null_oid(), 0, 0); > + p = diff_queue(q, one, two); > + p->status = DIFF_STATUS_MODIFIED; > + } > + } All these string hash-maps are not really typical for a C program. I'm sure they are the best choice for an advanced merge algorithm but they are not really necessary for computing/printing a diff. It feels like this is an implementation detail from merge-ort that's leaking into other components. What we want to do is for file_pair in additional_headers: if not already_queued(file_pair): queue(file_pair) to do that, you use a temporary has-set ("present") that records everything that's already queued (already_queued() is a lookup in that set). Let's assume both the queue and additional_headers are sorted arrays. Then we could efficiently merge them (like a merge-sort algorithm) without ever allocating a temporary hash map. I haven't checked if this is practical (better wait for feedback). We'd probably need to convert the strmap additional_path_headers into an array and sort it (I guess our hash map does not guarantee any ordering?) > + > + /* Re-sort the filepairs */ > + diffcore_fix_diff_index(); > + > + /* Cleanup */ > + strset_clear(&present); Not a strong opinion, but I'd probably drop this comment > +} > + > static void diff_flush_patch_all_file_pairs(struct diff_options *o) > { > int i; > @@ -6337,6 +6442,9 @@ static void diff_flush_patch_all_file_pairs(struct diff_options *o) > if (o->color_moved) > o->emitted_symbols = &esm; > > + if (o->additional_path_headers) > + create_filepairs_for_header_only_notifications(o); > + > for (i = 0; i < q->nr; i++) { > struct diff_filepair *p = q->queue[i]; > if (check_pair_status(p)) > @@ -6413,7 +6521,7 @@ void diff_flush(struct diff_options *options) > * Order: raw, stat, summary, patch > * or: name/name-status/checkdiff (other bits clear) > */ > - if (!q->nr) > + if (!q->nr && !options->additional_path_headers) > goto free_queue; > > if (output_format & (DIFF_FORMAT_RAW | > diff --git a/diff.h b/diff.h > index 8ba85c5e605..06a0a67afda 100644 > --- a/diff.h > +++ b/diff.h > @@ -395,6 +395,7 @@ struct diff_options { > > struct repository *repo; > struct option *parseopts; > + struct strmap *additional_path_headers; > > int no_free; > }; > @@ -593,7 +594,7 @@ void diffcore_fix_diff_index(void); > " show all files diff when -S is used and hit is found.\n" \ > " -a --text treat all files as text.\n" > > -int diff_queue_is_empty(void); > +int diff_queue_is_empty(struct diff_options*); > void diff_flush(struct diff_options*); > void diff_free(struct diff_options*); > void diff_warn_rename_limit(const char *varname, int needed, int degraded_cc); > diff --git a/log-tree.c b/log-tree.c > index d4655b63d75..33c28f537a6 100644 > --- a/log-tree.c > +++ b/log-tree.c > @@ -850,7 +850,7 @@ int log_tree_diff_flush(struct rev_info *opt) > opt->shown_dashes = 0; > diffcore_std(&opt->diffopt); > > - if (diff_queue_is_empty()) { > + if (diff_queue_is_empty(&opt->diffopt)) { > int saved_fmt = opt->diffopt.output_format; > opt->diffopt.output_format = DIFF_FORMAT_NO_OUTPUT; > diff_flush(&opt->diffopt); > -- > gitgitgadget >