I expected there to be one main function that takes in a diff options and returns the appropriate output without much (if any) changes in other functions...but (as with the previous patch) maybe there are some complications that I didn't foresee. On Sat, May 13, 2017 at 9:01 PM, Stefan Beller <sbeller@xxxxxxxxxx> wrote: > diff --git a/Documentation/config.txt b/Documentation/config.txt > index 475e874d51..90403c06e3 100644 > --- a/Documentation/config.txt > +++ b/Documentation/config.txt > @@ -1051,14 +1051,22 @@ This does not affect linkgit:git-format-patch[1] or the > 'git-diff-{asterisk}' plumbing commands. Can be overridden on the > command line with the `--color[=<when>]` option. > > +color.moved:: > + A boolean value, whether a diff should color moved lines > + differently. The moved lines are searched for in the diff only. > + Duplicated lines from somewhere in the project that are not > + part of the diff are not colored as moved. > + Defaults to false. > + > color.diff.<slot>:: > Use customized color for diff colorization. `<slot>` specifies > which part of the patch to use the specified color, and is one > of `context` (context text - `plain` is a historical synonym), > `meta` (metainformation), `frag` > (hunk header), 'func' (function in hunk header), `old` (removed lines), > - `new` (added lines), `commit` (commit headers), or `whitespace` > - (highlighting whitespace errors). > + `new` (added lines), `commit` (commit headers), `whitespace` > + (highlighting whitespace errors), `movedFrom` (removed lines that > + reappear), `movedTo` (added lines that were removed elsewhere). There should be 4 "moved" colors. I think the code below is correct (oldmoved, oldmovedalternate, etc.) but the documentation above is wrong. > > color.decorate.<slot>:: > Use customized color for 'git log --decorate' output. `<slot>` is one > diff --git a/diff.c b/diff.c > index dbab7fb44e..6372e0eb25 100644 > --- a/diff.c > +++ b/diff.c > @@ -31,6 +31,7 @@ static int diff_indent_heuristic; /* experimental */ > static int diff_rename_limit_default = 400; > static int diff_suppress_blank_empty; > static int diff_use_color_default = -1; > +static int diff_color_moved_default; > static int diff_context_default = 3; > static int diff_interhunk_context_default; > static const char *diff_word_regex_cfg; > @@ -55,6 +56,10 @@ static char diff_colors[][COLOR_MAXLEN] = { > GIT_COLOR_YELLOW, /* COMMIT */ > GIT_COLOR_BG_RED, /* WHITESPACE */ > GIT_COLOR_NORMAL, /* FUNCINFO */ > + GIT_COLOR_BOLD_RED, /* OLD_MOVED_A */ > + GIT_COLOR_BG_RED, /* OLD_MOVED_B */ > + GIT_COLOR_BOLD_GREEN, /* NEW_MOVED_A */ > + GIT_COLOR_BG_GREEN, /* NEW_MOVED_B */ > }; > > static NORETURN void die_want_option(const char *option_name) > @@ -80,6 +85,14 @@ static int parse_diff_color_slot(const char *var) > return DIFF_WHITESPACE; > if (!strcasecmp(var, "func")) > return DIFF_FUNCINFO; > + if (!strcasecmp(var, "oldmoved")) > + return DIFF_FILE_OLD_MOVED; > + if (!strcasecmp(var, "oldmovedalternative")) > + return DIFF_FILE_OLD_MOVED_ALT; > + if (!strcasecmp(var, "newmoved")) > + return DIFF_FILE_NEW_MOVED; > + if (!strcasecmp(var, "newmovedalternative")) > + return DIFF_FILE_NEW_MOVED_ALT; > return -1; > } > > @@ -234,6 +247,10 @@ int git_diff_ui_config(const char *var, const char *value, void *cb) > diff_use_color_default = git_config_colorbool(var, value); > return 0; > } > + if (!strcmp(var, "color.moved")) { > + diff_color_moved_default = git_config_bool(var, value); > + return 0; > + } > if (!strcmp(var, "diff.context")) { > diff_context_default = git_config_int(var, value); > if (diff_context_default < 0) > @@ -354,6 +371,81 @@ int git_diff_basic_config(const char *var, const char *value, void *cb) > return git_default_config(var, value, cb); > } > > +struct moved_entry { > + struct hashmap_entry ent; > + const struct buffered_patch_line *line; > + struct moved_entry *next_line; > +}; > + > +static void get_ws_cleaned_string(const struct buffered_patch_line *l, > + struct strbuf *out) > +{ > + int i; > + for (i = 0; i < l->len; i++) { > + if (isspace(l->line[i])) > + continue; > + strbuf_addch(out, l->line[i]); > + } > +} > + > +static int buffered_patch_line_cmp_no_ws(const struct buffered_patch_line *a, > + const struct buffered_patch_line *b, > + const void *keydata) > +{ > + struct strbuf sba = STRBUF_INIT; > + struct strbuf sbb = STRBUF_INIT; > + get_ws_cleaned_string(a, &sba); > + get_ws_cleaned_string(b, &sbb); > + return sba.len != sbb.len || strncmp(sba.buf, sbb.buf, sba.len); > +} > + > +static int buffered_patch_line_cmp(const struct buffered_patch_line *a, > + const struct buffered_patch_line *b, > + const void *keydata) > +{ > + return a->len != b->len || strncmp(a->line, b->line, a->len); > +} > + > +static int moved_entry_cmp(const struct moved_entry *a, > + const struct moved_entry *b, > + const void *keydata) > +{ > + return buffered_patch_line_cmp(a->line, b->line, keydata); > +} > + > +static int moved_entry_cmp_no_ws(const struct moved_entry *a, > + const struct moved_entry *b, > + const void *keydata) > +{ > + return buffered_patch_line_cmp_no_ws(a->line, b->line, keydata); > +} > + > +static unsigned get_line_hash(struct buffered_patch_line *line, unsigned ignore_ws) > +{ > + static struct strbuf sb = STRBUF_INIT; > + > + if (ignore_ws) { > + strbuf_reset(&sb); > + get_ws_cleaned_string(line, &sb); > + return memhash(sb.buf, sb.len); > + } else > + return memhash(line->line, line->len); > +} > + > +static struct moved_entry *prepare_entry(struct diff_options *o, > + int line_no) > +{ > + struct moved_entry *ret = xmalloc(sizeof(*ret)); > + unsigned ignore_ws = DIFF_XDL_TST(o, IGNORE_WHITESPACE); > + struct buffered_patch_line *l = &o->line_buffer[line_no]; > + > + ret->ent.hash = get_line_hash(l, ignore_ws); > + ret->line = l; > + ret->next_line = NULL; > + > + return ret; > +} > + > static char *quote_two(const char *one, const char *two) > { > int need_one = quote_c_style(one, NULL, NULL, 1); > @@ -516,8 +608,98 @@ static void check_blank_at_eof(mmfile_t *mf1, mmfile_t *mf2, > ecbdata->blank_at_eof_in_postimage = (at - l2) + 1; > } > > +static void mark_color_as_moved(struct diff_options *o, int line_no) > +{ > + struct hashmap *hm = NULL; > + struct moved_entry *key = prepare_entry(o, line_no); > + struct moved_entry *match = NULL; > + struct buffered_patch_line *l = &o->line_buffer[line_no]; > + int alt_flag; > + int i, lp, rp; > + > + switch (l->sign) { > + case '+': > + hm = o->deleted_lines; > + break; > + case '-': > + hm = o->added_lines; > + break; > + default: > + /* reset to standard, on-alt move color */ > + o->color_moved = 1; > + break; > + } > + > + /* Check for any match to color it as a move. */ > + if (!hm) > + return; > + match = hashmap_get(hm, key, o); > + free(key); > + if (!match) > + return; > + > + /* Check any potential block runs, advance each or nullify */ > + for (i = 0; i < o->pmb_nr; i++) { > + struct moved_entry *p = o->pmb[i]; > + if (p && p->next_line && > + !buffered_patch_line_cmp(p->next_line->line, l, o)) { > + o->pmb[i] = p->next_line; > + } else { > + o->pmb[i] = NULL; > + } > + } > + > + /* Shrink the set to the remaining runs */ > + for (lp = 0, rp = o->pmb_nr - 1; lp <= rp;) { > + while (lp < o->pmb_nr && o->pmb[lp]) > + lp ++; > + /* lp points at the first NULL now */ > + > + while (rp > -1 && !o->pmb[rp]) > + rp--; > + /* rp points at the last non-NULL */ > + > + if (lp < o->pmb_nr && rp > -1 && lp < rp) { > + o->pmb[lp] = o->pmb[rp]; > + o->pmb[rp] = NULL; > + rp--; > + lp++; > + } > + } > + > + if (rp > -1) { > + /* Remember the number of running sets */ > + o->pmb_nr = rp + 1; > + } else { > + /* Toggle color */ > + o->color_moved = o->color_moved == 2 ? 1 : 2; > + > + /* Build up a new set */ > + i = 0; > + for (; match; match = hashmap_get_next(hm, match)) { > + ALLOC_GROW(o->pmb, i + 1, o->pmb_alloc); > + o->pmb[i] = match; > + i++; > + } > + o->pmb_nr = i; > + } > + > + alt_flag = o->color_moved - 1; > + switch (l->sign) { > + case '+': > + l->set = diff_get_color_opt(o, DIFF_FILE_NEW_MOVED + alt_flag); > + break; > + case '-': > + l->set = diff_get_color_opt(o, DIFF_FILE_OLD_MOVED + alt_flag); > + break; > + default: > + ; /* nothing */ > + } > +} > + > static void emit_buffered_patch_line(struct diff_options *o, > - struct buffered_patch_line *e) > + struct buffered_patch_line *e, > + int pass) 1. I didn't expect such a function to need to be modified in this patch. 2. What does "pass" do? > { > int has_trailing_newline, has_trailing_carriage_return, len = e->len; > FILE *file = o->file; > @@ -548,11 +730,11 @@ static void emit_buffered_patch_line(struct diff_options *o, > > static void emit_buffered_patch_line_ws(struct diff_options *o, > struct buffered_patch_line *e, > - const char *ws, unsigned ws_rule) > + const char *ws, unsigned ws_rule, > + int pass) > { > struct buffered_patch_line s = {e->set, e->reset, "", 0, e->sign}; > - > - emit_buffered_patch_line(o, &s); > + emit_buffered_patch_line(o, &s, 0); > ws_check_emit(e->line, e->len, ws_rule, > o->file, e->set, e->reset, ws); > } > @@ -564,12 +746,14 @@ static void process_next_buffered_patch_line(struct diff_options *o, int line_no > const char *ws = o->current_filepair->ws; > unsigned ws_rule = o->current_filepair->ws_rule; > > + mark_color_as_moved(o, line_no); > + > switch (e->state) { > case BPL_EMIT_LINE_ASIS: > - emit_buffered_patch_line(o, e); > + emit_buffered_patch_line(o, e, 1); > break; > case BPL_EMIT_LINE_WS: > - emit_buffered_patch_line_ws(o, e, ws, ws_rule); > + emit_buffered_patch_line_ws(o, e, ws, ws_rule, 1); > break; > case BPL_HANDOVER: > o->current_filepair++; > @@ -602,7 +786,7 @@ static void emit_line_0(struct diff_options *o, > if (o->use_buffer) > append_buffered_patch_line(o, &e); > else > - emit_buffered_patch_line(o, &e); > + emit_buffered_patch_line(o, &e, 0); > } > > void emit_line(struct diff_options *o, const char *set, const char *reset, > @@ -621,7 +805,7 @@ static void emit_line_ws(struct diff_options *o, > if (o->use_buffer) > append_buffered_patch_line(o, &e); > else > - emit_buffered_patch_line_ws(o, &e, ws, ws_rule); > + emit_buffered_patch_line_ws(o, &e, ws, ws_rule, 0); > } > > void emit_line_fmt(struct diff_options *o, > @@ -676,6 +860,36 @@ static void emit_line_checked(const char *reset, > ws, ecbdata->ws_rule); > } > > +static void add_line_to_move_detection(struct diff_options *o, int line_idx) > +{ > + int sign = 0; > + struct hashmap *hm; > + struct moved_entry *key; > + > + switch (o->line_buffer[line_idx].sign) { > + case '+': > + sign = '+'; > + hm = o->added_lines; > + break; > + case '-': > + sign = '-'; > + hm = o->deleted_lines; > + break; > + case ' ': > + default: > + o->prev_line = NULL; > + return; > + } > + > + key = prepare_entry(o, line_idx); > + if (o->prev_line && > + o->prev_line->line->sign == sign) > + o->prev_line->next_line = key; > + > + hashmap_add(hm, key); > + o->prev_line = key; > +} > + > static void emit_add_line(const char *reset, > struct emit_callback *ecbdata, > const char *line, int len) > @@ -3649,6 +3863,9 @@ void diff_setup_done(struct diff_options *options) > > if (DIFF_OPT_TST(options, FOLLOW_RENAMES) && options->pathspec.nr != 1) > die(_("--follow requires exactly one pathspec")); > + > + if (!options->use_color || external_diff()) > + options->color_moved = 0; > } > > static int opt_arg(const char *arg, int arg_short, const char *arg_long, int *val) > @@ -4073,6 +4290,10 @@ int diff_opt_parse(struct diff_options *options, > } > else if (!strcmp(arg, "--no-color")) > options->use_color = 0; > + else if (!strcmp(arg, "--color-moved")) > + options->color_moved = 1; > + else if (!strcmp(arg, "--no-color-moved")) > + options->color_moved = 0; > else if (!strcmp(arg, "--color-words")) { > options->use_color = 1; > options->word_diff = DIFF_WORDS_COLOR; > @@ -4878,16 +5099,19 @@ static void diff_flush_patch_all_file_pairs(struct diff_options *o) > { > int i; > struct diff_queue_struct *q = &diff_queued_diff; > - /* > - * For testing purposes we want to make sure the diff machinery > - * works completely with the buffer. If there is anything emitted > - * outside the emit_buffered_patch_line, then the order is screwed > - * up and the tests will fail. > - * > - * TODO (later in this series): > - * We'll unset this flag in a later patch. > - */ > - o->use_buffer = 1; > + > + if (o->color_moved) { > + unsigned ignore_ws = DIFF_XDL_TST(o, IGNORE_WHITESPACE); > + o->use_buffer = 1; > + o->deleted_lines = xmallocz(sizeof(*o->deleted_lines)); > + o->added_lines = xmallocz(sizeof(*o->added_lines)); > + hashmap_init(o->deleted_lines, ignore_ws ? > + (hashmap_cmp_fn)moved_entry_cmp_no_ws : > + (hashmap_cmp_fn)moved_entry_cmp, 0); > + hashmap_init(o->added_lines, ignore_ws ? > + (hashmap_cmp_fn)moved_entry_cmp_no_ws : > + (hashmap_cmp_fn)moved_entry_cmp, 0); > + } > > if (o->use_buffer) { > ALLOC_GROW(o->filepair_buffer, > @@ -4902,6 +5126,10 @@ static void diff_flush_patch_all_file_pairs(struct diff_options *o) > } > > if (o->use_buffer) { > + o->current_filepair = &o->filepair_buffer[0]; > + for (i = 0; i < o->line_buffer_nr; i++) > + add_line_to_move_detection(o, i); > + > o->current_filepair = &o->filepair_buffer[0]; > for (i = 0; i < o->line_buffer_nr; i++) > process_next_buffered_patch_line(o, i); > @@ -4992,6 +5220,7 @@ void diff_flush(struct diff_options *options) > if (!options->file) > die_errno("Could not open /dev/null"); > options->close_file = 1; > + options->color_moved = 0; > for (i = 0; i < q->nr; i++) { > struct diff_filepair *p = q->queue[i]; > if (check_pair_status(p)) > diff --git a/diff.h b/diff.h > index c334aac02e..b83d6fefcc 100644 > --- a/diff.h > +++ b/diff.h > @@ -7,6 +7,7 @@ > #include "tree-walk.h" > #include "pathspec.h" > #include "object.h" > +#include "hashmap.h" > > struct rev_info; > struct diff_options; > @@ -145,6 +146,8 @@ struct buffered_filepair { > unsigned ws_rule; > }; > > +struct moved_entry; > + > struct diff_options { > const char *orderfile; > const char *pickaxe; > @@ -217,6 +220,8 @@ struct diff_options { > > int diff_path_counter; > > + /* Determines color moved code. Flipped between 1, 2 for alt. color. */ > + int color_moved; > int use_buffer; > > struct buffered_patch_line *line_buffer; > @@ -225,6 +230,16 @@ struct diff_options { > struct buffered_filepair *filepair_buffer; > int filepair_buffer_nr, filepair_buffer_alloc; > struct buffered_filepair *current_filepair; > + > + /* built up in the first pass: */ > + struct hashmap *deleted_lines; > + struct hashmap *added_lines; > + /* needed for building up */ > + struct moved_entry *prev_line; > + > + /* state in the second pass */ > + struct moved_entry **pmb; /* potentially moved blocks */ > + int pmb_nr, pmb_alloc; Placing these in the public API makes the scope unnecessarily large - could these be stored in a struct (or better, in local variables) private to the .c file? In particular, prev_line should not need a scope greater than a function - a function could just loop through all the buffered lines and construct the two hash maps, and prev_line would not be needed elsewhere. > }; > > void emit_line_fmt(struct diff_options *o, const char *set, const char *reset, > @@ -241,7 +256,11 @@ enum color_diff { > DIFF_FILE_NEW = 5, > DIFF_COMMIT = 6, > DIFF_WHITESPACE = 7, > - DIFF_FUNCINFO = 8 > + DIFF_FUNCINFO = 8, > + DIFF_FILE_OLD_MOVED = 9, > + DIFF_FILE_OLD_MOVED_ALT = 10, > + DIFF_FILE_NEW_MOVED = 11, > + DIFF_FILE_NEW_MOVED_ALT = 12 > }; > const char *diff_get_color(int diff_use_color, enum color_diff ix); > #define diff_get_color_opt(o, ix) \ > diff --git a/t/t4015-diff-whitespace.sh b/t/t4015-diff-whitespace.sh > index 289806d0c7..232d9ad55e 100755 > --- a/t/t4015-diff-whitespace.sh > +++ b/t/t4015-diff-whitespace.sh [snip]