Make the --color-words splitting regular expression configurable via the diff driver's 'wordregex' attribute. The user can then set the driver on a file in .gitattributes. If a regex is given on the command line, it overrides the driver's setting. We also provide built-in regexes for some of the languages that already had funcname patterns. (They are designed to run UTF-8 sequences into a single chunk to make sure they remain readable.) Signed-off-by: Thomas Rast <trast@xxxxxxxxxxxxxxx> --- Documentation/diff-options.txt | 4 +++- Documentation/gitattributes.txt | 21 +++++++++++++++++++++ diff.c | 10 ++++++++++ userdiff.c | 27 +++++++++++++++++++-------- userdiff.h | 1 + 5 files changed, 54 insertions(+), 9 deletions(-) diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt index 6152d5b..d22c06b 100644 --- a/Documentation/diff-options.txt +++ b/Documentation/diff-options.txt @@ -96,7 +96,9 @@ endif::git-format-patch[] By default, a new word only starts at whitespace, so that a 'word' is defined as a maximal sequence of non-whitespace characters. The optional argument <regex> can be used to - configure this. + configure this. It can also be set via a diff driver, see + linkgit:gitattributes[1]; if a <regex> is given explicitly, it + overrides any diff driver setting. + The <regex> must be an (extended) regular expression. When set, every non-overlapping match of the <regex> is considered a word. (Regular diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt index 8af22ec..67f5522 100644 --- a/Documentation/gitattributes.txt +++ b/Documentation/gitattributes.txt @@ -334,6 +334,27 @@ patterns are available: - `tex` suitable for source code for LaTeX documents. +Customizing word diff +^^^^^^^^^^^^^^^^^^^^^ + +You can customize the rules that `git diff --color-words` uses to +split words in a line, by specifying an appropriate regular expression +in the "diff.*.wordregex" configuration variable. 
For example, in TeX +a backslash followed by a sequence of letters forms a command, but +several such commands can be run together without intervening +whitespace. To separate them, use a regular expression such as + +------------------------ +[diff "tex"] + wordregex = "\\\\[a-zA-Z]+|[{}]|\\\\.|[^\\{} \t]+" +------------------------ + +Similar to 'xfuncname', a built-in value is provided for the drivers +`bibtex`, `html`, `java`, `php`, `python` and `tex`. See the +documentation of --color-words in linkgit:git-diff[1] for the precise +semantics. + + Performing text diffs of binary files ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/diff.c b/diff.c index badaea6..c1cc426 100644 --- a/diff.c +++ b/diff.c @@ -1548,6 +1548,12 @@ static const struct userdiff_funcname *diff_funcname_pattern(struct diff_filespe return one->driver->funcname.pattern ? &one->driver->funcname : NULL; } +static const char *userdiff_word_regex(struct diff_filespec *one) +{ + diff_filespec_load_driver(one); + return one->driver->word_regex; +} + void diff_set_mnemonic_prefix(struct diff_options *options, const char *a, const char *b) { if (!options->a_prefix) @@ -1708,6 +1714,10 @@ static void builtin_diff(const char *name_a, ecbdata.diff_words = xcalloc(1, sizeof(struct diff_words_data)); ecbdata.diff_words->file = o->file; + if (!o->word_regex) + o->word_regex = userdiff_word_regex(one); + if (!o->word_regex) + o->word_regex = userdiff_word_regex(two); if (o->word_regex) { ecbdata.diff_words->word_regex = (regex_t *) xmalloc(sizeof(regex_t)); diff --git a/userdiff.c b/userdiff.c index 3681062..7fd9a07 100644 --- a/userdiff.c +++ b/userdiff.c @@ -6,13 +6,17 @@ static struct userdiff_driver *drivers; static int ndrivers; static int drivers_alloc; -#define FUNCNAME(name, pattern) \ +#define FUNCNAME(name, pattern) \ { name, NULL, -1, { pattern, REG_EXTENDED } } +#define PATTERNS(name, pattern, wordregex) \ + { name, NULL, -1, { pattern, REG_EXTENDED }, NULL, wordregex } static struct 
userdiff_driver builtin_drivers[] = { -FUNCNAME("html", "^[ \t]*(<[Hh][1-6][ \t].*>.*)$"), -FUNCNAME("java", +PATTERNS("html", "^[ \t]*(<[Hh][1-6][ \t].*>.*)$", + "[^<>= \t]+|\\S"), +PATTERNS("java", "!^[ \t]*(catch|do|for|if|instanceof|new|return|switch|throw|while)\n" - "^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$"), + "^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$", + "[a-zA-Z_][a-zA-Z0-9_]*|[-+0-9.e]+|[-+*/]=|\\+\\+|--|\\S|[\x80-\xff]+"), FUNCNAME("objc", /* Negate C statements that can look like functions */ "!^[ \t]*(do|for|if|else|return|switch|while)\n" @@ -27,14 +31,19 @@ FUNCNAME("pascal", "implementation|initialization|finalization)[ \t]*.*)$" "\n" "^(.*=[ \t]*(class|record).*)$"), -FUNCNAME("php", "^[\t ]*((function|class).*)"), -FUNCNAME("python", "^[ \t]*((class|def)[ \t].*)$"), +PATTERNS("php", "^[\t ]*((function|class).*)", + "\\$?[a-zA-Z_][a-zA-Z0-9_]*|[-+0-9.e]+|[-+*/]=|\\+\\+|--|->|\\S|[\x80-\xff]+"), +PATTERNS("python", "^[ \t]*((class|def)[ \t].*)$", + "[a-zA-Z_][a-zA-Z0-9_]*|[-+0-9.e]+|[-+*/]=|//|\\S|[\x80-\xff]+"), FUNCNAME("ruby", "^[ \t]*((class|module|def)[ \t].*)$"), -FUNCNAME("bibtex", "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$"), -FUNCNAME("tex", "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$"), +PATTERNS("bibtex", "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$", + "[={}\"]|[^={}\" \t]+"), +PATTERNS("tex", "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$", + "\\\\[a-zA-Z@]+|[{}]|\\\\.|[^\\{} \t]+"), { "default", NULL, -1, { NULL, 0 } }, }; #undef FUNCNAME +#undef PATTERNS static struct userdiff_driver driver_true = { "diff=true", @@ -134,6 +143,8 @@ int userdiff_config(const char *k, const char *v) return parse_string(&drv->external, k, v); if ((drv = parse_driver(k, v, "textconv"))) return parse_string(&drv->textconv, k, v); + if ((drv = parse_driver(k, v, "wordregex"))) + return parse_string(&drv->word_regex, k, v); return 0; } diff --git a/userdiff.h b/userdiff.h index 
ba29457..2aab13e 100644 --- a/userdiff.h +++ b/userdiff.h @@ -12,6 +12,7 @@ struct userdiff_driver { int binary; struct userdiff_funcname funcname; const char *textconv; + const char *word_regex; }; int userdiff_config(const char *k, const char *v); -- 1.6.1.269.g0769 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html