[PATCH v3 3/4] word diff: make regex configurable via attributes

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Make the --color-words splitting regular expression configurable via
the diff driver's 'wordregex' attribute.  The user can then set the
driver on a file in .gitattributes.  If a regex is given on the
command line, it overrides the driver's setting.

We also provide built-in regexes for some of the languages that
already had funcname patterns.  (They are designed to run UTF-8
sequences into a single chunk to make sure they remain readable.)

Signed-off-by: Thomas Rast <trast@xxxxxxxxxxxxxxx>
---
 Documentation/diff-options.txt  |    4 +++-
 Documentation/gitattributes.txt |   21 +++++++++++++++++++++
 diff.c                          |   10 ++++++++++
 userdiff.c                      |   27 +++++++++++++++++++--------
 userdiff.h                      |    1 +
 5 files changed, 54 insertions(+), 9 deletions(-)

diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
index 6152d5b..d22c06b 100644
--- a/Documentation/diff-options.txt
+++ b/Documentation/diff-options.txt
@@ -96,7 +96,9 @@ endif::git-format-patch[]
 	By default, a new word only starts at whitespace, so that a
 	'word' is defined as a maximal sequence of non-whitespace
 	characters.  The optional argument <regex> can be used to
-	configure this.
+	configure this.  It can also be set via a diff driver, see
+	linkgit:gitattributes[1]; if a <regex> is given explicitly, it
+	overrides any diff driver setting.
 +
 The <regex> must be an (extended) regular expression.  When set, every
 non-overlapping match of the <regex> is considered a word.  (Regular
diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt
index 8af22ec..67f5522 100644
--- a/Documentation/gitattributes.txt
+++ b/Documentation/gitattributes.txt
@@ -334,6 +334,27 @@ patterns are available:
 - `tex` suitable for source code for LaTeX documents.
 
 
+Customizing word diff
+^^^^^^^^^^^^^^^^^^^^^
+
+You can customize the rules that `git diff --color-words` uses to
+split words in a line, by specifying an appropriate regular expression
+in the "diff.*.wordregex" configuration variable.  For example, in TeX
+a backslash followed by a sequence of letters forms a command, but
+several such commands can be run together without intervening
+whitespace.  To separate them, use a regular expression such as
+
+------------------------
+[diff "tex"]
+	wordregex = "\\\\[a-zA-Z]+|[{}]|\\\\.|[^\\{} \t]+"
+------------------------
+
+Similar to 'xfuncname', a built in value is provided for the drivers
+`bibtex`, `html`, `java`, `php`, `python` and `tex`.  See the
+documentation of --color-words in linkgit:git-diff[1] for the precise
+semantics.
+
+
 Performing text diffs of binary files
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
diff --git a/diff.c b/diff.c
index badaea6..c1cc426 100644
--- a/diff.c
+++ b/diff.c
@@ -1548,6 +1548,12 @@ static const struct userdiff_funcname *diff_funcname_pattern(struct diff_filespe
 	return one->driver->funcname.pattern ? &one->driver->funcname : NULL;
 }
 
+static const char *userdiff_word_regex(struct diff_filespec *one)
+{
+	diff_filespec_load_driver(one);
+	return one->driver->word_regex;
+}
+
 void diff_set_mnemonic_prefix(struct diff_options *options, const char *a, const char *b)
 {
 	if (!options->a_prefix)
@@ -1708,6 +1714,10 @@ static void builtin_diff(const char *name_a,
 			ecbdata.diff_words =
 				xcalloc(1, sizeof(struct diff_words_data));
 			ecbdata.diff_words->file = o->file;
+			if (!o->word_regex)
+				o->word_regex = userdiff_word_regex(one);
+			if (!o->word_regex)
+				o->word_regex = userdiff_word_regex(two);
 			if (o->word_regex) {
 				ecbdata.diff_words->word_regex = (regex_t *)
 					xmalloc(sizeof(regex_t));
diff --git a/userdiff.c b/userdiff.c
index 3681062..7fd9a07 100644
--- a/userdiff.c
+++ b/userdiff.c
@@ -6,13 +6,17 @@ static struct userdiff_driver *drivers;
 static int ndrivers;
 static int drivers_alloc;
 
-#define FUNCNAME(name, pattern) \
+#define FUNCNAME(name, pattern)			\
 	{ name, NULL, -1, { pattern, REG_EXTENDED } }
+#define PATTERNS(name, pattern, wordregex)			\
+	{ name, NULL, -1, { pattern, REG_EXTENDED }, NULL, wordregex }
 static struct userdiff_driver builtin_drivers[] = {
-FUNCNAME("html", "^[ \t]*(<[Hh][1-6][ \t].*>.*)$"),
-FUNCNAME("java",
+PATTERNS("html", "^[ \t]*(<[Hh][1-6][ \t].*>.*)$",
+	 "[^<>= \t]+|\\S"),
+PATTERNS("java",
 	 "!^[ \t]*(catch|do|for|if|instanceof|new|return|switch|throw|while)\n"
-	 "^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$"),
+	 "^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$",
+	 "[a-zA-Z_][a-zA-Z0-9_]*|[-+0-9.e]+|[-+*/]=|\\+\\+|--|\\S|[\x80-\xff]+"),
 FUNCNAME("objc",
 	 /* Negate C statements that can look like functions */
 	 "!^[ \t]*(do|for|if|else|return|switch|while)\n"
@@ -27,14 +31,19 @@ FUNCNAME("pascal",
 		"implementation|initialization|finalization)[ \t]*.*)$"
 	 "\n"
 	 "^(.*=[ \t]*(class|record).*)$"),
-FUNCNAME("php", "^[\t ]*((function|class).*)"),
-FUNCNAME("python", "^[ \t]*((class|def)[ \t].*)$"),
+PATTERNS("php", "^[\t ]*((function|class).*)",
+	 "\\$?[a-zA-Z_][a-zA-Z0-9_]*|[-+0-9.e]+|[-+*/]=|\\+\\+|--|->|\\S|[\x80-\xff]+"),
+PATTERNS("python", "^[ \t]*((class|def)[ \t].*)$",
+	 "[a-zA-Z_][a-zA-Z0-9_]*|[-+0-9.e]+|[-+*/]=|//|\\S|[\x80-\xff]+"),
 FUNCNAME("ruby", "^[ \t]*((class|module|def)[ \t].*)$"),
-FUNCNAME("bibtex", "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$"),
-FUNCNAME("tex", "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$"),
+PATTERNS("bibtex", "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$",
+	 "[={}\"]|[^={}\" \t]+"),
+PATTERNS("tex", "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$",
+	 "\\\\[a-zA-Z@]+|[{}]|\\\\.|[^\\{} \t]+"),
 { "default", NULL, -1, { NULL, 0 } },
 };
 #undef FUNCNAME
+#undef PATTERNS
 
 static struct userdiff_driver driver_true = {
 	"diff=true",
@@ -134,6 +143,8 @@ int userdiff_config(const char *k, const char *v)
 		return parse_string(&drv->external, k, v);
 	if ((drv = parse_driver(k, v, "textconv")))
 		return parse_string(&drv->textconv, k, v);
+	if ((drv = parse_driver(k, v, "wordregex")))
+		return parse_string(&drv->word_regex, k, v);
 
 	return 0;
 }
diff --git a/userdiff.h b/userdiff.h
index ba29457..2aab13e 100644
--- a/userdiff.h
+++ b/userdiff.h
@@ -12,6 +12,7 @@ struct userdiff_driver {
 	int binary;
 	struct userdiff_funcname funcname;
 	const char *textconv;
+	const char *word_regex;
 };
 
 int userdiff_config(const char *k, const char *v);
-- 
1.6.1.269.g0769

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux