[RFC 1/1] xdiff: use leading whitespace in function heuristic

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The regular expressions specified in userdiff.c, as well as user-defined
expressions, allow git to detect which lines of code declare functions
(as well as other notable lines, such as class declarations).

Although useful, these regular expressions can't identify which function
a line of code actually belongs to; they only find the function
declaration closest to it.
Languages which allow for nested functions -- or functions inside of
classes -- can trip this mechanism up.

Since many languages use indentation to associate lines of code with a
function (either semantically or cosmetically), we can use indentation
as an additional heuristic for identifying the owning function.

Specifically, this assumes that a line of code belongs to the nearest
function whose declaration is less indented than the line itself.

Signed-off-by: Ryan Zoeller <rtzoeller@xxxxxxxxxxxxx>
---
 grep.c            |  2 +-
 line-range.c      |  2 +-
 xdiff-interface.c | 14 +++++++++++++-
 xdiff/xdiff.h     |  2 +-
 xdiff/xemit.c     | 23 +++++++++++++++++------
 5 files changed, 33 insertions(+), 10 deletions(-)

diff --git a/grep.c b/grep.c
index 54af9f813e..3281f19977 100644
--- a/grep.c
+++ b/grep.c
@@ -1555,7 +1555,7 @@ static int match_funcname(struct grep_opt *opt, struct grep_source *gs, char *bo
 
 	if (xecfg) {
 		char buf[1];
-		return xecfg->find_func(bol, eol - bol, buf, 1,
+		return xecfg->find_func(bol, eol - bol, buf, 1, -1,
 					xecfg->find_func_priv) >= 0;
 	}
 
diff --git a/line-range.c b/line-range.c
index 9b50583dc0..eb9540bc76 100644
--- a/line-range.c
+++ b/line-range.c
@@ -119,7 +119,7 @@ static int match_funcname(xdemitconf_t *xecfg, const char *bol, const char *eol)
 {
 	if (xecfg) {
 		char buf[1];
-		return xecfg->find_func(bol, eol - bol, buf, 1,
+		return xecfg->find_func(bol, eol - bol, buf, 1, -1,
 					xecfg->find_func_priv) >= 0;
 	}
 
diff --git a/xdiff-interface.c b/xdiff-interface.c
index 4d20069302..d93cb5c72e 100644
--- a/xdiff-interface.c
+++ b/xdiff-interface.c
@@ -201,7 +201,7 @@ struct ff_regs {
 };
 
 static long ff_regexp(const char *line, long len,
-		char *buffer, long buffer_size, void *priv)
+		char *buffer, long buffer_size, long max_leading_spaces, void *priv)
 {
 	struct ff_regs *regs = priv;
 	regmatch_t pmatch[2];
@@ -216,6 +216,18 @@ static long ff_regexp(const char *line, long len,
 			len--;
 	}
 
+	// TODO: Is it faster to check whitespace only after matching the regex?
+	if (max_leading_spaces >= 0) {
+		long leading_spaces;
+		for (leading_spaces = 0; leading_spaces < len
+				&& leading_spaces <= max_leading_spaces
+				&& isspace(line[leading_spaces]); leading_spaces++)
+			;
+
+		if (leading_spaces > max_leading_spaces)
+			return -1;
+	}
+
 	for (i = 0; i < regs->nr; i++) {
 		struct ff_reg *reg = regs->array + i;
 		if (!regexec_buf(&reg->re, line, len, 2, pmatch, 0)) {
diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h
index 032e3a9f41..f78c30c527 100644
--- a/xdiff/xdiff.h
+++ b/xdiff/xdiff.h
@@ -93,7 +93,7 @@ typedef struct s_xdemitcb {
 	int (*out_line)(void *, mmbuffer_t *, int);
 } xdemitcb_t;
 
-typedef long (*find_func_t)(const char *line, long line_len, char *buffer, long buffer_size, void *priv);
+typedef long (*find_func_t)(const char *line, long line_len, char *buffer, long buffer_size, long max_leading_spaces, void *priv);
 
 typedef int (*xdl_emit_hunk_consume_func_t)(long start_a, long count_a,
 					    long start_b, long count_b,
diff --git a/xdiff/xemit.c b/xdiff/xemit.c
index 9d7d6c5087..1de68008f9 100644
--- a/xdiff/xemit.c
+++ b/xdiff/xemit.c
@@ -95,7 +95,7 @@ xdchange_t *xdl_get_hunk(xdchange_t **xscr, xdemitconf_t const *xecfg)
 }
 
 
-static long def_ff(const char *rec, long len, char *buf, long sz, void *priv)
+static long def_ff(const char *rec, long len, char *buf, long sz, long max_leading_spaces, void *priv)
 {
 	if (len > 0 &&
 			(isalpha((unsigned char)*rec) || /* identifier? */
@@ -112,19 +112,19 @@ static long def_ff(const char *rec, long len, char *buf, long sz, void *priv)
 }
 
 static long match_func_rec(xdfile_t *xdf, xdemitconf_t const *xecfg, long ri,
-			   char *buf, long sz)
+			   char *buf, long sz, long max_leading_spaces)
 {
 	const char *rec;
 	long len = xdl_get_rec(xdf, ri, &rec);
 	if (!xecfg->find_func)
-		return def_ff(rec, len, buf, sz, xecfg->find_func_priv);
-	return xecfg->find_func(rec, len, buf, sz, xecfg->find_func_priv);
+		return def_ff(rec, len, buf, sz, max_leading_spaces, xecfg->find_func_priv);
+	return xecfg->find_func(rec, len, buf, sz, max_leading_spaces, xecfg->find_func_priv);
 }
 
 static int is_func_rec(xdfile_t *xdf, xdemitconf_t const *xecfg, long ri)
 {
 	char dummy[1];
-	return match_func_rec(xdf, xecfg, ri, dummy, sizeof(dummy)) >= 0;
+	return match_func_rec(xdf, xecfg, ri, dummy, sizeof(dummy), -1) >= 0; /* -1: no indentation limit */
 }
 
 struct func_line {
@@ -137,12 +137,23 @@ static long get_func_line(xdfenv_t *xe, xdemitconf_t const *xecfg,
 {
 	long l, size, step = (start > limit) ? -1 : 1;
 	char *buf, dummy[1];
+	long leading_spaces;
+
+	if (start - step >= 0 && start - step < xe->xdf1.nrec) {
+		xrecord_t *first_line = xe->xdf1.recs[start - step];
+
+		for (leading_spaces = 0; first_line->ptr[leading_spaces]
+				&& isspace(first_line->ptr[leading_spaces]); leading_spaces++)
+			;
+	} else {
+		leading_spaces = 0;
+	}
 
 	buf = func_line ? func_line->buf : dummy;
 	size = func_line ? sizeof(func_line->buf) : sizeof(dummy);
 
 	for (l = start; l != limit && 0 <= l && l < xe->xdf1.nrec; l += step) {
-		long len = match_func_rec(&xe->xdf1, xecfg, l, buf, size);
+		long len = match_func_rec(&xe->xdf1, xecfg, l, buf, size, leading_spaces - 1);
 		if (len >= 0) {
 			if (func_line)
 				func_line->len = len;
-- 
2.28.0.586.g47c91ef7fe






[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux