[PATCH 2/7] grep.c: expose {,inverted} match column in match_line()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



When calling match_line(), callers presently cannot determine the
relative offset of the match because match_line() discards the
'regmatch_t' that contains this information.

Instead, teach match_line() to take in two 'ssize_t's. Fill the first
with the offset of the match produced by the given expression. If
matching an extended expression, fill the latter with the offset of the
match produced as if --invert were given.

For instance, matching "--not -e x" on this line produces a columnar
offset of 0 (i.e., the whole line does not contain an x), but "--invert
--not -e x" will fill the latter ssize_t with the column containing an
"x", because this expression is semantically equivalent to "-e x".

To determine the column for the inverted and non-inverted case, do the
following:

  - If matching an atom, the non-inverted column is as given from
    match_one_pattern(), and the inverted column is unset.

  - If matching a --not, the inverted column and non-inverted column swap.

  - If matching an --and or an --or, the non-inverted column is the
    minimum of the two children, with the exception that --or is
    short-circuiting. For instance, if we match "-e a --or -e b" on a
    line that contains both "a" and "b" (and "b" comes first), the match
    column will hold the column of "a", since we inspected the left
    child first, and short-circuited over the right child.

This patch will become useful when we later pick between the two new
results in order to display the column number of the first match on a
line with --column.

Co-authored-by: Jeff King <peff@xxxxxxxx>
Signed-off-by: Taylor Blau <me@xxxxxxxxxxxx>
---
 grep.c | 56 ++++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 38 insertions(+), 18 deletions(-)

diff --git a/grep.c b/grep.c
index 45ec7e636c..19c782aa9d 100644
--- a/grep.c
+++ b/grep.c
@@ -1249,10 +1249,10 @@ static int match_one_pattern(struct grep_pat *p, char *bol, char *eol,
 }
 
 static int match_expr_eval(struct grep_expr *x, char *bol, char *eol,
-			   enum grep_context ctx, int collect_hits)
+			   enum grep_context ctx, ssize_t *col,
+			   ssize_t *icol, int collect_hits)
 {
 	int h = 0;
-	regmatch_t match;
 
 	if (!x)
 		die("Not a valid grep expression");
@@ -1261,25 +1261,39 @@ static int match_expr_eval(struct grep_expr *x, char *bol, char *eol,
 		h = 1;
 		break;
 	case GREP_NODE_ATOM:
-		h = match_one_pattern(x->u.atom, bol, eol, ctx, &match, 0);
+		{
+			regmatch_t tmp;
+			h = match_one_pattern(x->u.atom, bol, eol, ctx,
+					      &tmp, 0);
+			if (h && (*col < 0 || tmp.rm_so < *col))
+				*col = tmp.rm_so;
+		}
 		break;
 	case GREP_NODE_NOT:
-		h = !match_expr_eval(x->u.unary, bol, eol, ctx, 0);
+		/*
+		 * Upon visiting a GREP_NODE_NOT, col and icol become
+		 * swapped.
+		 */
+		h = !match_expr_eval(x->u.unary, bol, eol, ctx, icol, col, 0);
 		break;
 	case GREP_NODE_AND:
-		if (!match_expr_eval(x->u.binary.left, bol, eol, ctx, 0))
+		if (!match_expr_eval(x->u.binary.left, bol, eol, ctx, col,
+				     icol, 0))
 			return 0;
-		h = match_expr_eval(x->u.binary.right, bol, eol, ctx, 0);
+		h = match_expr_eval(x->u.binary.right, bol, eol, ctx, col,
+				    icol, 0);
 		break;
 	case GREP_NODE_OR:
 		if (!collect_hits)
-			return (match_expr_eval(x->u.binary.left,
-						bol, eol, ctx, 0) ||
-				match_expr_eval(x->u.binary.right,
-						bol, eol, ctx, 0));
-		h = match_expr_eval(x->u.binary.left, bol, eol, ctx, 0);
+			return (match_expr_eval(x->u.binary.left, bol, eol, ctx,
+						col, icol, 0) ||
+				match_expr_eval(x->u.binary.right, bol, eol,
+						ctx, col, icol, 0));
+		h = match_expr_eval(x->u.binary.left, bol, eol, ctx, col,
+				    icol, 0);
 		x->u.binary.left->hit |= h;
-		h |= match_expr_eval(x->u.binary.right, bol, eol, ctx, 1);
+		h |= match_expr_eval(x->u.binary.right, bol, eol, ctx, col,
+				     icol, 1);
 		break;
 	default:
 		die("Unexpected node type (internal error) %d", x->node);
@@ -1290,25 +1304,30 @@ static int match_expr_eval(struct grep_expr *x, char *bol, char *eol,
 }
 
 static int match_expr(struct grep_opt *opt, char *bol, char *eol,
-		      enum grep_context ctx, int collect_hits)
+		      enum grep_context ctx, ssize_t *col,
+		      ssize_t *icol, int collect_hits)
 {
 	struct grep_expr *x = opt->pattern_expression;
-	return match_expr_eval(x, bol, eol, ctx, collect_hits);
+	return match_expr_eval(x, bol, eol, ctx, col, icol, collect_hits);
 }
 
 static int match_line(struct grep_opt *opt, char *bol, char *eol,
+		      ssize_t *col, ssize_t *icol,
 		      enum grep_context ctx, int collect_hits)
 {
 	struct grep_pat *p;
-	regmatch_t match;
 
 	if (opt->extended)
-		return match_expr(opt, bol, eol, ctx, collect_hits);
+		return match_expr(opt, bol, eol, ctx, col, icol,
+				  collect_hits);
 
 	/* we do not call with collect_hits without being extended */
 	for (p = opt->pattern_list; p; p = p->next) {
-		if (match_one_pattern(p, bol, eol, ctx, &match, 0))
+		regmatch_t tmp;
+		if (match_one_pattern(p, bol, eol, ctx, &tmp, 0)) {
+			*col = tmp.rm_so;
 			return 1;
+		}
 	}
 	return 0;
 }
@@ -1763,6 +1782,7 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle
 	while (left) {
 		char *eol, ch;
 		int hit;
+		ssize_t col = -1, icol = -1;
 
 		/*
 		 * look_ahead() skips quickly to the line that possibly
@@ -1786,7 +1806,7 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle
 		if ((ctx == GREP_CONTEXT_HEAD) && (eol == bol))
 			ctx = GREP_CONTEXT_BODY;
 
-		hit = match_line(opt, bol, eol, ctx, collect_hits);
+		hit = match_line(opt, bol, eol, &col, &icol, ctx, collect_hits);
 		*eol = ch;
 
 		if (collect_hits)
-- 
2.17.0.582.gccdcbd54c




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux