When calling match_line(), callers presently cannot determine the relative offset of the match because match_line() discards the 'regmatch_t' that contains this information. Instead, teach match_line() to take in two 'ssize_t's. Fill the first with the offset of the match produced by the given expression. If extended, fill the later with the offset of the match produced as if --invert were given. For instance, matching "--not -e x" on this line produces a columnar offset of 0, (i.e., the whole line does not contain an x), but "--invert --not -e -x" will fill the later ssize_t of the column containing an "x", because this expression is semantically equivalent to "-e x". To determine the column for the inverted and non-inverted case, do the following: - If matching an atom, the non-inverted column is as given from match_one_pattern(), and the inverted column is unset. - If matching a --not, the inverted column and non-inverted column swap. - If matching an --and, or --or, the non-inverted column is the minimum of the two children. Presently, the existing short-circuiting logic for AND and OR applies as before. This will change in the following commit when we add options to configure the --column flag. Taken together, this and the forthcoming change will always yield the earlier column on a given line. This patch will become useful when we later pick between the two new results in order to display the column number of the first match on a line with --column. Co-authored-by: Jeff King <peff@xxxxxxxx> Signed-off-by: Taylor Blau <me@xxxxxxxxxxxx> --- grep.c | 58 +++++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 39 insertions(+), 19 deletions(-) diff --git a/grep.c b/grep.c index 45ec7e636c..dedfe17f93 100644 --- a/grep.c +++ b/grep.c @@ -1248,11 +1248,11 @@ static int match_one_pattern(struct grep_pat *p, char *bol, char *eol, return hit; } -static int match_expr_eval(struct grep_expr *x, char *bol, char *eol, - enum grep_context ctx, int collect_hits) +static int match_expr_eval(struct grep_opt *opt, struct grep_expr *x, char *bol, + char *eol, enum grep_context ctx, ssize_t *col, + ssize_t *icol, int collect_hits) { int h = 0; - regmatch_t match; if (!x) die("Not a valid grep expression"); @@ -1261,25 +1261,39 @@ static int match_expr_eval(struct grep_expr *x, char *bol, char *eol, h = 1; break; case GREP_NODE_ATOM: - h = match_one_pattern(x->u.atom, bol, eol, ctx, &match, 0); + { + regmatch_t tmp; + h = match_one_pattern(x->u.atom, bol, eol, ctx, + &tmp, 0); + if (h && (*col < 0 || tmp.rm_so < *col)) + *col = tmp.rm_so; + } break; case GREP_NODE_NOT: - h = !match_expr_eval(x->u.unary, bol, eol, ctx, 0); + /* + * Upon visiting a GREP_NODE_NOT, col and icol become swapped. + */ + h = !match_expr_eval(opt, x->u.unary, bol, eol, ctx, icol, col, + 0); break; case GREP_NODE_AND: - if (!match_expr_eval(x->u.binary.left, bol, eol, ctx, 0)) + if (!match_expr_eval(opt, x->u.binary.left, bol, eol, ctx, col, + icol, 0)) return 0; - h = match_expr_eval(x->u.binary.right, bol, eol, ctx, 0); + h = match_expr_eval(opt, x->u.binary.right, bol, eol, ctx, col, + icol, 0); break; case GREP_NODE_OR: if (!collect_hits) - return (match_expr_eval(x->u.binary.left, - bol, eol, ctx, 0) || - match_expr_eval(x->u.binary.right, - bol, eol, ctx, 0)); - h = match_expr_eval(x->u.binary.left, bol, eol, ctx, 0); + return (match_expr_eval(opt, x->u.binary.left, bol, eol, + ctx, col, icol, 0) || + match_expr_eval(opt, x->u.binary.right, bol, + eol, ctx, col, icol, 0)); + h = match_expr_eval(opt, x->u.binary.left, bol, eol, ctx, col, + icol, 0); x->u.binary.left->hit |= h; - h |= match_expr_eval(x->u.binary.right, bol, eol, ctx, 1); + h |= match_expr_eval(opt, x->u.binary.right, bol, eol, ctx, col, + icol, 1); break; default: die("Unexpected node type (internal error) %d", x->node); @@ -1290,25 +1304,30 @@ static int match_expr_eval(struct grep_expr *x, char *bol, char *eol, } static int match_expr(struct grep_opt *opt, char *bol, char *eol, - enum grep_context ctx, int collect_hits) + enum grep_context ctx, ssize_t *col, + ssize_t *icol, int collect_hits) { struct grep_expr *x = opt->pattern_expression; - return match_expr_eval(x, bol, eol, ctx, collect_hits); + return match_expr_eval(opt, x, bol, eol, ctx, col, icol, collect_hits); } static int match_line(struct grep_opt *opt, char *bol, char *eol, + ssize_t *col, ssize_t *icol, enum grep_context ctx, int collect_hits) { struct grep_pat *p; - regmatch_t match; if (opt->extended) - return match_expr(opt, bol, eol, ctx, collect_hits); + return match_expr(opt, bol, eol, ctx, col, icol, + collect_hits); /* we do not call with collect_hits without being extended */ for (p = opt->pattern_list; p; p = p->next) { - if (match_one_pattern(p, bol, eol, ctx, &match, 0)) + regmatch_t tmp; + if (match_one_pattern(p, bol, eol, ctx, &tmp, 0)) { + *col = tmp.rm_so; return 1; + } } return 0; } @@ -1763,6 +1782,7 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle while (left) { char *eol, ch; int hit; + ssize_t col = -1, icol = -1; /* * look_ahead() skips quickly to the line that possibly @@ -1786,7 +1806,7 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle if ((ctx == GREP_CONTEXT_HEAD) && (eol == bol)) ctx = GREP_CONTEXT_BODY; - hit = match_line(opt, bol, eol, ctx, collect_hits); + hit = match_line(opt, bol, eol, &col, &icol, ctx, collect_hits); *eol = ch; if (collect_hits) -- 2.18.0