Search patterns in a file specified with -f can contain NUL characters. The current code ignores all characters on a line after a NUL. Pass the actual length of the line all the way from the pattern file to fixmatch() and use it for case-sensitive fixed string matching. Signed-off-by: Rene Scharfe <rene.scharfe@xxxxxxxxxxxxxx> --- Support for -F was easy, but in order to be able to search for NULs with -Fi, -G and -E, we'd need a different case-insensitive fixed string search function (memcasemem?) and a different regex library, or at least use a different (non-POSIX) entry point. How badly do we need this feature? If the new regex lib is faster or improves multi-platform support then NUL support would be a nice side effect, I think, but this feature alone doesn't justify a switch in my eyes. builtin/grep.c | 8 ++++++-- grep.c | 33 ++++++++++++++++++++------------- grep.h | 2 ++ t/t7008-grep-binary.sh | 30 ++++++++++++++++++++++++++++++ 4 files changed, 58 insertions(+), 15 deletions(-) diff --git a/builtin/grep.c b/builtin/grep.c index b194ea3..d0a73da 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -724,11 +724,15 @@ static int file_callback(const struct option *opt, const char *arg, int unset) if (!patterns) die_errno("cannot open '%s'", arg); while (strbuf_getline(&sb, patterns, '\n') == 0) { + char *s; + size_t len; + /* ignore empty line like grep does */ if (sb.len == 0) continue; - append_grep_pattern(grep_opt, strbuf_detach(&sb, NULL), arg, - ++lno, GREP_PATTERN); + + s = strbuf_detach(&sb, &len); + append_grep_pat(grep_opt, s, len, arg, ++lno, GREP_PATTERN); } fclose(patterns); strbuf_release(&sb); diff --git a/grep.c b/grep.c index 70a776f..82fb349 100644 --- a/grep.c +++ b/grep.c @@ -7,6 +7,7 @@ void append_header_grep_pattern(struct grep_opt *opt, enum grep_header_field fie { struct grep_pat *p = xcalloc(1, sizeof(*p)); p->pattern = pat; + p->patternlen = strlen(pat); p->origin = "header"; p->no = 0; p->token = GREP_PATTERN_HEAD; @@ -19,8 +20,15 @@ void append_header_grep_pattern(struct grep_opt *opt, enum grep_header_field fie void append_grep_pattern(struct grep_opt *opt, const char *pat, const char *origin, int no, enum grep_pat_token t) { + append_grep_pat(opt, pat, strlen(pat), origin, no, t); +} + +void append_grep_pat(struct grep_opt *opt, const char *pat, size_t patlen, + const char *origin, int no, enum grep_pat_token t) +{ struct grep_pat *p = xcalloc(1, sizeof(*p)); p->pattern = pat; + p->patternlen = patlen; p->origin = origin; p->no = no; p->token = t; @@ -44,8 +52,8 @@ struct grep_opt *grep_opt_dup(const struct grep_opt *opt) append_header_grep_pattern(ret, pat->field, pat->pattern); else - append_grep_pattern(ret, pat->pattern, pat->origin, - pat->no, pat->token); + append_grep_pat(ret, pat->pattern, pat->patternlen, + pat->origin, pat->no, pat->token); } return ret; @@ -329,21 +337,21 @@ static void show_name(struct grep_opt *opt, const char *name) opt->output(opt, opt->null_following_name ? "\0" : "\n", 1); } -static int fixmatch(const char *pattern, char *line, char *eol, - int ignore_case, regmatch_t *match) +static int fixmatch(struct grep_pat *p, char *line, char *eol, + regmatch_t *match) { char *hit; - if (ignore_case) { + if (p->ignore_case) { char *s = line; do { - hit = strcasestr(s, pattern); + hit = strcasestr(s, p->pattern); if (hit) break; s += strlen(s) + 1; } while (s < eol); } else - hit = memmem(line, eol - line, pattern, strlen(pattern)); + hit = memmem(line, eol - line, p->pattern, p->patternlen); if (!hit) { match->rm_so = match->rm_eo = -1; @@ -351,7 +359,7 @@ static int fixmatch(const char *pattern, char *line, char *eol, } else { match->rm_so = hit - line; - match->rm_eo = match->rm_so + strlen(pattern); + match->rm_eo = match->rm_so + p->patternlen; return 0; } } @@ -417,7 +425,7 @@ static int match_one_pattern(struct grep_pat *p, char *bol, char *eol, again: if (p->fixed) - hit = !fixmatch(p->pattern, bol, eol, p->ignore_case, pmatch); + hit = !fixmatch(p, bol, eol, pmatch); else hit = !regmatch(&p->regexp, bol, eol, pmatch, eflags); @@ -743,10 +751,9 @@ static int look_ahead(struct grep_opt *opt, int hit; regmatch_t m; - if (p->fixed) { - hit = !fixmatch(p->pattern, bol, bol + *left_p, - p->ignore_case, &m); - } else + if (p->fixed) + hit = !fixmatch(p, bol, bol + *left_p, &m); + else hit = !regmatch(&p->regexp, bol, bol + *left_p, &m, 0); if (!hit || m.rm_so < 0 || m.rm_eo < 0) continue; diff --git a/grep.h b/grep.h index 89342e5..0aebebd 100644 --- a/grep.h +++ b/grep.h @@ -29,6 +29,7 @@ struct grep_pat { int no; enum grep_pat_token token; const char *pattern; + size_t patternlen; enum grep_header_field field; regex_t regexp; unsigned fixed:1; @@ -104,6 +105,7 @@ struct grep_opt { void *output_priv; }; +extern void append_grep_pat(struct grep_opt *opt, const char *pat, size_t patlen, const char *origin, int no, enum grep_pat_token t); extern void append_grep_pattern(struct grep_opt *opt, const char *pat, const char *origin, int no, enum grep_pat_token t); extern void append_header_grep_pattern(struct grep_opt *, enum grep_header_field, const char *); extern void compile_grep_patterns(struct grep_opt *opt); diff --git a/t/t7008-grep-binary.sh b/t/t7008-grep-binary.sh index 4f5e74f..eb8ca88 100755 --- a/t/t7008-grep-binary.sh +++ b/t/t7008-grep-binary.sh @@ -69,4 +69,34 @@ test_expect_failure 'git grep .fi a' ' git grep .fi a ' +test_expect_success 'git grep -F y<NUL>f a' " + printf 'y\000f' >f && + git grep -f f -F a +" + +test_expect_success 'git grep -F y<NUL>x a' " + printf 'y\000x' >f && + test_must_fail git grep -f f -F a +" + +test_expect_success 'git grep -Fi Y<NUL>f a' " + printf 'Y\000f' >f && + git grep -f f -Fi a +" + +test_expect_failure 'git grep -Fi Y<NUL>x a' " + printf 'Y\000x' >f && + test_must_fail git grep -f f -Fi a +" + +test_expect_success 'git grep y<NUL>f a' " + printf 'y\000f' >f && + git grep -f f a +" + +test_expect_failure 'git grep y<NUL>x a' " + printf 'y\000x' >f && + test_must_fail git grep -f f a +" + test_done -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html