[PATCH 8/8] grep: support NUL chars in search strings for -F

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Search patterns in a file specified with -f can contain NUL characters.
The current code ignores all characters on a line after a NUL.

Pass the actual length of the line all the way from the pattern file to
fixmatch() and use it for case-sensitive fixed string matching.

Signed-off-by: Rene Scharfe <rene.scharfe@xxxxxxxxxxxxxx>
---
Support for -F was easy, but in order to be able to search for NULs
with -Fi, -G and -E, we'd need a different case-insensitive fixed
string search function (memcasemem?) and a different regex library, or
at least use a different (non-POSIX) entry point.

How badly do we need this feature?  If the new regex lib is faster or
improves multi-platform support then NUL support would be a nice side
effect, I think, but this feature alone doesn't justify a switch in my
eyes.


 builtin/grep.c         |    8 ++++++--
 grep.c                 |   33 ++++++++++++++++++++-------------
 grep.h                 |    2 ++
 t/t7008-grep-binary.sh |   30 ++++++++++++++++++++++++++++++
 4 files changed, 58 insertions(+), 15 deletions(-)

diff --git a/builtin/grep.c b/builtin/grep.c
index b194ea3..d0a73da 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -724,11 +724,15 @@ static int file_callback(const struct option *opt, const char *arg, int unset)
 	if (!patterns)
 		die_errno("cannot open '%s'", arg);
 	while (strbuf_getline(&sb, patterns, '\n') == 0) {
+		char *s;
+		size_t len;
+
 		/* ignore empty line like grep does */
 		if (sb.len == 0)
 			continue;
-		append_grep_pattern(grep_opt, strbuf_detach(&sb, NULL), arg,
-				    ++lno, GREP_PATTERN);
+
+		s = strbuf_detach(&sb, &len);
+		append_grep_pat(grep_opt, s, len, arg, ++lno, GREP_PATTERN);
 	}
 	fclose(patterns);
 	strbuf_release(&sb);
diff --git a/grep.c b/grep.c
index 70a776f..82fb349 100644
--- a/grep.c
+++ b/grep.c
@@ -7,6 +7,7 @@ void append_header_grep_pattern(struct grep_opt *opt, enum grep_header_field fie
 {
 	struct grep_pat *p = xcalloc(1, sizeof(*p));
 	p->pattern = pat;
+	p->patternlen = strlen(pat);
 	p->origin = "header";
 	p->no = 0;
 	p->token = GREP_PATTERN_HEAD;
@@ -19,8 +20,15 @@ void append_header_grep_pattern(struct grep_opt *opt, enum grep_header_field fie
 void append_grep_pattern(struct grep_opt *opt, const char *pat,
 			 const char *origin, int no, enum grep_pat_token t)
 {
+	append_grep_pat(opt, pat, strlen(pat), origin, no, t);
+}
+
+void append_grep_pat(struct grep_opt *opt, const char *pat, size_t patlen,
+		     const char *origin, int no, enum grep_pat_token t)
+{
 	struct grep_pat *p = xcalloc(1, sizeof(*p));
 	p->pattern = pat;
+	p->patternlen = patlen;
 	p->origin = origin;
 	p->no = no;
 	p->token = t;
@@ -44,8 +52,8 @@ struct grep_opt *grep_opt_dup(const struct grep_opt *opt)
 			append_header_grep_pattern(ret, pat->field,
 						   pat->pattern);
 		else
-			append_grep_pattern(ret, pat->pattern, pat->origin,
-					    pat->no, pat->token);
+			append_grep_pat(ret, pat->pattern, pat->patternlen,
+					pat->origin, pat->no, pat->token);
 	}
 
 	return ret;
@@ -329,21 +337,21 @@ static void show_name(struct grep_opt *opt, const char *name)
 	opt->output(opt, opt->null_following_name ? "\0" : "\n", 1);
 }
 
-static int fixmatch(const char *pattern, char *line, char *eol,
-		    int ignore_case, regmatch_t *match)
+static int fixmatch(struct grep_pat *p, char *line, char *eol,
+		    regmatch_t *match)
 {
 	char *hit;
 
-	if (ignore_case) {
+	if (p->ignore_case) {
 		char *s = line;
 		do {
-			hit = strcasestr(s, pattern);
+			hit = strcasestr(s, p->pattern);
 			if (hit)
 				break;
 			s += strlen(s) + 1;
 		} while (s < eol);
 	} else
-		hit = memmem(line, eol - line, pattern, strlen(pattern));
+		hit = memmem(line, eol - line, p->pattern, p->patternlen);
 
 	if (!hit) {
 		match->rm_so = match->rm_eo = -1;
@@ -351,7 +359,7 @@ static int fixmatch(const char *pattern, char *line, char *eol,
 	}
 	else {
 		match->rm_so = hit - line;
-		match->rm_eo = match->rm_so + strlen(pattern);
+		match->rm_eo = match->rm_so + p->patternlen;
 		return 0;
 	}
 }
@@ -417,7 +425,7 @@ static int match_one_pattern(struct grep_pat *p, char *bol, char *eol,
 
  again:
 	if (p->fixed)
-		hit = !fixmatch(p->pattern, bol, eol, p->ignore_case, pmatch);
+		hit = !fixmatch(p, bol, eol, pmatch);
 	else
 		hit = !regmatch(&p->regexp, bol, eol, pmatch, eflags);
 
@@ -743,10 +751,9 @@ static int look_ahead(struct grep_opt *opt,
 		int hit;
 		regmatch_t m;
 
-		if (p->fixed) {
-			hit = !fixmatch(p->pattern, bol, bol + *left_p,
-					p->ignore_case, &m);
-		} else
+		if (p->fixed)
+			hit = !fixmatch(p, bol, bol + *left_p, &m);
+		else
 			hit = !regmatch(&p->regexp, bol, bol + *left_p, &m, 0);
 		if (!hit || m.rm_so < 0 || m.rm_eo < 0)
 			continue;
diff --git a/grep.h b/grep.h
index 89342e5..0aebebd 100644
--- a/grep.h
+++ b/grep.h
@@ -29,6 +29,7 @@ struct grep_pat {
 	int no;
 	enum grep_pat_token token;
 	const char *pattern;
+	size_t patternlen;
 	enum grep_header_field field;
 	regex_t regexp;
 	unsigned fixed:1;
@@ -104,6 +105,7 @@ struct grep_opt {
 	void *output_priv;
 };
 
+extern void append_grep_pat(struct grep_opt *opt, const char *pat, size_t patlen, const char *origin, int no, enum grep_pat_token t);
 extern void append_grep_pattern(struct grep_opt *opt, const char *pat, const char *origin, int no, enum grep_pat_token t);
 extern void append_header_grep_pattern(struct grep_opt *, enum grep_header_field, const char *);
 extern void compile_grep_patterns(struct grep_opt *opt);
diff --git a/t/t7008-grep-binary.sh b/t/t7008-grep-binary.sh
index 4f5e74f..eb8ca88 100755
--- a/t/t7008-grep-binary.sh
+++ b/t/t7008-grep-binary.sh
@@ -69,4 +69,34 @@ test_expect_failure 'git grep .fi a' '
 	git grep .fi a
 '
 
+test_expect_success 'git grep -F y<NUL>f a' "
+	printf 'y\000f' >f &&
+	git grep -f f -F a
+"
+
+test_expect_success 'git grep -F y<NUL>x a' "
+	printf 'y\000x' >f &&
+	test_must_fail git grep -f f -F a
+"
+
+test_expect_success 'git grep -Fi Y<NUL>f a' "
+	printf 'Y\000f' >f &&
+	git grep -f f -Fi a
+"
+
+test_expect_failure 'git grep -Fi Y<NUL>x a' "
+	printf 'Y\000x' >f &&
+	test_must_fail git grep -f f -Fi a
+"
+
+test_expect_success 'git grep y<NUL>f a' "
+	printf 'y\000f' >f &&
+	git grep -f f a
+"
+
+test_expect_failure 'git grep y<NUL>x a' "
+	printf 'y\000x' >f &&
+	test_must_fail git grep -f f a
+"
+
 test_done
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]