[PATCH 5/5] Use kwset in grep

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Best of five runs in the linux repository:

before:

$ time git grep qwerty
drivers/char/keyboard.c:        "qwertyuiop[]\r\000as"                          /* 0x10 - 0x1f */

real	0m1.065s
user	0m1.400s
sys	0m0.536s


after:

$ time git grep qwerty
drivers/char/keyboard.c:        "qwertyuiop[]\r\000as"                          /* 0x10 - 0x1f */

real	0m0.621s
user	0m0.560s
sys	0m0.564s

So we gain about 40% by using the kwset code.

Signed-off-by: Fredrik Kuivinen <frekui@xxxxxxxxx>
---

 grep.c |   61 +++++++++++++++++++++++++++++++++++++++++--------------------
 grep.h |    2 ++
 2 files changed, 43 insertions(+), 20 deletions(-)

diff --git a/grep.c b/grep.c
index a0864f1..deb5f71 100644
--- a/grep.c
+++ b/grep.c
@@ -51,16 +51,38 @@ struct grep_opt *grep_opt_dup(const struct grep_opt *opt)
 	return ret;
 }
 
+static int is_fixed(const char *s)
+{
+	while (*s && !is_regex_special(*s))
+		s++;
+	return !*s;
+}
+
 static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
 {
 	int err;
 
 	p->word_regexp = opt->word_regexp;
 	p->ignore_case = opt->ignore_case;
-	p->fixed = opt->fixed;
-
-	if (p->fixed)
+	p->fixed = 0;
+
+	if (opt->fixed || is_fixed(p->pattern))
+		p->fixed = 1;
+
+	if (p->fixed) {
+		if (opt->regflags & REG_ICASE || p->ignore_case) {
+			static char trans[256];
+			int i;
+			for (i = 0; i < 256; i++)
+				trans[i] = tolower(i);
+			p->kws = kwsalloc(trans);
+		} else {
+			p->kws = kwsalloc(NULL);
+		}
+		kwsincr(p->kws, p->pattern, strlen(p->pattern));
+		kwsprep(p->kws);
 		return;
+	}
 
 	err = regcomp(&p->regexp, p->pattern, opt->regflags);
 	if (err) {
@@ -241,7 +263,10 @@ void free_grep_patterns(struct grep_opt *opt)
 		case GREP_PATTERN: /* atom */
 		case GREP_PATTERN_HEAD:
 		case GREP_PATTERN_BODY:
-			regfree(&p->regexp);
+			if (p->fixed)
+				kwsfree(p->kws);
+			else
+				regfree(&p->regexp);
 			break;
 		default:
 			break;
@@ -277,21 +302,17 @@ static void show_name(struct grep_opt *opt, const char *name)
 }
 
 
-static int fixmatch(const char *pattern, char *line, int ignore_case, regmatch_t *match)
+static int fixmatch(const kwset_t kws, char *line, size_t sz,
+		    regmatch_t *match)
 {
-	char *hit;
-	if (ignore_case)
-		hit = strcasestr(line, pattern);
-	else
-		hit = strstr(line, pattern);
-
-	if (!hit) {
+	struct kwsmatch kwsm;
+	size_t offset = kwsexec(kws, line, sz, &kwsm);
+	if (offset == -1) {
 		match->rm_so = match->rm_eo = -1;
 		return REG_NOMATCH;
-	}
-	else {
-		match->rm_so = hit - line;
-		match->rm_eo = match->rm_so + strlen(pattern);
+	} else {
+		match->rm_so = offset;
+		match->rm_eo = match->rm_so + kwsm.size[0];
 		return 0;
 	}
 }
@@ -346,7 +367,7 @@ static int match_one_pattern(struct grep_pat *p, char *bol, char *eol,
 
  again:
 	if (p->fixed)
-		hit = !fixmatch(p->pattern, bol, p->ignore_case, pmatch);
+		hit = !fixmatch(p->kws, bol, eol-bol, pmatch);
 	else
 		hit = !regexec(&p->regexp, bol, 1, pmatch, eflags);
 
@@ -670,9 +691,9 @@ static int look_ahead(struct grep_opt *opt,
 		int hit;
 		regmatch_t m;
 
-		if (p->fixed)
-			hit = !fixmatch(p->pattern, bol, p->ignore_case, &m);
-		else {
+		if (p->fixed) {
+			hit = !fixmatch(p->kws, bol, *left_p, &m);
+		} else {
 #ifdef REG_STARTEND
 			m.rm_so = 0;
 			m.rm_eo = *left_p;
diff --git a/grep.h b/grep.h
index 9703087..3c79154 100644
--- a/grep.h
+++ b/grep.h
@@ -1,6 +1,7 @@
 #ifndef GREP_H
 #define GREP_H
 #include "color.h"
+#include "kwset.h"
 
 enum grep_pat_token {
 	GREP_PATTERN,
@@ -31,6 +32,7 @@ struct grep_pat {
 	const char *pattern;
 	enum grep_header_field field;
 	regex_t regexp;
+	kwset_t kws;
 	unsigned fixed:1;
 	unsigned ignore_case:1;
 	unsigned word_regexp:1;

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]