Best of five runs in the linux repository: before: $ time git grep qwerty drivers/char/keyboard.c: "qwertyuiop[]\r\000as" /* 0x10 - 0x1f */ real 0m1.065s user 0m1.400s sys 0m0.536s after: $ time git grep qwerty drivers/char/keyboard.c: "qwertyuiop[]\r\000as" /* 0x10 - 0x1f */ real 0m0.621s user 0m0.560s sys 0m0.564s So we gain about 40% by using the kwset code. Signed-off-by: Fredrik Kuivinen <frekui@xxxxxxxxx> --- grep.c | 61 +++++++++++++++++++++++++++++++++++++++++-------------------- grep.h | 2 ++ 2 files changed, 43 insertions(+), 20 deletions(-) diff --git a/grep.c b/grep.c index a0864f1..deb5f71 100644 --- a/grep.c +++ b/grep.c @@ -51,16 +51,38 @@ struct grep_opt *grep_opt_dup(const struct grep_opt *opt) return ret; } +static int is_fixed(const char *s) +{ + while (*s && !is_regex_special(*s)) + s++; + return !*s; +} + static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) { int err; p->word_regexp = opt->word_regexp; p->ignore_case = opt->ignore_case; - p->fixed = opt->fixed; - - if (p->fixed) + p->fixed = 0; + + if (opt->fixed || is_fixed(p->pattern)) + p->fixed = 1; + + if (p->fixed) { + if (opt->regflags & REG_ICASE || p->ignore_case) { + static char trans[256]; + int i; + for (i = 0; i < 256; i++) + trans[i] = tolower(i); + p->kws = kwsalloc(trans); + } else { + p->kws = kwsalloc(NULL); + } + kwsincr(p->kws, p->pattern, strlen(p->pattern)); + kwsprep(p->kws); return; + } err = regcomp(&p->regexp, p->pattern, opt->regflags); if (err) { @@ -241,7 +263,10 @@ void free_grep_patterns(struct grep_opt *opt) case GREP_PATTERN: /* atom */ case GREP_PATTERN_HEAD: case GREP_PATTERN_BODY: - regfree(&p->regexp); + if (p->fixed) + kwsfree(p->kws); + else + regfree(&p->regexp); break; default: break; @@ -277,21 +302,17 @@ static void show_name(struct grep_opt *opt, const char *name) } -static int fixmatch(const char *pattern, char *line, int ignore_case, regmatch_t *match) +static int fixmatch(const kwset_t kws, char *line, size_t sz, + regmatch_t *match) { - char *hit; - if (ignore_case) - hit = strcasestr(line, pattern); - else - hit = strstr(line, pattern); - - if (!hit) { + struct kwsmatch kwsm; + size_t offset = kwsexec(kws, line, sz, &kwsm); + if (offset == -1) { match->rm_so = match->rm_eo = -1; return REG_NOMATCH; - } - else { - match->rm_so = hit - line; - match->rm_eo = match->rm_so + strlen(pattern); + } else { + match->rm_so = offset; + match->rm_eo = match->rm_so + kwsm.size[0]; return 0; } } @@ -346,7 +367,7 @@ static int match_one_pattern(struct grep_pat *p, char *bol, char *eol, again: if (p->fixed) - hit = !fixmatch(p->pattern, bol, p->ignore_case, pmatch); + hit = !fixmatch(p->kws, bol, eol-bol, pmatch); else hit = !regexec(&p->regexp, bol, 1, pmatch, eflags); @@ -670,9 +691,9 @@ static int look_ahead(struct grep_opt *opt, int hit; regmatch_t m; - if (p->fixed) - hit = !fixmatch(p->pattern, bol, p->ignore_case, &m); - else { + if (p->fixed) { + hit = !fixmatch(p->kws, bol, *left_p, &m); + } else { #ifdef REG_STARTEND m.rm_so = 0; m.rm_eo = *left_p; diff --git a/grep.h b/grep.h index 9703087..3c79154 100644 --- a/grep.h +++ b/grep.h @@ -1,6 +1,7 @@ #ifndef GREP_H #define GREP_H #include "color.h" +#include "kwset.h" enum grep_pat_token { GREP_PATTERN, @@ -31,6 +32,7 @@ struct grep_pat { const char *pattern; enum grep_header_field field; regex_t regexp; + kwset_t kws; unsigned fixed:1; unsigned ignore_case:1; unsigned word_regexp:1; -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html