Jeff King schrieb: > On Sat, Feb 28, 2009 at 11:44:01PM +0100, Mike Hommey wrote: > >>> --- >>> Makefile | 1 + >>> compat/memmem.c | 103 +++++++++---- >>> compat/str-two-way.h | 429 ++++++++++++++++++++++++++++++++++++++++++++++++++ >>> 3 files changed, 504 insertions(+), 29 deletions(-) >> Seeing how much memmem is being used in the codebase, is it really worth? > > See earlier in the thread, where "git log -Stoken" is substantially > faster on Linux versus Windows (but some exact numbers before and after > on Windows would be nice to have in the commit message). Yes, and also please see the other numbers I just posted. It's more of an update -- we took the current memmem() fall-back from glibc, too, and they switched to this shiny new implementation to avoid the quadratic complexity of the old one in the meantime. I was going to say that with a fast memmem() we could convert some strstr() calls, especially those where we know the lengths of the strings anyway -- intuitively, memmem() should be faster than strstr() in that case. However, the following patch on top of the Gnulib import makes "git grep grep v1.6.1" take 10% *more* time for me (on Windows). I wonder why. diff --git a/grep.c b/grep.c index 062b2b6..66ef171 100644 --- a/grep.c +++ b/grep.c @@ -39,6 +39,8 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) { int err; + p->pattern_len = strlen(p->pattern); + if (opt->fixed || is_fixed(p->pattern)) p->fixed = 1; if (opt->regflags & REG_ICASE) @@ -268,16 +270,17 @@ static void show_name(struct grep_opt *opt, const char *name) printf("%s%c", name, opt->null_following_name ? '\0' : '\n'); } -static int fixmatch(const char *pattern, char *line, regmatch_t *match) +static int fixmatch(const char *pattern, size_t pattern_len, const char *bol, + const char *eol, regmatch_t *match) { - char *hit = strstr(line, pattern); + char *hit = memmem(bol, eol - bol, pattern, pattern_len); if (!hit) { match->rm_so = match->rm_eo = -1; return REG_NOMATCH; } else { - match->rm_so = hit - line; - match->rm_eo = match->rm_so + strlen(pattern); + match->rm_so = hit - bol; + match->rm_eo = match->rm_so + pattern_len; return 0; } } @@ -335,7 +338,7 @@ static int match_one_pattern(struct grep_opt *opt, struct grep_pat *p, char *bol pmatch, 0); } else { - hit = !fixmatch(p->pattern, bol, pmatch); + hit = !fixmatch(p->pattern, p->pattern_len, bol, eol, pmatch); } if (hit && opt->word_regexp) { diff --git a/grep.h b/grep.h index 5102ce3..2e22ab2 100644 --- a/grep.h +++ b/grep.h @@ -28,6 +28,7 @@ struct grep_pat { int no; enum grep_pat_token token; const char *pattern; + size_t pattern_len; enum grep_header_field field; regex_t regexp; unsigned fixed:1; -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html