Bring back optimized fixed-string search for "grep", this time with PCRE v2 as an optional backend. As noted in [1] with kwset we were slower than PCRE v1 and v2 JIT with the kwset backend, so that optimization was counterproductive. This brings back the optimization for "-F", without changing the semantics of "\0" in patterns. As seen in previous commits in this series we could support it now, but I'd rather just leave that edge-case aside so the tests don't need to do one thing or the other depending on what --fixed-strings backend we're using. I could also support the v1 backend here, but that would make the code more complex, and I'd rather aim for simplicity here and in future changes to the diffcore. We're not going to have someone who absolutely must have faster search, but for whom building PCRE v2 isn't acceptable. 1. https://public-inbox.org/git/87v9x793qi.fsf@xxxxxxxxxxxxxxxxxxx/ Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@xxxxxxxxx> --- grep.c | 47 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 2 deletions(-) diff --git a/grep.c b/grep.c index 4716217837..6b75d5be68 100644 --- a/grep.c +++ b/grep.c @@ -356,6 +356,18 @@ static NORETURN void compile_regexp_failed(const struct grep_pat *p, die("%s'%s': %s", where, p->pattern, error); } +static int is_fixed(const char *s, size_t len) +{ + size_t i; + + for (i = 0; i < len; i++) { + if (is_regex_special(s[i])) + return 0; + } + + return 1; +} + #ifdef USE_LIBPCRE1 static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt) { @@ -602,7 +614,6 @@ static int pcre2match(struct grep_pat *p, const char *line, const char *eol, static void free_pcre2_pattern(struct grep_pat *p) { } -#endif /* !USE_LIBPCRE2 */ static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt) { @@ -623,11 +634,13 @@ static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt) compile_regexp_failed(p, errbuf); } } +#endif /* !USE_LIBPCRE2 */ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) { int err; int regflags = REG_NEWLINE; + int pat_is_fixed; p->word_regexp = opt->word_regexp; p->ignore_case = opt->ignore_case; @@ -636,8 +649,38 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) if (memchr(p->pattern, 0, p->patternlen) && !opt->pcre2) die(_("given pattern contains NULL byte (via -f <file>). This is only supported with -P under PCRE v2")); - if (opt->fixed) { + pat_is_fixed = is_fixed(p->pattern, p->patternlen); + if (opt->fixed || pat_is_fixed) { +#ifdef USE_LIBPCRE2 + opt->pcre2 = 1; + if (pat_is_fixed) { + compile_pcre2_pattern(p, opt); + } else { + /* + * E.g. t7811-grep-open.sh relies on the + * pattern being restored, and unfortunately + * there's no PCRE compile flag for "this is + * fixed", so we need to munge it to + * "\Q<pat>\E". + */ + char *old_pattern = p->pattern; + size_t old_patternlen = p->patternlen; + struct strbuf sb = STRBUF_INIT; + + strbuf_add(&sb, "\\Q", 2); + strbuf_add(&sb, p->pattern, p->patternlen); + strbuf_add(&sb, "\\E", 2); + + p->pattern = sb.buf; + p->patternlen = sb.len; + compile_pcre2_pattern(p, opt); + p->pattern = old_pattern; + p->patternlen = old_patternlen; + strbuf_release(&sb); + } +#else compile_fixed_regexp(p, opt); +#endif return; } -- 2.22.0.455.g172b71a6c5