Re: [PATCH v3 05/30] log: make --regexp-ignore-case work with --perl-regexp

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Sun, May 21, 2017 at 1:50 AM, Junio C Hamano <gitster@xxxxxxxxx> wrote:
> Ævar Arnfjörð Bjarmason  <avarab@xxxxxxxxx> writes:
>
>> Make the --regexp-ignore-case option work with --perl-regexp. This
>> never worked, and there was no test for this. Fix the bug and add a
>> test.
>>
>> When PCRE support was added in commit 63e7e9d8b6 ("git-grep: Learn
>> PCRE", 2011-05-09) compile_pcre_regexp() would only check
>> opt->ignore_case, but when the --perl-regexp option was added in
>> commit 727b6fc3ed ("log --grep: accept --basic-regexp and
>> --perl-regexp", 2012-10-03) the code didn't set the opt->ignore_case.
>>
>> Change the test suite to test for -i and --invert-regexp with
>> basic/extended/perl patterns in addition to fixed, which was the only
>> patternType that was tested for before in combination with those
>> options.
>>
>> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@xxxxxxxxx>
>> ---
>>  revision.c     |  1 +
>>  t/t4202-log.sh | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++++-----
>>  2 files changed, 56 insertions(+), 5 deletions(-)
>>
>> diff --git a/revision.c b/revision.c
>> index 8a8c1789c7..4883cdd2d0 100644
>> --- a/revision.c
>> +++ b/revision.c
>> @@ -1991,6 +1991,7 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg
>>       } else if (!strcmp(arg, "--extended-regexp") || !strcmp(arg, "-E")) {
>>               revs->grep_filter.pattern_type_option = GREP_PATTERN_TYPE_ERE;
>>       } else if (!strcmp(arg, "--regexp-ignore-case") || !strcmp(arg, "-i")) {
>> +             revs->grep_filter.ignore_case = 1;
>>               revs->grep_filter.regflags |= REG_ICASE;
>>               DIFF_OPT_SET(&revs->diffopt, PICKAXE_IGNORE_CASE);
>>       } else if (!strcmp(arg, "--fixed-strings") || !strcmp(arg, "-F")) {
>
> Looks good.
>
> I however wonder if it is a better approach in the longer term to
> treat the .ignore_case field just like .extended_regexp_option
> field, i.e. not committing immediately to .regflags but commit it
> after config and command line parsing is done, just like we make the
> "BRE? ERE?" decision in grep_commit_pattern_type().

I started hacking up a patch to fix the root cause of this, i.e. users
of the grep API should only set `.ignore_case = 1` and not care about
setting regflags. It turned out to be more than a trivial change,
though, so I didn't include it in this series:

diff --git a/builtin/grep.c b/builtin/grep.c
index 3ffb5b4e81..be28c37265 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -1151,8 +1151,6 @@ int cmd_grep(int argc, const char **argv, const char *prefix)

        if (!opt.pattern_list)
                die(_("no pattern given."));
-       if (!opt.fixed && opt.ignore_case)
-               opt.regflags |= REG_ICASE;

        compile_grep_patterns(&opt);

diff --git a/grep.c b/grep.c
index 47cee45067..7b13ee1043 100644
--- a/grep.c
+++ b/grep.c
@@ -435,12 +435,11 @@ static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt)

 static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
 {
-       int icase, ascii_only;
+       int ascii_only;
        int err;

        p->word_regexp = opt->word_regexp;
        p->ignore_case = opt->ignore_case;
-       icase          = opt->regflags & REG_ICASE || p->ignore_case;
        ascii_only     = !has_non_ascii(p->pattern);

        /*
@@ -456,12 +455,12 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
         * want to use kws.
         */
        if (opt->fixed || is_fixed(p->pattern, p->patternlen))
-               p->fixed = !icase || ascii_only;
+               p->fixed = !p->ignore_case || ascii_only;
        else
                p->fixed = 0;

        if (p->fixed) {
-               p->kws = kwsalloc(icase ? tolower_trans_tbl : NULL);
+               p->kws = kwsalloc(p->ignore_case ? tolower_trans_tbl : NULL);
                kwsincr(p->kws, p->pattern, p->patternlen);
                kwsprep(p->kws);
                return;
@@ -480,6 +479,8 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
                return;
        }

+       if (p->ignore_case)
+               opt->regflags |= REG_ICASE;
        err = regcomp(&p->regexp, p->pattern, opt->regflags);
        if (err) {
                char errbuf[1024];
diff --git a/revision.c b/revision.c
index 4883cdd2d0..30c23a1098 100644
--- a/revision.c
+++ b/revision.c
@@ -1992,7 +1992,6 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg
                revs->grep_filter.pattern_type_option = GREP_PATTERN_TYPE_ERE;
        } else if (!strcmp(arg, "--regexp-ignore-case") || !strcmp(arg, "-i")) {
                revs->grep_filter.ignore_case = 1;
-               revs->grep_filter.regflags |= REG_ICASE;
                DIFF_OPT_SET(&revs->diffopt, PICKAXE_IGNORE_CASE);
        } else if (!strcmp(arg, "--fixed-strings") || !strcmp(arg, "-F")) {
                revs->grep_filter.pattern_type_option = GREP_PATTERN_TYPE_FIXED;

But an even better solution is to get rid of the regflags field in
grep_opt entirely, as in the patch below. However, this conflicts with
some of my later patches:

diff --git a/builtin/grep.c b/builtin/grep.c
index 3ffb5b4e81..be28c37265 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -1151,8 +1151,6 @@ int cmd_grep(int argc, const char **argv, const char *prefix)

        if (!opt.pattern_list)
                die(_("no pattern given."));
-       if (!opt.fixed && opt.ignore_case)
-               opt.regflags |= REG_ICASE;

        compile_grep_patterns(&opt);
diff --git a/grep.c b/grep.c
index 47cee45067..1bde7037ba 100644
--- a/grep.c
+++ b/grep.c
@@ -34,7 +34,6 @@ void init_grep_defaults(void)
        memset(opt, 0, sizeof(*opt));
        opt->relative = 1;
        opt->pathname = 1;
-       opt->regflags = REG_NEWLINE;
        opt->max_depth = -1;
        opt->pattern_type_option = GREP_PATTERN_TYPE_UNSPECIFIED;
        opt->extended_regexp_option = 0;
@@ -156,7 +155,6 @@ void grep_init(struct grep_opt *opt, const char *prefix)
        opt->linenum = def->linenum;
        opt->max_depth = def->max_depth;
        opt->pathname = def->pathname;
-       opt->regflags = def->regflags;
        opt->relative = def->relative;
        opt->output = def->output;

@@ -179,25 +177,25 @@ static void grep_set_pattern_type_option(enum grep_pattern_type pattern_type, st
        case GREP_PATTERN_TYPE_BRE:
                opt->fixed = 0;
                opt->pcre = 0;
-               opt->regflags &= ~REG_EXTENDED;
+               opt->extended = 0;
                break;
         case GREP_PATTERN_TYPE_ERE:
                opt->fixed = 0;
                opt->pcre = 0;
-               opt->regflags |= REG_EXTENDED;
+               opt->extended = 1;
                break;

        case GREP_PATTERN_TYPE_FIXED:
                opt->fixed = 1;
                opt->pcre = 0;
-               opt->regflags &= ~REG_EXTENDED;
+               opt->extended = 0;
                break;

        case GREP_PATTERN_TYPE_PCRE:
                opt->fixed = 0;
                opt->pcre = 1;
-               opt->regflags &= ~REG_EXTENDED;
+               opt->extended = 0;
                break;
        }
 }
@@ -415,10 +413,9 @@ static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt)
 {
        struct strbuf sb = STRBUF_INIT;
        int err;
-       int regflags;
+       int regflags = REG_NEWLINE;

        basic_regex_quote_buf(&sb, p->pattern);
-       regflags = opt->regflags & ~REG_EXTENDED;
        if (opt->ignore_case)
                regflags |= REG_ICASE;
        err = regcomp(&p->regexp, sb.buf, regflags);
@@ -435,12 +432,12 @@ static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt)

 static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
 {
-       int icase, ascii_only;
+       int ascii_only;
        int err;
+       int regflags = REG_NEWLINE;

        p->word_regexp = opt->word_regexp;
        p->ignore_case = opt->ignore_case;
-       icase          = opt->regflags & REG_ICASE || p->ignore_case;
        ascii_only     = !has_non_ascii(p->pattern);

        /*
@@ -456,12 +453,12 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
         * want to use kws.
         */
        if (opt->fixed || is_fixed(p->pattern, p->patternlen))
-               p->fixed = !icase || ascii_only;
+               p->fixed = !p->ignore_case || ascii_only;
        else
                p->fixed = 0;

        if (p->fixed) {
-               p->kws = kwsalloc(icase ? tolower_trans_tbl : NULL);
+               p->kws = kwsalloc(p->ignore_case ? tolower_trans_tbl : NULL);
                kwsincr(p->kws, p->pattern, p->patternlen);
                kwsprep(p->kws);
                return;
@@ -480,7 +477,11 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
                return;
        }

-       err = regcomp(&p->regexp, p->pattern, opt->regflags);
+       if (p->ignore_case)
+               regflags |= REG_ICASE;
+       if (opt->extended)
+               regflags |= REG_EXTENDED;
+       err = regcomp(&p->regexp, p->pattern, regflags);
        if (err) {
                char errbuf[1024];
                regerror(err, &p->regexp, errbuf, 1024);
diff --git a/grep.h b/grep.h
index 267534ca24..d9d603deb1 100644
--- a/grep.h
+++ b/grep.h
@@ -129,7 +129,6 @@ struct grep_opt {
        char color_match_selected[COLOR_MAXLEN];
        char color_selected[COLOR_MAXLEN];
        char color_sep[COLOR_MAXLEN];
-       int regflags;
        unsigned pre_context;
        unsigned post_context;
        unsigned last_shown;
diff --git a/revision.c b/revision.c
index 4883cdd2d0..67240d38af 100644
--- a/revision.c
+++ b/revision.c
@@ -1362,7 +1362,6 @@ void init_revisions(struct rev_info *revs, const char *prefix)
        init_grep_defaults();
        grep_init(&revs->grep_filter, prefix);
        revs->grep_filter.status_only = 1;
-       revs->grep_filter.regflags = REG_NEWLINE;

        diff_setup(&revs->diffopt);
        if (prefix && !revs->diffopt.prefix) {
@@ -1992,7 +1991,6 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg
                revs->grep_filter.pattern_type_option = GREP_PATTERN_TYPE_ERE;
        } else if (!strcmp(arg, "--regexp-ignore-case") || !strcmp(arg, "-i")) {
                revs->grep_filter.ignore_case = 1;
-               revs->grep_filter.regflags |= REG_ICASE;
                DIFF_OPT_SET(&revs->diffopt, PICKAXE_IGNORE_CASE);
        } else if (!strcmp(arg, "--fixed-strings") || !strcmp(arg, "-F")) {
                revs->grep_filter.pattern_type_option = GREP_PATTERN_TYPE_FIXED;

But as all this code cleanup isn't needed for fixing this bug, and I'd
really like to get this series merged into next/master ASAP so I can
start submitting the grep/pcre patches that are actually interesting,
let's leave this orthogonal code cleanup for now.




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Announce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux