[PATCH V2 4/5] git-grep: Learn PCRE

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This patch teaches git-grep the --perl-regexp/-P options (naming
borrowed from GNU grep) in order to allow specifying PCRE regexes on the
command line.

PCRE has a number of features which make them more handy to use than
POSIX regexes, like consistent escaping rules, extended character
classes, ungreedy matching etc.

Signed-off-by: MichaÅ Kiedrowicz <michal.kiedrowicz@xxxxxxxxx>
---
 Documentation/git-grep.txt             |    6 +++
 Makefile                               |   16 +++++++
 builtin/grep.c                         |    2 +
 contrib/completion/git-completion.bash |    1 +
 grep.c                                 |   77 +++++++++++++++++++++++++++++++-
 grep.h                                 |    9 ++++
 6 files changed, 110 insertions(+), 1 deletions(-)

diff --git a/Documentation/git-grep.txt b/Documentation/git-grep.txt
index 4a58378..e150c77 100644
--- a/Documentation/git-grep.txt
+++ b/Documentation/git-grep.txt
@@ -12,6 +12,7 @@ SYNOPSIS
 'git grep' [-a | --text] [-I] [-i | --ignore-case] [-w | --word-regexp]
 	   [-v | --invert-match] [-h|-H] [--full-name]
 	   [-E | --extended-regexp] [-G | --basic-regexp]
+	   [-P | --perl-regexp]
 	   [-F | --fixed-strings] [-n | --line-number]
 	   [-l | --files-with-matches] [-L | --files-without-match]
 	   [(-O | --open-files-in-pager) [<pager>]]
@@ -97,6 +98,11 @@ OPTIONS
 	Use POSIX extended/basic regexp for patterns.  Default
 	is to use basic regexp.
 
+-P::
+--perl-regexp::
+	Use Perl-compatible regexp for patterns. Requires libpcre to be
+	compiled in.
+
 -F::
 --fixed-strings::
 	Use fixed strings for patterns (don't interpret pattern
diff --git a/Makefile b/Makefile
index 3a1fe20..98841dc 100644
--- a/Makefile
+++ b/Makefile
@@ -24,6 +24,12 @@ all::
 # Define NO_OPENSSL environment variable if you do not have OpenSSL.
 # This also implies BLK_SHA1.
 #
+# Define NO_LIBPCRE if you do not have libpcre installed.  git-grep cannot use
+# Perl-compatible regexes.
+#
+# Define LIBPCREDIR=/foo/bar if your libpcre header and library files are in
+# /foo/bar/include and /foo/bar/lib directories.
+#
 # Define NO_CURL if you do not have libcurl installed.  git-http-pull and
 # git-http-push are not built, and you cannot use http:// and https://
 # transports.
@@ -1251,6 +1257,16 @@ ifdef NO_LIBGEN_H
 	COMPAT_OBJS += compat/basename.o
 endif
 
+ifdef NO_LIBPCRE
+	BASIC_CFLAGS += -DNO_LIBPCRE
+else
+	ifdef LIBPCREDIR
+		BASIC_CFLAGS += -I$(LIBPCREDIR)/include
+		EXTLIBS += -L$(LIBPCREDIR)/$(lib) $(CC_LD_DYNPATH)$(LIBPCREDIR)/$(lib)
+	endif
+	EXTLIBS += -lpcre
+endif
+
 ifdef NO_CURL
 	BASIC_CFLAGS += -DNO_CURL
 	REMOTE_CURL_PRIMARY =
diff --git a/builtin/grep.c b/builtin/grep.c
index 10a1f65..6831975 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -781,6 +781,8 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
 			REG_EXTENDED),
 		OPT_BOOLEAN('F', "fixed-strings", &opt.fixed,
 			"interpret patterns as fixed strings"),
+		OPT_BOOLEAN('P', "perl-regexp", &opt.pcre,
+				"use Perl-compatible regular expressions"),
 		OPT_GROUP(""),
 		OPT_BOOLEAN('n', "line-number", &opt.linenum, "show line numbers"),
 		OPT_NEGBIT('h', NULL, &opt.pathname, "don't show filenames", 1),
diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash
index 4b2654d..95790a1 100755
--- a/contrib/completion/git-completion.bash
+++ b/contrib/completion/git-completion.bash
@@ -1487,6 +1487,7 @@ _git_grep ()
 			--text --ignore-case --word-regexp --invert-match
 			--full-name --line-number
 			--extended-regexp --basic-regexp --fixed-strings
+			--perl-regexp
 			--files-with-matches --name-only
 			--files-without-match
 			--max-depth
diff --git a/grep.c b/grep.c
index d67baf9..561b791 100644
--- a/grep.c
+++ b/grep.c
@@ -3,6 +3,71 @@
 #include "userdiff.h"
 #include "xdiff-interface.h"
 
+#ifdef NO_LIBPCRE
+static void compile_pcre_regexp(struct grep_pat *p, struct grep_opt *opt)
+{
+	die("cannot use Perl-compatible regexes when libpcre is not compiled in");
+}
+
+static int pcrematch(struct grep_pat *p, char *line, char *eol,
+		regmatch_t *match, int eflags)
+{
+	die("cannot use Perl-compatible regexes when libpcre is not compiled in");
+}
+
+static void free_pcre_regexp(struct grep_pat *p)
+{
+	die("cannot use Perl-compatible regexes when libpcre is not compiled in");
+}
+
+#else /* !NO_LIBPCRE */
+static void compile_pcre_regexp(struct grep_pat *p, struct grep_opt *opt)
+{
+	const char *error;
+	int erroffset;
+	int options = 0;
+
+	if (opt->ignore_case)
+		options |= PCRE_CASELESS;
+
+	p->pcre_regexp = pcre_compile(p->pattern, options, &error, &erroffset,
+			NULL);
+	if (!p->pcre_regexp)
+		die("'%s': %s", p->pattern, error);
+
+	p->extra = pcre_study(p->pcre_regexp, 0, &error);
+	if (!p->extra && error)
+		die("%s", error);
+}
+
+static int pcrematch(struct grep_pat *p, char *line, char *eol,
+		regmatch_t *match, int eflags)
+{
+	int ovector[30], ret, flags = 0;
+
+	if (eflags & REG_NOTBOL)
+		flags |= PCRE_NOTBOL;
+
+	ret = pcre_exec(p->pcre_regexp, p->extra, line, eol - line, 0, flags,
+			ovector, ARRAY_SIZE(ovector));
+	if (ret < 0 && ret != PCRE_ERROR_NOMATCH)
+		die("pcre_exec failed with error code %d", ret);
+	if (ret > 0) {
+		ret = 0;
+		match->rm_so = ovector[0];
+		match->rm_eo = ovector[1];
+	}
+
+	return ret;
+}
+
+static void free_pcre_regexp(struct grep_pat *p)
+{
+	pcre_free(p->pcre_regexp);
+	pcre_free(p->extra);
+}
+#endif /* !NO_LIBPCRE */
+
 void append_header_grep_pattern(struct grep_opt *opt, enum grep_header_field field, const char *pat)
 {
 	struct grep_pat *p = xcalloc(1, sizeof(*p));
@@ -70,6 +135,11 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
 	if (p->fixed)
 		return;
 
+	if (opt->pcre) {
+		compile_pcre_regexp(p, opt);
+		return;
+	}
+
 	err = regcomp(&p->regexp, p->pattern, opt->regflags);
 	if (err) {
 		char errbuf[1024];
@@ -320,7 +390,10 @@ void free_grep_patterns(struct grep_opt *opt)
 		case GREP_PATTERN: /* atom */
 		case GREP_PATTERN_HEAD:
 		case GREP_PATTERN_BODY:
-			regfree(&p->regexp);
+			if (p->pcre_regexp)
+				free_pcre_regexp(p);
+			else
+				regfree(&p->regexp);
 			break;
 		default:
 			break;
@@ -419,6 +492,8 @@ static int patmatch(struct grep_pat *p, char *line, char *eol,
 
 	if (p->fixed)
 		hit = !fixmatch(p, line, eol, match);
+	else if (p->pcre_regexp)
+		hit = !pcrematch(p, line, eol, match, eflags);
 	else
 		hit = !regmatch(&p->regexp, line, eol, match, eflags);
 
diff --git a/grep.h b/grep.h
index 06621fe..68aa47a 100644
--- a/grep.h
+++ b/grep.h
@@ -1,6 +1,12 @@
 #ifndef GREP_H
 #define GREP_H
 #include "color.h"
+#ifndef NO_LIBPCRE
+#include <pcre.h>
+#else
+typedef int pcre;
+typedef int pcre_extra;
+#endif /* NO_LIBPCRE */
 
 enum grep_pat_token {
 	GREP_PATTERN,
@@ -33,6 +39,8 @@ struct grep_pat {
 	size_t patternlen;
 	enum grep_header_field field;
 	regex_t regexp;
+	pcre *pcre_regexp;
+	pcre_extra *extra;
 	unsigned fixed:1;
 	unsigned ignore_case:1;
 	unsigned word_regexp:1;
@@ -83,6 +91,7 @@ struct grep_opt {
 #define GREP_BINARY_TEXT	2
 	int binary;
 	int extended;
+	int pcre;
 	int relative;
 	int pathname;
 	int null_following_name;
-- 
1.7.3.4

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]