[PATCH v2 46/46] parse_pathspec: accept :(icase)path syntax

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@xxxxxxxxx>
---
 Documentation/git.txt              |  8 ++++
 Documentation/glossary-content.txt |  3 ++
 builtin/add.c                      |  6 ++-
 builtin/ls-tree.c                  |  2 +-
 cache.h                            |  1 +
 dir.c                              | 74 ++++++++++++++++++++++++-----
 git.c                              |  4 ++
 pathspec.c                         |  9 +++-
 pathspec.h                         | 22 ++++++++-
 t/t6131-pathspec-icase.sh (new +x) | 97 ++++++++++++++++++++++++++++++++++++++
 tree-walk.c                        | 59 ++++++++++++++++++-----
 11 files changed, 257 insertions(+), 28 deletions(-)
 create mode 100755 t/t6131-pathspec-icase.sh

diff --git a/Documentation/git.txt b/Documentation/git.txt
index 3571a1b..546eea4 100644
--- a/Documentation/git.txt
+++ b/Documentation/git.txt
@@ -466,6 +466,10 @@ help ...`.
 	globbing on individual pathspecs can be done using pathspec
 	magic ":(glob)"
 
+--icase-pathspecs:
+	Add "icase" magic to all pathspec. This is equivalent to setting
+	the `GIT_ICASE_PATHSPECS` environment variable to `1`.
+
 GIT COMMANDS
 ------------
 
@@ -879,6 +883,10 @@ GIT_NOGLOB_PATHSPECS::
 	Setting this variable to `1` will cause Git to treat all
 	pathspecs as literal (aka "literal" magic).
 
+GIT_ICASE_PATHSPECS::
+	Setting this variable to `1` will cause Git to treat all
+	pathspeccs as case-insensitive.
+
 
 Discussion[[Discussion]]
 ------------------------
diff --git a/Documentation/glossary-content.txt b/Documentation/glossary-content.txt
index a3d9029..13a64d3 100644
--- a/Documentation/glossary-content.txt
+++ b/Documentation/glossary-content.txt
@@ -334,6 +334,9 @@ literal;;
 	Wildcards in the pattern such as `*` or `?` are treated
 	as literal characters.
 
+icase;;
+	Case insensitive match.
+
 glob;;
 	Git treats the pattern as a shell glob suitable for
 	consumption by fnmatch(3) with the FNM_PATHNAME flag:
diff --git a/builtin/add.c b/builtin/add.c
index 1dab246..9d52fc7 100644
--- a/builtin/add.c
+++ b/builtin/add.c
@@ -544,12 +544,14 @@ int cmd_add(int argc, const char **argv, const char *prefix)
 		GUARD_PATHSPEC(&pathspec,
 			       PATHSPEC_FROMTOP |
 			       PATHSPEC_LITERAL |
-			       PATHSPEC_GLOB);
+			       PATHSPEC_GLOB |
+			       PATHSPEC_ICASE);
 
 		for (i = 0; i < pathspec.nr; i++) {
 			const char *path = pathspec.items[i].match;
 			if (!seen[i] &&
-			    ((pathspec.items[i].magic & PATHSPEC_GLOB) ||
+			    ((pathspec.items[i].magic &
+			      (PATHSPEC_GLOB | PATHSPEC_ICASE)) ||
 			     !file_exists(path))) {
 				if (ignore_missing) {
 					int dtype = DT_UNKNOWN;
diff --git a/builtin/ls-tree.c b/builtin/ls-tree.c
index 7882352..f6d8215 100644
--- a/builtin/ls-tree.c
+++ b/builtin/ls-tree.c
@@ -173,7 +173,7 @@ int cmd_ls_tree(int argc, const char **argv, const char *prefix)
 	 * cannot be lifted until it is converted to use
 	 * match_pathspec_depth() or tree_entry_interesting()
 	 */
-	parse_pathspec(&pathspec, PATHSPEC_GLOB,
+	parse_pathspec(&pathspec, PATHSPEC_GLOB | PATHSPEC_ICASE,
 		       PATHSPEC_PREFER_CWD,
 		       prefix, argv + 1);
 	for (i = 0; i < pathspec.nr; i++)
diff --git a/cache.h b/cache.h
index dc4d2ee..3cff825 100644
--- a/cache.h
+++ b/cache.h
@@ -369,6 +369,7 @@ static inline enum object_type object_type(unsigned int mode)
 #define GIT_LITERAL_PATHSPECS_ENVIRONMENT "GIT_LITERAL_PATHSPECS"
 #define GIT_GLOB_PATHSPECS_ENVIRONMENT "GIT_GLOB_PATHSPECS"
 #define GIT_NOGLOB_PATHSPECS_ENVIRONMENT "GIT_NOGLOB_PATHSPECS"
+#define GIT_ICASE_PATHSPECS_ENVIRONMENT "GIT_ICASE_PATHSPECS"
 
 /*
  * This environment variable is expected to contain a boolean indicating
diff --git a/dir.c b/dir.c
index 076bd46..8543736 100644
--- a/dir.c
+++ b/dir.c
@@ -57,7 +57,7 @@ inline int git_fnmatch(const struct pathspec_item *item,
 		       int prefix)
 {
 	if (prefix > 0) {
-		if (strncmp(pattern, string, prefix))
+		if (ps_strncmp(item, pattern, string, prefix))
 			return FNM_NOMATCH;
 		pattern += prefix;
 		string += prefix;
@@ -66,14 +66,18 @@ inline int git_fnmatch(const struct pathspec_item *item,
 		int pattern_len = strlen(++pattern);
 		int string_len = strlen(string);
 		return string_len < pattern_len ||
-		       strcmp(pattern,
-			      string + string_len - pattern_len);
+			ps_strcmp(item, pattern,
+				  string + string_len - pattern_len);
 	}
 	if (item->magic & PATHSPEC_GLOB)
-		return wildmatch(pattern, string, WM_PATHNAME, NULL);
+		return wildmatch(pattern, string,
+				 WM_PATHNAME |
+				 (item->magic & PATHSPEC_ICASE ? WM_CASEFOLD : 0),
+				 NULL);
 	else
 		/* wildmatch has not learned no FNM_PATHNAME mode yet */
-		return fnmatch(pattern, string, 0);
+		return fnmatch(pattern, string,
+			       item->magic & PATHSPEC_ICASE ? FNM_CASEFOLD : 0);
 }
 
 static int fnmatch_icase_mem(const char *pattern, int patternlen,
@@ -110,16 +114,27 @@ static size_t common_prefix_len(const struct pathspec *pathspec)
 	int n;
 	size_t max = 0;
 
+	/*
+	 * ":(icase)path" is treated as a pathspec full of
+	 * wildcard. In other words, only prefix is considered common
+	 * prefix. If the pathspec is abc/foo abc/bar, running in
+	 * subdir xyz, the common prefix is still xyz, not xuz/abc as
+	 * in non-:(icase).
+	 */
 	GUARD_PATHSPEC(pathspec,
 		       PATHSPEC_FROMTOP |
 		       PATHSPEC_MAXDEPTH |
 		       PATHSPEC_LITERAL |
-		       PATHSPEC_GLOB);
+		       PATHSPEC_GLOB |
+		       PATHSPEC_ICASE);
 
 	for (n = 0; n < pathspec->nr; n++) {
-		size_t i = 0, len = 0;
-		while (i < pathspec->items[n].nowildcard_len &&
-		       (n == 0 || i < max)) {
+		size_t i = 0, len = 0, item_len;
+		if (pathspec->items[n].magic & PATHSPEC_ICASE)
+			item_len = pathspec->items[n].prefix;
+		else
+			item_len = pathspec->items[n].nowildcard_len;
+		while (i < item_len && (n == 0 || i < max)) {
 			char c = pathspec->items[n].match[i];
 			if (c != pathspec->items[0].match[i])
 				break;
@@ -196,11 +211,44 @@ static int match_pathspec_item(const struct pathspec_item *item, int prefix,
 	const char *match = item->match + prefix;
 	int matchlen = item->len - prefix;
 
+	/*
+	 * The normal call pattern is:
+	 * 1. prefix = common_prefix_len(ps);
+	 * 2. prune something, or fill_directory
+	 * 3. match_pathspec_depth()
+	 *
+	 * 'prefix' at #1 may be shorter than the command's prefix and
+	 * it's ok for #2 to match extra files. Those extras will be
+	 * trimmed at #3.
+	 *
+	 * Suppose the pathspec is 'foo' and '../bar' running from
+	 * subdir 'xyz'. The common prefix at #1 will be empty, thanks
+	 * to "../". We may have xyz/foo _and_ XYZ/foo after #2. The
+	 * user does not want XYZ/foo, only the "foo" part should be
+	 * case-insensitive. We need to filter out XYZ/foo here. In
+	 * other words, we do not trust the caller on comparing the
+	 * prefix part when :(icase) is involved. We do exact
+	 * comparison ourselves.
+	 *
+	 * Normally the caller (common_prefix_len() in fact) does
+	 * _exact_ matching on name[-prefix+1..-1] and we do not need
+	 * to check that part. Be defensive and check it anyway, in
+	 * case common_prefix_len is changed, or a new caller is
+	 * introduced that does not use common_prefix_len.
+	 *
+	 * If the penalty turns out too high when prefix is really
+	 * long, maybe change it to
+	 * strncmp(match, name, item->prefix - prefix)
+	 */
+	if (item->prefix && (item->magic & PATHSPEC_ICASE) &&
+	    strncmp(item->match, name - prefix, item->prefix))
+		return 0;
+
 	/* If the match was just the prefix, we matched */
 	if (!*match)
 		return MATCHED_RECURSIVELY;
 
-	if (matchlen <= namelen && !strncmp(match, name, matchlen)) {
+	if (matchlen <= namelen && !ps_strncmp(item, match, name, matchlen)) {
 		if (matchlen == namelen)
 			return MATCHED_EXACTLY;
 
@@ -241,7 +289,8 @@ int match_pathspec_depth(const struct pathspec *ps,
 		       PATHSPEC_FROMTOP |
 		       PATHSPEC_MAXDEPTH |
 		       PATHSPEC_LITERAL |
-		       PATHSPEC_GLOB);
+		       PATHSPEC_GLOB |
+		       PATHSPEC_ICASE);
 
 	if (!ps->nr) {
 		if (!ps->recursive ||
@@ -1301,7 +1350,8 @@ int read_directory(struct dir_struct *dir, const char *path, int len, const stru
 			       PATHSPEC_FROMTOP |
 			       PATHSPEC_MAXDEPTH |
 			       PATHSPEC_LITERAL |
-			       PATHSPEC_GLOB);
+			       PATHSPEC_GLOB |
+			       PATHSPEC_ICASE);
 
 	if (has_symlink_leading_path(path, len))
 		return dir->nr;
diff --git a/git.c b/git.c
index 2509675..cebf882 100644
--- a/git.c
+++ b/git.c
@@ -155,6 +155,10 @@ static int handle_options(const char ***argv, int *argc, int *envchanged)
 			setenv(GIT_NOGLOB_PATHSPECS_ENVIRONMENT, "1", 1);
 			if (envchanged)
 				*envchanged = 1;
+		} else if (!strcmp(cmd, "--icase-pathspecs")) {
+			setenv(GIT_ICASE_PATHSPECS_ENVIRONMENT, "1", 1);
+			if (envchanged)
+				*envchanged = 1;
 		} else if (!strcmp(cmd, "--shallow-file")) {
 			(*argv)++;
 			(*argc)--;
diff --git a/pathspec.c b/pathspec.c
index c1e6917..d9f4143 100644
--- a/pathspec.c
+++ b/pathspec.c
@@ -57,7 +57,6 @@ char *find_pathspecs_matching_against_index(const struct pathspec *pathspec)
  *
  * Possible future magic semantics include stuff like:
  *
- *	{ PATHSPEC_ICASE, '\0', "icase" },
  *	{ PATHSPEC_RECURSIVE, '*', "recursive" },
  *	{ PATHSPEC_REGEXP, '\0', "regexp" },
  *
@@ -71,6 +70,7 @@ static struct pathspec_magic {
 	{ PATHSPEC_FROMTOP, '/', "top" },
 	{ PATHSPEC_LITERAL,   0, "literal" },
 	{ PATHSPEC_GLOB,   '\0', "glob" },
+	{ PATHSPEC_ICASE,  '\0', "icase" },
 };
 
 /*
@@ -95,6 +95,7 @@ static unsigned prefix_pathspec(struct pathspec_item *item,
 	static int literal_global = -1;
 	static int glob_global = -1;
 	static int noglob_global = -1;
+	static int icase_global = -1;
 	unsigned magic = 0, short_magic = 0, global_magic = 0;
 	const char *copyfrom = elt, *long_magic_end = NULL;
 	char *match;
@@ -116,6 +117,12 @@ static unsigned prefix_pathspec(struct pathspec_item *item,
 	if (glob_global && noglob_global)
 		die(_("global 'glob' and 'noglob' pathspec settings are incompatible"));
 
+
+	if (icase_global < 0)
+		icase_global = git_env_bool(GIT_ICASE_PATHSPECS_ENVIRONMENT, 0);
+	if (icase_global)
+		global_magic |= PATHSPEC_ICASE;
+
 	if ((global_magic & PATHSPEC_LITERAL) &&
 	    (global_magic & ~PATHSPEC_LITERAL))
 		die(_("global 'literal' pathspec setting is incompatible "
diff --git a/pathspec.h b/pathspec.h
index cdf2fa3..04b632f 100644
--- a/pathspec.h
+++ b/pathspec.h
@@ -6,11 +6,13 @@
 #define PATHSPEC_MAXDEPTH	(1<<1)
 #define PATHSPEC_LITERAL	(1<<2)
 #define PATHSPEC_GLOB		(1<<3)
+#define PATHSPEC_ICASE		(1<<4)
 #define PATHSPEC_ALL_MAGIC	  \
 	(PATHSPEC_FROMTOP	| \
 	 PATHSPEC_MAXDEPTH	| \
 	 PATHSPEC_LITERAL	| \
-	 PATHSPEC_GLOB)
+	 PATHSPEC_GLOB		| \
+	 PATHSPEC_ICASE)
 
 #define PATHSPEC_ONESTAR 1	/* the pathspec pattern sastisfies GFNM_ONESTAR */
 
@@ -65,6 +67,24 @@ extern void parse_pathspec(struct pathspec *pathspec,
 extern void copy_pathspec(struct pathspec *dst, const struct pathspec *src);
 extern void free_pathspec(struct pathspec *);
 
+static inline int ps_strncmp(const struct pathspec_item *item,
+			     const char *s1, const char *s2, size_t n)
+{
+	if (item->magic & PATHSPEC_ICASE)
+		return strncasecmp(s1, s2, n);
+	else
+		return strncmp(s1, s2, n);
+}
+
+static inline int ps_strcmp(const struct pathspec_item *item,
+			    const char *s1, const char *s2)
+{
+	if (item->magic & PATHSPEC_ICASE)
+		return strcasecmp(s1, s2);
+	else
+		return strcmp(s1, s2);
+}
+
 extern char *find_pathspecs_matching_against_index(const struct pathspec *pathspec);
 extern void add_pathspec_matches_against_index(const struct pathspec *pathspec, char *seen);
 extern const char *check_path_for_gitlink(const char *path);
diff --git a/t/t6131-pathspec-icase.sh b/t/t6131-pathspec-icase.sh
new file mode 100755
index 0000000..3215eef
--- /dev/null
+++ b/t/t6131-pathspec-icase.sh
@@ -0,0 +1,97 @@
+#!/bin/sh
+
+test_description='test case insensitive pathspec limiting'
+. ./test-lib.sh
+
+test_expect_success 'create commits with glob characters' '
+	test_commit bar bar &&
+	test_commit bAr bAr &&
+	test_commit BAR BAR &&
+	mkdir foo &&
+	test_commit foo/bar foo/bar &&
+	test_commit foo/bAr foo/bAr &&
+	test_commit foo/BAR foo/BAR &&
+	mkdir fOo &&
+	test_commit fOo/bar fOo/bar &&
+	test_commit fOo/bAr fOo/bAr &&
+	test_commit fOo/BAR fOo/BAR &&
+	mkdir FOO &&
+	test_commit FOO/bar FOO/bar &&
+	test_commit FOO/bAr FOO/bAr &&
+	test_commit FOO/BAR FOO/BAR
+'
+
+test_expect_success 'tree_entry_interesting matches bar' '
+	echo bar >expect &&
+	git log --format=%s -- "bar" >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'tree_entry_interesting matches :(icase)bar' '
+	cat <<-EOF >expect &&
+	BAR
+	bAr
+	bar
+	EOF
+	git log --format=%s -- ":(icase)bar" >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'tree_entry_interesting matches :(icase)bar with prefix' '
+	cat <<-EOF >expect &&
+	fOo/BAR
+	fOo/bAr
+	fOo/bar
+	EOF
+	( cd fOo && git log --format=%s -- ":(icase)bar" ) >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'tree_entry_interesting matches :(icase)bar with empty prefix' '
+	cat <<-EOF >expect &&
+	FOO/BAR
+	FOO/bAr
+	FOO/bar
+	fOo/BAR
+	fOo/bAr
+	fOo/bar
+	foo/BAR
+	foo/bAr
+	foo/bar
+	EOF
+	( cd fOo && git log --format=%s -- ":(icase)../foo/bar" ) >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'match_pathspec_depth matches :(icase)bar' '
+	cat <<-EOF >expect &&
+	BAR
+	bAr
+	bar
+	EOF
+	git ls-files ":(icase)bar" >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'match_pathspec_depth matches :(icase)bar with prefix' '
+	cat <<-EOF >expect &&
+	fOo/BAR
+	fOo/bAr
+	fOo/bar
+	EOF
+	( cd fOo && git ls-files --full-name ":(icase)bar" ) >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'match_pathspec_depth matches :(icase)bar with empty prefix' '
+	cat <<-EOF >expect &&
+	bar
+	fOo/BAR
+	fOo/bAr
+	fOo/bar
+	EOF
+	( cd fOo && git ls-files --full-name ":(icase)bar" ../bar ) >actual &&
+	test_cmp expect actual
+'
+
+test_done
diff --git a/tree-walk.c b/tree-walk.c
index a44f528..c366852 100644
--- a/tree-walk.c
+++ b/tree-walk.c
@@ -489,13 +489,25 @@ int get_tree_entry(const unsigned char *tree_sha1, const char *name, unsigned ch
 	return retval;
 }
 
-static int match_entry(const struct name_entry *entry, int pathlen,
+static int match_entry(const struct pathspec_item *item,
+		       const struct name_entry *entry, int pathlen,
 		       const char *match, int matchlen,
 		       enum interesting *never_interesting)
 {
 	int m = -1; /* signals that we haven't called strncmp() */
 
-	if (*never_interesting != entry_not_interesting) {
+	if (item->magic & PATHSPEC_ICASE)
+		/*
+		 * "Never interesting" trick requires exact
+		 * matching. We could do something clever with inexact
+		 * matching, but it's trickier (and not to forget that
+		 * strcasecmp is locale-dependent, at least in
+		 * glibc). Just disable it for now. It can't be worse
+		 * than the wildcard's codepath of '[Tt][Hi][Is][Ss]'
+		 * pattern.
+		 */
+		*never_interesting = entry_not_interesting;
+	else if (*never_interesting != entry_not_interesting) {
 		/*
 		 * We have not seen any match that sorts later
 		 * than the current path.
@@ -541,7 +553,7 @@ static int match_entry(const struct name_entry *entry, int pathlen,
 		 * we cheated and did not do strncmp(), so we do
 		 * that here.
 		 */
-		m = strncmp(match, entry->path, pathlen);
+		m = ps_strncmp(item, match, entry->path, pathlen);
 
 	/*
 	 * If common part matched earlier then it is a hit,
@@ -549,15 +561,39 @@ static int match_entry(const struct name_entry *entry, int pathlen,
 	 * leading directory and is shorter than match.
 	 */
 	if (!m)
+		/*
+		 * match_entry does not check if the prefix part is
+		 * matched case-sensitively. If the entry is a
+		 * directory and part of prefix, it'll be rematched
+		 * eventually by basecmp with special treatment for
+		 * the prefix.
+		 */
 		return 1;
 
 	return 0;
 }
 
-static int match_dir_prefix(const char *base,
+/* :(icase)-aware string compare */
+static int basecmp(const struct pathspec_item *item,
+		   const char *base, const char *match, int len)
+{
+	if (item->magic & PATHSPEC_ICASE) {
+		int ret, n = len > item->prefix ? item->prefix : len;
+		ret = strncmp(base, match, n);
+		if (ret)
+			return ret;
+		base += n;
+		match += n;
+		len -= n;
+	}
+	return ps_strncmp(item, base, match, len);
+}
+
+static int match_dir_prefix(const struct pathspec_item *item,
+			    const char *base,
 			    const char *match, int matchlen)
 {
-	if (strncmp(base, match, matchlen))
+	if (basecmp(item, base, match, matchlen))
 		return 0;
 
 	/*
@@ -594,7 +630,7 @@ static int match_wildcard_base(const struct pathspec_item *item,
 		 */
 		if (baselen >= matchlen) {
 			*matched = matchlen;
-			return !strncmp(base, match, matchlen);
+			return !basecmp(item, base, match, matchlen);
 		}
 
 		dirlen = matchlen;
@@ -607,7 +643,7 @@ static int match_wildcard_base(const struct pathspec_item *item,
 		 * base ends with '/' so we are sure it really matches
 		 * directory
 		 */
-		if (strncmp(base, match, baselen))
+		if (basecmp(item, base, match, baselen))
 			return 0;
 		*matched = baselen;
 	} else
@@ -640,7 +676,8 @@ enum interesting tree_entry_interesting(const struct name_entry *entry,
 		       PATHSPEC_FROMTOP |
 		       PATHSPEC_MAXDEPTH |
 		       PATHSPEC_LITERAL |
-		       PATHSPEC_GLOB);
+		       PATHSPEC_GLOB |
+		       PATHSPEC_ICASE);
 
 	if (!ps->nr) {
 		if (!ps->recursive ||
@@ -663,7 +700,7 @@ enum interesting tree_entry_interesting(const struct name_entry *entry,
 
 		if (baselen >= matchlen) {
 			/* If it doesn't match, move along... */
-			if (!match_dir_prefix(base_str, match, matchlen))
+			if (!match_dir_prefix(item, base_str, match, matchlen))
 				goto match_wildcards;
 
 			if (!ps->recursive ||
@@ -679,8 +716,8 @@ enum interesting tree_entry_interesting(const struct name_entry *entry,
 		}
 
 		/* Either there must be no base, or the base must match. */
-		if (baselen == 0 || !strncmp(base_str, match, baselen)) {
-			if (match_entry(entry, pathlen,
+		if (baselen == 0 || !basecmp(item, base_str, match, baselen)) {
+			if (match_entry(item, entry, pathlen,
 					match + baselen, matchlen - baselen,
 					&never_interesting))
 				return entry_interesting;
-- 
1.8.2.83.gc99314b

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]