[PATCH 4/6] attr: more matching optimizations from .gitignore

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



.gitattributes and .gitignore share the same pattern syntax but has
separate matching implementation. Over the years, ignore's
implementation accumulates more optimizations while attr's stays the
same.

This patch adds those optimizations to .gitattributes. Basically it
tries to avoid fnmatch/wildmatch in favor of strncmp as much as
possible.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@xxxxxxxxx>
---
 attr.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++----------
 dir.c  |  4 ++--
 dir.h  |  2 ++
 3 files changed, 57 insertions(+), 12 deletions(-)

diff --git a/attr.c b/attr.c
index eb576ac..3fde9fa 100644
--- a/attr.c
+++ b/attr.c
@@ -116,6 +116,13 @@ struct attr_state {
 	const char *setto;
 };
 
+struct pattern {
+	const char *pattern;
+	int patternlen;
+	int nowildcardlen;
+	int flags;		/* EXC_FLAG_* */
+};
+
 /*
  * One rule, as from a .gitattributes file.
  *
@@ -131,7 +138,7 @@ struct attr_state {
  * listed as they appear in the file (macros unexpanded).
  */
 struct match_attr {
-	const char *pattern;
+	struct pattern pat;
 	struct git_attr *attr;
 	char is_macro;
 	unsigned num_attr;
@@ -243,7 +250,13 @@ static struct match_attr *parse_attr_line(const char *line, const char *src,
 		char *p = (char *)&(res->state[num_attr]);
 		memcpy(p, name, namelen);
 		p[namelen] = 0;
-		res->pattern = p;
+		res->pat.pattern = p;
+		res->pat.patternlen = strlen(p);
+		res->pat.nowildcardlen = simple_length(p);
+		if (!strchr(p, '/'))
+			res->pat.flags |= EXC_FLAG_NODIR;
+		if (*p == '*' && no_wildcard(p+1))
+			res->pat.flags |= EXC_FLAG_ENDSWITH;
 	}
 	res->is_macro = is_macro;
 	res->num_attr = num_attr;
@@ -645,26 +658,56 @@ static void prepare_attr_stack(const char *path)
 
 static int path_matches(const char *pathname, int pathlen,
 			const char *basename,
-			const char *pattern,
+			const struct pattern *pat,
 			const char *base, int baselen)
 {
-	if (!strchr(pattern, '/')) {
+	const char *pattern = pat->pattern;
+	int prefix = pat->nowildcardlen;
+	const char *name;
+	int namelen;
+
+	if (pat->flags & EXC_FLAG_NODIR) {
+		if (prefix == pat->patternlen &&
+		    !strcmp_icase(pattern, basename))
+			return 1;
+
+		if (pat->flags & EXC_FLAG_ENDSWITH &&
+		    pat->patternlen - 1 <= pathlen &&
+		    !strcmp_icase(pattern + 1, pathname +
+				  pathlen - pat->patternlen + 1))
+			return 1;
+
 		return (fnmatch_icase(pattern, basename, 0) == 0);
 	}
 	/*
 	 * match with FNM_PATHNAME; the pattern has base implicitly
 	 * in front of it.
 	 */
-	if (*pattern == '/')
+	if (*pattern == '/') {
 		pattern++;
+		prefix--;
+	}
+
+	/*
+	 * note: unlike excluded_from_list, baselen here does not
+	 * contain the trailing slash
+	 */
+
 	if (pathlen < baselen ||
 	    (baselen && pathname[baselen] != '/') ||
 	    strncmp(pathname, base, baselen))
 		return 0;
-	if (baselen != 0)
-		baselen++;
-	return (ignore_case && iwildmatch(pattern, pathname + baselen)) ||
-		(!ignore_case && wildmatch(pattern, pathname + baselen));
+
+	namelen = baselen ? pathlen - baselen - 1 : pathlen;
+	name = pathname + pathlen - namelen;
+
+	/* if the non-wildcard part is longer than the remaining
+	   pathname, surely it cannot match */
+	if (!namelen || prefix > namelen)
+		return 0;
+
+	return (ignore_case && iwildmatch(pattern, name)) ||
+		(!ignore_case && wildmatch(pattern, name));
 }
 
 static int macroexpand_one(int attr_nr, int rem);
@@ -702,7 +745,7 @@ static int fill(const char *path, int pathlen, const char *basename,
 		if (a->is_macro)
 			continue;
 		if (path_matches(path, pathlen, basename,
-				 a->pattern, base, stk->originlen))
+				 &a->pat, base, stk->originlen))
 			rem = fill_one("fill", a, rem);
 	}
 	return rem;
diff --git a/dir.c b/dir.c
index 92cda82..fd49336 100644
--- a/dir.c
+++ b/dir.c
@@ -292,7 +292,7 @@ int match_pathspec_depth(const struct pathspec *ps,
 /*
  * Return the length of the "simple" part of a path match limiter.
  */
-static int simple_length(const char *match)
+int simple_length(const char *match)
 {
 	int len = -1;
 
@@ -304,7 +304,7 @@ static int simple_length(const char *match)
 	}
 }
 
-static int no_wildcard(const char *string)
+int no_wildcard(const char *string)
 {
 	return string[simple_length(string)] == '\0';
 }
diff --git a/dir.h b/dir.h
index 893465a..7ea8678 100644
--- a/dir.h
+++ b/dir.h
@@ -101,6 +101,8 @@ extern void add_exclude(const char *string, const char *base,
 			int baselen, struct exclude_list *which);
 extern void free_excludes(struct exclude_list *el);
 extern int file_exists(const char *);
+extern int simple_length(const char *match);
+extern int no_wildcard(const char *string);
 
 extern int is_inside_dir(const char *dir);
 extern int dir_inside_of(const char *subdir, const char *dir);
-- 
1.7.12.1.405.gb727dc9

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]