[PATCH v2 6/6] exclude: filter patterns by directory level

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



A non-basename pattern that does not contain /**/ can't match anything
outside the attached directory. Record its directory level and avoid
matching unless the pathname is also at the same directory level.

This optimization shines when there are a lot of non-basename patterns
in the root .gitignore and a big/deep worktree. Due to the cascading
rule of .gitignore, patterns in the root .gitignore are checked for
_all_ entries in the worktree.

        before      after
user    0m0.424s    0m0.365s
user    0m0.427s    0m0.366s
user    0m0.432s    0m0.374s
user    0m0.435s    0m0.374s
user    0m0.435s    0m0.377s
user    0m0.437s    0m0.381s
user    0m0.439s    0m0.381s
user    0m0.440s    0m0.383s
user    0m0.450s    0m0.384s
user    0m0.454s    0m0.384s

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@xxxxxxxxx>
---
 attr.c |  3 ++-
 dir.c  | 68 ++++++++++++++++++++++++++++++++++++++++++++++++------------------
 dir.h  |  9 ++++++++-
 3 files changed, 60 insertions(+), 20 deletions(-)

diff --git a/attr.c b/attr.c
index 1818ba5..7764ddd 100644
--- a/attr.c
+++ b/attr.c
@@ -254,7 +254,8 @@ static struct match_attr *parse_attr_line(const char *line, const char *src,
 		parse_exclude_pattern(&res->u.pat.pattern,
 				      &res->u.pat.patternlen,
 				      &res->u.pat.flags,
-				      &res->u.pat.nowildcardlen);
+				      &res->u.pat.nowildcardlen,
+				      NULL);
 		if (res->u.pat.flags & EXC_FLAG_MUSTBEDIR)
 			res->u.pat.patternlen++;
 		if (res->u.pat.flags & EXC_FLAG_NEGATIVE) {
diff --git a/dir.c b/dir.c
index 880b5e6..de7a6ba 100644
--- a/dir.c
+++ b/dir.c
@@ -360,10 +360,12 @@ static int no_wildcard(const char *string)
 void parse_exclude_pattern(const char **pattern,
 			   int *patternlen,
 			   int *flags,
-			   int *nowildcardlen)
+			   int *nowildcardlen,
+			   int *dirs_p)
 {
 	const char *p = *pattern;
 	size_t i, len;
+	int dirs;
 
 	*flags = 0;
 	if (*p == '!') {
@@ -375,12 +377,15 @@ void parse_exclude_pattern(const char **pattern,
 		len--;
 		*flags |= EXC_FLAG_MUSTBEDIR;
 	}
-	for (i = 0; i < len; i++) {
+	for (i = 0, dirs = 0; i < len; i++) {
 		if (p[i] == '/')
-			break;
+			dirs++;
 	}
-	if (i == len)
+	if (!dirs)
 		*flags |= EXC_FLAG_NODIR;
+	else if (*p == '/')
+		dirs--;
+
 	*nowildcardlen = simple_length(p);
 	/*
 	 * we should have excluded the trailing slash from 'p' too,
@@ -393,6 +398,8 @@ void parse_exclude_pattern(const char **pattern,
 		*flags |= EXC_FLAG_ENDSWITH;
 	*pattern = p;
 	*patternlen = len;
+	if (dirs_p)
+		*dirs_p = dirs;
 }
 
 void add_exclude(const char *string, const char *base,
@@ -402,8 +409,9 @@ void add_exclude(const char *string, const char *base,
 	int patternlen;
 	int flags;
 	int nowildcardlen;
+	int dirs;
 
-	parse_exclude_pattern(&string, &patternlen, &flags, &nowildcardlen);
+	parse_exclude_pattern(&string, &patternlen, &flags, &nowildcardlen, &dirs);
 	if (flags & EXC_FLAG_MUSTBEDIR) {
 		char *s;
 		x = xmalloc(sizeof(*x) + patternlen + 1);
@@ -415,11 +423,26 @@ void add_exclude(const char *string, const char *base,
 		x = xmalloc(sizeof(*x));
 		x->pattern = string;
 	}
+	/*
+	 * TODO: nowildcardlen < patternlen is stricter than necessary;
+	 * it is mainly there to exclude "**", which breaks directory
+	 * boundaries. Patterns like "/foo-*" should be fine.
+	 */
+	if ((flags & EXC_FLAG_NODIR) || nowildcardlen < patternlen)
+		dirs = -1;
+	else {
+		int i;
+		for (i = 0; i < baselen; i++) {
+			if (base[i] == '/')
+				dirs++;
+		}
+	}
 	x->patternlen = patternlen;
 	x->nowildcardlen = nowildcardlen;
 	x->base = base;
 	x->baselen = baselen;
 	x->flags = flags;
+	x->dirs = dirs;
 	x->srcpos = srcpos;
 	ALLOC_GROW(el->excludes, el->nr + 1, el->alloc);
 	el->excludes[el->nr++] = x;
@@ -701,7 +724,7 @@ int match_pathname(const char *pathname, int pathlen,
  * matched, or NULL for undecided.
  */
 static struct exclude *last_exclude_matching_from_list(const char *pathname,
-						       int pathlen,
+						       int pathlen, int dirs,
 						       const char *basename,
 						       int *dtype,
 						       struct exclude_list *el)
@@ -732,6 +755,9 @@ static struct exclude *last_exclude_matching_from_list(const char *pathname,
 			continue;
 		}
 
+		if (dirs >= 0 && x->dirs >= 0 && x->dirs != dirs)
+			continue;
+
 		assert(x->baselen == 0 || x->base[x->baselen - 1] == '/');
 		if (match_pathname(pathname, pathlen,
 				   x->base, x->baselen ? x->baselen - 1 : 0,
@@ -750,7 +776,8 @@ int is_excluded_from_list(const char *pathname,
 			  struct exclude_list *el)
 {
 	struct exclude *exclude;
-	exclude = last_exclude_matching_from_list(pathname, pathlen, basename, dtype, el);
+	exclude = last_exclude_matching_from_list(pathname, pathlen, -1,
+						  basename, dtype, el);
 	if (exclude)
 		return exclude->flags & EXC_FLAG_NEGATIVE ? 0 : 1;
 	return -1; /* undecided */
@@ -765,6 +792,7 @@ int is_excluded_from_list(const char *pathname,
 static struct exclude *last_exclude_matching(struct dir_struct *dir,
 					     const char *pathname,
 					     int pathlen,
+					     int dirs,
 					     int *dtype_p)
 {
 	int i, j;
@@ -779,8 +807,8 @@ static struct exclude *last_exclude_matching(struct dir_struct *dir,
 		group = &dir->exclude_list_group[i];
 		for (j = group->nr - 1; j >= 0; j--) {
 			exclude = last_exclude_matching_from_list(
-				pathname, pathlen, basename, dtype_p,
-				&group->el[j]);
+				pathname, pathlen, dir->dir_level,
+				basename, dtype_p, &group->el[j]);
 			if (exclude)
 				return exclude;
 		}
@@ -794,11 +822,11 @@ static struct exclude *last_exclude_matching(struct dir_struct *dir,
  * Returns 1 if true, otherwise 0.
  */
 static int is_excluded(struct dir_struct *dir,
-		       const char *pathname, int pathlen,
+		       const char *pathname, int pathlen, int dirs,
 		       int *dtype_p)
 {
 	struct exclude *exclude =
-		last_exclude_matching(dir, pathname, pathlen, dtype_p);
+		last_exclude_matching(dir, pathname, pathlen, dirs, dtype_p);
 	if (exclude)
 		return exclude->flags & EXC_FLAG_NEGATIVE ? 0 : 1;
 	return 0;
@@ -862,7 +890,7 @@ struct exclude *last_exclude_matching_path(struct path_exclude_check *check,
 			int dt = DT_DIR;
 			exclude = last_exclude_matching(check->dir,
 							path->buf, path->len,
-							&dt);
+							-1, &dt);
 			if (exclude) {
 				check->exclude = exclude;
 				return exclude;
@@ -874,7 +902,7 @@ struct exclude *last_exclude_matching_path(struct path_exclude_check *check,
 	/* An entry in the index; cannot be a directory with subentries */
 	strbuf_setlen(path, 0);
 
-	return last_exclude_matching(check->dir, name, namelen, dtype);
+	return last_exclude_matching(check->dir, name, namelen, -1, dtype);
 }
 
 /*
@@ -1248,11 +1276,11 @@ enum path_treatment {
 };
 
 static enum path_treatment treat_one_path(struct dir_struct *dir,
-					  struct strbuf *path,
+					  struct strbuf *path, int dirs,
 					  const struct path_simplify *simplify,
 					  int dtype, struct dirent *de)
 {
-	int exclude = is_excluded(dir, path->buf, path->len, &dtype);
+	int exclude = is_excluded(dir, path->buf, path->len, dirs, &dtype);
 	if (exclude && (dir->flags & DIR_COLLECT_IGNORED)
 	    && exclude_matches_pathspec(path->buf, path->len, simplify))
 		dir_add_ignored(dir, path->buf, path->len);
@@ -1310,7 +1338,7 @@ static enum path_treatment treat_path(struct dir_struct *dir,
 		return path_ignored;
 
 	dtype = DTYPE(de);
-	return treat_one_path(dir, path, simplify, dtype, de);
+	return treat_one_path(dir, path, -1, simplify, dtype, de);
 }
 
 /*
@@ -1338,6 +1366,7 @@ static int read_directory_recursive(struct dir_struct *dir,
 	if (!fdir)
 		goto out;
 
+	dir->dir_level++;
 	while ((de = readdir(fdir)) != NULL) {
 		switch (treat_path(dir, de, &path, baselen, simplify)) {
 		case path_recurse:
@@ -1357,6 +1386,7 @@ static int read_directory_recursive(struct dir_struct *dir,
 	}
 	closedir(fdir);
  out:
+	dir->dir_level--;
 	strbuf_release(&path);
 
 	return contents;
@@ -1427,7 +1457,7 @@ static int treat_leading_path(struct dir_struct *dir,
 			break;
 		if (simplify_away(sb.buf, sb.len, simplify))
 			break;
-		if (treat_one_path(dir, &sb, simplify,
+		if (treat_one_path(dir, &sb, -1, simplify,
 				   DT_DIR, NULL) == path_ignored)
 			break; /* do not recurse into it */
 		if (len <= baselen) {
@@ -1447,8 +1477,10 @@ int read_directory(struct dir_struct *dir, const char *path, int len, const char
 		return dir->nr;
 
 	simplify = create_simplify(pathspec);
-	if (!len || treat_leading_path(dir, path, len, simplify))
+	if (!len || treat_leading_path(dir, path, len, simplify)) {
+		dir->dir_level = -1;
 		read_directory_recursive(dir, path, len, 0, simplify);
+	}
 	free_simplify(simplify);
 	qsort(dir->entries, dir->nr, sizeof(struct dir_entry *), cmp_name);
 	qsort(dir->ignored, dir->ignored_nr, sizeof(struct dir_entry *), cmp_name);
diff --git a/dir.h b/dir.h
index 560ade4..c434f1c 100644
--- a/dir.h
+++ b/dir.h
@@ -45,6 +45,7 @@ struct exclude_list {
 		const char *base;
 		int baselen;
 		int flags;
+		int dirs;
 
 		/*
 		 * Counting starts from 1 for line numbers in ignore files,
@@ -87,6 +88,8 @@ struct dir_struct {
 	/* Exclude info */
 	const char *exclude_per_dir;
 
+	int dir_level;
+
 	/*
 	 * We maintain three groups of exclude pattern lists:
 	 *
@@ -171,7 +174,11 @@ extern struct exclude_list *add_exclude_list(struct dir_struct *dir,
 extern int add_excludes_from_file_to_list(const char *fname, const char *base, int baselen,
 					  struct exclude_list *el, int check_index);
 extern void add_excludes_from_file(struct dir_struct *, const char *fname);
-extern void parse_exclude_pattern(const char **string, int *patternlen, int *flags, int *nowildcardlen);
+extern void parse_exclude_pattern(const char **string,
+				  int *patternlen,
+				  int *flags,
+				  int *nowildcardlen,
+				  int *dirs);
 extern void add_exclude(const char *string, const char *base,
 			int baselen, struct exclude_list *el, int srcpos);
 extern void clear_exclude_list(struct exclude_list *el);
-- 
1.8.1.2.536.gf441e6d

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]