A non-basename pattern that does not contain /**/ can't match anything outside the attached directory. Record its directory level and avoid matching unless the pathname is also at the same directory level. This optimization shines when there are a lot of non-basename patterns in the root .gitignore and a big/deep worktree. Due to the cascading rule of .gitignore, patterns in the root .gitignore are checked for _all_ entries in the worktree. before after user 0m0.424s 0m0.365s user 0m0.427s 0m0.366s user 0m0.432s 0m0.374s user 0m0.435s 0m0.374s user 0m0.435s 0m0.377s user 0m0.437s 0m0.381s user 0m0.439s 0m0.381s user 0m0.440s 0m0.383s user 0m0.450s 0m0.384s user 0m0.454s 0m0.384s Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@xxxxxxxxx> --- attr.c | 3 ++- dir.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++++------------------ dir.h | 9 ++++++++- 3 files changed, 60 insertions(+), 20 deletions(-) diff --git a/attr.c b/attr.c index 1818ba5..7764ddd 100644 --- a/attr.c +++ b/attr.c @@ -254,7 +254,8 @@ static struct match_attr *parse_attr_line(const char *line, const char *src, parse_exclude_pattern(&res->u.pat.pattern, &res->u.pat.patternlen, &res->u.pat.flags, - &res->u.pat.nowildcardlen); + &res->u.pat.nowildcardlen, + NULL); if (res->u.pat.flags & EXC_FLAG_MUSTBEDIR) res->u.pat.patternlen++; if (res->u.pat.flags & EXC_FLAG_NEGATIVE) { diff --git a/dir.c b/dir.c index 880b5e6..de7a6ba 100644 --- a/dir.c +++ b/dir.c @@ -360,10 +360,12 @@ static int no_wildcard(const char *string) void parse_exclude_pattern(const char **pattern, int *patternlen, int *flags, - int *nowildcardlen) + int *nowildcardlen, + int *dirs_p) { const char *p = *pattern; size_t i, len; + int dirs; *flags = 0; if (*p == '!') { @@ -375,12 +377,15 @@ void parse_exclude_pattern(const char **pattern, len--; *flags |= EXC_FLAG_MUSTBEDIR; } - for (i = 0; i < len; i++) { + for (i = 0, dirs = 0; i < len; i++) { if (p[i] == '/') - break; + dirs++; } - if (i == len) + if (!dirs) *flags |= EXC_FLAG_NODIR; 
+ else if (*p == '/') + dirs--; + *nowildcardlen = simple_length(p); /* * we should have excluded the trailing slash from 'p' too, @@ -393,6 +398,8 @@ void parse_exclude_pattern(const char **pattern, *flags |= EXC_FLAG_ENDSWITH; *pattern = p; *patternlen = len; + if (dirs_p) + *dirs_p = dirs; } void add_exclude(const char *string, const char *base, @@ -402,8 +409,9 @@ void add_exclude(const char *string, const char *base, int patternlen; int flags; int nowildcardlen; + int dirs; - parse_exclude_pattern(&string, &patternlen, &flags, &nowildcardlen); + parse_exclude_pattern(&string, &patternlen, &flags, &nowildcardlen, &dirs); if (flags & EXC_FLAG_MUSTBEDIR) { char *s; x = xmalloc(sizeof(*x) + patternlen + 1); @@ -415,11 +423,26 @@ void add_exclude(const char *string, const char *base, x = xmalloc(sizeof(*x)); x->pattern = string; } + /* + * TODO: nowildcardlen < patternlen is a stricter than + * necessary mainly to exclude "**" that breaks directory + * boundary. Patterns like "/foo-*" should be fine. + */ + if ((flags & EXC_FLAG_NODIR) || nowildcardlen < patternlen) + dirs = -1; + else { + int i; + for (i = 0; i < baselen; i++) { + if (base[i] == '/') + dirs++; + } + } x->patternlen = patternlen; x->nowildcardlen = nowildcardlen; x->base = base; x->baselen = baselen; x->flags = flags; + x->dirs = dirs; x->srcpos = srcpos; ALLOC_GROW(el->excludes, el->nr + 1, el->alloc); el->excludes[el->nr++] = x; @@ -701,7 +724,7 @@ int match_pathname(const char *pathname, int pathlen, * matched, or NULL for undecided. 
*/ static struct exclude *last_exclude_matching_from_list(const char *pathname, - int pathlen, + int pathlen, int dirs, const char *basename, int *dtype, struct exclude_list *el) @@ -732,6 +755,9 @@ static struct exclude *last_exclude_matching_from_list(const char *pathname, continue; } + if (dirs >= 0 && x->dirs >= 0 && x->dirs != dirs) + continue; + assert(x->baselen == 0 || x->base[x->baselen - 1] == '/'); if (match_pathname(pathname, pathlen, x->base, x->baselen ? x->baselen - 1 : 0, @@ -750,7 +776,8 @@ int is_excluded_from_list(const char *pathname, struct exclude_list *el) { struct exclude *exclude; - exclude = last_exclude_matching_from_list(pathname, pathlen, basename, dtype, el); + exclude = last_exclude_matching_from_list(pathname, pathlen, -1, + basename, dtype, el); if (exclude) return exclude->flags & EXC_FLAG_NEGATIVE ? 0 : 1; return -1; /* undecided */ @@ -765,6 +792,7 @@ int is_excluded_from_list(const char *pathname, static struct exclude *last_exclude_matching(struct dir_struct *dir, const char *pathname, int pathlen, + int dirs, int *dtype_p) { int i, j; @@ -779,8 +807,8 @@ static struct exclude *last_exclude_matching(struct dir_struct *dir, group = &dir->exclude_list_group[i]; for (j = group->nr - 1; j >= 0; j--) { exclude = last_exclude_matching_from_list( - pathname, pathlen, basename, dtype_p, - &group->el[j]); + pathname, pathlen, dir->dir_level, + basename, dtype_p, &group->el[j]); if (exclude) return exclude; } @@ -794,11 +822,11 @@ static struct exclude *last_exclude_matching(struct dir_struct *dir, * Returns 1 if true, otherwise 0. */ static int is_excluded(struct dir_struct *dir, - const char *pathname, int pathlen, + const char *pathname, int pathlen, int dirs, int *dtype_p) { struct exclude *exclude = - last_exclude_matching(dir, pathname, pathlen, dtype_p); + last_exclude_matching(dir, pathname, pathlen, dirs, dtype_p); if (exclude) return exclude->flags & EXC_FLAG_NEGATIVE ? 
0 : 1; return 0; @@ -862,7 +890,7 @@ struct exclude *last_exclude_matching_path(struct path_exclude_check *check, int dt = DT_DIR; exclude = last_exclude_matching(check->dir, path->buf, path->len, - &dt); + -1, &dt); if (exclude) { check->exclude = exclude; return exclude; @@ -874,7 +902,7 @@ struct exclude *last_exclude_matching_path(struct path_exclude_check *check, /* An entry in the index; cannot be a directory with subentries */ strbuf_setlen(path, 0); - return last_exclude_matching(check->dir, name, namelen, dtype); + return last_exclude_matching(check->dir, name, namelen, -1, dtype); } /* @@ -1248,11 +1276,11 @@ enum path_treatment { }; static enum path_treatment treat_one_path(struct dir_struct *dir, - struct strbuf *path, + struct strbuf *path, int dirs, const struct path_simplify *simplify, int dtype, struct dirent *de) { - int exclude = is_excluded(dir, path->buf, path->len, &dtype); + int exclude = is_excluded(dir, path->buf, path->len, dirs, &dtype); if (exclude && (dir->flags & DIR_COLLECT_IGNORED) && exclude_matches_pathspec(path->buf, path->len, simplify)) dir_add_ignored(dir, path->buf, path->len); @@ -1310,7 +1338,7 @@ static enum path_treatment treat_path(struct dir_struct *dir, return path_ignored; dtype = DTYPE(de); - return treat_one_path(dir, path, simplify, dtype, de); + return treat_one_path(dir, path, -1, simplify, dtype, de); } /* @@ -1338,6 +1366,7 @@ static int read_directory_recursive(struct dir_struct *dir, if (!fdir) goto out; + dir->dir_level++; while ((de = readdir(fdir)) != NULL) { switch (treat_path(dir, de, &path, baselen, simplify)) { case path_recurse: @@ -1357,6 +1386,7 @@ static int read_directory_recursive(struct dir_struct *dir, } closedir(fdir); out: + dir->dir_level--; strbuf_release(&path); return contents; @@ -1427,7 +1457,7 @@ static int treat_leading_path(struct dir_struct *dir, break; if (simplify_away(sb.buf, sb.len, simplify)) break; - if (treat_one_path(dir, &sb, simplify, + if (treat_one_path(dir, &sb, -1, 
simplify, DT_DIR, NULL) == path_ignored) break; /* do not recurse into it */ if (len <= baselen) { @@ -1447,8 +1477,10 @@ int read_directory(struct dir_struct *dir, const char *path, int len, const char return dir->nr; simplify = create_simplify(pathspec); - if (!len || treat_leading_path(dir, path, len, simplify)) + if (!len || treat_leading_path(dir, path, len, simplify)) { + dir->dir_level = -1; read_directory_recursive(dir, path, len, 0, simplify); + } free_simplify(simplify); qsort(dir->entries, dir->nr, sizeof(struct dir_entry *), cmp_name); qsort(dir->ignored, dir->ignored_nr, sizeof(struct dir_entry *), cmp_name); diff --git a/dir.h b/dir.h index 560ade4..c434f1c 100644 --- a/dir.h +++ b/dir.h @@ -45,6 +45,7 @@ struct exclude_list { const char *base; int baselen; int flags; + int dirs; /* * Counting starts from 1 for line numbers in ignore files, @@ -87,6 +88,8 @@ struct dir_struct { /* Exclude info */ const char *exclude_per_dir; + int dir_level; + /* * We maintain three groups of exclude pattern lists: * @@ -171,7 +174,11 @@ extern struct exclude_list *add_exclude_list(struct dir_struct *dir, extern int add_excludes_from_file_to_list(const char *fname, const char *base, int baselen, struct exclude_list *el, int check_index); extern void add_excludes_from_file(struct dir_struct *, const char *fname); -extern void parse_exclude_pattern(const char **string, int *patternlen, int *flags, int *nowildcardlen); +extern void parse_exclude_pattern(const char **string, + int *patternlen, + int *flags, + int *nowildcardlen, + int *dirs); extern void add_exclude(const char *string, const char *base, int baselen, struct exclude_list *el, int srcpos); extern void clear_exclude_list(struct exclude_list *el); -- 1.8.1.2.536.gf441e6d -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html