The File System Excludes module is a new programmatic way to exclude files
and folders from git's traversal of the working directory.
fsexcludes_init() should be called with a string buffer that contains a
NUL separated list of path names of the files and/or directories that
should be included.  Any path not listed will be excluded.  The paths
should be relative to the root of the working directory and be separated
by a single NUL.

The excludes logic in dir.c has been updated to honor the results of
fsexcludes_is_excluded_from().  If fsexcludes does not exclude the file,
the normal excludes logic is also checked as it could further reduce the
set of files that should be included.

Signed-off-by: Ben Peart <benpeart@xxxxxxxxxxxxx>
---
 Makefile     |   1 +
 dir.c        |  24 +++++-
 fsexcludes.c | 228 +++++++++++++++++++++++++++++++++++++++++++++++++++
 fsexcludes.h |  29 +++++++
 4 files changed, 280 insertions(+), 2 deletions(-)
 create mode 100644 fsexcludes.c
 create mode 100644 fsexcludes.h

diff --git a/Makefile b/Makefile
index f181687250..a4f1471272 100644
--- a/Makefile
+++ b/Makefile
@@ -822,6 +822,7 @@ LIB_OBJS += exec_cmd.o
 LIB_OBJS += fetch-object.o
 LIB_OBJS += fetch-pack.o
 LIB_OBJS += fsck.o
+LIB_OBJS += fsexcludes.o
 LIB_OBJS += fsmonitor.o
 LIB_OBJS += gettext.o
 LIB_OBJS += gpg-interface.o
diff --git a/dir.c b/dir.c
index 63a917be45..47a073efe1 100644
--- a/dir.c
+++ b/dir.c
@@ -18,6 +18,7 @@
 #include "utf8.h"
 #include "varint.h"
 #include "ewah/ewok.h"
+#include "fsexcludes.h"
 #include "fsmonitor.h"
 
 /*
@@ -1102,6 +1103,12 @@ int is_excluded_from_list(const char *pathname,
 			  struct exclude_list *el, struct index_state *istate)
 {
 	struct exclude *exclude;
+
+	if (*dtype == DT_UNKNOWN)
+		*dtype = get_dtype(NULL, istate, pathname, pathlen);
+	if (fsexcludes_is_excluded_from(istate, pathname, pathlen, *dtype) > 0)
+		return 1;
+
 	exclude = last_exclude_matching_from_list(pathname, pathlen, basename,
 						  dtype, el, istate);
 	if (exclude)
@@ -1317,8 +1324,15 @@ struct exclude *last_exclude_matching(struct dir_struct *dir,
 int is_excluded(struct dir_struct *dir, struct index_state *istate,
 		const char *pathname, int *dtype_p)
 {
-	struct exclude *exclude =
-		last_exclude_matching(dir, istate, pathname, dtype_p);
+	struct exclude *exclude;
+	int pathlen = strlen(pathname);
+
+	if (*dtype_p == DT_UNKNOWN)
+		*dtype_p = get_dtype(NULL, istate, pathname, pathlen);
+	if (fsexcludes_is_excluded_from(istate, pathname, pathlen, *dtype_p) > 0)
+		return 1;
+
+	exclude = last_exclude_matching(dir, istate, pathname, dtype_p);
 	if (exclude)
 		return exclude->flags & EXC_FLAG_NEGATIVE ? 0 : 1;
 	return 0;
@@ -1671,6 +1685,9 @@ static enum path_treatment treat_one_path(struct dir_struct *dir,
 	if (dtype != DT_DIR && has_path_in_index)
 		return path_none;
 
+	if (fsexcludes_is_excluded_from(istate, path->buf, path->len, dtype) > 0)
+		return path_excluded;
+
 	/*
 	 * When we are looking at a directory P in the working tree,
 	 * there are three cases:
@@ -2011,6 +2028,9 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
 		/* add the path to the appropriate result list */
 		switch (state) {
 		case path_excluded:
+			if (fsexcludes_is_excluded_from(istate, path.buf, path.len,
+				 get_dtype(cdir.de, istate, path.buf, path.len)) > 0)
+				break;
 			if (dir->flags & DIR_SHOW_IGNORED)
 				dir_add_name(dir, istate, path.buf, path.len);
 			else if ((dir->flags & DIR_SHOW_IGNORED_TOO) ||
diff --git a/fsexcludes.c b/fsexcludes.c
new file mode 100644
index 0000000000..0ef57f107b
--- /dev/null
+++ b/fsexcludes.c
@@ -0,0 +1,228 @@
+#include "cache.h"
+#include "fsexcludes.h"
+#include "hashmap.h"
+#include "strbuf.h"
+
+static int fsexcludes_initialized = 0;
+static struct strbuf fsexcludes_data = STRBUF_INIT;
+static struct hashmap fsexcludes_hashmap;
+static struct hashmap parent_directory_hashmap;
+
+/* One (pointer, length) pattern; pattern is borrowed, never owned. */
+struct fsexcludes {
+	struct hashmap_entry ent; /* must be the first member! */
+	const char *pattern;
+	int patternlen;
+};
+
+static unsigned int(*fsexcludeshash)(const void *buf, size_t len);
+static int(*fsexcludescmp)(const char *a, const char *b, size_t len);
+
+/*
+ * Patterns are (pointer, length) pairs that need not be NUL terminated,
+ * so two entries are equal only when their lengths match and that many
+ * bytes compare equal.  Returns 0 when the entries are equal.
+ */
+static int fsexcludes_hashmap_cmp(const void *unused_cmp_data,
+	const void *a, const void *b, const void *key)
+{
+	const struct fsexcludes *fse1 = a;
+	const struct fsexcludes *fse2 = b;
+
+	if (fse1->patternlen != fse2->patternlen)
+		return 1;
+	return fsexcludescmp(fse1->pattern, fse2->pattern, fse1->patternlen);
+}
+
+/*
+ * Return 0 if the path itself, or any leading directory of it (including
+ * the trailing '/'), is present in map; return 1 otherwise.
+ */
+static int check_fsexcludes_hashmap(struct hashmap *map, const char *pattern, int patternlen)
+{
+	struct strbuf sb = STRBUF_INIT;
+	struct fsexcludes fse;
+	char *slash;
+
+	/* Check straight mapping */
+	strbuf_add(&sb, pattern, patternlen);
+	fse.pattern = sb.buf;
+	fse.patternlen = sb.len;
+	hashmap_entry_init(&fse, fsexcludeshash(fse.pattern, fse.patternlen));
+	if (hashmap_get(map, &fse, NULL)) {
+		strbuf_release(&sb);
+		return 0;
+	}
+
+	/*
+	 * Check to see if it matches a directory or any path
+	 * underneath it. In other words, 'a/b/foo.txt' will match
+	 * 'a/' and 'a/b/'.
+	 */
+	slash = strchr(sb.buf, '/');
+	while (slash) {
+		fse.pattern = sb.buf;
+		fse.patternlen = slash - sb.buf + 1;
+		hashmap_entry_init(&fse, fsexcludeshash(fse.pattern, fse.patternlen));
+		if (hashmap_get(map, &fse, NULL)) {
+			strbuf_release(&sb);
+			return 0;
+		}
+		slash = strchr(slash + 1, '/');
+	}
+
+	strbuf_release(&sb);
+	return 1;
+}
+
+/* Add pattern/patternlen to map; the pattern bytes are not copied. */
+static void fsexcludes_hashmap_add(struct hashmap *map, const char *pattern, const int patternlen)
+{
+	struct fsexcludes *fse;
+
+	fse = xmalloc(sizeof(struct fsexcludes));
+	fse->pattern = pattern;
+	fse->patternlen = patternlen;
+	hashmap_entry_init(fse, fsexcludeshash(fse->pattern, fse->patternlen));
+	hashmap_add(map, fse);
+}
+
+static void initialize_fsexcludes_hashmap(struct hashmap *map, struct strbuf *fsexcludes_data)
+{
+	char *buf, *entry;
+	size_t len;
+	size_t i;
+
+	/*
+	 * Build a hashmap of the fsexcludes data we can use to look
+	 * for cache entry matches quickly
+	 */
+	fsexcludeshash = ignore_case ? memihash : memhash;
+	fsexcludescmp = ignore_case ? strncasecmp : strncmp;
+	hashmap_init(map, fsexcludes_hashmap_cmp, NULL, 0);
+
+	entry = buf = fsexcludes_data->buf;
+	len = fsexcludes_data->len;
+	for (i = 0; i < len; i++) {
+		if (buf[i] == '\0') {
+			fsexcludes_hashmap_add(map, entry, buf + i - entry);
+			entry = buf + i + 1;
+		}
+	}
+}
+
+static void parent_directory_hashmap_add(struct hashmap *map, const char *pattern, const int patternlen)
+{
+	char *slash;
+	struct fsexcludes *fse;
+
+	/*
+	 * Add any directories leading up to the file as the excludes logic
+	 * needs to match directories leading up to the files as well. Detect
+	 * and prevent unnecessary duplicate entries which will be common.
+	 */
+	if (patternlen > 1) {
+		slash = strchr(pattern + 1, '/');
+		while (slash) {
+			fse = xmalloc(sizeof(struct fsexcludes));
+			fse->pattern = pattern;
+			fse->patternlen = slash - pattern + 1;
+			hashmap_entry_init(fse, fsexcludeshash(fse->pattern, fse->patternlen));
+			if (hashmap_get(map, fse, NULL))
+				free(fse);
+			else
+				hashmap_add(map, fse);
+			slash = strchr(slash + 1, '/');
+		}
+	}
+}
+
+static void initialize_parent_directory_hashmap(struct hashmap *map, struct strbuf *vfs_data)
+{
+	char *buf, *entry;
+	size_t len;
+	size_t i;
+
+	/*
+	 * Build a hashmap of the parent directories contained in the virtual
+	 * file system data we can use to look for matches quickly
+	 */
+	fsexcludeshash = ignore_case ? memihash : memhash;
+	fsexcludescmp = ignore_case ? strncasecmp : strncmp;
+	hashmap_init(map, fsexcludes_hashmap_cmp, NULL, 0);
+
+	entry = buf = vfs_data->buf;
+	len = vfs_data->len;
+	for (i = 0; i < len; i++) {
+		if (buf[i] == '\0') {
+			parent_directory_hashmap_add(map, entry, buf + i - entry);
+			entry = buf + i + 1;
+		}
+	}
+}
+
+/*
+ * Return 0 if pathname with a '/' appended is present in map (i.e. it
+ * is a leading directory of some listed path); return 1 otherwise.
+ */
+static int check_directory_hashmap(struct hashmap *map, const char *pathname, int pathlen)
+{
+	struct strbuf sb = STRBUF_INIT;
+	struct fsexcludes fse;
+
+	/* Check for directory */
+	strbuf_add(&sb, pathname, pathlen);
+	strbuf_addch(&sb, '/');
+	fse.pattern = sb.buf;
+	fse.patternlen = sb.len;
+	hashmap_entry_init(&fse, fsexcludeshash(fse.pattern, fse.patternlen));
+	if (hashmap_get(map, &fse, NULL)) {
+		strbuf_release(&sb);
+		return 0;
+	}
+
+	strbuf_release(&sb);
+	return 1;
+}
+
+/*
+ * Return 1 for exclude, 0 for include and -1 for undecided.
+ */
+int fsexcludes_is_excluded_from(struct index_state *istate,
+	const char *pathname, int pathlen, int dtype)
+{
+	if (!fsexcludes_initialized)
+		return -1;
+
+	if (dtype == DT_REG) {
+		/* lazily init the hashmap; tablesize is 0 until hashmap_init() */
+		if (!fsexcludes_hashmap.tablesize)
+			initialize_fsexcludes_hashmap(&fsexcludes_hashmap, &fsexcludes_data);
+
+		return check_fsexcludes_hashmap(&fsexcludes_hashmap, pathname, pathlen);
+	}
+
+	if (dtype == DT_DIR || dtype == DT_LNK) {
+		/* lazily init the hashmap; tablesize is 0 until hashmap_init() */
+		if (!parent_directory_hashmap.tablesize)
+			initialize_parent_directory_hashmap(&parent_directory_hashmap, &fsexcludes_data);
+
+		return check_directory_hashmap(&parent_directory_hashmap, pathname, pathlen);
+	}
+
+	return -1;
+}
+
+void fsexcludes_init(struct strbuf *sb)
+{
+	fsexcludes_initialized = 1;
+	fsexcludes_data = *sb;
+	strbuf_init(sb, 0);
+}
+
+void fsexcludes_free(void) {
+	strbuf_release(&fsexcludes_data);
+	hashmap_free(&fsexcludes_hashmap, 1);
+	hashmap_free(&parent_directory_hashmap, 1);
+	fsexcludes_initialized = 0;
+}
diff --git a/fsexcludes.h b/fsexcludes.h
new file mode 100644
index 0000000000..10246daa02
--- /dev/null
+++ b/fsexcludes.h
@@ -0,0 +1,29 @@
+#ifndef FSEXCLUDES_H
+#define FSEXCLUDES_H
+
+/*
+ * The file system excludes functions provide a way to programmatically limit
+ * where git will scan for untracked files.  This is used to speed up the
+ * scan by avoiding scanning parts of the work directory that do not have
+ * any new files.
+ */
+
+/*
+ * sb should contain a NUL separated list of path names of the files
+ * and/or directories that should be checked.  Any path not listed will
+ * be excluded from the scan.
+ *
+ * NOTE: fsexcludes_init() will take ownership of the storage passed in
+ * sb and will reset sb to `STRBUF_INIT`
+ */
+void fsexcludes_init(struct strbuf *sb);
+void fsexcludes_free(void);
+
+/*
+ * Return 1 for exclude, 0 for include and -1 for undecided.
+ */
+int fsexcludes_is_excluded_from(struct index_state *istate,
+	const char *pathname, int pathlen, int dtype);
+
+
+#endif
-- 
2.17.0.windows.1