On 09/18, Jameson Miller wrote: > Improve the performance of the directory listing logic when it wants to list > non-empty ignored directories. In order to show non-empty ignored directories, > the existing logic will recursively iterate through all contents of an ignored > directory. This change introduces the optimization to stop iterating through > the contents once it finds the first file. This can have a significant > improvement in 'git status --ignored' performance in repositories with a large > number of files in ignored directories. > > For an example of the performance difference on an example repository with > 196,000 files in 400 ignored directories: > > | Command | Time (s) | > | -------------------------- | --------- | > | git status | 1.2 | > | git status --ignored (old) | 3.9 | > | git status --ignored (new) | 1.4 | > > Signed-off-by: Jameson Miller <jamill@xxxxxxxxxxxxx> Everything looks good to me. My only nit (and no need to change it for this patch) is that the first line of the commit msg usually has the form: <area>: <short summary> So that its easy to tell which part of the code a commit is changing. Seriously, great patch. Thanks! > --- > dir.c | 47 +++++++++++++++++++++++++++++++++++++++++------ > 1 file changed, 41 insertions(+), 6 deletions(-) > > diff --git a/dir.c b/dir.c > index 1c55dc3..1d17b80 100644 > --- a/dir.c > +++ b/dir.c > @@ -49,7 +49,7 @@ struct cached_dir { > static enum path_treatment read_directory_recursive(struct dir_struct *dir, > struct index_state *istate, const char *path, int len, > struct untracked_cache_dir *untracked, > - int check_only, const struct pathspec *pathspec); > + int check_only, int stop_at_first_file, const struct pathspec *pathspec); > static int get_dtype(struct dirent *de, struct index_state *istate, > const char *path, int len); > > @@ -1404,8 +1404,13 @@ static enum path_treatment treat_directory(struct dir_struct *dir, > > untracked = lookup_untracked(dir->untracked, untracked, > dirname + baselen, len - baselen); > + > + /* > + * If this is an excluded directory, then we only need to check if > + * the directory contains any files. > + */ > return read_directory_recursive(dir, istate, dirname, len, > - untracked, 1, pathspec); > + untracked, 1, exclude, pathspec); > } > > /* > @@ -1633,7 +1638,7 @@ static enum path_treatment treat_path_fast(struct dir_struct *dir, > * with check_only set. > */ > return read_directory_recursive(dir, istate, path->buf, path->len, > - cdir->ucd, 1, pathspec); > + cdir->ucd, 1, 0, pathspec); > /* > * We get path_recurse in the first run when > * directory_exists_in_index() returns index_nonexistent. We > @@ -1793,12 +1798,20 @@ static void close_cached_dir(struct cached_dir *cdir) > * Also, we ignore the name ".git" (even if it is not a directory). > * That likely will not change. > * > + * If 'stop_at_first_file' is specified, 'path_excluded' is returned > + * to signal that a file was found. This is the least significant value that > + * indicates that a file was encountered that does not depend on the order of > + * whether an untracked or exluded path was encountered first. > + * > * Returns the most significant path_treatment value encountered in the scan. > + * If 'stop_at_first_file' is specified, `path_excluded` is the most > + * significant path_treatment value that will be returned. > */ > + > static enum path_treatment read_directory_recursive(struct dir_struct *dir, > struct index_state *istate, const char *base, int baselen, > struct untracked_cache_dir *untracked, int check_only, > - const struct pathspec *pathspec) > + int stop_at_first_file, const struct pathspec *pathspec) > { > struct cached_dir cdir; > enum path_treatment state, subdir_state, dir_state = path_none; > @@ -1832,12 +1845,34 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir, > subdir_state = > read_directory_recursive(dir, istate, path.buf, > path.len, ud, > - check_only, pathspec); > + check_only, stop_at_first_file, pathspec); > if (subdir_state > dir_state) > dir_state = subdir_state; > } > > if (check_only) { > + if (stop_at_first_file) { > + /* > + * If stopping at first file, then > + * signal that a file was found by > + * returning `path_excluded`. This is > + * to return a consistent value > + * regardless of whether an ignored or > + * excluded file happened to be > + * encountered 1st. > + * > + * In current usage, the > + * `stop_at_first_file` is passed when > + * an ancestor directory has matched > + * an exclude pattern, so any found > + * files will be excluded. > + */ > + if (dir_state >= path_excluded) { > + dir_state = path_excluded; > + break; > + } > + } > + > /* abort early if maximum state has been reached */ > if (dir_state == path_untracked) { > if (cdir.fdir) > @@ -2108,7 +2143,7 @@ int read_directory(struct dir_struct *dir, struct index_state *istate, > */ > dir->untracked = NULL; > if (!len || treat_leading_path(dir, istate, path, len, pathspec)) > - read_directory_recursive(dir, istate, path, len, untracked, 0, pathspec); > + read_directory_recursive(dir, istate, path, len, untracked, 0, 0, pathspec); > QSORT(dir->entries, dir->nr, cmp_dir_entry); > QSORT(dir->ignored, dir->ignored_nr, cmp_dir_entry); > > -- > 2.7.4 > -- Brandon Williams