On Tue, Feb 10, 2009 at 11:25 PM, Junio C Hamano <gitster@xxxxxxxxx> wrote: > Jeff King <peff@xxxxxxxx> writes: > >> Warm cache, it runs in .042s on my git repo, about half of which is the >> untracked files check. It takes about .49s on the kernel repo. The >> read_directory() bit is not optimized at all, and could probably benefit >> from an early return (OTOH, the worst case is still going to need to >> look at every path). > > I suspect that with a large tree your have_untracked() would show > unnecessary overhead from dir_add_name(), because you only want one bit of > information but there is no way to stop with "ok, we know enough". This > toy patch adds a trivial "early return" to read_directory() codepath, but > there are two sad things about it. > > * In order to cheaply run "is there a single other file", you really > should scan the level you have already opened first before digging > deeper. I didn't bother because the primary use of read_directory is > the depth first traversal. > > * In a cloned work tree, the tracked files and directories come early in > the physical directory and then crufts you created yourself comes at > the end in readdir() order. We tend to read a lot of tracked ones > first and the finally hit other files. I've done some measurements from within .bashrc via a custom timer() bash function called before and after git ministatus call. The box I'm testing on is a Core2Duo with 1.8GHz and 2GB of RAM. I have faster machines I can test on but do Git coding on this one. For WebKit.git this is similar to the previous patch. It does also take at least 8 or 9 seconds. That's an improvement over 8-11secs. Is it good to test against WebKit.git? I mean it's apparently big. > builtin-ministatus.c | 4 +++- > dir.c | 32 +++++++++++++++++++++++++++----- > dir.h | 2 ++ > 3 files changed, 32 insertions(+), 6 deletions(-) > > diff --git a/builtin-ministatus.c b/builtin-ministatus.c > index aff9e5a..4b5a191 100644 > --- a/builtin-ministatus.c > +++ b/builtin-ministatus.c > @@ -25,7 +25,7 @@ static int have_untracked(void) > > read_directory(&dir, ".", "", 0, NULL); > /* XXX we are probably leaking memory from dir */ > - for (i = 0; i < dir.nr; i++) > + for (i = 0; i < dir.nr; i++) { > struct dir_entry *ent = dir.entries[i]; > if (cache_name_is_other(ent->name, ent->len)) > return 1; > @@ -47,6 +47,8 @@ int cmd_ministatus(int argc, const char **argv, const char *prefix) > putchar('*'); > if (have_untracked()) > putchar('?'); > + if (untracked_files_exist()) > + putchar('%'); > > return 0; > } > diff --git a/dir.c b/dir.c > index cfd1ea5..8d4fcdd 100644 > --- a/dir.c > +++ b/dir.c > @@ -16,7 +16,10 @@ struct path_simplify { > > static int read_directory_recursive(struct dir_struct *dir, > const char *path, const char *base, int baselen, > - int check_only, const struct path_simplify *simplify); > + int mode, const struct path_simplify *simplify); > +#define READ_DIRECTORY_EMPTY_CHECK 1 > +#define READ_DIRECTORY_OTHER_CHECK 2 > + > static int get_dtype(struct dirent *de, const char *path); > > int common_prefix(const char **pathspec) > @@ -505,7 +508,8 @@ static enum directory_treatment treat_directory(struct dir_struct *dir, > /* This is the "show_other_directories" case */ > if (!dir->hide_empty_directories) > return show_directory; > - if (!read_directory_recursive(dir, dirname, dirname, len, 1, simplify)) > + if (!read_directory_recursive(dir, dirname, dirname, len, > + READ_DIRECTORY_EMPTY_CHECK, simplify)) > return ignore_directory; > return show_directory; > } > @@ -574,10 +578,12 @@ static int get_dtype(struct dirent *de, const char *path) > * Also, we ignore the name ".git" (even if it is not a directory). > * That likely will not change. > */ > -static int read_directory_recursive(struct dir_struct *dir, const char *path, const char *base, int baselen, int check_only, const struct path_simplify *simplify) > +static int read_directory_recursive(struct dir_struct *dir, const char *path, const char *base, int baselen, int mode, const struct path_simplify *simplify) > { > DIR *fdir = opendir(path); > int contents = 0; > + int empty_check_only = (mode == READ_DIRECTORY_EMPTY_CHECK); > + int other_check_only = (mode == READ_DIRECTORY_OTHER_CHECK); > > if (fdir) { > struct dirent *de; > @@ -639,7 +645,7 @@ static int read_directory_recursive(struct dir_struct *dir, const char *path, co > break; > case recurse_into_directory: > contents += read_directory_recursive(dir, > - fullname, fullname, baselen + len, 0, simplify); > + fullname, fullname, baselen + len, mode, simplify); > continue; > case ignore_directory: > continue; > @@ -650,10 +656,12 @@ static int read_directory_recursive(struct dir_struct *dir, const char *path, co > break; > } > contents++; > - if (check_only) > + if (empty_check_only) > goto exit_early; > else > dir_add_name(dir, fullname, baselen + len); > + if (other_check_only && dir->nr) > + goto exit_early; > } > exit_early: > closedir(fdir); > @@ -731,6 +739,20 @@ int read_directory(struct dir_struct *dir, const char *path, const char *base, i > return dir->nr; > } > > +int untracked_files_exist(void) > +{ > + struct dir_struct dir; > + int i; > + > + memset(&dir, 0, sizeof(dir)); > + setup_standard_excludes(&dir); > + read_directory_recursive(&dir, ".", "", 0, READ_DIRECTORY_OTHER_CHECK, > + NULL); > + for (i = 0; i < dir.nr; i++) > + free(dir.entries[i]); > + return !!dir.nr; > +} > + > int file_exists(const char *f) > { > struct stat sb; > diff --git a/dir.h b/dir.h > index bdc2d47..1f8b575 100644 > --- a/dir.h > +++ b/dir.h > @@ -92,4 +92,6 @@ extern int remove_dir_recursively(struct strbuf *path, int only_empty); > /* tries to remove the path with empty directories along it, ignores ENOENT */ > extern int remove_path(const char *path); > > +extern int untracked_files_exist(void); > + > #endif > -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html