When we do a 'git checkout', we eventually end up in the 'checkout_entry()' function inside entry.c, and from there we call the 'create_directories()' function to make sure that all the directories exist for the possibly new file or entry. For every entry, the 'create_directories()' function checks that every path component exists. This results in tons and tons of calls to lstat() or stat() when we check out files nested deep inside a directory. We try to avoid this by remembering the last checked, and possibly newly created, directory. Signed-off-by: Kjetil Barvik <barvik@xxxxxxxxxxxx> --- :100644 100644 768ba38... ec1297f... M cache.h :100644 100644 aa2ee46... 36d6f98... M entry.c :100644 100644 28e2759... 0a03e65... M unpack-trees.c cache.h | 1 + entry.c | 82 +++++++++++++++++++++++++++++++++++++++++++------------ unpack-trees.c | 1 + 3 files changed, 66 insertions(+), 18 deletions(-) diff --git a/cache.h b/cache.h index 768ba3825f3015828381490b0c387177a4f71578..ec1297ff5621cc9eb7fce51cc025f18a030ac9ea 100644 --- a/cache.h +++ b/cache.h @@ -718,6 +718,7 @@ struct checkout { refresh_cache:1; }; +extern void clear_created_dirs_cache(void); extern int checkout_entry(struct cache_entry *ce, const struct checkout *state, char *topath); #define LSTAT_DIR (1u << 0) diff --git a/entry.c b/entry.c index aa2ee46a84033585d8e07a585610c5a697af82c2..36d6f98c1f59a86a5e9c117e1181c1169208168f 100644 --- a/entry.c +++ b/entry.c @@ -1,33 +1,67 @@ #include "cache.h" #include "blob.h" -static void create_directories(const char *path, const struct checkout *state) +static char dirs_path[PATH_MAX]; +static int dirs_len = 0; + +static inline int +greatest_common_created_dirs_prefix(int len, const char *name) { - int len = strlen(path); - char *buf = xmalloc(len + 1); - const char *slash = path; + int max_len, match_len = 0, i = 0; - while ((slash = strchr(slash+1, '/')) != NULL) { - struct stat st; - int stat_status; + max_len = len < dirs_len ? 
len : dirs_len; + while (i < max_len && name[i] == dirs_path[i]) { + if (name[i] == '/') match_len = i; + i++; + } + if (i == dirs_len && len > dirs_len && name[dirs_len] == '/') + match_len = dirs_len; + return match_len; +} - len = slash - path; - memcpy(buf, path, len); - buf[len] = 0; +void clear_created_dirs_cache(void) +{ + dirs_path[0] = '\0'; + dirs_len = 0; +} - if (len <= state->base_dir_len) +static void +create_directories(int len, const char *path, const struct checkout *state) +{ + int i, max_len, last_slash, stat_status; + struct stat st; + + /* Check the cache for previously checked or created + * directories (and components) within this function. There + * is no need to check or re-create directory components more + * than once! + */ + max_len = len < PATH_MAX ? len : PATH_MAX; + i = last_slash = greatest_common_created_dirs_prefix(max_len, path); + + while (i < max_len) { + do { + dirs_path[i] = path[i]; + i++; + } while (i < max_len && path[i] != '/'); + if (i >= max_len) + break; + last_slash = i; + dirs_path[last_slash] = '\0'; + + if (last_slash <= state->base_dir_len) /* * checkout-index --prefix=<dir>; <dir> is * allowed to be a symlink to an existing * directory. */ - stat_status = stat(buf, &st); + stat_status = stat(dirs_path, &st); else /* * if there currently is a symlink, we would * want to replace it with a real directory. */ - stat_status = lstat(buf, &st); + stat_status = lstat(dirs_path, &st); if (!stat_status && S_ISDIR(st.st_mode)) continue; /* ok, it is already a directory. */ @@ -38,14 +72,20 @@ static void create_directories(const char *path, const struct checkout *state) * error codepath; we do not care, as we unlink and * mkdir again in such a case. 
*/ - if (mkdir(buf, 0777)) { + if (mkdir(dirs_path, 0777)) { if (errno == EEXIST && state->force && - !unlink(buf) && !mkdir(buf, 0777)) + !unlink(dirs_path) && !mkdir(dirs_path, 0777)) continue; - die("cannot create directory at %s", buf); + die("cannot create directory at %s", dirs_path); } } - free(buf); + /* Update the cache of already created/checked directories */ + if (last_slash > 0 && last_slash < PATH_MAX) { + dirs_path[last_slash] = '\0'; + dirs_len = last_slash; + } else { + clear_created_dirs_cache(); + } } static void remove_subtree(const char *path) @@ -55,6 +95,11 @@ static void remove_subtree(const char *path) char pathbuf[PATH_MAX]; char *name; + /* To be utterly safe we invalidate the cache of the + * previously created directories. + */ + clear_created_dirs_cache(); + if (!dir) die("cannot opendir %s (%s)", path, strerror(errno)); strcpy(pathbuf, path); @@ -201,6 +246,7 @@ int checkout_entry(struct cache_entry *ce, const struct checkout *state, char *t memcpy(path, state->base_dir, len); strcpy(path + len, ce->name); + len += ce_namelen(ce); if (!lstat(path, &st)) { unsigned changed = ce_match_stat(ce, &st, CE_MATCH_IGNORE_VALID); @@ -229,6 +275,6 @@ int checkout_entry(struct cache_entry *ce, const struct checkout *state, char *t return error("unable to unlink old '%s' (%s)", path, strerror(errno)); } else if (state->not_new) return 0; - create_directories(path, state); + create_directories(len, path, state); return write_entry(ce, path, state, 0); } diff --git a/unpack-trees.c b/unpack-trees.c index 28e275981a21b033459ef9c7e420cce4bf7e5513..0a03e65f9c9d869ab2d8b3c337f032ff2b8e7b2f 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -119,6 +119,7 @@ static int check_updates(struct unpack_trees_options *o) } } + clear_created_dirs_cache(); for (i = 0; i < index->cache_nr; i++) { struct cache_entry *ce = index->cache[i]; -- 1.6.1.rc1.49.g7f705 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to 
majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html