[PATCH/RFC v6 3/3] lstat_cache(): introduce has_dirs_only_path() function

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Inside the create_directories() function inside entry.c we currently
do an stat() or lstat() call for each path component of the pathname
'path' each and every time called.  For the 'git checkout' command,
this function is called on each file for which we must do an update
(ce->ce_flags & CE_UPDATE), so we get lots and lots of calls...

To fix this, we make a new wrapper to the lstat_cache() function, and
call the wrapper function instead of the calls to the stat() or the
lstat() functions.  Since the paths given to the create_directories()
function, is sorted alphabetically, the new wrapper would be very
cache effective in this situation.

To support it we must update the lstat_cache() function to be able to
say that "pleace test the complete length of 'name'", and also to give
it the length of a prefix, where the cache should use the stat()
function instead of the lstat() function to test each path component.

Thanks to Junio C Hamano, Linus Torvalds and Ren�charfe for valuable
comments to this patch!

Signed-off-by: Kjetil Barvik <barvik@xxxxxxxxxxxx>
---
:100644 100644 11181aa... 7c8c8e4... M	cache.h
:100644 100644 aa2ee46... b0295bd... M	entry.c
:100644 100644 0004e97... c92fac7... M	symlinks.c
 cache.h    |    1 +
 entry.c    |   34 +++++++++++-----------------------
 symlinks.c |   59 ++++++++++++++++++++++++++++++++++++++++++++---------------
 3 files changed, 56 insertions(+), 38 deletions(-)

diff --git a/cache.h b/cache.h
index 11181aa0079ce94bfbdb2bba77205f49aa3cbcb3..7c8c8e484259a69bdc2c75e9e820247b0df04d01 100644
--- a/cache.h
+++ b/cache.h
@@ -721,6 +721,7 @@ struct checkout {
 extern int checkout_entry(struct cache_entry *ce, const struct checkout *state, char *topath);
 extern int has_symlink_leading_path(int len, const char *name);
 extern int has_symlink_or_noent_leading_path(int len, const char *name);
+extern int has_dirs_only_path(int len, const char *name, int prefix_len);
 
 extern struct alternate_object_database {
 	struct alternate_object_database *next;
diff --git a/entry.c b/entry.c
index aa2ee46a84033585d8e07a585610c5a697af82c2..b0295bd5559d4a106d977da7a54b0cdd8399c208 100644
--- a/entry.c
+++ b/entry.c
@@ -8,35 +8,23 @@ static void create_directories(const char *path, const struct checkout *state)
 	const char *slash = path;
 
 	while ((slash = strchr(slash+1, '/')) != NULL) {
-		struct stat st;
-		int stat_status;
-
 		len = slash - path;
 		memcpy(buf, path, len);
 		buf[len] = 0;
 
-		if (len <= state->base_dir_len)
-			/*
-			 * checkout-index --prefix=<dir>; <dir> is
-			 * allowed to be a symlink to an existing
-			 * directory.
-			 */
-			stat_status = stat(buf, &st);
-		else
-			/*
-			 * if there currently is a symlink, we would
-			 * want to replace it with a real directory.
-			 */
-			stat_status = lstat(buf, &st);
-
-		if (!stat_status && S_ISDIR(st.st_mode))
+		/* For 'checkout-index --prefix=<dir>', <dir> is
+		 * allowed to be a symlink to an existing directory,
+		 * and we set 'state->base_dir_len' below, such that
+		 * we test the path components of the prefix with the
+		 * stat() function instead of the lstat() function.
+		 */
+		if (has_dirs_only_path(len, buf, state->base_dir_len))
 			continue; /* ok, it is already a directory. */
 
-		/*
-		 * We know stat_status == 0 means something exists
-		 * there and this mkdir would fail, but that is an
-		 * error codepath; we do not care, as we unlink and
-		 * mkdir again in such a case.
+		/* If this mkdir() would fail, it could be that there
+		 * is already a symlink or something else exists
+		 * there, therefore we then try to unlink it and try
+		 * one more time to create the directory.
 		 */
 		if (mkdir(buf, 0777)) {
 			if (errno == EEXIST && state->force &&
diff --git a/symlinks.c b/symlinks.c
index 0004e97d2547467564b146232f40cba4f5b04a3e..c92fac7506a702157e4b780d4331f3c7b412798d 100644
--- a/symlinks.c
+++ b/symlinks.c
@@ -1,10 +1,11 @@
 #include "cache.h"
 
 static struct cache_def {
-	char path[PATH_MAX];
+	char path[PATH_MAX + 1];
 	int len;
 	int flags;
 	int track_flags;
+	int prefix_len_stat_func;
 } cache;
 
 static inline int greatest_match_lstat_cache(int len, const char *name)
@@ -25,12 +26,13 @@ static inline int greatest_match_lstat_cache(int len, const char *name)
 	return match_len;
 }
 
-static inline void reset_lstat_cache(int track_flags)
+static inline void reset_lstat_cache(int track_flags, int prefix_len_stat_func)
 {
 	cache.path[0] = '\0';
 	cache.len = 0;
 	cache.flags = 0;
 	cache.track_flags = track_flags;
+	cache.prefix_len_stat_func = prefix_len_stat_func;
 }
 
 #define FL_DIR      (1 << 0)
@@ -38,28 +40,35 @@ static inline void reset_lstat_cache(int track_flags)
 #define FL_SYMLINK  (1 << 2)
 #define FL_LSTATERR (1 << 3)
 #define FL_ERR      (1 << 4)
+#define FL_FULLPATH (1 << 5)
 
 /*
  * Check if name 'name' of length 'len' has a symlink leading
  * component, or if the directory exists and is real, or not.
  *
  * To speed up the check, some information is allowed to be cached.
- * This is can be indicated by the 'track_flags' argument.
+ * This is can be indicated by the 'track_flags' argument, which also
+ * can be used to indicate that we should always check the full path.
+ *
+ * The 'prefix_len_stat_func' parameter can be used to set the length
+ * of the prefix, where the cache should use the stat() function
+ * instead of the lstat() function to test each path component.
  */
 static int lstat_cache(int len, const char *name,
-		       int track_flags)
+		       int track_flags, int prefix_len_stat_func)
 {
-	int match_len, last_slash, last_slash_dir, max_len;
+	int match_len, last_slash, last_slash_dir, max_len, ret;
 	int match_flags, ret_flags, save_flags;
 	struct stat st;
 
-	if (cache.track_flags != track_flags) {
+	if (cache.track_flags != track_flags ||
+	    cache.prefix_len_stat_func != prefix_len_stat_func) {
 		/*
-		 * As a safeguard we clear the cache if the value of
-		 * track_flags does not match with the last supplied
-		 * value.
+		 * As a safeguard we clear the cache if the values of
+		 * track_flags and/or prefix_len_stat_func does not
+		 * match with the last supplied values.
 		 */
-		reset_lstat_cache(track_flags);
+		reset_lstat_cache(track_flags, prefix_len_stat_func);
 		match_len = last_slash = 0;
 	} else {
 		/*
@@ -91,12 +100,17 @@ static int lstat_cache(int len, const char *name,
 			cache.path[match_len] = name[match_len];
 			match_len++;
 		} while (match_len < max_len && name[match_len] != '/');
-		if (match_len >= max_len)
+		if (match_len >= max_len && !(track_flags & FL_FULLPATH))
 			break;
 		last_slash = match_len;
 		cache.path[last_slash] = '\0';
 
-		if (lstat(cache.path, &st)) {
+		if (last_slash <= prefix_len_stat_func)
+			ret = stat(cache.path, &st);
+		else
+			ret = lstat(cache.path, &st);
+
+		if (ret) {
 			ret_flags = FL_LSTATERR;
 			if (errno == ENOENT)
 				ret_flags |= FL_NOENT;
@@ -136,17 +150,19 @@ static int lstat_cache(int len, const char *name,
 		cache.len   = last_slash_dir;
 		cache.flags = FL_DIR;
 	} else {
-		reset_lstat_cache(track_flags);
+		reset_lstat_cache(track_flags, prefix_len_stat_func);
 	}
 	return ret_flags;
 }
 
+#define USE_ONLY_LSTAT  0
+
 /* Return non-zero if path 'name' has a leading symlink component.
  */
 int has_symlink_leading_path(int len, const char *name)
 {
 	return lstat_cache(len, name,
-			   FL_SYMLINK|FL_DIR) &
+			   FL_SYMLINK|FL_DIR, USE_ONLY_LSTAT) &
 		FL_SYMLINK;
 }
 
@@ -156,6 +172,19 @@ int has_symlink_leading_path(int len, const char *name)
 int has_symlink_or_noent_leading_path(int len, const char *name)
 {
 	return lstat_cache(len, name,
-			   FL_SYMLINK|FL_NOENT|FL_DIR) &
+			   FL_SYMLINK|FL_NOENT|FL_DIR, USE_ONLY_LSTAT) &
 		(FL_SYMLINK|FL_NOENT);
 }
+
+/* Return non-zero if all path components of 'name' exists as a
+ * directory.  If prefix_len > 0, we will test with the stat()
+ * function instead of the lstat() function for a prefix length of
+ * 'prefix_len', thus we then allow for symlink(s) in the prefix part
+ * as long as those points to real existing directories.
+ */
+int has_dirs_only_path(int len, const char *name, int prefix_len)
+{
+	return lstat_cache(len, name,
+			   FL_DIR|FL_FULLPATH, prefix_len) &
+		FL_DIR;
+}
-- 
1.6.1.rc1.49.g7f705

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux