[PATCH v3 13/13] read_directory: calculate name hashes incrementally

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Instead of having index_name_exists() calculate a hash of the full
pathname for every entry, calculate a partial hash per directory and
use it as a seed. This reduces the number of characters that
icase_hash has to chew through.

treat_leading_path:   0.000  0.000
read_directory:       1.296  1.235
+treat_one_path:      0.599  0.531
++is_excluded:        0.102  0.102
+++prep_exclude:      0.040  0.040
+++matching:          0.035  0.035
++dir_exist:          0.035  0.035
++index_name_exists:  0.292  0.225
lazy_init_name_hash:  0.155  0.155
+simplify_away:       0.082  0.083
+dir_add_name:        0.000  0.000

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@xxxxxxxxx>
---
 dir.c | 44 +++++++++++++++++++++++++++++++++-----------
 1 file changed, 33 insertions(+), 11 deletions(-)

diff --git a/dir.c b/dir.c
index 5fda5af..8638dcd 100644
--- a/dir.c
+++ b/dir.c
@@ -46,6 +46,7 @@ struct path_simplify {
 
 static void read_directory_recursive(struct dir_struct *dir,
 				     const char *path, int len,
+				     unsigned int hash,
 				     int check_only,
 				     const struct path_simplify *simplify,
 				     int *contents);
@@ -1044,12 +1045,17 @@ static struct dir_entry *dir_entry_new(const char *pathname, int len)
 	return ent;
 }
 
-static struct dir_entry *dir_add_name(struct dir_struct *dir, const char *pathname, int len)
+static struct dir_entry *dir_add_name(struct dir_struct *dir,
+				      const char *pathname, int len,
+				      unsigned int hash, int baselen)
 {
 	if (!(dir->flags & DIR_SHOW_IGNORED)) {
 		struct cache_entry *ce;
 		START_CLOCK();
-		ce = cache_name_exists(pathname, len, ignore_case);
+		ce = index_name_exists_base(&the_index,
+					    hash, baselen,
+					    pathname, len,
+					    ignore_case);
 		STOP_CLOCK(tv_index_name_exists);
 		if (ce)
 			return NULL;
@@ -1225,7 +1231,9 @@ static enum directory_treatment treat_directory(struct dir_struct *dir,
 	if ((dir->flags & DIR_SHOW_IGNORED) && !exclude) {
 		dir->flags &= ~DIR_SHOW_IGNORED;
 		dir->flags |= DIR_HIDE_EMPTY_DIRECTORIES;
-		read_directory_recursive(dir, dirname, len, 1, simplify, &contents);
+		read_directory_recursive(dir, dirname, len,
+					 hash_name(dirname, len),
+					 1, simplify, &contents);
 		dir->flags &= ~DIR_HIDE_EMPTY_DIRECTORIES;
 		dir->flags |= DIR_SHOW_IGNORED;
 
@@ -1234,7 +1242,9 @@ static enum directory_treatment treat_directory(struct dir_struct *dir,
 	if (!(dir->flags & DIR_SHOW_IGNORED) &&
 	    !(dir->flags & DIR_HIDE_EMPTY_DIRECTORIES))
 		return show_directory;
-	read_directory_recursive(dir, dirname, len, 1, simplify, &contents);
+	read_directory_recursive(dir, dirname, len,
+				 hash_name(dirname, len),
+				 1, simplify, &contents);
 	if (!contents)
 		return ignore_directory;
 	return show_directory;
@@ -1401,6 +1411,8 @@ enum path_treatment {
 
 static enum path_treatment treat_one_path(struct dir_struct *dir,
 					  struct strbuf *path,
+					  unsigned int hash,
+					  int baselen,
 					  const struct path_simplify *simplify,
 					  int dtype, struct dirent *de,
 					  int exclude_shortcut_ok)
@@ -1416,7 +1428,8 @@ static enum path_treatment treat_one_path(struct dir_struct *dir,
 	    dtype != DT_DIR) {
 		struct cache_entry *ce;
 		START_CLOCK();
-		ce = cache_name_exists(path->buf, path->len, ignore_case);
+		ce = index_name_exists_base(&the_index, hash, baselen,
+					    path->buf, path->len, ignore_case);
 		STOP_CLOCK(tv_index_name_exists);
 		if (ce)
 			return path_ignored;
@@ -1467,6 +1480,7 @@ static enum path_treatment treat_one_path(struct dir_struct *dir,
 static enum path_treatment treat_path(struct dir_struct *dir,
 				      struct dirent *de,
 				      struct strbuf *path,
+				      unsigned int hash,
 				      int baselen,
 				      const struct path_simplify *simplify,
 				      int exclude_shortcut_ok)
@@ -1485,7 +1499,8 @@ static enum path_treatment treat_path(struct dir_struct *dir,
 
 	dtype = DTYPE(de);
 	START_CLOCK();
-	ret = treat_one_path(dir, path, simplify, dtype, de, exclude_shortcut_ok);
+	ret = treat_one_path(dir, path, hash, baselen,
+			     simplify, dtype, de, exclude_shortcut_ok);
 	STOP_CLOCK(tv_treat_one_path);
 	return ret;
 }
@@ -1501,6 +1516,7 @@ static enum path_treatment treat_path(struct dir_struct *dir,
  */
 static void read_directory_recursive(struct dir_struct *dir,
 				     const char *base, int baselen,
+				     unsigned int hash,
 				     int check_only,
 				     const struct path_simplify *simplify,
 				     int *contents)
@@ -1517,12 +1533,16 @@ static void read_directory_recursive(struct dir_struct *dir,
 
 	dir->exclude_prepared = 0;
 	while ((de = readdir(fdir)) != NULL) {
-		switch (treat_path(dir, de, &path, baselen,
+		switch (treat_path(dir, de, &path, hash, baselen,
 				   simplify,
 				   !check_only && !contents)) {
 		case path_recurse:
 			read_directory_recursive(dir, path.buf,
-						 path.len, 0,
+						 path.len,
+						 hash_name_from(hash,
+								path.buf + baselen,
+								path.len - baselen),
+						 0,
 						 simplify,
 						 contents);
 			continue;
@@ -1543,7 +1563,7 @@ static void read_directory_recursive(struct dir_struct *dir,
 		if (check_only)
 			break;
 		START_CLOCK();
-		dir_add_name(dir, path.buf, path.len);
+		dir_add_name(dir, path.buf, path.len, hash, baselen);
 		STOP_CLOCK(tv_dir_add_name);
 	}
 	closedir(fdir);
@@ -1619,7 +1639,7 @@ static int treat_leading_path(struct dir_struct *dir,
 		if (simplify_away(sb.buf, sb.len, simplify))
 			break;
 		dir->exclude_prepared = 0;
-		if (treat_one_path(dir, &sb, simplify,
+		if (treat_one_path(dir, &sb, 0, 0, simplify,
 				   DT_DIR, NULL, 0) == path_ignored)
 			break; /* do not recurse into it */
 		if (len <= baselen) {
@@ -1648,7 +1668,9 @@ int read_directory(struct dir_struct *dir, const char *path, int len, const char
 		STOP_CLOCK(tv_lazy_init_name_hash);
 #endif
 		START_CLOCK();
-		read_directory_recursive(dir, path, len, 0, simplify, NULL);
+		read_directory_recursive(dir, path, len,
+					 hash_name(path, len),
+					 0, simplify, NULL);
 		STOP_CLOCK(tv_read_directory);
 	}
 #ifdef MEASURE_EXCLUDE
-- 
1.8.1.2.536.gf441e6d

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Announce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]