Re: .. anybody know of any filesystems that depend on the exact VFS 'namehash' implementation?

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Ok, guys, here's a *very* preliminary version of the patch I was thinking of.

NOTE! The changes are all totally unconditional, which means that this
patch will break on:

 - any big-endian machine (the "find first NUL/slash byte" and mask
generation really is little-endian only)

   Maybe somebody with a big-endian machine could try to figure out
how the logic would work there to efficiently find the mask of bytes
that are before the first NUL or slash.

 - any hardware that doesn't do good unaligned 'unsigned long' accesses

 - if you enable CONFIG_DEBUG_PAGEALLOC

In addition, I've only *tested* this on x86-64, and I'm pretty sure
that on x86-32 it will at the very least warn about the big 64-bit
constants I use (but I think it should work - they'll just get
truncated, and I tried to use "sizeof(unsigned long)" instead of "8"
etc).

Anyway, on x86-64 it works for me. And my stupid test-case (look up
the same pathname ten million times - I'm explicitly looking up the
*same* pathname to not show the D$ trashing of the hash tables looking
up millions of different names) went from 9.053s to 8.099s.

So that's a 10% improvement on an admittedly extreme load, but the two
functions that were improved (__d_lookup_rcu and link_path_walk)
really *are* the two hottest functions on actual real loads too, so
while my microbenchmark was extreme, it's at least testing something
relevant. And it's the whole microbenchmark that improved by 10%, so
those hot functions themselves each actually improved by about 30%.

Comments?

I agree that the games I play to do things one word at a time aren't
*pretty*, but I did try to split things up into helper functions so
that the code is actually conceptually understandable even if you
can't follow the tricks I play. Staring too much at that new
hash_name() function may turn you mad. Not only is the "find byte in
word" logic subtle, the whole crazy "do { } while ()" loop is written
that way so that gcc doesn't go crazy and try to unroll it once (which
gcc does if you turn it into the more obvious "for ()" loop).

That said, trying to figure out *why* hash_name() works can be a fun
exercise if you're into these kinds of tricks. It was certainly fun to
write.

                     Linus
 fs/dcache.c            |   13 +++---
 fs/namei.c             |  102 ++++++++++++++++++++++++++++++++++++++----------
 include/linux/dcache.h |   45 ++++++++++++++-------
 3 files changed, 120 insertions(+), 40 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index fe19ac13f75f..138be96e25b6 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -104,7 +104,7 @@ static unsigned int d_hash_shift __read_mostly;
 
 static struct hlist_bl_head *dentry_hashtable __read_mostly;
 
-static inline struct hlist_bl_head *d_hash(struct dentry *parent,
+static inline struct hlist_bl_head *d_hash(const struct dentry *parent,
 					unsigned long hash)
 {
 	hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
@@ -1717,8 +1717,9 @@ EXPORT_SYMBOL(d_add_ci);
  * child is looked up. Thus, an interlocking stepping of sequence lock checks
  * is formed, giving integrity down the path walk.
  */
-struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
-				unsigned *seq, struct inode **inode)
+struct dentry *__d_lookup_rcu(const struct dentry *parent,
+				const struct qstr *name,
+				unsigned *seqp, struct inode **inode)
 {
 	unsigned int len = name->len;
 	unsigned int hash = name->hash;
@@ -1748,6 +1749,7 @@ struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
 	 * See Documentation/filesystems/path-lookup.txt for more details.
 	 */
 	hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
+		unsigned seq;
 		struct inode *i;
 		const char *tname;
 		int tlen;
@@ -1756,7 +1758,7 @@ struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
 			continue;
 
 seqretry:
-		*seq = read_seqcount_begin(&dentry->d_seq);
+		seq = read_seqcount_begin(&dentry->d_seq);
 		if (dentry->d_parent != parent)
 			continue;
 		if (d_unhashed(dentry))
@@ -1771,7 +1773,7 @@ seqretry:
 		 * edge of memory when walking. If we could load this
 		 * atomically some other way, we could drop this check.
 		 */
-		if (read_seqcount_retry(&dentry->d_seq, *seq))
+		if (read_seqcount_retry(&dentry->d_seq, seq))
 			goto seqretry;
 		if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) {
 			if (parent->d_op->d_compare(parent, *inode,
@@ -1788,6 +1790,7 @@ seqretry:
 		 * order to do anything useful with the returned dentry
 		 * anyway.
 		 */
+		*seqp = seq;
 		*inode = i;
 		return dentry;
 	}
diff --git a/fs/namei.c b/fs/namei.c
index a780ea515c47..06df2605c9e8 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1374,6 +1374,85 @@ static inline int can_lookup(struct inode *inode)
 	return 1;
 }
 
+static inline unsigned int fold_hash(unsigned long hash)
+{
+	if (sizeof(long) != sizeof(int))
+		hash += hash >> (8*sizeof(int));
+	return hash;
+}
+
+/* Only works for little-endian with fast unaligned accesses! */
+unsigned int full_name_hash(const unsigned char *name, unsigned int len)
+{
+	unsigned long a, mask;
+	unsigned long hash = 0;
+
+	for (;;) {
+		a = *(unsigned long *)name;
+		hash *= 11;
+		if (len < sizeof(unsigned long))
+			break;
+		hash += a;
+		name += sizeof(unsigned long);
+		len -= sizeof(unsigned long);
+		if (!len)
+			goto done;
+	}
+	mask = ~(~0ul << len*8);
+	hash += mask & a;
+done:
+	return fold_hash(hash);
+}
+
+#define ONEBYTES	0x0101010101010101ul
+#define SLASHBYTES	0x2f2f2f2f2f2f2f2ful
+#define HIGHBITS	0x8080808080808080ul
+
+/* Return the high bit set in the first byte that is a zero */
+static inline unsigned long has_zero(unsigned long a)
+{
+	return ((a - ONEBYTES) & ~a) & HIGHBITS;
+}
+
+/*
+ * Calculate the length and hash of the path component, and
+ * return the beginning of the next one (or the pointer to the
+ * final NUL character if none).
+ */
+static inline const char *hash_name(struct qstr *str, const char *name)
+{
+	unsigned long a, mask;
+	unsigned long hash = 0;
+	unsigned long len = 0;
+
+	str->name = name;
+	a = 0;
+	len = -sizeof(unsigned long);
+	do {
+		hash = (hash + a) * 11;
+		len += sizeof(unsigned long);
+		a = *(unsigned long *)(name+len);
+		/* Do we have any NUL or '/' bytes in this word? */
+		mask = has_zero(a) | has_zero(a ^ SLASHBYTES);
+	} while (!mask);
+
+	/* The mask *below* the first high bit set */
+	mask = (mask - 1) & ~mask;
+	mask >>= 7;
+	hash += a & mask;
+	str->hash = fold_hash(hash);
+
+	/* Get the final path component length */
+	len += ffz(mask) / 8;
+	str->len = len;
+
+	/* remove trailing slashes */
+	while (name[len] == '/')
+		len++;
+
+	return name+len;
+}
+
 /*
  * Name resolution.
  * This is the basic name resolution function, turning a pathname into
@@ -1394,26 +1473,14 @@ static int link_path_walk(const char *name, struct nameidata *nd)
 
 	/* At this point we know we have a real path component. */
 	for(;;) {
-		unsigned long hash;
 		struct qstr this;
-		unsigned int c;
 		int type;
 
 		err = may_lookup(nd);
  		if (err)
 			break;
 
-		this.name = name;
-		c = *(const unsigned char *)name;
-
-		hash = init_name_hash();
-		do {
-			name++;
-			hash = partial_name_hash(c, hash);
-			c = *(const unsigned char *)name;
-		} while (c && (c != '/'));
-		this.len = name - (const char *) this.name;
-		this.hash = end_name_hash(hash);
+		name = hash_name(&this, name);
 
 		type = LAST_NORM;
 		if (this.name[0] == '.') switch (this.len) {
@@ -1437,10 +1504,6 @@ static int link_path_walk(const char *name, struct nameidata *nd)
 			}
 		}
 
-		/* remove trailing slashes? */
-		if (!c)
-			goto last_component;
-		while (*++name == '/');
 		if (!*name)
 			goto last_component;
 
@@ -1775,24 +1838,21 @@ static struct dentry *lookup_hash(struct nameidata *nd)
 struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
 {
 	struct qstr this;
-	unsigned long hash;
 	unsigned int c;
 
 	WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex));
 
 	this.name = name;
 	this.len = len;
+	this.hash = full_name_hash(name, len);
 	if (!len)
 		return ERR_PTR(-EACCES);
 
-	hash = init_name_hash();
 	while (len--) {
 		c = *(const unsigned char *)name++;
 		if (c == '/' || c == '\0')
 			return ERR_PTR(-EACCES);
-		hash = partial_name_hash(c, hash);
 	}
-	this.hash = end_name_hash(hash);
 	/*
 	 * See if the low-level filesystem might want
 	 * to use its own hash..
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index d64a55b23afd..79e77f9419ff 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -54,18 +54,41 @@ extern struct dentry_stat_t dentry_stat;
 static inline int dentry_cmp(const unsigned char *cs, size_t scount,
 				const unsigned char *ct, size_t tcount)
 {
-	int ret;
+#if 1
+/* Little-endian with fast unaligned accesses? */
+	unsigned long a,b,mask;
+
+	if (unlikely(scount != tcount))
+		return 1;
+
+	for (;;) {
+		a = *(unsigned long *)cs;
+		b = *(unsigned long *)ct;
+		if (tcount < sizeof(unsigned long))
+			break;
+		if (unlikely(a != b))
+			return 1;
+		cs += sizeof(unsigned long);
+		ct += sizeof(unsigned long);
+		tcount -= sizeof(unsigned long);
+		if (!tcount)
+			return 0;
+	}
+	mask = ~(~0ul << tcount*8);
+	return unlikely(!!((a ^ b) & mask));
+#else
 	if (scount != tcount)
 		return 1;
+
 	do {
-		ret = (*cs != *ct);
-		if (ret)
-			break;
+		if (*cs != *ct)
+			return 1;
 		cs++;
 		ct++;
 		tcount--;
 	} while (tcount);
-	return ret;
+	return 0;
+#endif
 }
 
 /* Name hashing routines. Initial hash value */
@@ -89,14 +112,7 @@ static inline unsigned long end_name_hash(unsigned long hash)
 }
 
 /* Compute the hash for a name string. */
-static inline unsigned int
-full_name_hash(const unsigned char *name, unsigned int len)
-{
-	unsigned long hash = init_name_hash();
-	while (len--)
-		hash = partial_name_hash(*name++, hash);
-	return end_name_hash(hash);
-}
+extern unsigned int full_name_hash(const unsigned char *, unsigned int);
 
 /*
  * Try to keep struct dentry aligned on 64 byte cachelines (this will
@@ -309,7 +325,8 @@ extern struct dentry *d_ancestor(struct dentry *, struct dentry *);
 extern struct dentry *d_lookup(struct dentry *, struct qstr *);
 extern struct dentry *d_hash_and_lookup(struct dentry *, struct qstr *);
 extern struct dentry *__d_lookup(struct dentry *, struct qstr *);
-extern struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
+extern struct dentry *__d_lookup_rcu(const struct dentry *parent,
+				const struct qstr *name,
 				unsigned *seq, struct inode **inode);
 
 /**

[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux