Ok, guys, here's a *very* preliminary version of the patch I was thinking of. NOTE! The changes are all totally unconditional, which means that this patch will break on: - any big-endian machine (the "find first NUL/slash byte" and mask generation really is little-endian only) Maybe somebody with a big-endian machine could try to figure out how the logic would work there to efficiently find the mask of bytes that are before the first NUL or slash. - any hardware that doesn't do good unaligned 'unsigned long' accesses - if you enable CONFIG_DEBUG_PAGEALLOC In addition, I've only *tested* this on x86-64, and I'm pretty sure that on x86-32 it will at the very least warn about the big 64-bit constants I use (but I think it should work - they'll just get truncated, and I tried to use "sizeof(unsigned long)" instead of "8" etc). Anyway, on x86-64 it works for me. And my stupid test-case (look up the same pathname ten million times - I'm explicitly looking up the *same* pathname to not show the D$ trashing of the hash tables looking up millions of different names) went from 9.053s to 8.099s. So that's a 10% improvement on an admittedly extreme load, but the two functions that were improved (__d_lookup_rcu and link_path_walk) really *are* the two hottest functions on actual real loads too, so while my microbenchmark was extreme, it's at least testing something relevant. And it's the whole microbenchmark that improved by 10%, so those hot functions themselves each actually improved by about 30%. Comments? I agree that the games I play to do things one word at a time aren't *pretty*, but I did try to split things up into helper functions so that the code is actually conceptually understandable even if you can't follow the tricks I play. Staring too much at that new hash_name() function may turn you mad. Not only is the "find byte in word" logic subtle, the whole crazy "do { } while ()" loop is written that way so that gcc doesn't go crazy and try to unroll it once (which gcc does if you turn it into the more obvious "for ()" loop). That said, trying to figure out *why* hash_name() works can be a fun exercise if you're into these kinds of tricks. It was certainly fun to write. Linus
fs/dcache.c | 13 +++--- fs/namei.c | 102 ++++++++++++++++++++++++++++++++++++++---------- include/linux/dcache.h | 45 ++++++++++++++------- 3 files changed, 120 insertions(+), 40 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index fe19ac13f75f..138be96e25b6 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -104,7 +104,7 @@ static unsigned int d_hash_shift __read_mostly; static struct hlist_bl_head *dentry_hashtable __read_mostly; -static inline struct hlist_bl_head *d_hash(struct dentry *parent, +static inline struct hlist_bl_head *d_hash(const struct dentry *parent, unsigned long hash) { hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES; @@ -1717,8 +1717,9 @@ EXPORT_SYMBOL(d_add_ci); * child is looked up. Thus, an interlocking stepping of sequence lock checks * is formed, giving integrity down the path walk. */ -struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name, - unsigned *seq, struct inode **inode) +struct dentry *__d_lookup_rcu(const struct dentry *parent, + const struct qstr *name, + unsigned *seqp, struct inode **inode) { unsigned int len = name->len; unsigned int hash = name->hash; @@ -1748,6 +1749,7 @@ struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name, * See Documentation/filesystems/path-lookup.txt for more details. */ hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) { + unsigned seq; struct inode *i; const char *tname; int tlen; @@ -1756,7 +1758,7 @@ struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name, continue; seqretry: - *seq = read_seqcount_begin(&dentry->d_seq); + seq = read_seqcount_begin(&dentry->d_seq); if (dentry->d_parent != parent) continue; if (d_unhashed(dentry)) @@ -1771,7 +1773,7 @@ seqretry: * edge of memory when walking. If we could load this * atomically some other way, we could drop this check. */ - if (read_seqcount_retry(&dentry->d_seq, *seq)) + if (read_seqcount_retry(&dentry->d_seq, seq)) goto seqretry; if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) { if (parent->d_op->d_compare(parent, *inode, @@ -1788,6 +1790,7 @@ seqretry: * order to do anything useful with the returned dentry * anyway. */ + *seqp = seq; *inode = i; return dentry; } diff --git a/fs/namei.c b/fs/namei.c index a780ea515c47..06df2605c9e8 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1374,6 +1374,85 @@ static inline int can_lookup(struct inode *inode) return 1; } +static inline unsigned int fold_hash(unsigned long hash) +{ + if (sizeof(long) != sizeof(int)) + hash += hash >> (8*sizeof(int)); + return hash; +} + +/* Only works for little-endian with fast unaligned accesses! */ +unsigned int full_name_hash(const unsigned char *name, unsigned int len) +{ + unsigned long a, mask; + unsigned long hash = 0; + + for (;;) { + a = *(unsigned long *)name; + hash *= 11; + if (len < sizeof(unsigned long)) + break; + hash += a; + name += sizeof(unsigned long); + len -= sizeof(unsigned long); + if (!len) + goto done; + } + mask = ~(~0ul << len*8); + hash += mask & a; +done: + return fold_hash(hash); +} + +#define ONEBYTES 0x0101010101010101ul +#define SLASHBYTES 0x2f2f2f2f2f2f2f2ful +#define HIGHBITS 0x8080808080808080ul + +/* Return the high bit set in the first byte that is a zero */ +static inline unsigned long has_zero(unsigned long a) +{ + return ((a - ONEBYTES) & ~a) & HIGHBITS; +} + +/* + * Calculate the length and hash of the path component, and + * return the beginning of the next one (or the pointer to the + * final NUL character if none). + */ +static inline const char *hash_name(struct qstr *str, const char *name) +{ + unsigned long a, mask; + unsigned long hash = 0; + unsigned long len = 0; + + str->name = name; + a = 0; + len = -sizeof(unsigned long); + do { + hash = (hash + a) * 11; + len += sizeof(unsigned long); + a = *(unsigned long *)(name+len); + /* Do we have any NUL or '/' bytes in this word? */ + mask = has_zero(a) | has_zero(a ^ SLASHBYTES); + } while (!mask); + + /* The mask *below* the first high bit set */ + mask = (mask - 1) & ~mask; + mask >>= 7; + hash += a & mask; + str->hash = fold_hash(hash); + + /* Get the final path component length */ + len += ffz(mask) / 8; + str->len = len; + + /* remove trailing slashes */ + while (name[len] == '/') + len++; + + return name+len; +} + /* * Name resolution. * This is the basic name resolution function, turning a pathname into @@ -1394,26 +1473,14 @@ static int link_path_walk(const char *name, struct nameidata *nd) /* At this point we know we have a real path component. */ for(;;) { - unsigned long hash; struct qstr this; - unsigned int c; int type; err = may_lookup(nd); if (err) break; - this.name = name; - c = *(const unsigned char *)name; - - hash = init_name_hash(); - do { - name++; - hash = partial_name_hash(c, hash); - c = *(const unsigned char *)name; - } while (c && (c != '/')); - this.len = name - (const char *) this.name; - this.hash = end_name_hash(hash); + name = hash_name(&this, name); type = LAST_NORM; if (this.name[0] == '.') switch (this.len) { @@ -1437,10 +1504,6 @@ static int link_path_walk(const char *name, struct nameidata *nd) } } - /* remove trailing slashes? */ - if (!c) - goto last_component; - while (*++name == '/'); if (!*name) goto last_component; @@ -1775,24 +1838,21 @@ static struct dentry *lookup_hash(struct nameidata *nd) struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) { struct qstr this; - unsigned long hash; unsigned int c; WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex)); this.name = name; this.len = len; + this.hash = full_name_hash(name, len); if (!len) return ERR_PTR(-EACCES); - hash = init_name_hash(); while (len--) { c = *(const unsigned char *)name++; if (c == '/' || c == '\0') return ERR_PTR(-EACCES); - hash = partial_name_hash(c, hash); } - this.hash = end_name_hash(hash); /* * See if the low-level filesystem might want * to use its own hash.. diff --git a/include/linux/dcache.h b/include/linux/dcache.h index d64a55b23afd..79e77f9419ff 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -54,18 +54,41 @@ extern struct dentry_stat_t dentry_stat; static inline int dentry_cmp(const unsigned char *cs, size_t scount, const unsigned char *ct, size_t tcount) { - int ret; +#if 1 +/* Little-endian with fast unaligned accesses? */ + unsigned long a,b,mask; + + if (unlikely(scount != tcount)) + return 1; + + for (;;) { + a = *(unsigned long *)cs; + b = *(unsigned long *)ct; + if (tcount < sizeof(unsigned long)) + break; + if (unlikely(a != b)) + return 1; + cs += sizeof(unsigned long); + ct += sizeof(unsigned long); + tcount -= sizeof(unsigned long); + if (!tcount) + return 0; + } + mask = ~(~0ul << tcount*8); + return unlikely(!!((a ^ b) & mask)); +#else if (scount != tcount) return 1; + do { - ret = (*cs != *ct); - if (ret) - break; + if (*cs != *ct) + return 1; cs++; ct++; tcount--; } while (tcount); - return ret; + return 0; +#endif } /* Name hashing routines. Initial hash value */ @@ -89,14 +112,7 @@ static inline unsigned long end_name_hash(unsigned long hash) } /* Compute the hash for a name string. */ -static inline unsigned int -full_name_hash(const unsigned char *name, unsigned int len) -{ - unsigned long hash = init_name_hash(); - while (len--) - hash = partial_name_hash(*name++, hash); - return end_name_hash(hash); -} +extern unsigned int full_name_hash(const unsigned char *, unsigned int); /* * Try to keep struct dentry aligned on 64 byte cachelines (this will @@ -309,7 +325,8 @@ extern struct dentry *d_ancestor(struct dentry *, struct dentry *); extern struct dentry *d_lookup(struct dentry *, struct qstr *); extern struct dentry *d_hash_and_lookup(struct dentry *, struct qstr *); extern struct dentry *__d_lookup(struct dentry *, struct qstr *); -extern struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name, +extern struct dentry *__d_lookup_rcu(const struct dentry *parent, + const struct qstr *name, unsigned *seq, struct inode **inode); /**