On Mon, 2021-06-21 at 22:46 +0100, David Howells wrote:
> The current hash algorithm used for hashing cookie keys is really bad,
> producing almost no dispersion (after a test kernel build, ~30000 files
> were split over just 18 out of the 32768 hash buckets).
> 
> Borrow the full_name_hash() hash function into fscache to do the hashing
> for cookie keys and, in the future, volume keys.
> 
> I don't want to use full_name_hash() as-is because I want the hash value to
> be consistent across arches and over time as the hash value produced may
> get used on disk.
> 
> I can also optimise parts of it away as the key will always be a padded
> array of aligned 32-bit words.
> 
> Signed-off-by: David Howells <dhowells@xxxxxxxxxx>
> ---

What happens when this patch encounters a cache that was built before it?
Do you need to couple this with some sort of global cache invalidation or
rehashing event?

>  fs/fscache/cookie.c   |   14 +-------------
>  fs/fscache/internal.h |    2 ++
>  fs/fscache/main.c     |   39 +++++++++++++++++++++++++++++++++++++++
>  3 files changed, 42 insertions(+), 13 deletions(-)
> 
> diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
> index ec9bce33085f..2558814193e9 100644
> --- a/fs/fscache/cookie.c
> +++ b/fs/fscache/cookie.c
> @@ -87,10 +87,8 @@ void fscache_free_cookie(struct fscache_cookie *cookie)
>  static int fscache_set_key(struct fscache_cookie *cookie,
>  			   const void *index_key, size_t index_key_len)
>  {
> -	unsigned long long h;
>  	u32 *buf;
>  	int bufs;
> -	int i;
>  
>  	bufs = DIV_ROUND_UP(index_key_len, sizeof(*buf));
>  
> @@ -104,17 +102,7 @@ static int fscache_set_key(struct fscache_cookie *cookie,
>  	}
>  
>  	memcpy(buf, index_key, index_key_len);
> -
> -	/* Calculate a hash and combine this with the length in the first word
> -	 * or first half word
> -	 */
> -	h = (unsigned long)cookie->parent;
> -	h += index_key_len + cookie->type;
> -
> -	for (i = 0; i < bufs; i++)
> -		h += buf[i];
> -
> -	cookie->key_hash = h ^ (h >> 32);
> +	cookie->key_hash = fscache_hash(0, buf, bufs);
>  	return 0;
>  }
>  
> diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h
> index 200082cafdda..a49136c63e4b 100644
> --- a/fs/fscache/internal.h
> +++ b/fs/fscache/internal.h
> @@ -74,6 +74,8 @@ extern struct workqueue_struct *fscache_object_wq;
>  extern struct workqueue_struct *fscache_op_wq;
>  DECLARE_PER_CPU(wait_queue_head_t, fscache_object_cong_wait);
>  
> +extern unsigned int fscache_hash(unsigned int salt, unsigned int *data, unsigned int n);
> +
>  static inline bool fscache_object_congested(void)
>  {
>  	return workqueue_congested(WORK_CPU_UNBOUND, fscache_object_wq);
> diff --git a/fs/fscache/main.c b/fs/fscache/main.c
> index c1e6cc9091aa..4207f98e405f 100644
> --- a/fs/fscache/main.c
> +++ b/fs/fscache/main.c
> @@ -93,6 +93,45 @@ static struct ctl_table fscache_sysctls_root[] = {
>  };
>  #endif
>  
> +/*
> + * Mixing scores (in bits) for (7,20):
> + * Input delta: 1-bit      2-bit
> + * 1 round:     330.3     9201.6
> + * 2 rounds:   1246.4    25475.4
> + * 3 rounds:   1907.1    31295.1
> + * 4 rounds:   2042.3    31718.6
> + * Perfect:    2048      31744
> + *            (32*64)   (32*31/2 * 64)
> + */
> +#define HASH_MIX(x, y, a)	\
> +	(	x ^= (a),	\
> +		y ^= x,	x = rol32(x, 7),\
> +		x += y,	y = rol32(y,20),\
> +		y *= 9			)
> +
> +static inline unsigned int fold_hash(unsigned long x, unsigned long y)
> +{
> +	/* Use arch-optimized multiply if one exists */
> +	return __hash_32(y ^ __hash_32(x));
> +}
> +
> +/*
> + * Generate a hash.  This is derived from full_name_hash(), but we want to be
> + * sure it is arch independent and that it doesn't change as bits of the
> + * computed hash value might appear on disk.  The caller also guarantees that
> + * the hashed data will be a series of aligned 32-bit words.
> + */
> +unsigned int fscache_hash(unsigned int salt, unsigned int *data, unsigned int n)
> +{
> +	unsigned int a, x = 0, y = salt;
> +
> +	for (; n; n--) {
> +		a = *data++;
> +		HASH_MIX(x, y, a);
> +	}
> +	return fold_hash(x, y);
> +}
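
The hash itself looks reasonable, and keeping it stable across arches
seems right if the values can end up on disk. FWIW, it transplants to
userspace easily enough if anyone wants to eyeball the dispersion
without doing a kernel build. In the sketch below, rol32() and the
generic __hash_32() (the GOLDEN_RATIO_32 multiply) are transcribed from
the kernel headers, fold_hash() is inlined (x and y never exceed 32 bits
here, so uint32_t suffices), and the two-word sequential keys are
made-up test input, not real cookie keys:

#include <stdint.h>
#include <stdio.h>

#define GOLDEN_RATIO_32 0x61C88647u

/* rol32(), as in the kernel's <linux/bitops.h> */
static inline uint32_t rol32(uint32_t word, unsigned int shift)
{
	return (word << shift) | (word >> ((-shift) & 31));
}

/* Generic __hash_32() from <linux/hash.h> */
static inline uint32_t __hash_32(uint32_t val)
{
	return val * GOLDEN_RATIO_32;
}

/* HASH_MIX(), copied from the patch above */
#define HASH_MIX(x, y, a)	\
	(	x ^= (a),	\
		y ^= x,	x = rol32(x, 7),\
		x += y,	y = rol32(y,20),\
		y *= 9			)

static uint32_t fscache_hash(uint32_t salt, uint32_t *data, unsigned int n)
{
	uint32_t a, x = 0, y = salt;

	for (; n; n--) {
		a = *data++;
		HASH_MIX(x, y, a);
	}
	/* fold_hash(), inlined */
	return __hash_32(y ^ __hash_32(x));
}

#define NBUCKETS 32768

int main(void)
{
	static unsigned int buckets[NBUCKETS];
	unsigned int used = 0, i;
	uint32_t key[2];

	/* ~30000 keys, mirroring the test build in the description */
	for (i = 0; i < 30000; i++) {
		key[0] = i;
		key[1] = 0x55555555;
		buckets[fscache_hash(0, key, 2) & (NBUCKETS - 1)]++;
	}
	for (i = 0; i < NBUCKETS; i++)
		used += (buckets[i] != 0);
	printf("%u of %u buckets used\n", used, NBUCKETS);
	return 0;
}

Feeding it key blobs captured from a real cache would obviously be a
better test than sequential integers, but it makes the mixing behaviour
easy to poke at.
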
> +
>  /*
>   * initialise the fs caching module
>   */
> 

-- 
Jeff Layton <jlayton@xxxxxxxxxx>