The patch titled
     knfsd: make nfsd readahead params cache SMP-friendly
has been added to the -mm tree.  Its filename is
     knfsd-make-nfsd-readahead-params-cache-smp-friendly.patch

See http://www.zip.com.au/~akpm/linux/patches/stuff/added-to-mm.txt
to find out what to do about this

------------------------------------------------------
Subject: knfsd: make nfsd readahead params cache SMP-friendly
From: Greg Banks <gnb@xxxxxxxxxxxxxxxxx>

Make the nfsd read-ahead params cache more SMP-friendly by changing the
single global list and lock into a fixed 16-bucket hashtable with
per-bucket locks.  This reduces spinlock contention in nfsd_read() on
read-heavy workloads on multiprocessor servers.

Testing was on a 4 CPU, 4 NIC Altix using 4 IRIX clients, each doing 1K
streaming reads at full line rate.  The server ran 128 nfsd threads,
which sizes the RA cache at 256 entries, of which only a handful were
actually used.  Flat profiling shows nfsd_read(), including the inlined
nfsd_get_raparms(), taking 10.4% of each CPU.  This patch drops that
contribution to 1.71% of each CPU.

Signed-off-by: Greg Banks <gnb@xxxxxxxxxxxxxxxxx>
Signed-off-by: Neil Brown <neilb@xxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxx>
---

 fs/nfsd/vfs.c |   60 +++++++++++++++++++++++++++++++++++-------------
 1 files changed, 44 insertions(+), 16 deletions(-)

diff -puN fs/nfsd/vfs.c~knfsd-make-nfsd-readahead-params-cache-smp-friendly fs/nfsd/vfs.c
--- a/fs/nfsd/vfs.c~knfsd-make-nfsd-readahead-params-cache-smp-friendly
+++ a/fs/nfsd/vfs.c
@@ -54,6 +54,7 @@
 #include <linux/nfsd_idmap.h>
 #include <linux/security.h>
 #endif /* CONFIG_NFSD_V4 */
+#include <linux/jhash.h>
 
 #include <asm/uaccess.h>
 
@@ -81,10 +82,19 @@ struct raparms {
 	dev_t			p_dev;
 	int			p_set;
 	struct file_ra_state	p_ra;
+	unsigned int		p_hindex;
 };
 
+struct raparm_hbucket {
+	struct raparms		*pb_head;
+	spinlock_t		pb_lock;
+} ____cacheline_aligned_in_smp;
+
 static struct raparms *		raparml;
-static struct raparms *		raparm_cache;
+#define RAPARM_HASH_BITS	4
+#define RAPARM_HASH_SIZE	(1<<RAPARM_HASH_BITS)
+#define RAPARM_HASH_MASK	(RAPARM_HASH_SIZE-1)
+static struct raparm_hbucket	raparm_hash[RAPARM_HASH_SIZE];
 
 /*
  * Called from nfsd_lookup and encode_dirent. Check if we have crossed
@@ -743,16 +753,20 @@ nfsd_sync_dir(struct dentry *dp)
  * Obtain the readahead parameters for the file
  * specified by (dev, ino).
  */
-static DEFINE_SPINLOCK(ra_lock);
 
 static inline struct raparms *
 nfsd_get_raparms(dev_t dev, ino_t ino)
 {
 	struct raparms	*ra, **rap, **frap = NULL;
 	int depth = 0;
+	unsigned int	hash;
+	struct raparm_hbucket *rab;
+
+	hash = jhash_2words(dev, ino, 0xfeedbeef) & RAPARM_HASH_MASK;
+	rab = &raparm_hash[hash];
 
-	spin_lock(&ra_lock);
-	for (rap = &raparm_cache; (ra = *rap); rap = &ra->p_next) {
+	spin_lock(&rab->pb_lock);
+	for (rap = &rab->pb_head; (ra = *rap); rap = &ra->p_next) {
 		if (ra->p_ino == ino && ra->p_dev == dev)
 			goto found;
 		depth++;
@@ -761,7 +775,7 @@ nfsd_get_raparms(dev_t dev, ino_t ino)
 	}
 	depth = nfsdstats.ra_size*11/10;
 	if (!frap) {
-		spin_unlock(&ra_lock);
+		spin_unlock(&rab->pb_lock);
 		return NULL;
 	}
 	rap = frap;
@@ -769,15 +783,16 @@ nfsd_get_raparms(dev_t dev, ino_t ino)
 	ra->p_dev = dev;
 	ra->p_ino = ino;
 	ra->p_set = 0;
+	ra->p_hindex = hash;
 found:
-	if (rap != &raparm_cache) {
+	if (rap != &rab->pb_head) {
 		*rap = ra->p_next;
-		ra->p_next   = raparm_cache;
-		raparm_cache = ra;
+		ra->p_next   = rab->pb_head;
+		rab->pb_head = ra;
 	}
 	ra->p_count++;
 	nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++;
-	spin_unlock(&ra_lock);
+	spin_unlock(&rab->pb_lock);
 	return ra;
 }
 
@@ -853,11 +868,12 @@ nfsd_vfs_read(struct svc_rqst *rqstp, st
 
 	/* Write back readahead params */
 	if (ra) {
-		spin_lock(&ra_lock);
+		struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex];
+		spin_lock(&rab->pb_lock);
 		ra->p_ra = file->f_ra;
 		ra->p_set = 1;
 		ra->p_count--;
-		spin_unlock(&ra_lock);
+		spin_unlock(&rab->pb_lock);
 	}
 
 	if (err >= 0) {
@@ -1833,11 +1849,11 @@ nfsd_permission(struct svc_export *exp, 
 void
 nfsd_racache_shutdown(void)
 {
-	if (!raparm_cache)
+	if (!raparml)
 		return;
 	dprintk("nfsd: freeing readahead buffers.\n");
 	kfree(raparml);
-	raparm_cache = raparml = NULL;
+	raparml = NULL;
 }
 /*
  * Initialize readahead param cache
@@ -1846,19 +1862,31 @@ int
 nfsd_racache_init(int cache_size)
 {
 	int	i;
+	int	j = 0;
+	int	nperbucket;
 
-	if (raparm_cache)
+
+	if (raparml)
 		return 0;
+	if (cache_size < 2*RAPARM_HASH_SIZE)
+		cache_size = 2*RAPARM_HASH_SIZE;
 	raparml = kmalloc(sizeof(struct raparms) * cache_size, GFP_KERNEL);
 
 	if (raparml != NULL) {
 		dprintk("nfsd: allocating %d readahead buffers.\n",
 			cache_size);
+		for (i = 0 ; i < RAPARM_HASH_SIZE ; i++) {
+			raparm_hash[i].pb_head = NULL;
+			spin_lock_init(&raparm_hash[i].pb_lock);
+		}
+		nperbucket = cache_size >> RAPARM_HASH_BITS;
 		memset(raparml, 0, sizeof(struct raparms) * cache_size);
 		for (i = 0; i < cache_size - 1; i++) {
-			raparml[i].p_next = raparml + i + 1;
+			if (i % nperbucket == 0)
+				raparm_hash[j++].pb_head = raparml + i;
+			if (i % nperbucket < nperbucket-1)
+				raparml[i].p_next = raparml + i + 1;
 		}
-		raparm_cache = raparml;
 	} else {
 		printk(KERN_WARNING
 		       "nfsd: Could not allocate memory read-ahead cache.\n");
_
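To see the locking pattern in isolation, here is a minimal user-space sketch
of the hashed per-bucket scheme described above.  It is an illustration under
stated assumptions, not the patch's code: pthread mutexes stand in for kernel
spinlocks, a simple multiplicative hash stands in for jhash_2words(), and
every identifier in it (struct bucket, hash_key(), lookup()) is invented for
the example rather than taken from fs/nfsd/vfs.c.

/*
 * Sketch: a fixed array of buckets, each with its own lock and list head,
 * keyed by a hash of (dev, ino).  Threads whose keys hash to different
 * buckets take different locks and never contend with each other.
 */
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define HASH_BITS 4
#define HASH_SIZE (1 << HASH_BITS)
#define HASH_MASK (HASH_SIZE - 1)

struct entry {
	uint64_t	dev;
	uint64_t	ino;
	int		hits;		/* stand-in for per-file readahead state */
	struct entry	*next;
};

/* One list head and one lock per bucket. */
struct bucket {
	pthread_mutex_t	lock;
	struct entry	*head;
};

static struct bucket table[HASH_SIZE];

/* Simple multiplicative hash; the real patch uses jhash_2words(). */
static unsigned int hash_key(uint64_t dev, uint64_t ino)
{
	return (unsigned int)((dev ^ ino) * 2654435761u) & HASH_MASK;
}

static void table_init(void)
{
	for (int i = 0; i < HASH_SIZE; i++) {
		pthread_mutex_init(&table[i].lock, NULL);
		table[i].head = NULL;
	}
}

/* Find or insert the entry for (dev, ino), taking only one bucket's lock. */
static struct entry *lookup(uint64_t dev, uint64_t ino)
{
	struct bucket *b = &table[hash_key(dev, ino)];
	struct entry *e;

	pthread_mutex_lock(&b->lock);
	for (e = b->head; e != NULL; e = e->next)
		if (e->dev == dev && e->ino == ino)
			goto out;

	e = calloc(1, sizeof(*e));
	if (e != NULL) {
		e->dev = dev;
		e->ino = ino;
		e->next = b->head;	/* push onto this bucket's list */
		b->head = e;
	}
out:
	if (e != NULL)
		e->hits++;
	pthread_mutex_unlock(&b->lock);
	return e;
}

int main(void)
{
	table_init();
	/* Two files on one device; they may land in different buckets. */
	struct entry *a = lookup(8, 100);
	struct entry *c = lookup(8, 101);
	lookup(8, 100);			/* second hit on the first file */
	printf("bucket(a)=%u hits=%d, bucket(c)=%u hits=%d\n",
	       hash_key(8, 100), a ? a->hits : 0,
	       hash_key(8, 101), c ? c->hits : 0);
	return 0;
}

Build with something like "cc -pthread sketch.c" and run: both lookups of
(dev 8, ino 100) return the same entry, while (8, 101) may live in another
bucket under another lock.  That is the design point of the patch as well:
with 16 buckets (each ____cacheline_aligned_in_smp so buckets do not share
cache lines), contention on any one lock is roughly a sixteenth of the
single-lock case, assuming the hash spreads files evenly, and
nfsd_racache_init() carves the raparms array into per-bucket free chains so
each bucket starts with its own share of entries.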
Patches currently in -mm which might be from gnb@xxxxxxxxxxxxxxxxx are

cpumask-add-highest_possible_node_id.patch
cpumask-export-cpu_online_map-and-cpu_possible_map.patch
cpumask-export-node_to_cpu_mask-consistently.patch
knfsd-knfsd-add-some-missing-newlines-in-printks.patch
knfsd-knfsd-remove-an-unused-variable-from-e_show.patch
knfsd-knfsd-remove-an-unused-variable-from-auth_unix_lookup.patch
knfsd-use-seq_start_token-instead-of-hardcoded-magic-void1.patch
knfsd-move-tempsock-aging-to-a-timer.patch
knfsd-move-tempsock-aging-to-a-timer-tidy.patch
knfsd-convert-sk_inuse-to-atomic_t.patch
knfsd-use-new-lock-for-svc_sock-deferred-list.patch
knfsd-convert-sk_reserved-to-atomic_t.patch
knfsd-test-and-set-sk_busy-atomically.patch
knfsd-split-svc_serv-into-pools.patch
knfsd-add-svc_get.patch
knfsd-add-svc_set_num_threads.patch
knfsd-use-svc_set_num_threads-to-manage-threads-in-knfsd.patch
knfsd-make-rpc-threads-pools-numa-aware.patch
knfsd-make-rpc-threads-pools-numa-aware-fix.patch
knfsd-allow-admin-to-set-nthreads-per-node.patch
knfsd-prepare-knfsd-for-support-of-rsize-wsize-of-up-to-1mb-over-tcp.patch
knfsd-make-nfsd-readahead-params-cache-smp-friendly.patch
knfsd-knfsd-cache-ipmap-per-tcp-socket.patch