FUSE currently doesn't scale well on large NUMA systems. This is due to the latency of accessing the fc->lock spinlock from different NUMA nodes. The fix is to localize spinlock access within each NUMA node. This patch adds a 'numa' mount option. When the 'numa' option is enabled, FUSE groups all queues and creates one set per NUMA node. Users of /dev/fuse should listen on /dev/fuse from all NUMA nodes. By default, numa is disabled. Signed-off-by: Srinivas Eeda <srinivas.eeda@xxxxxxxxxx> --- fs/fuse/cuse.c | 2 +- fs/fuse/fuse_i.h | 5 ++++- fs/fuse/inode.c | 15 +++++++++++++-- 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index 6f96a8d..de10bdf 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -498,7 +498,7 @@ static int cuse_channel_open(struct inode *inode, struct file *file) if (!cc) return -ENOMEM; - fuse_conn_init(&cc->fc); + fuse_conn_init(&cc->fc, 0); INIT_LIST_HEAD(&cc->list); cc->fc.release = cuse_fc_release; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 6aeba86..dd9a7ad 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -353,6 +353,9 @@ struct fuse_conn { /** Lock protecting accessess to members of this structure */ spinlock_t lock; + /** tracks if numa enabled */ + int numa_on; + /** Mutex protecting against directory alias creation */ struct mutex inst_mutex; @@ -763,7 +766,7 @@ void fuse_conn_kill(struct fuse_conn *fc); /** * Initialize fuse_conn */ -void fuse_conn_init(struct fuse_conn *fc); +void fuse_conn_init(struct fuse_conn *fc, int numaon); /** * Release reference to fuse_conn diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 137185c..1837f74 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -69,6 +69,7 @@ struct fuse_mount_data { unsigned flags; unsigned max_read; unsigned blksize; + unsigned numaon; }; struct fuse_forget_link *fuse_alloc_forget(void) @@ -443,6 +444,7 @@ enum { OPT_ALLOW_OTHER, OPT_MAX_READ, OPT_BLKSIZE, + OPT_NUMA_ON, OPT_ERR }; @@ -455,6 +457,7 @@ static const 
match_table_t tokens = { {OPT_ALLOW_OTHER, "allow_other"}, {OPT_MAX_READ, "max_read=%u"}, {OPT_BLKSIZE, "blksize=%u"}, + {OPT_NUMA_ON, "numa"}, {OPT_ERR, NULL} }; @@ -528,6 +531,10 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev) d->blksize = value; break; + case OPT_NUMA_ON: + d->numaon = 1; + break; + default: return 0; } @@ -555,16 +562,20 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root) seq_printf(m, ",max_read=%u", fc->max_read); if (sb->s_bdev && sb->s_blocksize != FUSE_DEFAULT_BLKSIZE) seq_printf(m, ",blksize=%lu", sb->s_blocksize); + if (fc->numa_on) + seq_puts(m, "numa"); return 0; } -void fuse_conn_init(struct fuse_conn *fc) +void fuse_conn_init(struct fuse_conn *fc, int numaon) { memset(fc, 0, sizeof(*fc)); spin_lock_init(&fc->lock); mutex_init(&fc->inst_mutex); init_rwsem(&fc->killsb); atomic_set(&fc->count, 1); + if (numaon) + fc->numa_on = 1; init_waitqueue_head(&fc->waitq); init_waitqueue_head(&fc->blocked_waitq); init_waitqueue_head(&fc->reserved_req_waitq); @@ -1008,7 +1019,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) if (!fc) goto err_fput; - fuse_conn_init(fc); + fuse_conn_init(fc, d.numaon); fc->dev = sb->s_dev; fc->sb = sb; -- 1.5.4.3 -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html