[PATCH 1/3] fuse: add numa mount option

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



FUSE currently doesn't scale well on large NUMA systems. This is due to the
latency of accessing the fc->lock spinlock from different NUMA nodes. The fix
is to localize spinlock access within each NUMA node.

This patch adds a 'numa' mount option. When the 'numa' option is enabled, FUSE
groups all queues and creates one set per NUMA node. Users of /dev/fuse should
listen on /dev/fuse from all NUMA nodes.

By default, the 'numa' option is disabled.

Signed-off-by: Srinivas Eeda <srinivas.eeda@xxxxxxxxxx>
---
 fs/fuse/cuse.c   |    2 +-
 fs/fuse/fuse_i.h |    5 ++++-
 fs/fuse/inode.c  |   15 +++++++++++++--
 3 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index 6f96a8d..de10bdf 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -498,7 +498,7 @@ static int cuse_channel_open(struct inode *inode, struct file *file)
 	if (!cc)
 		return -ENOMEM;
 
-	fuse_conn_init(&cc->fc);
+	fuse_conn_init(&cc->fc, 0);
 
 	INIT_LIST_HEAD(&cc->list);
 	cc->fc.release = cuse_fc_release;
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 6aeba86..dd9a7ad 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -353,6 +353,9 @@ struct fuse_conn {
 	/** Lock protecting accessess to  members of this structure */
 	spinlock_t lock;
 
+	/** tracks if numa enabled */
+	int numa_on;
+
 	/** Mutex protecting against directory alias creation */
 	struct mutex inst_mutex;
 
@@ -763,7 +766,7 @@ void fuse_conn_kill(struct fuse_conn *fc);
 /**
  * Initialize fuse_conn
  */
-void fuse_conn_init(struct fuse_conn *fc);
+void fuse_conn_init(struct fuse_conn *fc, int numaon);
 
 /**
  * Release reference to fuse_conn
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 137185c..1837f74 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -69,6 +69,7 @@ struct fuse_mount_data {
 	unsigned flags;
 	unsigned max_read;
 	unsigned blksize;
+	unsigned numaon;
 };
 
 struct fuse_forget_link *fuse_alloc_forget(void)
@@ -443,6 +444,7 @@ enum {
 	OPT_ALLOW_OTHER,
 	OPT_MAX_READ,
 	OPT_BLKSIZE,
+	OPT_NUMA_ON,
 	OPT_ERR
 };
 
@@ -455,6 +457,7 @@ static const match_table_t tokens = {
 	{OPT_ALLOW_OTHER,		"allow_other"},
 	{OPT_MAX_READ,			"max_read=%u"},
 	{OPT_BLKSIZE,			"blksize=%u"},
+	{OPT_NUMA_ON,			"numa"},
 	{OPT_ERR,			NULL}
 };
 
@@ -528,6 +531,10 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
 			d->blksize = value;
 			break;
 
+		case OPT_NUMA_ON:
+			d->numaon = 1;
+			break;
+
 		default:
 			return 0;
 		}
@@ -555,16 +562,20 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root)
 		seq_printf(m, ",max_read=%u", fc->max_read);
 	if (sb->s_bdev && sb->s_blocksize != FUSE_DEFAULT_BLKSIZE)
 		seq_printf(m, ",blksize=%lu", sb->s_blocksize);
+	if (fc->numa_on)
+		seq_puts(m, "numa");
 	return 0;
 }
 
-void fuse_conn_init(struct fuse_conn *fc)
+void fuse_conn_init(struct fuse_conn *fc, int numaon)
 {
 	memset(fc, 0, sizeof(*fc));
 	spin_lock_init(&fc->lock);
 	mutex_init(&fc->inst_mutex);
 	init_rwsem(&fc->killsb);
 	atomic_set(&fc->count, 1);
+	if (numaon)
+		fc->numa_on = 1;
 	init_waitqueue_head(&fc->waitq);
 	init_waitqueue_head(&fc->blocked_waitq);
 	init_waitqueue_head(&fc->reserved_req_waitq);
@@ -1008,7 +1019,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 	if (!fc)
 		goto err_fput;
 
-	fuse_conn_init(fc);
+	fuse_conn_init(fc, d.numaon);
 
 	fc->dev = sb->s_dev;
 	fc->sb = sb;
-- 
1.5.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux