Re: [PATCH -v2 2/6] fuse: make the maximum read/write request size tunable

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Mitsuo Hayasaka <mitsuo.hayasaka.hu@xxxxxxxxxxx> writes:

> Make the maximum read/write request size tunable between
> 32 pages and the number of pages equivalent to pipe_max_size.
> The max_read/max_write mount options affect the size. The
> 32 pages are used by default without these options.
>
> Currently, the maximum read/write request size is limited to
> FUSE_MAX_PAGES_PER_REQ which is equal to 32 pages. It is
> required to change it in order to maximize the throughput
> since the optimized value depends on various factors such as
> type and version of local filesystems used and hardware specs,
> etc.
>
> In addition, recently FUSE is widely used as a gateway to
> connect cloud storage services and distributed filesystems.
> Larger data might be stored in them over networking via FUSE
> and the overhead might affect the read/write throughput.
>
> So, a tunable functionality of read/write request size is
> useful.

One problem with this patch is that one request will use more memory
even by default (to be more precise 50% more on 64bit since
sizeof(struct fuse_request) is currently 608 bytes, six of which fit in
a single page slab, so one request uses 4096/6 bytes.  After this patch
requests will use 1024 bytes).

One idea that I already mentioned is to allocate the page array
separately (possibly adding a small array inline, but I suspect that
doesn't make much sense).  And allocate the page array only for requests
that actually need it (i.e. read, write and friends).  That way the
memory use of requests would actually decrease significantly, not
increase.

Hmm?

Thanks,
Miklos

>
> Signed-off-by: Mitsuo Hayasaka <mitsuo.hayasaka.hu@xxxxxxxxxxx>
> Cc: Miklos Szeredi <miklos@xxxxxxxxxx>
> Cc: Nikolaus Rath <Nikolaus@xxxxxxxx>
> Cc: Liu Yuan <namei.unix@xxxxxxxxx>
> Cc: Has-Wen Nienhuys <hanwen@xxxxxxxxx>
> ---
>
>  fs/fuse/dev.c    |   27 ++++++++++++++-------------
>  fs/fuse/file.c   |   32 +++++++++++++++++---------------
>  fs/fuse/fuse_i.h |   27 +++++++++++++++++----------
>  fs/fuse/inode.c  |   42 +++++++++++++++++++++++++++++++++++-------
>  4 files changed, 83 insertions(+), 45 deletions(-)
>
> diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
> index 7df2b5e..511560b 100644
> --- a/fs/fuse/dev.c
> +++ b/fs/fuse/dev.c
> @@ -34,35 +34,36 @@ static struct fuse_conn *fuse_get_conn(struct file *file)
>  	return file->private_data;
>  }
>  
> -static void fuse_request_init(struct fuse_req *req)
> +static void fuse_request_init(struct fuse_conn *fc, struct fuse_req *req)
>  {
> -	memset(req, 0, sizeof(*req));
> +	memset(req, 0, fc->fuse_req_size);
>  	INIT_LIST_HEAD(&req->list);
>  	INIT_LIST_HEAD(&req->intr_entry);
>  	init_waitqueue_head(&req->waitq);
>  	atomic_set(&req->count, 1);
>  }
>  
> -struct fuse_req *fuse_request_alloc(void)
> +struct fuse_req *fuse_request_alloc(struct fuse_conn *fc)
>  {
> -	struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_KERNEL);
> +	struct fuse_req *req = kmalloc(fc->fuse_req_size, GFP_KERNEL);
> +
>  	if (req)
> -		fuse_request_init(req);
> +		fuse_request_init(fc, req);
>  	return req;
>  }
>  EXPORT_SYMBOL_GPL(fuse_request_alloc);
>  
> -struct fuse_req *fuse_request_alloc_nofs(void)
> +struct fuse_req *fuse_request_alloc_nofs(struct fuse_conn *fc)
>  {
> -	struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_NOFS);
> +	struct fuse_req *req = kmalloc(fc->fuse_req_size, GFP_NOFS);
>  	if (req)
> -		fuse_request_init(req);
> +		fuse_request_init(fc, req);
>  	return req;
>  }
>  
>  void fuse_request_free(struct fuse_req *req)
>  {
> -	kmem_cache_free(fuse_req_cachep, req);
> +	kfree(req);
>  }
>  
>  static void block_sigs(sigset_t *oldset)
> @@ -116,7 +117,7 @@ struct fuse_req *fuse_get_req(struct fuse_conn *fc)
>  	if (!fc->connected)
>  		goto out;
>  
> -	req = fuse_request_alloc();
> +	req = fuse_request_alloc(fc);
>  	err = -ENOMEM;
>  	if (!req)
>  		goto out;
> @@ -166,7 +167,7 @@ static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req)
>  	struct fuse_file *ff = file->private_data;
>  
>  	spin_lock(&fc->lock);
> -	fuse_request_init(req);
> +	fuse_request_init(fc, req);
>  	BUG_ON(ff->reserved_req);
>  	ff->reserved_req = req;
>  	wake_up_all(&fc->reserved_req_waitq);
> @@ -193,7 +194,7 @@ struct fuse_req *fuse_get_req_nofail(struct fuse_conn *fc, struct file *file)
>  
>  	atomic_inc(&fc->num_waiting);
>  	wait_event(fc->blocked_waitq, !fc->blocked);
> -	req = fuse_request_alloc();
> +	req = fuse_request_alloc(fc);
>  	if (!req)
>  		req = get_reserved_req(fc, file);
>  
> @@ -1564,7 +1565,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
>  	else if (outarg->offset + num > file_size)
>  		num = file_size - outarg->offset;
>  
> -	while (num && req->num_pages < FUSE_MAX_PAGES_PER_REQ) {
> +	while (num && req->num_pages < fc->max_pages) {
>  		struct page *page;
>  		unsigned int this_num;
>  
> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> index b321a68..7b96b00 100644
> --- a/fs/fuse/file.c
> +++ b/fs/fuse/file.c
> @@ -57,7 +57,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
>  		return NULL;
>  
>  	ff->fc = fc;
> -	ff->reserved_req = fuse_request_alloc();
> +	ff->reserved_req = fuse_request_alloc(fc);
>  	if (unlikely(!ff->reserved_req)) {
>  		kfree(ff);
>  		return NULL;
> @@ -653,7 +653,7 @@ static int fuse_readpages_fill(void *_data, struct page *page)
>  	fuse_wait_on_page_writeback(inode, page->index);
>  
>  	if (req->num_pages &&
> -	    (req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
> +	    (req->num_pages == fc->max_pages ||
>  	     (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read ||
>  	     req->pages[req->num_pages - 1]->index + 1 != page->index)) {
>  		fuse_send_readpages(req, data->file);
> @@ -866,7 +866,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
>  		if (!fc->big_writes)
>  			break;
>  	} while (iov_iter_count(ii) && count < fc->max_write &&
> -		 req->num_pages < FUSE_MAX_PAGES_PER_REQ && offset == 0);
> +		 req->num_pages < fc->max_pages && offset == 0);
>  
>  	return count > 0 ? count : err;
>  }
> @@ -1020,8 +1020,9 @@ static void fuse_release_user_pages(struct fuse_req *req, int write)
>  	}
>  }
>  
> -static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf,
> -			       size_t *nbytesp, int write)
> +static int fuse_get_user_pages(struct fuse_conn *fc, struct fuse_req *req,
> +			       const char __user *buf, size_t *nbytesp,
> +			       int write)
>  {
>  	size_t nbytes = *nbytesp;
>  	unsigned long user_addr = (unsigned long) buf;
> @@ -1038,9 +1039,9 @@ static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf,
>  		return 0;
>  	}
>  
> -	nbytes = min_t(size_t, nbytes, FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT);
> +	nbytes = min_t(size_t, nbytes, fc->max_pages << PAGE_SHIFT);
>  	npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
> -	npages = clamp(npages, 1, FUSE_MAX_PAGES_PER_REQ);
> +	npages = clamp(npages, 1, (int)fc->max_pages);
>  	npages = get_user_pages_fast(user_addr, npages, !write, req->pages);
>  	if (npages < 0)
>  		return npages;
> @@ -1077,7 +1078,7 @@ ssize_t fuse_direct_io(struct file *file, const char __user *buf,
>  		size_t nres;
>  		fl_owner_t owner = current->files;
>  		size_t nbytes = min(count, nmax);
> -		int err = fuse_get_user_pages(req, buf, &nbytes, write);
> +		int err = fuse_get_user_pages(fc, req, buf, &nbytes, write);
>  		if (err) {
>  			res = err;
>  			break;
> @@ -1269,7 +1270,7 @@ static int fuse_writepage_locked(struct page *page)
>  
>  	set_page_writeback(page);
>  
> -	req = fuse_request_alloc_nofs();
> +	req = fuse_request_alloc_nofs(fc);
>  	if (!req)
>  		goto err;
>  
> @@ -1695,10 +1696,11 @@ static int fuse_copy_ioctl_iovec_old(struct iovec *dst, void *src,
>  }
>  
>  /* Make sure iov_length() won't overflow */
> -static int fuse_verify_ioctl_iov(struct iovec *iov, size_t count)
> +static int fuse_verify_ioctl_iov(struct fuse_conn *fc, struct iovec *iov,
> +				 size_t count)
>  {
>  	size_t n;
> -	u32 max = FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT;
> +	u32 max = fc->max_pages << PAGE_SHIFT;
>  
>  	for (n = 0; n < count; n++) {
>  		if (iov->iov_len > (size_t) max)
> @@ -1821,7 +1823,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
>  	BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE);
>  
>  	err = -ENOMEM;
> -	pages = kcalloc(FUSE_MAX_PAGES_PER_REQ, sizeof(pages[0]), GFP_KERNEL);
> +	pages = kcalloc(fc->max_pages, sizeof(pages[0]), GFP_KERNEL);
>  	iov_page = (struct iovec *) __get_free_page(GFP_KERNEL);
>  	if (!pages || !iov_page)
>  		goto out;
> @@ -1860,7 +1862,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
>  
>  	/* make sure there are enough buffer pages and init request with them */
>  	err = -ENOMEM;
> -	if (max_pages > FUSE_MAX_PAGES_PER_REQ)
> +	if (max_pages > fc->max_pages)
>  		goto out;
>  	while (num_pages < max_pages) {
>  		pages[num_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
> @@ -1943,11 +1945,11 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
>  		in_iov = iov_page;
>  		out_iov = in_iov + in_iovs;
>  
> -		err = fuse_verify_ioctl_iov(in_iov, in_iovs);
> +		err = fuse_verify_ioctl_iov(fc, in_iov, in_iovs);
>  		if (err)
>  			goto out;
>  
> -		err = fuse_verify_ioctl_iov(out_iov, out_iovs);
> +		err = fuse_verify_ioctl_iov(fc, out_iov, out_iovs);
>  		if (err)
>  			goto out;
>  
> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> index 771fb63..46df615 100644
> --- a/fs/fuse/fuse_i.h
> +++ b/fs/fuse/fuse_i.h
> @@ -22,9 +22,10 @@
>  #include <linux/rbtree.h>
>  #include <linux/poll.h>
>  #include <linux/workqueue.h>
> +#include <linux/pipe_fs_i.h>
>  
> -/** Max number of pages that can be used in a single read request */
> -#define FUSE_MAX_PAGES_PER_REQ 32
> +/** Default number of pages that can be used in a single read/write request */
> +#define FUSE_DEFAULT_MAX_PAGES_PER_REQ 32
>  
>  /** Bias for fi->writectr, meaning new writepages must not be sent */
>  #define FUSE_NOWRITE INT_MIN
> @@ -290,12 +291,6 @@ struct fuse_req {
>  		struct fuse_lk_in lk_in;
>  	} misc;
>  
> -	/** page vector */
> -	struct page *pages[FUSE_MAX_PAGES_PER_REQ];
> -
> -	/** number of pages in vector */
> -	unsigned num_pages;
> -
>  	/** offset of data on first page */
>  	unsigned page_offset;
>  
> @@ -313,6 +308,12 @@ struct fuse_req {
>  
>  	/** Request is stolen from fuse_file->reserved_req */
>  	struct file *stolen_file;
> +
> +	/** number of pages in vector */
> +	unsigned num_pages;
> +
> +	/** page vector */
> +	struct page *pages[0];
>  };
>  
>  /**
> @@ -347,6 +348,12 @@ struct fuse_conn {
>  	/** Maximum write size */
>  	unsigned max_write;
>  
> +	/** Maximum number of pages per req */
> +	unsigned max_pages;
> +
> +	/** fuse_req size per connection */
> +	unsigned fuse_req_size;
> +
>  	/** Readers of the connection are waiting on this */
>  	wait_queue_head_t waitq;
>  
> @@ -655,9 +662,9 @@ void fuse_ctl_cleanup(void);
>  /**
>   * Allocate a request
>   */
> -struct fuse_req *fuse_request_alloc(void);
> +struct fuse_req *fuse_request_alloc(struct fuse_conn *fc);
>  
> -struct fuse_req *fuse_request_alloc_nofs(void);
> +struct fuse_req *fuse_request_alloc_nofs(struct fuse_conn *fc);
>  
>  /**
>   * Free a request
> diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
> index 1cd6165..f7f3c5d 100644
> --- a/fs/fuse/inode.c
> +++ b/fs/fuse/inode.c
> @@ -550,6 +550,9 @@ void fuse_conn_init(struct fuse_conn *fc)
>  	atomic_set(&fc->num_waiting, 0);
>  	fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND;
>  	fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD;
> +	fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ;
> +	fc->fuse_req_size = sizeof(struct fuse_req) +
> +			    fc->max_pages * sizeof(struct page *);
>  	fc->khctr = 0;
>  	fc->polled_files = RB_ROOT;
>  	fc->reqctr = 0;
> @@ -774,6 +777,18 @@ static int set_global_limit(const char *val, struct kernel_param *kp)
>  	return 0;
>  }
>  
> +static void set_conn_max_pages(struct fuse_conn *fc, unsigned max_pages)
> +{
> +	unsigned pipe_max_size = pipe_get_max_size();
> +	unsigned pipe_max_pages = DIV_ROUND_UP(pipe_max_size, PAGE_SIZE);
> +
> +	if (max_pages > fc->max_pages) {
> +		fc->max_pages = min_t(unsigned, pipe_max_pages, max_pages);
> +		fc->fuse_req_size = sizeof(struct fuse_req) +
> +				    fc->max_pages * sizeof(struct page *);
> +	}
> +}
> +
>  static void process_init_limits(struct fuse_conn *fc, struct fuse_init_out *arg)
>  {
>  	int cap_sys_admin = capable(CAP_SYS_ADMIN);
> @@ -807,6 +822,7 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
>  		fc->conn_error = 1;
>  	else {
>  		unsigned long ra_pages;
> +		unsigned max_pages;
>  
>  		process_init_limits(fc, arg);
>  
> @@ -844,6 +860,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
>  		fc->minor = arg->minor;
>  		fc->max_write = arg->minor < 5 ? 4096 : arg->max_write;
>  		fc->max_write = max_t(unsigned, 4096, fc->max_write);
> +		max_pages = DIV_ROUND_UP(fc->max_write, PAGE_SIZE);
> +		set_conn_max_pages(fc, max_pages);
>  		fc->conn_init = 1;
>  	}
>  	fc->blocked = 0;
> @@ -880,6 +898,20 @@ static void fuse_free_conn(struct fuse_conn *fc)
>  	kfree(fc);
>  }
>  
> +static void fuse_conn_setup(struct fuse_conn *fc,
> +			    struct fuse_mount_data *d)
> +{
> +	unsigned max_pages;
> +
> +	fc->release = fuse_free_conn;
> +	fc->flags = d->flags;
> +	fc->user_id = d->user_id;
> +	fc->group_id = d->group_id;
> +	fc->max_read = max_t(unsigned, 4096, d->max_read);
> +	max_pages = DIV_ROUND_UP(fc->max_read, PAGE_SIZE);
> +	set_conn_max_pages(fc, max_pages);
> +}
> +
>  static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
>  {
>  	int err;
> @@ -986,11 +1018,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
>  		fc->dont_mask = 1;
>  	sb->s_flags |= MS_POSIXACL;
>  
> -	fc->release = fuse_free_conn;
> -	fc->flags = d.flags;
> -	fc->user_id = d.user_id;
> -	fc->group_id = d.group_id;
> -	fc->max_read = max_t(unsigned, 4096, d.max_read);
> +	fuse_conn_setup(fc, &d);
>  
>  	/* Used by get_root_inode() */
>  	sb->s_fs_info = fc;
> @@ -1003,12 +1031,12 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
>  	/* only now - we want root dentry with NULL ->d_op */
>  	sb->s_d_op = &fuse_dentry_operations;
>  
> -	init_req = fuse_request_alloc();
> +	init_req = fuse_request_alloc(fc);
>  	if (!init_req)
>  		goto err_put_root;
>  
>  	if (is_bdev) {
> -		fc->destroy_req = fuse_request_alloc();
> +		fc->destroy_req = fuse_request_alloc(fc);
>  		if (!fc->destroy_req)
>  			goto err_free_init_req;
>  	}
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux