On Sat, Mar 09, 2024 at 12:14:23PM +0800, Hou Tao wrote:
> Hi,
> 
> On 2/29/2024 11:01 PM, Brian Foster wrote:
> > On Wed, Feb 28, 2024 at 10:41:24PM +0800, Hou Tao wrote:
> >> From: Hou Tao <houtao1@xxxxxxxxxx>
> >>
> >> When reading a file kept in virtiofs from the kernel (e.g., insmod of
> >> a kernel module), if the cache of virtiofs is disabled, the read
> >> buffer will be passed to virtiofs through out_args[0].value instead of
> >> pages. Because virtiofs can't get the pages for the read buffer,
> >> virtio_fs_argbuf_new() will create a bounce buffer for the read buffer
> >> by using kmalloc() and copy the read buffer into the bounce buffer. If
> >> the read buffer is large (e.g., 1MB), the allocation will incur
> >> significant stress on the memory subsystem.
> >>
> >> So instead of allocating the bounce buffer by using kmalloc(),
> >> allocate a bounce buffer which is backed by scattered pages. The
> >> original idea was to use vmap(), but the use of GFP_ATOMIC is not
> >> possible for vmap(). To simplify the copy operations in the bounce
> >> buffer, use a bio_vec flex array to represent the argbuf. Also add an
> >> is_flat field in struct virtio_fs_argbuf to distinguish between
> >> kmalloc-ed and scattered bounce buffers.
> >>
> >> Signed-off-by: Hou Tao <houtao1@xxxxxxxxxx>
> >> ---
> >>  fs/fuse/virtio_fs.c | 163 ++++++++++++++++++++++++++++++++++++++++----
> >>  1 file changed, 149 insertions(+), 14 deletions(-)
> >>
> >> diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
> >> index f10fff7f23a0f..ffea684bd100d 100644
> >> --- a/fs/fuse/virtio_fs.c
> >> +++ b/fs/fuse/virtio_fs.c
> > ...
> >> @@ -408,42 +425,143 @@ static void virtio_fs_request_dispatch_work(struct work_struct *work)
> >>  	}
> >>  }
> >>  
> > ...
> >>  static void virtio_fs_argbuf_copy_from_in_arg(struct virtio_fs_argbuf *argbuf,
> >>  					      unsigned int offset,
> >>  					      const void *src, unsigned int len)
> >>  {
> >> -	memcpy(argbuf->buf + offset, src, len);
> >> +	struct iov_iter iter;
> >> +	unsigned int copied;
> >> +
> >> +	if (argbuf->is_flat) {
> >> +		memcpy(argbuf->f.buf + offset, src, len);
> >> +		return;
> >> +	}
> >> +
> >> +	iov_iter_bvec(&iter, ITER_DEST, argbuf->s.bvec,
> >> +		      argbuf->s.nr, argbuf->s.size);
> >> +	iov_iter_advance(&iter, offset);
> > 
> > Hi Hou,
> > 
> > Just a random comment, but it seems a little inefficient to reinit and
> > readvance the iter like this on every copy/call. It looks like offset
> > is already incremented in the callers of the argbuf copy helpers.
> > Perhaps the iov_iter could be lifted into the callers and passed down,
> > or even just included in the argbuf structure and initialized at alloc
> > time?
> 
> Sorry for the late reply. I have been busy with an off-site workshop
> these days.

No worries.

> I have tried a similar idea before in which the iov_iter was saved
> directly in the argbuf struct, but it didn't work out well. The reason
> is that an iov_iter is needed for copying both in_args and out_args,
> but the direction is different. A bi-directional iov_iter is currently
> not supported, so the code has to initialize the iov_iter twice: once
> for the copy from in_args and once for the copy to out_args.

Ok, seems reasonable enough.

> For dio read initiated from the kernel, both its in_numargs and
> out_numargs are 1, so there will be only one iov_iter_advance() in
> virtio_fs_argbuf_copy_to_out_arg() and the offset is 64, so I think the
> overhead will be fine. For dio write initiated from the kernel, its
> in_numargs is 2 and out_numargs is 1, so there will be two invocations
> of iov_iter_advance().
> The first one is with offset=64, and the other one is with
> offset=round_up_page_size(64 + write_size), so the latter may introduce
> extra overhead. But compared with the overhead of the data copy, I
> still think the overhead of calling iov_iter_advance() is fine.

I'm not claiming the overhead is some practical problem here, but rather
that we shouldn't need to be concerned with it in the first place with
some cleaner code. It's been a bit since I first looked at this. I was
originally wondering about defining the iov_iter in the caller and
passing it down as a param, but on another look, do the lowest level
helpers really need to exist? If you don't anticipate further users, IMO
something like the diff below is a bit cleaner (compile tested only, but
no reinits and less code overall).

But that's just my .02, feel free to use it or not..

Brian

--- 8< ---

diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index 9ee71051c89f..9de477e9ccd5 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -544,26 +544,6 @@ static unsigned int virtio_fs_argbuf_setup_sg(struct virtio_fs_argbuf *argbuf,
 	return cur - sg;
 }
 
-static void virtio_fs_argbuf_copy_from_in_arg(struct virtio_fs_argbuf *argbuf,
-					      unsigned int offset,
-					      const void *src, unsigned int len)
-{
-	struct iov_iter iter;
-	unsigned int copied;
-
-	if (argbuf->is_flat) {
-		memcpy(argbuf->f.buf + offset, src, len);
-		return;
-	}
-
-	iov_iter_bvec(&iter, ITER_DEST, argbuf->s.bvec,
-		      argbuf->s.nr, argbuf->s.size);
-	iov_iter_advance(&iter, offset);
-
-	copied = _copy_to_iter(src, len, &iter);
-	WARN_ON_ONCE(copied != len);
-}
-
 static unsigned int
 virtio_fs_argbuf_out_args_offset(struct virtio_fs_argbuf *argbuf,
 				 const struct fuse_args *args)
@@ -577,26 +557,6 @@ virtio_fs_argbuf_out_args_offset(struct virtio_fs_argbuf *argbuf,
 	return round_up(offset, PAGE_SIZE);
 }
 
-static void virtio_fs_argbuf_copy_to_out_arg(struct virtio_fs_argbuf *argbuf,
-					     unsigned int offset, void *dst,
-					     unsigned int len)
-{
-	struct iov_iter iter;
-	unsigned int copied;
-
-	if (argbuf->is_flat) {
-		memcpy(dst, argbuf->f.buf + offset, len);
-		return;
-	}
-
-	iov_iter_bvec(&iter, ITER_SOURCE, argbuf->s.bvec,
-		      argbuf->s.nr, argbuf->s.size);
-	iov_iter_advance(&iter, offset);
-
-	copied = _copy_from_iter(dst, len, &iter);
-	WARN_ON_ONCE(copied != len);
-}
-
 /*
  * Returns 1 if queue is full and sender should wait a bit before sending
  * next request, 0 otherwise.
@@ -684,23 +644,41 @@ static void virtio_fs_hiprio_dispatch_work(struct work_struct *work)
 static void copy_args_to_argbuf(struct fuse_req *req)
 {
 	struct fuse_args *args = req->args;
+	struct virtio_fs_argbuf *argbuf = req->argbuf;
+	struct iov_iter iter;
+	unsigned int copied;
 	unsigned int offset = 0;
 	unsigned int num_in;
 	unsigned int i;
 
+	if (!argbuf->is_flat) {
+		iov_iter_bvec(&iter, ITER_DEST, argbuf->s.bvec, argbuf->s.nr,
+			      argbuf->s.size);
+	}
+
 	num_in = args->in_numargs - args->in_pages;
 	for (i = 0; i < num_in; i++) {
-		virtio_fs_argbuf_copy_from_in_arg(req->argbuf, offset,
-						  args->in_args[i].value,
-						  args->in_args[i].size);
-		offset += args->in_args[i].size;
+		const void *src = args->in_args[i].value;
+		unsigned int len = args->in_args[i].size;
+
+		if (argbuf->is_flat) {
+			memcpy(argbuf->f.buf + offset, src, len);
+			offset += len;
+			continue;
+		}
+
+		/* in args are packed back to back; the copy advances the iter */
+		copied = _copy_to_iter(src, len, &iter);
+		WARN_ON_ONCE(copied != len);
 	}
 }
 
 /* Copy args out of req->argbuf */
 static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req)
 {
-	struct virtio_fs_argbuf *argbuf;
+	struct virtio_fs_argbuf *argbuf = req->argbuf;
+	struct iov_iter iter;
+	unsigned int copied;
 	unsigned int remaining;
 	unsigned int offset;
 	unsigned int num_out;
@@ -711,10 +689,17 @@ static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req)
 	if (!num_out)
 		goto out;
 
-	argbuf = req->argbuf;
 	offset = virtio_fs_argbuf_out_args_offset(argbuf, args);
+	if (!argbuf->is_flat) {
+		iov_iter_bvec(&iter, ITER_SOURCE, argbuf->s.bvec,
+			      argbuf->s.nr, argbuf->s.size);
+		/* out args start at a page aligned offset in the argbuf */
+		iov_iter_advance(&iter, offset);
+	}
+
 	for (i = 0; i < num_out; i++) {
 		unsigned int argsize = args->out_args[i].size;
+		void *dst = args->out_args[i].value;
 
 		if (args->out_argvar &&
 		    i == args->out_numargs - 1 &&
@@ -722,10 +707,13 @@ static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req)
 			argsize = remaining;
 		}
 
-		virtio_fs_argbuf_copy_to_out_arg(argbuf, offset,
-						 args->out_args[i].value,
-						 argsize);
-		offset += argsize;
+		if (argbuf->is_flat) {
+			memcpy(dst, argbuf->f.buf + offset, argsize);
+			offset += argsize;
+		} else {
+			copied = _copy_from_iter(dst, argsize, &iter);
+			WARN_ON_ONCE(copied != argsize);
+		}
 
 		if (i != args->out_numargs - 1)
 			remaining -= argsize;
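
(As a side note on the pattern the diff relies on: a bvec-backed
iov_iter keeps its own position, so consecutive copies land back to
back, and an explicit iov_iter_advance() is only needed to skip a gap
such as the page-aligned start of the out args region. Below is a
minimal, hypothetical sketch of that behavior -- the helper name and
its arguments are made up for illustration; only the <linux/uio.h>
calls are the real kernel API:)

#include <linux/kernel.h>
#include <linux/bvec.h>
#include <linux/uio.h>

/* hypothetical: pack two buffers back to back into a scattered buffer */
static void pack_two_bufs_sketch(struct bio_vec *bvec, unsigned long nr_segs,
				 size_t size, const void *hdr, size_t hdr_len,
				 const void *payload, size_t payload_len)
{
	struct iov_iter iter;
	size_t copied;

	iov_iter_bvec(&iter, ITER_DEST, bvec, nr_segs, size);

	/* first copy starts at offset 0 and advances the iter by hdr_len */
	copied = _copy_to_iter(hdr, hdr_len, &iter);
	WARN_ON_ONCE(copied != hdr_len);

	/* second copy therefore lands at offset hdr_len, no advance needed */
	copied = _copy_to_iter(payload, payload_len, &iter);
	WARN_ON_ONCE(copied != payload_len);
}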