On Mon, Aug 5, 2019 at 12:14 PM Geert Custers <geert.aj.custers@xxxxxxxxx> wrote:
>
> Hello,
>
> I recently wrote a fuse driver for a custom on-disk file system. As far
> as I could tell, the documentation didn't have an explicit example for
> how to deal with filesystems that are on disk; most examples centered
> around a very simple in-memory "filesystem". The way I do it now is
> by calling open() on the .img on init and fread()'ing and fwrite()'ing
> to it (with some caching to make it faster). I'm not sure this is the
> proper way to implement something like this, but I'll assume it is.
>
> Implementing the file system I noticed that when performing fuse read()s
> and write()s that I do a lot of unneeded copying. Right now I fread()
> into the buffer passed to the read() function, but as far as I can tell
> this buffer is then copied from the fuse server to kernel space where it
> is copied back to the user program. A more natural way (the way I see it)
> would be a mechanism by which I could tell the kernel "read from fd 4
> 512 bytes starting at position 0x1000" for example. Then the whole
> operation involves only one copy operation. Reading around I have seen
> some ideas around this, but as far as I could tell this isn't actively
> being worked on... So my question is if there are any plans to implement a
> zero-copy system for fuse.

Actually it is being worked on. Attaching the current proof-of-concept
kernel patch for this.

I don't have a patch for libfuse yet, as I'm testing new ideas with a
dummy filesystem that does raw /dev/fuse access. That test program is
also attached; it needs to be run with "-m" to enable the file mapping
mode.

To make this more useful, the kernel would need to cache the mapping,
so it doesn't need to issue a MAP request on each read. That would
also optimize the case of long extents, or files mirrored completely
from an underlying filesystem (as done by the test program).

Thanks,
Miklos
From a90a38e4700fbf0e8f73ce19cb6dfe30db5902f2 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@xxxxxxxxxx>
Date: Mon, 5 Aug 2019 13:44:59 +0200
Subject: [PATCH] fuse: add map request

Signed-off-by: Miklos Szeredi <mszeredi@xxxxxxxxxx>
---
Review notes (not part of the commit message):
 - fuse_map_open(): idr_alloc() returns the allocated id (>= 0) on success
   and a negative errno on failure, so the file reference must only be
   dropped when the result is negative.  Testing "if (res)" released the
   reference for every id other than 0 while the pointer stayed in the IDR.
 - fuse_map_get(): the lookup runs under rcu_read_lock() only, so it can
   race with fuse_map_close() dropping the last reference.  Use
   get_file_rcu() and treat a failed grab as "not found".
 - fuse_file_map_iter(): initialise res so that a zero-length read returns
   0 instead of an indeterminate value.
 - map.c gained comments, hence the changed line counts for that file; the
   blob ids on the "index" lines are those of the original posting.

 fs/fuse/Makefile          |  2 +-
 fs/fuse/dev.c             | 23 +++++++++++++
 fs/fuse/file.c            | 64 ++++++++++++++++++++++++++++++++++
 fs/fuse/fuse_i.h          |  5 +++
 fs/fuse/map.c             | 77 ++++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/fuse.h | 10 ++++++
 6 files changed, 180 insertions(+), 1 deletion(-)
 create mode 100644 fs/fuse/map.c

diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile
index 9485019c2a14..7e110c77d553 100644
--- a/fs/fuse/Makefile
+++ b/fs/fuse/Makefile
@@ -6,4 +6,4 @@
 obj-$(CONFIG_FUSE_FS) += fuse.o
 obj-$(CONFIG_CUSE) += cuse.o
 
-fuse-objs := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o
+fuse-objs := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o map.o
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index ea8237513dfa..ed64ce383b11 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -2306,6 +2306,26 @@ static int fuse_device_clone(struct fuse_conn *fc, struct file *new)
 	return 0;
 }
 
+static long fuse_dev_map_ioctl(unsigned int cmd, unsigned long arg)
+{
+	struct file *file;
+
+	switch (cmd) {
+	case FUSE_DEV_IOC_MAP_OPEN:
+		file = fget(arg);
+		if (!file)
+			return -EBADF;
+		return fuse_map_open(file);
+
+	case FUSE_DEV_IOC_MAP_CLOSE:
+		return fuse_map_close(arg);
+
+	default:
+		return -ENOTTY;
+	}
+
+}
+
 static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
 			   unsigned long arg)
 {
@@ -2338,7 +2358,10 @@ static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
 				fput(old);
 			}
 		}
+	} else {
+		err = fuse_dev_map_ioctl(cmd, arg);
 	}
+
 	return err;
 }
 
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 5ae2828beb00..6413f41cd2ac 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1481,6 +1481,67 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	return res;
 }
 
+static int fuse_send_map(struct kiocb *iocb, size_t count,
+			 struct fuse_map_out *outarg)
+{
+	struct file *file = iocb->ki_filp;
+	struct fuse_conn *fc = get_fuse_conn(file_inode(file));
+	struct fuse_file *ff = file->private_data;
+	struct fuse_read_in inarg = {
+		.fh = ff->fh,
+		.offset = iocb->ki_pos,
+		.size = count,
+		.flags = file->f_flags,
+	};
+	FUSE_ARGS(args);
+
+	args.in.h.opcode = FUSE_MAP;
+	args.in.h.nodeid = ff->nodeid;
+	args.in.numargs = 1;
+	args.in.args[0].size = sizeof(inarg);
+	args.in.args[0].value = &inarg;
+	args.out.numargs = 1;
+	args.out.args[0].size = sizeof(*outarg);
+	args.out.args[0].value = outarg;
+
+	return fuse_simple_request(fc, &args);
+}
+
+static ssize_t fuse_file_map_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+	struct fuse_map_out outarg;
+	struct file *mapfile;
+	ssize_t res = 0, total = 0;
+	size_t count;
+	loff_t pos;
+
+	while ((count = iov_iter_count(to))) {
+		res = fuse_send_map(iocb, count, &outarg);
+		if (res || !outarg.size)
+			break;
+
+		res = -EBADF;
+		mapfile = fuse_map_get(outarg.mapfd);
+		if (!mapfile)
+			break;
+
+		iov_iter_truncate(to, outarg.size);
+		pos = outarg.offset;
+		res = vfs_iter_read(mapfile, to, &pos, /* FIXME */ 0);
+		fput(mapfile);
+		if (res < 0)
+			break;
+		iov_iter_reexpand(to, count - res);
+		if (res == 0)
+			break;
+
+		total += res;
+		iocb->ki_pos += res;
+	}
+
+	return total ?: res;
+}
+
 static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
 	struct file *file = iocb->ki_filp;
@@ -1489,6 +1550,9 @@ static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 	if (is_bad_inode(file_inode(file)))
 		return -EIO;
 
+	if (ff->open_flags & FOPEN_MAP)
+		return fuse_file_map_iter(iocb, to);
+
 	if (!(ff->open_flags & FOPEN_DIRECT_IO))
 		return fuse_cache_read_iter(iocb, to);
 	else
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 24dbca777775..ea7b0548e034 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -1093,4 +1093,9 @@ int fuse_set_acl(struct inode *inode, struct posix_acl *acl, int type);
 /* readdir.c */
 int fuse_readdir(struct file *file, struct dir_context *ctx);
 
+/* map.c */
+int fuse_map_open(struct file *file);
+int fuse_map_close(unsigned long mapfd);
+struct file *fuse_map_get(u64 mapfd);
+
 #endif /* _FS_FUSE_I_H */
diff --git a/fs/fuse/map.c b/fs/fuse/map.c
new file mode 100644
index 000000000000..e5801b9465cd
--- /dev/null
+++ b/fs/fuse/map.c
@@ -0,0 +1,77 @@
+/*
+  FUSE: Filesystem in Userspace
+  Copyright (C) 2001-2008 Miklos Szeredi <miklos@xxxxxxxxxx>
+
+  This program can be distributed under the terms of the GNU GPL.
+  See the file COPYING.
+*/
+
+#include "fuse_i.h"
+
+#include <linux/file.h>
+#include <linux/idr.h>
+
+/* Global table of files registered for mapping; writers hold fuse_map_lock */
+static DEFINE_SPINLOCK(fuse_map_lock);
+static DEFINE_IDR(fuse_map);
+
+/*
+ * Register @file in the map table, consuming the caller's reference.
+ *
+ * Returns the allocated id (>= 0) or a negative errno.  The reference is
+ * kept by the table on success and dropped only on failure.
+ */
+int fuse_map_open(struct file *file)
+{
+	int res;
+
+	idr_preload(GFP_KERNEL);
+	spin_lock(&fuse_map_lock);
+	res = idr_alloc(&fuse_map, file, 0, 0, GFP_ATOMIC);
+	spin_unlock(&fuse_map_lock);
+	idr_preload_end();
+	/* idr_alloc() returns the new id on success, so only < 0 is an error */
+	if (res < 0)
+		fput(file);
+
+	return res;
+}
+
+/*
+ * Remove @mapfd from the map table and drop the table's file reference.
+ *
+ * Returns 0 on success or -EBADF if @mapfd is not registered.
+ */
+int fuse_map_close(unsigned long mapfd)
+{
+	struct file *file;
+
+	spin_lock(&fuse_map_lock);
+	file = idr_remove(&fuse_map, mapfd);
+	spin_unlock(&fuse_map_lock);
+
+	if (!file)
+		return -EBADF;
+
+	fput(file);
+	return 0;
+}
+
+/*
+ * Look up @mapfd and return a new reference to its file, or NULL.
+ *
+ * The lookup is lockless, so it can race with fuse_map_close() dropping the
+ * last reference; get_file_rcu() refuses to resurrect such a file.
+ */
+struct file *fuse_map_get(u64 mapfd)
+{
+	struct file *file;
+
+	rcu_read_lock();
+	file = idr_find(&fuse_map, mapfd);
+	if (file && !get_file_rcu(file))
+		file = NULL;
+	rcu_read_unlock();
+
+	return file;
+}
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 2971d29a42e4..65fca0128716 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -244,6 +244,7 @@ struct fuse_file_lock {
 #define FOPEN_NONSEEKABLE	(1 << 2)
 #define FOPEN_CACHE_DIR		(1 << 3)
 #define FOPEN_STREAM		(1 << 4)
+#define FOPEN_MAP		(1 << 5)
 
 /**
  * INIT request/reply flags
@@ -422,6 +423,7 @@ enum fuse_opcode {
 	FUSE_RENAME2		= 45,
 	FUSE_LSEEK		= 46,
 	FUSE_COPY_FILE_RANGE	= 47,
+	FUSE_MAP		= 50,
 
 	/* CUSE specific operations */
 	CUSE_INIT		= 4096,
@@ -571,6 +573,12 @@ struct fuse_read_in {
 	uint32_t	padding;
 };
 
+struct fuse_map_out {
+	uint64_t	mapfd;
+	uint64_t	offset;
+	uint64_t	size;
+};
+
 #define FUSE_COMPAT_WRITE_IN_SIZE 24
 
 struct fuse_write_in {
@@ -823,6 +831,8 @@ struct fuse_notify_retrieve_in {
 
 /* Device ioctls: */
 #define FUSE_DEV_IOC_CLONE	_IOR(229, 0, uint32_t)
+#define FUSE_DEV_IOC_MAP_OPEN	_IO(229, 4)
+#define FUSE_DEV_IOC_MAP_CLOSE	_IO(229, 5)
 
 struct fuse_lseek_in {
 	uint64_t	fh;
-- 
2.21.0
Attachment:
loraw.tar.gz
Description: application/gzip