On Tue, Apr 18, 2023 at 4:41 AM Daniel Rosenberg <drosen@xxxxxxxxxx> wrote: > > Adds backing support for FUSE_READ and FUSE_WRITE > > This includes adjustments from Amir Goldstein's patch to FUSE > Passthrough > > Signed-off-by: Daniel Rosenberg <drosen@xxxxxxxxxx> > Signed-off-by: Paul Lawrence <paullawrence@xxxxxxxxxx> > --- > fs/fuse/backing.c | 371 ++++++++++++++++++++++++++++++++++++++ > fs/fuse/control.c | 2 +- > fs/fuse/file.c | 8 + > fs/fuse/fuse_i.h | 19 +- > fs/fuse/inode.c | 13 ++ > include/uapi/linux/fuse.h | 10 + > 6 files changed, 421 insertions(+), 2 deletions(-) > > diff --git a/fs/fuse/backing.c b/fs/fuse/backing.c > index c6ef10aeec15..c7709a880e9c 100644 > --- a/fs/fuse/backing.c > +++ b/fs/fuse/backing.c > @@ -11,6 +11,7 @@ > #include <linux/file.h> > #include <linux/fs_stack.h> > #include <linux/namei.h> > +#include <linux/uio.h> > > /* > * expression statement to wrap the backing filter logic > @@ -76,6 +77,89 @@ > handled; \ > }) > > +#define FUSE_BPF_IOCB_MASK (IOCB_APPEND | IOCB_DSYNC | IOCB_HIPRI | IOCB_NOWAIT | IOCB_SYNC) > + > +struct fuse_bpf_aio_req { > + struct kiocb iocb; > + refcount_t ref; > + struct kiocb *iocb_orig; > + struct timespec64 pre_atime; > +}; > + > +static struct kmem_cache *fuse_bpf_aio_request_cachep; > + > +static void fuse_file_accessed(struct file *dst_file, struct file *src_file) > +{ > + struct inode *dst_inode; > + struct inode *src_inode; > + > + if (dst_file->f_flags & O_NOATIME) > + return; > + > + dst_inode = file_inode(dst_file); > + src_inode = file_inode(src_file); > + > + if ((!timespec64_equal(&dst_inode->i_mtime, &src_inode->i_mtime) || > + !timespec64_equal(&dst_inode->i_ctime, &src_inode->i_ctime))) { > + dst_inode->i_mtime = src_inode->i_mtime; > + dst_inode->i_ctime = src_inode->i_ctime; > + } > + > + touch_atime(&dst_file->f_path); > +} > + > +static void fuse_copyattr(struct file *dst_file, struct file *src_file) > +{ > + struct inode *dst = file_inode(dst_file); > + struct inode *src = 
file_inode(src_file); > + > + dst->i_atime = src->i_atime; > + dst->i_mtime = src->i_mtime; > + dst->i_ctime = src->i_ctime; > + i_size_write(dst, i_size_read(src)); > + fuse_invalidate_attr(dst); > +} > + > +static void fuse_file_start_write(struct file *fuse_file, struct file *backing_file, > + loff_t pos, size_t count) > +{ > + struct inode *inode = file_inode(fuse_file); > + struct fuse_inode *fi = get_fuse_inode(inode); > + > + if (inode->i_size < pos + count) > + set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); > + > + file_start_write(backing_file); > +} > + > +static void fuse_file_end_write(struct file *fuse_file, struct file *backing_file, > + loff_t pos, size_t res) > +{ > + struct inode *inode = file_inode(fuse_file); > + struct fuse_inode *fi = get_fuse_inode(inode); > + > + file_end_write(backing_file); > + > + if (res > 0) > + fuse_write_update_attr(inode, pos, res); > + > + clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); > + fuse_invalidate_attr(inode); This part is a bit out-of-date (was taken from my old branch). FWIW, I pushed a more recent version of these patches to: https://github.com/amir73il/linux/commits/fuse-passthrough-fd (only compile tested). > +} > + > +static void fuse_file_start_read(struct file *backing_file, struct timespec64 *pre_atime) > +{ > + *pre_atime = file_inode(backing_file)->i_atime; > +} > + > +static void fuse_file_end_read(struct file *fuse_file, struct file *backing_file, > + struct timespec64 *pre_atime) > +{ > + /* Mimic atime update policy of passthrough inode, not the value */ > + if (!timespec64_equal(&file_inode(backing_file)->i_atime, pre_atime)) > + fuse_invalidate_atime(file_inode(fuse_file)); > +} > + > static void fuse_get_backing_path(struct file *file, struct path *path) > { > path_get(&file->f_path); > @@ -664,6 +748,277 @@ int fuse_bpf_lseek(loff_t *out, struct inode *inode, struct file *file, loff_t o > file, offset, whence); > } > > +static inline void fuse_bpf_aio_put(struct fuse_bpf_aio_req *aio_req) > +{ > +
if (refcount_dec_and_test(&aio_req->ref)) > + kmem_cache_free(fuse_bpf_aio_request_cachep, aio_req); > +} > + > +static void fuse_bpf_aio_cleanup_handler(struct fuse_bpf_aio_req *aio_req, long res) > +{ > + struct kiocb *iocb = &aio_req->iocb; > + struct kiocb *iocb_orig = aio_req->iocb_orig; > + struct file *filp = iocb->ki_filp; > + struct file *fuse_filp = iocb_orig->ki_filp; > + > + if (iocb->ki_flags & IOCB_WRITE) { > + __sb_writers_acquired(file_inode(iocb->ki_filp)->i_sb, > + SB_FREEZE_WRITE); > + fuse_file_end_write(iocb_orig->ki_filp, iocb->ki_filp, iocb->ki_pos, res); > + } else { > + fuse_file_end_read(fuse_filp, filp, &aio_req->pre_atime); > + } > + iocb_orig->ki_pos = iocb->ki_pos; > + fuse_bpf_aio_put(aio_req); > +} > + > +static void fuse_bpf_aio_rw_complete(struct kiocb *iocb, long res) > +{ > + struct fuse_bpf_aio_req *aio_req = > + container_of(iocb, struct fuse_bpf_aio_req, iocb); > + struct kiocb *iocb_orig = aio_req->iocb_orig; > + > + fuse_bpf_aio_cleanup_handler(aio_req, res); > + iocb_orig->ki_complete(iocb_orig, res); > +} > + > +struct fuse_file_read_iter_args { > + struct fuse_read_in in; > + struct fuse_read_iter_out out; > +}; > + > +static int fuse_file_read_iter_initialize_in(struct bpf_fuse_args *fa, struct fuse_file_read_iter_args *args, > + struct kiocb *iocb, struct iov_iter *to) > +{ > + struct file *file = iocb->ki_filp; > + struct fuse_file *ff = file->private_data; > + > + args->in = (struct fuse_read_in) { > + .fh = ff->fh, > + .offset = iocb->ki_pos, > + .size = to->count, > + }; > + > + /* TODO we can't assume 'to' is a kvec */ > + /* TODO we also can't assume the vector has only one component */ > + *fa = (struct bpf_fuse_args) { > + .info = (struct bpf_fuse_meta_info) { > + .opcode = FUSE_READ, > + .nodeid = ff->nodeid, > + }, .in_numargs = 1, > + .in_args[0].size = sizeof(args->in), > + .in_args[0].value = &args->in, > + /* > + * TODO Design this properly. 
> + * Possible approach: do not pass buf to bpf > + * If going to userland, do a deep copy > + * For extra credit, do that to/from the vector, rather than > + * making an extra copy in the kernel > + */ > + }; > + > + return 0; > +} > + > +static int fuse_file_read_iter_initialize_out(struct bpf_fuse_args *fa, struct fuse_file_read_iter_args *args, > + struct kiocb *iocb, struct iov_iter *to) > +{ > + args->out = (struct fuse_read_iter_out) { > + .ret = args->in.size, > + }; > + > + fa->out_numargs = 1; > + fa->out_args[0].size = sizeof(args->out); > + fa->out_args[0].value = &args->out; > + > + return 0; > +} > + > +static int fuse_file_read_iter_backing(struct bpf_fuse_args *fa, ssize_t *out, > + struct kiocb *iocb, struct iov_iter *to) > +{ > + struct fuse_read_iter_out *frio = fa->out_args[0].value; > + struct file *file = iocb->ki_filp; > + struct fuse_file *ff = file->private_data; > + > + if (!iov_iter_count(to)) > + return 0; > + > + if ((iocb->ki_flags & IOCB_DIRECT) && > + (!ff->backing_file->f_mapping->a_ops || > + !ff->backing_file->f_mapping->a_ops->direct_IO)) > + return -EINVAL; > + > + /* TODO This just plain ignores any change to fuse_read_in */ > + if (is_sync_kiocb(iocb)) { > + struct timespec64 pre_atime; > + > + fuse_file_start_read(ff->backing_file, &pre_atime); > + *out = vfs_iter_read(ff->backing_file, to, &iocb->ki_pos, > + iocb_to_rw_flags(iocb->ki_flags, FUSE_BPF_IOCB_MASK)); > + fuse_file_end_read(file, ff->backing_file, &pre_atime); > + } else { > + struct fuse_bpf_aio_req *aio_req; > + > + *out = -ENOMEM; > + aio_req = kmem_cache_zalloc(fuse_bpf_aio_request_cachep, GFP_KERNEL); > + if (!aio_req) > + goto out; > + > + aio_req->iocb_orig = iocb; > + fuse_file_start_read(ff->backing_file, &aio_req->pre_atime); > + kiocb_clone(&aio_req->iocb, iocb, ff->backing_file); > + aio_req->iocb.ki_complete = fuse_bpf_aio_rw_complete; > + refcount_set(&aio_req->ref, 2); > + *out = vfs_iocb_iter_read(ff->backing_file, &aio_req->iocb, to); > + 
fuse_bpf_aio_put(aio_req); > + if (*out != -EIOCBQUEUED) > + fuse_bpf_aio_cleanup_handler(aio_req, *out); > + } > + > + frio->ret = *out; > + > + /* TODO Need to point value at the buffer for post-modification */ > + > +out: > + fuse_file_accessed(file, ff->backing_file); fuse_file_accessed() looks redundant and less subtle than what fuse_file_end_read() already does. > + > + return *out; > +} > + > +static int fuse_file_read_iter_finalize(struct bpf_fuse_args *fa, ssize_t *out, > + struct kiocb *iocb, struct iov_iter *to) > +{ > + struct fuse_read_iter_out *frio = fa->out_args[0].value; > + > + *out = frio->ret; > + > + return 0; > +} > + > +int fuse_bpf_file_read_iter(ssize_t *out, struct inode *inode, struct kiocb *iocb, struct iov_iter *to) > +{ > + return bpf_fuse_backing(inode, struct fuse_file_read_iter_args, out, > + fuse_file_read_iter_initialize_in, > + fuse_file_read_iter_initialize_out, > + fuse_file_read_iter_backing, > + fuse_file_read_iter_finalize, > + iocb, to); > +} > + > +struct fuse_file_write_iter_args { > + struct fuse_write_in in; > + struct fuse_write_iter_out out; > +}; > + > +static int fuse_file_write_iter_initialize_in(struct bpf_fuse_args *fa, > + struct fuse_file_write_iter_args *args, > + struct kiocb *iocb, struct iov_iter *from) > +{ > + struct file *file = iocb->ki_filp; > + struct fuse_file *ff = file->private_data; > + > + *args = (struct fuse_file_write_iter_args) { > + .in.fh = ff->fh, > + .in.offset = iocb->ki_pos, > + .in.size = from->count, > + }; > + > + /* TODO we can't assume 'from' is a kvec */ > + *fa = (struct bpf_fuse_args) { > + .info = (struct bpf_fuse_meta_info) { > + .opcode = FUSE_WRITE, > + .nodeid = ff->nodeid, > + }, > + .in_numargs = 1, > + .in_args[0].size = sizeof(args->in), > + .in_args[0].value = &args->in, > + }; > + > + return 0; > +} > + > +static int fuse_file_write_iter_initialize_out(struct bpf_fuse_args *fa, > + struct fuse_file_write_iter_args *args, > + struct kiocb *iocb, struct iov_iter *from) > +{
> + /* TODO we can't assume 'from' is a kvec */ > + fa->out_numargs = 1; > + fa->out_args[0].size = sizeof(args->out); > + fa->out_args[0].value = &args->out; > + > + return 0; > +} > + > +static int fuse_file_write_iter_backing(struct bpf_fuse_args *fa, ssize_t *out, > + struct kiocb *iocb, struct iov_iter *from) > +{ > + struct file *file = iocb->ki_filp; > + struct fuse_file *ff = file->private_data; > + struct fuse_write_iter_out *fwio = fa->out_args[0].value; > + ssize_t count = iov_iter_count(from); > + > + if (!count) > + return 0; > + > + /* TODO This just plain ignores any change to fuse_write_in */ > + /* TODO uint32_t seems smaller than ssize_t.... right? */ > + inode_lock(file_inode(file)); > + > + fuse_copyattr(file, ff->backing_file); fuse_copyattr() looks redundant and less subtle than what fuse_file_end_write() already does. Thanks, Amir.