Adds an io_uring interface for asynchronous encoded reads, using the same interface as for the ioctl. To use this you would use an SQE opcode of IORING_OP_URING_CMD, the cmd_op would be BTRFS_IOC_ENCODED_READ, and addr would point to the userspace address of the btrfs_ioctl_encoded_io_args struct. As with the ioctl, you need to have CAP_SYS_ADMIN for this to work. Signed-off-by: Mark Harmstone <maharmstone@xxxxxx> --- fs/btrfs/btrfs_inode.h | 2 ++ fs/btrfs/file.c | 1 + fs/btrfs/inode.c | 57 ++++++++++++++++++++++++++++++++++++---- fs/btrfs/ioctl.c | 59 ++++++++++++++++++++++++++++++++++++++++-- fs/btrfs/ioctl.h | 3 +++ 5 files changed, 115 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index a5d786c6d7d4..62e5757d3ba2 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -620,6 +620,8 @@ struct btrfs_encoded_read_private { struct btrfs_ioctl_encoded_io_args args; struct file *file; void __user *copy_out; + struct io_uring_cmd *cmd; + unsigned int issue_flags; }; ssize_t btrfs_encoded_read(struct btrfs_encoded_read_private *priv); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 9914419f3b7d..2db76422d126 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -3769,6 +3769,7 @@ const struct file_operations btrfs_file_operations = { .compat_ioctl = btrfs_compat_ioctl, #endif .remap_file_range = btrfs_remap_file_range, + .uring_cmd = btrfs_uring_cmd, .fop_flags = FOP_BUFFER_RASYNC | FOP_BUFFER_WASYNC, }; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1bd4c74e8c51..e4458168c340 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -34,6 +34,7 @@ #include <linux/iomap.h> #include <asm/unaligned.h> #include <linux/fsverity.h> +#include <linux/io_uring/cmd.h> #include "misc.h" #include "ctree.h" #include "disk-io.h" @@ -9078,7 +9079,7 @@ static ssize_t btrfs_encoded_read_inline( return ret; } -static void btrfs_encoded_read_endio(struct btrfs_bio *bbio) +static void btrfs_encoded_read_ioctl_endio(struct btrfs_bio *bbio) { struct btrfs_encoded_read_private *priv = bbio->private; @@ -9098,6 +9099,47 @@ static void btrfs_encoded_read_endio(struct btrfs_bio *bbio) bio_put(&bbio->bio); } +static inline struct btrfs_encoded_read_private *btrfs_uring_encoded_read_pdu( + struct io_uring_cmd *cmd) +{ + return *(struct btrfs_encoded_read_private **)cmd->pdu; +} +static void btrfs_finish_uring_encoded_read(struct io_uring_cmd *cmd, + unsigned int issue_flags) +{ + struct btrfs_encoded_read_private *priv; + ssize_t ret; + + priv = btrfs_uring_encoded_read_pdu(cmd); + ret = btrfs_encoded_read_finish(priv, -EIOCBQUEUED); + + io_uring_cmd_done(priv->cmd, ret, 0, priv->issue_flags); + + kfree(priv); +} + +static void btrfs_encoded_read_uring_endio(struct btrfs_bio *bbio) +{ + struct btrfs_encoded_read_private *priv = bbio->private; + + if (bbio->bio.bi_status) { + /* + * The memory barrier implied by the atomic_dec_return() here + * pairs with the memory barrier implied by the + * atomic_dec_return() or io_wait_event() in + * btrfs_encoded_read_regular_fill_pages() to ensure that this + * write is observed before the load of status in + * btrfs_encoded_read_regular_fill_pages(). + */ + WRITE_ONCE(priv->status, bbio->bio.bi_status); + } + if (!atomic_dec_return(&priv->pending)) { + io_uring_cmd_complete_in_task(priv->cmd, + btrfs_finish_uring_encoded_read); + } + bio_put(&bbio->bio); +} + static void _btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode, u64 file_offset, u64 disk_bytenr, u64 disk_io_size, @@ -9106,11 +9148,16 @@ static void _btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode, struct btrfs_fs_info *fs_info = inode->root->fs_info; unsigned long i = 0; struct btrfs_bio *bbio; + btrfs_bio_end_io_t cb; - init_waitqueue_head(&priv->wait); + if (priv->cmd) { + cb = btrfs_encoded_read_uring_endio; + } else { + init_waitqueue_head(&priv->wait); + cb = btrfs_encoded_read_ioctl_endio; + } - bbio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_READ, fs_info, - btrfs_encoded_read_endio, priv); + bbio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_READ, fs_info, cb, priv); bbio->bio.bi_iter.bi_sector = disk_bytenr >> SECTOR_SHIFT; bbio->inode = inode; @@ -9122,7 +9169,7 @@ static void _btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode, btrfs_submit_bio(bbio, 0); bbio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_READ, fs_info, - btrfs_encoded_read_endio, priv); + cb, priv); bbio->bio.bi_iter.bi_sector = disk_bytenr >> SECTOR_SHIFT; bbio->inode = inode; continue; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index c1886209933a..85a512a9ca64 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -29,6 +29,7 @@ #include <linux/fileattr.h> #include <linux/fsverity.h> #include <linux/sched/xacct.h> +#include <linux/io_uring/cmd.h> #include "ctree.h" #include "disk-io.h" #include "export.h" @@ -4509,8 +4510,8 @@ static int _btrfs_ioctl_send(struct btrfs_inode *inode, void __user *argp, bool return ret; } -static ssize_t btrfs_encoded_read_finish(struct btrfs_encoded_read_private *priv, - ssize_t ret) +ssize_t btrfs_encoded_read_finish(struct btrfs_encoded_read_private *priv, + ssize_t ret) { size_t copy_end_kernel = offsetofend(struct btrfs_ioctl_encoded_io_args, flags); @@ -4725,6 +4726,60 @@ static int btrfs_ioctl_encoded_write(struct file *file, void __user *argp, bool return ret; } +static void btrfs_uring_encoded_read(struct io_uring_cmd *cmd, + unsigned int issue_flags) +{ + ssize_t ret; + void __user *argp = (void __user *)(uintptr_t)cmd->sqe->addr; + struct btrfs_encoded_read_private *priv; + bool compat = issue_flags & IO_URING_F_COMPAT; + + priv = kmalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) { + ret = -ENOMEM; + goto out_uring; + } + + ret = btrfs_prepare_encoded_read(priv, cmd->file, compat, argp); + if (ret) + goto out_finish; + + if (iov_iter_count(&priv->iter) == 0) { + ret = 0; + goto out_finish; + } + + *(struct btrfs_encoded_read_private **)cmd->pdu = priv; + priv->cmd = cmd; + priv->issue_flags = issue_flags; + ret = btrfs_encoded_read(priv); + + if (ret == -EIOCBQUEUED && atomic_dec_return(&priv->pending)) + return; + +out_finish: + ret = btrfs_encoded_read_finish(priv, ret); + kfree(priv); + +out_uring: + io_uring_cmd_done(cmd, ret, 0, issue_flags); +} + +int btrfs_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) +{ + switch (cmd->cmd_op) { + case BTRFS_IOC_ENCODED_READ: +#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT) + case BTRFS_IOC_ENCODED_READ_32: +#endif + btrfs_uring_encoded_read(cmd, issue_flags); + return -EIOCBQUEUED; + } + + io_uring_cmd_done(cmd, -EINVAL, 0, issue_flags); + return -EIOCBQUEUED; +} + long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 19cd26b0244a..9d1522de79d3 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -22,5 +22,8 @@ void btrfs_sync_inode_flags_to_i_flags(struct inode *inode); int __pure btrfs_is_empty_uuid(const u8 *uuid); void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info, struct btrfs_ioctl_balance_args *bargs); +int btrfs_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags); +ssize_t btrfs_encoded_read_finish(struct btrfs_encoded_read_private *priv, + ssize_t ret); #endif -- 2.44.2