Add a ->uring_cmd callback for block device files and use it to implement asynchronous discard. Normally, it first tries to execute the command from a non-blocking context, which we limit to a single bio because otherwise one of the sub-bios may need to wait for other bios, and we don't want to deal with partial IO. If the non-blocking attempt fails, we'll retry it in a blocking context. Suggested-by: Conrad Meyer <conradmeyer@xxxxxxxx> Signed-off-by: Pavel Begunkov <asml.silence@xxxxxxxxx> --- block/blk.h | 1 + block/fops.c | 2 + block/ioctl.c | 94 +++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/fs.h | 2 + 4 files changed, 99 insertions(+) diff --git a/block/blk.h b/block/blk.h index e180863f918b..5178c5ba6852 100644 --- a/block/blk.h +++ b/block/blk.h @@ -571,6 +571,7 @@ blk_mode_t file_to_blk_mode(struct file *file); int truncate_bdev_range(struct block_device *bdev, blk_mode_t mode, loff_t lstart, loff_t lend); long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg); +int blkdev_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags); long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg); extern const struct address_space_operations def_blk_aops; diff --git a/block/fops.c b/block/fops.c index 9825c1713a49..8154b10b5abf 100644 --- a/block/fops.c +++ b/block/fops.c @@ -17,6 +17,7 @@ #include <linux/fs.h> #include <linux/iomap.h> #include <linux/module.h> +#include <linux/io_uring/cmd.h> #include "blk.h" static inline struct inode *bdev_file_inode(struct file *file) @@ -873,6 +874,7 @@ const struct file_operations def_blk_fops = { .splice_read = filemap_splice_read, .splice_write = iter_file_splice_write, .fallocate = blkdev_fallocate, + .uring_cmd = blkdev_uring_cmd, .fop_flags = FOP_BUFFER_RASYNC, }; diff --git a/block/ioctl.c b/block/ioctl.c index c7a3e6c6f5fa..f7f9c4c6d6b5 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -11,6 +11,8 @@ #include <linux/blktrace_api.h> #include <linux/pr.h> #include <linux/uaccess.h> 
+#include <linux/pagemap.h> +#include <linux/io_uring/cmd.h> #include "blk.h" static int blkpg_do_ioctl(struct block_device *bdev, @@ -744,4 +746,96 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) return ret; } + +struct blk_cmd { + blk_status_t status; + bool nowait; +}; + +static void blk_cmd_complete(struct io_uring_cmd *cmd, unsigned int issue_flags) +{ + struct blk_cmd *bc = io_uring_cmd_to_pdu(cmd, struct blk_cmd); + int res = blk_status_to_errno(bc->status); + + if (res == -EAGAIN && bc->nowait) + io_uring_cmd_issue_blocking(cmd); + else + io_uring_cmd_done(cmd, res, 0, issue_flags); +} + +static void bio_cmd_end(struct bio *bio) +{ + struct io_uring_cmd *cmd = bio->bi_private; + struct blk_cmd *bc = io_uring_cmd_to_pdu(cmd, struct blk_cmd); + + if (unlikely(bio->bi_status) && !bc->status) + bc->status = bio->bi_status; + + io_uring_cmd_do_in_task_lazy(cmd, blk_cmd_complete); + bio_put(bio); +} + +static int blkdev_cmd_discard(struct io_uring_cmd *cmd, + struct block_device *bdev, + uint64_t start, uint64_t len, bool nowait) +{ + sector_t sector = start >> SECTOR_SHIFT; + sector_t nr_sects = len >> SECTOR_SHIFT; + struct bio *prev = NULL, *bio; + int err; + + err = blk_validate_discard(bdev, file_to_blk_mode(cmd->file), + start, len); + if (err) + return err; + err = filemap_invalidate_pages(bdev->bd_mapping, start, + start + len - 1, nowait); + if (err) + return err; + + while ((bio = blk_alloc_discard_bio(bdev, &sector, &nr_sects, + GFP_KERNEL))) { + if (nowait) { + if (unlikely(nr_sects)) { + bio_put(bio); + return -EAGAIN; + } + bio->bi_opf |= REQ_NOWAIT; + } + prev = bio_chain_and_submit(prev, bio); + } + if (!prev) + return -EFAULT; + + prev->bi_private = cmd; + prev->bi_end_io = bio_cmd_end; + submit_bio(prev); + return -EIOCBQUEUED; +} + +int blkdev_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) +{ + struct block_device *bdev = I_BDEV(cmd->file->f_mapping->host); + struct blk_cmd *bc = 
io_uring_cmd_to_pdu(cmd, struct blk_cmd); + const struct io_uring_sqe *sqe = cmd->sqe; + u32 cmd_op = cmd->cmd_op; + uint64_t start, len; + + if (unlikely(sqe->ioprio || sqe->__pad1 || sqe->len || + sqe->rw_flags || sqe->file_index)) + return -EINVAL; + + bc->status = BLK_STS_OK; + bc->nowait = issue_flags & IO_URING_F_NONBLOCK; + + start = READ_ONCE(sqe->addr); + len = READ_ONCE(sqe->addr3); + + switch (cmd_op) { + case BLOCK_URING_CMD_DISCARD: + return blkdev_cmd_discard(cmd, bdev, start, len, bc->nowait); + } + return -EINVAL; +} + #endif diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 753971770733..0016e38ed33c 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -208,6 +208,8 @@ struct fsxattr { * (see uapi/linux/blkzoned.h) */ +#define BLOCK_URING_CMD_DISCARD 0 + #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ #define FIBMAP _IO(0x00,1) /* bmap access */ #define FIGETBSZ _IO(0x00,2) /* get the block size used for bmap */ -- 2.45.2