Introduce IORING_OP_IOCTL_PT for async ioctl. It skips the block layer
and reaches the underlying block driver managing the block device. This
is done by calling the newly introduced "async_ioctl" block-device
operation. Files that are not block devices, or whose driver does not
implement "async_ioctl", are rejected with -EOPNOTSUPP at prep time.

The requested operation may complete synchronously, in which case the
CQE is posted right away. For asynchronous completion, the lower layer
calls the completion callback supplied by io_uring.

Signed-off-by: Kanchan Joshi <joshi.k@xxxxxxxxxxx>
Signed-off-by: Anuj Gupta <anuj20.g@xxxxxxxxxxx>
---
 fs/io_uring.c                 | 95 +++++++++++++++++++++++++++++++++++
 include/uapi/linux/io_uring.h |  7 +++-
 2 files changed, 101 insertions(+), 1 deletion(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 985a9e3f976d..c15852dfb727 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -468,6 +468,19 @@ struct io_rw {
 	u64				len;
 };
 
+/*
+ * passthru ioctl skips block-layer and reaches to block device driver via
+ * async_ioctl() block-dev operation.
+ */
+struct io_pt_ioctl {
+	struct file			*file;
+	/* arg and cmd like regular ioctl */
+	u64				arg;
+	u32				cmd;
+	/* defined by block layer */
+	struct pt_ioctl_ctx		ioctx;
+};
+
 struct io_connect {
 	struct file			*file;
 	struct sockaddr __user		*addr;
@@ -699,6 +712,7 @@ struct io_kiocb {
 		struct io_shutdown	shutdown;
 		struct io_rename	rename;
 		struct io_unlink	unlink;
+		struct io_pt_ioctl	ptioctl;
 		/* use only after cleaning per-op data, see io_clean_op() */
 		struct io_completion	compl;
 	};
@@ -824,6 +838,10 @@ static const struct io_op_def io_op_defs[] = {
 		.needs_file		= 1,
 		.work_flags		= IO_WQ_WORK_BLKCG,
 	},
+	[IORING_OP_IOCTL_PT] = {
+		.needs_file		= 1,
+		.work_flags		= IO_WQ_WORK_MM,
+	},
 	[IORING_OP_READ_FIXED] = {
 		.needs_file		= 1,
 		.unbound_nonreg_file	= 1,
@@ -3704,6 +3722,78 @@ static int io_write(struct io_kiocb *req, bool force_nonblock,
 	return ret;
 }
 
+/*
+ * Prepare an IORING_OP_IOCTL_PT request: validate the target file and stash
+ * the ioctl cmd/arg read from the SQE for io_pt_ioctl().
+ *
+ * Returns 0 on success, -EOPNOTSUPP if the file is not a block device or its
+ * driver does not implement ->async_ioctl().
+ */
+static int io_pt_ioctl_prep(struct io_kiocb *req,
+			    const struct io_uring_sqe *sqe)
+{
+	unsigned int cmd = READ_ONCE(sqe->ioctl_cmd);
+	unsigned long arg = READ_ONCE(sqe->ioctl_arg);
+	struct io_ring_ctx *ctx = req->ctx;
+	struct block_device *bdev;
+	struct gendisk *disk;
+
+	/* I_BDEV() is only meaningful for a block-device inode */
+	if (!S_ISBLK(file_inode(req->file)->i_mode))
+		return -EOPNOTSUPP;
+
+	bdev = I_BDEV(req->file->f_mapping->host);
+	disk = bdev->bd_disk;
+	if (!disk || !disk->fops || !disk->fops->async_ioctl)
+		return -EOPNOTSUPP;
+	/* for sqpoll, use sqo_task */
+	if (ctx->flags & IORING_SETUP_SQPOLL)
+		req->ptioctl.ioctx.task = ctx->sqo_task;
+	else
+		req->ptioctl.ioctx.task = current;
+
+	req->ptioctl.arg = arg;
+	req->ptioctl.cmd = cmd;
+	return 0;
+}
+
+/* Completion callback handed to the driver for asynchronous completion. */
+static void pt_complete(struct pt_ioctl_ctx *ptioc, long ret)
+{
+	struct io_kiocb *req = container_of(ptioc, struct io_kiocb, ptioctl.ioctx);
+
+	if (ret < 0)
+		req_set_fail_links(req);
+	io_req_complete(req, ret);
+}
+
+/*
+ * Issue the passthru ioctl through the driver's ->async_ioctl().  A return
+ * of -EIOCBQUEUED leaves completion to the pt_complete() callback; any other
+ * return value completes the request here.
+ */
+static int io_pt_ioctl(struct io_kiocb *req, bool force_nonblock)
+{
+	long ret = 0;
+	struct block_device *bdev = I_BDEV(req->file->f_mapping->host);
+	fmode_t mode = req->file->f_mode;
+	struct gendisk *disk = NULL;
+
+	disk = bdev->bd_disk;
+	/* set up callback for async */
+	req->ptioctl.ioctx.pt_complete = pt_complete;
+
+	ret = disk->fops->async_ioctl(bdev, mode, req->ptioctl.cmd,
+				      req->ptioctl.arg, &req->ptioctl.ioctx);
+	if (ret == -EIOCBQUEUED) /*async completion */
+		return 0;
+	if (ret < 0)
+		req_set_fail_links(req);
+
+	io_req_complete(req, ret);
+	return 0;
+}
+
 static int io_renameat_prep(struct io_kiocb *req,
 			    const struct io_uring_sqe *sqe)
 {
@@ -6078,6 +6168,8 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		return io_renameat_prep(req, sqe);
 	case IORING_OP_UNLINKAT:
 		return io_unlinkat_prep(req, sqe);
+	case IORING_OP_IOCTL_PT:
+		return io_pt_ioctl_prep(req, sqe);
 	}
 
 	printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
@@ -6337,6 +6429,9 @@ static int io_issue_sqe(struct io_kiocb *req, bool force_nonblock,
 	case IORING_OP_UNLINKAT:
 		ret = io_unlinkat(req, force_nonblock);
 		break;
+	case IORING_OP_IOCTL_PT:
+		ret = io_pt_ioctl(req, force_nonblock);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index d31a2a1e8ef9..60671e2b00ba 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -22,12 +22,16 @@ struct io_uring_sqe {
 	union {
 		__u64	off;	/* offset into file */
 		__u64	addr2;
+		__u64	ioctl_arg;
 	};
 	union {
 		__u64	addr;	/* pointer to buffer or iovecs */
 		__u64	splice_off_in;
 	};
-	__u32	len;		/* buffer size or number of iovecs */
+	union {
+		__u32	len;		/* buffer size or number of iovecs */
+		__u32	ioctl_cmd;
+	};
 	union {
 		__kernel_rwf_t	rw_flags;
 		__u32		fsync_flags;
@@ -137,6 +141,7 @@ enum {
 	IORING_OP_SHUTDOWN,
 	IORING_OP_RENAMEAT,
 	IORING_OP_UNLINKAT,
+	IORING_OP_IOCTL_PT,
 
 	/* this goes last, obviously */
 	IORING_OP_LAST,
-- 
2.25.1