IORING_OP_PROVIDE_BUFFER uses the buffer registration infrastructure to
support passing in an addr/len that is associated with a buffer ID and
buffer group ID. The group ID is used to index and look up the buffers,
while the buffer ID can be used to notify the application which buffer
in the group was used.

At least for now, no validation is done of the buffer ID. If the
application provides buffers within the same group with identical buffer
IDs, then it'll have a hard time telling which buffer ID was used. The
only restriction is that the buffer ID can be at most 16 bits in size,
so USHRT_MAX is the maximum ID that can be used.

Signed-off-by: Jens Axboe <axboe@xxxxxxxxx>
---
 fs/io_uring.c                 | 84 +++++++++++++++++++++++++++++++++++
 include/uapi/linux/io_uring.h |  1 +
 2 files changed, 85 insertions(+)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 98b0e9552ef2..8b7c5ab69658 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -442,6 +442,14 @@ struct io_epoll {
 	struct epoll_event		event;
 };
 
+struct io_provide_buffer {
+	struct file			*file;
+	__u64				addr;
+	__s32				len;
+	__u32				gid;
+	__u16				bid;
+};
+
 struct io_async_connect {
 	struct sockaddr_storage		address;
 };
@@ -566,6 +574,7 @@ struct io_kiocb {
 		struct io_fadvise	fadvise;
 		struct io_madvise	madvise;
 		struct io_epoll		epoll;
+		struct io_provide_buffer	pbuf;
 	};
 
 	struct io_async_ctx		*io;
@@ -790,6 +799,7 @@ static const struct io_op_def io_op_defs[] = {
 		.unbound_nonreg_file	= 1,
 		.file_table		= 1,
 	},
+	[IORING_OP_PROVIDE_BUFFER] = {},
 };
 
 static void io_wq_submit_work(struct io_wq_work **workptr);
@@ -2703,6 +2713,69 @@ static int io_openat(struct io_kiocb *req, struct io_kiocb **nxt,
 	return io_openat2(req, nxt, force_nonblock);
 }
 
+static int io_provide_buffer_prep(struct io_kiocb *req,
+				  const struct io_uring_sqe *sqe)
+{
+	struct io_provide_buffer *p = &req->pbuf;
+	u64 off;
+
+	p->addr = READ_ONCE(sqe->addr);
+	p->len = READ_ONCE(sqe->len);
+	p->gid = READ_ONCE(sqe->fd);
+	off = READ_ONCE(sqe->off);
+	if (off > USHRT_MAX)
+		return -EINVAL;
+	p->bid = off;
+	return 0;
+}
+
+static int io_provide_buffer(struct io_kiocb *req, struct io_kiocb **nxt)
+{
+	struct io_provide_buffer *p = &req->pbuf;
+	struct io_ring_ctx *ctx = req->ctx;
+	struct list_head *list;
+	struct io_buffer *buf;
+	int ret = 0;
+
+	list = idr_find(&ctx->io_buffer_idr, p->gid);
+	if (!list) {
+		list = kmalloc(sizeof(*list), GFP_KERNEL);
+		if (!list) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		INIT_LIST_HEAD(list);
+		ret = idr_alloc(&ctx->io_buffer_idr, list, p->gid, p->gid + 1,
+					GFP_KERNEL);
+		if (ret < 0) {
+			kfree(list);
+			goto out;
+		}
+	}
+
+	buf = kmalloc(sizeof(*buf), GFP_KERNEL);
+	if (!buf) {
+		if (list_empty(list)) {
+			idr_remove(&ctx->io_buffer_idr, p->gid);
+			kfree(list);
+		}
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	buf->addr = p->addr;
+	buf->len = p->len;
+	buf->bid = p->bid;
+	list_add(&buf->list, list);
+	ret = buf->bid;
+out:
+	if (ret < 0)
+		req_set_fail_links(req);
+	io_cqring_add_event(req, ret);
+	io_put_req_find_next(req, nxt);
+	return 0;
+}
+
 static int io_epoll_ctl_prep(struct io_kiocb *req,
 			     const struct io_uring_sqe *sqe)
 {
@@ -4314,6 +4387,9 @@ static int io_req_defer_prep(struct io_kiocb *req,
 	case IORING_OP_EPOLL_CTL:
 		ret = io_epoll_ctl_prep(req, sqe);
 		break;
+	case IORING_OP_PROVIDE_BUFFER:
+		ret = io_provide_buffer_prep(req, sqe);
+		break;
 	default:
 		printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
 				req->opcode);
@@ -4579,6 +4655,14 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 		}
 		ret = io_epoll_ctl(req, nxt, force_nonblock);
 		break;
+	case IORING_OP_PROVIDE_BUFFER:
+		if (sqe) {
+			ret = io_provide_buffer_prep(req, sqe);
+			if (ret)
+				break;
+		}
+		ret = io_provide_buffer(req, nxt);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 653865554691..21915ada9507 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -113,6 +113,7 @@ enum {
 	IORING_OP_RECV,
 	IORING_OP_OPENAT2,
 	IORING_OP_EPOLL_CTL,
+	IORING_OP_PROVIDE_BUFFER,
 
 	/* this goes last, obviously */
 	IORING_OP_LAST,
-- 
2.25.1