Hi Linus, Here are the io_uring updates for 6.14. Not a lot in terms of features this time around, mostly just cleanups and code consolidation. In detail, this pull request contains: - Support for PI meta data read/write via io_uring, with NVMe and SCSI covered. - Cleanup the per-op structure caching, making it consistent across various command types. - Consolidate the various user mapped features into a concept called regions, making the various users of that consistent. - Various cleanups and fixes. Note that this will throw a merge conflict, as there's a conflict between a fix that went into mainline after 6.13-rc4. The io_uring/register.c one is trivial, the io_uring/uring_cmd.c requires a small addition. Here's my resolution, while merging it into my for-next branch: commit 3761b21b00320fc676aa8f5df8c9158046372b73 Merge: 65a64ecb3357 bab4b2cca027 Author: Jens Axboe <axboe@xxxxxxxxx> Date: Wed Jan 15 08:52:03 2025 -0700 Merge branch 'for-6.14/io_uring' into for-next * for-6.14/io_uring: (55 commits) io_uring: reuse io_should_terminate_tw() for cmds io_uring: Factor out a function to parse restrictions io_uring/rsrc: require cloned buffers to share accounting contexts io_uring: simplify the SQPOLL thread check when cancelling requests io_uring: expose read/write attribute capability io_uring/rw: don't gate retry on completion context io_uring/rw: handle -EAGAIN retry at IO completion time io_uring/rw: use io_rw_recycle() from cleanup path io_uring/rsrc: simplify the bvec iter count calculation io_uring: ensure io_queue_deferred() is out-of-line io_uring/rw: always clear ->bytes_done on io_async_rw setup io_uring/rw: use NULL for rw->free_iovec assigment io_uring/rw: don't mask in f_iocb_flags io_uring/msg_ring: Drop custom destructor io_uring: Move old async data allocation helper to header io_uring/rw: Allocate async data through helper io_uring/net: Allocate msghdr async data through helper io_uring/uring_cmd: Allocate async data through generic helper io_uring/poll: Allocate apoll with generic alloc_cache helper io_uring/futex: Allocate ifd with generic alloc_cache helper ... Signed-off-by: Jens Axboe <axboe@xxxxxxxxx> diff --cc io_uring/register.c index 371aec87e078,4d507a0390e8..6efbeee734a9 --- a/io_uring/register.c +++ b/io_uring/register.c @@@ -403,11 -396,11 +396,11 @@@ static void io_register_free_rings(stru static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg) { + struct io_uring_region_desc rd; struct io_ring_ctx_rings o = { }, n = { }, *to_free = NULL; size_t size, sq_array_offset; + unsigned i, tail, old_head; struct io_uring_params p; - void *ptr; - unsigned i, tail; int ret; /* for single issuer, must be owner resizing */ @@@ -441,29 -434,26 +434,34 @@@ if (size == SIZE_MAX) return -EOVERFLOW; - if (!(p.flags & IORING_SETUP_NO_MMAP)) - n.rings = io_pages_map(&n.ring_pages, &n.n_ring_pages, size); - else - n.rings = __io_uaddr_map(&n.ring_pages, &n.n_ring_pages, - p.cq_off.user_addr, size); - if (IS_ERR(n.rings)) - return PTR_ERR(n.rings); + memset(&rd, 0, sizeof(rd)); + rd.size = PAGE_ALIGN(size); + if (p.flags & IORING_SETUP_NO_MMAP) { + rd.user_addr = p.cq_off.user_addr; + rd.flags |= IORING_MEM_REGION_TYPE_USER; + } + ret = io_create_region_mmap_safe(ctx, &n.ring_region, &rd, IORING_OFF_CQ_RING); + if (ret) { + io_register_free_rings(ctx, &p, &n); + return ret; + } + n.rings = io_region_get_ptr(&n.ring_region); - n.rings->sq_ring_mask = p.sq_entries - 1; - n.rings->cq_ring_mask = p.cq_entries - 1; - n.rings->sq_ring_entries = p.sq_entries; - n.rings->cq_ring_entries = p.cq_entries; + /* + * At this point n.rings is shared with userspace, just like o.rings + * is as well. While we don't expect userspace to modify it while + * a resize is in progress, and it's most likely that userspace will + * shoot itself in the foot if it does, we can't always assume good + * intent... Use read/write once helpers from here on to indicate the + * shared nature of it. + */ + WRITE_ONCE(n.rings->sq_ring_mask, p.sq_entries - 1); + WRITE_ONCE(n.rings->cq_ring_mask, p.cq_entries - 1); + WRITE_ONCE(n.rings->sq_ring_entries, p.sq_entries); + WRITE_ONCE(n.rings->cq_ring_entries, p.cq_entries); if (copy_to_user(arg, &p, sizeof(p))) { - io_register_free_rings(&p, &n); + io_register_free_rings(ctx, &p, &n); return -EFAULT; } @@@ -516,14 -508,12 +516,13 @@@ * Now copy SQ and CQ entries, if any. If either of the destination * rings can't hold what is already there, then fail the operation. */ - n.sq_sqes = ptr; - tail = READ_ONCE(o.rings->sq.tail); + tail = o.rings->sq.tail; - if (tail - o.rings->sq.head > p.sq_entries) + old_head = READ_ONCE(o.rings->sq.head); + if (tail - old_head > p.sq_entries) goto overflow; - for (i = o.rings->sq.head; i < tail; i++) { + for (i = old_head; i < tail; i++) { unsigned src_head = i & (ctx->sq_entries - 1); - unsigned dst_head = i & n.rings->sq_ring_mask; + unsigned dst_head = i & (p.sq_entries - 1); n.sq_sqes[dst_head] = o.sq_sqes[src_head]; } diff --cc io_uring/uring_cmd.c index ce7726a04883,d235043db21e..7fb7c0a08996 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@@ -188,14 -163,14 +168,22 @@@ void io_uring_cmd_done(struct io_uring_ } EXPORT_SYMBOL_GPL(io_uring_cmd_done); ++static void io_uring_cmd_init_once(void *obj) ++{ ++ struct io_uring_cmd_data *data = obj; ++ ++ data->op_data = NULL; ++} ++ static int io_uring_cmd_prep_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd); - struct uring_cache *cache; + struct io_uring_cmd_data *cache; - cache = io_uring_async_get(req); - if (unlikely(!cache)) - cache = io_uring_alloc_async_data(&req->ctx->uring_cache, req, NULL); ++ cache = io_uring_alloc_async_data(&req->ctx->uring_cache, req, ++ io_uring_cmd_init_once); + if (!cache) return -ENOMEM; if (!(req->flags & REQ_F_FORCE_ASYNC)) { Please pull! The following changes since commit 4bbf9020becbfd8fc2c3da790855b7042fad455b: Linux 6.13-rc4 (2024-12-22 13:22:21 -0800) are available in the Git repository at: git://git.kernel.dk/linux.git tags/for-6.14/io_uring-20250119 for you to fetch changes up to 561e3a0c40dc7e3ab7b0b3647a2b89eca16215d9: io_uring/fdinfo: fix io_uring_show_fdinfo() misuse of ->d_iname (2025-01-19 07:28:37 -0700) ---------------------------------------------------------------- for-6.14/io_uring-20250119 ---------------------------------------------------------------- Al Viro (1): io_uring/fdinfo: fix io_uring_show_fdinfo() misuse of ->d_iname Anuj Gupta (8): block: define set of integrity flags to be inherited by cloned bip block: modify bio_integrity_map_user to accept iov_iter as argument fs, iov_iter: define meta io descriptor fs: introduce IOCB_HAS_METADATA for metadata io_uring: introduce attributes for read/write and PI support block: introduce BIP_CHECK_GUARD/REFTAG/APPTAG bip_flags scsi: add support for user-meta interface io_uring: expose read/write attribute capability Bui Quang Minh (2): io_uring/rsrc: simplify the bvec iter count calculation io_uring: simplify the SQPOLL thread check when cancelling requests Christoph Hellwig (1): block: copy back bounce buffer to user-space correctly in case of split Colin Ian King (1): io_uring/kbuf: fix unintentional sign extension on shift of reg.bgid David Wei (1): io_uring: clean up io_prep_rw_setup() Gabriel Krisman Bertazi (9): io_uring: Fold allocation into alloc_cache helper io_uring: Add generic helper to allocate async data io_uring/futex: Allocate ifd with generic alloc_cache helper io_uring/poll: Allocate apoll with generic alloc_cache helper io_uring/uring_cmd: Allocate async data through generic helper io_uring/net: Allocate msghdr async data through helper io_uring/rw: Allocate async data through helper io_uring: Move old async data allocation helper to header io_uring/msg_ring: Drop custom destructor Jann Horn (1): io_uring/rsrc: require cloned buffers to share accounting contexts Jens Axboe (8): block: make bio_integrity_map_user() static inline io_uring/rw: don't mask in f_iocb_flags io_uring/rw: use NULL for rw->free_iovec assigment io_uring/rw: always clear ->bytes_done on io_async_rw setup io_uring: ensure io_queue_deferred() is out-of-line io_uring/rw: use io_rw_recycle() from cleanup path io_uring/rw: handle -EAGAIN retry at IO completion time io_uring/rw: don't gate retry on completion context Josh Triplett (1): io_uring: Factor out a function to parse restrictions Kanchan Joshi (2): nvme: add support for passing on the application tag block: add support to pass user meta buffer Pavel Begunkov (21): io_uring: rename ->resize_lock io_uring/rsrc: export io_check_coalesce_buffer io_uring/memmap: flag vmap'ed regions io_uring/memmap: flag regions with user pages io_uring/memmap: account memory before pinning io_uring/memmap: reuse io_free_region for failure path io_uring/memmap: optimise single folio regions io_uring/memmap: helper for pinning region pages io_uring/memmap: add IO_REGION_F_SINGLE_REF io_uring/memmap: implement kernel allocated regions io_uring/memmap: implement mmap for regions io_uring: pass ctx to io_register_free_rings io_uring: use region api for SQ io_uring: use region api for CQ io_uring/kbuf: use mmap_lock to sync with mmap io_uring/kbuf: remove pbuf ring refcounting io_uring/kbuf: use region api for pbuf rings io_uring/memmap: unify io_uring mmap'ing code io_uring: don't vmap single page regions io_uring: prevent reg-wait speculations io_uring: reuse io_should_terminate_tw() for cmds block/bio-integrity.c | 84 +++++++-- block/blk-integrity.c | 10 +- block/fops.c | 45 +++-- drivers/nvme/host/core.c | 21 ++- drivers/scsi/sd.c | 4 +- include/linux/bio-integrity.h | 25 ++- include/linux/fs.h | 1 + include/linux/io_uring_types.h | 26 +-- include/linux/uio.h | 9 + include/uapi/linux/fs.h | 9 + include/uapi/linux/io_uring.h | 17 ++ io_uring/alloc_cache.h | 13 ++ io_uring/fdinfo.c | 9 +- io_uring/futex.c | 13 +- io_uring/io_uring.c | 136 +++++++-------- io_uring/io_uring.h | 23 +++ io_uring/kbuf.c | 226 ++++++++----------------- io_uring/kbuf.h | 20 +-- io_uring/memmap.c | 375 ++++++++++++++++++++--------------------- io_uring/memmap.h | 23 ++- io_uring/msg_ring.c | 7 - io_uring/msg_ring.h | 1 - io_uring/net.c | 35 ++-- io_uring/poll.c | 13 +- io_uring/register.c | 155 +++++++++-------- io_uring/rsrc.c | 40 +++-- io_uring/rsrc.h | 4 + io_uring/rw.c | 212 +++++++++++++---------- io_uring/rw.h | 14 +- io_uring/timeout.c | 5 +- io_uring/uring_cmd.c | 22 +-- io_uring/waitid.c | 4 +- 32 files changed, 833 insertions(+), 768 deletions(-) -- Jens Axboe