Add a new flag IORING_REGISTER_USE_REGISTERED_RING (set via the high bit of the opcode) to treat the fd as a registered index rather than a file descriptor. This makes it possible for a library to open an io_uring, register the ring fd, close the ring fd, and subsequently use the ring entirely via registered index. Signed-off-by: Josh Triplett <josh@xxxxxxxxxxxxxxxx> --- This is enough for many libraries to use io_uring transparently without disrupting any callers. Libraries with even more stringent requirements (e.g. never even transiently having a file descriptor open) will need two additional pieces: - Adding a flag to io_uring_setup to set up the ring directly as a registered file descriptor, without ever putting it in the file descriptor table. - Supporting the initial mmap via a registered file descriptor, such as via an io_uring_register call. include/uapi/linux/io_uring.h | 6 +++++- io_uring/io_uring.c | 30 +++++++++++++++++++++++------- 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 6b83177fd41d..103b4babc175 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -427,6 +427,7 @@ struct io_uring_params { #define IORING_FEAT_RSRC_TAGS (1U << 10) #define IORING_FEAT_CQE_SKIP (1U << 11) #define IORING_FEAT_LINKED_FILE (1U << 12) +#define IORING_FEAT_REG_REG_RING (1U << 13) /* * io_uring_register(2) opcodes and arguments @@ -474,7 +475,10 @@ enum { IORING_REGISTER_FILE_ALLOC_RANGE = 25, /* this goes last */ - IORING_REGISTER_LAST + IORING_REGISTER_LAST, + + /* flag added to the opcode to use a registered ring fd */ + IORING_REGISTER_USE_REGISTERED_RING = 1U << 31 }; /* io-wq worker categories */ diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 2965b354efc8..efe5170d3e77 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -3350,7 +3350,7 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p, IORING_FEAT_POLL_32BITS | IORING_FEAT_SQPOLL_NONFIXED | IORING_FEAT_EXT_ARG | IORING_FEAT_NATIVE_WORKERS | IORING_FEAT_RSRC_TAGS | IORING_FEAT_CQE_SKIP | - IORING_FEAT_LINKED_FILE; + IORING_FEAT_LINKED_FILE | IORING_FEAT_REG_REG_RING; if (copy_to_user(params, p, sizeof(*p))) { ret = -EFAULT; @@ -3857,13 +3857,29 @@ SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode, long ret = -EBADF; struct fd f; - f = fdget(fd); - if (!f.file) - return -EBADF; + /* + * Ring fd has been registered via IORING_REGISTER_RING_FDS, we + * need only dereference our task private array to find it. + */ + if (opcode & IORING_REGISTER_USE_REGISTERED_RING) { + struct io_uring_task *tctx = current->io_uring; - ret = -EOPNOTSUPP; - if (!io_is_uring_fops(f.file)) - goto out_fput; + if (unlikely(!tctx || fd >= IO_RINGFD_REG_MAX)) + return -EINVAL; + fd = array_index_nospec(fd, IO_RINGFD_REG_MAX); + f.file = tctx->registered_rings[fd]; + f.flags = 0; + if (unlikely(!f.file)) + return -EBADF; + opcode &= ~IORING_REGISTER_USE_REGISTERED_RING; + } else { + f = fdget(fd); + if (unlikely(!f.file)) + return -EBADF; + ret = -EOPNOTSUPP; + if (!io_is_uring_fops(f.file)) + goto out_fput; + } ctx = f.file->private_data; -- 2.37.2