In hindsight everything is clearer, but it probably should've been known that 8 bits of ->flags would run out sooner rather than later. Rather than gobble up the last bit for a random use case, add a bit that controls whether or not ->personality is used as a flags2 argument. If that is the case, then there's a new IOSQE2_PERSONALITY flag that tells io_uring which personality field to read. While this isn't the prettiest, it does allow extending with 15 extra flags, and retains the ability to use personality with any kind of command. The exception is uring cmd, where personality2 will overlap with the space set aside for SQE128. If they really need that, then that would have to be done via a uring cmd flag. Signed-off-by: Jens Axboe <axboe@xxxxxxxxx> --- Was toying with this idea to allow for some more flags, I just don't like grabbing the last flag and punting the problem both to the future and to "somebody else's problem". Here's one way we could do it, without rewriting the entire sqe into a v2. Which does need to happen at some point, but preferably without pressing issues around. I don't _hate_ it, there's really not a great way to do this. And I do think personality is the least used of all the things, and probably will never get used with uring_cmd. But if it had to work for that, then there are certainly ways to pass in that info. Not that we ever would... 
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 77fd508d043a..8a45bf6a68ca 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -433,6 +433,7 @@ struct io_tw_state { }; enum { + /* 8 bits of sqe->flags */ REQ_F_FIXED_FILE_BIT = IOSQE_FIXED_FILE_BIT, REQ_F_IO_DRAIN_BIT = IOSQE_IO_DRAIN_BIT, REQ_F_LINK_BIT = IOSQE_IO_LINK_BIT, @@ -440,9 +441,13 @@ enum { REQ_F_FORCE_ASYNC_BIT = IOSQE_ASYNC_BIT, REQ_F_BUFFER_SELECT_BIT = IOSQE_BUFFER_SELECT_BIT, REQ_F_CQE_SKIP_BIT = IOSQE_CQE_SKIP_SUCCESS_BIT, + REQ_F_FLAGS2_BIT = IOSQE_FLAGS2_BIT, - /* first byte is taken by user flags, shift it to not overlap */ - REQ_F_FAIL_BIT = 8, + /* 16 bits of sqe->flags2 */ + REQ_F_PERSONALITY_BIT = IOSQE2_PERSONALITY_BIT + 8, + + /* first byte taken by sqe->flags, next 2 by sqe->flags2 */ + REQ_F_FAIL_BIT = 24, REQ_F_INFLIGHT_BIT, REQ_F_CUR_POS_BIT, REQ_F_NOWAIT_BIT, @@ -492,6 +497,10 @@ enum { REQ_F_BUFFER_SELECT = IO_REQ_FLAG(REQ_F_BUFFER_SELECT_BIT), /* IOSQE_CQE_SKIP_SUCCESS */ REQ_F_CQE_SKIP = IO_REQ_FLAG(REQ_F_CQE_SKIP_BIT), + /* ->flags2 is valid */ + REQ_F_FLAGS2 = IO_REQ_FLAG(REQ_F_FLAGS2_BIT), + + REQ_F_PERSONALITY = IO_REQ_FLAG(REQ_F_PERSONALITY_BIT), /* fail rest of links */ REQ_F_FAIL = IO_REQ_FLAG(REQ_F_FAIL_BIT), diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index ce58c4590de6..c7c3ba69ffdd 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -82,8 +82,12 @@ struct io_uring_sqe { /* for grouped buffer selection */ __u16 buf_group; } __attribute__((packed)); - /* personality to use, if used */ - __u16 personality; + union { + /* personality to use, if used */ + __u16 personality; + /* 2nd set of flags, can't be used with personality */ + __u16 flags2; + }; union { __s32 splice_fd_in; __u32 file_index; @@ -99,11 +103,17 @@ struct io_uring_sqe { __u64 __pad2[1]; }; __u64 optval; - /* - * If the ring is initialized with IORING_SETUP_SQE128, then - * this 
field is used for 80 bytes of arbitrary command data - */ - __u8 cmd[0]; + struct { + /* + * If the ring is initialized with IORING_SETUP_SQE128, + * then this field is used for 80 bytes of arbitrary + * command data + */ + __u8 cmd[0]; + + /* personality to use, if IOSQE2_PERSONALITY set */ + __u16 personality2; + }; }; }; @@ -124,6 +134,11 @@ enum io_uring_sqe_flags_bit { IOSQE_ASYNC_BIT, IOSQE_BUFFER_SELECT_BIT, IOSQE_CQE_SKIP_SUCCESS_BIT, + IOSQE_FLAGS2_BIT, +}; + +enum io_uring_sqe_flags2_bit { + IOSQE2_PERSONALITY_BIT, }; /* @@ -143,6 +158,14 @@ enum io_uring_sqe_flags_bit { #define IOSQE_BUFFER_SELECT (1U << IOSQE_BUFFER_SELECT_BIT) /* don't post CQE if request succeeded */ #define IOSQE_CQE_SKIP_SUCCESS (1U << IOSQE_CQE_SKIP_SUCCESS_BIT) +/* ->flags2 is valid */ +#define IOSQE_FLAGS2 (1U << IOSQE_FLAGS2_BIT) + +/* + * sqe->flags2 + */ + /* if set, sqe->personality2 contains personality */ +#define IOSQE2_PERSONALITY (1U << IOSQE2_PERSONALITY_BIT) /* * io_uring_setup() flags diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 1149fba20503..c2bbadd5640d 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -109,7 +109,8 @@ IOSQE_IO_HARDLINK | IOSQE_ASYNC) #define SQE_VALID_FLAGS (SQE_COMMON_FLAGS | IOSQE_BUFFER_SELECT | \ - IOSQE_IO_DRAIN | IOSQE_CQE_SKIP_SUCCESS) + IOSQE_IO_DRAIN | IOSQE_CQE_SKIP_SUCCESS | \ + IOSQE_FLAGS2 | IOSQE2_PERSONALITY) #define IO_REQ_CLEAN_FLAGS (REQ_F_BUFFER_SELECTED | REQ_F_NEED_CLEANUP | \ REQ_F_POLLED | REQ_F_INFLIGHT | REQ_F_CREDS | \ @@ -2032,6 +2033,8 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, req->opcode = opcode = READ_ONCE(sqe->opcode); /* same numerical values with corresponding REQ_F_*, safe to copy */ sqe_flags = READ_ONCE(sqe->flags); + if (sqe_flags & REQ_F_FLAGS2) + sqe_flags |= (__u32) READ_ONCE(sqe->flags2) << 8; req->flags = (__force io_req_flags_t) sqe_flags; req->cqe.user_data = READ_ONCE(sqe->user_data); req->file = NULL; @@ -2095,8 +2098,12 @@ static int 
io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, } } - personality = READ_ONCE(sqe->personality); - if (personality) { + personality = 0; + if (req->flags & REQ_F_PERSONALITY) + personality = READ_ONCE(sqe->personality2); + else if (!(req->flags & REQ_F_FLAGS2)) + personality = READ_ONCE(sqe->personality); + if (unlikely(personality)) { int ret; req->creds = xa_load(&ctx->personalities, personality); diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index 535909a38e76..ee04e0c48672 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -200,7 +200,7 @@ int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd); - if (sqe->__pad1) + if (sqe->__pad1 || req->flags & REQ_F_PERSONALITY) return -EINVAL; ioucmd->flags = READ_ONCE(sqe->uring_cmd_flags); -- Jens Axboe