sqe->flags is a u8 and 7 of its bits are already in use, so take the last bit to signal extended flags. If bit 7 (IOSQE_HAS_EXT_FLAGS_BIT) is set, the sqe carries extended flags in its last byte (.ext_flags), or in bit23~bit16 of sqe->uring_cmd_flags for IORING_OP_URING_CMD. The return type of io_slot_flags() is widened to `unsigned long` because the affected bits are now beyond 32 bits. Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxx> --- include/linux/io_uring_types.h | 6 ++++-- include/uapi/linux/io_uring.h | 13 +++++++++++++ io_uring/filetable.h | 2 +- io_uring/io_uring.c | 14 +++++++++++++- io_uring/uring_cmd.c | 3 ++- 5 files changed, 33 insertions(+), 5 deletions(-) diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 3e72fa52f1e3..67347e5d06ec 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -435,6 +435,7 @@ struct io_tw_state { }; enum { + /* 1st byte is from sqe->flags, and 2nd is from sqe ext_flags */ REQ_F_FIXED_FILE_BIT = IOSQE_FIXED_FILE_BIT, REQ_F_IO_DRAIN_BIT = IOSQE_IO_DRAIN_BIT, REQ_F_LINK_BIT = IOSQE_IO_LINK_BIT, @@ -442,9 +443,10 @@ enum { REQ_F_FORCE_ASYNC_BIT = IOSQE_ASYNC_BIT, REQ_F_BUFFER_SELECT_BIT = IOSQE_BUFFER_SELECT_BIT, REQ_F_CQE_SKIP_BIT = IOSQE_CQE_SKIP_SUCCESS_BIT, + REQ_F_SQE_EXT_FLAGS_BIT = IOSQE_HAS_EXT_FLAGS_BIT, - /* first byte is taken by user flags, shift it to not overlap */ - REQ_F_FAIL_BIT = 8, + /* first 2 bytes are taken by user flags, shift it to not overlap */ + REQ_F_FAIL_BIT = 16, REQ_F_INFLIGHT_BIT, REQ_F_CUR_POS_BIT, REQ_F_NOWAIT_BIT, diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index a7f847543a7f..4847d7cf1ac9 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -98,6 +98,10 @@ struct io_uring_sqe { __u64 __pad2[1]; }; __u64 optval; + struct { + __u8 __pad4[15]; + __u8 ext_flags; + }; /* * If the ring is initialized with IORING_SETUP_SQE128, then * this field is used for 80 bytes of arbitrary command data @@ -123,6 +127,7 @@ 
enum io_uring_sqe_flags_bit { IOSQE_ASYNC_BIT, IOSQE_BUFFER_SELECT_BIT, IOSQE_CQE_SKIP_SUCCESS_BIT, + IOSQE_HAS_EXT_FLAGS_BIT, }; /* @@ -142,6 +147,11 @@ enum io_uring_sqe_flags_bit { #define IOSQE_BUFFER_SELECT (1U << IOSQE_BUFFER_SELECT_BIT) /* don't post CQE if request succeeded */ #define IOSQE_CQE_SKIP_SUCCESS (1U << IOSQE_CQE_SKIP_SUCCESS_BIT) +/* + * sqe ext flags carried in the last byte, or bit23~bit16 of + * sqe->uring_cmd_flags for IORING_OP_URING_CMD. + */ +#define IOSQE_HAS_EXT_FLAGS (1U << IOSQE_HAS_EXT_FLAGS_BIT) /* * io_uring_setup() flags @@ -263,11 +273,14 @@ enum io_uring_op { /* * sqe->uring_cmd_flags top 8bits aren't available for userspace + * bit31 ~ bit24 kernel internal usage + * bit23 ~ bit16 sqe ext flags * IORING_URING_CMD_FIXED use registered buffer; pass this flag * along with setting sqe->buf_index. */ #define IORING_URING_CMD_FIXED (1U << 0) #define IORING_URING_CMD_MASK IORING_URING_CMD_FIXED +#define IORING_URING_CMD_EXT_MASK 0x00ff0000 /* diff --git a/io_uring/filetable.h b/io_uring/filetable.h index b2435c4dca1f..d25247c9b9f5 100644 --- a/io_uring/filetable.h +++ b/io_uring/filetable.h @@ -43,7 +43,7 @@ io_fixed_file_slot(struct io_file_table *table, unsigned i) #define FFS_ISREG 0x2UL #define FFS_MASK ~(FFS_NOWAIT|FFS_ISREG) -static inline unsigned int io_slot_flags(struct io_fixed_file *slot) +static inline unsigned long io_slot_flags(struct io_fixed_file *slot) { return (slot->file_ptr & ~FFS_MASK) << REQ_F_SUPPORT_NOWAIT_BIT; } diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 8df9ad010803..6d4def11aebf 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -109,7 +109,8 @@ IOSQE_IO_HARDLINK | IOSQE_ASYNC) #define SQE_VALID_FLAGS (SQE_COMMON_FLAGS | IOSQE_BUFFER_SELECT | \ - IOSQE_IO_DRAIN | IOSQE_CQE_SKIP_SUCCESS) + IOSQE_IO_DRAIN | IOSQE_CQE_SKIP_SUCCESS | \ + IOSQE_HAS_EXT_FLAGS) #define IO_REQ_CLEAN_FLAGS (REQ_F_BUFFER_SELECTED | REQ_F_NEED_CLEANUP | \ REQ_F_POLLED | REQ_F_INFLIGHT | REQ_F_CREDS | \ @@ 
-2080,6 +2081,17 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, /* enforce forwards compatibility on users */ if (sqe_flags & ~SQE_VALID_FLAGS) return io_init_fail_req(req, -EINVAL); + if (sqe_flags & IOSQE_HAS_EXT_FLAGS) { + u32 sqe_ext_flags; + + if (opcode != IORING_OP_URING_CMD) + sqe_ext_flags = READ_ONCE(sqe->ext_flags); + else + sqe_ext_flags = (READ_ONCE(sqe->uring_cmd_flags) + & IORING_URING_CMD_EXT_MASK) >> 16; + req->flags |= sqe_ext_flags << 8; + } + if (sqe_flags & IOSQE_BUFFER_SELECT) { if (!def->buffer_select) return io_init_fail_req(req, -EOPNOTSUPP); diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index 334d31dd6628..43b71f29e7b3 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -202,7 +202,8 @@ int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (sqe->__pad1) return -EINVAL; - ioucmd->flags = READ_ONCE(sqe->uring_cmd_flags); + ioucmd->flags = READ_ONCE(sqe->uring_cmd_flags) & + ~IORING_URING_CMD_EXT_MASK; if (ioucmd->flags & ~IORING_URING_CMD_MASK) return -EINVAL; -- 2.42.0